千家信息网

hadoop中如何实现KeyValueTextInputFormat

发表于:2024-11-24 作者:千家信息网编辑
千家信息网最后更新 2024年11月24日,这篇文章主要为大家展示了"hadoop中如何实现KeyValueTextInputFormat",内容简而易懂,条理清晰,希望能够帮助大家解决疑惑,下面让小编带领大家一起研究并学习一下"hadoop中如何实现KeyValueTextInputFormat"这篇文章吧。
千家信息网最后更新 2024年11月24日hadoop中如何实现KeyValueTextInputFormat

这篇文章主要为大家展示了"hadoop中如何实现KeyValueTextInputFormat",内容简而易懂,条理清晰,希望能够帮助大家解决疑惑,下面让小编带领大家一起研究并学习一下"hadoop中如何实现KeyValueTextInputFormat"这篇文章吧。

package com.test;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;/** * hello jim * hello tim *  * 最后输出 * hello 1 * jim 1 * hello 1 * tim 1 */public class WordCountKeyValue extends Configured implements Tool {  public static class Map extends Mapper {  /**   * key hello   * value jim   */  public void map(Text key, Text value, Context context) throws IOException, InterruptedException {   context.write(key, new IntWritable(1));   context.write(value, new IntWritable(1));  } }  public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {  Configuration conf = this.getConf();  //指定KeyValueTextInputFormat分割符,默认分割符是\t  //conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");  conf.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, "\t");    Job job = new Job(conf);  job.setJobName(WordCountKeyValue.class.getSimpleName());  job.setJarByClass(WordCountKeyValue.class);    FileInputFormat.addInputPath(job, new Path(args[0]));  FileOutputFormat.setOutputPath(job, new Path(args[1]));    job.setNumReduceTasks(0);  job.setMapperClass(Map.class);    job.setInputFormatClass(KeyValueTextInputFormat.class);  job.setOutputFormatClass(TextOutputFormat.class);    job.setMapOutputKeyClass(Text.class);  job.setMapOutputValueClass(IntWritable.class);  job.waitForCompletion(true);  return 
job.isSuccessful()?0:1; }  public static void main(String[] args) throws Exception {  int exit = ToolRunner.run(new WordCount(), args);  System.exit(exit); } }

以上是"hadoop中如何实现KeyValueTextInputFormat"这篇文章的所有内容,感谢各位的阅读!相信大家都有了一定的了解,希望分享的内容对大家有所帮助,如果还想学习更多知识,欢迎关注行业资讯频道!

0