千家信息网

hadoop中如何实现GenericWritable

发表于:2025-02-03 作者:千家信息网编辑
千家信息网最后更新 2025年02月03日,这篇文章主要介绍了hadoop中如何实现GenericWritable,具有一定借鉴价值,感兴趣的朋友可以参考下,希望大家阅读完这篇文章之后大有收获,下面让小编带着大家一起了解一下。
千家信息网最后更新 2025年02月03日hadoop中如何实现GenericWritable

这篇文章主要介绍了hadoop中如何实现GenericWritable,具有一定借鉴价值,感兴趣的朋友可以参考下,希望大家阅读完这篇文章之后大有收获,下面让小编带着大家一起了解一下。

package com.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.GenericWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Business scenario: two input files delimit words differently, yet we want a
 * single combined word count across both.
 *
 * <p>File 1 (text1.txt), comma-separated:
 * <pre>
 *  hello,what
 *  you,haha
 * </pre>
 * File 2 (text2.txt), tab-separated:
 * <pre>
 *  girl boy
 *  father mother
 * </pre>
 *
 * <p>Each mapper emits a different Writable payload (LongWritable vs. Text),
 * so the shared map-output value type is a {@link GenericWritable} wrapper
 * ({@link MyGenericWritable}) that can carry either.
 */
public class WordCountGenericWritable extends Configured implements Tool {

 /**
  * Mapper for the comma-delimited file. Input is standard
  * TextInputFormat (byte offset, line); emits (word, 1 as LongWritable).
  */
 // NOTE: generic parameters restored — the scraped original declared a raw
 // "extends Mapper", so map() never overrode the framework callback.
 public static class Map1 extends Mapper<LongWritable, Text, Text, MyGenericWritable> {
  @Override
  public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer st = new StringTokenizer(line, ",");
   while (st.hasMoreElements()) {
    // Wrap the count in MyGenericWritable so both mappers share one value type.
    context.write(new Text(st.nextElement().toString()),
      new MyGenericWritable(new LongWritable(1)));
   }
  }
 }

 /**
  * Mapper for the tab-delimited file. KeyValueTextInputFormat splits each
  * line into (key, value) at the first tab, so both halves are words;
  * emits (word, "1" as Text) for each half.
  */
 public static class Map2 extends Mapper<Text, Text, Text, MyGenericWritable> {
  @Override
  public void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
   context.write(key, new MyGenericWritable(new Text("1")));
   context.write(value, new MyGenericWritable(new Text("1")));
  }
 }

 /**
  * Reducer: unwraps each MyGenericWritable and accumulates the count,
  * handling both payload types (LongWritable from Map1, Text from Map2).
  */
 public static class Reduce extends Reducer<Text, MyGenericWritable, Text, IntWritable> {
  @Override
  public void reduce(Text key, Iterable<MyGenericWritable> values, Context context)
    throws IOException, InterruptedException {
   int count = 0;
   Iterator<MyGenericWritable> it = values.iterator();
   while (it.hasNext()) {
    MyGenericWritable myGw = it.next();
    Writable value = myGw.get();
    if (value instanceof LongWritable) {
     count += (int) ((LongWritable) value).get();
    }
    if (value instanceof Text) {
     count += Integer.parseInt(((Text) value).toString());
    }
   }
   context.write(key, new IntWritable(count));
  }
 }

 /**
  * Configures and submits the job: two inputs with different InputFormats
  * and Mappers via MultipleInputs, one shared Reducer.
  *
  * @param args args[1] is the output directory
  * @return 0 on success, 1 on failure
  */
 @Override
 public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = this.getConf();
  Job job = new Job(conf);
  job.setJobName(WordCountGenericWritable.class.getSimpleName());
  job.setJarByClass(WordCountGenericWritable.class);

  // NOTE(review): input paths are hard-coded to match the tutorial cluster;
  // consider taking them from args for real use.
  MultipleInputs.addInputPath(job, new Path("hdfs://grid131:9000/text1.txt"),
    TextInputFormat.class, Map1.class);
  MultipleInputs.addInputPath(job, new Path("hdfs://grid131:9000/text2.txt"),
    KeyValueTextInputFormat.class, Map2.class);

  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setReducerClass(Reduce.class);

  job.setOutputFormatClass(TextOutputFormat.class);

  // Map output types differ from reduce output types, so they must be set explicitly.
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(MyGenericWritable.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  job.waitForCompletion(true);

  return job.isSuccessful() ? 0 : 1;
 }

 public static void main(String[] args) throws Exception {
  // BUG FIX: the original ran ToolRunner.run(new WordCount(), args),
  // launching a different class entirely instead of this job.
  int exit = ToolRunner.run(new WordCountGenericWritable(), args);
  System.exit(exit);
 }
}

/**
 * GenericWritable wrapper so a single map-output value class can carry
 * either a LongWritable (from Map1) or a Text (from Map2). GenericWritable
 * serializes a type index plus the payload; getTypes() registers the
 * allowed payload classes.
 */
class MyGenericWritable extends GenericWritable {

 /** No-arg constructor required by the Writable deserialization contract. */
 public MyGenericWritable() {
 }

 public MyGenericWritable(LongWritable longWritable) {
  super.set(longWritable);
 }

 public MyGenericWritable(Text text) {
  super.set(text);
 }

 @Override
 protected Class[] getTypes() {
  return new Class[] {LongWritable.class, Text.class};
 }
}

感谢你能够认真阅读完这篇文章,希望小编分享的"hadoop中如何实现GenericWritable"这篇文章对大家有帮助,同时也希望大家多多支持,关注行业资讯频道,更多相关知识等着你来学习!

0