Hadoop MapReduce Java example
WordCount workflow:
input -> split -> map -> shuffle -> reduce -> output
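For example, given the input line "hello world hello", the map phase emits (hello,1), (world,1), (hello,1); the shuffle phase groups the pairs by key into (hello,[1,1]) and (world,[1]); and the reduce phase sums each group, producing (hello,2) and (world,1).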
Running the built-in WordCount example against an input file:
hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount 10803060234.txt /output
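Once the job completes, the counts can be read back from HDFS. Assuming the default single reducer, the output lands in one part file:

hdfs dfs -cat /output/part-r-00000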
Driver class, Test.java:

package wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Test {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Point the client at the HDFS NameNode and submit through YARN.
        conf.set("fs.defaultFS", "hdfs://172.26.19.40:9000");
        conf.set("mapreduce.job.jar", "target/wc.jar");
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.hostname", "hmaster");
        // Required when submitting from a Windows client to a Linux cluster.
        conf.set("mapreduce.app-submission.cross-platform", "true");

        Job job = Job.getInstance(conf);
        job.setMapperClass(WordMapper.class);
        job.setReducerClass(WordReducer.class);
        // The map output value type (IntWritable) differs from the final
        // output value type (LongWritable), so both must be declared.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Set the HDFS input and output paths before running.
        FileInputFormat.setInputPaths(job, "");
        FileOutputFormat.setOutputPath(job, new Path(""));

        job.waitForCompletion(true);
    }
}
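One practical caveat: FileOutputFormat fails the job if the output directory already exists. A minimal guard, assuming the same conf and an output path of /output as in the command above, deletes any stale directory before submission:

import org.apache.hadoop.fs.FileSystem;

// Remove a previous run's output directory, if present, before submitting.
Path outputPath = new Path("/output");
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outputPath)) {
    fs.delete(outputPath, true); // true = delete recursively
}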
Mapper, WordMapper.java:

package wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each value is one line of input; emit (word, 1) for every word.
        String lineValue = value.toString();
        String[] words = lineValue.split(" ");
        IntWritable cIntWritable = new IntWritable(1);
        for (String word : words) {
            context.write(new Text(word), cIntWritable);
        }
    }
}
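Note that split(" ") breaks only on single spaces, so tabs or runs of spaces yield empty or fused tokens. A more robust variant of the loop body (a sketch; output is otherwise identical) splits on any whitespace and skips empty tokens:

// Inside map(): split on any run of whitespace.
String[] words = value.toString().split("\\s+");
for (String word : words) {
    if (!word.isEmpty()) {
        context.write(new Text(word), new IntWritable(1));
    }
}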
Reducer, WordReducer.java:

package wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the per-word counts emitted by the mappers.
        long tmpCount = 0L;
        for (IntWritable value : values) {
            tmpCount += value.get();
        }
        context.write(key, new LongWritable(tmpCount));
    }
}
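Because the mapper emits IntWritable values but this reducer emits LongWritable, the reducer cannot also serve as a combiner: a combiner's output types must match the map output types. If a combiner is wanted, a separate IntWritable-to-IntWritable reducer is needed; a minimal sketch (WordCombiner is a hypothetical class, not part of the original project):

package wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical combiner: sums counts and re-emits IntWritable so that its
// output types match the map output types.
public class WordCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}

It would be registered in the driver with job.setCombinerClass(WordCombiner.class).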
pom.xml (relevant sections):

<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hadoop.version>2.7.3</hadoop.version>
</properties>

<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>
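With these dependencies in place, the job can be packaged and submitted from the project root. A sketch, assuming the Maven build is configured to produce target/wc.jar (the name the driver sets via mapreduce.job.jar):

mvn clean package
hadoop jar target/wc.jar wordcount.Test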