千家信息网

MapReduce编写wordcount程序代码实现

发表于:2025-01-23 作者:千家信息网编辑
千家信息网最后更新 2025年01月23日,MapReduce经典案例代码(wordcount):以经典的wordcount为例,通过自定义的mapper和reducer来实现单词计数。
千家信息网最后更新 2025年01月23日:MapReduce编写wordcount程序代码实现

MapReduce经典案例代码(wordcount)

以经典的wordcount为例,通过自定义的mapper和reducer来实现单词计数

package com.fwmagic.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Classic MapReduce word-count example: a custom Mapper tokenizes each input
 * line and emits {@code (word, 1)} pairs; a custom Reducer sums the counts
 * per word.
 */
public class WordCountDemo {

    /**
     * Mapper: input key is the byte offset of the line ({@link LongWritable}),
     * input value is the line text; output is {@code (word, 1)}.
     *
     * <p>Fix over the original listing: the generic type parameters were
     * missing (raw {@code Mapper}), which does not compile cleanly against
     * the Hadoop API.
     */
    public static class WordCountMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Hadoop idiom: reuse writable instances instead of allocating
        // new Text/IntWritable objects for every token.
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on runs of whitespace; split(" ") would emit empty
            // tokens for consecutive/leading spaces and count "" as a word.
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    /**
     * Reducer: sums all the 1-counts emitted for each word and writes
     * {@code (word, total)}. Also usable as a combiner because summation
     * is associative and commutative.
     */
    public static class WordCountReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private final IntWritable total = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                              Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            total.set(count);
            context.write(key, total);
        }
    }

    /**
     * Job driver: configures the job and submits it to the cluster.
     * Reads from {@code /wordcount/input}, writes to {@code /wordcount/output}
     * (the output directory must not already exist).
     *
     * @param args unused
     * @throws Exception if job submission or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDemo.class);

        job.setMapperClass(WordCountMapper.class);
        // Combiner pre-aggregates map output locally, cutting shuffle
        // traffic; safe here because the reduce function is a pure sum.
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path("/wordcount/input"));
        FileOutputFormat.setOutputPath(job, new Path("/wordcount/output"));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

集群中/wordcount/input目录下数据内容

打包项目,执行job

hadoop jar fwmagic-wordcount.jar 

执行输出结果

0