
How to Compute Each Department's Total Salary with MapReduce


This article introduces how to compute each department's total salary with MapReduce. Plenty of people run into this kind of problem in real projects, so let's walk through how to handle it. Read carefully and you should come away with something useful!

Data

EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO

      7369 SMITH      CLERK           7902 17-12月-80            800                    20
      7499 ALLEN      SALESMAN        7698 20-2月 -81           1600        300         30
      7521 WARD       SALESMAN        7698 22-2月 -81           1250        500         30
      7566 JONES      MANAGER         7839 02-4月 -81           2975                    20
      7654 MARTIN     SALESMAN        7698 28-9月 -81           1250       1400         30
      7698 BLAKE      MANAGER         7839 01-5月 -81           2850                    30
      7782 CLARK      MANAGER         7839 09-6月 -81           2450                    10
      7839 KING       PRESIDENT            17-11月-81           5000                    10
      7844 TURNER     SALESMAN        7698 08-9月 -81           1500          0         30
      7900 JAMES      CLERK           7698 03-12月-81            950                    30
      7902 FORD       ANALYST         7566 03-12月-81           3000                    20
      7934 MILLER     CLERK           7782 23-1月 -82           1300                    10
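
The records are a fixed-width dump, so the mapper in the code below extracts SAL and DEPTNO by character position instead of splitting on whitespace. Here is a minimal standalone sketch of that extraction, assuming the column layout shown above (the offsets 57-68 and 79 are the ones the job code uses; ParseDemo itself is just an illustration, not part of the job):

public class ParseDemo {
    public static void main(String[] args) {
        // Sample record copied from the data above (月 counts as one char).
        String line = "      7369 SMITH      CLERK           7902 17-12月-80            800                    20";
        String sal = line.substring(57, 68).trim();  // SAL column
        String deptno = line.substring(79).trim();   // DEPTNO column
        System.out.println(deptno + " -> " + Integer.valueOf(sal)); // prints: 20 -> 800
    }
}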

Code

package cn.kissoft.hadoop.week07;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import cn.kissoft.hadoop.util.HdfsUtil;

/**
 * Homework-01: compute the total salary of each department.
 *
 * @author wukong(jinsong.sun@139.com)
 */
public class TotalSalaryByDeptMR extends Configured implements Tool {

    public static class M extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The input is a fixed-width dump: SAL sits in columns 57-67,
            // DEPTNO in the columns from 79 to the end of the line.
            String line = value.toString();
            String deptno = line.substring(79).trim();
            String sal = line.substring(57, 68).trim();
            int salary = Integer.valueOf(sal);
            context.write(new Text(deptno), new IntWritable(salary));
        }
    }

    public static class R extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf, "Job-TotalSalaryByDeptMR");
        // job.setJarByClass(this.getClass()); // left commented out, which is
        // why the console output below shows the "No job jar file set" warning

        job.setMapperClass(M.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(R.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);          // output key type
        job.setOutputValueClass(IntWritable.class); // output value type

        FileInputFormat.addInputPath(job, new Path(args[0]));   // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * @param args hdfs://bd11:9000/user/wukong/w07/emp.txt hdfs://bd11:9000/user/wukong/w07/out01/
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        checkArgs(args);
        HdfsUtil.rm(args[1], true); // delete the output directory if it already exists

        Date start = new Date();
        int res = ToolRunner.run(new Configuration(), new TotalSalaryByDeptMR(), args);
        printExecuteTime(start, new Date());
        System.exit(res);
    }

    /**
     * Checks the argument count; prints a usage message and exits when it is wrong.
     */
    private static void checkArgs(String[] args) {
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: Test_1 < input path > < output path > ");
            System.err.println("Example: hadoop jar ~/Test_1.jar hdfs://localhost:9000/home/james/Test_1 hdfs://localhost:9000/home/james/output");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }
    }

    /**
     * Prints the job's start time, end time, and elapsed minutes.
     */
    private static void printExecuteTime(Date start, Date end) {
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
        System.out.println("Job start: " + formatter.format(start));
        System.out.println("Job end:   " + formatter.format(end));
        System.out.println("Elapsed:   " + time + " minutes");
    }
}
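
An optional tweak, not part of the original job: since the reduce step is a plain sum (associative and commutative) and R's input and output types match (Text keys, IntWritable values on both sides), the same class could be registered as a combiner to pre-aggregate map output. A minimal sketch of the one extra line in run():

// Hypothetical addition, not in the original code: reuse R to pre-aggregate
// salaries on the map side before the shuffle.
job.setCombinerClass(R.class);

The counters in the console output below (Combine input records=0, Combine output records=0) show that the original run indeed used no combiner.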

Result

10	8750
20	6775
30	9400
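
These totals can be checked by hand against the SAL column of the input data:

Dept 10: 2450 + 5000 + 1300                     = 8750
Dept 20:  800 + 2975 + 3000                     = 6775
Dept 30: 1600 + 1250 + 1250 + 2850 + 1500 + 950 = 9400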

Console

14/08/31 23:01:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/08/31 23:01:01 WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/08/31 23:01:01 INFO input.FileInputFormat: Total input paths to process : 1
14/08/31 23:01:02 WARN snappy.LoadSnappy: Snappy native library not loaded
14/08/31 23:01:02 INFO mapred.JobClient: Running job: job_local248108448_0001
14/08/31 23:01:02 INFO mapred.LocalJobRunner: Waiting for map tasks
14/08/31 23:01:02 INFO mapred.LocalJobRunner: Starting task: attempt_local248108448_0001_m_000000_0
14/08/31 23:01:02 INFO mapred.Task:  Using ResourceCalculatorPlugin : null
14/08/31 23:01:02 INFO mapred.MapTask: Processing split: hdfs://bd11:9000/user/wukong/w07/emp.txt:0+1119
14/08/31 23:01:02 INFO mapred.MapTask: io.sort.mb = 100
14/08/31 23:01:02 INFO mapred.MapTask: data buffer = 79691776/99614720
14/08/31 23:01:02 INFO mapred.MapTask: record buffer = 262144/327680
14/08/31 23:01:02 INFO mapred.MapTask: Starting flush of map output
14/08/31 23:01:02 INFO mapred.MapTask: Finished spill 0
14/08/31 23:01:02 INFO mapred.Task: Task:attempt_local248108448_0001_m_000000_0 is done. And is in the process of commiting
14/08/31 23:01:02 INFO mapred.LocalJobRunner: 
14/08/31 23:01:02 INFO mapred.Task: Task 'attempt_local248108448_0001_m_000000_0' done.
14/08/31 23:01:02 INFO mapred.LocalJobRunner: Finishing task: attempt_local248108448_0001_m_000000_0
14/08/31 23:01:02 INFO mapred.LocalJobRunner: Map task executor complete.
14/08/31 23:01:02 INFO mapred.Task:  Using ResourceCalculatorPlugin : null
14/08/31 23:01:02 INFO mapred.LocalJobRunner: 
14/08/31 23:01:02 INFO mapred.Merger: Merging 1 sorted segments
14/08/31 23:01:02 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 110 bytes
14/08/31 23:01:02 INFO mapred.LocalJobRunner: 
14/08/31 23:01:02 INFO mapred.Task: Task:attempt_local248108448_0001_r_000000_0 is done. And is in the process of commiting
14/08/31 23:01:02 INFO mapred.LocalJobRunner: 
14/08/31 23:01:02 INFO mapred.Task: Task attempt_local248108448_0001_r_000000_0 is allowed to commit now
14/08/31 23:01:02 INFO output.FileOutputCommitter: Saved output of task 'attempt_local248108448_0001_r_000000_0' to hdfs://bd11:9000/user/wukong/w07/out01
14/08/31 23:01:02 INFO mapred.LocalJobRunner: reduce > reduce
14/08/31 23:01:02 INFO mapred.Task: Task 'attempt_local248108448_0001_r_000000_0' done.
14/08/31 23:01:03 INFO mapred.JobClient:  map 100% reduce 100%
14/08/31 23:01:03 INFO mapred.JobClient: Job complete: job_local248108448_0001
14/08/31 23:01:03 INFO mapred.JobClient: Counters: 19
14/08/31 23:01:03 INFO mapred.JobClient:   File Output Format Counters 
14/08/31 23:01:03 INFO mapred.JobClient:     Bytes Written=24
14/08/31 23:01:03 INFO mapred.JobClient:   File Input Format Counters 
14/08/31 23:01:03 INFO mapred.JobClient:     Bytes Read=1119
14/08/31 23:01:03 INFO mapred.JobClient:   FileSystemCounters
14/08/31 23:01:03 INFO mapred.JobClient:     FILE_BYTES_READ=426
14/08/31 23:01:03 INFO mapred.JobClient:     HDFS_BYTES_READ=2238
14/08/31 23:01:03 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=138578
14/08/31 23:01:03 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=24
14/08/31 23:01:03 INFO mapred.JobClient:   Map-Reduce Framework
14/08/31 23:01:03 INFO mapred.JobClient:     Reduce input groups=3
14/08/31 23:01:03 INFO mapred.JobClient:     Map output materialized bytes=114
14/08/31 23:01:03 INFO mapred.JobClient:     Combine output records=0
14/08/31 23:01:03 INFO mapred.JobClient:     Map input records=12
14/08/31 23:01:03 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/08/31 23:01:03 INFO mapred.JobClient:     Reduce output records=3
14/08/31 23:01:03 INFO mapred.JobClient:     Spilled Records=24
14/08/31 23:01:03 INFO mapred.JobClient:     Map output bytes=84
14/08/31 23:01:03 INFO mapred.JobClient:     Total committed heap usage (bytes)=326107136
14/08/31 23:01:03 INFO mapred.JobClient:     SPLIT_RAW_BYTES=105
14/08/31 23:01:03 INFO mapred.JobClient:     Map output records=12
14/08/31 23:01:03 INFO mapred.JobClient:     Combine input records=0
14/08/31 23:01:03 INFO mapred.JobClient:     Reduce input records=12
Job start: 2014-08-31 23:01:01
Job end:   2014-08-31 23:01:03
Elapsed:   0.024416666 minutes

"如何用MapReduce求各个部门的总工资"的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注网站,小编将为大家输出更多高质量的实用文章!
