1 package cn.edu.ruc.cloudcomputing.book.chapter04;
 2 
 3 import java.io.IOException;
 4 import java.util.*;
 5 
 6 import org.apache.hadoop.fs.Path;
 7 import org.apache.hadoop.conf.*;
 8 import org.apache.hadoop.io.*;
 9 import org.apache.hadoop.mapreduce.*;
10 import org.apache.hadoop.mapreduce.lib.input.*;
11 import org.apache.hadoop.mapreduce.lib.output.*;
12 import org.apache.hadoop.util.*;
13 
14 public class WordCount extends Configured implements Tool {
15     public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
16         public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
17             String line = value.toString(); //toString将输入的纯文本文件的数据转化成String
18             System.out.println(line);//为了便于程序的调试,输出读入的内容   
19                 //将输入的数据先按行进行分割
20             StringTokenizer tokenizerArticle = new StringTokenizer(line,"\n");         
21              //分别对每一行进行处理
22             while(tokenizerArticle.hasMoreTokens()){
23                    //每行按空格划分
24                 StringTokenizer tokenizerLine = new StringTokenizer(tokenizerArticle.nextToken()); 
25                 String strName = tokenizerLine.nextToken(); //学生姓名部分  
26                 String strScore = tokenizerLine.nextToken();//成绩部分
27                 Text name = new Text(strName);//学生姓名  
28                 int scoreInt = Integer.parseInt(strScore);//学生成绩score of student
29                 context.write(name, new IntWritable(scoreInt));//输出姓名和成绩
30             }
31         }
32     }
33     
34     public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
35         public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
36             int sum = 0;
37             int count=0;
38             Iterator<IntWritable> iterator = values.iterator();
39             while (iterator.hasNext()) {
40                 sum += iterator.next().get();    //计算总分
41                 count++;//统计总的科目数
42             } 
43             int average = (int) sum/count;//计算平均成绩
44             context.write(key, new IntWritable(average));
45         }
46     } 
47     public int run(String [] args) throws Exception {
48          Job job = new Job(getConf());
49          job.setJarByClass(Score_Process.class);
50          job.setJobName("Score_Process");
51          job.setOutputKeyClass(Text.class);
52          job.setOutputValueClass(IntWritable.class);
53          job.setMapperClass(Map.class);
54          job.setCombinerClass(Reduce.class);
55          job.setReducerClass(Reduce.class);
56          job.setInputFormatClass(TextInputFormat.class);
57          job.setOutputFormatClass(TextOutputFormat.class);
58     
59          FileInputFormat.setInputPaths(job, new Path(args[0]));
60          FileOutputFormat.setOutputPath(job, new Path(args[1]));
61          boolean success = job.waitForCompletion(true);
62          return success ? 0 : 1;
63     }
64     public static void main(String[] args) throws Exception {
65          int ret = ToolRunner.run(new Score_Process(), args);
66          System.exit(ret);
67     }
68 }