MapReduce_MaxValue

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Find the maximum value in the input
public class MaxValue extends Configured implements Tool {
    private static final String INPUT_PATH = "hdfs://h201:9000/user/hadoop/input_maxvalue";
    private static final String OUTPUT_PATH = "hdfs://h201:9000/user/hadoop/output";

    public static class MapClass extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
        // Running maximum for this map task; MIN_VALUE so all-negative input is handled correctly
        private int maxNum = Integer.MIN_VALUE;

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] str = value.toString().split(" ");
            for (int i = 0; i < str.length; i++) {
                try {
                    int temp = Integer.parseInt(str[i]);
                    if (temp > maxNum) {
                        maxNum = temp;
                    }
                } catch (NumberFormatException e) {
                    // Ignore non-numeric tokens but keep scanning the rest of the line
                }
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the local maximum once, after this task has seen all of its input lines
            context.write(new IntWritable(maxNum), new IntWritable(maxNum));
        }
    }

    public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private int maxNum = Integer.MIN_VALUE;

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            for (IntWritable val : values) {
                if (val.get() > maxNum) {
                    maxNum = val.get();
                }
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the global maximum once, after all keys have been reduced
            context.write(new IntWritable(maxNum), new IntWritable(maxNum));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        conf.set("mapred.jar", "mv.jar"); // point the framework at the packaged job jar
        // The output path must not already exist, so delete it before submitting the job
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        Job job = Job.getInstance(conf, "MaxNum"); // new Job(conf, ...) is deprecated
        job.setJarByClass(MaxValue.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.setMapperClass(MapClass.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // Return the status instead of calling System.exit() here, so ToolRunner
        // can hand the exit code back to main()
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        long start = System.nanoTime();
        int res = ToolRunner.run(new Configuration(), new MaxValue(), args);
        System.out.println("Elapsed time (ns): " + (System.nanoTime() - start));
        System.exit(res);
    }
}
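
A quick sanity check of the data flow, with illustrative sample numbers (not from the original post): suppose input_maxvalue contains

3 7 abc 2
9 1

Each map task keeps a running maximum, ignores the non-numeric token, and emits its local maximum (9) once from cleanup(). The reducer then writes the global maximum through TextOutputFormat as a tab-separated key/value pair:

9	9

Reusing Reduce as the combiner is safe here because max is associative and commutative: running the combiner zero, one, or several times over partial map output cannot change the final result. Assuming the class is packaged into mv.jar, as the mapred.jar setting suggests, the job can be launched with hadoop jar mv.jar MaxValue.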

*************
setup() is called by the MapReduce framework exactly once, before the Map task starts processing input, and is the place to centralize initialization of variables and resources. If that initialization were put in map() instead, the Mapper would redo it for every input line it parses, which is redundant and hurts performance.

cleanup() is called by the MapReduce framework exactly once, after the Map task has finished, and is the place to release those variables and resources. If the release were put in map() instead, the Mapper would tear resources down after every line and have to re-initialize them before the next one, again redundant and inefficient.
*************
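
To make that lifecycle concrete, here is a minimal sketch of a mapper that reads a configuration value once in setup() rather than re-reading it on every map() call. FilterMapper and the filter.threshold property are invented for this illustration; they are not part of the MaxValue job above.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Illustrative only: shows the setup()/map()/cleanup() lifecycle
public class FilterMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
    private int threshold;     // initialized once per task, reused by every map() call
    private IntWritable out;   // reusable Writable, avoids allocating one per record

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Called exactly once per map task, before the first map() call
        threshold = context.getConfiguration().getInt("filter.threshold", 0);
        out = new IntWritable();
    }

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        for (String token : value.toString().split(" ")) {
            try {
                int n = Integer.parseInt(token);
                if (n > threshold) {
                    out.set(n);
                    context.write(out, out);
                }
            } catch (NumberFormatException e) {
                // Ignore non-numeric tokens
            }
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Called exactly once per map task, after the last map() call;
        // release anything acquired in setup() here (files, connections, caches)
    }
}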

posted @ 2018-06-10 15:26  蜘蛛侠0