Hadoop MapReduce 模板

 1 package seven.ili.patent;
 2 
 3 import java.io.IOException;
 4 
 5 import org.apache.hadoop.conf.Configuration;
 6 import org.apache.hadoop.conf.Configured;
 7 import org.apache.hadoop.fs.Path;
 8 import org.apache.hadoop.io.IntWritable;
 9 import org.apache.hadoop.io.LongWritable;
10 import org.apache.hadoop.io.Text;
11 import org.apache.hadoop.mapreduce.Job;
12 import org.apache.hadoop.mapreduce.Mapper;
13 import org.apache.hadoop.mapreduce.Partitioner;
14 import org.apache.hadoop.mapreduce.Reducer;
15 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
16 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
17 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
18 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
19 import org.apache.hadoop.util.Tool;
20 import org.apache.hadoop.util.ToolRunner;
21 
22 public class AgeStatistics extends Configured implements Tool {  
23     public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {    
24         public void map(LongWritable key, Text value, Context context)  
25             throws IOException, InterruptedException {  
26             
27             String[] str = value.toString().split("\t", -2);  
28             String gender = str[2];
29             context.write(new Text(gender), new Text(value));
30         }  
31     }  
32 
33     public static class Reduce extends Reducer<Text, Text, Text, IntWritable> {  
34         public int max = -1; 
35         public void reduce(Text key, Iterable<Text> values, Context context)  
36                 throws IOException, InterruptedException {  
37             max = -1;
38             for (Text val : values) {  
39                 String[] str = val.toString().split("\t", -2);
40                 if (Integer.parseInt(str[3]) > max)
41                     max = Integer.parseInt(str[3]);
42             }
43             context.write(new Text(key), new IntWritable(max));
44         }   
45     }
46     
47     public static class AgePartitioner extends Partitioner<Text, Text>{
48         @Override
49         public int getPartition(Text key, Text value, int numReduceTasks) {
50             String[] str = value.toString().split("\t");
51             int age = Integer.parseInt(str[1]);
52             if (numReduceTasks == 0){
53                 return 0;
54             }
55             if (age <= 20)
56                 return 0;
57             else if (age > 20 && age <= 50)
58                 return 1 % numReduceTasks;
59             else
60                 return 2 % numReduceTasks;
61         }
62     }
63     
64 
65     public int run(String[] args) throws Exception {  
66         Configuration conf = getConf();  
67         Job job = new Job(conf, "TopKNum");  
68         job.setJarByClass(AgeStatistics.class);  
69         FileInputFormat.setInputPaths(job, new Path(args[0]));  
70         FileOutputFormat.setOutputPath(job, new Path(args[1]));  
71         job.setMapperClass(MapClass.class);  
72         job.setMapOutputKeyClass(Text.class);
73         job.setMapOutputValueClass(Text.class);
74         //job.setCombinerClass(Reduce.class); 
75         job.setPartitionerClass(AgePartitioner.class);
76         job.setReducerClass(Reduce.class);  
77         job.setNumReduceTasks(3);
78         job.setInputFormatClass(TextInputFormat.class);  
79         job.setOutputFormatClass(TextOutputFormat.class);  
80         job.setOutputKeyClass(Text.class);  
81         job.setOutputValueClass(IntWritable.class); 
82         System.exit(job.waitForCompletion(true) ? 0 : 1);  
83         return 0;  
84     }  
85     public static void main(String[] args) throws Exception {  
86         int res = ToolRunner.run(new Configuration(), new AgeStatistics(), args);  
87         System.exit(res);  
88     }  
89 
90 }
posted on 2012-12-06 18:10 brainworm 阅读(286) 评论(0) 收藏举报
刷新页面返回顶部
Brainworm

Hadoop MapReduce 模板

公告

导航