Partitioner

使用自定义partitioner来处理手机上网日志信息

为什么要使用分区?

  1.根据业务需要,产生多个输出文件
  2.多个reduce任务并行运行,提高整体job的运行效率

  1 package partitioner;
  2 
  3 import java.io.DataInput;
  4 import java.io.DataOutput;
  5 import java.io.IOException;
  6 
  7 import org.apache.hadoop.conf.Configuration;
  8 import org.apache.hadoop.fs.Path;
  9 import org.apache.hadoop.io.LongWritable;
 10 import org.apache.hadoop.io.Text;
 11 import org.apache.hadoop.io.Writable;
 12 import org.apache.hadoop.mapreduce.Job;
 13 import org.apache.hadoop.mapreduce.Mapper;
 14 import org.apache.hadoop.mapreduce.Partitioner;
 15 import org.apache.hadoop.mapreduce.Reducer;
 16 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 17 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 18 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 19 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 20 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
 21 /**
 22  * 分区必须打包jar运行
 23  *
 24  */
 25 public class KpiApp {
 26     static final String INPUT_PATH = "hdfs://chaoren:9000/wlan";//wlan是个文件夹,日志文件放在/wlan目录下
 27     static final String OUT_PATH = "hdfs://chaoren:9000/out";
 28 
 29     public static void main(String[] args) throws Exception {
 30         final Job job = new Job(new Configuration(),
 31                 KpiApp.class.getSimpleName());
 32         
 33         //打包运行
 34         job.setJarByClass(KpiApp.class);
 35         
 36         // 1.1 指定输入文件路径
 37         FileInputFormat.setInputPaths(job, INPUT_PATH);
 38         // 指定哪个类用来格式化输入文件
 39         job.setInputFormatClass(TextInputFormat.class);
 40 
 41         // 1.2指定自定义的Mapper类
 42         job.setMapperClass(MyMapper.class);
 43         // 指定输出<k2,v2>的类型
 44         job.setMapOutputKeyClass(Text.class);
 45         job.setMapOutputValueClass(KpiWritable.class);
 46 
 47         // 1.3 指定分区类
 48         job.setPartitionerClass(KpiPartitioner.class);
 49         job.setNumReduceTasks(2);//分成两个区
 50 
 51         // 1.4 TODO 排序、分区
 52 
 53         // 1.5 TODO (可选)归约
 54 
 55         // 2.2 指定自定义的reduce类
 56         job.setReducerClass(MyReducer.class);
 57         // 指定输出<k3,v3>的类型
 58         job.setOutputKeyClass(Text.class);
 59         job.setOutputValueClass(KpiWritable.class);
 60 
 61         // 2.3 指定输出到哪里
 62         FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
 63         // 设定输出文件的格式化类
 64         job.setOutputFormatClass(TextOutputFormat.class);
 65 
 66         // 把代码提交给JobTracker执行
 67         job.waitForCompletion(true);
 68     }
 69 
 70     static class KpiPartitioner extends HashPartitioner<Text, KpiWritable>{
 71 
 72         @Override
 73         public int getPartition(Text key, KpiWritable value, int numReduceTasks) {
 74             return (key.toString().length() == 11) ? 0 : 1;//0代表的是手机号码  1代表非手机号码
 75         }
 76         
 77     }
 78     
 79     static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable> {
 80         protected void map(
 81                 LongWritable key,
 82                 Text value,
 83                 org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, KpiWritable>.Context context)
 84                 throws IOException, InterruptedException {
 85             final String[] splited = value.toString().split("\t");
 86             final String msisdn = splited[1];
 87             final Text k2 = new Text(msisdn);
 88             final KpiWritable v2 = new KpiWritable(splited[6], splited[7],
 89                     splited[8], splited[9]);
 90             context.write(k2, v2);
 91         };
 92     }
 93 
 94     static class MyReducer extends
 95             Reducer<Text, KpiWritable, Text, KpiWritable> {
 96         /**
 97          * @param k2
 98          *            表示整个文件中不同的手机号码
 99          * @param v2s
100          *            表示该手机号在不同时段的流量的集合
101          */
102         protected void reduce(
103                 Text k2,
104                 java.lang.Iterable<KpiWritable> v2s,
105                 org.apache.hadoop.mapreduce.Reducer<Text, KpiWritable, Text, KpiWritable>.Context context)
106                 throws IOException, InterruptedException {
107             long upPackNum = 0L;
108             long downPackNum = 0L;
109             long upPayLoad = 0L;
110             long downPayLoad = 0L;
111 
112             for (KpiWritable kpiWritable : v2s) {
113                 upPackNum += kpiWritable.upPackNum;
114                 downPackNum += kpiWritable.downPackNum;
115                 upPayLoad += kpiWritable.upPayLoad;
116                 downPayLoad += kpiWritable.downPayLoad;
117             }
118 
119             final KpiWritable v3 = new KpiWritable(upPackNum + "", downPackNum
120                     + "", upPayLoad + "", downPayLoad + "");
121             context.write(k2, v3);
122         };
123     }
124 }
125 
126 class KpiWritable implements Writable {
127     long upPackNum;
128     long downPackNum;
129     long upPayLoad;
130     long downPayLoad;
131 
132     public KpiWritable() {
133     }
134 
135     public KpiWritable(String upPackNum, String downPackNum, String upPayLoad,
136             String downPayLoad) {
137         this.upPackNum = Long.parseLong(upPackNum);
138         this.downPackNum = Long.parseLong(downPackNum);
139         this.upPayLoad = Long.parseLong(upPayLoad);
140         this.downPayLoad = Long.parseLong(downPayLoad);
141     }
142 
143     public void readFields(DataInput in) throws IOException {
144         this.upPackNum = in.readLong();
145         this.downPackNum = in.readLong();
146         this.upPayLoad = in.readLong();
147         this.downPayLoad = in.readLong();
148     }
149 
150     public void write(DataOutput out) throws IOException {
151         out.writeLong(upPackNum);
152         out.writeLong(downPackNum);
153         out.writeLong(upPayLoad);
154         out.writeLong(downPayLoad);
155     }
156 
157     @Override
158     public String toString() {
159         return upPackNum + "\t" + downPackNum + "\t" + upPayLoad + "\t"
160                 + downPayLoad;
161     }
162 }

 将上面的代码打包成jar并导出,复制到Linux中,然后在命令行下用hadoop jar命令执行并查看结果;也可以在chaoren:50030(JobTracker的Web界面)中查看作业的相关情况。

 

 

posted @ 2017-04-01 23:24  ahu-lichang  阅读(8559)  评论(0编辑  收藏  举报