package LogMR;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FlowBean implements Writable {

    private String TOS;   // service type (which application the request came from)
    private long up_flow; // upstream traffic
    private long d_flow;  // downstream traffic
    private long s_flow;  // total traffic (up + down)
    private long count;   // number of records aggregated into this bean

    public String getTOS() {
        return TOS;
    }

    public void setTOS(String tOS) {
        TOS = tOS;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getD_flow() {
        return d_flow;
    }

    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }

    public long getS_flow() {
        return s_flow;
    }

    public void setS_flow(long s_flow) {
        this.s_flow = s_flow;
    }

    public long getCount() {
        return count;
    }

    public void setCount(long count) {
        this.count = count;
    }

    // During deserialization the framework instantiates the bean reflectively,
    // so an explicit no-arg constructor is required.
    public FlowBean() {
    }

    // Convenience constructor so the bean's data can be initialized in one call.
    public FlowBean(String tOS, long up_flow, long d_flow, long count) {
        TOS = tOS;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this.s_flow = up_flow + d_flow;
        this.count = count;
    }

    // Serialize the object's fields to the output stream.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(TOS);
        out.writeLong(up_flow);
        out.writeLong(d_flow);
        out.writeLong(s_flow);
        out.writeLong(count);
    }

    // Deserialize the object's fields from the input stream.
    // Fields must be read back in exactly the order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        TOS = in.readUTF();
        up_flow = in.readLong();
        d_flow = in.readLong();
        s_flow = in.readLong();
        count = in.readLong();
    }

    @Override
    public String toString() {
        return up_flow + "\t" + d_flow + "\t" + s_flow + "\t" + count;
    }
}
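To sanity-check the Writable implementation, here is a minimal round-trip sketch (the class name FlowBeanRoundTrip is hypothetical, not part of the original job): it serializes a FlowBean to an in-memory byte stream and reads it back, mirroring what Hadoop does when it ships the bean between nodes.

package LogMR;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical helper: verifies that write() and readFields() round-trip
// a FlowBean correctly.
public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean("dns", 1024, 2048, 1);

        // Serialize: write() pushes every field onto the output stream.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize: readFields() reads the fields back in the same order,
        // using the no-arg constructor the framework relies on.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Prints: 1024	2048	3072	1
        System.out.println(copy);
    }
}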
package LogMR;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * FlowBean is a custom data type. Because it is shipped between Hadoop nodes,
 * it must follow Hadoop's serialization mechanism, i.e. implement the
 * corresponding Hadoop serialization interface (Writable).
 */
public class LogMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // Take one line of the log, split it into fields, extract the ones we
    // need (service type, upstream traffic, downstream traffic), then wrap
    // them in a key/value pair and emit it.
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        // Decode the raw bytes of the line as GBK.
        String line = new String(value.getBytes(), 0, value.getLength(), "GBK");

        // Split the line into its tab-separated fields.
        String[] fields = StringUtils.split(line, "\t");

        // Extract the fields we need.
        String TOS = fields[1];
        long up_flow = Long.parseLong(fields[8]);
        long d_flow = Long.parseLong(fields[9]);

        // Wrap the data as a key/value pair and emit it.
        context.write(new Text(TOS), new FlowBean(TOS, up_flow, d_flow, 1));
    }

    /*
    public static Text transformTextToUTF8(Text text, String encoding) {
        String value = null;
        try {
            value = new String(text.getBytes(), 0, text.getLength(), encoding);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return new Text(value);
    }
    */
}
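The field indexes 1, 8 and 9 encode an assumed log layout (column 1 holds the service type, columns 8 and 9 the upstream/downstream byte counts). Here is a standalone sketch of just the parsing step, using a fabricated sample line; the class name and the line itself are made up, so adjust the indexes to your actual log format.

package LogMR;

import org.apache.commons.lang.StringUtils;

// Hypothetical demo class: exercises the field extraction from map() on a
// fabricated tab-separated log line.
public class ParseDemo {
    public static void main(String[] args) {
        // Fabricated sample line; only columns 1, 8 and 9 matter here.
        String line = "1363157985066\tdns\thost\tip\tport\tproto\tstate\tflag\t1024\t2048\t200";

        String[] fields = StringUtils.split(line, "\t");

        String tos = fields[1];                  // "dns"
        long upFlow = Long.parseLong(fields[8]); // 1024
        long dFlow = Long.parseLong(fields[9]);  // 2048

        System.out.println(tos + "\t" + upFlow + "\t" + dFlow);
    }
}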
package LogMR;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class LogReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // The framework calls reduce() once per group
    // <service type (e.g. dns, WeChat chat), {flowbean, flowbean, flowbean, ...}>.
    // The business logic is simply to iterate over the values and sum them up.
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values,
            Reducer<Text, FlowBean, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        long up_flow_counter = 0;
        long d_flow_counter = 0;
        long count = 0;

        for (FlowBean bean : values) {
            up_flow_counter += bean.getUp_flow();
            d_flow_counter += bean.getD_flow();
            count += bean.getCount();
        }
        context.write(key, new FlowBean(key.toString(), up_flow_counter, d_flow_counter, count));
    }
}
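A side note: because this reducer's input and output types are both <Text, FlowBean> and the aggregation is plain addition (associative and commutative), LogReducer could also be registered as a combiner to pre-sum beans on the map side and shrink shuffle traffic. This is an optional tweak, not part of the original job; it would be a single extra line in LogRunner's run() method:

// Optional map-side pre-aggregation (add alongside the other job.set* calls):
job.setCombinerClass(LogReducer.class);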
package LogMR;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// The canonical way to describe and submit a job.
public class LogRunner extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // Use the configuration ToolRunner has already populated
        // (including any generic options passed on the command line).
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);

        job.setJarByClass(LogRunner.class);

        job.setMapperClass(LogMapper.class);
        job.setReducerClass(LogReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new LogRunner(), args);
        System.exit(res);
    }
}
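One practical benefit of the Configured/Tool pattern: ToolRunner strips Hadoop's generic options from the command line before run() sees its arguments, so the job can be tuned without code changes. For example (the jar name and paths are hypothetical), running "hadoop jar logmr.jar LogMR.LogRunner -D mapreduce.job.reduces=2 /input/logs /output/flow" would set the reducer count via the generic -D option, while args[0] and args[1] still arrive in run() as the input and output paths.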