package LogMR;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FlowBean implements Writable {

    private String TOS;   // service type (which application the request came from)
    private long up_flow; // upstream traffic
    private long d_flow;  // downstream traffic
    private long s_flow;  // total traffic (up + down)
    private long count;   // number of records aggregated into this bean

    public String getTOS() {
        return TOS;
    }

    public void setTOS(String tOS) {
        TOS = tOS;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getD_flow() {
        return d_flow;
    }

    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }

    public long getS_flow() {
        return s_flow;
    }

    public void setS_flow(long s_flow) {
        this.s_flow = s_flow;
    }

    public long getCount() {
        return count;
    }

    public void setCount(long count) {
        this.count = count;
    }

    // During deserialization the framework instantiates the bean reflectively,
    // so an explicit no-arg constructor is required.
    public FlowBean() {
    }

    // Convenience constructor so the bean's data can be initialized in one call.
    public FlowBean(String tOS, long up_flow, long d_flow, long count) {
        TOS = tOS;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this.s_flow = up_flow + d_flow;
        this.count = count;
    }

    // Serialize the object's fields to the output stream.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(TOS);
        out.writeLong(up_flow);
        out.writeLong(d_flow);
        out.writeLong(s_flow);
        out.writeLong(count);
    }

    // Deserialize the object's fields from the input stream.
    // Fields must be read back in exactly the order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        TOS = in.readUTF();
        up_flow = in.readLong();
        d_flow = in.readLong();
        s_flow = in.readLong();
        count = in.readLong();
    }

    @Override
    public String toString() {
        return up_flow + "\t" + d_flow + "\t" + s_flow + "\t" + count;
    }
}
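To sanity-check the Writable implementation, here is a minimal round-trip sketch (the class name FlowBeanRoundTrip is hypothetical, not part of the original job): it serializes a FlowBean to an in-memory byte stream and reads it back, mirroring what Hadoop does when it ships the bean between nodes.

package LogMR;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical helper: verifies that write() and readFields() round-trip
// a FlowBean correctly.
public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean("dns", 1024, 2048, 1);

        // Serialize: write() pushes every field onto the output stream.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize: readFields() reads the fields back in the same order,
        // using the no-arg constructor the framework relies on.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Prints: 1024	2048	3072	1
        System.out.println(copy);
    }
}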
package LogMR;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * FlowBean is a custom data type. Because it is shipped between Hadoop nodes,
 * it must follow Hadoop's serialization mechanism, i.e. implement the
 * corresponding Hadoop serialization interface (Writable).
 */
public class LogMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // Take one line of the log, split it into fields, extract the ones we
    // need (service type, upstream traffic, downstream traffic), then wrap
    // them in a key/value pair and emit it.
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        // Decode the raw bytes of the line as GBK.
        String line = new String(value.getBytes(), 0, value.getLength(), "GBK");

        // Split the line into its tab-separated fields.
        String[] fields = StringUtils.split(line, "\t");

        // Extract the fields we need.
        String TOS = fields[1];
        long up_flow = Long.parseLong(fields[8]);
        long d_flow = Long.parseLong(fields[9]);

        // Wrap the data as a key/value pair and emit it.
        context.write(new Text(TOS), new FlowBean(TOS, up_flow, d_flow, 1));
    }

    /*
    public static Text transformTextToUTF8(Text text, String encoding) {
        String value = null;
        try {
            value = new String(text.getBytes(), 0, text.getLength(), encoding);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return new Text(value);
    }
    */
}
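The field indexes 1, 8 and 9 encode an assumed log layout (column 1 holds the service type, columns 8 and 9 the upstream/downstream byte counts). Here is a standalone sketch of just the parsing step, using a fabricated sample line; the class name and the line itself are made up, so adjust the indexes to your actual log format.

package LogMR;

import org.apache.commons.lang.StringUtils;

// Hypothetical demo class: exercises the field extraction from map() on a
// fabricated tab-separated log line.
public class ParseDemo {
    public static void main(String[] args) {
        // Fabricated sample line; only columns 1, 8 and 9 matter here.
        String line = "1363157985066\tdns\thost\tip\tport\tproto\tstate\tflag\t1024\t2048\t200";

        String[] fields = StringUtils.split(line, "\t");

        String tos = fields[1];                  // "dns"
        long upFlow = Long.parseLong(fields[8]); // 1024
        long dFlow = Long.parseLong(fields[9]);  // 2048

        System.out.println(tos + "\t" + upFlow + "\t" + dFlow);
    }
}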
package LogMR;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class LogReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // The framework calls reduce() once per group
    // <service type (e.g. dns, WeChat chat), {flowbean, flowbean, flowbean, ...}>.
    // The business logic is simply to iterate over the values and sum them up.
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values,
            Reducer<Text, FlowBean, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        long up_flow_counter = 0;
        long d_flow_counter = 0;
        long count = 0;

        for (FlowBean bean : values) {
            up_flow_counter += bean.getUp_flow();
            d_flow_counter += bean.getD_flow();
            count += bean.getCount();
        }
        context.write(key, new FlowBean(key.toString(), up_flow_counter, d_flow_counter, count));
    }
}
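A side note: because this reducer's input and output types are both <Text, FlowBean> and the aggregation is plain addition (associative and commutative), LogReducer could also be registered as a combiner to pre-sum beans on the map side and shrink shuffle traffic. This is an optional tweak, not part of the original job; it would be a single extra line in LogRunner's run() method:

// Optional map-side pre-aggregation (add alongside the other job.set* calls):
job.setCombinerClass(LogReducer.class);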
package LogMR;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// The canonical way to describe and submit a job.
public class LogRunner extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // Use the configuration ToolRunner has already populated
        // (including any generic options passed on the command line).
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);

        job.setJarByClass(LogRunner.class);

        job.setMapperClass(LogMapper.class);
        job.setReducerClass(LogReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new LogRunner(), args);
        System.exit(res);
    }
}
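One practical benefit of the Configured/Tool pattern: ToolRunner strips Hadoop's generic options from the command line before run() sees its arguments, so the job can be tuned without code changes. For example (the jar name and paths are hypothetical), running "hadoop jar logmr.jar LogMR.LogRunner -D mapreduce.job.reduces=2 /input/logs /output/flow" would set the reducer count via the generic -D option, while args[0] and args[1] still arrive in run() as the input and output paths.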