MR框架-->Word4

  • 用户流量统计(上行、下行、总流量汇总)

实现编码:

FlowBean类:把上行流量和下行流量以及总流量封装到一个bean中进行描述,注意要实现hadoop的序列化接口Writable

package com.hp.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * Value bean holding one subscriber's upstream, downstream, and total traffic.
 * Implements Hadoop's {@link Writable} so it can be shuffled between Mapper and Reducer.
 * The field order in {@link #write} and {@link #readFields} must match exactly.
 */
public class FlowBean implements Writable {
    // Traffic counters; sumFlow is always derived as upFlow + downFlow.
    private int upFlow;
    private int downFlow;
    private int sumFlow;

    /** No-arg constructor required by Hadoop for reflective deserialization. */
    public FlowBean() {
    }

    /**
     * Builds a bean from the two raw counters; the total is derived.
     *
     * @param upFlow   upstream traffic
     * @param downFlow downstream traffic
     */
    public FlowBean(int upFlow, int downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    public int getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(int upFlow) {
        this.upFlow = upFlow;
    }

    public int getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(int downFlow) {
        this.downFlow = downFlow;
    }

    public int getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(int sumFlow) {
        this.sumFlow = sumFlow;
    }

    /** Deserialization: read fields in the same order {@link #write} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readInt();
        downFlow = in.readInt();
        // sumFlow is not on the wire; recompute so a deserialized bean is consistent.
        sumFlow = upFlow + downFlow;
    }

    /**
     * Serialization. BUG FIX: the original used {@code out.write(int)}, which
     * writes only the low-order byte — {@code readFields} then consumed 4 bytes
     * per field via {@code readInt} and corrupted the stream. Use writeInt.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(upFlow);
        out.writeInt(downFlow);
    }

    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

}

需要注意的是:统计用户上行和下行流量总和的思路跟之前的一样;不同之处在于 FlowBean 的使用方法——把多个需要统计的信息用 Bean 对象来封装。同时注意 Mapper 和 Reducer 的 KEY 和 VALUE 的数据类型要与 Bean 类型保持一致。

Mapper类:

package com.hp.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Parses one tab-delimited input record and emits {@code <phone, FlowBean>}.
 * Column 1 is the phone number; columns 2 and 3 are the upstream and
 * downstream traffic counters.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        // Split the raw line into its tab-separated columns.
        String[] fields = value.toString().split("\t");
        // Pick out the columns this job cares about.
        String phone = fields[1];
        int up = Integer.parseInt(fields[2]);
        int down = Integer.parseInt(fields[3]);
        // Emit phone -> (up, down, up+down) for the shuffle.
        context.write(new Text(phone), new FlowBean(up, down));
    }
}

Reducer类:

package com.hp.mr;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Sums the upstream and downstream traffic of all records sharing one phone
 * number and emits a single aggregated {@link FlowBean} per key.
 */
public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Reducer<Text, FlowBean, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        // Running totals for this phone number.
        int up = 0;
        int down = 0;
        // BUG FIX: the original accumulated getUpFlow() into `down` and
        // getDownFlow() into `up`, swapping the two columns in the output.
        for (FlowBean bean : values) {
            up += bean.getUpFlow();
            down += bean.getDownFlow();
        }
        // Write the aggregated totals; FlowBean derives sumFlow itself.
        context.write(key, new FlowBean(up, down));
    }
}

Submitter类:

package com.hp.mr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Configures and submits the traffic-summary MapReduce job.
 * Usage: {@code Submitter <input path> <output path>}
 */
public class Submitter {
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: Submitter <input path> <output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        // Delete a stale output directory; the job refuses to start if it exists.
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(args[1]);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        Job job = Job.getInstance(conf);
        // Jar containing the job classes.
        job.setJarByClass(Submitter.class);
        // Mapper emits <phone, FlowBean>.
        job.setMapperClass(FlowMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        // Reducer emits <phone, aggregated FlowBean>.
        job.setReducerClass(FlowReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
        // Input and output paths come from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, output);
        // BUG FIX: the original discarded the job result; surface it as the
        // process exit code so callers/scripts can detect failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

 

posted @ 2020-11-12 16:54  大可耐啊  阅读(110)  评论(0编辑  收藏  举报