MapReduce Example 1

Assignment 1:
Telecom mini-project:
Business data: telecom mobile-user behavior logs
Field separator: \t
Fields: user phone number, timestamp, city id, city district id, dwell time in the district, time of entering the district, time of leaving the district, date
D55433A437AEC8D8D3DB2BCA56E9E64392A9D93C 117210031795040 83401 8340104 301 20180503190539 20180503233517 20180503
D55433A437AEC8D8D3DB2BCA56E9E64392A9D93C 117205031830040 83401 8340104 510 20180503085547 20180503172154 20180503
D55433A437AEC8D8D3DB2BCA56E9E64392A9D93C 117210031800040 83401 8340104 37 20180503180350 20180503180350 20180503
D55433A437AEC8D8D3DB2BCA56E9E64392A9D93C 117210031820040 83401 8340104 10 20180503173254 20180503173254 20180503
47BE1E866CFC071DB19D5E1C056BE28AE24C16E7 117135031850040 83401 8340104 11 20180503224834 20180503224834 20180503
47BE1E866CFC071DB19D5E1C056BE28AE24C16E7 119560032075040 83211 8321112 0 20180503204816 20180503204816 20180503
47BE1E866CFC071DB19D5E1C056BE28AE24C16E7 119560032075040 83211 8321112 1 20180503104337 20180503104337 20180503
47BE1E866CFC071DB19D5E1C056BE28AE24C16E7 119805031860040 83204 8320412 1 20180503203340 20180503203400 20180503
47BE1E866CFC071DB19D5E1C056BE28AE24C16E7 118850031995040 83201 8320104 0 20180503100209 20180503100209 20180503
The data is loaded into HDFS with the put command.
Requirements:
1. Compute each user's average dwell time.
2. Join the city id with the city name.
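
As a worked example for requirement 1: the first four sample rows above all belong to user D55433A437AEC8D8D3DB2BCA56E9E64392A9D93C in city 83401, with dwell times 301, 510, 37 and 10, so that user's average dwell time over these rows is (301 + 510 + 37 + 10) / 4 = 858 / 4 = 214 (the code below uses integer division, so the .5 is dropped).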

Code for requirement 1:

package com.shujia.dianxin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class DxDemo {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("用户平均停留时长");

        job.setJarByClass(DxDemo.class);
        job.setMapperClass(DxMapper.class);
        job.setReducerClass(DxReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        job.waitForCompletion(true);

    }
}

 

Mapper class

package com.shujia.dianxin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class DxMapper extends Mapper<LongWritable,Text, Text,LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // skip records containing the \N null marker
        if (line.contains("\\N")) {
            return;
        }
        String[] words = line.split("\t");
        // field 0: user phone number, field 2: city id, field 4: dwell time in the district
        String user = words[0];
        String cityId = words[2];
        String time = words[4];
        // emit exactly one record per input line: key = user + city id, value = dwell time
        context.write(new Text("用户:" + user + ":" + "城市id:" + cityId), new LongWritable(Long.parseLong(time)));
    }
}

 

Reducer class

package com.shujia.dianxin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


import java.io.IOException;

public class DxReducer extends Reducer<Text, LongWritable,Text,LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long sum = 0L;
        long count = 0L;
        // accumulate the total dwell time and the number of records for this user/city key
        for (LongWritable value : values) {
            sum += value.get();
            count++;
        }
        // integer average dwell time
        long avg = sum / count;
        context.write(key, new LongWritable(avg));
    }
}
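
With the default TextOutputFormat, each line written by job 1 is the map key, a tab, and the average dwell time. For the sample user and city worked out above, the result line would look like this (other keys follow the same layout):

用户:D55433A437AEC8D8D3DB2BCA56E9E64392A9D93C:城市id:83401	214

Job 2 below relies on exactly this layout: its mapper splits the line on ":" to recover the user and the city id, and on whitespace to separate the city id from the average.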

 

Code for requirement 2:

package com.shujia.dianxin;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.LinkedList;


class DxAndCityMapper extends Mapper<LongWritable,Text,Text,Text>{
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // context is the Hadoop MapReduce task context; the input split tells us which file this record came from
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        String name = inputSplit.getPath().getName();
        // lines from job 1's part files look like: 用户:<user>:城市id:<cityId>\t<avg>
        if (name.contains("part")) {
            String[] word = line.split(":");
            // extract the user
            String usr = word[1];
            String[] s2 = word[3].split("\\s+");
            // extract the city id
            String id1 = s2[0];
            // extract the average dwell time
            String avg = s2[1];
            context.write(new Text(id1),new Text("#"+avg+"-"+usr));
        }else if(name.contains("city")){
            String[] split=line.split(",");
            String id1=split[0];
            context.write(new Text(id1),new Text("$"+split[1]));
        }
    }
}

class DxAndCityReducer extends Reducer<Text,Text,Text,Text>{
    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        // two lists to buffer the values by tag
        // values tagged with # hold "avg-user" records from the job 1 output
        LinkedList<String> avg = new LinkedList<>();
        // values tagged with $ hold the city name from the city file
        LinkedList<String> city1 = new LinkedList<>();

        for (Text value : values) {
            String s=value.toString();
            if(s.startsWith("#")){
                String s3=s.substring(1);
                String[] split=s3.split("-");
                avg.add(split[1]+"\t"+split[0]);
            }else if (s.startsWith("$")) {
                String s3=s.substring(1);
                city1.add(s3);
            }
        }
        for (String city : city1) {
            for (String s : avg) {
                context.write(key,new Text(city+"\t"+s));
            }
        }
    }
}



public class DxAndCity {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("与城市名称表连接");

        job.setJarByClass(DxAndCity.class);
        job.setMapperClass(DxAndCityMapper.class);
        job.setReducerClass(DxAndCityReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        job.waitForCompletion(true);

    }
}
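
The city file itself is not shown in this post. From the mapper above it is assumed to be a comma-separated file whose first column is the city id and whose second column is the city name, for example (hypothetical rows):

83401,合肥
83211,镇江
83201,南京

Only split[0] and split[1] are read, so any additional columns would simply be ignored.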

 

Error 1: the city file had to be copied into the same directory as the output of requirement 1 before the job would run.

Error 2: the logic for extracting the city id in the map method was wrong.

Both mistakes were the result of carelessness.
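
A possible alternative to error 1 (a sketch, not what was done in this post): FileInputFormat.setInputPaths accepts several input paths, so the join job could read the job 1 output directory and the directory holding the city file directly, without copying files around; the file-name check in DxAndCityMapper would still route each record correctly. The paths below are placeholders.

        // hypothetical HDFS paths; replace with the real locations
        FileInputFormat.setInputPaths(job,
                new Path("/dianxin/out1"),   // output of requirement 1 (part-r-* files)
                new Path("/dianxin/city"));  // directory containing the city file
        FileOutputFormat.setOutputPath(job, new Path("/dianxin/out2"));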

 
