MapReduce-【找博客共同好友案例】

Posted on 2020-05-12 12:35  MissRong  阅读(123)  评论(0)    收藏  举报

MapReduce-【找博客共同好友案例】

1)需求:

以下是博客的好友列表数据,冒号前是一个用户,冒号后是该用户的所有好友(数据中的好友关系是单向的)

例如,A:B,C,D,F,E,O 可以理解为:B、C、D、F、E、O 都是 A 喜欢的人(A 的好友)。

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

求出哪些人两两之间有共同好友,及他俩的共同好友都有谁?

2)需求分析:

先求出 A、B、C……等分别是谁的好友,例如可以理解为 A 被哪些人喜欢。

第一次输出结果

A I,K,C,B,G,F,H,O,D,

B A,F,J,E,

C A,E,B,H,F,G,K,

D G,C,K,A,L,F,E,H,

E G,M,L,H,A,F,B,D,

F L,M,D,C,G,A,

G M,

H O,

I O,C,

J O,

K B,

L D,E,

M E,F,

O A,H,I,J,F,

第二次输出结果

A-B E C

A-C D F

A-D E F

A-E D B C

A-F O B C D E

A-G F E C D

….

3)代码实现:

一、封装驱动类

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FriendsDriver {
    /**
     * Configures a single MapReduce job from the given classes and paths, submits it and
     * blocks until it has finished.
     *
     * @param runClass     class used to locate the jar that contains the job
     * @param mapperClass  mapper implementation to run
     * @param reducerClass reducer implementation to run
     * @param textClass    key type emitted by the mapper
     * @param textClass1   value type emitted by the mapper
     * @param textClass2   key type emitted by the reducer
     * @param textClass3   value type emitted by the reducer
     * @param arg          input path
     * @param arg1         output path
     * @throws IOException            if the job cannot be set up or submitted, or if it
     *                                finishes unsuccessfully
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException   propagated from {@code Job.waitForCompletion}
     */
    public static void run(Class<?> runClass, Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
                           Class<?> textClass, Class<?> textClass1, Class<?> textClass2, Class<?> textClass3, String arg, String arg1) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Load the configuration.
        Configuration conf = new Configuration();
        // 2. Create the job.
        Job job = Job.getInstance(conf);
        // 3. Register the jar plus the mapper and reducer classes.
        job.setJarByClass(runClass);
        job.setMapperClass(mapperClass);
        job.setReducerClass(reducerClass);
        // 4. Declare the key/value types produced by the mapper and by the reducer.
        job.setMapOutputKeyClass(textClass);
        job.setMapOutputValueClass(textClass1);
        job.setOutputKeyClass(textClass2);
        job.setOutputValueClass(textClass3);
        // 5. Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(arg));
        FileOutputFormat.setOutputPath(job, new Path(arg1));
        // 6. Submit and wait. The boolean result reports success; surface a failure to the
        //    caller instead of silently returning as if the job had worked.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            throw new IOException("MapReduce job failed (input=" + arg + ", output=" + arg1 + ")");
        }
    }
}

二、将原数据中冒号两侧的数据互换位置,由"喜欢"的关系变成"被喜欢"的关系,并整理成多对一的形式。

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * First-pass mapper: inverts each {@code person:friend1,friend2,...} line so that every
 * friend becomes an output key and the person becomes the output value. Grouping by key
 * then yields, for each friend, the people who listed that friend.
 */
public class FriendsMapper extends Mapper<LongWritable,Text ,Text, Text> {
    Text k = new Text();
    Text v = new Text();

    /**
     * Emits one {@code (friend, person)} pair per friend found on the input line.
     * Lines without a person or without a friend list (blank lines, {@code "A:"},
     * {@code ":B"}) are skipped instead of failing the task.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one input line, e.g. {@code A:B,C,D,F,E,O}
     * @param context sink for the emitted pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Example line: A:B,C,D,F,E,O  (person on the left, friends on the right).
        String lines = value.toString();
        String[] person = lines.split(":");
        // Without both sides there is nothing to invert; person[1] would be out of bounds.
        if (person.length < 2 || person[0].isEmpty()) {
            return;
        }
        // The person is the value of every emitted pair.
        v.set(person[0]);
        // Each friend becomes a key.
        String[] friends = person[1].split(",");
        for (String friend : friends) {
            if (friend.isEmpty()) {
                // Stray separators such as "B,,C" must not produce an empty key.
                continue;
            }
            k.set(friend);
            context.write(k, v);
        }
    }
}

三、第一次输出形式:A I,K,C,B,G,F,H,O,D(Reducer 会去掉末尾多余的逗号)

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.Iterator;

public class FriendsReducer extends Reducer <Text,Text,Text,Text>{
    /**
     * Joins all values received for a key into one comma-separated string and writes
     * {@code (key, joined)}.
     *
     * @param key     the grouping key
     * @param values  the values grouped under {@code key}
     * @param context sink for the output record
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        // Append every value followed by a separator.
        for (Text person : values) {
            joined.append(person).append(",");
        }
        // The final character is always the separator added last; cut it off.
        String withoutTrailingComma = joined.substring(0, joined.length() - 1);
        context.write(key, new Text(withoutTrailingComma));
    }
}

四、驱动类1,进行第一次的输出

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;

import java.io.IOException;

public class FriendsRun {
    /**
     * Entry point for the first pass. Runs {@code FriendsMapper} and {@code FriendsReducer}
     * through {@code FriendsDriver.run}.
     *
     * @param args optional {@code [inputPath, outputPath]}; when fewer than two arguments
     *             are given, the local default paths below are used
     */
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // Honor paths given on the command line instead of always overwriting them;
        // fall back to the original local paths when none are supplied.
        boolean pathsGiven = args != null && args.length >= 2;
        String inputPath = pathsGiven ? args[0] : "D:/friends.txt";
        String outputPath = pathsGiven ? args[1] : "D:/Hadoop-result/Friends1";
        FriendsDriver.run(FriendsRun.class, FriendsMapper.class, FriendsReducer.class,
                Text.class, Text.class, Text.class, Text.class, inputPath, outputPath);
    }
}

五、调整key的形式,例如:A-B

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Second-pass mapper: reads lines of the form {@code friend<TAB>person1,person2,...} and
 * emits {@code (personX-personY, friend)} for every pair of persons on the line.
 */
public class Friends2Mapper extends Mapper<LongWritable,Text,Text, Text> {
    Text v = new Text();
    Text k = new Text();

    /**
     * Emits one record per unordered pair of persons on the line, keyed as
     * {@code "X-Y"} with {@code X} sorting before {@code Y}, and valued with the friend.
     * Lines without a tab-separated person list are skipped.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one input line, e.g. {@code A<TAB>I,K,C,B}
     * @param context sink for the emitted pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // Split into the friend (left of the tab) and the person list (right of the tab).
        String[] strings = line.split("\t");
        if (strings.length < 2) {
            // No person list on this line; strings[1] would be out of bounds.
            return;
        }
        // The friend is the value of every emitted pair.
        v.set(strings[0]);
        String[] persons = strings[1].split(",");
        // The person list arrives in no particular order. Sort it so a pair always gets
        // the same key ("C-G", never "G-C"); otherwise one pair's common friends end up
        // split across two different reducer keys.
        java.util.Arrays.sort(persons);
        for (int i = 0; i < persons.length; i++) {
            for (int j = i + 1; j < persons.length; j++) {
                k.set(persons[i] + "-" + persons[j]);
                context.write(k, v);
            }
        }
    }
}

六、第二次输出形式:A-B E,C(共同好友之间以逗号分隔)

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.Iterator;

public class Friends2Reducer extends Reducer<Text, Text,Text,Text> {
    /**
     * Writes the key together with all of its values joined by commas.
     *
     * @param key     the grouping key
     * @param values  the values grouped under {@code key}
     * @param context sink for the output record
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        StringBuilder friends = new StringBuilder();
        // Collect every value, each followed by a comma.
        for (Text friend : values) {
            friends.append(friend);
            friends.append(",");
        }
        // Remove the separator that follows the last value.
        friends.deleteCharAt(friends.length() - 1);
        Text joined = new Text(friends.toString());
        context.write(key, joined);
    }
}

七、驱动类2,进行第二次的输出

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;

import java.io.IOException;

public class Friends2Run {
    /**
     * Entry point for the second pass. Runs {@code Friends2Mapper} and
     * {@code Friends2Reducer} through {@code FriendsDriver.run}.
     *
     * @param args optional {@code [inputPath, outputPath]}; when fewer than two arguments
     *             are given, the local default paths below are used
     */
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // Honor paths given on the command line instead of always overwriting them;
        // fall back to the original local paths when none are supplied.
        boolean pathsGiven = args != null && args.length >= 2;
        String inputPath = pathsGiven ? args[0] : "D:/Hadoop-result/Friends1";
        String outputPath = pathsGiven ? args[1] : "D:/Hadoop-result/Friends2";
        FriendsDriver.run(Friends2Run.class, Friends2Mapper.class, Friends2Reducer.class,
                Text.class, Text.class, Text.class, Text.class, inputPath, outputPath);
    }
}

 

 

博客园  ©  2004-2025
浙公网安备 33010602011771号 浙ICP备2021040463号-3