MapReduce - Finding Common Blog Friends (Case Study)
1) Requirement:
Below is the friend-list data of a blog site. Before the colon is a user; after the colon are all of that user's friends (the friend relationship in this data is one-directional).
A line such as A:B,C,D,F,E,O can be read as: these are the people that user A follows.
A:B,C,D,F,E,O
Find every pair of users who have common friends, and for each pair list who those common friends are.
2) Analysis:
First work out whose friend each of A, B, C, ... is; in other words, find out who follows A, who follows B, and so on.
Output of the first pass:
A	I,K,C,B,G,F,H,O,D,
B	A,F,J,E,
C	A,E,B,H,F,G,K,
D	G,C,K,A,L,F,E,H,
E	G,M,L,H,A,F,B,D,
F	L,M,D,C,G,A,
G	M,
H	O,
I	O,C,
J	O,
K	B,
L	D,E,
M	E,F,
O	A,H,I,J,F,
Output of the second pass:
A-B	E C
A-C	D F
A-D	E F
A-E	D B C
A-F	O B C D E
A-G	F E C D
….
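Before turning this into two MapReduce jobs, the same two-pass idea can be sketched as plain in-memory Java. This is only an illustration of the algorithm, not part of the Hadoop code; the class name CommonFriendsSketch and the two sample entries are made up for the example.

import java.util.*;

public class CommonFriendsSketch {
    public static void main(String[] args) {
        // user -> the people that user follows (one-directional, as in the input data)
        Map<String, List<String>> follows = new HashMap<>();
        follows.put("A", Arrays.asList("B", "C", "D", "F", "E", "O"));
        follows.put("B", Arrays.asList("A", "C", "E", "K"));   // sample entry for illustration

        // Pass 1: invert the relation -> friend -> everyone who follows that friend
        Map<String, List<String>> followedBy = new TreeMap<>();
        follows.forEach((person, friends) ->
                friends.forEach(f -> followedBy.computeIfAbsent(f, x -> new ArrayList<>()).add(person)));

        // Pass 2: every two followers of the same friend share that friend
        Map<String, List<String>> common = new TreeMap<>();
        followedBy.forEach((friend, persons) -> {
            List<String> sorted = new ArrayList<>(persons);
            Collections.sort(sorted);               // so that A-B and B-A become the same key
            for (int i = 0; i < sorted.size(); i++) {
                for (int j = i + 1; j < sorted.size(); j++) {
                    common.computeIfAbsent(sorted.get(i) + "-" + sorted.get(j),
                            x -> new ArrayList<>()).add(friend);
                }
            }
        });

        // e.g. prints "A-B	[C, E]" for the two sample lines above
        common.forEach((pair, friends) -> System.out.println(pair + "\t" + friends));
    }
}

The two MapReduce jobs below follow exactly this structure: job 1 builds the inverted "who follows X" lists, and job 2 pairs up the followers of each person.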
3) Code implementation:
Step 1: A reusable driver class
package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FriendsDriver {

    /**
     * Reusable driver helper.
     * @param runClass     the driver class (used for setJarByClass)
     * @param mapperClass  the Mapper class
     * @param reducerClass the Reducer class
     * @param textClass    the Mapper output key type
     * @param textClass1   the Mapper output value type
     * @param textClass2   the Reducer output key type
     * @param textClass3   the Reducer output value type
     * @param arg          input path
     * @param arg1         output path
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    public static void run(Class<?> runClass,
                           Class<? extends Mapper> mapperClass,
                           Class<? extends Reducer> reducerClass,
                           Class<?> textClass, Class<?> textClass1,
                           Class<?> textClass2, Class<?> textClass3,
                           String arg, String arg1)
            throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Load the configuration
        Configuration conf = new Configuration();
        // 2. Create the Job
        Job job = Job.getInstance(conf);
        // 3. Set the jar and wire up the Mapper and Reducer classes
        job.setJarByClass(runClass);
        job.setMapperClass(mapperClass);
        job.setReducerClass(reducerClass);
        // 4. Key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(textClass);
        job.setMapOutputValueClass(textClass1);
        job.setOutputKeyClass(textClass2);
        job.setOutputValueClass(textClass3);
        // 5. Input and output paths
        FileInputFormat.setInputPaths(job, new Path(arg));
        FileOutputFormat.setOutputPath(job, new Path(arg1));
        // 6. Submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
Step 2: The mapper swaps the two sides of the colon, turning "whom each user follows" into "who follows each user" (a many-to-one relation).
package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Works out whose friend each user (A, B, C, ...) is.
 */
public class FriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

    Text k = new Text();
    Text v = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Read one line, e.g. A(person):B,C,D,F,E,O
        String lines = value.toString();
        // Split it into the person (A) and the friends (B,C,D,F,E,O)
        String[] person = lines.split(":");
        // The person becomes the map output value
        v.set(person[0]);
        // Each friend becomes a map output key
        String[] friends = person[1].split(",");
        for (String friend : friends) {
            k.set(friend);
            context.write(k, v);
        }
    }
}
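For the sample line A:B,C,D,F,E,O this mapper emits one (key, value) pair per friend, with the friend as the key and A as the value:

B	A
C	A
D	A
F	A
E	A
O	A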
Step 3: Reducer for the first pass; output form: A	I,K,C,B,G,F,H,O,D,
package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.Iterator;

public class FriendsReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Join all the users who have this key as a friend
        StringBuilder stringBuilder = new StringBuilder();
        Iterator<Text> iterator = values.iterator();
        while (iterator.hasNext()) {
            stringBuilder.append(iterator.next()).append(",");
        }
        // Write the friend and the joined list, dropping the trailing comma
        context.write(key, new Text(stringBuilder.toString().substring(0, stringBuilder.lastIndexOf(","))));
    }
}
Step 4: Driver for the first job
package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;

import java.io.IOException;

public class FriendsRun {
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // Local test paths; these override whatever was passed on the command line
        args = new String[]{"D:/friends.txt", "D:/Hadoop-result/Friends1"};
        FriendsDriver.run(FriendsRun.class, FriendsMapper.class, FriendsReducer.class,
                Text.class, Text.class, Text.class, Text.class, args[0], args[1]);
    }
}
Step 5: Reshape the key into pairs such as A-B
package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.Arrays;

/**
 * Builds pair keys such as A-B, A-C, ...
 */
public class Friends2Mapper extends Mapper<LongWritable, Text, Text, Text> {

    Text v = new Text();
    Text k = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // Split the first-pass line into the shared friend and the persons who have that friend
        String[] strings = line.split("\t");
        // The shared friend becomes the value
        v.set(strings[0]);
        // Sort the persons so that a pair is always emitted in the same order
        // (otherwise B-E and E-B would end up as two different keys)
        String[] persons = strings[1].split(",");
        Arrays.sort(persons);
        // Every two persons form one pair key
        for (int i = 0; i < persons.length; i++) {
            for (int j = i + 1; j < persons.length; j++) {
                k.set(persons[i] + "-" + persons[j]);
                context.write(k, v);
            }
        }
    }
}
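As a worked example, take the first-pass line C	A,E,B,H,F,G,K. The mapper sorts the persons to A,B,E,F,G,H,K and emits every pair with C as the value (21 pairs in total; only a few are shown):

A-B	C
A-E	C
A-F	C
…
H-K	C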
Step 6: Reducer for the second pass; output form: A-B	E C
package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.Iterator;

public class Friends2Reducer extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Join all common friends of this pair
        StringBuilder stringBuilder = new StringBuilder();
        Iterator<Text> iterator = values.iterator();
        while (iterator.hasNext()) {
            stringBuilder.append(iterator.next()).append(",");
        }
        // Write the pair and the joined list, dropping the trailing comma
        context.write(key, new Text(stringBuilder.toString().substring(0, stringBuilder.lastIndexOf(","))));
    }
}
Step 7: Driver for the second job
package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;

import java.io.IOException;

public class Friends2Run {
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // The input of job 2 is the output directory of job 1
        args = new String[]{"D:/Hadoop-result/Friends1", "D:/Hadoop-result/Friends2"};
        FriendsDriver.run(Friends2Run.class, Friends2Mapper.class, Friends2Reducer.class,
                Text.class, Text.class, Text.class, Text.class, args[0], args[1]);
    }
}
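The two jobs above are started by hand, one after the other. Because FriendsDriver.run calls job.waitForCompletion(true) and therefore blocks until the job is done, the two passes can also be chained from a single main method. The class FriendsChainRun below is only a hypothetical convenience wrapper around the classes already shown, using the same local test paths:

package MapReduce.ZhaoSameFriends;

import org.apache.hadoop.io.Text;

import java.io.IOException;

public class FriendsChainRun {
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // Hypothetical local paths; the output of job 1 is the input of job 2
        String input = "D:/friends.txt";
        String firstOutput = "D:/Hadoop-result/Friends1";
        String secondOutput = "D:/Hadoop-result/Friends2";

        // Job 1: invert the friend relation (friend -> everyone who has that friend)
        FriendsDriver.run(FriendsChainRun.class, FriendsMapper.class, FriendsReducer.class,
                Text.class, Text.class, Text.class, Text.class, input, firstOutput);

        // Job 2: pair up the users that share each friend
        FriendsDriver.run(FriendsChainRun.class, Friends2Mapper.class, Friends2Reducer.class,
                Text.class, Text.class, Text.class, Text.class, firstOutput, secondOutput);
    }
}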