Hadoop Example: Finding Common Friends Among Blog Users
Below is friend-list data from a blog. Before the colon is a user; after the colon are all of that user's friends (the friendships in this data are one-directional).
The task: find every pair of people who have common friends, and list who those common friends are. For example, B's list contains A, C, E, K and C's list contains F, A, D, I, so the pair B and C have A as a common friend.
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
The first step is to work out whose friend each of A, B, C, … is; in other words, to invert the friend lists.
package mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * 2019-09-03
 * Finding common friends among blog users, step 1:
 * determine whose friend each of A, B, C, ... is.
 */
public class FindFriend {

    public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
        public static Text peoples = new Text();  // output key
        public static Text friends = new Text();  // output value

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString();          // one input line, e.g. A:B,C,D,F,E,O
            String array[] = line.split(":");        // split into user and friend list at the colon
            String friend[] = array[1].split(",");   // split the friend list on commas
            for (int i = 0; i < friend.length; i++) {
                peoples.set(friend[i]);   // key: one of A's friends, e.g. B
                friends.set(array[0]);    // value: the user, e.g. A
                context.write(peoples, friends);  // emits (B, A), meaning "B is a friend of A"
            }
        }
    }

    public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String friend = "";  // concatenates everyone who has `key` (e.g. B) as a friend
            for (Text value : values) {
                friend = friend + value + ",";
            }
            context.write(key, new Text(friend));
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        final String INPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/blogfriend.txt";  // input file
        final String OUTPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/output";         // output dir; deleted below if it already exists
        Configuration conf = new Configuration();
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        if (fileSystem.exists(new Path(OUTPUT_PATH))) {
            fileSystem.delete(new Path(OUTPUT_PATH), true);
        }
        Job job = Job.getInstance(conf, "Find_Friend");  // user-defined job name
        job.setJarByClass(FindFriend.class);             // was Find_Friend.class, which is not the class defined in this file
        job.setMapperClass(FindFriendsMapper.class);     // Mapper class for this job
        job.setReducerClass(FindFriendsReduce.class);    // Reducer class for this job
        job.setOutputKeyClass(Text.class);               // output key class
        job.setOutputValueClass(Text.class);             // output value class
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1);  // run the job
    }
}
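To see the inversion by itself, here is a minimal standalone sketch (plain Java, no Hadoop; the class name InvertOneLine and the hard-coded sample line are for illustration only) that prints the key/value pairs the mapper emits for one input record:

public class InvertOneLine {
    public static void main(String[] args) {
        String line = "A:B,C,D,F,E,O";        // one record from the input above
        String[] parts = line.split(":");      // parts[0] = user A, parts[1] = A's friend list
        for (String f : parts[1].split(",")) {
            // the mapper emits (friend, user): (B, A) means "B is a friend of A"
            System.out.println(f + "\t" + parts[0]);
        }
    }
}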
After this job runs, the result should be as follows (each line is a person, then, after a tab, every user whose friend list contains that person):
A I,K,C,B,G,F,H,O,D,
B A,F,J,E,
C A,E,B,H,F,G,K,
D G,C,K,A,L,F,E,H,
E G,M,L,H,A,F,B,D,
F L,M,D,C,G,A,
G M,
H O,
I O,C,
J O,
K B,
L D,E,
M E,F,
O A,H,I,J,F,
Now for the second step: finding which pairs of people have common friends, and who those common friends are. The key pairing trick is sketched below, followed by the complete job.
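The crucial idea in this step is emitting each pair of people in a canonical order, so that the pairs (B, C) and (C, B) become the same key and meet in a single reduce call. A minimal sketch of just that pairing logic (plain Java; the class name PairsOneLine and the hard-coded sample line are illustrative only):

import java.util.Arrays;

public class PairsOneLine {
    public static void main(String[] args) {
        String line = "B\tA,F,J,E,";             // step 1 output: A, F, J and E all have B as a friend
        String[] parts = line.split("\t");
        String[] people = parts[1].split(",");    // split drops the trailing empty element
        for (int i = 0; i < people.length - 1; i++) {
            for (int j = i + 1; j < people.length; j++) {
                String[] pair = { people[i], people[j] };
                Arrays.sort(pair);                // canonical order: always [A, F], never [F, A]
                // every such pair has parts[0] (here B) as a common friend
                System.out.println(Arrays.toString(pair) + "\t" + parts[0]);
            }
        }
    }
}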
package mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Finding common friends among blog users, step 2:
 * for every pair of people, collect their common friends.
 */
public class Find_Friend {

    public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
        public static Text peoples = new Text();  // output key: a pair of people
        public static Text friends = new Text();  // output value: one of their common friends

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString();         // one line of step 1 output, e.g. A<TAB>I,K,C,B,G,F,H,O,D,
            String array[] = line.split("\t");      // TextOutputFormat separates key and value with a tab by default
            String friend[] = array[1].split(",");  // the people who all have array[0] as a friend
            // Every pair drawn from this list has array[0] as a common friend,
            // e.g. I and K share A, I and C share A, and so on.
            for (int i = 0; i < friend.length - 1; i++) {
                for (int j = i + 1; j < friend.length; j++) {
                    // Sort the two names so that (A,C) and (C,A) produce the same key
                    // and are grouped together in the reducer. (The original used a
                    // HashSet here, which only happens to order single uppercase
                    // letters correctly; sorting is explicit and reliable.)
                    String pair[] = { friend[i], friend[j] };
                    Arrays.sort(pair);
                    peoples.set(Arrays.toString(pair));  // e.g. "[I, K]"
                    friends.set(array[0]);
                    context.write(peoples, friends);
                }
            }
        }
    }

    public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String friend = "";  // concatenates all common friends of this pair
            for (Text value : values) {
                friend = friend + value + " ";
            }
            context.write(key, new Text(friend));
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        final String INPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/output/part-r-00000";  // the output file of step 1
        final String OUTPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/output8";
        Configuration conf = new Configuration();
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        if (fileSystem.exists(new Path(OUTPUT_PATH))) {
            fileSystem.delete(new Path(OUTPUT_PATH), true);
        }
        Job job = Job.getInstance(conf, "Find_Friend");  // user-defined job name
        job.setJarByClass(Find_Friend.class);
        job.setMapperClass(FindFriendsMapper.class);     // Mapper class for this job
        job.setReducerClass(FindFriendsReduce.class);    // Reducer class for this job
        job.setOutputKeyClass(Text.class);               // output key class
        job.setOutputValueClass(Text.class);             // output value class
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1);  // run the job
    }
}
The final result is:
[A, B] C E
[A, C] D F
[A, D] F E
[A, E] B D C
[A, F] C B O D E
[A, G] F D E C
[A, H] D O E C
[A, I] O
[A, J] O B
[A, K] D C
[A, L] E D F
[A, M] F E
[B, C] A
[B, D] E A
[B, E] C
[B, F] E A C
[B, G] C A E
[B, H] E C A
[B, I] A
[B, K] C A
[B, L] E
[B, M] E
[B, O] A
[C, D] F A
[C, E] D
[C, F] D A
[C, G] D F A
[C, H] A D
[C, I] A
[C, K] D A
[C, L] D F
[C, M] F
[C, O] I A
[D, E] L
[D, F] A E
[D, G] E A F
[D, H] E A
[D, I] A
[D, K] A
[D, L] E F
[D, M] E F
[D, O] A
[E, F] B M D C
[E, G] D C
[E, H] D C
[E, J] B
[E, K] D C
[E, L] D
[F, G] C D A E
[F, H] A E O C D
[F, I] O A
[F, J] O B
[F, K] A C D
[F, L] E D
[F, M] E
[F, O] A
[G, H] A D E C
[G, I] A
[G, K] C A D
[G, L] F D E
[G, M] F E
[G, O] A
[H, I] A O
[H, J] O
[H, K] A D C
[H, L] D E
[H, M] E
[H, O] A
[I, J] O
[I, K] A
[I, O] A
[K, L] D
[K, O] A
[L, M] F E
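As a sanity check, the same answer can be computed without Hadoop by intersecting friend sets directly. The sketch below (plain Java; the class name VerifyCommonFriends is illustrative, and the input is hard-coded from the data at the top) should print, for every pair with at least one common friend, the same pairs and common friends as the listing above, up to ordering:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class VerifyCommonFriends {
    public static void main(String[] args) {
        String[] data = {
            "A:B,C,D,F,E,O", "B:A,C,E,K", "C:F,A,D,I", "D:A,E,F,L",
            "E:B,C,D,M,L", "F:A,B,C,D,E,O,M", "G:A,C,D,E,F", "H:A,C,D,E,O",
            "I:A,O", "J:B,O", "K:A,C,D", "L:D,E,F", "M:E,F,G", "O:A,H,I,J"
        };
        // user -> that user's friend set
        Map<String, Set<String>> friends = new TreeMap<>();
        for (String line : data) {
            String[] parts = line.split(":");
            friends.put(parts[0], new TreeSet<>(Arrays.asList(parts[1].split(","))));
        }
        List<String> users = new ArrayList<>(friends.keySet());
        for (int i = 0; i < users.size() - 1; i++) {
            for (int j = i + 1; j < users.size(); j++) {
                // common friends of the pair = intersection of the two friend sets
                Set<String> common = new TreeSet<>(friends.get(users.get(i)));
                common.retainAll(friends.get(users.get(j)));
                if (!common.isEmpty()) {
                    System.out.println("[" + users.get(i) + ", " + users.get(j) + "] "
                            + String.join(" ", common));
                }
            }
        }
    }
}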