Daily Learning
Today I continued with the MapReduce experiments:
We have a user-friend data file from an e-commerce site, named buyer1. Each line of buyer1 holds two fields, (buyer_id, friends_id), separated by "\t". Write a MapReduce job that performs a single-table (self) join to find indirect friend relationships. For example: 10001's friend is 10002, and 10002's friend is 10005, so 10001 and 10005 are indirect friends.
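A minimal sketch of this single-table join, written in the same style as the jobs below (the class name SingleTableJoin, the "1+"/"2+" tags, and the command-line paths are my own placeholders, not part of the assignment): each (buyer, friend) line is emitted twice, keyed once on the friend column and once on the buyer column, so the reducer can match "everyone who has X as a friend" against "everyone who is a friend of X" and write the cross product as indirect pairs.

package exper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SingleTableJoin {

    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] kv = value.toString().split("\t"); // buyer_id \t friends_id
            // Keyed on the friend: buyer kv[0] reaches the middle user kv[1].
            context.write(new Text(kv[1]), new Text("1+" + kv[0]));
            // Keyed on the buyer: middle user kv[0] reaches friend kv[1].
            context.write(new Text(kv[0]), new Text("2+" + kv[1]));
        }
    }

    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            List<String> left = new ArrayList<>();   // users whose friend is `key`
            List<String> right = new ArrayList<>();  // friends of `key`
            for (Text val : values) {
                String str = val.toString();
                if (str.startsWith("1+")) {
                    left.add(str.substring(2));
                } else if (str.startsWith("2+")) {
                    right.add(str.substring(2));
                }
            }
            // Every (left, right) pair is an indirect friendship through `key`.
            for (String l : left) {
                for (String r : right) {
                    context.write(new Text(l), new Text(r));
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        job.setJobName("singletablejoin");
        job.setJarByClass(SingleTableJoin.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // buyer1
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With the example from the problem statement, the reducer for key 10002 sees left = {10001} and right = {10005}, and writes the indirect pair (10001, 10005).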
Map-side join (this job actually joins the order tables of the next experiment: the small orders file is shipped to every mapper through the distributed cache and held in a HashMap, so the join happens entirely in map()):

package exper;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapJoin {

    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        // In-memory copy of the small (cached) orders table: order id -> the two joined columns.
        private Map<String, String> dict = new HashMap<>();

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // The cached file is localized with a symlink into the task's working
            // directory, so it can be opened by its bare name.
            String fileName = context.getLocalCacheFiles()[0].getName();
            BufferedReader reader = new BufferedReader(new FileReader(fileName));
            String codeandname;
            while (null != (codeandname = reader.readLine())) {
                String[] str = codeandname.split("\t"); // the data files are tab-separated
                dict.put(str[0], str[2] + "\t" + str[3]);
            }
            reader.close();
        }

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] kv = value.toString().split("\t");
            // kv[1] is the order id; join it against the cached orders table.
            if (dict.containsKey(kv[1])) {
                context.write(new Text(kv[1]), new Text(dict.get(kv[1]) + "\t" + kv[2]));
            }
        }
    }

    // Pass-through reducer: the join already happened on the map side.
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            for (Text text : values) {
                context.write(key, text);
            }
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
        Job job = Job.getInstance();
        job.setJobName("mapjoin");
        job.setJarByClass(MapJoin.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        Path in = new Path("hdfs://Hadoop102:8020/mapreduce/5in/order_item1");
        Path out = new Path("hdfs://Hadoop102:8020/mapreduce/6out");
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        URI uri = new URI("hdfs://Hadoop102:8020/mapreduce/5in/order1"); // this line gave me trouble
        job.addCacheFile(uri);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
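The point of the map-side join is that the orders file is small enough to sit in every mapper's memory, so no shuffle is needed to pair the two tables; the reducer above is a pure pass-through and could be dropped entirely with job.setNumReduceTasks(0), since the map output types already match the job's output types. The trade-off is that the cached table has to fit in memory on every task; the reduce-side join in the next experiment avoids that constraint by doing the pairing after the shuffle.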
The second experiment uses two tables from an e-commerce site: an orders table, orders1, and an order-detail table, order_items1. orders1 records the date and order id of each purchase; order_items1 records the goods id, order id, and detail id. The task is to write a MapReduce job that joins the two tables on the order id.
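The reduce-side join works like this: the mapper tags every record with its source table ("1+" for orders1, "2+" for order_items1) and keys it on the order id, so the shuffle groups both tables' rows for the same order together; the reducer then splits each group back into a left and a right list and emits their cross product. For instance (a made-up order id, just to show the flow): if order 52304 has one row in orders1 and two detail rows in order_items1, the reducer for key 52304 sees one "1+" value and two "2+" values and writes two joined lines.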
package exper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ReduceJoin {

    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Work out which table this split came from and tag the record accordingly.
            String filePath = ((FileSplit) context.getInputSplit()).getPath().toString();
            String[] arr = value.toString().split("\t"); // the data files are tab-separated
            if (filePath.contains("orders1")) {
                // key = order id, "1+" marks the left (orders) table
                context.write(new Text(arr[0]), new Text("1+" + arr[2] + "\t" + arr[3]));
            } else if (filePath.contains("order_items1")) {
                // key = order id, "2+" marks the right (order items) table
                context.write(new Text(arr[1]), new Text("2+" + arr[2]));
            }
        }
    }

    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            List<String> left = new ArrayList<>();   // rows from orders1
            List<String> right = new ArrayList<>();  // rows from order_items1
            for (Text val : values) {
                String str = val.toString();
                if (str.startsWith("1+")) {
                    left.add(str.substring(2));
                } else if (str.startsWith("2+")) {
                    right.add(str.substring(2));
                }
            }
            // Cross product of the two sides = the joined rows for this order id.
            for (String l : left) {
                for (String r : right) {
                    context.write(key, new Text(l + "\t" + r));
                }
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJobName("reducejoin");
        job.setJarByClass(ReduceJoin.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
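Unlike the map join, this version takes its paths from the command line, so it can be pointed at different HDFS directories without recompiling. A typical invocation would look like the following (the jar name is a placeholder for however the project gets packaged, and the paths just follow the naming used above):

hadoop jar exper.jar exper.ReduceJoin /mapreduce/5in/orders1 /mapreduce/5in/order_items1 /mapreduce/6out

args[0] and args[1] are the two input tables, and args[2] is the output directory, which must not exist before the job runs.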