每日学习

今天继续做MapReduce实验:

现有某电商的用户好友数据文件,名为 buyer1。buyer1 中包含(buyer_id,friends_id)两个字段,内容以"\t"分隔。编写MapReduce进行单表连接,查询出用户的间接好友关系。例如:10001的好友是10002,而10002的好友是10005,那么10001和10005就是间接好友关系。

 

package exper;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Map-side join job: one table is shipped to every map task through the
 * distributed cache and loaded into memory, and the other table is streamed
 * through the mapper and joined against that in-memory dictionary.
 *
 * <p>Input path, output path and cache-file URI are hard-coded in
 * {@link #main(String[])}.
 */
public class MapJoin {

    /**
     * Loads the cached table into a dictionary in {@link #setup} and, for every
     * input record whose second field is a key of that dictionary, emits
     * {@code (secondField, dictValue + "\t" + thirdField)}.
     */
    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        /** First field of each cached line mapped to its third and fourth fields. */
        private final Map<String, String> dict = new HashMap<>();

        /**
         * Reads the first distributed-cache file line by line into {@link #dict}.
         *
         * @throws IOException if no cache file was localized or the file cannot be read
         */
        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            Path[] cached = context.getLocalCacheFiles();
            if (cached == null || cached.length == 0) {
                throw new IOException(
                        "MapJoin needs one distributed-cache file, but none was localized");
            }
            // Only the base name is used, so this presumably relies on the cache
            // file being reachable from the task's working directory — TODO confirm.
            String fileName = cached[0].getName();

            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // NOTE(review): the delimiter is three spaces while map() splits on
                    // "\t"; it may be a tab that was mangled when the code was pasted —
                    // confirm against the real data file.
                    String[] fields = line.split("   ");
                    if (fields.length < 4) {
                        // Blank or short line: skip it instead of failing the task.
                        continue;
                    }
                    dict.put(fields[0], fields[2] + "   " + fields[3]);
                }
            }
        }

        /**
         * Joins one tab-separated input record against the cached dictionary.
         * Records with fewer than three fields, or whose second field has no
         * dictionary entry, produce no output.
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] kv = value.toString().split("\t");
            if (kv.length < 3) {
                return;
            }
            // Dictionary values are never null, so a single get() replaces
            // the containsKey()/get() pair.
            String cachedFields = dict.get(kv[1]);
            if (cachedFields != null) {
                context.write(new Text(kv[1]), new Text(cachedFields + "\t" + kv[2]));
            }
        }
    }

    /** Pass-through reducer: writes every joined value unchanged under its key. */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text text : values) {
                context.write(key, text);
            }
        }
    }

    /**
     * Configures and submits the job, then exits with status 0 on success and 1
     * on failure.
     *
     * @param args ignored; all paths are hard-coded below
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
        Job job = Job.getInstance();
        job.setJobName("mapjoin");
        job.setJarByClass(MapJoin.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path in = new Path("hdfs://Hadoop102:8020/mapreduce/5in/order_item1");
        Path out = new Path("hdfs://Hadoop102:8020/mapreduce/6out");
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        // NOTE(review): the original author marked this cache-file URI as
        // problematic without saying why — verify it before running.
        URI uri = new URI("hdfs://Hadoop102:8020/mapreduce/5in/order1");
        job.addCacheFile(uri);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

 

现有某电商网站两张信息表,分别为订单表orders1和订单明细表order_items1。orders1表记录了用户购买商品的下单日期以及订单编号,order_items1表记录了商品id、订单id以及明细id。

 

package exper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Reduce-side join job: the mapper tags each record with the table it came
 * from and keys it by the join column; the reducer pairs every record of one
 * table with every record of the other that shares the same key.
 *
 * <p>Usage: {@code ReduceJoin <input1> <input2> <output>}.
 */
public class ReduceJoin {
    /**
     * Tags records by source file. Records from a path containing
     * {@code "orders1"} are emitted as {@code (field0, "1+" + field2 + "\t" + field3)};
     * records from a path containing {@code "order_items1"} are emitted as
     * {@code (field1, "2+" + field2)}. Records from any other path are dropped.
     */
    public static class mymapper extends Mapper<Object, Text, Text, Text> {
        /** True when this task's split path contains "orders1". */
        private boolean fromOrders;
        /** True when this task's split path contains "order_items1" (and not "orders1"). */
        private boolean fromOrderItems;

        /**
         * Determines the source table once per task instead of once per record.
         * The whole path string is inspected, so a parent directory name can
         * also match.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            String filePath = ((FileSplit) context.getInputSplit()).getPath().toString();
            fromOrders = filePath.contains("orders1");
            fromOrderItems = !fromOrders && filePath.contains("order_items1");
        }

        /**
         * Emits the tagged join record for one input line; lines with too few
         * fields are skipped rather than failing the task.
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            if (!fromOrders && !fromOrderItems) {
                return;
            }
            // NOTE(review): the delimiter is three spaces; it may be a tab that was
            // mangled when the code was pasted — confirm against the real data files.
            String[] arr = value.toString().split("   ");
            if (fromOrders) {
                if (arr.length >= 4) {
                    context.write(new Text(arr[0]), new Text("1+" + arr[2] + "\t" + arr[3]));
                }
            } else if (arr.length >= 3) {
                context.write(new Text(arr[1]), new Text("2+" + arr[2]));
            }
        }
    }

    /**
     * Splits the values of one key by their "1+" / "2+" tag and writes the
     * cross product of the two groups as {@code (key, left + "\t" + right)}.
     */
    public static class myreducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            List<String> left = new ArrayList<>();
            List<String> right = new ArrayList<>();
            for (Text val : values) {
                // Copy to String right away; the payload after the 2-char tag is kept.
                String str = val.toString();
                if (str.startsWith("1+")) {
                    left.add(str.substring(2));
                } else if (str.startsWith("2+")) {
                    right.add(str.substring(2));
                }
            }

            for (String leftRecord : left) {
                for (String rightRecord : right) {
                    context.write(key, new Text(leftRecord + "\t" + rightRecord));
                }
            }
        }
    }

    /**
     * Configures and submits the job, then exits with status 0 on success and 1
     * on failure. Exits with status 2 when fewer than three arguments are given.
     *
     * @param args {@code args[0]} and {@code args[1]} are the two input paths,
     *             {@code args[2]} is the output path
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 3) {
            System.err.println("Usage: ReduceJoin <input1> <input2> <output>");
            System.exit(2);
        }

        Job job = Job.getInstance();
        job.setJobName("reducejoin");
        job.setJarByClass(ReduceJoin.class);

        job.setMapperClass(mymapper.class);
        job.setReducerClass(myreducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

 

posted @ 2021-11-22 23:58  哦心有  阅读(55)  评论(0)  编辑  收藏  举报