大表 + 大表完成用户和用户订单 = 读取的方式+读取的方式

reduce端连接(reduce-side join)

1.自定义key
       package com.mine.hdfs.mr.mapjoin.reducejoin;

       import org.apache.hadoop.io.WritableComparable;

       import java.io.DataInput;
       import java.io.DataOutput;
       import java.io.IOException;

       /**
       */
       public class ComboKey2 implements WritableComparable<ComboKey2> {
           //0-customer 1-order
           private int type ;
           private int cid ;
           private int oid ;
           private String customerInfo = "" ;
           private String orderInfo = "" ;

           public int compareTo(ComboKey2 o) {
               int type0 = o.type ;
               int cid0= o.cid;
               int oid0 = o.oid;
               String customerInfo0 = o.customerInfo;
               String orderInfo0 = o.orderInfo ;
               //是否同一个customer的数据
               if(cid == cid0){
                   //同一个客户的两个订单
                   if(type == type0){
                       return oid - oid0 ;
                   }
                   //一个Customer + 他的order
                   else{
                       if(type ==0)
                           return -1 ;
                       else
                           return 1 ;
                   }
               }
               //cid不同
               else{
                   return cid - cid0 ;
               }
           }

           public void write(DataOutput out) throws IOException {
               out.writeInt(type);
               out.writeInt(cid);
               out.writeInt(oid);
               out.writeUTF(customerInfo);
               out.writeUTF(orderInfo);
           }

           public void readFields(DataInput in) throws IOException {
               this.type = in.readInt();
               this.cid = in.readInt();
               this.oid = in.readInt();
               this.customerInfo = in.readUTF();
               this.orderInfo = in.readUTF();
           }
       }

   2.自定义分区类
       public class CIDPartitioner extends Partitioner<ComboKey2,NullWritable>{

           public int getPartition(ComboKey2 key, NullWritable nullWritable, int numPartitions) {
               return key.getCid() % numPartitions;
           }
       }
   3.创建Mapper
       package com.mine.hdfs.mr.mapjoin.reducejoin;

       import org.apache.hadoop.io.LongWritable;
       import org.apache.hadoop.io.NullWritable;
       import org.apache.hadoop.io.Text;
       import org.apache.hadoop.mapreduce.InputSplit;
       import org.apache.hadoop.mapreduce.Mapper;
       import org.apache.hadoop.mapreduce.lib.input.FileSplit;

       import java.io.IOException;

       /**
        * Mapper for the reduce-side join.
        *
        * <p>Each input line becomes one {@link ComboKey2}. Lines read from a path that
        * contains "customers" are treated as customer records; all other lines are
        * treated as order records. The whole payload travels in the key, so the value
        * is always {@link NullWritable}.
        */
       public class ReduceJoinMapper extends Mapper<LongWritable,Text,ComboKey2,NullWritable> {

           protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
               final String record = value.toString();

               // The path of the split being read decides which kind of record this is.
               final String sourcePath = ((FileSplit) context.getInputSplit()).getPath().toString();

               final ComboKey2 outKey = sourcePath.contains("customers")
                       ? customerKey(record)
                       : orderKey(record);

               context.write(outKey, NullWritable.get());
           }

           /** Builds a customer key: the cid is the first comma-separated field, the info is the whole line. */
           private static ComboKey2 customerKey(String record) {
               final String cidText = record.substring(0, record.indexOf(","));
               final ComboKey2 k = new ComboKey2();
               k.setType(0);
               k.setCid(Integer.parseInt(cidText));
               k.setCustomerInfo(record);
               return k;
           }

           /**
            * Builds an order key: the oid is the first field, the cid is the last field,
            * and the order info is the line without its trailing cid field.
            */
           private static ComboKey2 orderKey(String record) {
               final int lastComma = record.lastIndexOf(",");
               final String cidText = record.substring(lastComma + 1);
               final String oidText = record.substring(0, record.indexOf(","));
               final String info = record.substring(0, lastComma);

               final ComboKey2 k = new ComboKey2();
               k.setType(1);
               k.setCid(Integer.parseInt(cidText));
               k.setOid(Integer.parseInt(oidText));
               k.setOrderInfo(info);
               return k;
           }
       }

   4.创建Reducer
       package com.mine.hdfs.mr.mapjoin.reducejoin;

       import org.apache.hadoop.io.NullWritable;
       import org.apache.hadoop.io.Text;
       import org.apache.hadoop.mapreduce.Reducer;

       import java.io.IOException;
       import java.util.Iterator;

       /**
       * ReduceJoinReducer,reducer端连接实现。
       */
       public class ReduceJoinReducer extends Reducer<ComboKey2,NullWritable,Text,NullWritable> {

           protected void reduce(ComboKey2 key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
               Iterator<NullWritable> it = values.iterator();
               it.next();
               int type = key.getType();
               int cid = key.getCid() ;
               String cinfo = key.getCustomerInfo() ;
               while(it.hasNext()){
                   it.next();
                   String oinfo = key.getOrderInfo();
                   context.write(new Text(cinfo + "," + oinfo),NullWritable.get());
               }
           }
       }

   5.创建排序对比器
       package com.mine.hdfs.mr.mapjoin.reducejoin;

       import com.mine.hdfs.maxtemp.allsort.secondarysort.ComboKey;
       import org.apache.hadoop.io.WritableComparable;
       import org.apache.hadoop.io.WritableComparator;

       /**
        * Sort comparator for {@link ComboKey2}: delegates to the key's own
        * {@code compareTo}.
        */
       public class ComboKey2Comparator extends WritableComparator {
           protected ComboKey2Comparator() {
               // "true" asks the base class to create key instances for comparison.
               super(ComboKey2.class, true);
           }

           /**
            * @return the result of {@code a.compareTo(b)} after casting both to {@link ComboKey2}
            */
           public int compare(WritableComparable a, WritableComparable b) {
               final ComboKey2 left = (ComboKey2) a;
               return left.compareTo((ComboKey2) b);
           }
       }

   6.分组对比器
       package com.mine.hdfs.mr.mapjoin.reducejoin;

       import com.mine.hdfs.maxtemp.allsort.secondarysort.ComboKey;
       import org.apache.hadoop.io.WritableComparable;
       import org.apache.hadoop.io.WritableComparator;

       /**
       * CID分组对比器
       */
       public class CIDGroupComparator extends WritableComparator{

           protected CIDGroupComparator() {
               super(ComboKey2.class, true);
           }

           public int compare(WritableComparable a, WritableComparable b) {
               ComboKey2 k1 = (ComboKey2) a;
               ComboKey2 k2 = (ComboKey2) b;
               return k1.getCid() - k2.getCid();
           }
       }

   7.创建App
       package com.mine.hdfs.mr.mapjoin.reducejoin;

       import com.mine.hdfs.maxtemp.allsort.secondarysort.*;
       import org.apache.hadoop.conf.Configuration;
       import org.apache.hadoop.fs.Path;
       import org.apache.hadoop.io.IntWritable;
       import org.apache.hadoop.io.NullWritable;
       import org.apache.hadoop.io.Text;
       import org.apache.hadoop.mapreduce.Job;
       import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
       import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
       import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

       /**
        * Driver for the reduce-side join job.
        *
        * <p>Configures the mapper, reducer, partitioner, grouping comparator and sort
        * comparator, runs the job against the local file system, and exits with status
        * 0 on success or 1 on failure.
        */
       public class ReduceJoinApp {
           /**
            * @param args optional: {@code args[0]} is the input directory and
            *             {@code args[1]} the output directory; when fewer than two
            *             arguments are given, the built-in default paths are used
            * @throws Exception if job setup or execution fails
            */
           public static void main(String[] args) throws Exception {
               // Paths can be overridden on the command line; defaults keep the original behavior.
               boolean pathsGiven = args != null && args.length >= 2;
               String inputDir = pathsGiven ? args[0] : "D:\\mr\\reducejoin";
               String outputDir = pathsGiven ? args[1] : "D:\\mr\\reducejoin\\out";

               Configuration conf = new Configuration();
               conf.set("fs.defaultFS","file:///");

               Job job = Job.getInstance(conf);

               // Basic job properties
               job.setJobName("ReduceJoinApp");                        // job name
               job.setJarByClass(ReduceJoinApp.class);                 // class used to locate the jar

               // Input path
               FileInputFormat.addInputPath(job,new Path(inputDir));
               // Output path
               FileOutputFormat.setOutputPath(job,new Path(outputDir));

               job.setMapperClass(ReduceJoinMapper.class);             // mapper class
               job.setReducerClass(ReduceJoinReducer.class);           // reducer class

               // Map output types
               job.setMapOutputKeyClass(ComboKey2.class);
               job.setMapOutputValueClass(NullWritable.class);

               // Reduce output types
               job.setOutputKeyClass(Text.class);
               job.setOutputValueClass(NullWritable.class);

               // Partitioner: routes records by customer id
               job.setPartitionerClass(CIDPartitioner.class);
               // Grouping comparator: groups records by customer id
               job.setGroupingComparatorClass(CIDGroupComparator.class);
               // Sort comparator: full ComboKey2 ordering
               job.setSortComparatorClass(ComboKey2Comparator.class);
               job.setNumReduceTasks(2);                           // number of reduce tasks

               // Propagate the job result so a failed job does not exit with status 0.
               System.exit(job.waitForCompletion(true) ? 0 : 1);
           }
       }

posted on 2019-04-22 23:16 Yr-Zhang 阅读(348) 评论(0) 编辑收藏举报

刷新页面返回顶部

☆☆☆★☆☆☆

导航

公告