hadoop复合键排序使用方法

在hadoop中处理复杂业务时，需要用到复合键。复合键不同于只实现 Writable 接口的普通值类型，它需要实现 WritableComparable<T> 接口；而 WritableComparable<T> 接口本身继承了 Writable 和 Comparable<T> 两个接口。如果某个类只作为传值对象（value）而不作为 key 使用，实现 Writable 接口即可。

上源码：

    /**
     * Combines {@link Writable} and {@link Comparable} into a single type and declares no
     * methods of its own.
     *
     * <p>Classes that are used as keys implement this interface; a class that is only passed
     * around as a value can implement {@link Writable} alone.
     *
     * @param <T> the type this object can be compared with
     */
    public interface WritableComparable<T> extends Writable, Comparable<T> {  
    }

/**
 * Contract for objects that can write themselves to a {@link DataOutput} and restore
 * themselves from a {@link DataInput}.
 */
public interface Writable {  
  
  /**
   * Writes this object's fields to {@code out}.
   *
   * @param out the sink to write to
   * @throws IOException declared by the contract; presumably raised when writing fails
   */
  void write(DataOutput out) throws IOException;  
  
  /**
   * Populates this object's fields from {@code in}.
   * NOTE(review): implementations are expected to read fields in the same order
   * {@link #write(DataOutput)} emitted them - confirm against the Hadoop documentation.
   *
   * @param in the source to read from
   * @throws IOException declared by the contract; presumably raised when reading fails
   */
  void readFields(DataInput in) throws IOException;  
}

    /**
     * Excerpt of the comparison contract: a type implementing this interface can be ordered
     * against instances of {@code T}.
     *
     * @param <T> the type of object this one can be compared with
     */
    public interface Comparable<T> {

        /**
         * Compares this object with {@code o}.
         *
         * @param o the object to compare against
         * @return an int describing the ordering; by the usual {@code java.lang.Comparable}
         *     convention negative, zero, or positive when this object is less than, equal to,
         *     or greater than {@code o}
         */
        int compareTo(T o);
    }

以下是实现复合key的实例：

public class SortKey implements WritableComparable<SortKey>{  
      
    private Text name;  
    private IntWritable right;  
      
  
    public SortKey() {  
        set(new Text(), new IntWritable());  
    }  
      
      
    public SortKey(Text name, IntWritable right) {  
        set(name, right);  
    }  
  
  
    private void set(Text name,IntWritable right){  
        this.name = name;  
        this.right = right;  
    }  
      
      
  
    /** 
     * @return the name 
     */  
    public Text getName() {  
        return name;  
    }  
  
  
    /** 
     * @param name the name to set 
     */  
    public void setName(Text name) {  
        this.name = name;  
    }  
  
  
    /** 
     * @return the right 
     */  
    public IntWritable getRight() {  
        return right;  
    }  
  
  
    /** 
     * @param right the right to set 
     */  
    public void setRight(IntWritable right) {  
        this.right = right;  
    }  
  
  
    @Override  
    public void write(DataOutput out) throws IOException {  
        name.write(out);  
        right.write(out);  
    }  
  
    @Override  
    public void readFields(DataInput in) throws IOException {  
        name.readFields(in);  
        right.readFields(in);  
    }  
  
    @Override  
    public int compareTo(SortKey o) {  
        int cmp = name.compareTo(o.name);  
        if(cmp != 0){  
            return cmp;  
        }else{  
            return right.compareTo(o.right);  
        }  
    }

    <span style="white-space:pre">    </span>//到目前为止，你只能将其作为key来使用，但是如果你需要按照key的某一个值来排序，以下是重点

static{  
        WritableComparator.define(SortKey.class, new Comparator());  
    }  
      
    public static class Comparator extends WritableComparator{  
          
        private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();  
          
        protected Comparator() {  
            super(SortKey.class);  
        }  
  
        /* (non-Javadoc) 
         * @see org.apache.hadoop.io.WritableComparator#compare(byte[], int, int, byte[], int, int) 
         */  
        @Override  
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {  
            try{  
                int firstL1 = WritableUtils.decodeVIntSize(b1[s1]) + readVInt(b1, s1);  
                int firstL2 = WritableUtils.decodeVIntSize(b2[s2]) + readVInt(b2, s2);  
                return TEXT_COMPARATOR.compare(b1, s1, firstL1, b2, s2, firstL2);  
            }catch(Exception e){  
                throw new IllegalArgumentException(e);  
            }  
        }     
    }  
  
}

posted on 2016-03-24 16:44 菜鸟老三阅读(346) 评论(0) 编辑收藏举报

刷新页面返回顶部

hadoop复合键排序使用方法

导航

公告