hadoop中compare函数
在看 Hadoop 的二次排序示例时,我把代码改写了一下,给 key 加了第三个字段。本来以为排序是在下面这个 compareTo 函数中进行的:
/**
 * Orders keys by first, then second, then third; the first field that
 * differs decides the result.
 *
 * @param o the key to compare against
 * @return -1, 0 or 1 when this key is less than, equal to or greater than {@code o}
 */
public int compareTo(IntPair o) {
    // Trace output used to see whether this method is ever invoked.
    System.out.println("-----------compareTo");

    if (first < o.first) {
        return -1;
    }
    if (first > o.first) {
        return 1;
    }

    if (second < o.second) {
        return -1;
    }
    if (second > o.second) {
        return 1;
    }

    if (third < o.third) {
        return -1;
    }
    if (third > o.third) {
        return 1;
    }

    return 0;
}
本来以为排序在这里面进行, 后来发现不是,把比较第3个字段的代码去掉, 发现还是有序的。
后来通过打印得知,排序实际上是在 Comparator 的 compare 函数中进行的。我把它稍微改写了一下:
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { // 二进制数组读取 int intvalue = readInt(b1, s1); System.out.println("s1 = " + b1.length);
// 验证b1中存储的数据 int third = 0; for(int i =s1 + 9; i<= s1+ 12; i++){ third += (b1[i]&0xff) << (24-8*i); } System.out.println("third = " + third); return compareBytes(b1, s1, l1, b2, s2, l2); } }
key 中有 3 个整型值(每个占 4 个字节),s1 为起始位置,l1 为长度 12,这样我们就可以从字节数组中读出各个字段的值。
return compareBytes(b1, s1, l1, b2, s2, l2); 内部调用的是 return FastByteComparisons.compareTo(b1, s1, l1, b2, s2, l2);,其实现如下:
public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) { if ((buffer1 == buffer2) && (offset1 == offset2) && (length1 == length2)) { return 0; } int end1 = offset1 + length1; int end2 = offset2 + length2; int i = offset1; for (int j = offset2; (i < end1) && (j < end2); ++j) { int a = buffer1[i] & 0xFF; int b = buffer2[j] & 0xFF; if (a != b) return (a - b); ++i; } return (length1 - length2); } } }
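从上面的代码可以看出,排序是对序列化后的字节数组逐字节(按无符号值)进行比较的,三个字段的字节依次参与比较,所以输出的结果是有序的。(注意:这只在各字段为非负整数时才与数值顺序一致;write 中没有做 Integer.MIN_VALUE 的偏移,负数的最高字节不小于 0x80,按字节比较时会排在非负数之后。)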
结论:理论上无论有 N 个字段,这样得到的结果都是有序的,只是参与比较的字节长度有所变化。
测试又加了一个字段, 输出结果都是有序的。
测试代码
public static class IntPair implements WritableComparable<IntPair> { private int first = 0; private int second = 0; private int third = 0; private int fourth = 0; /** * Set the left and right values. */ public void set(int left, int right, int third, int fourth) { first = left; second = right; this.third = third; this.fourth = fourth; } public int getFirst() { return first; } public int getSecond() { return second; } public int getThird() { return third; } public int getFourth() { return fourth; } @Override public String toString() { System.out.println("third = " + third); return first + "\t" + second + "\t" + third + "\t" + fourth; } /** * Read the two integers. * Encoded as: MIN_VALUE -> 0, 0 -> -MIN_VALUE, MAX_VALUE-> -1 */ @Override public void readFields(DataInput in) throws IOException { first = in.readInt();// + Integer.MIN_VALUE; second = in.readInt();// + Integer.MIN_VALUE; third = in.readInt();// + Integer.MIN_VALUE; fourth = in.readInt(); } @Override public void write(DataOutput out) throws IOException { /* out.writeInt(first - Integer.MIN_VALUE); out.writeInt(second - Integer.MIN_VALUE); out.writeInt(third - Integer.MIN_VALUE); */ out.writeInt(first ); out.writeInt(second ); out.writeInt(third ); out.writeInt(fourth); } @Override public int hashCode() { return first * 157 + second*10 + third; } @Override public boolean equals(Object right) { if (right instanceof IntPair) { IntPair r = (IntPair) right; return r.first == first && r.second == second && r.third == third && r.fourth == fourth; } else { return false; } } /** A Comparator that compares serialized IntPair. 
*/ public static class Comparator extends WritableComparator { public Comparator() { super(IntPair.class); } // 排序比较器,数据全部存在byte数组 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { // 二进制数组读取 int intvalue = readInt(b1, s1); System.out.println("s1 = " + b1.length); int third = 0; for(int i =s1 + 9; i<= s1+ 12; i++){ third += (b1[i]&0xff) << (24-8*i); } System.out.println("third = " + third); return compareBytes(b1, s1, l1, b2, s2, l2); } } static { // register this comparator WritableComparator.define(IntPair.class, new Comparator()); } // 好像没用上 @Override public int compareTo(IntPair o) { System.out.println("-----------compareTo"); if (first != o.first) { return first < o.first ? -1 : 1; } else if (second != o.second) { return second < o.second ? -1 : 1; }// else if (third != o.third) { // return third < o.third ? -1 : 1;} return 0; } }
public static class StrPair implements WritableComparable<StrPair> { private Text first; private Text second ; private Text third ; private Text fourth; // 这句很重要, 要不读的时候会出错 public StrPair(){ set(new Text(),new Text(),new Text(),new Text()); } public void set(Text left, Text right, Text third, Text fourth) { this.first = left; this.second = right; this.third = third; this.fourth = fourth; } public Text getFirst() { return first; } public Text getSecond() { return second; } public Text getThird() { return third; } public Text getFourth() { return fourth; } @Override public String toString() { return first + "\t" + second + "\t" + third + "\t" + fourth; } @Override public void readFields(DataInput in) throws IOException { first.readFields(in); second.readFields(in); third.readFields(in); fourth.readFields(in); } @Override public void write(DataOutput out) throws IOException { System.out.println(out); first.write(out); second.write(out); third.write(out); fourth.write(out); System.out.println("First = " + second.toString()); } @Override public int hashCode() { return first.hashCode()* 157 + second.hashCode()*10 + third.hashCode(); } @Override public boolean equals(Object right) { if (right instanceof StrPair) { StrPair r = (StrPair) right; return first.equals(r.first) && second.equals(r.second) && third.equals(r.third) && fourth.equals(r.fourth); } else { return false; } } /** A Comparator that compares serialized StrPair. 
*/ public static class Comparator extends WritableComparator { public Comparator() { super(StrPair.class); } // 排序比较器,数据全部存在byte数组 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { // 二进制数组读取 int intvalue = readInt(b1, s1); System.out.println("s1 = " + b1.length); /* int third = 0; for(int i =s1 + 9; i<= s1+ 12; i++){ third += (b1[i]&0xff) << (24-8*i); } System.out.println("third = " + third); */ return compareBytes(b1, s1, l1, b2, s2, l2); } } static { // register this comparator WritableComparator.define(StrPair.class, new Comparator()); } @Override public int compareTo(StrPair o) {/* if (first != o.first) { return first < o.first ? -1 : 1; } else if (second != o.second) { return second < o.second ? -1 : 1; }// else if (third != o.third) { // return third < o.third ? -1 : 1;} return 0; */ return 0; } } /** * Partition based on the first part of the pair. */ public static class FirstPartitioner extends Partitioner<StrPair,Text>{ @Override // public int getPartition(StrPair key, Text value, int numPartitions) { return Math.abs(key.getFirst().hashCode() * 127) % numPartitions; } } /** * Compare only the first part of the pair, so that reduce is called once * for each value of the first part. */ public static class FirstGroupingComparator implements RawComparator<StrPair> { @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, b2, s2, Integer.SIZE/8); } @Override public int compare(StrPair o1, StrPair o2) { Text l = o1.getFirst(); Text r = o2.getFirst(); return l.equals(r)?0:1; // return l == r ? 0 : (l < r ? -1 : 1); } }