Spark Java API 实现二次排序
package com.spark.sort;

import java.io.Serializable;

import scala.math.Ordered;

/**
 * Composite key used for a secondary sort: keys are ordered by {@code first}
 * (natural {@link String} order) and, when {@code first} is equal, by
 * {@code second} (numeric order).
 *
 * <p>All ordering operators delegate to {@link #compare(SecondSortKey)} so
 * that they can never disagree with each other, and the ordering is
 * consistent with {@link #equals(Object)}.
 *
 * <p>A {@code null} {@code first} is ordered before every non-null value,
 * mirroring the null tolerance of {@link #equals(Object)} and
 * {@link #hashCode()}.
 */
public class SecondSortKey implements Serializable, Ordered<SecondSortKey> {

    /** serialVersionUID */
    private static final long serialVersionUID = -2749925310062789494L;

    /** Primary sort field. */
    private String first;

    /** Secondary sort field, consulted only when {@code first} ties. */
    private long second;

    /**
     * Creates a key.
     *
     * @param first  primary sort field
     * @param second secondary sort field
     */
    public SecondSortKey(String first, long second) {
        this.first = first;
        this.second = second;
    }

    /** @return the primary sort field */
    public String getFirst() {
        return first;
    }

    /** @param first the new primary sort field */
    public void setFirst(String first) {
        this.first = first;
    }

    /** @return the secondary sort field */
    public long getSecond() {
        return second;
    }

    /** @param second the new secondary sort field */
    public void setSecond(long second) {
        this.second = second;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((first == null) ? 0 : first.hashCode());
        // Fold the 64-bit value into 32 bits (same formula as Long#hashCode).
        result = prime * result + (int) (second ^ (second >>> 32));
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        SecondSortKey other = (SecondSortKey) obj;
        boolean sameFirst = (first == null) ? other.first == null : first.equals(other.first);
        return sameFirst && second == other.second;
    }

    /** @return {@code true} if this key sorts strictly after {@code that} */
    @Override
    public boolean $greater(SecondSortKey that) {
        return compare(that) > 0;
    }

    /** @return {@code true} if this key sorts after or equal to {@code that} */
    @Override
    public boolean $greater$eq(SecondSortKey that) {
        return compare(that) >= 0;
    }

    /** @return {@code true} if this key sorts strictly before {@code that} */
    @Override
    public boolean $less(SecondSortKey that) {
        return compare(that) < 0;
    }

    /** @return {@code true} if this key sorts before or equal to {@code that} */
    @Override
    public boolean $less$eq(SecondSortKey that) {
        return compare(that) <= 0;
    }

    /**
     * Compares this key with {@code that}, first by {@code first} and then by
     * {@code second}.
     *
     * @param that the key to compare against
     * @return a negative value, zero, or a positive value when this key is
     *         less than, equal to, or greater than {@code that}
     */
    @Override
    public int compare(SecondSortKey that) {
        int byFirst = compareFirst(this.first, that.getFirst());
        if (byFirst != 0) {
            return byFirst;
        }
        // Long.compare instead of (int) (a - b): the subtraction can overflow
        // and the int cast truncates, either of which can flip the sign or
        // report two different values as equal.
        return Long.compare(this.second, that.getSecond());
    }

    /**
     * Same ordering as {@link #compare(SecondSortKey)}.
     *
     * @param that the key to compare against
     * @return a negative value, zero, or a positive value when this key is
     *         less than, equal to, or greater than {@code that}
     */
    @Override
    public int compareTo(SecondSortKey that) {
        return compare(that);
    }

    /** Null-safe comparison of the primary field; {@code null} sorts first. */
    private static int compareFirst(String left, String right) {
        if (left == null) {
            return (right == null) ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }
}
1
package com.spark.sort; 2 3 import org.apache.spark.SparkConf; 4 import org.apache.spark.api.java.JavaPairRDD; 5 import org.apache.spark.api.java.JavaRDD; 6 import org.apache.spark.api.java.JavaSparkContext; 7 import org.apache.spark.api.java.function.Function; 8 import org.apache.spark.api.java.function.PairFunction; 9 10 import scala.Tuple2; 11 12 public class SecondSort { 13 14 public static void main(String[] args) { 15 SparkConf sparkConf = new SparkConf().setAppName("secondsort").setMaster("local"); 16 JavaSparkContext jsc = new JavaSparkContext(sparkConf); 17 JavaRDD<String> textFileRDD = jsc.textFile("D:\\test\\input\\sort"); 18 JavaPairRDD<SecondSortKey,String> pairRDD = textFileRDD.mapToPair(new PairFunction<String, SecondSortKey, String>() { 19 @Override 20 public Tuple2<SecondSortKey, String> call(String t) throws Exception { 21 String[] split = t.split("\t"); 22 String first = split[0]; 23 Long second = Long.valueOf(split[1]); 24 SecondSortKey ssk = new SecondSortKey(first, second); 25 return new Tuple2<SecondSortKey, String>(ssk, t); 26 } 27 }); 28 29 //排序 30 JavaPairRDD<SecondSortKey, String> sortByKeyRDD =pairRDD.sortByKey(); 31 32 //过滤自定义的key 33 JavaRDD<String> mapRDD = sortByKeyRDD.map(new Function<Tuple2<SecondSortKey,String>, String>() { 34 35 @Override 36 public String call(Tuple2<SecondSortKey, String> v1) throws Exception { 37 38 return v1._2; 39 } 40 }); 41 42 mapRDD.saveAsTextFile("D:\\test\\output\\sort"); 43 44 jsc.close(); 45 } 46 47 48 }
源数据(每行两列,列之间以制表符分隔):
a 12
a 2
b 26
c 85
ab 32
ab 23
ac 12
b 85
a 36
b 69
c 25
排序之后:
a 2
a 12
a 36
ab 23
ab 32
ac 12
b 26
b 69
b 85
c 25
c 85