Spark常用的算子总结(6)—— sortByKey
// Creation of the SparkContext (sc) is omitted here.
val arr = List(("A", 1), ("B", 2), ("A", 2), ("B", 3))
val rdd = sc.parallelize(arr)
// sortByKey() with no argument sorts the pairs by key in ascending order.
val sortByKeyRDD = rdd.sortByKey()
// Bring the sorted data back to the driver before printing. Calling
// foreach(println) directly on the RDD runs on the executors, partition by
// partition, so the lines may be printed out of order (or not on the driver
// at all when running on a cluster).
sortByKeyRDD.collect().foreach(println)
// stop() is side-effecting, so it is called with explicit parentheses.
sc.stop()
# (A,1)
# (A,2)
# (B,2)
# (B,3)
# 统计单词的词频
// Word-frequency count over a text file, written out ordered by count (descending).
val rdd = sc.textFile("/home/scipio/README.md")
val wordcount = rdd
  .flatMap(line => line.split(' '))
  .map(word => (word, 1))
  .reduceByKey((a, b) => a + b)
// Entries look like ("apple", 10) and must be ordered by the value, but
// sortByKey only sorts on the key: swap each pair to (count, word), sort in
// descending order, then swap back to (word, count).
val wcsort = wordcount
  .map(_.swap)
  .sortByKey(ascending = false)
  .map(_.swap)
wcsort.saveAsTextFile("/home/scipio/sort.txt")
# 如果需要升序,使用 sortByKey(true)(sortByKey() 默认即为升序)