Spark的二次排序

通过Scala实现二次排序

package _core.SortAndTopN

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Author Mr. Guo
  * Create 2018/9/29 - 22:00
  */
/**
  * Composite sort key for secondary sort: orders by `first`, breaking ties
  * with `second`. Serializable so Spark can ship it to executors.
  *
  * @param first  primary sort field
  * @param second secondary (tie-breaking) sort field
  */
class SecondarySort(val first: Int, val second: Int) extends Ordered[SecondarySort] with Serializable {

  /**
    * Returns a negative, zero, or positive value if `this` is less than,
    * equal to, or greater than `that`.
    */
  override def compare(that: SecondarySort): Int = {
    // Use compareTo rather than subtraction: `this.first - that.first`
    // overflows for large-magnitude Ints (e.g. Int.MinValue - 1 wraps to
    // Int.MaxValue) and silently inverts the ordering.
    val byFirst = this.first.compareTo(that.first)
    if (byFirst != 0) byFirst else this.second.compareTo(that.second)
  }
}

/**
  * Driver for the secondary-sort example: reads "first second" integer pairs
  * from a local text file, sorts lines by (first, second) descending, and
  * prints the original lines in sorted order.
  */
object SecondarySortApp {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SecondarySortApp").setMaster("local[2]")
    val sc = new SparkContext(conf)

    try {
      val lines = sc.textFile("file:///E:\\工作\\test_data\\secondSorted.txt")

      // Split each line once and reuse the tokens; the original split
      // the same line twice per record.
      val pairWithSortKey = lines.map { line =>
        val tokens = line.split(" ")
        (new SecondarySort(tokens(0).toInt, tokens(1).toInt), line)
      }

      // ascending = false => descending order on the composite key.
      val sorted = pairWithSortKey.sortByKey(false)
      val sortResult = sorted.map(_._2)
      sortResult.collect.foreach(println)
    } finally {
      // Always release the SparkContext, even if the job throws.
      sc.stop()
    }
  }
}

  

posted @ 2018-10-26 14:20  郭小白  阅读(528)  评论(0编辑  收藏  举报