ReduceByKey

package sparkcore

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Demo08ReduceByKey {
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setAppName("ReduceByKey").setMaster("local")

    val sc = new SparkContext(conf)

    val linesRDD: RDD[String] = sc.textFile("data/words.txt")

    val wordsRDD: RDD[String] = linesRDD.flatMap(_.split(","))

    val kvRDD: RDD[(String, Int)] = wordsRDD.map(word => (word, 1))

    /**
     * reduceByKey: aggregates all the values of the same key with the given function
     */
    val countRDD: RDD[(String, Int)] = kvRDD.reduceByKey((i: Int, j: Int) => i + j)

    countRDD.foreach(println)

    // Shorthand: when each parameter is used exactly once, it can be replaced with an underscore
    val count2: RDD[(String, Int)] = kvRDD.reduceByKey(_ + _)
  }

}
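For reference, here is a minimal self-contained sketch of the same word-count pipeline that runs without an external file. The in-memory input built with parallelize, the sample words (java, spark, flink), and the object name Demo08ReduceByKeyLocal are illustrative assumptions, not part of the original demo, which reads from data/words.txt.

package sparkcore

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Demo08ReduceByKeyLocal {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("ReduceByKeyLocal").setMaster("local")
    val sc = new SparkContext(conf)

    // Hypothetical in-memory input standing in for data/words.txt
    val linesRDD: RDD[String] = sc.parallelize(Seq("java,spark", "java,flink"))

    val countRDD: RDD[(String, Int)] = linesRDD
      .flatMap(_.split(","))
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    // Expected output (ordering may vary across runs):
    // (java,2)
    // (spark,1)
    // (flink,1)
    countRDD.foreach(println)

    sc.stop()
  }
}

A design note: reduceByKey combines values for each key on the map side before shuffling, so for aggregations like word count it moves much less data across the network than groupByKey followed by a manual reduce.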