package sparkcore
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object Demo08ReduceByKey {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("ReduceByKey").setMaster("local")
    val sc = new SparkContext(conf)

    val linesRDD: RDD[String] = sc.textFile("data/words.txt")
    val wordsRDD: RDD[String] = linesRDD.flatMap(_.split(","))
    val kvRDD: RDD[(String, Int)] = wordsRDD.map(word => (word, 1))
    /**
     * reduceByKey: aggregates all the values that share the same key
     */
    val countRDD: RDD[(String, Int)] = kvRDD.reduceByKey((i: Int, j: Int) => i + j)
    countRDD.foreach(println)
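
    // For comparison (a sketch, not part of the original demo): the same count expressed
    // with groupByKey + mapValues. reduceByKey is generally preferred because it combines
    // values on each partition before the shuffle (map-side combine), whereas groupByKey
    // ships every (word, 1) pair across the network.
    val countViaGroup: RDD[(String, Int)] = kvRDD.groupByKey().mapValues(_.sum)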
    // Shorthand: when each parameter is used exactly once in the function body,
    // it can be replaced with an underscore
    val count2: RDD[(String, Int)] = kvRDD.reduceByKey(_ + _)
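
    // reduceByKey also accepts an explicit number of partitions for the shuffled
    // result (a sketch, not part of the original demo): here the counts land in 2 partitions.
    val count3: RDD[(String, Int)] = kvRDD.reduceByKey(_ + _, 2)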
  }
}