通过 Spark 的 WordCount 看 Scala 的函数简写
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark word count written so that every transformation receives a named
 * method instead of an anonymous function.
 *
 * Each step in `main` lists, as comments, the shorter lambda spellings that
 * are equivalent to the named helper actually being passed. The helpers are
 * defined below `main` and are named after the transformation they feed.
 */
object WordCountPlus {

  /**
   * Counts the words of the text files matched by the `input` glob and prints
   * one `(word, count)` pair per line.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("WordCountPlus")
    val sc: SparkContext = new SparkContext(sparkConf)

    // Stop the context even when a stage fails, so the local Spark runtime
    // is always released.
    try {
      val lines: RDD[String] = sc.textFile("input/*.txt")

      // Step 1: split every line into words. Equivalent spellings:
      //   val word: RDD[String] = lines.flatMap(_.split(" ", -1))
      //   val word: RDD[String] = lines.flatMap(line => line.split(" ", -1))
      val word: RDD[String] = lines.flatMap(flatMap)

      // Step 2: pair every word with the count 1. Equivalent spellings:
      //   val word2One: RDD[(String, Int)] = word.map((_, 1))
      //   val word2One: RDD[(String, Int)] = word.map(word => { (word, 1) })
      val word2One: RDD[(String, Int)] = word.map(map)

      // Step 3: group the pairs by word. The local is called `grouped` so it
      // does not shadow the `groupBy` helper used in the last spelling:
      //   val grouped: RDD[(String, Iterable[(String, Int)])] = word2One.groupBy(x => { (x._1) })
      //   val grouped: RDD[(String, Iterable[(String, Int)])] = word2One.groupBy(groupBy)
      val grouped: RDD[(String, Iterable[(String, Int)])] = word2One.groupBy(_._1)

      // Step 4: collapse every group into a single (word, total) pair.
      // Equivalent spellings:
      //   val wordCount = grouped.map(_._2.reduce((t1, t2) => { (t1._1, t1._2 + t2._2) }))
      //   val wordCount: RDD[(String, Int)] = grouped.map(_._2.reduce(reduce))
      //   val wordCount: RDD[(String, Int)] = grouped.map(x => { x._2.reduce(reduce) })
      val wordCount: RDD[(String, Int)] = grouped.map(mapAndReduce)

      wordCount.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }

  /**
   * Collapses one group into its `(word, total)` pair.
   *
   * @param word a key together with all `(word, count)` pairs grouped under it;
   *             the pairs must be non-empty because `reduce` throws on an
   *             empty collection
   * @return the key paired with the sum of the counts in the group
   */
  def mapAndReduce(word: (String, Iterable[(String, Int)])): (String, Int) =
    (word._1, word._2.reduce(reduce)._2)

  /**
   * Merges two `(word, count)` pairs by adding their counts.
   *
   * @param tuple2_1 the left pair; its word is kept in the result
   * @param tuple2_2 the right pair; only its count is used
   * @return the left word paired with the sum of both counts
   */
  def reduce(tuple2_1: (String, Int), tuple2_2: (String, Int)): (String, Int) =
    (tuple2_1._1, tuple2_1._2 + tuple2_2._2)

  // What gets passed to a transformation is the rule, i.e. the function itself.

  /**
   * Grouping rule: selects the word of a `(word, count)` pair as the key.
   *
   * @param word a `(word, count)` pair
   * @return the word component
   */
  def groupBy(word: (String, Int)): String = word._1

  /**
   * Mapping rule: pairs a word with the initial count 1.
   *
   * @param word the word to count
   * @return `(word, 1)`
   */
  def map(word: String): (String, Int) = (word, 1)

  /**
   * Flat-mapping rule: splits a line on single spaces.
   *
   * The limit `-1` keeps trailing empty strings, so a line that ends with a
   * space (or is empty) contributes an empty-string word.
   *
   * @param lines one line of input text
   * @return the space-separated pieces of the line
   */
  def flatMap(lines: String): Array[String] = lines.split(" ", -1)

  // Alternative sketch (kept disabled): reduce over the bare counts instead of
  // over the (word, count) pairs. The counts are projected out first so that
  // `reduce2` receives Ints.
  //
  // def mapAndReduce2(word: (String, Iterable[(String, Int)])): (String, Int) = {
  //   val res: (String, Int) = (word._1, word._2.map(_._2).reduce(reduce2))
  //   res
  // }
  //
  // def reduce2(count1: Int, count2: Int): Int = {
  //   count1 + count2
  // }
}