通过spark的WordCount看Scala的函数简写

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WordCountPlus {

  /**
   * Word count over `input/*.txt`, written with explicitly named methods
   * (`flatMap`, `map`, `groupBy`, `reduce`, `mapAndReduce`) instead of
   * lambdas, to show how Scala's function shorthand (placeholder syntax,
   * eta-expansion) maps onto plain method references. The commented-out
   * lines next to each step are the equivalent lambda/shorthand forms.
   */
  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setMaster("local").setAppName("WordCountPlus")

    val sc: SparkContext = new SparkContext(sparkConf)

    try {
      val lines: RDD[String] = sc.textFile("input/*.txt")

      // Split each line into words. Equivalent shorthand forms:
      //val word: RDD[String] = lines.flatMap(_.split(" ", -1))
      //val word: RDD[String] = lines.flatMap(lines => lines.split(" ", 1))
      val word: RDD[String] = lines.flatMap(flatMap)

      // Pair every word with an initial count of 1. Equivalent forms:
      //val word2One: RDD[(String, Int)] = word.map((_, 1))
      //val word2One: RDD[(String, Int)] = word.map(word => { (word, 1) })
      val word2One: RDD[(String, Int)] = word.map(map)

      // Group the (word, 1) pairs by the word itself. Equivalent forms:
      val groupBy: RDD[(String, Iterable[(String, Int)])] = word2One.groupBy(_._1)
      //val groupBy: RDD[(String, Iterable[(String, Int)])] = word2One.groupBy(x => { (x._1) })
      //val groupBy: RDD[(String, Iterable[(String, Int)])] = word2One.groupBy(groupBy)

      // Sum the counts within each group. Equivalent forms:
      //val wordCount = groupBy.map(_._2.reduce((t1, t2) => { (t1._1, t1._2 + t2._2) }))
      //val wordCount: RDD[(String, Int)] = groupBy.map(_._2.reduce(reduce))
      //val wordCount: RDD[(String, Int)] = groupBy.map(x => { x._2.reduce(reduce) })
      val wordCount: RDD[(String, Int)] = groupBy.map(mapAndReduce)

      wordCount.collect().foreach(println)
    } finally {
      // Always release the SparkContext, even if the job throws
      // (the original version leaked it by never calling stop()).
      sc.stop()
    }
  }

  /**
   * Collapses one group produced by `groupBy` into a single (word, total)
   * pair by summing the per-occurrence counts.
   */
  def mapAndReduce(word: (String, Iterable[(String, Int)])): (String, Int) = {
    val res: (String, Int) = (word._1, word._2.reduce(reduce)._2)
    res
  }

  /** Combines two (word, count) pairs, keeping the first word and adding counts. */
  def reduce(tuple2_1: (String, Int), tuple2_2: (String, Int)): (String, Int) = {
    val res: (String, Int) = (tuple2_1._1, tuple2_1._2 + tuple2_2._2)
    res
  }

  /**
   * Grouping key extractor: what is passed to `groupBy` is the *rule*
   * (key function), not the data — here the key is the word itself.
   */
  def groupBy(word: (String, Int)): String = {
    val res = word._1
    res
  }

  /** Maps a word to its initial (word, 1) count pair. */
  def map(word: String): (String, Int) = {
    val res: (String, Int) = (word, 1)
    res
  }

  /** Splits a line on single spaces; limit -1 keeps trailing empty tokens. */
  def flatMap(lines: String): Array[String] = {
    val res: Array[String] = lines.split(" ", -1)
    res
  }

  /*
  NOTE(review): as originally written this variant did not type-check —
  `word._2.reduce(reduce2)` applies an (Int, Int) => Int to a collection of
  (String, Int) pairs. The counts must be projected out first:

  def mapAndReduce2(word: (String, Iterable[(String, Int)])): (String, Int) = {
    val res: (String, Int) = (word._1, word._2.map(_._2).reduce(reduce2))
    res
  }

  def reduce2(count1: Int, count2: Int): Int = {
    count1 + count2
  }
*/
}

  

posted @ 2020-11-11 14:13  阳光下的me  阅读(149)  评论(0编辑  收藏  举报