import org.apache.spark.streaming._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream

// Application configuration: run locally with 3 threads. A socket receiver
// occupies one thread, so the local master needs more threads than receivers.
val conf = new SparkConf()
  .setMaster("local[3]")
  .setAppName("WordCountOnLine")
// Streaming context that groups incoming data into 5-second batches.
val ssc = new StreamingContext(conf, Seconds(5))

// Read lines of text from the TCP source at spark1:9999.
val receiverInputStream = ssc.socketTextStream("spark1", 9999)
// Tokenize each line on single spaces. String.split produces empty tokens for
// blank lines and for leading/consecutive spaces; drop them so that "" is
// never counted as a word.
val words = receiverInputStream.flatMap(_.split(" ")).filter(_.nonEmpty)
// Pair every word with an initial count of 1, ready for per-key aggregation.
val pairs = words.map(word => (word, 1))

// A checkpoint directory must be set, otherwise the stateful update below
// cannot run.
ssc.checkpoint("hdfs://spark1:9000/world_count/checkpoint")
// Merge each batch into the previous state: `values` holds the counts seen for
// a word in the current batch, `state` holds its running total so far (None
// the first time the word appears). The new total is always defined, so it is
// wrapped in Some.
val wordCount: DStream[(String, Int)] = pairs.updateStateByKey[Int](
  (values: Seq[Int], state: Option[Int]) => Some(state.getOrElse(0) + values.sum)
)
// Output operation: print the cumulative counts of each batch.
wordCount.print()

// Start receiving and processing data, then block until the computation is
// stopped or fails.
ssc.start()
try {
  ssc.awaitTermination()
} finally {
  // Release the streaming context and its underlying SparkContext even when
  // awaitTermination() rethrows an error raised during execution.
  ssc.stop(stopSparkContext = true, stopGracefully = false)
}
// Source: blog post by jaysonteng, posted on 2020-12-15 13:16 (views: 90, comments: 0).