import org.apache.spark.streaming._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream
// Streaming word count that keeps a running total per word across batches.
// Lines of text are read from a TCP socket, split on single spaces, and the
// per-word totals are printed once per batch.

// Local master with 3 threads and a 5-second batch interval.
val conf = new SparkConf().setAppName("WordCountOnLine").setMaster("local[3]")
val ssc = new StreamingContext(conf, Seconds(5))

// Text source: one record per line received from spark1:9999.
val receiverInputStream = ssc.socketTextStream("spark1", 9999)
val words = receiverInputStream.flatMap(_.split(" "))
val pairs = words.map(word => (word, 1))

// A checkpoint directory must be configured, otherwise the stateful
// updateStateByKey operation below cannot run.
ssc.checkpoint("hdfs://spark1:9000/world_count/checkpoint")

// Merge the counts seen for a key in the current batch (`values`) into the
// total carried over from earlier batches (`state`, absent the first time the
// key is seen) and return the new total as the key's state.
val wordCount: DStream[(String, Int)] = pairs.updateStateByKey(
  (values: Seq[Int], state: Option[Int]) => Some(state.getOrElse(0) + values.sum)
)

wordCount.print()

// Start the computation and block until the streaming context terminates.
ssc.start()
ssc.awaitTermination()