Spark Streaming from Kafka Demo
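A minimal word-count demo: Spark Streaming consumes messages from Kafka through the old receiver-based 0-8 integration (package org.apache.spark.streaming.kafka) and keeps a running per-word total with updateStateByKey. The spark-streaming-kafka-0-8 artifact must be on the classpath; a minimal build.sbt sketch, assuming Spark 2.4.x on Scala 2.11 (adjust the versions to your cluster; the 0-8 artifact was removed in Spark 3.0):

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming" % "2.4.0",
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % "2.4.0"
)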

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaSparkStreaming extends App {
  // 1. Initialize the Spark configuration
  val sparkConf = new SparkConf().setMaster("local[*]").setAppName("KafkaSparkStreaming")
  // 2. Create a StreamingContext with a 5-second batch interval
  private val streamContext = new StreamingContext(sparkConf, Seconds(5))

//  // Kafka connection parameters (unused by the receiver call below;
//  // the direct-stream sketch after the listing uses these values)
//  val brokers = "hadoop102:9092,hadoop103:9092,hadoop101:9092"
//  val topic = "source"
//  val consumerGroup = "spark"

  // TODO Stateful operators need a checkpoint directory to persist state between batches
  streamContext.sparkContext.setCheckpointDir("checkpoint1")

  // Receiver-based Kafka stream (0-8 API): each element is a (kafkaKey, messageValue) pair
  private val kafkaDStream: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(
    streamContext,
    "hadoop101:2181",  // ZooKeeper quorum
    "sparkstream",     // consumer group id
    Map("stream" -> 3) // topic name -> number of receiver threads
  )
  // Split each message value into words
  private val wordStreams: DStream[String] = kafkaDStream.flatMap(_._2.split(" "))

  // Map each word to a (word, 1) pair
  val wordAndOneStreams = wordStreams.map((_, 1))

//  // Stateless count: reduceByKey aggregates within each 5-second batch only
//  val wordAndCountStreams = wordAndOneStreams.reduceByKey(_ + _)
//  // Print the per-batch counts
//  wordAndCountStreams.print()

  // TODO Stateful count: updateStateByKey folds each batch's counts into the running total per word
  private val stateDStream: DStream[(String, Int)] = wordAndOneStreams.updateStateByKey[Int] {
    case (seq: Seq[Int], buffer: Option[Int]) =>
      val sum = buffer.getOrElse(0) + seq.sum // previous total (0 if absent) plus this batch's occurrences
      Option(sum)
  }
  stateDStream.print()


  // Start the StreamingContext and block until termination
  streamContext.start()
  streamContext.awaitTermination()
}
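
The commented-out broker list above hints at the direct (receiver-less) 0-8 integration, which talks to the brokers instead of ZooKeeper and creates one RDD partition per Kafka partition. A minimal sketch under that assumption, reusing streamContext from the listing (the topic "source" and group "spark" are the illustrative values from the commented-out block):

import kafka.serializer.StringDecoder
import org.apache.spark.streaming.kafka.KafkaUtils

val kafkaParams = Map(
  "metadata.broker.list" -> "hadoop102:9092,hadoop103:9092,hadoop101:9092", // brokers, not ZooKeeper
  "group.id" -> "spark"
)
// Each element is a (kafkaKey, messageValue) pair; no long-running receiver is allocated
val directStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
  streamContext,
  kafkaParams,
  Set("source")
)
directStream.map(_._2).print()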
