The foreachRDD operation, checkpointing, and writing data to SQL

package com.day16

import com.day15.ConnectionPoolUtil
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.{HashPartitioner, SparkConf}
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * The foreachRDD operation: write updateStateByKey word counts to MySQL.
  */
object updateStateByKeyJDBC {
  // State update function: for each key, add this batch's counts to the previous running total.
  val updateFunc = (it: Iterator[(String, Seq[Int], Option[Int])]) => {
    it.map { case (word, counts, state) => (word, counts.sum + state.getOrElse(0)) }
  }
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ysbj").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))
    // Set a checkpoint directory first; updateStateByKey requires one to store state
    ssc.checkpoint("hdfs://node1:9000/checkpoint")
    // Basic Kafka connection configuration
    // ZooKeeper quorum used by the receiver-based Kafka API
    val zks = "192.168.28.131:2181,192.168.28.131:2182,192.168.28.131:2183"
    // Kafka consumer group
    val groupId = "gp1"
    // Topics to consume and the number of receiver threads per topic
    val topics = Map[String, Int]("test1" -> 1)
    // Create the input stream (receiver-based Kafka API)
    val data: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(ssc, zks, groupId, topics)
    // Split each message into words and pair each word with a count of 1
    val words: DStream[(String, Int)] = data.flatMap(_._2.split(" ")).map((_, 1))
    // Apply updateStateByKey to maintain running counts across batches
    val value: DStream[(String, Int)] = words.updateStateByKey(updateFunc, new HashPartitioner(ssc.sparkContext.defaultParallelism), true)
    // Write the results to MySQL: one connection per partition, not per record
    value.foreachRDD(rdd => {
      rdd.foreachPartition(partition => {
        // Borrow a JDBC connection from the pool
        val conn = ConnectionPoolUtil.getConnections
        // Use a PreparedStatement instead of concatenating values into the SQL string
        val pstmt = conn.prepareStatement("insert into streaming(word, count) values(?, ?)")
        partition.foreach { case (word, count) =>
          pstmt.setString(1, word)
          pstmt.setInt(2, count)
          pstmt.executeUpdate()
        }
        pstmt.close()
        // Return the connection to the pool
        ConnectionPoolUtil.returnConn(conn)
      })
    })
    // Start the streaming computation and wait for termination
    ssc.start()
    ssc.awaitTermination()
  }

}
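
The ConnectionPoolUtil class is imported from an earlier post (com.day15) and is not shown here. As a rough sketch of the shape it needs to have for the code above, assuming a simple DriverManager-backed pool and placeholder connection settings (JDBC URL, user, password), it could look like the following; the target table is assumed to be something like streaming(word VARCHAR, count INT):

package com.day15

import java.sql.{Connection, DriverManager}
import java.util.concurrent.ConcurrentLinkedQueue

// Minimal sketch of a JDBC connection pool exposing getConnections / returnConn,
// as used above. The driver class, URL, user and password are placeholders.
object ConnectionPoolUtil {
  private val pool = new ConcurrentLinkedQueue[Connection]()

  Class.forName("com.mysql.jdbc.Driver")

  // Reuse an idle connection if one is available, otherwise open a new one
  def getConnections: Connection = {
    val conn = pool.poll()
    if (conn != null) conn
    else DriverManager.getConnection("jdbc:mysql://node1:3306/test", "root", "password")
  }

  // Put the connection back into the pool for reuse by later batches
  def returnConn(conn: Connection): Unit = {
    pool.offer(conn)
  }
}

Getting the connection inside foreachPartition (rather than inside the inner foreach) keeps it to one connection per partition, and pooling lets connections be reused across batches instead of being reopened every 5-second interval.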
