foreachRDD, checkpointing, and writing data to MySQL
package com.day16
import com.day15.ConnectionPoolUtil
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.{HashPartitioner, SparkConf}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
  * foreachRDD: persist updateStateByKey word counts from Kafka to MySQL
  */
object updateStateByKeyJDBC {
  // For each key, add this batch's counts to the previous running total
  val updateFunc = (ite: Iterator[(String, Seq[Int], Option[Int])]) => {
    ite.map(t => (t._1, t._2.sum + t._3.getOrElse(0)))
  }
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ysbj").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))
    // Set a checkpoint directory first -- updateStateByKey requires one
    ssc.checkpoint("hdfs://node1:9000/checkpoint")
    // Basic Kafka connection settings
    // ZooKeeper quorum used by the receiver-based Kafka API
    val zks = "192.168.28.131:2181,192.168.28.131:2182,192.168.28.131:2183"
    // Kafka consumer group
    val groupId = "gp1"
    // Topics to consume, with the number of receiver threads per topic
    val topics = Map[String, Int]("test1" -> 1)
    // Create the input stream
    val data: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(ssc, zks, groupId, topics)
    // Split each message into words and pair each word with a count of 1
    val words: DStream[(String, Int)] = data.flatMap(_._2.split(" ")).map((_, 1))
    // Apply updateStateByKey to maintain a running count per word across batches
    val value = words.updateStateByKey(updateFunc, new HashPartitioner(ssc.sparkContext.defaultParallelism), true)
    // Write the running counts to MySQL
    value.foreachRDD(rdd => {
      rdd.foreachPartition(partition => {
        // Get one JDBC connection per partition rather than per record
        val conn = ConnectionPoolUtil.getConnections
        // Prepare the insert once; parameter binding also avoids SQL injection
        // (note: each batch emits the full running totals, so an upsert may fit better in practice)
        val pstmt = conn.prepareStatement("insert into streaming(word, count) values (?, ?)")
        partition.foreach(record => {
          pstmt.setString(1, record._1)
          pstmt.setInt(2, record._2)
          pstmt.executeUpdate()
        })
        pstmt.close()
        // Return the connection to the pool for reuse
        ConnectionPoolUtil.returnConn(conn)
      })
    })
    // Start the computation and block until it terminates
    ssc.start()
    ssc.awaitTermination()
  }
}
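
The code above calls ConnectionPoolUtil from com.day15, which is not shown in this section. Below is a minimal sketch of what such a pool might look like, assuming a plain DriverManager-backed queue of connections; the JDBC URL, user, and password are placeholders, not values from the original.

package com.day15

import java.sql.{Connection, DriverManager}
import java.util.concurrent.ConcurrentLinkedQueue

object ConnectionPoolUtil {
  // Hypothetical connection settings; replace with your own MySQL host, database, and credentials
  private val url = "jdbc:mysql://node1:3306/test?useSSL=false"
  private val user = "root"
  private val password = "123456"
  // Simple thread-safe pool of idle connections
  private val pool = new ConcurrentLinkedQueue[Connection]()

  // Load the MySQL driver once (requires mysql-connector-java on the classpath)
  Class.forName("com.mysql.jdbc.Driver")

  // Reuse an idle connection if one is available, otherwise open a new one
  def getConnections: Connection = {
    val conn = pool.poll()
    if (conn != null) conn else DriverManager.getConnection(url, user, password)
  }

  // Hand the connection back to the pool instead of closing it
  def returnConn(conn: Connection): Unit = pool.offer(conn)
}

The insert statement also assumes a MySQL table along the lines of create table streaming(word varchar(100), count int); since updateStateByKey emits the full running totals every batch, an upsert keyed on word would avoid accumulating duplicate rows.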