foreachRDD, checkpointing, and writing data to MySQL
package com.day16
import com.day15.ConnectionPoolUtil
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.{HashPartitioner, SparkConf}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
  * foreachRDD: persist updateStateByKey word counts from Kafka to MySQL
  */
object updateStateByKeyJDBC {
  // For each key, add this batch's counts to the previous running total
  val updateFunc = (ite: Iterator[(String, Seq[Int], Option[Int])]) => {
    ite.map(t => (t._1, t._2.sum + t._3.getOrElse(0)))
  }
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ysbj").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))
    // Set a checkpoint directory first -- updateStateByKey requires one
    ssc.checkpoint("hdfs://node1:9000/checkpoint")
    // Basic Kafka connection settings
    // ZooKeeper quorum used by the receiver-based Kafka API
    val zks = "192.168.28.131:2181,192.168.28.131:2182,192.168.28.131:2183"
    // Kafka consumer group
    val groupId = "gp1"
    // Topics to consume, with the number of receiver threads per topic
    val topics = Map[String, Int]("test1" -> 1)
    // Create the input stream
    val data: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(ssc, zks, groupId, topics)
    // Split each message into words and pair each word with a count of 1
    val words: DStream[(String, Int)] = data.flatMap(_._2.split(" ")).map((_, 1))
    // Apply updateStateByKey to maintain a running count per word across batches
    val value = words.updateStateByKey(updateFunc, new HashPartitioner(ssc.sparkContext.defaultParallelism), true)
    // Write the running counts to MySQL
    value.foreachRDD(rdd => {
      rdd.foreachPartition(partition => {
        // Get one JDBC connection per partition rather than per record
        val conn = ConnectionPoolUtil.getConnections
        // Prepare the insert once; parameter binding also avoids SQL injection
        // (note: each batch emits the full running totals, so an upsert may fit better in practice)
        val pstmt = conn.prepareStatement("insert into streaming(word, count) values (?, ?)")
        partition.foreach(record => {
          pstmt.setString(1, record._1)
          pstmt.setInt(2, record._2)
          pstmt.executeUpdate()
        })
        pstmt.close()
        // Return the connection to the pool for reuse
        ConnectionPoolUtil.returnConn(conn)
      })
    })
    // Start the computation and block until it terminates
    ssc.start()
    ssc.awaitTermination()
  }
}
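
The code above calls ConnectionPoolUtil from com.day15, which is not shown in this section. Below is a minimal sketch of what such a pool might look like, assuming a plain DriverManager-backed queue of connections; the JDBC URL, user, and password are placeholders, not values from the original.

package com.day15

import java.sql.{Connection, DriverManager}
import java.util.concurrent.ConcurrentLinkedQueue

object ConnectionPoolUtil {
  // Hypothetical connection settings; replace with your own MySQL host, database, and credentials
  private val url = "jdbc:mysql://node1:3306/test?useSSL=false"
  private val user = "root"
  private val password = "123456"
  // Simple thread-safe pool of idle connections
  private val pool = new ConcurrentLinkedQueue[Connection]()

  // Load the MySQL driver once (requires mysql-connector-java on the classpath)
  Class.forName("com.mysql.jdbc.Driver")

  // Reuse an idle connection if one is available, otherwise open a new one
  def getConnections: Connection = {
    val conn = pool.poll()
    if (conn != null) conn else DriverManager.getConnection(url, user, password)
  }

  // Hand the connection back to the pool instead of closing it
  def returnConn(conn: Connection): Unit = pool.offer(conn)
}

The insert statement also assumes a MySQL table along the lines of create table streaming(word varchar(100), count int); since updateStateByKey emits the full running totals every batch, an upsert keyed on word would avoid accumulating duplicate rows.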