Data example: writing data into Kafka and consuming it with Flink

1. Create a producer that writes the data into Kafka
package com.shujia.flink.dx

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

object Demo1DataToKafka {
  def main(args: Array[String]): Unit = {
    val properties = new Properties()
    // Kafka broker addresses
    properties.setProperty("bootstrap.servers", "master:9092,node1:9092,node2:9092")
    // Keys and values are sent as plain strings
    properties.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    properties.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val kafkaProducer = new KafkaProducer[String, String](properties)

    // Read the sample file; each line is one CSV record
    val data: List[String] = Source.fromFile("data/dianxin_data").getLines().toList

    for (line <- data) {
      // Write each line into the "dianxin" topic
      val record = new ProducerRecord[String, String]("dianxin", line)
      kafkaProducer.send(record)
      kafkaProducer.flush()
      // Throttle the writes to simulate a continuous stream
      Thread.sleep(100)
    }

    kafkaProducer.close()
  }
}
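The loop above sends fire-and-forget: if a record cannot be written, the failure is silent. As a minimal sketch (not part of the original example), the producer's two-argument send overload takes a Callback that reports per-record errors; Callback and RecordMetadata come from org.apache.kafka.clients.producer:

import org.apache.kafka.clients.producer.{Callback, RecordMetadata}

for (line <- data) {
  val record = new ProducerRecord[String, String]("dianxin", line)
  // The callback fires once the broker has acknowledged (or rejected) the record
  kafkaProducer.send(record, new Callback {
    override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
      if (exception != null) exception.printStackTrace() // record was not written
    }
  })
  Thread.sleep(100)
}

To check that the data actually landed in the topic, a console consumer such as `kafka-console-consumer.sh --bootstrap-server master:9092 --topic dianxin --from-beginning` can be run on one of the brokers.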
2. Create a Flink consumer that reads the data from Kafka, counts the distinct users per city, and saves the result to MySQL
package com.shujia.flink.dx

import java.sql.{Connection, DriverManager, PreparedStatement}
import java.util.Properties

import org.apache.flink.api.common.functions.{ReduceFunction, RuntimeContext}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.{MapState, MapStateDescriptor, ReducingState, ReducingStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

object Demo2CityFlow {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "master:9092,node1:9092,node2:9092")
    // Consumer group id
    properties.setProperty("group.id", "asdasdsa")

    // Kafka source: read string records from the "dianxin" topic
    val flinkKafkaConsumer = new FlinkKafkaConsumer[String](
      "dianxin",
      new SimpleStringSchema(),
      properties
    )
    // Start from the beginning of the topic when there are no committed offsets
    flinkKafkaConsumer.setStartFromEarliest()

    val dianxinDS: DataStream[String] = env.addSource(flinkKafkaConsumer)

    // Extract (city, mdn) from each CSV line: mdn is column 0, city is column 2
    val kvDS: DataStream[(String, String)] = dianxinDS.map(line => {
      val split: Array[String] = line.split(",")
      val mdn: String = split(0)
      val city: String = split(2)
      (city, mdn)
    })

    // Key by city so that state is scoped per city
    val keyByDS: KeyedStream[(String, String), String] = kvDS.keyBy(_._1)

    // Count distinct mdns per city: MapState deduplicates the mdns,
    // ReducingState keeps the running count
    val cityCountDS: DataStream[(String, Int)] = keyByDS.process(
      new KeyedProcessFunction[String, (String, String), (String, Int)] {
        // Set of mdns already seen for the current city
        var mapState: MapState[String, Int] = _
        // Running number of distinct mdns for the current city
        var reduceState: ReducingState[Int] = _

        override def open(parameters: Configuration): Unit = {
          val context: RuntimeContext = getRuntimeContext

          val mapStateDesc = new MapStateDescriptor[String, Int]("mdns", classOf[String], classOf[Int])
          mapState = context.getMapState(mapStateDesc)

          val reduceStateDesc = new ReducingStateDescriptor[Int]("count", new ReduceFunction[Int] {
            override def reduce(x: Int, y: Int): Int = x + y
          }, classOf[Int])
          reduceState = context.getReducingState(reduceStateDesc)
        }

        override def processElement(
            value: (String, String),
            ctx: KeyedProcessFunction[String, (String, String), (String, Int)]#Context,
            out: Collector[(String, Int)]): Unit = {
          val (city, mdn) = value
          // Only count each mdn once per city
          if (!mapState.contains(mdn)) {
            mapState.put(mdn, 1)
            reduceState.add(1)
            val count: Int = reduceState.get()
            out.collect((city, count))
          }
        }
      })

    // Sink: upsert the latest count for each city into MySQL
    cityCountDS.addSink(new RichSinkFunction[(String, Int)] {
      var con: Connection = _
      var stat: PreparedStatement = _

      override def open(parameters: Configuration): Unit = {
        // One JDBC connection per parallel sink instance
        Class.forName("com.mysql.jdbc.Driver")
        con = DriverManager.getConnection("jdbc:mysql://master:3306/bigdata?useUnicode=true&characterEncoding=utf-8", "root", "123456")
        // REPLACE INTO overwrites the previous count for the same city
        stat = con.prepareStatement("replace into city_count(city,num) values(?,?)")
      }

      override def invoke(value: (String, Int), context: SinkFunction.Context[_]): Unit = {
        val (city, num) = value
        stat.setString(1, city)
        stat.setInt(2, num)
        stat.execute()
      }

      override def close(): Unit = {
        stat.close()
        con.close()
      }
    })

    env.execute()
  }
}
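The sink relies on `replace into`, which only overwrites the previous count for a city if `city` is a primary (or unique) key of `city_count`; without one, every update inserts a new row. The original post does not show the DDL, so the following one-off setup helper is an assumption (the column size in particular is a guess):

import java.sql.DriverManager

object CreateCityCountTable {
  def main(args: Array[String]): Unit = {
    val con = DriverManager.getConnection(
      "jdbc:mysql://master:3306/bigdata?useUnicode=true&characterEncoding=utf-8", "root", "123456")
    val stat = con.createStatement()
    // The primary key on city is what makes REPLACE INTO behave as an upsert
    stat.execute("create table if not exists city_count(city varchar(64) primary key, num int)")
    stat.close()
    con.close()
  }
}

With the table in place, run Demo1DataToKafka first and then Demo2CityFlow; the per-city count in MySQL grows as new distinct mdns arrive.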