One Problem a Day, for Work: 2020-05-02, Problem 61
//Processing data with Kafka + Spark Streaming
//Pulling data from Kafka
package com.swust.predict

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges, KafkaUtils, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object GetDataFromKafka {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GetDataFromKafka").setMaster("local[*]")
    conf.set("spark.streaming.kafka.consumer.cache.enabled", "false")
    val ssc = new StreamingContext(conf, Seconds(5))

    val topics = Set("car_events")
    val brokers = "data001:9092,data003:9092,data004:9092"
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> brokers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "predictGroup",
      // "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean) // the default is true
    )

    // Create the direct DStream subscribed to the topic
    val kafkaDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream(
        ssc,
        PreferConsistent,
        Subscribe[String, String](topics, kafkaParams)
      )

    // Keep only the message value of each ConsumerRecord
    val events: DStream[String] = kafkaDstream.map(_.value())

    // Print every record, partition by partition
    events.foreachRDD(rdd => {
      rdd.foreachPartition(data => {
        data.foreach(one => println(one))
      })
    })

    // With enable.auto.commit = false, offsets are never committed unless we
    // do it ourselves (see the corrected pattern after this block).

    ssc.start()
    ssc.awaitTermination()
  }
}
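The original version commented out the manual offset commit and, worse, called asInstanceOf[CanCommitOffsets] on the transformed events stream, which throws at runtime: only the original direct stream returned by KafkaUtils.createDirectStream implements HasOffsetRanges and CanCommitOffsets. A minimal sketch of the corrected pattern, meant to replace the print-only foreachRDD above:

kafkaDstream.foreachRDD { rdd =>
  // Grab the offset ranges from the untransformed input stream
  val ranges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  // Process the batch (here: just print the values, as above)
  rdd.foreachPartition(partition => partition.foreach(record => println(record.value())))

  // Only the InputDStream returned by createDirectStream implements
  // CanCommitOffsets; calling this on a derived stream such as events fails
  kafkaDstream.asInstanceOf[CanCommitOffsets].commitAsync(ranges)
}

Committing after the batch work is scheduled gives at-least-once semantics: if the job dies mid-batch, the uncommitted offsets are re-read on restart.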
//Pushing data to Kafka
package com.traffic.streaming

import java.util.Properties

import net.sf.json.JSONObject
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.spark.{SparkConf, SparkContext}

// Produce data into the Kafka topic car_events
object KafkaEventProducer {
  def main(args: Array[String]): Unit = {
    // Topic the messages are written to
    val topic = "car_events"

    // Producer configuration
    val props = new Properties()
    props.put("bootstrap.servers", "data001:9092,data003:9092,data004:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    // Create the Kafka producer
    val producer = new KafkaProducer[String, String](props)

    val sparkConf = new SparkConf().setAppName("traffic data").setMaster("local[4]")
    val sc = new SparkContext(sparkConf)

    val records: Array[Array[String]] = sc.textFile("F:\\code\\AnyMaven\\data\\carFlow_all_column_test.txt")
      .filter(!_.startsWith(";")) // keep only lines that do not start with ";"
      .map(_.split(","))
      .collect()

    // Replay the whole file 1000 times, one event every 200 ms
    for (i <- 1 to 1000) {
      for (record <- records) {
        // Prepare the event data as a JSON object
        val event = new JSONObject()
        event.put("camera_id", record(0))
        event.put("car_id", record(2))
        event.put("event_time", record(4))
        event.put("speed", record(6))
        event.put("road_id", record(13))
        // Send the event message to Kafka
        producer.send(new ProducerRecord[String, String](topic, event.toString))
        // println("Message sent: " + event)
        Thread.sleep(200)
      }
    }
    producer.close()
    sc.stop()
  }
}
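Since each message body is the JSON string assembled above, the consumer side can parse the fields back out instead of printing raw strings. Below is a minimal sketch, assuming the consumer has the same net.sf.json (json-lib) dependency on its classpath; parseCarEvent is a hypothetical helper, and the field names mirror the producer above:

import net.sf.json.JSONObject

// Hypothetical helper: parse one Kafka message value back into typed fields.
// Assumes "speed" holds a numeric string in the source file; real input may
// need validation before calling getDouble.
def parseCarEvent(message: String): (String, String, Double) = {
  val json = JSONObject.fromObject(message)
  (json.getString("car_id"), json.getString("road_id"), json.getDouble("speed"))
}

// Usage inside GetDataFromKafka, replacing the plain println:
// events.foreachRDD(rdd => rdd.foreach(msg => println(parseCarEvent(msg))))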
//Run results
//Pushing data to the Kafka cluster
//Pulling data from the Kafka cluster