Add the POM dependencies
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>kafkacode</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- spark-streaming-kafka-0-10_2.11 already pulls in the Kafka client
             dependency, so this explicit declaration is optional. Note the Scala
             suffix: it must match the _2.11 used by the Spark artifacts below. -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.11.0.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
    </dependencies>
</project>
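To confirm what spark-streaming-kafka-0-10 already brings in transitively, running `mvn dependency:tree` on this POM shows the bundled kafka-clients version. Mixing Scala suffixes (_2.11 vs _2.12) across artifacts on one classpath is a common cause of NoSuchMethodError at runtime, which is why the kafka dependency above uses _2.11 to match the Spark artifacts.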
Spark Streaming consumer code
package sparkstreaming_kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent

object KafkaAndSparkStreaming {
  def main(args: Array[String]): Unit = {

    // Local streaming context with a 10-second batch interval.
    val sparkConf = new SparkConf().setAppName("kafka").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // Topic(s) to subscribe to and the Kafka consumer configuration.
    val topics = Array("student")
    val kafkaParam = Map(
      "bootstrap.servers" -> "192.168.200.111:9092,192.168.200.112:9092,192.168.200.113:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "spark",
      // Start from the latest offset when the group has no committed offset.
      "auto.offset.reset" -> "latest",
      // Offsets are not committed automatically; commit them yourself if you
      // need to resume from where the group left off (see the sketch below).
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Direct stream: each ConsumerRecord carries the key and value of one Kafka message.
    val dStream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](ssc, PreferConsistent, Subscribe[String, String](topics, kafkaParam))
    dStream.foreachRDD((rdd: RDD[ConsumerRecord[String, String]]) => {
      rdd.foreach((data: ConsumerRecord[String, String]) => {
        println("Spark Streaming consumed one record from Kafka: " + data.key() + " " + data.value())
      })
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
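Because enable.auto.commit is false, the code above never commits offsets, so the group's position is lost on restart. A minimal sketch of committing offsets back to Kafka after each batch, using the HasOffsetRanges and CanCommitOffsets APIs from the same kafka010 package (the object name ManualCommitSketch is illustrative; topic and broker addresses are taken from the code above):

package sparkstreaming_kafka

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

object ManualCommitSketch {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(
      new SparkConf().setAppName("kafka").setMaster("local[2]"), Seconds(10))
    val kafkaParam = Map[String, Object](
      "bootstrap.servers" -> "192.168.200.111:9092,192.168.200.112:9092,192.168.200.113:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "spark",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc, PreferConsistent, Subscribe[String, String](Array("student"), kafkaParam))

    stream.foreachRDD { rdd =>
      // Capture the offset ranges before any transformation loses the KafkaRDD type.
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd.foreach(record => println(record.key() + ":" + record.value()))
      // Commit asynchronously against the original direct stream, after processing,
      // so offsets are only advanced for batches that completed.
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}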
Start it up
# Produce data from the command line
[root@node3 ~]# kafka-console-producer.sh --broker-list node1:9092,node2:9092,node3:9092 --topic student

# Produce data with Java code
package new_callback_pro;

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

import java.util.Properties;

public class NewProducerCallBack {
    public static void main(String[] args) {

        Properties prop = new Properties();
        prop.put("bootstrap.servers", "192.168.200.111:9092,192.168.200.112:9092,192.168.200.113:9092");
        prop.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
        prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<Integer, String> producer = new KafkaProducer<Integer, String>(prop);

        for (int i = 0; i < 100; i++) {
            ProducerRecord<Integer, String> record =
                    new ProducerRecord<Integer, String>("student", i, "message" + i);
            // send() is asynchronous; the callback fires once the broker acknowledges the record.
            producer.send(record, new Callback() {
                @Override
                public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                    if (e != null) {
                        // The send failed; the metadata fields are not meaningful here.
                        e.printStackTrace();
                        return;
                    }
                    System.out.println("partition of this record: " + recordMetadata.partition() + "---offset: " + recordMetadata.offset());
                    System.out.println("topic: " + recordMetadata.topic());
                    System.out.println("serialized key size: " + recordMetadata.serializedKeySize() + "---serialized value size: " + recordMetadata.serializedValueSize());
                }
            });
        }

        // Block until all buffered records are sent, then release the producer's resources.
        producer.flush();
        producer.close();
    }
}
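Note that flush() only blocks until the buffered records are acknowledged; close() (added above) is still needed to release the producer's I/O thread and sockets. Because sends are asynchronous, the callback output is interleaved across partitions and is only guaranteed to be in order for records going to the same partition.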