1. Environment Preparation
1. Kafka Cluster Setup
1. Prepare and start a Kafka cluster
Kafka 3.6.1 Cluster Installation and Deployment
2. Create the topic "first"
/usr/kafka/kafka_2.13-3.6.1/bin/kafka-topics.sh --bootstrap-server 192.168.58.130:9092 --create --partitions 1 --replication-factor 3 --topic first
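Optionally, verify the topic before moving on. Assuming the same installation path and broker address as above, a describe command such as the following should list the topic's partition and replicas:
/usr/kafka/kafka_2.13-3.6.1/bin/kafka-topics.sh --bootstrap-server 192.168.58.130:9092 --describe --topic first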
2. Flink Environment Setup
1. Create a Maven project named flink-kafka (omitted)
2. Add the POM dependencies
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.18.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>1.18.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>1.18.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-files</artifactId>
<version>1.18.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>3.1.0-1.18</version>
</dependency>
</dependencies>
3. Create a log4j.properties file under the resources directory and set the logging level to error
log4j.rootLogger=error, stdout,R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=../log/agent.log
log4j.appender.R.MaxFileSize=1024KB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n
4. Create the package cn.coreqi.flink to hold the code (omitted)
2. Flink Producer
1. Create a Java class: FlinkKafkaProducer1
package cn.coreqi.flink.producer;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import java.util.ArrayList;
import java.util.Properties;
public class FlinkKafkaProducer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);
        // 1 Read data from a collection
        ArrayList<String> wordsList = new ArrayList<>();
        wordsList.add("hello");
        wordsList.add("world");
        DataStream<String> stream = env.fromCollection(wordsList);
        // 2 Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");
        /* // 3 Create the legacy Kafka producer
        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(
                "first",
                new SimpleStringSchema(),
                properties
        );
        // 4 Attach the producer to the Flink stream
        stream.addSink(kafkaProducer);*/
        // Before Flink 1.14, FlinkKafkaConsumer and FlinkKafkaProducer were used to work with Kafka.
        // Since Flink 1.14 they are deprecated, replaced by KafkaSource and KafkaSink.
        KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
                // Kafka broker addresses and ports
                .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092")
                // Record serializer: target topic and value serialization
                .setRecordSerializer(
                        KafkaRecordSerializationSchema
                                .<String>builder()
                                .setTopic("first")
                                .setValueSerializationSchema(new SimpleStringSchema())
                                .build())
                // Delivery guarantee when writing to Kafka: exactly-once | at-least-once
                .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                // For exactly-once, a transactional id prefix must be set
                .setTransactionalIdPrefix("coreqi-")
                // For exactly-once, the transaction timeout must also be set:
                // greater than the checkpoint interval, less than the broker's maximum of 15 minutes
                .setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, 10 * 60 * 1000 + "")
                .build();
        stream.sinkTo(kafkaSink);
        // 5 Execute
        env.execute();
    }
}
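Note: with DeliveryGuarantee.EXACTLY_ONCE, the KafkaSink writes records inside Kafka transactions that are committed as part of Flink's checkpoint cycle, so a long-running streaming job would normally have checkpointing enabled as well. A minimal sketch of the extra line you might add to main() is shown below; the 5-minute interval is an arbitrary choice, kept below the 10-minute transaction timeout configured above.
// Assumed addition to FlinkKafkaProducer1.main(), before building the sink
// (requires: import org.apache.flink.streaming.api.CheckpointingMode;)
env.enableCheckpointing(5 * 60 * 1000, CheckpointingMode.EXACTLY_ONCE); // checkpoint every 5 minutes, exactly-once mode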
2. Start a Kafka console consumer
/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-consumer.sh --bootstrap-server 192.168.58.130:9092 --topic first
3. Run the FlinkKafkaProducer1 program and watch the Kafka consumer console output
3. Flink Consumer
1. Create a Java class: FlinkKafkaConsumer1
package cn.coreqi.flink.consumer;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Properties;
public class FlinkKafkaConsumer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);
        // 1 Kafka consumer configuration
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");
        // 2 Create the legacy Kafka consumer
        /* FlinkKafkaConsumer<String> kafkaConsumer = new FlinkKafkaConsumer<>(
                "first",
                new SimpleStringSchema(),
                properties
        );
        // 3 Attach the consumer to the Flink stream
        env.addSource(kafkaConsumer).print();*/
        // Before Flink 1.14, FlinkKafkaConsumer and FlinkKafkaProducer were used to work with Kafka.
        // Since Flink 1.14 they are deprecated, replaced by KafkaSource and KafkaSink.
        KafkaSource<String> kafkaSource =
                KafkaSource.<String>builder()
                        .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092") // Kafka broker addresses and ports
                        .setGroupId("coreqi") // Consumer group id
                        .setTopics("first") // Topic(s) to consume
                        .setValueOnlyDeserializer(new SimpleStringSchema()) // Value deserializer
                        .setStartingOffsets(OffsetsInitializer.latest()) // Start from the latest offsets
                        .build();
        env.fromSource(
                kafkaSource,
                WatermarkStrategy.noWatermarks(),
                "kafkaSource"
        ).print();
        // 4 Execute
        env.execute();
    }
}
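Besides OffsetsInitializer.latest(), the KafkaSource builder accepts other starting-offset strategies. The following is an illustrative sketch only, not part of the original example; the strategy choice is an assumption, and it additionally requires import org.apache.kafka.clients.consumer.OffsetResetStrategy.
// Illustrative alternative: resume from the offsets committed by the "coreqi" group,
// falling back to the earliest offset when no committed offset exists.
KafkaSource<String> resumingSource = KafkaSource.<String>builder()
        .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092")
        .setGroupId("coreqi")
        .setTopics("first")
        .setValueOnlyDeserializer(new SimpleStringSchema())
        .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST))
        .build();
// Other options include OffsetsInitializer.earliest() and OffsetsInitializer.timestamp(...)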
2. Start the FlinkKafkaConsumer1 consumer (omitted)
3. Start a Kafka console producer
/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-producer.sh --bootstrap-server 192.168.58.130:9092 --topic first
4. Observe the data printed in the IDEA console