Kafka Integration with Flink

1. Environment Preparation

1. Kafka Cluster Environment Preparation

1. Prepare and start a Kafka cluster

See: Kafka 3.6.1 Cluster Installation and Deployment

2. Create a Topic named first

/usr/kafka/kafka_2.13-3.6.1/bin/kafka-topics.sh --bootstrap-server 192.168.58.130:9092 --create --partitions 1 --replication-factor 3 --topic first
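
The topic can also be created programmatically with Kafka's AdminClient instead of the shell script. The sketch below is optional and assumes the kafka-clients library is on the classpath (the flink-connector-kafka dependency added later pulls it in transitively); the package and class name are placeholders:

package cn.coreqi.flink.admin;

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;

import java.util.Collections;
import java.util.Properties;

public class CreateFirstTopic {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // 1 partition, replication factor 3, matching the CLI command above
            admin.createTopics(Collections.singletonList(new NewTopic("first", 1, (short) 3)))
                    .all()
                    .get();
        }
    }
}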

2. Flink Environment Preparation

1. Add POM dependencies

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.18.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>1.18.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>1.18.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-files</artifactId>
            <version>1.18.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>3.1.0-1.18</version>
        </dependency>
    </dependencies>

2. Under the resources directory, create a log4j.properties file and set the logging level to error

log4j.rootLogger=error, stdout,R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=../log/agent.log
log4j.appender.R.MaxFileSize=1024KB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n

2. Flink as a Kafka Producer

1. Create a Java class: FlinkKafkaProducer1

package cn.coreqi.flink.producer;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;

import java.util.ArrayList;
import java.util.Properties;

public class FlinkKafkaProducer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);
        // 1 Read data from a collection
        ArrayList<String> wordsList = new ArrayList<>();
        wordsList.add("hello");
        wordsList.add("world");

        DataStream<String> stream = env.fromCollection(wordsList);
        // 2 Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");

/*        // 3 Create the Kafka producer
        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(
                "first",
                new SimpleStringSchema(),
                properties
        );

        // 4 Attach the producer to the Flink stream
        stream.addSink(kafkaProducer);*/

        // Before Flink 1.14, FlinkKafkaConsumer and FlinkKafkaProducer were used to read from and write to Kafka; since Flink 1.14 they are deprecated and replaced by KafkaSource and KafkaSink
        KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
                // Kafka broker addresses and ports
                .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092")
                // Record serializer: target topic and value serialization
                .setRecordSerializer(
                        KafkaRecordSerializationSchema
                                .<String>builder()
                                .setTopic("first")
                                .setValueSerializationSchema(new SimpleStringSchema())
                                .build())
                // Delivery guarantee for writes to Kafka: exactly-once | at-least-once
                .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                // For exactly-once, a transactional ID prefix must be set
                .setTransactionalIdPrefix("coreqi-")
                // For exactly-once, the transaction timeout must also be set: larger than the checkpoint interval, smaller than the broker maximum of 15 minutes
                .setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, 10*60*1000+"")
                .build();

        stream.sinkTo(kafkaSink);

        // 5 Execute the job
        env.execute();
    }
}
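
With DeliveryGuarantee.EXACTLY_ONCE the KafkaSink only commits its Kafka transactions when a checkpoint completes; the console consumer started in the next step still shows the records because its default isolation level is read_uncommitted, but a read_committed consumer would see nothing until a checkpoint succeeds. For a long-running job, checkpointing should therefore be enabled as well. A minimal sketch; the 5-second interval is only an example and must stay below the transaction timeout set on the sink:

// Additional import:
import org.apache.flink.streaming.api.CheckpointingMode;

// In main(), right after env.setParallelism(3):
env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); // commit the sink's Kafka transactions on each checkpoint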

2. Start a Kafka console consumer

/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-consumer.sh --bootstrap-server 192.168.58.130:9092 --topic first

3. Run the FlinkKafkaProducer1 program and observe the output in the Kafka consumer console

3. Flink as a Kafka Consumer

1. Create a Java class: FlinkKafkaConsumer1

package cn.coreqi.flink.consumer;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Properties;

public class FlinkKafkaConsumer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);
        // 1 Kafka consumer configuration
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");
        // 2 Create the Kafka consumer
/*        FlinkKafkaConsumer<String> kafkaConsumer = new FlinkKafkaConsumer<>(
                "first",
                new SimpleStringSchema(),
                properties
        );
        // 3 Attach the consumer to the Flink stream
        env.addSource(kafkaConsumer).print();*/

        // Before Flink 1.14, FlinkKafkaConsumer and FlinkKafkaProducer were used to read from and write to Kafka; since Flink 1.14 they are deprecated and replaced by KafkaSource and KafkaSink
        KafkaSource<String> kafkaSource =
                KafkaSource.<String>builder()
                        .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092") //指定kafka节点的地址和端口
                        .setGroupId("coreqi")   //指定消费者组的ID
                        .setTopics("first")   // 指定消费者的Topic
                        .setValueOnlyDeserializer(new SimpleStringSchema()) //指定value的反序列化器
                        .setStartingOffsets(OffsetsInitializer.latest())
                        .build();

        env.fromSource(
                kafkaSource,
                WatermarkStrategy.noWatermarks(),
                "kafkaSource"
        ).print();

        // 4 Execute the job
        env.execute();
    }
}
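
OffsetsInitializer.latest() starts reading from the end of the topic, so only messages produced after the job starts are printed. A sketch of a variant that resumes from the consumer group's committed offsets instead (extra import: org.apache.kafka.clients.consumer.OffsetResetStrategy; other options include OffsetsInitializer.earliest() and OffsetsInitializer.timestamp(...)):

// Same source as above, but resuming from the group's committed offsets and
// falling back to the earliest offset when the group has none committed yet.
KafkaSource<String> resumingSource = KafkaSource.<String>builder()
        .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092")
        .setGroupId("coreqi")
        .setTopics("first")
        .setValueOnlyDeserializer(new SimpleStringSchema())
        .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST))
        .build();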

2. Start the FlinkKafkaConsumer1 consumer (omitted)

3. Start a Kafka console producer

/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-producer.sh --bootstrap-server 192.168.58.130:9092 --topic first

4. Observe the data printed in the IDEA console
