FlinkCDC: Writing MySQL Data to Kafka
Environment setup:
1. JDK
2. ZooKeeper
3. Kafka
4. Maven
一、Enable binlog monitoring for the MySQL database
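On the MySQL side the binary log must be enabled in row format for the monitored database. A minimal sketch of the relevant /etc/my.cnf entries, assuming the database name used later in this post and an arbitrary server-id:

    [mysqld]
    server-id = 1                     # any id unique in the replication topology (assumed value)
    log-bin = mysql-bin               # enable the binary log
    binlog_format = ROW               # FlinkCDC (Debezium) requires row-based binlog
    binlog-do-db = gmall-20210712     # restrict the binlog to the monitored database

After restarting mysqld, SHOW VARIABLES LIKE 'log_bin'; should report ON.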
二、Write the FlinkCDC program
1. Add the pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.lxz</groupId>
    <artifactId>gmall-logger</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>gmall-20210909</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <spring-boot.version>2.4.1</spring-boot.version>
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
        <flink.version>1.12.0</flink.version>
        <scala.version>2.12</scala.version>
        <hadoop.version>3.1.3</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-cep_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.68</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.2.0</version>
        </dependency>
        <!-- Only needed if checkpoints are stored on HDFS -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- Flink logs through slf4j, which is only a logging facade; log4j is used here as the concrete implementation -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-to-slf4j</artifactId>
            <version>2.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.kafka</groupId>
            <artifactId>spring-kafka</artifactId>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
    </dependencies>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>${spring-boot.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>2.3.0.RELEASE</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <classifier>boot</classifier>
                    <mainClass>com.lxz.gamll20210909.Gamll20210909Application</mainClass>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
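Of these dependencies, the ones this post actually exercises are flink-connector-mysql-cdc (the MySQL CDC source), flink-connector-kafka_${scala.version} (the Kafka sink), fastjson (building the JSON records) and flink-json; the Spring Boot, spring-kafka and Lombok entries are inherited from the surrounding gmall-logger project, and hadoop-client, as the comment notes, is only needed when checkpoints are stored on HDFS.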
2. MyKafkaUtil utility class
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import java.util.Properties;
public class MyKafkaUtil {

    private static String KAFKA_SERVER = "hadoop201:9092,hadoop202:9092,hadoop203:9092";
    private static Properties properties = new Properties();

    static {
        properties.setProperty("bootstrap.servers", KAFKA_SERVER);
    }

    // Returns a Kafka producer sink that writes plain strings to the given topic
    public static FlinkKafkaProducer<String> getKafkaSink(String topic) {
        return new FlinkKafkaProducer<String>(topic, new SimpleStringSchema(), properties);
    }
}
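For reference, a minimal sketch of how this helper is attached to a stream (the class name KafkaSinkDemo and the one-element source are only illustrative; the real wiring appears in the main program below):

    import com.lxz.gamll20210909.util.MyKafkaUtil;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class KafkaSinkDemo {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            // A one-element stream, just enough to demonstrate attaching the Kafka sink
            env.fromElements("{\"demo\":1}")
               .addSink(MyKafkaUtil.getKafkaSink("ods_base_db"));
            env.execute("kafka-sink-demo");
        }
    }

Note that this constructor of FlinkKafkaProducer writes with at-least-once semantics; exactly-once delivery would require the constructor that takes a FlinkKafkaProducer.Semantic argument.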
3. FlinkCDC main program
import com.alibaba.fastjson.JSONObject;
import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;
import com.alibaba.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction;
import com.lxz.gamll20210909.util.MyKafkaUtil;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

public class Flink_CDCWithCustomerSchema {

    public static void main(String[] args) throws Exception {

        // 1. Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Create the Flink-MySQL-CDC source
        DebeziumSourceFunction<String> mysqlSource = MySQLSource.<String>builder()
                .hostname("hadoop201")
                .port(3306)
                .username("root")
                .password("000000")
                .databaseList("gmall-20210712")
                .startupOptions(StartupOptions.latest())
                .deserializer(new DebeziumDeserializationSchema<String>() { // custom deserializer
                    @Override
                    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
                        // The topic carries the database and table name, e.g. mysql_binlog_source.gmall-flink.z_user_info
                        String topic = sourceRecord.topic();
                        String[] arr = topic.split("\\.");
                        String db = arr[1];
                        String tableName = arr[2];

                        // Operation type: READ, DELETE, UPDATE or CREATE
                        Envelope.Operation operation = Envelope.operationFor(sourceRecord);

                        // The record value is a Struct
                        Struct value = (Struct) sourceRecord.value();

                        // Row data after the change
                        Struct after = value.getStruct("after");

                        // JSON object holding the row data
                        JSONObject data = new JSONObject();
                        if (after != null) {
                            Schema schema = after.schema();
                            for (Field field : schema.fields()) {
                                data.put(field.name(), after.get(field.name()));
                            }
                        }

                        // JSON object wrapping the final record
                        JSONObject result = new JSONObject();
                        result.put("operation", operation.toString().toLowerCase());
                        result.put("data", data);
                        result.put("database", db);
                        result.put("table", tableName);

                        // Emit the record downstream
                        collector.collect(result.toJSONString());
                    }

                    @Override
                    public TypeInformation<String> getProducedType() {
                        return TypeInformation.of(String.class);
                    }
                })
                .build();

        // 3. Read change data from MySQL with the CDC source
        DataStreamSource<String> mysqlDS = env.addSource(mysqlSource);

        // 4. Write the data to the Kafka topic ods_base_db
        mysqlDS.addSink(MyKafkaUtil.getKafkaSink("ods_base_db"));

        // 5. Execute the job
        env.execute();
    }
}
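One detail worth noting: StartupOptions.latest() only picks up changes made after the job starts. If the rows already present in gmall-20210712 should be snapshotted first and then followed by the binlog, the builder call can be switched to StartupOptions.initial() (both options are provided by flink-connector-mysql-cdc 1.2.0).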
三、Results
1. Start the FlinkCDC main program
2. Start a Kafka console consumer on the server
bin/kafka-console-consumer.sh --bootstrap-server hadoop201:9092 --topic ods_base_db
3. Insert data into MySQL and check whether Kafka consumes it
MySQL side:
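For example, assuming the monitored database contains a table z_user_info(id, name) (the table name and values here are purely illustrative), inserting a row is enough to trigger a change event:

    INSERT INTO z_user_info (id, name) VALUES (1001, 'zhangsan');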
Kafka side:
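With the deserializer above, the consumer prints one JSON record per change; for the illustrative insert it would look roughly like:

    {"data":{"id":1001,"name":"zhangsan"},"database":"gmall-20210712","table":"z_user_info","operation":"create"}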
The record is consumed successfully.
Don't let the chase make you forget who you were at the start.