Real-time database synchronization with Flink CDC
- Flink CDC Connectors is a set of source connectors for Apache Flink that use change data capture (CDC) to ingest changes from different databases.
- Query-based CDC: tools such as Sqoop and DataX run scheduled offline query jobs in batch mode. To sync a table to another system, each run queries the table for its latest state. This cannot guarantee consistency, because the data may have changed several times while the query runs, and it cannot guarantee freshness, because offline scheduling has inherent latency.
- Log-based CDC: tools such as Canal and Oracle GoldenGate consume logs in real time as stream processing. MySQL's binlog, for example, completely records every change made to the database, so a binlog file can serve as a streaming source. This guarantees consistency, because the binlog contains the full history of changes, and it guarantees freshness, because log files like the binlog can be consumed as a real-time stream (see the decoded-binlog sketch below).
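To get a feel for what log-based CDC actually consumes, you can decode a ROW-format binlog with the mysqlbinlog tool. This is just an illustrative sketch: the binlog file name and path below are examples, so check SHOW BINARY LOGS on your own server first.
# List the binlog files the server currently has
mysql -uroot -p -e "SHOW BINARY LOGS;"
# Decode row events (INSERT/UPDATE/DELETE); -v reconstructs pseudo-SQL,
# --base64-output=decode-rows decodes the row images
mysqlbinlog -v --base64-output=decode-rows /var/lib/mysql/mysql-bin.000003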
Install Kali Linux (used here only as the Linux environment)
Flink CDC installation
Official documentation
Download Flink
https://downloads.apache.org/flink/flink-1.13.6/flink-1.13.6-bin-scala_2.12.tgz
Download the connectors you need (they are not all database connectors); watch the versions
- Download locations:
https://repo.maven.apache.org/maven2/org/apache/flink/
https://repo1.maven.org/maven2/com/ververica/
wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7_2.12/1.13.6/flink-sql-connector-elasticsearch7_2.12-1.13.6.jar
wget https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-mysql-cdc/2.1.1/flink-sql-connector-mysql-cdc-2.1.1.jar
wget https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-postgres-cdc/2.1.1/flink-sql-connector-postgres-cdc-2.1.1.jar
Install Flink CDC
- Extract flink-1.13.6-bin-scala_2.12.tgz into flink-1.13.6
tar -zxvf flink-1.13.6-bin-scala_2.12.tgz
- Copy the connectors into flink-1.13.6/lib
cp flink-sql-connector-* flink-1.13.6/lib
- Start the Flink cluster: start-cluster
cd flink-1.13.6
./bin/start-cluster.sh
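Before moving on, it is worth confirming the cluster is actually up (assuming the default standalone setup, where the JobManager web UI listens on port 8081):
# The REST API should report the available task slots
curl -s http://localhost:8081/overview
# No jobs should be running yet
./bin/flink list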
Install Elasticsearch, Kibana, and MySQL
Open firewall ports (optional)
firewall-cmd --permanent --zone=public --add-port=9200/tcp
firewall-cmd --permanent --zone=public --add-port=9300/tcp
firewall-cmd --permanent --zone=public --add-port=3306/tcp
firewall-cmd --permanent --zone=public --add-port=5601/tcp
firewall-cmd --reload
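To verify, list the ports now open in the zone:
firewall-cmd --zone=public --list-ports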
Option 1: podman or docker
# Grant permissions on the data directories so ES can start
podman network create es_net
mkdir -p /opt/es/data/es/{data,logs,plugins}
chmod 776 /opt/es/data/ -R
# Run Elasticsearch
podman run --name=elasticsearch -d \
-e cluster.name=elasticsearch \
-e bootstrap.memory_lock=true \
-e ES_JAVA_OPTS="-Xms512m -Xmx512m" \
-e discovery.type=single-node \
-v /opt/es/data/es/data:/usr/share/elasticsearch/data \
-v /opt/es/data/es/logs:/usr/share/elasticsearch/logs \
-v /opt/es/data/es/plugins:/usr/share/elasticsearch/plugins \
--net es_net \
--network-alias elasticsearch \
-p 9200:9200 -p 9300:9300 \
--ulimit memlock=-1:-1 \
--ulimit nofile=65536:65536 \
docker.elastic.co/elasticsearch/elasticsearch:7.6.0
# Run Kibana
podman run --name=kibana -d \
-e ELASTICSEARCH_HOSTS=http://elasticsearch:9200 \
-v /opt/es/data/kibana/data:/usr/share/kibana/data \
--net es_net \
--network-alias kibana \
-p 5601:5601 \
docker.elastic.co/kibana/kibana:7.6.0
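Give Elasticsearch a minute to start, then check that both services respond:
# ES should return its name and version banner
curl -s http://localhost:9200
# Kibana exposes a status endpoint
curl -s http://localhost:5601/api/status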
Option 2: docker-compose or podman-compose
- Create a docker-compose.yml file
version: '3'
services:
  mysql:
    # This image is based on MySQL 5.7.35 with binlog already enabled
    image: debezium/example-mysql:1.1
    ports:
      - "3306:3306"
    environment:
      - MYSQL_ROOT_PASSWORD=123456
      - MYSQL_USER=mysqluser
      - MYSQL_PASSWORD=mysqlpw
    networks:
      - es_net
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.6.0
    # hostname: elasticsearch
    container_name: elasticsearch
    # user: root
    environment:
      - cluster.name=elasticsearch
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.type=single-node
    volumes:
      - ./data/es/data:/usr/share/elasticsearch/data
      - ./data/es/logs:/usr/share/elasticsearch/logs
      - ./data/es/plugins:/usr/share/elasticsearch/plugins
      # - /docker/es/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
    ports:
      - "9200:9200"
      - "9300:9300"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    networks:
      - es_net
  kibana:
    image: docker.elastic.co/kibana/kibana:7.6.0
    container_name: kibana
    # hostname: kibana
    # user: root
    depends_on:
      - elasticsearch
    ports:
      - "5601:5601"
    environment:
      - "ELASTICSEARCH_HOSTS=http://elasticsearch:9200"
    networks:
      - es_net
# networks
networks:
  es_net: {}
- Start the containers
docker-compose up -d
or
podman-compose up -d
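Then confirm all three containers are up (use docker-compose ps if you went with Docker):
podman-compose ps
# ES health should be yellow or green for a single node
curl -s http://localhost:9200/_cat/health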
Install the Elasticsearch Chinese (IK) analyzer
podman exec -it elasticsearch bash
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.6.0/elasticsearch-analysis-ik-7.6.0.zip
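The plugin is only picked up after a restart; you can then verify the ik_max_word analyzer through the _analyze API (the sample text here is arbitrary):
podman restart elasticsearch
curl -s -H 'Content-Type: application/json' -X POST 'http://localhost:9200/_analyze' -d '{"analyzer":"ik_max_word","text":"实时数据同步"}'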
Data synchronization
Prepare test data in MySQL
- Log in
podman-compose exec mysql mysql -uroot -p123456
- Create the database and table, then insert test rows
CREATE DATABASE test;
USE test;
CREATE TABLE test_1 (
id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
description VARCHAR(512)
);
ALTER TABLE test_1 AUTO_INCREMENT = 10001;
INSERT INTO test_1
VALUES (default,"scooter","Small 2-wheel scooter"),
(default,"car battery","12V car battery"),
(default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3"),
(default,"hammer","12oz carpenter's hammer");
select t.id, t.name, t.description from test_1 t;
- If binlog is not enabled on MySQL, enable it, and make sure the account you use has the binlog (replication) privileges
Check whether it is enabled:
show variables like '%log_bin%';
# ON means enabled
To enable binlog, enter the container, locate the config file /etc/mysql/mysql.conf.d/mysql.cnf or /etc/mysql/my.cnf, append the following under [mysqld], and restart MySQL:
server_id=1918435
log_bin=mysql-bin
# Flink CDC reads row events, so ROW-format binlog is required (the default in MySQL 5.7)
binlog_format=ROW
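After restarting MySQL, confirm both settings took effect:
podman-compose exec mysql mysql -uroot -p123456 -e "show variables like 'log_bin'; show variables like 'binlog_format';"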
Grant the privileges; the account needs at least SELECT, REPLICATION SLAVE, and REPLICATION CLIENT, and all three are required
-- existing user
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'user'@'%';
-- new user (MySQL 5.7 syntax; MySQL 8 requires CREATE USER first)
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'user'@'%' IDENTIFIED BY 'pwd';
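You can double-check what the account ended up with ('user'@'%' here matches the example above):
podman-compose exec mysql mysql -uroot -p123456 -e "SHOW GRANTS FOR 'user'@'%';"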
Pre-create the index in ES, since the auto-generated default mapping may not meet your needs (optional)
- Open Kibana in a browser: http://localhost:5601/app/kibana#/dev_tools/console
- Create the index
# Delete the index first if it already exists (optional)
DELETE index_test_1
# Create the index
PUT index_test_1
# Set the field mappings
PUT index_test_1/_mapping
{
  "properties" : {
    "_class" : {
      "type" : "text",
      "fields" : {
        "keyword" : {
          "type" : "keyword",
          "ignore_above" : 256
        }
      }
    },
    "id" : {
      "type" : "long"
    },
    "name" : {
      "type" : "keyword"
    },
    "description" : {
      "type" : "text",
      "analyzer" : "ik_max_word"
    }
  }
}
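The applied mapping can also be checked from the shell:
curl -s http://localhost:9200/index_test_1/_mapping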
Create the job in the Flink SQL client
- Start the SQL client: sql-client
# from the flink-1.13.6 directory
./bin/sql-client.sh
- Create the tables for the MySQL source and the ES sink
-- MySQL source
CREATE TABLE test_1 (
id INT,
name STRING,
description STRING,
PRIMARY KEY (id) NOT ENFORCED
) WITH (
'connector' = 'mysql-cdc',
'hostname' = 'localhost',
'port' = '3306',
'username' = 'root',
'password' = '123456',
'database-name' = 'test',
'table-name' = 'test_1'
);
CREATE TABLE index_test_1 (
id INT,
name STRING,
description STRING,
PRIMARY KEY (id) NOT ENFORCED
) WITH (
'connector' = 'elasticsearch-7',
'hosts' = 'http://localhost:9200',
'index' = 'index_test_1'
);
- Enable checkpointing, taking a checkpoint every 3 seconds
-- Flink SQL
SET execution.checkpointing.interval = 3s;
- Create the job
INSERT INTO index_test_1
SELECT d.* FROM test_1 d;
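The INSERT submits a continuous streaming job rather than a one-shot batch; it should now show up as RUNNING:
# from the flink-1.13.6 directory
./bin/flink list
# or inspect it in the web UI at http://localhost:8081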
Check the results
- Open a browser on the Kali host and visit localhost
View the ES data: http://localhost:9200/index_test_1/_search
You can also browse it in Kibana
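To watch the sync end to end, change a row in MySQL and query ES again; the update should appear within a few seconds (roughly one checkpoint interval). The id 10002 below is the "car battery" row from the test data:
podman-compose exec mysql mysql -uroot -p123456 -e "UPDATE test.test_1 SET description='12V heavy-duty car battery' WHERE id=10002;"
curl -s 'http://localhost:9200/index_test_1/_search?q=id:10002'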