Using Canal to sync a single table (data only) to a MySQL replica
admin configuration
mysql -uroot -p < /opt/module/canal/admin/conf/canal_manager.sql >> ~/execsql.log
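To confirm the import succeeded, you can list the tables of the new schema (a quick check, assuming the same root account used above):
mysql -uroot -p -e "use canal_manager; show tables;"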
deployer configuration
vim /opt/module/canal/deployer/conf/canal.properties
canal.serverMode = tcp
vim /opt/module/canal/deployer/conf/example/instance.properties
canal.instance.master.address=hadoop106:3306   # source MySQL address
canal.instance.dbUsername=root
canal.instance.dbPassword=root
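Before starting the deployer, it is worth confirming that the account above can reach hadoop106 and that the binlog is enabled in ROW format (a quick check; adjust the credentials if you use the dedicated canal user instead of root):
mysql -h hadoop106 -uroot -proot -e "show master status; show variables like 'binlog_format';"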
adapter configuration
vim /opt/module/canal/adapter/conf/application.yml
①
canal.conf:
  mode: tcp # tcp kafka rocketMQ rabbitMQ
  # IP address of the host running the Canal deployer, port 11111
  canalServerHost: hadoop108:11111
  # flatMessage: true
  # mqServers 127.0.0.1:9092 #or rocketmq
  # zookeeperHosts:
  batchSize: 500
  syncBatchSize: 1000
  retries: 0
②
  secretKey:
  srcDataSources:
    defaultDS:   # datasource key; matches dataSourceKey in the rdb mapping yml
      # url: jdbc:mysql://<source MySQL IP1>:3306/<database>?useUnicode=true&characterEncoding=UTF-8&serverTimezone=GMT&useSSL=false
      url: jdbc:mysql://hadoop106:3306/FlinkEtl?useUnicode=true
      username: root
      password: root
③
  canalAdapters:
  - instance: example # canal instance Name or mq topic name
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
      - name: rdb
        key: mysql1
        properties:
          jdbc.driverClassName: com.mysql.jdbc.Driver
          # jdbc.url: jdbc:mysql://<target MySQL IP2>:3306/<database>?useUnicode=true&characterEncoding=UTF-8&serverTimezone=GMT&useSSL=false
          jdbc.url: jdbc:mysql://hadoop108:3306/FlinkEtl?useUnicode=true
          jdbc.username: root
          jdbc.password: root
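It also helps to verify, from the adapter host, that the target MySQL on hadoop108 is reachable and already has the FlinkEtl schema the rdb adapter will write into (a sketch using the credentials above):
mysql -h hadoop108 -uroot -proot -e "show databases like 'FlinkEtl'; use FlinkEtl; show tables;"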
vim /opt/module/canal/adapter/conf/rdb/mytest_user.yml
dataSourceKey: defaultDS
destination: example      # must match the instance configured in ③ above
groupId: g1               # must match groupId in ③
outerAdapterKey: mysql1
concurrent: true
dbMapping:
  # source MySQL database name
  database: FlinkEtl
  # source table name
  table: employee
  # target table name in the destination database
  targetTable: employee
  targetPk:
    id: id
  # If the source and target tables have exactly the same column structure, just enable this
  # option and comment out targetColumns below; if targetColumns is left uncommented it takes precedence
  mapAll: true
  # targetColumns:
  #   id:
  #   name:
  #   role_id:
  #   c_time:
  #   test1:
  # keep this commented out as well
  # etlCondition: "where c_time>={}"
  # batch commit size
  # commitBatch: 3000

## Mirror schema synchronize config
#dataSourceKey: defaultDS
#destination: example
#groupId: g1
#outerAdapterKey: mysql1
#concurrent: true
#dbMapping:
#  mirrorDb: true
#  database: mytest
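This mapping only picks up changes captured after the adapter starts. To backfill rows that already exist in the source table, the adapter exposes an ETL endpoint on its server.port (8081 here); the URL below follows the documented /etl/{adapter}/{key}/{mapping-config} pattern, so treat it as a sketch and check it against your adapter version (run it on the adapter host):
curl -X POST http://127.0.0.1:8081/etl/rdb/mysql1/mytest_user.yml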
The MySQL instances on IP1 and IP2 each have a FlinkEtl.employee table.
The requirement is to sync data from hadoop106 (IP1) FlinkEtl.employee to hadoop108 (IP2) FlinkEtl.employee.
SQL:
show variables like '%validate_password%';
set global validate_password_length = 4;
set global validate_password_mixed_case_count = 0;
set global validate_password_number_count = 0;
set global validate_password_special_char_count = 0;
set global validate_password_policy = 0;

create user 'canal'@'%' identified by 'canal';
grant select, replication slave, replication client on *.* to 'canal'@'%';
select * from mysql.user where user = 'canal' \G

set global validate_password_length = 8;
set global validate_password_mixed_case_count = 1;
set global validate_password_number_count = 1;
set global validate_password_special_char_count = 1;
set global validate_password_policy = 1;

show variables like 'log_bin%';
show variables like '%binlog_format%';
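If the last two queries report log_bin = OFF or a binlog_format other than ROW, enable the binlog on the source (hadoop106) before going further, since Canal parses ROW-format binlog. A minimal sketch of the required [mysqld] settings (the server-id value and config file path are assumptions; restart MySQL after editing):
[mysqld]
server-id=1
log-bin=mysql-bin
binlog-format=ROW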
drop table if exists `employee`;
create table `employee` (
  `id` int(11) not null,
  `name` varchar(255) character set utf8mb4 collate utf8mb4_unicode_ci null default null,
  `age` int(11) null default null,
  `gender` varchar(25) character set utf8mb4 collate utf8mb4_unicode_ci null default null,
  primary key (`id`) using btree
) engine = InnoDB character set = utf8mb4 collate = utf8mb4_unicode_ci row_format = Dynamic;

-- ----------------------------
-- Records of employee
-- ----------------------------
insert into `employee` values (1, 'lisi', 15, '男');
insert into `employee` values (2, 'zhangsan', 22, '男');
insert into `employee` values (3, 'wangwu', 35, '男');
insert into `employee` values (4, 'zhaoliu', 25, '男');
insert into `employee` values (5, 'tianqi', 32, '男');
insert into `employee` values (6, 'xiaohong', 25, '女');
insert into `employee` values (7, 'xiaohang', 15, '女');
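The single-table mapping syncs row data, not DDL, so the employee table must exist on both sides (as noted in the requirement above). A sketch, assuming the statements above are saved to a local file named employee.sql:
mysql -h hadoop106 -uroot -proot FlinkEtl < employee.sql
mysql -h hadoop108 -uroot -proot FlinkEtl < employee.sql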
Starting canal
sh /opt/module/canal/deployer/bin/startup.sh
vim /opt/module/canal/deployer/logs/canal/canal.log        # jump to the end with G

sh /opt/module/canal/adapter/bin/startup.sh
vim /opt/module/canal/adapter/logs/adapter/adapter.log     # jump to the end with G
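Once both logs look clean, an end-to-end check is to write on the source and read on the target (a sketch; id 8 is just an unused key):
mysql -h hadoop106 -uroot -proot -e "insert into FlinkEtl.employee values (8, 'test', 30, '男');"
sleep 2
mysql -h hadoop108 -uroot -proot -e "select * from FlinkEtl.employee where id = 8;"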
END
=================== Config backup: single-table data sync only ==========================
canal.properties
#################################################
#########       common argument        ##########
#################################################
# tcp bind ip
canal.ip =
# register ip to zookeeper
canal.register.ip =
canal.port = 11111
canal.metrics.pull.port = 11112
# canal instance user/passwd
# canal.user = canal
# canal.passwd = E3619321C1A937C46A0D8BD1DAC39F93B27D4458

# canal admin config
#canal.admin.manager = 127.0.0.1:8089
canal.admin.port = 11110
canal.admin.user = admin
canal.admin.passwd = 4ACFE3202A5FF5CF467898FC58AAB1D615029441
# admin auto register
#canal.admin.register.auto = true
#canal.admin.register.cluster =
#canal.admin.register.name =

canal.zkServers =
# flush data to zk
canal.zookeeper.flush.period = 1000
canal.withoutNetty = false
# tcp, kafka, rocketMQ, rabbitMQ, pulsarMQ
canal.serverMode = tcp
# flush meta cursor/parse position to file
canal.file.data.dir = ${canal.conf.dir}
canal.file.flush.period = 1000
## memory store RingBuffer size, should be Math.pow(2,n)
canal.instance.memory.buffer.size = 16384
## memory store RingBuffer used memory unit size , default 1kb
canal.instance.memory.buffer.memunit = 1024
## meory store gets mode used MEMSIZE or ITEMSIZE
canal.instance.memory.batch.mode = MEMSIZE
canal.instance.memory.rawEntry = true

## detecing config
canal.instance.detecting.enable = false
#canal.instance.detecting.sql = insert into retl.xdual values(1,now()) on duplicate key update x=now()
canal.instance.detecting.sql = select 1
canal.instance.detecting.interval.time = 3
canal.instance.detecting.retry.threshold = 3
canal.instance.detecting.heartbeatHaEnable = false

# support maximum transaction size, more than the size of the transaction will be cut into multiple transactions delivery
canal.instance.transaction.size = 1024
# mysql fallback connected to new master should fallback times
canal.instance.fallbackIntervalInSeconds = 60

# network config
canal.instance.network.receiveBufferSize = 16384
canal.instance.network.sendBufferSize = 16384
canal.instance.network.soTimeout = 30

# binlog filter config
canal.instance.filter.druid.ddl = true
canal.instance.filter.query.dcl = false
canal.instance.filter.query.dml = false
canal.instance.filter.query.ddl = false
canal.instance.filter.table.error = false
canal.instance.filter.rows = false
canal.instance.filter.transaction.entry = false
canal.instance.filter.dml.insert = false
canal.instance.filter.dml.update = false
canal.instance.filter.dml.delete = false

# binlog format/image check
canal.instance.binlog.format = ROW,STATEMENT,MIXED
canal.instance.binlog.image = FULL,MINIMAL,NOBLOB

# binlog ddl isolation
canal.instance.get.ddl.isolation = false

# parallel parser config
canal.instance.parser.parallel = true
## concurrent thread number, default 60% available processors, suggest not to exceed Runtime.getRuntime().availableProcessors()
#canal.instance.parser.parallelThreadSize = 16
## disruptor ringbuffer size, must be power of 2
canal.instance.parser.parallelBufferSize = 256

# table meta tsdb info
canal.instance.tsdb.enable = true
canal.instance.tsdb.dir = ${canal.file.data.dir:../conf}/${canal.instance.destination:}
canal.instance.tsdb.url = jdbc:h2:${canal.instance.tsdb.dir}/h2;CACHE_SIZE=1000;MODE=MYSQL;
#canal.instance.tsdb.url = jdbc:mysql://hadoop106:3306/canal_tsdb?useUnicode=true&characterEncoding=UTF-8&useSSL=false
canal.instance.tsdb.dbUsername = root
canal.instance.tsdb.dbPassword = root
# dump snapshot interval, default 24 hour
canal.instance.tsdb.snapshot.interval = 24
# purge snapshot expire , default 360 hour(15 days)
canal.instance.tsdb.snapshot.expire = 360

#################################################
#########        destinations          ##########
#################################################
canal.destinations = example
# conf root dir
canal.conf.dir = ../conf
# auto scan instance dir add/remove and start/stop instance
canal.auto.scan = true
canal.auto.scan.interval = 5
# set this value to 'true' means that when binlog pos not found, skip to latest.
# WARN: pls keep 'false' in production env, or if you know what you want.
canal.auto.reset.latest.pos.mode = false

# the mysql tsdb option threw a com.alibaba.druid.pool.DruidDataSource error, so h2 is kept here
canal.instance.tsdb.spring.xml = classpath:spring/tsdb/h2-tsdb.xml
#canal.instance.tsdb.spring.xml = classpath:spring/tsdb/mysql-tsdb.xml

canal.instance.global.mode = spring
canal.instance.global.lazy = false
canal.instance.global.manager.address = ${canal.admin.manager}
#canal.instance.global.spring.xml = classpath:spring/memory-instance.xml
canal.instance.global.spring.xml = classpath:spring/file-instance.xml
#canal.instance.global.spring.xml = classpath:spring/default-instance.xml

##################################################
#########        MQ Properties          ##########
##################################################
# aliyun ak/sk , support rds/mq
canal.aliyun.accessKey =
canal.aliyun.secretKey =
canal.aliyun.uid=

canal.mq.flatMessage = true
canal.mq.canalBatchSize = 50
canal.mq.canalGetTimeout = 100
# Set this value to "cloud", if you want open message trace feature in aliyun.
canal.mq.accessChannel = local

canal.mq.database.hash = true
canal.mq.send.thread.size = 30
canal.mq.build.thread.size = 8

##################################################
#########            Kafka              ##########
##################################################
kafka.bootstrap.servers = 127.0.0.1:9092
kafka.acks = all
kafka.compression.type = none
kafka.batch.size = 16384
kafka.linger.ms = 1
kafka.max.request.size = 1048576
kafka.buffer.memory = 33554432
kafka.max.in.flight.requests.per.connection = 1
kafka.retries = 0

kafka.kerberos.enable = false
kafka.kerberos.krb5.file = "../conf/kerberos/krb5.conf"
kafka.kerberos.jaas.file = "../conf/kerberos/jaas.conf"

##################################################
#########           RocketMQ            ##########
##################################################
rocketmq.producer.group = test
rocketmq.enable.message.trace = false
rocketmq.customized.trace.topic =
rocketmq.namespace =
rocketmq.namesrv.addr = 127.0.0.1:9876
rocketmq.retry.times.when.send.failed = 0
rocketmq.vip.channel.enabled = false
rocketmq.tag =

##################################################
#########           RabbitMQ            ##########
##################################################
rabbitmq.host =
rabbitmq.virtual.host =
rabbitmq.exchange =
rabbitmq.username =
rabbitmq.password =
rabbitmq.deliveryMode =

##################################################
#########            Pulsar             ##########
##################################################
pulsarmq.serverUrl =
pulsarmq.roleToken =
pulsarmq.topicTenantPrefix =
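With this configuration the deployer should expose the TCP endpoint on canal.port (11111) and the metrics port (11112); a quick way to confirm after startup:
ss -lntp | grep -E '11111|11112'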
instance.properties
#################################################
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0

# enable gtid use true/false
canal.instance.gtidon=false

# position info
canal.instance.master.address=hadoop106:3306
canal.instance.master.journal.name=
canal.instance.master.position=
canal.instance.master.timestamp=
canal.instance.master.gtid=

# rds oss binlog
canal.instance.rds.accesskey=
canal.instance.rds.secretkey=
canal.instance.rds.instanceId=

# tsdb guards against problems when the table structure changes while canal is subscribed to the binlog:
# canal stores the previous table structure in a database and compares it with the current one, which resolves that error.
# table meta tsdb info
canal.instance.tsdb.enable=true
#canal.instance.tsdb.url=jdbc:mysql://127.0.0.1:3306/canal_tsdb?useUnicode=true&characterEncoding=UTF-8&useSSL=false
#canal.instance.tsdb.dbUsername=root
#canal.instance.tsdb.dbPassword=root
# additional setting:
# tsdb uses the h2 database (built into Java) by default; without this setting it keeps using the built-in database
#canal.instance.tsdb.spring.xml = classpath:spring/tsdb/mysql-tsdb.xml

#canal.instance.standby.address =
#canal.instance.standby.journal.name =
#canal.instance.standby.position =
#canal.instance.standby.timestamp =
#canal.instance.standby.gtid=

# username/password
canal.instance.dbUsername=root
canal.instance.dbPassword=root
canal.instance.connectionCharset = UTF-8
# enable druid Decrypt database password
canal.instance.enableDruid=false
#canal.instance.pwdPublicKey=MFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALK4BUxdDltRRE5/zXpVEVPUgunvscYFtEip3pmLlhrWpacX7y7GCMo2/JM6LeHmiiNdH1FWgGCpUfircSwlWKUCAwEAAQ==

# table regex
canal.instance.filter.regex=.*\\..*
# table black regex
canal.instance.filter.black.regex=mysql\\.slave_.*
# table field filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.field=test1.t_product:id/subject/keywords,test2.t_company:id/name/contact/ch
# table field black filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.black.field=test1.t_product:subject/product_image,test2.t_company:id/name/contact/ch

# mq config
canal.mq.topic=example
# dynamic topic route by schema or table regex
#canal.mq.dynamicTopic=mytest1.user,topic2:mytest2\\..*,.*\\..*
canal.mq.partition=0
# hash partition config
#canal.mq.enableDynamicQueuePartition=false
#canal.mq.partitionsNum=3
#canal.mq.dynamicTopicPartitionNum=test.*:4,mycanal:6
#canal.mq.partitionHash=test.table:id^name,.*\\..*
#################################################
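Note that canal.instance.filter.regex is left at .*\\..*, so the instance subscribes to every schema and table. For this single-table case it can optionally be narrowed, using the same schema\\.table regex format shown in the comment above (a suggestion, not something the original setup configures):
canal.instance.filter.regex=FlinkEtl\\.employee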
adapter
application.yml
server:
  port: 8081
logging:
  level:
    com.alibaba.otter.canal.client.adapter.rdb: DEBUG
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8
    default-property-inclusion: non_null

canal.conf:
  mode: tcp #tcp kafka rocketMQ rabbitMQ
  # IP address of the host running the Canal deployer, port 11111
  canalServerHost: hadoop108:11111
  # flatMessage: true
  # mqServers 127.0.0.1:9092 #or rocketmq
  # zookeeperHosts:
  batchSize: 500
  syncBatchSize: 1000
  # -1 means retry indefinitely
  retries: 0
  timeout:
  accessKey:
  secretKey:
  consumerProperties:
    # canal tcp consumer
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
    # kafka consumer
    kafka.bootstrap.servers: 127.0.0.1:9092
    kafka.enable.auto.commit: false
    kafka.auto.commit.interval.ms: 1000
    kafka.auto.offset.reset: latest
    kafka.request.timeout.ms: 40000
    kafka.session.timeout.ms: 30000
    kafka.isolation.level: read_committed
    kafka.max.poll.records: 1000
    # rocketMQ consumer
    rocketmq.namespace:
    rocketmq.namesrv.addr: 127.0.0.1:9876
    rocketmq.batch.size: 1000
    rocketmq.enable.message.trace: false
    rocketmq.customized.trace.topic:
    rocketmq.access.channel:
    rocketmq.subscribe.filter:
    # rabbitMQ consumer
    rabbitmq.host:
    rabbitmq.virtual.host:
    rabbitmq.username:
    rabbitmq.password:
    rabbitmq.resource.ownerId:

  srcDataSources:
    defaultDS:
      url: jdbc:mysql://hadoop106:3306/FlinkEtl?useUnicode=true&characterEncoding=UTF-8&serverTimezone=GMT&useSSL=false
      username: root
      password: root
  canalAdapters:
  - instance: example # canal instance Name or mq topic name
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
      - name: rdb
        key: mysql1
        properties:
          jdbc.driverClassName: com.mysql.jdbc.Driver
          jdbc.url: jdbc:mysql://hadoop108:3306/FlinkEtl?useUnicode=true&characterEncoding=UTF-8&serverTimezone=GMT&useSSL=false
          jdbc.username: root
          jdbc.password: root
#          druid.stat.enable: false
#          druid.stat.slowSqlMillis: 1000
#      - name: rdb
#        key: oracle1
#        properties:
#          jdbc.driverClassName: oracle.jdbc.OracleDriver
#          jdbc.url: jdbc:oracle:thin:@localhost:49161:XE
#          jdbc.username: mytest
#          jdbc.password: m121212
#      - name: rdb
#        key: postgres1
#        properties:
#          jdbc.driverClassName: org.postgresql.Driver
#          jdbc.url: jdbc:postgresql://localhost:5432/postgres
#          jdbc.username: postgres
#          jdbc.password: 121212
#          threads: 1
#          commitSize: 3000
#      - name: hbase
#        properties:
#          hbase.zookeeper.quorum: 127.0.0.1
#          hbase.zookeeper.property.clientPort: 2181
#          zookeeper.znode.parent: /hbase
#      - name: es
#        hosts: 127.0.0.1:9300 # 127.0.0.1:9200 for rest mode
#        properties:
#          mode: transport # or rest
#          # security.auth: test:123456 # only used for rest mode
#          cluster.name: elasticsearch
#      - name: kudu
#        key: kudu
#        properties:
#          kudu.master.address: 127.0.0.1 # ',' split multi address
#      - name: phoenix
#        key: phoenix
#        properties:
#          jdbc.driverClassName: org.apache.phoenix.jdbc.PhoenixDriver
#          jdbc.url: jdbc:phoenix:127.0.0.1:2181:/hbase/db
#          jdbc.username:
#          jdbc.password:
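After the adapter starts with this file, its small REST API on server.port can be used to confirm that the example destination is registered and its sync switch is on; the endpoint name below comes from the adapter documentation, so verify it against your adapter version:
curl http://127.0.0.1:8081/destinations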
bootstrap.yml
canal:
  manager:
    jdbc:
      url: jdbc:mysql://hadoop108:3306/canal_manager?useUnicode=true&characterEncoding=UTF-8&serverTimezone=GMT&useSSL=false
      username: root
      password: root
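bootstrap.yml is only needed when the adapter pulls its configuration from the canal-admin metadata database (the canal_manager schema imported in the admin step at the top); a quick check that the schema exists on hadoop108:
mysql -h hadoop108 -uroot -proot -e "show databases like 'canal_manager';"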
mytest_user.yml
dataSourceKey: defaultDS
destination: example
# keep consistent with the corresponding setting in the adapter's application.yml (step 2) above
groupId: g1
# keep consistent with the setting in step 2 above
outerAdapterKey: mysql1
concurrent: true
dbMapping:
  # source MySQL database name
  database: FlinkEtl
  # source table name
  table: employee
  # target table name in the destination database
  targetTable: employee
  targetPk:
    id: id
  # If the source and target tables have exactly the same column structure, just enable this
  # option and comment out targetColumns below
  mapAll: true
  # targetColumns:
  #   id:
  #   name:
  #   role_id:
  #   c_time:
  #   test1:
  # keep this commented out as well
  # etlCondition: "where c_time>={}"
  # batch commit size
  # commitBatch: 3000

## Mirror schema synchronize config
#dataSourceKey: defaultDS
#destination: example
#groupId: g1
#outerAdapterKey: mysql1
#concurrent: true
#dbMapping:
#  mirrorDb: true
#  database: mytest
================= Mirror sync configuration ===========================
Source: Hands-On Tutorial, Chapter 4, Section 4.10: How to Use CANAL to Sync MySQL Data to OceanBase in Real Time - Zhihu (zhihu.com)
#dataSourceKey: defaultDS
#destination: example      # keep consistent with ③ above
#groupId: g1               # keep consistent with ③
#outerAdapterKey: mysql1
#concurrent: true
#dbMapping:
#  # source MySQL database name
#  database: FlinkEtl
#  # source table name
#  table: employee
#  # target table name in the destination database
#  targetTable: employee
#  targetPk:
#    id: id
#  # If the source and target tables have exactly the same column structure, just enable this option
#  # and comment out targetColumns below (if targetColumns is also configured, it takes precedence)
#  mapAll: true
#  # Column mapping, format: target column: source column (the source column can be omitted when the names match)
#  # targetColumns:
#  #   id:
#  #   name:
#  #   role_id:
#  #   c_time:
#  #   test1:
#  # etlCondition: "where c_time>={}"
#  # batch commit size
#  # commitBatch: 3000

## Mirror schema synchronize config
dataSourceKey: defaultDS
destination: example
groupId: g1
outerAdapterKey: mysql1
concurrent: true
dbMapping:
  mirrorDb: true
  database: FlinkEtl
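With mirrorDb enabled, the whole FlinkEtl schema on hadoop106 is mirrored to hadoop108, and according to the adapter's mirror-schema documentation DDL is replayed as well; a quick check after restarting the adapter (a sketch, the table name t_mirror_test is arbitrary):
mysql -h hadoop106 -uroot -proot -e "create table FlinkEtl.t_mirror_test (id int primary key, note varchar(32)); insert into FlinkEtl.t_mirror_test values (1, 'hello');"
sleep 2
mysql -h hadoop108 -uroot -proot -e "select * from FlinkEtl.t_mirror_test;"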