Configuring canal to sync MySQL data
Prerequisite: table schemas
CREATE TABLE `user` (
  `uid` int(4) NOT NULL AUTO_INCREMENT,
  `user_name` varchar(20) DEFAULT NULL,
  `password` varchar(20) DEFAULT NULL,
  `real_name` varchar(20) DEFAULT NULL,
  `sflag` int(1) DEFAULT NULL,
  `lock` int(1) DEFAULT '0',
  `create_date` varchar(20) DEFAULT NULL,
  PRIMARY KEY (`uid`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=11 DEFAULT CHARSET=utf8;

CREATE TABLE `user_role` (
  `uid` int(4) DEFAULT NULL,
  `rid` int(4) DEFAULT NULL,
  KEY `fk_uid` (`uid`) USING BTREE,
  KEY `fk_rid` (`rid`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
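canal reads the MySQL binlog, so before any of this works the source instance needs row-format binlogging and an account with replication privileges. A minimal sketch following the canal quick-start docs (the canal account is the conventional example; this article simply uses root below):

# my.cnf
[mysqld]
log-bin=mysql-bin
binlog-format=ROW
server_id=1

-- replication account for canal
CREATE USER canal IDENTIFIED BY 'canal';
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%';
FLUSH PRIVILEGES;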
I. Configure the deployer for direct TCP; Java and the adapter subscribe to the deployer directly
1. Deployer configuration: canal.properties
# conf directory layout
-rwxr-xr-x@ 1 eternity staff 6510 7 30 07:50 canal.properties
-rwxr-xr-x@ 1 eternity staff 319 6 22 2021 canal_local.properties
drwxr-xr-x 6 eternity staff 192 7 30 08:02 eternity
drwxr-xr-x 6 eternity staff 192 7 30 08:02 example
-rwxr-xr-x@ 1 eternity staff 4429 5 20 15:26 logback.xml
drwxr-xr-x 3 eternity staff 96 6 22 2021 metrics
drwxr-xr-x 8 eternity staff 256 10 9 2021 spring
canal.properties
#################################################
######### common argument #############
#################################################
# tcp, kafka, rocketMQ, rabbitMQ, pulsarMQ
canal.serverMode = tcp
#################################################
######### destinations #############
#################################################
# canal watches the eternity instance, which maps to the eternity folder under conf
canal.destinations = eternity
# conf root dir; the eternity instance config is resolved from ../conf
canal.conf.dir = ../conf
Instance configuration: instance.properties
A ready-made example instance ships with the deployer; copy the example folder and rename it eternity so that it matches the eternity instance declared above.
#################################################
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0
# position info
# address of the MySQL instance to watch
canal.instance.master.address=localhost:3306
# username/password
# account and password for the watched database
canal.instance.dbUsername=root
canal.instance.dbPassword=6231989zh
# table regex
# tables to watch; this can be one specific table or all tables (using .*\\..* here threw an error, so .*\..* is used instead)
canal.instance.filter.regex=.*\..*
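For reference, the filter is a comma-separated list of Perl-style regexes matched against schema.table, with backslashes doubled in the properties file. A couple of hedged examples, per the canal wiki:
# every table in the eternity schema
canal.instance.filter.regex=eternity\\..*
# one exact table plus everything in another schema
canal.instance.filter.regex=eternity.user,apiboot\\..*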
2. Java subscriber
Add the dependencies to pom.xml:
<!-- canal https://mvnrepository.com/artifact/com.alibaba.otter/canal.client -->
<dependency>
<groupId>com.alibaba.otter</groupId>
<artifactId>canal.client</artifactId>
<version>1.1.6</version>
</dependency>
<!-- without adding this explicitly, com.alibaba.otter.canal.protocol.Message cannot be resolved -->
<dependency>
<groupId>com.alibaba.otter</groupId>
<artifactId>canal.protocol</artifactId>
<version>1.1.6</version>
</dependency>
Configuration file (application.yml):
canal:
  ip: 127.0.0.1
  port: 11111
  username: root
  password: 6231989zh
  destination: eternity
  batch-size: 1500
  subscribe: .*\..*
Subscriber code.
① CanalRunClient, the listener
import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.protocol.Message;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.net.InetSocketAddress;

/**
 * @Description: Canal task bootstrap class
 * @Author: yyl
 * @Date: 2022/7/13
 */
@Component
@Slf4j
public class CanalRunClient implements ApplicationRunner {

    @Value("${canal.ip}")
    private String ip;
    @Value("${canal.port}")
    private Integer port;
    @Value("${canal.username}")
    private String username;
    @Value("${canal.password}")
    private String password;
    @Value("${canal.destination}")
    private String destination;
    @Value("${canal.batch-size}")
    private Integer batchSize;
    @Value("${canal.subscribe}")
    private String subscribe;

    @Resource
    MessageHandler messageHandler;

    @Override
    public void run(ApplicationArguments args) throws Exception {
        log.info("----->>>>>>>> starting canal");
        startCanal();
    }

    private void startCanal() {
        // create the connection
        CanalConnector connector = CanalConnectors.newSingleConnector(new InetSocketAddress(ip, port), destination, username, password);
        try {
            // open the connection
            connector.connect();
            // subscribe to database tables (here: all tables)
            connector.subscribe(subscribe);
            // connector.subscribe("seata_account\\..*");
            // roll back to the last un-acked position, so the next fetch resumes from there
            connector.rollback();
            while (true) {
                // fetch up to batchSize entries without auto-ack
                Message message = connector.getWithoutAck(batchSize);
                // batch id
                long batchId = message.getId();
                // number of entries in the batch
                int size = message.getEntries().size();
                if (batchId == -1 || size == 0) {
                    // no data: sleep 500 ms before polling again
                    try {
                        Thread.sleep(500);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                } else {
                    // data available: hand it to the handler
                    messageHandler.handler(message);
                }
                connector.ack(batchId); // acknowledge the batch
                // connector.rollback(batchId); // on failure, roll the batch back instead
            }
        } finally {
            connector.disconnect();
        }
    }
}
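Side note: if the deployer runs in HA mode and registers in ZooKeeper, the client API also offers a cluster connector. A one-line sketch (the ZooKeeper address is a placeholder):

// HA variant: resolve the active deployer via ZooKeeper instead of a fixed host:port
CanalConnector connector = CanalConnectors.newClusterConnector("127.0.0.1:2181", destination, username, password);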
② MessageHandler
import com.alibaba.otter.canal.protocol.CanalEntry;
import com.alibaba.otter.canal.protocol.Message;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.util.List;

@Service
@Slf4j
public class MessageHandler {

    @Resource
    private AbstractEntryHandler abstractEntryHandler;

    public void handler(Message message) {
        List<CanalEntry.Entry> entries = message.getEntries();
        for (CanalEntry.Entry entry : entries) {
            // only row-data entries carry actual changes; transaction begin/end markers are skipped
            if (entry.getEntryType().equals(CanalEntry.EntryType.ROWDATA)) {
                log.info("----->>>>>>> handling CanalEntry");
                abstractEntryHandler.handler(entry);
            }
        }
    }
}
③ AbstractEntryHandler, the actual processing logic
import com.alibaba.otter.canal.protocol.CanalEntry;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * @Description: processes each entry once data has been fetched
 * @Author: yyl
 * @Date: 2022/7/13
 */
@Service
@Slf4j
public class AbstractEntryHandler {

    public final void handler(CanalEntry.Entry entry) {
        CanalEntry.RowChange rowChange = null;
        try {
            rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
        } catch (Exception e) {
            throw new RuntimeException("ERROR ## parser of eromanga-event has an error , data:" + entry.toString(), e);
        }
        CanalEntry.EventType eventType = rowChange.getEventType();
        boolean isDdl = rowChange.getIsDdl();
        log.info("----------schema: {}--------table: {}--------", entry.getHeader().getSchemaName(), entry.getHeader().getTableName());
        String operation = null;
        Map<String, String> map = new HashMap<>();
        switch (eventType) {
            case INSERT:
                rowChange.getRowDatasList().forEach(rowData -> {
                    List<CanalEntry.Column> columns = rowData.getAfterColumnsList();
                    for (CanalEntry.Column column : columns) {
                        map.put(camelName(column.getName()), column.getValue());
                    }
                });
                operation = "insert";
                break;
            case UPDATE:
                rowChange.getRowDatasList().forEach(rowData -> {
                    List<CanalEntry.Column> columns = rowData.getAfterColumnsList();
                    for (CanalEntry.Column column : columns) {
                        map.put(camelName(column.getName()), column.getValue());
                    }
                    Map<String, String> map1 = new HashMap<>();
                    List<CanalEntry.Column> columns1 = rowData.getBeforeColumnsList();
                    for (CanalEntry.Column column : columns1) {
                        map1.put(camelName(column.getName()), column.getValue());
                    }
                    log.info("---------before-update map={}----------", map1);
                });
                operation = "update";
                break;
            case DELETE:
                rowChange.getRowDatasList().forEach(rowData -> {
                    List<CanalEntry.Column> columns = rowData.getBeforeColumnsList();
                    for (CanalEntry.Column column : columns) {
                        map.put(camelName(column.getName()), column.getValue());
                    }
                });
                operation = "delete";
                break;
            default:
                break;
        }
        log.info("---------operation: {}, data={}----------", operation, map);
    }

    /**
     * Converts an underscore-separated name to camel case. Returns the empty
     * string when the input is empty.
     * Example: HELLO_WORLD -> helloWorld
     *
     * @param name the underscore-separated name to convert
     * @return the camel-case name
     */
    public static String camelName(String name) {
        StringBuilder result = new StringBuilder();
        // quick checks
        if (name == null || name.isEmpty()) {
            // nothing to convert
            return "";
        } else if (!name.contains("_")) {
            // no underscores: just lower-case the first letter
            return name.substring(0, 1).toLowerCase() + name.substring(1);
        }
        // split the original string on underscores
        String[] camels = name.split("_");
        for (String camel : camels) {
            // skip leading/trailing or doubled underscores, which yield empty segments
            if (camel.isEmpty()) {
                continue;
            }
            if (result.length() == 0) {
                // first segment: all lower case
                result.append(camel.toLowerCase());
            } else {
                // later segments: capitalize the first letter
                result.append(camel.substring(0, 1).toUpperCase());
                result.append(camel.substring(1).toLowerCase());
            }
        }
        return result.toString();
    }
}
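The handler above only logs the mapped row. If the Java subscriber should write into Elasticsearch itself (the adapter path below does this for you), here is a minimal sketch assuming the elasticsearch-rest-high-level-client dependency and the user index this article targets; client construction and error handling are omitted:

import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;

import java.io.IOException;
import java.util.Map;

public class EsWriter {

    private final RestHighLevelClient client; // assumed to be configured elsewhere

    public EsWriter(RestHighLevelClient client) {
        this.client = client;
    }

    /** Index one row produced by AbstractEntryHandler; the uid column doubles as the ES _id. */
    public void upsert(Map<String, String> row) throws IOException {
        IndexRequest request = new IndexRequest("user") // the index this article targets
                .id(row.get("uid"))                     // camelName("uid") stays "uid"
                .source(row);                           // the column map becomes the document body
        client.index(request, RequestOptions.DEFAULT);
    }
}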
3. Adapter sync
Comment out this block in the bootstrap.yml configuration file:
# canal:
#   manager:
#     jdbc:
#       url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
#       username: root
#       password: 6231989zh
Modify the application.yml configuration file:
canal.conf:
  mode: tcp # tcp kafka rocketMQ rabbitMQ
  consumerProperties:
    # canal tcp consumer
    # address to subscribe to, i.e. the deployer's address
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
  # source data source
  srcDataSources:
    defaultDS:
      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
      username: root
      password: 6231989zh
  canalAdapters:
    - instance: eternity # canal instance name, or the MQ topic name
      groups:
        - groupId: g1
          outerAdapters:
            - name: logger
            - name: es7 # adapter name; for Elasticsearch, es6 or es7
              hosts: 127.0.0.1:9300 # ES address; no leading http://
              properties:
                mode: transport # or rest
                # security.auth: test:123456 # only used for rest mode
                cluster.name: elasticsearch # the ES cluster name; visible at http://localhost:9200/
es7 instance mapping: under the adapter's es7 config folder, copy one of the existing files and rename it eternity_user.yml
dataSourceKey: defaultDS # data source name under srcDataSources in application.yml
destination: eternity # name of the instance in the deployer
groupId: g1
esMapping:
  _index: user # ES index name
  _id: _id # the ES _id comes from the uid column of the user table
  # upsert: true
  # pk: id
  sql: "SELECT
          u.uid as _id,
          u.user_name,
          u.password,
          u.real_name,
          u.sflag,
          u.lock,
          u.create_date,
          ur.rid
        FROM
          user u left join user_role ur on u.uid = ur.uid" # this sql drives the sync; canal itself only pushes incremental changes, so the full import also goes through this sql
  # objFields:
  #   _labels: array:;
  etlCondition: "where u.uid>={}" # used with the sql above during a full import to bound the rows loaded; adjust the condition as needed
  commitBatch: 3000
Full data import
http://localhost:8081/etl/es7/eternity_user.yml
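The import is an HTTP call against the adapter (default port 8081). A hedged curl sketch; the optional params form field fills the {} placeholder in etlCondition, and the value 1 here is only an example:

curl -X POST http://localhost:8081/etl/es7/eternity_user.yml -d "params=1"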
II. Configure the deployer to deliver messages to RabbitMQ; Java and the adapter subscribe to the RabbitMQ queue directly
0. Prerequisite setup (a sketch for declaring and binding these follows the list)
- exchange: canal.exchange
- queue: canal.queue
- routing key: canal.routing.key
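The exchange, queue, and binding must exist before the deployer publishes anything. A minimal sketch with the standard Java amqp-client (host and exchange type are assumptions; a management UI or rabbitmqadmin works just as well):

import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;

public class CanalRabbitSetup {
    public static void main(String[] args) throws Exception {
        ConnectionFactory factory = new ConnectionFactory();
        factory.setHost("127.0.0.1"); // same broker the deployer will point at
        try (Connection conn = factory.newConnection();
             Channel channel = conn.createChannel()) {
            // durable direct exchange the deployer publishes to
            channel.exchangeDeclare("canal.exchange", "direct", true);
            // durable queue the adapter (or a Java consumer) reads from
            channel.queueDeclare("canal.queue", true, false, false, null);
            // bind with the routing key that canal.mq.topic will carry
            channel.queueBind("canal.queue", "canal.exchange", "canal.routing.key");
        }
    }
}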
1. Deployer configuration: canal.properties
canal.properties
#################################################
######### common argument #############
#################################################
# tcp, kafka, rocketMQ, rabbitMQ, pulsarMQ
canal.serverMode = rabbitMQ
#################################################
######### destinations #############
#################################################
# canal watches the eternity instance, which maps to the eternity folder under conf
canal.destinations = eternity
# conf root dir; the eternity instance config is resolved from ../conf
canal.conf.dir = ../conf
##################################################
######### RabbitMQ #############
##################################################
rabbitmq.host = 127.0.0.1
rabbitmq.virtual.host = /
rabbitmq.exchange = canal.exchange
rabbitmq.username = canal
rabbitmq.password = canal
rabbitmq.deliveryMode =
Instance configuration: instance.properties
A ready-made example instance ships with the deployer; copy the example folder and rename it eternity so that it matches the eternity instance declared above.
#################################################
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0
# position info
# address of the MySQL instance to watch
canal.instance.master.address=localhost:3306
# username/password
# account and password for the watched database
canal.instance.dbUsername=root
canal.instance.dbPassword=6231989zh
# table regex
# tables to watch; this can be one specific table or all tables (using .*\\..* here threw an error, so .*\..* is used instead)
canal.instance.filter.regex=.*\..*
# mq config; here this is the RabbitMQ routing key
canal.mq.topic=canal.routing.key
2. Adapter sync
Comment out this block in the bootstrap.yml configuration file:
# canal:
#   manager:
#     jdbc:
#       url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
#       username: root
#       password: 6231989zh
Modify the application.yml configuration file:
canal.conf:
  mode: rabbitMQ # tcp kafka rocketMQ rabbitMQ
  consumerProperties:
    # canal tcp consumer (left over from the tcp template; not used in rabbitMQ mode)
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
  # source data source
  srcDataSources:
    defaultDS:
      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
      username: root
      password: 6231989zh
  canalAdapters:
    - instance: canal.queue # canal instance name, or here the RabbitMQ queue name
      groups:
        - groupId: g1
          outerAdapters:
            - name: logger
            - name: es7 # adapter name; for Elasticsearch, es6 or es7
              hosts: 127.0.0.1:9300 # ES address; no leading http://
              properties:
                mode: transport # or rest
                # security.auth: test:123456 # only used for rest mode
                cluster.name: elasticsearch # the ES cluster name; visible at http://localhost:9200/
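One caveat: in rabbitMQ mode the adapter reads its broker settings from consumerProperties as well. The shipped 1.1.x application.yml template has a rabbitMQ consumer block along these lines (keys as I recall them from the template; values mirroring the deployer config above), which likely needs filling in:

canal.conf:
  consumerProperties:
    # canal rabbitMQ consumer
    rabbitmq.host: 127.0.0.1
    rabbitmq.virtual.host: /
    rabbitmq.username: canal
    rabbitmq.password: canal
    rabbitmq.resource.ownerId: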
es7 instance mapping: under the adapter's es7 config folder, copy one of the existing files and rename it eternity_user.yml
dataSourceKey: defaultDS # data source name under srcDataSources in application.yml
destination: canal.queue # the RabbitMQ queue name
groupId: g1
esMapping:
  _index: user # ES index name
  _id: _id # the ES _id comes from the uid column of the user table
  # upsert: true
  # pk: id
  sql: "SELECT
          u.uid as _id,
          u.user_name,
          u.password,
          u.real_name,
          u.sflag,
          u.lock,
          u.create_date,
          ur.rid
        FROM
          user u left join user_role ur on u.uid = ur.uid" # this sql drives the sync; canal itself only pushes incremental changes, so the full import also goes through this sql
  # objFields:
  #   _labels: array:;
  etlCondition: "where u.uid>={}" # used with the sql above during a full import to bound the rows loaded; adjust the condition as needed
  commitBatch: 3000
III. Configure the deployer to deliver messages to Kafka; the adapter subscribes to the Kafka topic directly
0. Prerequisite
- topic: canal_kafka_test
1. Deployer configuration: canal.properties
canal.properties
#################################################
######### common argument #############
#################################################
# tcp, kafka, rocketMQ, rabbitMQ, pulsarMQ
canal.serverMode = kafka
#################################################
######### destinations #############
#################################################
# canal watches the eternity instance, which maps to the eternity folder under conf
canal.destinations = eternity
# conf root dir; the eternity instance config is resolved from ../conf
canal.conf.dir = ../conf
##################################################
######### Kafka #############
##################################################
kafka.bootstrap.servers = 192.168.3.12:9092,192.168.3.12:9093,192.168.3.12:9094
kafka.acks = all
kafka.compression.type = none
kafka.batch.size = 16384
kafka.linger.ms = 1
kafka.max.request.size = 1048576
kafka.buffer.memory = 33554432
kafka.max.in.flight.requests.per.connection = 1
kafka.retries = 0
kafka.kerberos.enable = false
kafka.kerberos.krb5.file = "../conf/kerberos/krb5.conf"
kafka.kerberos.jaas.file = "../conf/kerberos/jaas.conf"
Instance configuration: instance.properties
A ready-made example instance ships with the deployer; copy the example folder and rename it eternity so that it matches the eternity instance declared above.
#################################################
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0
# position info
# address of the MySQL instance to watch
canal.instance.master.address=localhost:3306
# username/password
# account and password for the watched database
canal.instance.dbUsername=root
canal.instance.dbPassword=6231989zh
# table regex
# tables to watch; this can be one specific table or all tables (using .*\\..* here threw an error, so .*\..* is used instead)
canal.instance.filter.regex=.*\..*
# mq config; here this is the Kafka topic
canal.mq.topic=canal_kafka_test
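To confirm the deployer is actually publishing, tail the topic with the stock Kafka console consumer; a sketch against the first broker configured above:

kafka-console-consumer.sh --bootstrap-server 192.168.3.12:9092 --topic canal_kafka_test --from-beginning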
2. Adapter sync
Comment out this block in the bootstrap.yml configuration file:
# canal:
#   manager:
#     jdbc:
#       url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
#       username: root
#       password: 6231989zh
Modify the application.yml configuration file:
canal.conf:
  mode: kafka # tcp kafka rocketMQ rabbitMQ
  consumerProperties:
    # canal tcp consumer (left over from the tcp template; not used in kafka mode)
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
  # source data source
  srcDataSources:
    defaultDS:
      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
      username: root
      password: 6231989zh
  canalAdapters:
    - instance: canal_kafka_test # canal instance name, or here the Kafka topic name
      groups:
        - groupId: g1
          outerAdapters:
            - name: logger
            - name: es7 # adapter name; for Elasticsearch, es6 or es7
              hosts: 127.0.0.1:9300 # ES address; no leading http://
              properties:
                mode: transport # or rest
                # security.auth: test:123456 # only used for rest mode
                cluster.name: elasticsearch # the ES cluster name; visible at http://localhost:9200/
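As in rabbitMQ mode, kafka mode takes its broker settings from consumerProperties, not from the canal.tcp.* leftovers above. The adapter template ships a kafka consumer block roughly as follows (keys as I recall them from the 1.1.x template; the broker list mirrors the deployer config):

canal.conf:
  consumerProperties:
    # kafka consumer
    kafka.bootstrap.servers: 192.168.3.12:9092,192.168.3.12:9093,192.168.3.12:9094
    kafka.enable.auto.commit: false
    kafka.auto.commit.interval.ms: 1000
    kafka.auto.offset.reset: latest
    kafka.request.timeout.ms: 40000
    kafka.session.timeout.ms: 30000
    kafka.isolation.level: read_committed
    kafka.max.poll.records: 1000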
es7 instance mapping: under the adapter's es7 config folder, copy one of the existing files and rename it eternity_user.yml
dataSourceKey: defaultDS # data source name under srcDataSources in application.yml
destination: canal_kafka_test # the Kafka topic name
groupId: g1
esMapping:
  _index: user # ES index name
  _id: _id # the ES _id comes from the uid column of the user table
  # upsert: true
  # pk: id
  sql: "SELECT
          u.uid as _id,
          u.user_name,
          u.password,
          u.real_name,
          u.sflag,
          u.lock,
          u.create_date,
          ur.rid
        FROM
          user u left join user_role ur on u.uid = ur.uid" # this sql drives the sync; canal itself only pushes incremental changes, so the full import also goes through this sql
  # objFields:
  #   _labels: array:;
  etlCondition: "where u.uid>={}" # used with the sql above during a full import to bound the rows loaded; adjust the condition as needed
  commitBatch: 3000
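Once rows flow through, spot-check the synced documents against the index (standard Elasticsearch search API, default HTTP port assumed):

curl "http://localhost:9200/user/_search?pretty"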