cannl配置同步mysql数据

前提:表结构

CREATE TABLE `user` (
  `uid` int(4) NOT NULL AUTO_INCREMENT,
  `user_name` varchar(20) DEFAULT NULL,
  `password` varchar(20) DEFAULT NULL,
  `real_name` varchar(20) DEFAULT NULL,
  `sflag` int(1) DEFAULT NULL,
  `lock` int(1) DEFAULT '0',
  `create_date` varchar(20) DEFAULT NULL,
  PRIMARY KEY (`uid`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=11 DEFAULT CHARSET=utf8;

CREATE TABLE `user_role` (
  `uid` int(4) DEFAULT NULL,
  `rid` int(4) DEFAULT NULL,
  KEY `fk_uid` (`uid`) USING BTREE,
  KEY `fk_rid` (`rid`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

一、配置deployer为tcp直连,java和adaptor直接订阅deployer

1.deployer的配置,cannl.properties

# 配置文件目录
-rwxr-xr-x@ 1 eternity  staff  6510  7 30 07:50 canal.properties
-rwxr-xr-x@ 1 eternity  staff   319  6 22  2021 canal_local.properties
drwxr-xr-x  6 eternity  staff   192  7 30 08:02 eternity
drwxr-xr-x  6 eternity  staff   192  7 30 08:02 example
-rwxr-xr-x@ 1 eternity  staff  4429  5 20 15:26 logback.xml
drwxr-xr-x  3 eternity  staff    96  6 22  2021 metrics
drwxr-xr-x  8 eternity  staff   256 10  9  2021 spring

cannl.properties

#################################################
######### 		common argument		#############
#################################################
# tcp, kafka, rocketMQ, rabbitMQ, pulsarMQ
canal.serverMode = tcp
#################################################
######### 		destinations		#############
#################################################
# cannl监控的是eternity的实例,对应的是conf文件夹下面的eternity文件夹
canal.destinations = eternity
# conf root dir。eternity实例的配置文件从../conf找
canal.conf.dir = ../conf

实例配置,instance.properties

已经有现成的实例example,把example文件夹拷贝一份,更改为eternity,此时就对应上了上面配置文件中配置的实例eternity

#################################################
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0
# position info
# 配置需要监控的数据地址
canal.instance.master.address=localhost:3306

# username/password
# 配置需要监控数据库的帐号和密码
canal.instance.dbUsername=root
canal.instance.dbPassword=6231989zh

# table regex
# 配置需要监控的表,此时可以设置某张特定的表或者全部的表,使用.*\\..*会报错
canal.instance.filter.regex=.*\..*

2.java订阅

pom引入依赖

<!-- cannl https://mvnrepository.com/artifact/com.alibaba.otter/canal.client -->
<dependency>
  <groupId>com.alibaba.otter</groupId>
  <artifactId>canal.client</artifactId>
  <version>1.1.6</version>
</dependency>
<!--我这边不手动引入会报找不到com.alibaba.otter.canal.protocol.Message-->
<dependency>
  <groupId>com.alibaba.otter</groupId>
  <artifactId>canal.protocol</artifactId>
  <version>1.1.6</version>
</dependency>

配置文件

canal:
  ip: 127.0.0.1
  port: 11111
  username: root
  password: 6231989zh
  destination: eternity
  batch-size: 1500
  subscribe: .*\..*

代码监控。

①.CanalRunClient,监听

import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.protocol.Message;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.net.InetSocketAddress;

/**
 * @Description: CanalTask启动类
 * @Author: yyl
 * @Date: 2022/7/13
 */
@Component
@Slf4j
public class CanalRunClient implements ApplicationRunner {

    @Value("${canal.ip}")
    private String ip;

    @Value("${canal.port}")
    private Integer port;

    @Value("${canal.username}")
    private String username;

    @Value("${canal.password}")
    private String password;

    @Value("${canal.destination}")
    private String destination;

    @Value("${canal.batch-size}")
    private Integer batchSize;

    @Value("${canal.subscribe}")
    private String subscribe;

    @Resource
    MessageHandler messageHandler;

    @Override
    public void run(ApplicationArguments args) throws Exception {
        log.info("----->>>>>>>>启动canal");
        startCanal();
    }

    private void startCanal() {
        // 创建链接
        CanalConnector connector = CanalConnectors.newSingleConnector(new InetSocketAddress(ip, port), destination, username, password);
        try {
            //打开连接
            connector.connect();
            //订阅数据库表,全部表
            connector.subscribe(subscribe);
//            connector.subscribe("seata_account\\..*");
            //回滚到未进行ack的地方,下次fetch的时候,可以从最后一个没有ack的地方开始
            connector.rollback();
            while (true) {
                //获取指定数量的数据
                Message message = connector.getWithoutAck(batchSize); // 获取指定数量的数据
                //获取批量ID
                long batchId = message.getId();
                //获取批量的数量
                int size = message.getEntries().size();
                //如果没有数据
                if (batchId == -1 || size == 0) {
                    try {
                        //现成休眠1s
                        Thread.sleep(500);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                } else {
                    //如果有数据,处理数据
                    messageHandler.handler(message);
                }

                connector.ack(batchId); // 提交确认
                // connector.rollback(batchId); // 处理失败, 回滚数据
            }

        } finally {
            connector.disconnect();
        }
    }

}

②.MessageHandler

import com.alibaba.otter.canal.protocol.CanalEntry;
import com.alibaba.otter.canal.protocol.Message;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.util.List;

@Service
@Slf4j
public class MessageHandler {

    @Resource
    private AbstractEntryHandler abstractEntryHandler;

    public void handler(Message message) {
        List<CanalEntry.Entry> entries = message.getEntries();
        for (CanalEntry.Entry entry : entries) {
            if (entry.getEntryType().equals(CanalEntry.EntryType.ROWDATA)) {
                log.info("----->>>>>>>开始处理CanalEntry");
                abstractEntryHandler.handler(entry);
            }
        }
    }
}

③.AbstractEntryHandler,具体处理逻辑

import com.alibaba.otter.canal.protocol.CanalEntry;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import java.util.HashMap;
import java.util.List;
import java.util.Map;


/**
 * @Description: 获取到数据后进行相应的处理
 * @Author: yyl
 * @Date: 2022/7/13
 */
@Service
@Slf4j
public class AbstractEntryHandler {

    public final void handler(CanalEntry.Entry entry) {

        CanalEntry.RowChange rowChage = null;
        try {
            rowChage = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
        } catch (Exception e) {
            throw new RuntimeException("ERROR ## parser of eromanga-event has an error , data:" + entry.toString(), e);
        }

        CanalEntry.EventType eventType = rowChage.getEventType();

        boolean isDdl = rowChage.getIsDdl();

        log.info("----------库名:{}--------表名:{}--------", entry.getHeader().getSchemaName(), entry.getHeader().getTableName());
        String operation = null;
        Map<String, String> map = new HashMap<>();
        switch (eventType) {
            case INSERT:
                rowChage.getRowDatasList().forEach(rowData -> {
                    List<CanalEntry.Column> columns = rowData.getAfterColumnsList();
                    for (CanalEntry.Column column : columns) {
                        map.put(camelName(column.getName()), column.getValue());
                    }
                });
                operation = "添加";
                break;
            case UPDATE:
                rowChage.getRowDatasList().forEach(rowData -> {
                    List<CanalEntry.Column> columns = rowData.getAfterColumnsList();
                    for (CanalEntry.Column column : columns) {
                        map.put(camelName(column.getName()), column.getValue());
                    }
                    Map<String, String> map1 = new HashMap<>();
                    List<CanalEntry.Column> columns1 = rowData.getBeforeColumnsList();
                    for (CanalEntry.Column column : columns1) {
                        map1.put(camelName(column.getName()), column.getValue());
                    }
                    log.info("---------更新之前map={}----------", map1);
                });
                operation = "更新";
                break;
            case DELETE:
                rowChage.getRowDatasList().forEach(rowData -> {
                    List<CanalEntry.Column> columns = rowData.getBeforeColumnsList();
                    for (CanalEntry.Column column : columns) {
                        map.put(camelName(column.getName()), column.getValue());
                    }
                });
                operation = "删除";
                break;
            default:
                break;
        }
        log.info("---------操作:{},数据={}----------", operation, map);
    }

    /**
     * 将下划线大写方式命名的字符串转换为驼峰式。如果转换前的下划线大写方式命名的字符串为空,则返回空字符串。</br>
     * 例如:HELLO_WORLD->HelloWorld
     *
     * @param name 转换前的下划线大写方式命名的字符串
     * @return 转换后的驼峰式命名的字符串
     */
    public static String camelName(String name) {
        StringBuilder result = new StringBuilder();
        // 快速检查
        if (name == null || name.isEmpty()) {
            // 没必要转换
            return "";
        } else if (!name.contains("_")) {
            // 不含下划线,仅将首字母小写
            return name.substring(0, 1).toLowerCase() + name.substring(1);
        }
        // 用下划线将原始字符串分割
        String camels[] = name.split("_");
        for (String camel : camels) {
            // 跳过原始字符串中开头、结尾的下换线或双重下划线
            if (camel.isEmpty()) {
                continue;
            }
            // 处理真正的驼峰片段
            if (result.length() == 0) {
                // 第一个驼峰片段,全部字母都小写
                result.append(camel.toLowerCase());
            } else {
                // 其他的驼峰片段,首字母大写
                result.append(camel.substring(0, 1).toUpperCase());
                result.append(camel.substring(1).toLowerCase());
            }
        }
        return result.toString();
    }

}

3.adaptor监控同步

bootstrap.yml配置文件中的配置屏蔽

# canal:
#   manager:
#     jdbc:
#      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
#      username: root
#      password: 6231989zh

application.yml配置文件修改

canal.conf:
  mode: tcp #tcp kafka rocketMQ rabbitMQ
  consumerProperties:
    # canal tcp consumer
    # 配置需要订阅的地址,也就是deployer的地址
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
  # 配置数据源的地址
  srcDataSources:
    defaultDS:
      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
      username: root
      password: 6231989zh
  canalAdapters:
  - instance: eternity # cannl实例的名称或者mq的topic名称
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
      - name: es7 # 实例的名字,es的话,es6或者es7
        hosts: 127.0.0.1:9300 # es的地址,前面不需要http://
        properties:
          mode: transport # or rest
          # security.auth: test:123456 #  only used for rest mode
          cluster.name: elasticsearch # 配置的es的集群名称,可以通过http://localhost:9200/查看

es7实例,修改配置文件,找到里面的配置文件,拷贝一份,更改为eternity_user.yml

dataSourceKey: defaultDS # 对应的是application.yml中的srcDataSources下面的数据源名称
destination: eternity # 对应的是deployer中实例的名称
groupId: g1
esMapping:
  _index: user # es中索引的名称
  _id: _id # es中_id的值取自mysql数据库表中user的uid
#  upsert: true
#  pk: id
  sql: "SELECT
         u.uid as _id,
         u.user_name,
         u.password,
         u.real_name,
         u.sflag,
         u.lock,
         u.create_date,
         ur.rid
        FROM
         user u left join user_role ur on u.uid = ur.uid " # 此sql是为了同步数据使用,cannl只会同步增量数据,全量和增量数据需要使用此sql导入
#  objFields:
#    _labels: array:;
  etlCondition: "where u.uid>={}" # 此条件是配合上面的sql使用,全量数据导入的时候加上限定条件,指定需要导入数据的范围,条件可以改变
  commitBatch: 3000

全量数据导入

http://localhost:8081/etl/es7/eternity_user.yml

二、配置deployer消息投递到rabbitmq,java和adaptor直接订阅rabbitmq的queue

0.前提配置

  • exchange:canal.exchange
  • queue:canal.queue
  • key:canal.routing.key

1.deployer的配置,cannl.properties

cannl.properties

#################################################
######### 		common argument		#############
#################################################
# tcp, kafka, rocketMQ, rabbitMQ, pulsarMQ
canal.serverMode = rabbitMQ
#################################################
######### 		destinations		#############
#################################################
# cannl监控的是eternity的实例,对应的是conf文件夹下面的eternity文件夹
canal.destinations = eternity
# conf root dir。eternity实例的配置文件从../conf找
canal.conf.dir = ../conf
##################################################
######### 		    RabbitMQ	     #############
##################################################
rabbitmq.host = 127.0.0.1
rabbitmq.virtual.host = /
rabbitmq.exchange =canal.exchange
rabbitmq.username = canal
rabbitmq.password = canal
rabbitmq.deliveryMode = 

实例配置,instance.properties

已经有现成的实例example,把example文件夹拷贝一份,更改为eternity,此时就对应上了上面配置文件中配置的实例eternity

#################################################
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0
# position info
# 配置需要监控的数据地址
canal.instance.master.address=localhost:3306

# username/password
# 配置需要监控数据库的帐号和密码
canal.instance.dbUsername=root
canal.instance.dbPassword=6231989zh

# table regex
# 配置需要监控的表,此时可以设置某张特定的表或者全部的表,使用.*\\..*会报错
canal.instance.filter.regex=.*\..*
# mq config,此时配置的是rabbitmq的key
canal.mq.topic=canal.routing.key

2.adaptor监控同步

bootstrap.yml配置文件中的配置屏蔽

# canal:
#   manager:
#     jdbc:
#      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
#      username: root
#      password: 6231989zh

application.yml配置文件修改

canal.conf:
  mode: rabbitMQ #tcp kafka rocketMQ rabbitMQ
  consumerProperties:
    # canal tcp consumer
    # 配置需要订阅的地址,也就是deployer的地址
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
  # 配置数据源的地址
  srcDataSources:
    defaultDS:
      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
      username: root
      password: 6231989zh
  canalAdapters:
  - instance: canal.queue # cannl实例的名称或者rabbitmq的queue名称
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
      - name: es7 # 实例的名字,es的话,es6或者es7
        hosts: 127.0.0.1:9300 # es的地址,前面不需要http://
        properties:
          mode: transport # or rest
          # security.auth: test:123456 #  only used for rest mode
          cluster.name: elasticsearch # 配置的es的集群名称,可以通过http://localhost:9200/查看

es7实例,修改配置文件,找到里面的配置文件,拷贝一份,更改为eternity_user.yml

dataSourceKey: defaultDS # 对应的是application.yml中的srcDataSources下面的数据源名称
destination: canal.queue # 对应的是rabbitmq的queue名称
groupId: g1
esMapping:
  _index: user # es中索引的名称
  _id: _id # es中_id的值取自mysql数据库表中user的uid
#  upsert: true
#  pk: id
  sql: "SELECT
         u.uid as _id,
         u.user_name,
         u.password,
         u.real_name,
         u.sflag,
         u.lock,
         u.create_date,
         ur.rid
        FROM
         user u left join user_role ur on u.uid = ur.uid " # 此sql是为了同步数据使用,cannl只会同步增量数据,全量和增量数据需要使用此sql导入
#  objFields:
#    _labels: array:;
  etlCondition: "where u.uid>={}" # 此条件是配合上面的sql使用,全量数据导入的时候加上限定条件,指定需要导入数据的范围,条件可以改变
  commitBatch: 3000

三、配置deployer消息投递到kafka,adaptor直接订阅kafka的topic

0.前提条件

  • topic:canal_kafka_test

1.deployer的配置,cannl.properties

cannl.properties

#################################################
######### 		common argument		#############
#################################################
# tcp, kafka, rocketMQ, rabbitMQ, pulsarMQ
canal.serverMode = kafka
#################################################
######### 		destinations		#############
#################################################
# cannl监控的是eternity的实例,对应的是conf文件夹下面的eternity文件夹
canal.destinations = eternity
# conf root dir。eternity实例的配置文件从../conf找
canal.conf.dir = ../conf
##################################################
######### 		     Kafka 		     #############
##################################################
kafka.bootstrap.servers = 192.168.3.12:9092,192.168.3.12:9093,192.168.3.12:9094
kafka.acks = all
kafka.compression.type = none
kafka.batch.size = 16384
kafka.linger.ms = 1
kafka.max.request.size = 1048576
kafka.buffer.memory = 33554432
kafka.max.in.flight.requests.per.connection = 1
kafka.retries = 0

kafka.kerberos.enable = false
kafka.kerberos.krb5.file = "../conf/kerberos/krb5.conf"
kafka.kerberos.jaas.file = "../conf/kerberos/jaas.conf"

实例配置,instance.properties

已经有现成的实例example,把example文件夹拷贝一份,更改为eternity,此时就对应上了上面配置文件中配置的实例eternity

#################################################
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0
# position info
# 配置需要监控的数据地址
canal.instance.master.address=localhost:3306

# username/password
# 配置需要监控数据库的帐号和密码
canal.instance.dbUsername=root
canal.instance.dbPassword=6231989zh

# table regex
# 配置需要监控的表,此时可以设置某张特定的表或者全部的表,使用.*\\..*会报错
canal.instance.filter.regex=.*\..*
# mq config,此时配置的是kafka的topic
canal.mq.topic=canal_kafka_test

2.adaptor监控同步

bootstrap.yml配置文件中的配置屏蔽

# canal:
#   manager:
#     jdbc:
#      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
#      username: root
#      password: 6231989zh

application.yml配置文件修改

canal.conf:
  mode: kafka #tcp kafka rocketMQ rabbitMQ
  consumerProperties:
    # canal tcp consumer
    # 配置需要订阅的地址,也就是deployer的地址
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
  # 配置数据源的地址
  srcDataSources:
    defaultDS:
      url: jdbc:mysql://127.0.0.1:3306/apiboot?useUnicode=true
      username: root
      password: 6231989zh
  canalAdapters:
  - instance: canal_kafka_test # cannl实例的名称或者kafka的topic名称
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
      - name: es7 # 实例的名字,es的话,es6或者es7
        hosts: 127.0.0.1:9300 # es的地址,前面不需要http://
        properties:
          mode: transport # or rest
          # security.auth: test:123456 #  only used for rest mode
          cluster.name: elasticsearch # 配置的es的集群名称,可以通过http://localhost:9200/查看

es7实例,修改配置文件,找到里面的配置文件,拷贝一份,更改为eternity_user.yml

dataSourceKey: defaultDS # 对应的是application.yml中的srcDataSources下面的数据源名称
destination: canal_kafka_test # 对应的是kafka的topic名称
groupId: g1
esMapping:
  _index: user # es中索引的名称
  _id: _id # es中_id的值取自mysql数据库表中user的uid
#  upsert: true
#  pk: id
  sql: "SELECT
         u.uid as _id,
         u.user_name,
         u.password,
         u.real_name,
         u.sflag,
         u.lock,
         u.create_date,
         ur.rid
        FROM
         user u left join user_role ur on u.uid = ur.uid " # 此sql是为了同步数据使用,cannl只会同步增量数据,全量和增量数据需要使用此sql导入
#  objFields:
#    _labels: array:;
  etlCondition: "where u.uid>={}" # 此条件是配合上面的sql使用,全量数据导入的时候加上限定条件,指定需要导入数据的范围,条件可以改变
  commitBatch: 3000

posted @ 2023-01-10 14:09  未月廿三  阅读(128)  评论(0编辑  收藏  举报