Ubuntu部署Canal同步Mysql到ES

下载安装包7.16.3

mysql:https://mirrors.tuna.tsinghua.edu.cn/mysql/yum/mysql-8.0-community-el7-x86_64/

es:https://mirrors.tuna.tsinghua.edu.cn/elasticstack/yum/elastic-7.x/7.16.3/

kibana:https://mirrors.tuna.tsinghua.edu.cn/elasticstack/yum/elastic-7.x/7.16.3/

elasticsearch下载地址:https://www.elastic.co/cn/downloads/past-releases#elasticsearch
kibana下载地址:https://www.elastic.co/cn/downloads/past-releases#kibana
canal下载地址:https://github.com/alibaba/canal/releases

参考文档:https://blog.csdn.net/zh1998wx/article/details/123101442?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-1-123101442-blog-104116866.pc_relevant_aa&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7Edefault-1-123101442-blog-104116866.pc_relevant_aa&utm_relevant_index=2

安装es

es配置文件

root@es:/es7# grep -v ^# app/es7/config/elasticsearch.yml
cluster.name: es7-test
node.name: node-1
path.data: /es7
path.logs: /es7/logs
bootstrap.memory_lock: true
network.host: 0.0.0.0
discovery.seed_hosts: ["172.25.0.129"]
cluster.initial_master_nodes: ["172.25.0.129"]
#在文件中末尾处添加下面参数,开启x-pack验证
xpack.security.enabled: true
xpack.license.self_generated.type: basic
xpack.security.transport.ssl.enabled: true
#关闭geoip数据库的更新,要不然启动es会报错
ingest.geoip.downloader.enabled: false

设置 elasticsearch 账号密码

cd app/es7/bin/
./elasticsearch-setup-passwords interactive

 安装ik分词器

下载地址:Releases · medcl/elasticsearch-analysis-ik · GitHub

root@es:/es7/app/es7/plugins/ik# ll
total 1440
drwxr-xr-x 3 es es   4096 Jun 19 18:22 ./
drwxr-xr-x 3 es es   4096 Jun 19 18:12 ../
-rw-r--r-- 1 es es 263965 Jan 18 18:46 commons-codec-1.9.jar
-rw-r--r-- 1 es es  61829 Jan 18 18:46 commons-logging-1.2.jar
drwxr-xr-x 2 es es   4096 Jan 18 18:45 config/
-rw-r--r-- 1 es es  54596 Jan 19 11:54 elasticsearch-analysis-ik-7.16.3.jar
-rw-r--r-- 1 es es 736658 Jan 18 18:46 httpclient-4.5.2.jar
-rw-r--r-- 1 es es 326724 Jan 18 18:46 httpcore-4.4.4.jar
-rw-r--r-- 1 es es   1807 Jan 19 11:54 plugin-descriptor.properties
-rw-r--r-- 1 es es    125 Jan 19 11:54 plugin-security.policy

 

启动

root@es:/opt/canal/admin/bin# netstat -ntpl
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name
tcp        0      0 127.0.0.53:53           0.0.0.0:*               LISTEN      1139/systemd-resolv
tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN      1234/sshd: /usr/sbi
tcp        0      0 0.0.0.0:8089            0.0.0.0:*               LISTEN      7899/java
tcp        0      0 127.0.0.1:6010          0.0.0.0:*               LISTEN      2644/sshd: root@pts
tcp        0      0 127.0.0.1:6011          0.0.0.0:*               LISTEN      2996/sshd: root@pts
tcp        0      0 0.0.0.0:5601            0.0.0.0:*               LISTEN      1178/node
tcp        0      0 0.0.0.0:11110           0.0.0.0:*               LISTEN      7792/java
tcp        0      0 0.0.0.0:11111           0.0.0.0:*               LISTEN      7792/java
tcp        0      0 0.0.0.0:11112           0.0.0.0:*               LISTEN      7792/java
tcp        0      0 0.0.0.0:8081            0.0.0.0:*               LISTEN      7702/java
tcp6       0      0 :::9300                 :::*                    LISTEN      4386/java
tcp6       0      0 :::22                   :::*                    LISTEN      1234/sshd: /usr/sbi
tcp6       0      0 ::1:6010                :::*                    LISTEN      2644/sshd: root@pts
tcp6       0      0 ::1:6011                :::*                    LISTEN      2996/sshd: root@pts
tcp6       0      0 :::9000                 :::*                    LISTEN      7446/java
tcp6       0      0 :::9200                 :::*                    LISTEN      4386/java

安装kibana

#打开kibana端口:5601
server.port: 5601
#设置kibana服务host:127.0.0.1
server.host: "127.0.0.1"
#配置kibana连接es的地址和端口
elasticsearch.hosts: ["http://127.0.0.1:9200"]
#配置kibana连接es的验证用户和密码
elasticsearch.username: "elastic"
elasticsearch.password: "123456"
#设置中文编码
i18n.locale: "zh-CN"
#在末尾处添加以下属性,开启es对kibana的xpack认证
xpack.monitoring.ui.container.elasticsearch.enabled: true

下载和安装canal

 canal-servercanal-adaptercanal-admin

配置MySQL

[mysqld]
#设置serveri_id
server_id=1
#开启二进制日志功能
log-bin=mall-mysql-bin 
#设置使用的二进制日志格式(mixed,statement,row)
binlog_format=row 

拥有从库权限的账号,用于订阅binlog,这里创建的账号为canal

mysql> create user canal identified by 'passwd';
mysql> grant all on *.* to 'canal'@'%';
mysql> flush privtleges;

创建一个用于测试的数据库:canal

mysql> CREATE DATABASE canal;
mysql> CREATE DATABASE IF NOT EXISTS canal default charset utf8 COLLATE utf8_general_ci;

然后创建一张数据库表 product

mysql> use canal;
mysql> CREATE TABLE `product`  (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `title` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `sub_title` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `price` decimal(10, 2) NULL DEFAULT NULL,
  `pic` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

配置 canal-server (canal-deploy)
canal-server(canal-deploy):可以直接监听MySQL的binlog,把自己伪装成MySQL的从库,只负责接收数据,并不做处理。

进入 conf/example 路径,修改配置文件 instance.properties,主要是修改数据库相关配置

root@es:/opt/canal/deployer/conf/example# grep -v ^#  insance.properties

canal.instance.gtidon=false

canal.instance.master.address=172.25.0.128:3306
canal.instance.master.journal.name=
canal.instance.master.position=
canal.instance.master.timestamp=
canal.instance.master.gtid=

canal.instance.rds.accesskey=
canal.instance.rds.secretkey=
canal.instance.rds.instanceId=

canal.instance.tsdb.enable=true


canal.instance.dbUsername=canal
canal.instance.dbPassword=passwd
canal.instance.connectionCharset = UTF-8
canal.instance.enableDruid=false

canal.instance.filter.regex=.*\\..*
canal.instance.filter.black.regex=mysql\\.slave_.*

canal.mq.topic=example
canal.mq.partition=0

配置 canal-adapter

修改配置文件conf/application.ym

root@es:/opt/canal/adapter/conf# grep -v ^#  application.yml
server:
  port: 8081
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8
    default-property-inclusion: non_null

canal.conf:
  mode: tcp #tcp kafka rocketMQ rabbitMQ
  flatMessage: true
  zookeeperHosts:
  syncBatchSize: 1000
  retries:
  timeout:
  accessKey:
  secretKey:
  consumerProperties:
    # canal tcp consumer
    canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts:
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
    # kafka consumer
    kafka.bootstrap.servers: 127.0.0.1:9092
    kafka.enable.auto.commit: false
    kafka.auto.commit.interval.ms: 1000
    kafka.auto.offset.reset: latest
    kafka.request.timeout.ms: 40000
    kafka.session.timeout.ms: 30000
    kafka.isolation.level: read_committed
    kafka.max.poll.records: 1000
    # rocketMQ consumer
    rocketmq.namespace:
    rocketmq.namesrv.addr: 127.0.0.1:9876
    rocketmq.batch.size: 1000
    rocketmq.enable.message.trace: false
    rocketmq.customized.trace.topic:
    rocketmq.access.channel:
    rocketmq.subscribe.filter:
    # rabbitMQ consumer
    rabbitmq.host:
    rabbitmq.virtual.host:
    rabbitmq.username:
    rabbitmq.password:
    rabbitmq.resource.ownerId:

  srcDataSources:
     defaultDS:
       url: jdbc:mysql://172.25.0.128:3306/canal?useUnicode=true   # canal为授权的database名称
       username: canal
       password: passwd
  canalAdapters:
  - instance: example # canal instance Name or mq topic name
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
      - name: es7
        hosts: http://172.25.0.129:9200 # 127.0.0.1:9200 for rest mode
        properties:
            mode: rest # or rest
            security.auth: elastic:passwd #  only used for rest mode
            cluster.name: es7-test

额外

配置普通用户同步

创建账号
create user 'customer_test'@'%'  IDENTIFIED BY 'Read429..';
授权
GRANT INSERT,SELECT,REPLICATION SLAVE,REPLICATION CLIENT ON *.*  TO 'customer_test'@'%';
修改密码规则
ALTER USER 'customer_test'@'%' IDENTIFIED WITH mysql_native_password BY 'Read429..';
刷新
flush privileges;
密码尽量不使用@

修改 canal-adapter/conf/es7/mytest_user.yml 文件,用于配置MySQL中的表与Elasticsearch中索引的映射关系

root@es:/opt/canal/adapter/conf/es7# cat mytest_user.yml
dataSourceKey: defaultDS
destination: example
groupId: g1
esMapping:
  _index: mytest_user   # 索引要跟es创建的对应
  _id: _id
#  upsert: true
#  pk: id
#  sql: "select a.id as _id, a.name, a.role_id, b.role_name,
#        a.c_time from user a
#        left join role b on b.id=a.role_id"

  sql: "SELECT   # 查询sql
         p.id AS _id,
         p.title,
         p.sub_title,
         p.price,
         p.pic
        FROM
         product p"

#  objFields:
#    _labels: array:;
  etlCondition: "where a.c_time>={}"
  commitBatch: 3000

详解:

dataSourceKey: defaultDS
destination: comment
groupId: g1
esMapping:
_index: commentrepeatedsj
_id: comment_md5
upsert: true
pk: commentMd5
sql: "SELECT
comment_md5 AS commentMd5,
topic_msg_id AS topicMsgId,
create_time AS createTime
FROM
comment_repeated_sj_0"

objFields:
_labels: array:;
etlCondition: "where a.c_time>={}"
commitBatch: 300

这是一个数据同步任务的配置文件,相比之前的配置文件,增加了一些参数,下面逐个解释一下:

dataSourceKey: 数据源的标识,这里设置为 defaultDS,表示使用默认数据源。
destination: 目标数据的类型,这里设置为 comment,表示将数据同步到名为 comment 的目标数据源中。
groupId: 数据同步任务的分组标识,这里设置为 g1。
esMapping: Elasticsearch 索引的映射配置,包括索引名称 _index、主键名称 _id、操作类型 upsert、主键名称 pk、SQL 查询语句 sql 等。
_index: Elasticsearch 索引的名称,这里设置为 commentrepeatedsj。
_id: Elasticsearch 索引的主键名称,这里设置为 comment_md5,表示使用数据中的 comment_md5 字段作为主键。
upsert: 数据的操作类型,这里设置为 true,表示进行 upsert 操作,即如果数据已存在则更新,否则插入。
pk: 数据源中的主键名称,这里设置为 commentMd5,表示将数据源中的 comment_md5 字段与 Elasticsearch 索引中的 _id 字段进行映射。
sql: 数据源的 SQL 查询语句,用于从源数据源中获取数据。
etlCondition: 数据的过滤条件,这里表示只同步 c_time 大于等于某个值的数据。
commitBatch: 数据同步的批次大小,这里设置为 300。
#objFields: 对象字段的映射配置,这里是一个注释,表示不进行对象字段映射。如果需要进行对象字段映射,可以将注释去掉,并设置对应的映射关系。

总的来说,这个配置文件定义了一个数据同步任务,将源数据源中的数据同步到 Elasticsearch 中,其中包括了数据源的标识、目标数据类型、分组标识、Elasticsearch 索引的映射配置、数据过滤条件和同步批次等信息,同时还指定了主键映射关系和进行 upsert 操作。

额外需求
需要同步id与_id,同时字段和mysql内置字段冲突
dataSourceKey: defaultDS
destination: example_1222
groupId: g1
esMapping:   # 字段映射
  _index: hotsearchdata
  id: id   # 字段映射  
  _id: _id  # 字段映射
  upsert: true
  pk: id
  sql: "select
        id AS id,  # 字段映射
        id AS _id,   # 字段映射
        `key`,
        title,
        author,
        author_id AS authorId,
        source_url AS sourceUrl,
        create_time AS createTime,
        publish_time AS publishTime,
        update_time AS updateTime,
        content,
        cover_img AS coverImg,
        video_url AS videoUrl,
        source,
        comment_count AS commentCount,
        transmit_count AS transmitCount,
        like_count AS likeCount,
        share_count AS shareCount,
        read_count AS readCount
  from
    hot_search_data"
#  objFields:
#    _labels: array:;
  etlCondition: "where a.id>={}"
  commitBatch: 3000

 

报错:

ERROR Config: comment_repeated_sj_0.yml esMapping._id or esMapping.pk
这个错误提示是在执行数据同步任务时出现的,它表示配置文件中缺少 esMapping._id 或 esMapping.pk 字段,导致无法确定主键字段,从而无法进行数据同步操作。

在 Elasticsearch 中,每个文档都必须有一个唯一的标识符,称为 _id。在数据同步任务中,需要将源数据源中的某个字段映射到 Elasticsearch 索引中的 _id 字段,以确保每个文档有一个唯一的标识符。

通常情况下,可以在配置文件中将 esMapping._id 或 esMapping.pk 字段设置为源数据源中的某个唯一字段,例如数据库表中的主键字段。如果配置文件中没有设置这两个字段,Elasticsearch 将无法确定文档的唯一标识符,从而无法进行数据同步操作。

因此,您需要检查配置文件中是否已经设置了 esMapping._id 或 esMapping.pk 字段,并确保这些字段映射到了源数据源中的某个唯一字段。如果配置文件中确实没有设置这些字段,则需要添加它们并设置正确的映射关系,以便数据同步任务能够正常执行。

配置canal-admin

 导入sql,将canal_manager.sql导入到myaql中
source /root/canal_manager.sql

 修改配置文件conf/application.yml,按如下配置即可,主要是修改数据源配置和canal-admin的管理账号配置,注意需要用一个有读写权限的数据库账号,比如管理账号root:root

root@es:/opt/canal/admin/conf# grep -v ^#  application.yml
server:
  port: 8089
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8

spring.datasource:
  address: 172.25.0.128:3306
  database: canal_manager
  username: canal
  password: passwd
  driver-class-name: com.mysql.jdbc.Driver
  url: jdbc:mysql://${spring.datasource.address}/${spring.datasource.database}?useUnicode=true&characterEncoding=UTF-8&useSSL=false
  hikari:
    maximum-pool-size: 30
    minimum-idle: 1

canal:
  adminUser: admin
  adminPasswd: admin

测试集群健康状态和ik分词器

root@es:/es7/app/es7/plugins/ik# curl -k -u elastic:密码 -XGET http://192.168.20.133:9200/_cluster/health?pretty -i
HTTP/1.1 200 OK
X-elastic-product: Elasticsearch
content-type: application/json; charset=UTF-8
content-length: 463

{
  "cluster_name" : "es7-test",
  "status" : "green",
  "timed_out" : false,
  "number_of_nodes" : 1,
  "number_of_data_nodes" : 1,
  "active_primary_shards" : 20,
  "active_shards" : 20,
  "relocating_shards" : 0,
  "initializing_shards" : 0,
  "unassigned_shards" : 0,
  "delayed_unassigned_shards" : 0,
  "number_of_pending_tasks" : 0,
  "number_of_in_flight_fetch" : 0,
  "task_max_waiting_in_queue_millis" : 0,
  "active_shards_percent_as_number" : 100.0
}

ik

root@es:/es7/app/es7/plugins/ik# curl -k -u elastic:密码 -XGET http://192.168.20.133:9200/_analyze -i  -H 'Content-Type: application/json' -d '{"text":"这个月工资减半","tokenizer": "ik_max_word"}'
HTTP/1.1 200 OK
X-elastic-product: Elasticsearch
content-type: application/json; charset=UTF-8
content-length: 501

{"tokens":[{"token":"这个","start_offset":0,"end_offset":2,"type":"CN_WORD","position":0},{"token":"个月","start_offset":1,"end_offset":3,"type":"CN_WORD","position":1},{"token":"月工资","start_offset":2,"end_offset":5,"type":"CN_WORD","position":2},{"token":"月工","start_offset":2,"end_offset":4,"type":"CN_WORD","position":3},{"token":"工资","start_offset":3,"end_offset":5,"type":"CN_WORD","position":4},{"token":"减半","start_offset":5,"end_offset":7,"type":"CN_WORD","position":5}]}r

创建es索引

PUT canal_product

{
    "settings":{
        "number_of_shards":10,
        "number_of_replicas":0,
        "analysis":{
            "analyzer":{
                "ik":{
                    "tokenizer":"ik_max_word"
                }
            }
        }
    },
    "mappings":{
        "properties":{
            "_id":{
                "type":"integer"
            },
            "title":{
                "type":"text"
            },
            "sub_title":{
                "type":"text"
            },
            "price":{
                "type":"integer"
            },
            "pic":{
                "type":"text"
            }
        }
    }
}

在MySQL数据库中创建数据

# 之后使用如下SQL语句在数据库中创建一条记录 
INSERT INTO product ( id, title, sub_title, price, pic ) VALUES ( 5, '小米8', ' 全面屏游戏智能手机 6GB+64GB', 1999.00, NULL );

 查询

GET canal_product/_search
root@es:/opt/canal/adapter/bin# curl -X POST localhost:8081/etl/es7/mytest_user.yml
{"succeeded":true,"resultMessage":"导入ES 数据:5 条"}

 

posted @ 2022-06-21 09:46  不会跳舞的胖子  阅读(215)  评论(0编辑  收藏  举报