Syncing MySQL Data to Elasticsearch with Logstash
Versions
Logstash: 7.6.2
Elasticsearch: 7.6.2
Kibana: 7.6.2
Operating system: Windows 10
Configuration
Logstash runs with its default local configuration; no other settings need attention. The only change was to jvm.options, where the heap size was reduced a little.
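For reference, that change lives in config/jvm.options; a minimal sketch of a reduced heap (the 512m values here are an assumption, Logstash 7.x ships with 1g defaults):
-Xms512m
-Xmx512m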
Create a connector folder under the Logstash directory to hold the MySQL JDBC driver jar, and remember to actually put the jar file in it.
Create the MySQL pipeline scripts: make a script folder under the Logstash directory to hold them.
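The pipelines below read from a TEST table in the solo database. Judging from the query results shown later in this post, it has id, name, and pwd columns; a minimal sketch of such a table (the column types are assumptions):
CREATE TABLE TEST (
  id   BIGINT PRIMARY KEY,  -- numeric id, used later as the incremental tracking column
  name VARCHAR(50),
  pwd  VARCHAR(50)
);
INSERT INTO TEST (id, name, pwd) VALUES
  (1, 'zhangsan', '123456'),
  (2, 'lisi', '123456'),
  (3, 'wangwu', '123456');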
One-time full sync script
input {
  jdbc {
    # Path to the MySQL JDBC driver jar
    jdbc_driver_library => "D:/Serves/logstash-7.6.2/connector/mysql-connector-java-8.0.19.jar"
    # MySQL driver class (this is the MySQL 8 driver)
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    # MySQL JDBC connection string
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/solo?useUnicode=true&characterEncoding=utf-8&serverTimezone=GMT%2B8"
    # MySQL user; the account must allow remote access
    jdbc_user => "root"
    # MySQL password
    jdbc_password => "Root.123"
    # SQL to execute
    statement => "SELECT * from TEST"
  }
}
output {
  elasticsearch {
    # Elasticsearch cluster address; you don't have to list every node, and the default port can be omitted
    hosts => [ "localhost:9200" ]
    # Index name, used later when querying; create a matching mapping in Elasticsearch first, or rely on the default (dynamic) mapping
    index => "role"
    # Document type (note: document_type is deprecated in Elasticsearch 7.x)
    document_type => "_doc"
    # Elasticsearch document ID, taken from the id column of the SQL in the input
    document_id => "%{id}"
  }
  stdout {
    # JSON output; with this set, the SQL execution log is printed to the console
    codec => json_lines
  }
}
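As the output comments note, a mapping can be created up front instead of relying on dynamic mapping. A minimal Kibana console sketch, with field types that are assumptions rather than anything from the original setup:
PUT role
{
  "mappings": {
    "properties": {
      "id":   { "type": "keyword" },
      "name": { "type": "text" },
      "pwd":  { "type": "keyword" }
    }
  }
}
If you skip this step, Elasticsearch derives a dynamic mapping when the first document arrives.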
Checking the indices in Kibana
GET _cat/indices
yellow open megacorp b75hLxe3RBauFMWKnltqbg 1 1 3 1 13.2kb 13.2kb
green open .kibana_task_manager_1 zjA3t6CWRTWE0VOPQeYHYw 1 0 2 2 56.7kb 56.7kb
green open .apm-agent-configuration 6RmJvSAPRE6xuLX1GUERKg 1 0 0 0 283b 283b
green open .kibana_1 mBaMHmjzSBSnwlEMTMm57w 1 0 32 13 64.9kb 64.9kb
Syncing the data
Start Logstash to sync the MySQL data into Elasticsearch:
D:\Serves\logstash-7.6.2
λ bin\logstash -f .\script\mysql-test.conf
# Log output
Java HotSpot(TM) 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release.
......
[2020-04-30T09:36:23,488][INFO ][logstash.inputs.jdbc ][main] (0.049250s) SELECT * from TEST
{"name":"lisi","@timestamp":"2020-04-30T01:36:23.566Z","pwd":"123456","id":"2","@version":"1"}
{"name":"wangwu","@timestamp":"2020-04-30T01:36:23.567Z","pwd":"123456","id":"3","@version":"1"}
{"name":"zhangsan","@timestamp":"2020-04-30T01:36:23.556Z","pwd":"123456","id":"1","@version":"1"}
[2020-04-30T09:36:25,230][INFO ][logstash.runner ] Logstash shut down.
Checking the sync result in Kibana
GET _cat/indices
# A new index, role, has appeared
yellow open megacorp b75hLxe3RBauFMWKnltqbg 1 1 3 1 13.2kb 13.2kb
yellow open role olBw8F1LTWmrBBmKAYbmmg 1 1 3 0 15.8kb 15.8kb
green open .kibana_task_manager_1 zjA3t6CWRTWE0VOPQeYHYw 1 0 2 0 31.6kb 31.6kb
green open .apm-agent-configuration 6RmJvSAPRE6xuLX1GUERKg 1 0 0 0 283b 283b
green open .kibana_1 mBaMHmjzSBSnwlEMTMm57w 1 0 32 15 42.9kb 42.9kb
# Query the index data
GET role/_search
{
  "query": {
    "match_all": {}
  }
}
# Result
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "role",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : 1.0,
        "_source" : {
          "name" : "wangwu",
          "@timestamp" : "2020-04-30T01:36:23.567Z",
          "pwd" : "123456",
          "id" : "3",
          "@version" : "1"
        }
      },
      {
        "_index" : "role",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "name" : "zhangsan",
          "@timestamp" : "2020-04-30T01:36:23.556Z",
          "pwd" : "123456",
          "id" : "1",
          "@version" : "1"
        }
      },
      {
        "_index" : "role",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "name" : "lisi",
          "@timestamp" : "2020-04-30T01:36:23.566Z",
          "pwd" : "123456",
          "id" : "2",
          "@version" : "1"
        }
      }
    ]
  }
}
Appendix: incremental scheduled sync script
If the txt file that stores sql_last_value is not recording the latest id after each run, check whether the id column in the database is a numeric type. The tracking column currently supports only two types, numeric and timestamp, and numeric is the default.
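For reference, the jdbc input persists sql_last_value to that file as YAML; after syncing the three rows above, the file would be expected to contain something like:
--- 3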
input {
  jdbc {
    # Path to the MySQL JDBC driver jar
    jdbc_driver_library => "D:/Serves/logstash-7.6.2/connector/mysql-connector-java-8.0.19.jar"
    # MySQL driver class (this is the MySQL 8 driver)
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    # MySQL JDBC connection string
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/solo?useUnicode=true&characterEncoding=utf-8&serverTimezone=GMT%2B8"
    # MySQL user; the account must allow remote access
    jdbc_user => "root"
    # MySQL password
    jdbc_password => "Root.123"
    # Seconds to wait for a connection before the pool raises a PoolTimeoutError
    jdbc_pool_timeout => 5
    # Enable paging
    jdbc_paging_enabled => true
    # Page size
    jdbc_page_size => 10000
    # Avoid garbled Chinese characters
    codec => plain { charset => "UTF-8" }
    # Track a column value instead of the run time
    use_column_value => true
    # Column to track
    tracking_column => "id"
    # Tracking column type; only numeric and timestamp are supported, numeric is the default
    tracking_column_type => "numeric"
    # Record the last run's value, enabling incremental updates
    record_last_run => true
    # File that stores the last sql_last_value; the column's initial value must be set in this file
    last_run_metadata_path => "D:/Serves/logstash-7.6.2/script/last_value/increase.txt"
    # Force column names to lowercase
    lowercase_column_names => true
    # Cron-like schedule, as in Linux crontab (fields, left to right: minute, hour, day of month, month, day of week); this runs once a minute
    schedule => "* * * * *"
    # SQL to execute
    statement => "SELECT * from TEST where id > :sql_last_value"
    # Whether to wipe the stored last-run state on every start
    clean_run => false
  }
}
filter {
  json {
    source => "message"
    remove_field => ["message"]
  }
  # Work around @timestamp being 8 hours behind local time (UTC vs UTC+8); borrowed from another post
  ruby {
    code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
  }
  ruby {
    code => "event.set('@timestamp',event.get('timestamp'))"
  }
  mutate {
    remove_field => ["timestamp"]
  }
}
output {
  elasticsearch {
    # Elasticsearch cluster address; you don't have to list every node, and the default port can be omitted
    hosts => [ "localhost:9200" ]
    # Index name, used later when querying; create a matching mapping in Elasticsearch first, or rely on the default (dynamic) mapping
    index => "role"
    # Document type (note: document_type is deprecated in Elasticsearch 7.x)
    document_type => "_doc"
    # Elasticsearch document ID, taken from the id column of the SQL in the input
    document_id => "%{id}"
  }
  stdout {
    # JSON output; with this set, the SQL execution log is printed to the console
    codec => json_lines
  }
}
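Start the incremental pipeline the same way as the one-time script; the file name mysql-increase.conf below is an assumption, use whatever name you saved the config under:
D:\Serves\logstash-7.6.2
λ bin\logstash -f .\script\mysql-increase.conf
Because schedule is set, Logstash keeps running and re-executes the query every minute, picking up only rows whose id is greater than the stored sql_last_value.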