Elasticsearch data migration via Logstash
Problem description
Data from a self-hosted ES cluster needs to be migrated to an ES domain on AWS.
Solution
Sync each index individually and write it into the AWS ES cluster.
env
- CentOS 7.x ES (self-hosted)
- AWS ES
step1: check index && version
curl -s -u xxx:'yyy' https://xxxxx:9200                        # view the source cluster version
curl -s -u xxx:'yyy' https://xxxxx:9200/_cat/indices?h=index   # list the indices to migrate
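If the index list is long, it can be saved to a file and reused when looping over the migrations in step2 (a minimal sketch; the credentials, host, and the indices.txt filename are placeholders/assumptions):
curl -s -u xxx:'yyy' "https://xxxxx:9200/_cat/indices?h=index" > indices.txt   # one index name per line
cat indices.txt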
step2: logstash
01. openjdk
# prepare a VM inside the AWS VPC
wget https://download.java.net/java/GA/jdk11/9/GPL/openjdk-11.0.2_linux-x64_bin.tar.gz
mkdir -p /tmp/reindex && tar -xzf openjdk-11.0.2_linux-x64_bin.tar.gz -C /tmp/reindex   # unpack so the JAVA_HOME path below exists
export JAVA_HOME=/tmp/reindex/jdk-11.0.2
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$JAVA_HOME/bin:$PATH
wget https://artifacts.elastic.co/downloads/logstash/logstash-7.4.2.tar.gz   # the migration has no strict Logstash version requirement; matching the source major version is usually enough, 7.x = 7.x
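A minimal sketch of unpacking Logstash and sanity-checking both installs (the /tmp/reindex path is the assumption carried over from JAVA_HOME above):
tar -xzf logstash-7.4.2.tar.gz -C /tmp/reindex
cd /tmp/reindex/logstash-7.4.2
java -version              # should report openjdk 11.0.2
./bin/logstash --version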
02. logstash conf
1.tmpl
input {
  elasticsearch {
    hosts   => ["xxx:9200"]
    index   => "${INDEX}"
    size    => 5000
    scroll  => "50m"
    docinfo => true
  }
}
filter {
}
output {
  elasticsearch {
    hosts              => ["xxxx-1.es.amazonaws.com:443"]
    ssl                => true
    user               => "xxx"
    password           => "xxxx"
    pool_max           => 5000
    pool_max_per_route => 500
    index              => "%{[@metadata][_index]}_fix"   # append _fix to write to a new index, or drop _fix to keep the original index name
    document_type      => "%{[@metadata][_type]}"
    document_id        => "%{[@metadata][_id]}"
    ilm_enabled        => false
  }
}
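Before running the migration it is worth confirming that the VM can reach the AWS ES endpoint with the output credentials (endpoint and credentials below are the same placeholders used in the config):
curl -s -u xxx:'xxxx' https://xxxx-1.es.amazonaws.com:443   # should return the cluster info JSON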
03. run the migration for each index
export INDEX="ui_click" && envsubst < ./1.tmpl > 1.conf   # substitute ${INDEX} in the template to generate the per-index config
./bin/logstash -f 1.conf -w 50 -b 5000 -u 120             # -w pipeline workers, -b batch size, -u batch delay (ms)
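When many indices need migrating, the same template can be driven from the index list saved in step1 (a sketch; the indices.txt file and the loop itself are assumptions, not part of the original procedure):
while read -r INDEX; do
  export INDEX
  envsubst < ./1.tmpl > 1.conf                    # fill in ${INDEX} for this index
  ./bin/logstash -f 1.conf -w 50 -b 5000 -u 120
done < indices.txt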
step3: verify data
GET /login/_count    # document count
GET /login/_search   # sample documents
GET /login/_search   # range query
{
  "size": 10,
  "query": {
    "range": {
      "time": {                    # field "time" in the login index
        "gte": "1591200000000",    # epoch millis; gte must be the earlier timestamp
        "lte": "1592409600000"
      }
    }
  }
}
GET /login/_search?sort=time:desc&size=1   # sort by time descending to fetch the latest document
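To compare document counts between the two clusters directly, the _count endpoint can be hit on both sides (a sketch; hosts, credentials, and the _fix suffix follow the placeholders used above):
curl -s -u xxx:'yyy'  https://xxxxx:9200/login/_count                        # source
curl -s -u xxx:'xxxx' https://xxxx-1.es.amazonaws.com:443/login_fix/_count   # AWS ES target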