使用logstash迁移elasticsearch(实时同步)

环境：

原elasticsearch版本:6.5.0

目的elasticsearch 版本:7.4.0

1.下载logstash

我这里下载的是6.8.5版本

https://artifacts.elastic.co/downloads/logstash/logstash-6.8.5.tar.gz

2.上传到目标服务器进行解压

我这里把logstash部署在目标服务器上,实际可以根据各自的情况选择部署在哪台服务器(只要能同时访问源端和目标端的elasticsearch即可)

在root账号下处理

[root@localhost soft]# tar -xvf logstash-6.8.5.tar.gz
[root@localhost soft]# mv logstash-6.8.5 /opt/

3.迁移单个index

添加配置文件,文件内容如下:

[root@localhost config]# cd /opt/logstash-6.8.5/config

配置文件1：

[root@localhost config]# more sync_single_index.conf
input {
    elasticsearch {
        hosts => ["http://192.168.1.136:19200"]
        index => "index_test"
        size => 1000
        scroll => "1m"
        docinfo => true
    }
}
# filter部分是可选的,不需要时可以整段删除或注释掉
filter {
  mutate {
    remove_field => ["@timestamp", "@version"]  #过滤掉logstash 自己加上的字段
  }
}

output {
    elasticsearch {
        hosts => ["http://192.168.1.118:9200"]
        user => "elastic"
        password => "elastic"
        index => "index_test"
    }
}

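配置文件2也可以的(区别:配置文件1没有指定document_id,目标端会自动生成新的_id,重复执行会产生重复文档;配置文件2通过docinfo带出的元数据保留源端的index名称、type和_id,重复执行只会覆盖同_id的文档):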

[root@localhost config]# more sync_single_index.conf 
input {
    elasticsearch {
        hosts => ["http://192.168.1.108:19200"]
        index => "app_message_all"
        user => "elastic"
        password => "elastic"
        size => 1000
        scroll => "1m"
        docinfo => true
    }
}
# filter部分是可选的,不需要时可以整段删除或注释掉
filter {
  mutate {
    remove_field => ["@timestamp", "@version"]  #过滤掉logstash 自己加上的字段
  }
}

output {
    elasticsearch {
        hosts => ["http://192.168.1.109:19200"]
        user => "elastic"
        password => "elastic"
        index => "%{[@metadata][_index]}"
        document_type => "%{[@metadata][_type]}"
        document_id => "%{[@metadata][_id]}"
    }
}

执行如下命令进行迁移

/opt/logstash-6.8.5/bin/logstash -f /opt/logstash-6.8.5/config/sync_single_index.conf

可以编写shell脚本后台执行
vi run_sync_single_index.sh

#!/bin/bash
/opt/logstash-6.8.5/bin/logstash -f /opt/logstash-6.8.5/config/sync_single_index.conf

后台执行(首次执行前需先赋予脚本执行权限: chmod +x run_sync_single_index.sh)
nohup ./run_sync_single_index.sh > run_sync_single_index.out 2>&1 &

说明:若不加上如下过滤项,新的index中会多出logstash自动添加的@timestamp和@version两个字段

filter {
  mutate {
    remove_field => ["@timestamp", "@version"]  #过滤掉logstash自己加上的字段，源index是没有的
  }
}

未加过滤项时,新index中多出的字段(以下输出只截取了相关部分):

[root@localhost ~]# curl -u elastic:elastic -H "Content-Type: application/json" -XGET "http://192.168.1.109:19200/app_message_all_nofilter/_mappings?pretty=true"
{
  "app_message_all_nofilter" : {
    "mappings" : {
      "doc" : {
        "properties" : {
          "@timestamp" : {
            "type" : "date"
          },
          "@version" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },

4.迁移所有的index

配置文件内容如下:

[root@localhost config]# more sync_all_index.conf
input {
    elasticsearch {
        hosts => ["http://192.168.1.108:19200"]
        index => "*"
        user => "elastic"
        password => "elastic"
        size => 1000
        scroll => "1m"
        docinfo => true
    }
}
# filter部分是可选的,不需要时可以整段删除或注释掉
filter {
  mutate {
    remove_field => ["@timestamp", "@version"]  #过滤掉logstash 自己加上的字段
  }
}

output {
    elasticsearch {
        hosts => ["http://192.168.1.109:19200"]
        user => "elastic"
        password => "elastic"
        index => "%{[@metadata][_index]}"
        document_type => "%{[@metadata][_type]}"
        document_id => "%{[@metadata][_id]}"
    }
}

执行如下命令迁移

多个索引同步是并行执行的
/opt/logstash-6.8.5/bin/logstash -f /opt/logstash-6.8.5/config/sync_all_index.conf

###########################准实时同步例子##########################

说明:

1.测试发现:全量同步完成后,源端新增的数据不会立即出现在目标端,要等到下一次调度执行后才会同步过去。

2.源端新增的index也会同步

每2分钟同步一次(每次调度都会按input中的查询重新读取一遍源端数据,这里没有配置query即全量读取,再依靠document_id覆盖写入目标端;源端已删除的文档不会在目标端被删除)

[root@localhost config]# more sync_all_index.conf
input {
    elasticsearch {
        hosts => ["http://192.168.1.108:19200"]
        index => "*"
        user => "elastic"
        password => "elastic"
        size => 1000
        scroll => "1m"
        docinfo => true
        schedule => "*/2 * * * *"
    }
}
# filter部分是可选的,不需要时可以整段删除或注释掉
filter {
  mutate {
    remove_field => ["@timestamp", "@version"]  #过滤掉logstash 自己加上的字段
  }
}

output {
    elasticsearch {
        hosts => ["http://192.168.1.109:19200"]
        user => "elastic"
        password => "elastic"
        index => "%{[@metadata][_index]}"
        document_type => "%{[@metadata][_type]}"
        document_id => "%{[@metadata][_id]}"
    }
}

每分钟同步一次

[root@localhost config]# more sync_all_index.conf
input {
    elasticsearch {
        hosts => ["http://192.168.1.108:19200"]
        index => "*"
        user => "elastic"
        password => "elastic"
        size => 1000
        scroll => "1m"
        docinfo => true
        schedule => "* * * * *"
    }
}
# filter部分是可选的,不需要时可以整段删除或注释掉
filter {
  mutate {
    remove_field => ["@timestamp", "@version"]  #过滤掉logstash 自己加上的字段
  }
}

output {
    elasticsearch {
        hosts => ["http://192.168.1.109:19200"]
        user => "elastic"
        password => "elastic"
        index => "%{[@metadata][_index]}"
        document_type => "%{[@metadata][_type]}"
        document_id => "%{[@metadata][_id]}"
    }
}

说明:

索引模糊匹配:如 index => "hospital*"
具体多个索引,逗号分隔:index => "hospital_info_demo1,hospital_info_demo2,hospital_info_demo3,hospital_info_demo4"
模糊匹配后排除某个索引:index => "hospital*,-hospital_info_demo4"
index => "hospital*,-hospital_info_demo4,-hospital_info_demo3"
同步所有index,过滤掉系统的index: index => "*,-.monitoring*,-.security*,-.kibana*"

output 索引名称可以添加自定义字符串:index => "copy_%{[@metadata][_index]}"