使用 Docker 构建带 IK 分词器和拼音插件的 Elasticsearch 镜像
- 创建Dockerfile 文件:
# Elasticsearch 7.4.2 image with the IK and pinyin analysis plugins installed.
# Both plugin archives are the 7.4.2 releases, i.e. the same version as the base image tag.
FROM elasticsearch:7.4.2

# Run the plugin tool from the Elasticsearch home directory.
WORKDIR /usr/share/elasticsearch

# --batch skips the interactive confirmation prompt so the build never waits for input.
# One RUN per plugin keeps each download in its own cached layer.
RUN elasticsearch-plugin install --batch \
      https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.4.2/elasticsearch-analysis-ik-7.4.2.zip
RUN elasticsearch-plugin install --batch \
      https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v7.4.2/elasticsearch-analysis-pinyin-7.4.2.zip
- 在Dockerfile文件的同级目录下,执行构建镜像命令:
# Build the image from the Dockerfile in the current directory and tag it es_ik:7.4.2.
docker build --tag es_ik:7.4.2 .
- 启动容器:
# Start a detached container named es-ik from the es_ik:7.4.2 image,
# publishing ports 9200 and 9300 and setting discovery.type=single-node.
docker run \
  --interactive \
  --tty \
  --detach \
  --publish 9200:9200 \
  --publish 9300:9300 \
  --env "discovery.type=single-node" \
  --name es-ik \
  es_ik:7.4.2
- 验证分词器效果:
# POST a sample sentence to the _analyze endpoint, requesting the ik_smart tokenizer.
curl --request POST \
  --header 'Content-Type: application/json' \
  --data '{"text":"希望疫情早日结束,出门不必再戴口罩","tokenizer":"ik_smart"}' \
  "http://127.0.0.1:9200/_analyze?pretty"
返回结果:
{
"tokens" : [
{
"token" : "希望",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "疫情",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "早日",
"start_offset" : 4,
"end_offset" : 6,
"type" : "CN_WORD",
"position" : 2
},
{
"token" : "结束",
"start_offset" : 6,
"end_offset" : 8,
"type" : "CN_WORD",
"position" : 3
},
{
"token" : "出门",
"start_offset" : 9,
"end_offset" : 11,
"type" : "CN_WORD",
"position" : 4
},
{
"token" : "不必",
"start_offset" : 11,
"end_offset" : 13,
"type" : "CN_WORD",
"position" : 5
},
{
"token" : "再",
"start_offset" : 13,
"end_offset" : 14,
"type" : "CN_CHAR",
"position" : 6
},
{
"token" : "戴",
"start_offset" : 14,
"end_offset" : 15,
"type" : "CN_CHAR",
"position" : 7
},
{
"token" : "口罩",
"start_offset" : 15,
"end_offset" : 17,
"type" : "CN_WORD",
"position" : 8
}
]
}