初试ElasticSearch做菜谱搜索 整理思路
1.使用Docker部署Elasticsearch集群及Kibana服务
借鉴https://blog.csdn.net/ctwy291314/article/details/111313419这位博主的部署方式。
2.学习es的DSL语法
推荐【慕课】ElasticSearch+Spark 构建高匹配度搜索服务+千人千面推荐系统
3.使用Logstash的logstash-input-jdbc插件从数据库导入数据,完成全量索引的初始化构建
4.阿里canal中间件完成准实时增量索引构建
5.业务功能开发(中文IK分词器插件安装、定制化分词、同义词扩展、相关性重塑)
6.总结一些东西
# Recipe search: full-text match over several fields, filtered by status flags
# and tags, then re-scored with popularity counters.
#
# Notes on the request below (kept outside the body so the body is valid JSON):
#  - "explain": true asks Elasticsearch to return the score breakdown per hit.
#  - "name^10": the ^10 suffix is the field weight (boost) for "name".
#  - "type": "most_fields": other multi_match types exist as well.
#  - A term clause placed under "must" would also filter, but it would
#    contribute to the score.
#  - "filter" clauses do not take part in scoring; tags, status flags and
#    similar conditions are best placed there.
#  - "functions": additional custom scoring; "weight" is the weight applied to
#    each function.
#    NOTE(review): field_value_factor fails on documents that lack the field
#    unless "missing" is set — confirm every document carries both counters.
#  - "boost_mode": with "replace", the function score replaces the query score.
#  - "sort": sorting on _score (not a _source field) still yields scores;
#    otherwise no score is computed.
#  - In-app ordering can either use "sort" (no scoring) or use
#    boost_mode = replace and order by the custom score.
GET cookbook/_search
{
  "explain": true,
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "multi_match": {
                "query": "国庆佳节",
                "fields": [
                  "name^10",
                  "introduction",
                  "description",
                  "materials",
                  "seasons",
                  "categories",
                  "platforms",
                  "themes",
                  "tags"
                ],
                "type": "most_fields"
              }
            }
          ],
          "filter": [
            { "term": { "verified": { "value": "true" } } },
            { "term": { "grounding": { "value": "true" } } },
            { "term": { "tags": "家常菜" } },
            { "term": { "tags": "夜宵" } }
          ]
        }
      },
      "functions": [
        {
          "field_value_factor": { "field": "collect_count" },
          "weight": 0.00002
        },
        {
          "field_value_factor": { "field": "view_count" },
          "weight": 0.00002
        }
      ],
      "score_mode": "sum",
      "boost_mode": "sum"
    }
  },
  "sort": [
    { "_score": { "order": "desc" } }
  ],
  "aggs": {
    "group_by_tags": {
      "terms": { "field": "tags" }
    }
  }
}

# Analyze index-time tokenization (uses the analyzer mapped to the "tags" field).
GET cookbook/_analyze
{
  "field": "tags",
  "text": ["创意菜 甜 西餐 甜品 电烤箱"]
}

# Analyze search-time tokenization with an explicitly named analyzer.
GET _analyze?pretty
{
  "text": ["ROKI"],
  "analyzer": "ik_max_word"
}

GET _analyze?pretty
{
  "text": ["创意菜 甜 西餐 甜品 电烤箱"],
  "analyzer": "ik_smart"
}
2020-12-23 新增:name字段支持中文拼音搜索
# Step 1: create the index with 10 primary shards and 3 replicas.
PUT /cookbook/
{
  "settings": {
    "number_of_shards": 10,
    "number_of_replicas": 3
  }
}

# Step 2: close the index before the analysis settings are applied.
POST cookbook/_close

# Step 3: register a custom analyzer that tokenizes with ik_max_word and then
# runs the tokens through the "my_pinyin" filter.
PUT cookbook/_settings
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": ["my_pinyin"]
          }
        },
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": false,
            "limit_first_letter_length": 10,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}

# Step 4: field mappings. "dynamic": false is set at the top level; only the
# fields listed here are declared. "name" uses the pinyin analyzer for both
# indexing and searching; "tags" and "category_tags" are whitespace-tokenized
# text with fielddata enabled.
PUT cookbook/_mappings
{
  "dynamic": false,
  "properties": {
    "id": { "type": "integer" },
    "name": {
      "type": "text",
      "analyzer": "ik_pinyin_analyzer",
      "search_analyzer": "ik_pinyin_analyzer"
    },
    "introduction": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "description": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "collect_count": { "type": "integer" },
    "view_count": { "type": "integer" },
    "difficulty": { "type": "integer" },
    "need_time": { "type": "integer" },
    "prepare_desc": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "type": { "type": "integer" },
    "cookbook_type": { "type": "integer" },
    "recommend": { "type": "boolean" },
    "verified": { "type": "boolean" },
    "grounding": { "type": "boolean" },
    "allow_distribution": { "type": "boolean" },
    "tags": {
      "type": "text",
      "analyzer": "whitespace",
      "fielddata": true
    },
    "materials": {
      "type": "text",
      "analyzer": "ik_smart",
      "search_analyzer": "ik_smart"
    },
    "seasons": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "category_tags": {
      "type": "text",
      "analyzer": "whitespace",
      "fielddata": true
    },
    "categories": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "platforms": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "themes": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "pub_time": { "type": "date" }
  }
}

# Step 5: reopen the index.
POST cookbook/_open
2020-12-24 记录:引入elasticsearch-rest-client后出现httpclient jar包版本冲突。
解决办法:查看7.9.3版本的elasticsearch-rest-client所引用的httpclient版本,在项目依赖中显式声明同一版本,直接覆盖冲突的引用。
2020-12-28
IK分词同时支持拼音和同义词:在自定义analyzer的filter列表中串联多个过滤器即可(先同义词过滤器,后拼音过滤器)
# First define the synonym + pinyin analyzers.
# Analysis settings cannot be changed on an open index, so the index is closed
# before the update and reopened afterwards (same sequence as the earlier
# ik_pinyin_analyzer setup in this file).
POST cookbook/_close

# Two custom analyzers sharing the same filter chain — synonym expansion first,
# then pinyin conversion — one on top of the ik_max_word tokenizer and one on
# top of ik_smart.
# NOTE(review): "synonyms_path" is presumably resolved relative to the
# Elasticsearch config directory on every node — confirm the file is deployed.
PUT cookbook/_settings
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_synonym_pinyin_max_word": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "my_synonym_filter",
              "my_pinyin_filter"
            ]
          },
          "ik_synonym_pinyin_smart": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": [
              "my_synonym_filter",
              "my_pinyin_filter"
            ]
          }
        },
        "filter": {
          "my_synonym_filter": {
            "type": "synonym",
            "synonyms_path": "analysis-ik/synonyms.txt"
          },
          "my_pinyin_filter": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": false,
            "limit_first_letter_length": 10,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}

# Reopen the index so it can serve requests again.
POST cookbook/_open