Elasticsearch系列(二)--query、filter、aggregations
本文基于ES6.4版本,我也是处于学习阶段,对学习内容做个记录,如果文中有错误,请指出。
实验数据:
index:book
type:novel
mappings:
{ "mappings": { "novel": { "dynamic": "false", "properties": { "word_count": { "type": "integer" }, "author": { "type": "keyword" }, "title": { "type": "text" }, "publish_date": { "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis", "type": "date" } } } } }
通过put创建索引,使用head可视化界面,数据如下:
Elasticsearch的查询分为:
1、子条件查询:查询特定字段的特定值
Query context
查询过程中,除了判断Document是否满足条件,还会计算出_score表示匹配程度,数值越大,证明匹配程度越高
1、查询全部:/book/novel/_search
"hits": { "total": 10, "max_score": 1.0, "hits": [ { "_index": "book", "_type": "novel", "_id": "5", "_score": 1.0, "_source": { "title": "永夜君王", "word_count": "110000", "publish_date": "2015-03-01", "author": "烟雨江南" } }, { "_index": "book", "_type": "novel", "_id": "8", "_score": 1.0, "_source": { "title": "万古令", "word_count": "110000", "publish_date": "2015-03-01", "author": "听奕" } }, { "_index": "book", "_type": "novel", "_id": "9", "_score": 1.0, "_source": { "title": "天帝传", "word_count": "110000", "publish_date": "2015-03-01", "author": "飞天鱼" } }, { "_index": "book", "_type": "novel", "_id": "10", "_score": 1.0, "_source": { "title": "剑来", "word_count": "110000", "publish_date": "2015-03-01", "author": "烽火戏诸侯" } }, { "_index": "book", "_type": "novel", "_id": "2", "_score": 1.0, "_source": { "title": "完美世界", "word_count": "130000", "publish_date": "2017-03-01", "author": "辰东" } }, { "_index": "book", "_type": "novel", "_id": "4", "_score": 1.0, "_source": { "title": "民国谍影", "word_count": "110000", "publish_date": "2019-03-01", "author": "寻青藤" } }, { "_index": "book", "_type": "novel", "_id": "6", "_score": 1.0, "_source": { "title": "遮天", "word_count": "110000", "publish_date": "2015-03-01", "author": "辰东" } }, { "_index": "book", "_type": "novel", "_id": "1", "_score": 1.0, "_source": { "title": "万古神帝", "word_count": "30000", "publish_date": "2017-01-01", "author": "飞天鱼" } }, { "_index": "book", "_type": "novel", "_id": "7", "_score": 1.0, "_source": { "title": "圣墟", "word_count": "110000", "publish_date": "2015-03-01", "author": "辰东" } }, { "_index": "book", "_type": "novel", "_id": "3", "_score": 1.0, "_source": { "title": "星辰变", "word_count": "100000", "publish_date": "2018-03-01", "author": "我吃西红柿" } } ] }
2、查询id为1的数据:/book/novel/1
{ "_index": "book", "_type": "novel", "_id": "1", "_version": 1, "found": true, "_source": { "title": "万古神帝", "word_count": "30000", "publish_date": "2017-01-01", "author": "飞天鱼" } }
3、只查询title和author字段:/book/novel/1?_source=title,author
{ "_index": "book", "_type": "novel", "_id": "1", "_version": 1, "found": true, "_source": { "author": "飞天鱼", "title": "万古神帝" } }
4、只是显示_source部分:/book/novel/1/_source
{ "title": "万古神帝", "word_count": "30000", "publish_date": "2017-01-01", "author": "飞天鱼" }
5、筛选单字段查询:/book/novel/_search
{ "query": { "match": { "author": "飞天鱼" } } }
"hits": { "total": 2, "max_score": 1.2039728, "hits": [ { "_index": "book", "_type": "novel", "_id": "9", "_score": 1.2039728, "_source": { "title": "天帝传", "word_count": "110000", "publish_date": "2015-03-01", "author": "飞天鱼" } }, { "_index": "book", "_type": "novel", "_id": "1", "_score": 0.6931472, "_source": { "title": "万古神帝", "word_count": "30000", "publish_date": "2017-01-01", "author": "飞天鱼" } } ] }
6、limit:我们查询到2条数据,如果我们只想得到第一条数据,可以使用from和size联合查询
{ "query": { "match": { "author": "飞天鱼" } }, "from": 0, "size": 1 }
"hits": { "total": 2, "max_score": 1.2039728, "hits": [ { "_index": "book", "_type": "novel", "_id": "9", "_score": 1.2039728, "_source": { "title": "天帝传", "word_count": "110000", "publish_date": "2015-03-01", "author": "飞天鱼" } } ] }
7、排序sort:按word_count降序排列,可以看到指定sort之后结果中的_score为null
{ "query": { "match": { "author": "辰东" } }, "sort": [ { "word_count": { "order": "desc" } } ] }
"hits": { "total": 3, "max_score": null, "hits": [ { "_index": "book", "_type": "novel", "_id": "2", "_score": null, "_source": { "title": "完美世界", "word_count": "130000", "publish_date": "2017-03-01", "author": "辰东" }, "sort": [ 130000 ] }, { "_index": "book", "_type": "novel", "_id": "6", "_score": null, "_source": { "title": "遮天", "word_count": "110000", "publish_date": "2015-03-01", "author": "辰东" }, "sort": [ 110000 ] }, { "_index": "book", "_type": "novel", "_id": "7", "_score": null, "_source": { "title": "圣墟", "word_count": "110000", "publish_date": "2015-03-01", "author": "辰东" }, "sort": [ 110000 ] } ] }
8、短语匹配match_phrase:
match的方式本质上就是模糊(全文)查询,而且默认分词器会把中文拆分到最小粒度(单个字),可以看到只要title中匹配到任意一个字,该数据就会被查询出来
{ "query": { "match": { "title": "万古神帝" } } }
"hits": { "total": 3, "max_score": 2.439878, "hits": [ { "_index": "book", "_type": "novel", "_id": "1", "_score": 2.439878, "_source": { "title": "万古神帝", "word_count": "30000", "publish_date": "2017-01-01", "author": "飞天鱼" } }, { "_index": "book", "_type": "novel", "_id": "8", "_score": 2.4079456, "_source": { "title": "万古令", "word_count": "110000", "publish_date": "2015-03-01", "author": "听奕" } }, { "_index": "book", "_type": "novel", "_id": "9", "_score": 1.2039728, "_source": { "title": "天帝传", "word_count": "110000", "publish_date": "2015-03-01", "author": "飞天鱼" } } ] }
所以这里有了短语匹配match_phrase,结果只有完整包含"万古神帝"的title才可以被查询到
{ "query": { "match_phrase": { "title": "万古神帝" } } }
"hits": { "total": 1, "max_score": 2.439878, "hits": [ { "_index": "book", "_type": "novel", "_id": "1", "_score": 2.439878, "_source": { "title": "万古神帝", "word_count": "30000", "publish_date": "2017-01-01", "author": "飞天鱼" } } ] }
9、多字段查询multi_match:查询title或者author中匹配"万古神天"的数据
{ "query": { "multi_match": { "query": "万古神天", "fields": ["title","author"] } } }
"hits": { "total": 4, "max_score": 2.4079456, "hits": [ { "_index": "book", "_type": "novel", "_id": "8", "_score": 2.4079456, "_source": { "title": "万古令", "word_count": "110000", "publish_date": "2015-03-01", "author": "听奕" } }, { "_index": "book", "_type": "novel", "_id": "1", "_score": 1.8299085, "_source": { "title": "万古神帝", "word_count": "30000", "publish_date": "2017-01-01", "author": "飞天鱼" } }, { "_index": "book", "_type": "novel", "_id": "9", "_score": 1.2039728, "_source": { "title": "天帝传", "word_count": "110000", "publish_date": "2015-03-01", "author": "飞天鱼" } }, { "_index": "book", "_type": "novel", "_id": "6", "_score": 1.1727304, "_source": { "title": "遮天", "word_count": "110000", "publish_date": "2015-03-01", "author": "辰东" } } ] }
10、语法查询query_string:
{ "query": { "query_string": { "query": "万古" } } }
这里的查询结果和match没有区别。query_string的query中可以使用AND和OR,注意这里一定是大写,小写就被当做搜索的内容了;match并不会解析AND和OR(它们只会被当作普通的搜索词),下面两个查询结果相同,是因为match默认的operator本来就是or
{ "query": { "query_string": { "query": "万古 OR 剑来" } } }
{ "query": { "match": { "title": "万古 OR 剑来" } } }
指定fields:
{ "query": { "query_string": { "query": "万古 OR 剑来 OR 辰东 ", "fields": ["author","title"] } } }
11、精确匹配term:
title为text类型,author为keyword类型。term不会对查询内容进行分词,而text类型的title在索引时已被拆分成单个字,所以实验发现查询title只有是单个字的时候才能匹配(用完整标题精确匹配反而查不到数据),而keyword类型的author不分词,必须是完整值精确匹配
例如:title不支持精确匹配,支持模糊查询(而且是单个字才可以,多个字照样查不到数据)
{ "query": { "term": { "title": "剑来" } } }
如果只是查询一个字就可以
{ "query": { "term": { "title": "来" } } }
"hits": { "total": 1, "max_score": 1.3940737, "hits": [ { "_index": "book", "_type": "novel", "_id": "10", "_score": 1.3940737, "_source": { "title": "剑来", "word_count": "110000", "publish_date": "2015-03-01", "author": "烽火戏诸侯" } } ] }
查询author字段:有三条数据
{ "query": { "term": { "author": "辰东" } } }
"hits": [ { "_index": "book", "_type": "novel", "_id": "7", "_score": 0.6931472, "_source": { "title": "圣墟", "word_count": "110000", "publish_date": "2015-03-01", "author": "辰东" } }, { "_index": "book", "_type": "novel", "_id": "2", "_score": 0.47000363, "_source": { "title": "完美世界", "word_count": "130000", "publish_date": "2017-03-01", "author": "辰东" } }, { "_index": "book", "_type": "novel", "_id": "6", "_score": 0.47000363, "_source": { "title": "遮天", "word_count": "110000", "publish_date": "2015-03-01", "author": "辰东" } } ] }
author不支持模糊查询:下面的查询结果为空
{ "query": { "term": { "author": "东" } } }
12、范围查找range:包括integer和日期类型,日期支持now函数,也就是当前日期
{ "query": { "range": { "word_count": { "gt": 110000, "lte": 130000 } } } }
"hits": { "total": 1, "max_score": 1.0, "hits": [ { "_index": "book", "_type": "novel", "_id": "2", "_score": 1.0, "_source": { "title": "完美世界", "word_count": "130000", "publish_date": "2017-03-01", "author": "辰东" } } ] }
Filter context
查询过程中,只是判断Document是否满足条件,只有yes or no。用来做数据过滤,而且ES还会对结果进行缓存,效率相对query更高一点
{ "query": { "bool": { "filter": { "term": { "word_count": 130000 } } } } }
"hits": { "total": 1, "max_score": 0.0, "hits": [ { "_index": "book", "_type": "novel", "_id": "2", "_score": 0.0, "_source": { "title": "完美世界", "word_count": "130000", "publish_date": "2017-03-01", "author": "辰东" } } ] }
2、复合条件查询:组合子条件查询
1、固定分数查询constant_score:不能直接写match,需要把查询条件放在filter中;命中数据的_score固定为boost的值(默认为1)
{ "query": { "constant_score": { "filter": { "match": { "title": "天帝传" } } } } } { "query": { "constant_score": { "filter": { "match": { "title": "天帝传" } }, "boost": 2 } } }
2、bool查询:
should:就是or的关系
{ "query": { "bool": { "should": [ { "match": { "author": "辰东" } }, { "match": { "title": "天帝传" } } ] } } }
must:相当于and
{ "query": { "bool": { "must": [ { "match": { "author": "辰东" } }, { "match": { "title": "天帝传" } } ] } } }
must_not:相当于<>
{ "query": { "bool": { "must_not": { "term": { "author": "辰东" } } } } }
bool查询也可以使用filter:
{ "query": { "bool": { "must": [ { "match": { "author": "辰东" } }, { "match": { "title": "天帝传" } } ], "filter": [ { "term": { "word_count": 110000 } } ] } } }
aggregations:
{ "aggs": { "group_by_author": { "terms": { "field": "author" } } } }
"aggregations": { "group_by_author": { "doc_count_error_upper_bound": 0, "sum_other_doc_count": 0, "buckets": [ { "key": "辰东", "doc_count": 3 }, { "key": "飞天鱼", "doc_count": 2 }, { "key": "听奕", "doc_count": 1 }, { "key": "寻青藤", "doc_count": 1 }, { "key": "我吃西红柿", "doc_count": 1 }, { "key": "烟雨江南", "doc_count": 1 }, { "key": "烽火戏诸侯", "doc_count": 1 } ] } }
支持多聚合结果:
{ "aggs": { "group_by_author": { "terms": { "field": "author" } }, "group_by_word_count": { "terms": { "field": "word_count" } } } }
aggregations除了支持terms,还有stats、min、max、avg等
{ "aggs": { "group_by_author": { "stats": { "field": "word_count" } } } }
"aggregations": { "group_by_author": { "count": 10, "min": 30000.0, "max": 130000.0, "avg": 103000.0, "sum": 1030000.0 } }
avg:
{ "aggs": { "group_by_author": { "avg": { "field": "word_count" } } } }