elasticsearch—入门(进阶)
文档的 CRUD 与批量操作
create document. 自动生成 _id
POST users/_doc { "user": "Mike", "post_date": "2019-04-15T14:12:12", "message": "trying out Kibana" }
create document. 指定Id。如果id已经存在,报错
PUT users/_doc/1?op_type=create { "user": "Jack", "post_date": "2019-05-15T14:12:12", "message": "trying out Elasticsearch" }
指定 ID 如果已经存在,就报错
PUT users/_create/1 { "user": "Jack", "post_date": "2019-05-15T14:12:12", "message": "trying out Elasticsearch" }
Get the document by ID
GET users/_doc/1
Update 指定 ID (先删除,在写入)
PUT users/_doc/1 { "user": "Mike" } GET users/_doc/1
在原文档上增加字段
POST users/_update/1/ { "doc":{ "post_date" : "2019-05-15T14:12:12", "message" : "trying out Elasticsearch" } } GET users/_doc/1
删除文档
DELETE users/_doc/1
Bulk 操作
如果一个具有相同索引的文档已经存在,则create将失败,而index将根据需要添加或替换一个文档
执行第1次
POST _bulk {"index":{"_index":"test","_id":"1"}} {"field1":"value1"} {"delete":{"_index":"test","_id":"2"}} {"create":{"_index":"test2","_id":"3"}} {"field1":"value3"} {"update":{"_id":"1","_index":"test"}} {"doc":{"field2":"value2"}}
运行结果如下
{ "took": 30, "errors": false, "items": [ { "index": { "_index": "test", "_type": "_doc", "_id": "1", "_version": 1, "result": "created", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "status": 201, "_seq_no" : 0, "_primary_term": 1 } }, { "delete": { "_index": "test", "_type": "_doc", "_id": "2", "_version": 1, "result": "not_found", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "status": 404, "_seq_no" : 1, "_primary_term" : 2 } }, { "create": { "_index": "test", "_type": "_doc", "_id": "3", "_version": 1, "result": "created", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "status": 201, "_seq_no" : 2, "_primary_term" : 3 } }, { "update": { "_index": "test", "_type": "_doc", "_id": "1", "_version": 2, "result": "updated", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "status": 200, "_seq_no" : 3, "_primary_term" : 4 } } ] }
执行第2次
POST _bulk { "index" : { "_index" : "test", "_id" : "1" } } { "field1" : "value1" } { "delete" : { "_index" : "test", "_id" : "2" } } { "create" : { "_index" : "test2", "_id" : "3" } } { "field1" : "value3" } { "update" : {"_id" : "1", "_index" : "test"} } { "doc" : {"field2" : "value2"} }
运行结果如下
{ "took" : 2, "errors" : true, "items" : [ { "index" : { "_index" : "test", "_type" : "_doc", "_id" : "1", "_version" : 21, "result" : "updated", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 30, "_primary_term" : 1, "status" : 200 } }, { "delete" : { "_index" : "test", "_type" : "_doc", "_id" : "2", "_version" : 1, "result" : "not_found", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 31, "_primary_term" : 1, "status" : 404 } }, { "create" : { "_index" : "test2", "_type" : "_doc", "_id" : "3", "status" : 409, "error" : { "type" : "version_conflict_engine_exception", "reason" : "[3]: version conflict, document already exists (current version [1])", "index_uuid" : "tNhUI2pOSqqMTxTwQJyPjQ", "shard" : "0", "index" : "test2" } } }, { "update" : { "_index" : "test", "_type" : "_doc", "_id" : "1", "_version" : 22, "result" : "updated", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 32, "_primary_term" : 1, "status" : 200 } } ] }
mget 操作
GET /_mget { "docs": [ { "_index": "test", "_id": "1" }, { "_index": "test", "_id": "2" } ] }
URI中指定index
GET /test/_mget { "docs": [ { "_id": "1" }, { "_id": "2" } ] }
_source false 不显示,指定要返回的值
GET /_mget { "docs": [ { "_index": "test", "_id": "1", "_source": false }, { "_index": "test", "_id": "2", "_source": [ "field3", "field4" ] }, { "_index": "test", "_id": "3", "_source": { "include": [ "user" ], "exclude": [ "user.location" ] } } ] }
POST kibana_sample_data_ecommerce/_msearch {} {"query":{"match_all":{}},"size":1} {"index":"kibana_sample_data_flights"} {"query":{"match_all":{}},"size":2}
运行结果如下
{ "took" : 1, "responses" : [ { "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4675, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "kibana_sample_data_ecommerce", "_type" : "_doc", "_id" : "j_UFdngBnrEhdHydz7qu", "_score" : 1.0, "_source" : { "category" : [ "Men's Clothing" ], "currency" : "EUR", "customer_first_name" : "Eddie", "customer_full_name" : "Eddie Underwood", "customer_gender" : "MALE", "customer_id" : 38, "customer_last_name" : "Underwood", "customer_phone" : "", "day_of_week" : "Monday", "day_of_week_i" : 0, "email" : "eddie@underwood-family.zzz", "manufacturer" : [ "Elitelligence", "Oceanavigations" ], "order_date" : "2021-04-19T09:28:48+00:00", "order_id" : 584677, "products" : [ { "base_price" : 11.99, "discount_percentage" : 0, "quantity" : 1, "manufacturer" : "Elitelligence", "tax_amount" : 0, "product_id" : 6283, "category" : "Men's Clothing", "sku" : "ZO0549605496", "taxless_price" : 11.99, "unit_discount_amount" : 0, "min_price" : 6.35, "_id" : "sold_product_584677_6283", "discount_amount" : 0, "created_on" : "2016-12-26T09:28:48+00:00", "product_name" : "Basic T-shirt - dark blue/white", "price" : 11.99, "taxful_price" : 11.99, "base_unit_price" : 11.99 }, { "base_price" : 24.99, "discount_percentage" : 0, "quantity" : 1, "manufacturer" : "Oceanavigations", "tax_amount" : 0, "product_id" : 19400, "category" : "Men's Clothing", "sku" : "ZO0299602996", "taxless_price" : 24.99, "unit_discount_amount" : 0, "min_price" : 11.75, "_id" : "sold_product_584677_19400", "discount_amount" : 0, "created_on" : "2016-12-26T09:28:48+00:00", "product_name" : "Sweatshirt - grey multicolor", "price" : 24.99, "taxful_price" : 24.99, "base_unit_price" : 24.99 } ], "sku" : [ "ZO0549605496", "ZO0299602996" ], "taxful_total_price" : 36.98, "taxless_total_price" : 36.98, "total_quantity" : 2, "total_unique_products" : 2, "type" : "order", "user" : "eddie", "geoip" : { "country_iso_code" : "EG", "location" : { "lon" : 31.3, "lat" : 30.1 }, "region_name" : "Cairo Governorate", "continent_name" : "Africa", "city_name" : "Cairo" } } } ] }, "status" : 200 }, { "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 10000, "relation" : "gte" }, "max_score" : 1.0, "hits" : [ { "_index" : "kibana_sample_data_flights", "_type" : "_doc", "_id" : "a_UFdngBnrEhdHyd1sBb", "_score" : 1.0, "_source" : { "FlightNum" : "9HY9SWR", "DestCountry" : "AU", "OriginWeather" : "Sunny", "OriginCityName" : "Frankfurt am Main", "AvgTicketPrice" : 841.2656419677076, "DistanceMiles" : 10247.856675613455, "FlightDelay" : false, "DestWeather" : "Rain", "Dest" : "Sydney Kingsford Smith International Airport", "FlightDelayType" : "No Delay", "OriginCountry" : "DE", "dayOfWeek" : 0, "DistanceKilometers" : 16492.32665375846, "timestamp" : "2021-03-22T00:00:00", "DestLocation" : { "lat" : "-33.94609833", "lon" : "151.177002" }, "DestAirportID" : "SYD", "Carrier" : "Kibana Airlines", "Cancelled" : false, "FlightTimeMin" : 1030.7704158599038, "Origin" : "Frankfurt am Main Airport", "OriginLocation" : { "lat" : "50.033333", "lon" : "8.570556" }, "DestRegion" : "SE-BD", "OriginAirportID" : "FRA", "OriginRegion" : "DE-HE", "DestCityName" : "Sydney", "FlightTimeHour" : 17.179506930998397, "FlightDelayMin" : 0 } }, { "_index" : "kibana_sample_data_flights", "_type" : "_doc", "_id" : "bPUFdngBnrEhdHyd1sBb", "_score" : 1.0, "_source" : { "FlightNum" : "X98CCZO", "DestCountry" : "IT", "OriginWeather" : "Clear", "OriginCityName" : "Cape Town", "AvgTicketPrice" : 882.9826615595518, "DistanceMiles" : 5482.606664853586, "FlightDelay" : false, "DestWeather" : "Sunny", "Dest" : "Venice Marco Polo Airport", "FlightDelayType" : "No Delay", "OriginCountry" : "ZA", "dayOfWeek" : 0, "DistanceKilometers" : 8823.40014044213, "timestamp" : "2021-03-22T18:27:00", "DestLocation" : { "lat" : "45.505299", "lon" : "12.3519" }, "DestAirportID" : "VE05", "Carrier" : "Logstash Airways", "Cancelled" : false, "FlightTimeMin" : 464.3894810759016, "Origin" : "Cape Town International Airport", "OriginLocation" : { "lat" : "-33.96480179", "lon" : "18.60169983" }, "DestRegion" : "IT-34", "OriginAirportID" : "CPT", "OriginRegion" : "SE-BD", "DestCityName" : "Venice", "FlightTimeHour" : 7.73982468459836, "FlightDelayMin" : 0 } } ] }, "status" : 200 } ] }
相关阅读
倒排索引入门
Elasticsearch 使用一种称为 倒排索引 的结构,它适用于快速的全文搜索。一个倒排索引由文档中所有不重复词的列表构成,对于其中每个词,有一个包含它的文档列表
相关阅读
使用分析器进行分词
内置分词器
-
Stand Analyzer 默认分词器,按词切分小写处理
-
Simple Analyzer – 按照非字母切分(符号被过滤),小写处理
-
Stop Analyzer – 小写处理,停用词过滤(the,a,is)
-
Whitespace Analyzer – 按照空格切分,不转小写
-
Keyword Analyzer – 不分词,直接将输入当作输出
-
Patter Analyzer – 正则表达式,默认 \W+ (非字符分隔)
-
Language – 提供了30多种常见语言的分词器
stand
GET _analyze { "analyzer": "standard", "text": "2 running Quick brown-foxes leap over lazy dogs in the summer evening." }
输出结果如下
{ "tokens" : [ { "token" : "2", "start_offset" : 0, "end_offset" : 1, "type" : "<NUM>", "position" : 0 }, { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "<ALPHANUM>", "position" : 1 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "<ALPHANUM>", "position" : 2 }, { "token" : "brown", "start_offset" : 16, "end_offset" : 21, "type" : "<ALPHANUM>", "position" : 3 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "<ALPHANUM>", "position" : 4 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "<ALPHANUM>", "position" : 5 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "<ALPHANUM>", "position" : 6 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "<ALPHANUM>", "position" : 7 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "<ALPHANUM>", "position" : 8 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "<ALPHANUM>", "position" : 9 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "<ALPHANUM>", "position" : 10 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "<ALPHANUM>", "position" : 11 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "<ALPHANUM>", "position" : 12 } ] }
simpe
GET _analyze { "analyzer": "simple", "text": "2 running Quick brown-foxes leap over lazy dogs in the summer evening." }
输出结果如下
{ "tokens" : [ { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 0 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 1 }, { "token" : "brown", "start_offset" : 16, "end_offset" : 21, "type" : "word", "position" : 2 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "word", "position" : 3 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 4 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 5 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 6 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 7 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "word", "position" : 8 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "word", "position" : 9 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 10 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "word", "position" : 11 } ] }
stop
GET _analyze { "analyzer": "stop", "text": "2 running Quick brown-foxes leap over lazy dogs in the summer evening." }
输出结果如下
{ "tokens" : [ { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 0 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 1 }, { "token" : "brown", "start_offset" : 16, "end_offset" : 21, "type" : "word", "position" : 2 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "word", "position" : 3 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 4 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 5 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 6 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 7 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 10 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "word", "position" : 11 } ] }
whitespace
GET _analyze { "analyzer": "whitespace", "text": "2 running Quick brown-foxes leap over lazy dogs in the summer evening." }
输出结果如下
{ "tokens" : [ { "token" : "2", "start_offset" : 0, "end_offset" : 1, "type" : "word", "position" : 0 }, { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 1 }, { "token" : "Quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 2 }, { "token" : "brown-foxes", "start_offset" : 16, "end_offset" : 27, "type" : "word", "position" : 3 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 4 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 5 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 6 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 7 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "word", "position" : 8 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "word", "position" : 9 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 10 }, { "token" : "evening.", "start_offset" : 62, "end_offset" : 70, "type" : "word", "position" : 11 } ] }
keyword
GET _analyze { "analyzer": "keyword", "text": "2 running Quick brown-foxes leap over lazy dogs in the summer evening." }
输出结果如下
{ "tokens" : [ { "token" : "2 running Quick brown-foxes leap over lazy dogs in the summer evening.", "start_offset" : 0, "end_offset" : 70, "type" : "word", "position" : 0 } ] }
pattern
GET _analyze { "analyzer": "pattern", "text": "2 running Quick brown-foxes leap over lazy dogs in the summer evening." }
输出结果如下
{ "tokens" : [ { "token" : "2", "start_offset" : 0, "end_offset" : 1, "type" : "word", "position" : 0 }, { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 1 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 2 }, { "token" : "brown", "start_offset" : 16, "end_offset" : 21, "type" : "word", "position" : 3 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "word", "position" : 4 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 5 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 6 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 7 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 8 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "word", "position" : 9 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "word", "position" : 10 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 11 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "word", "position" : 12 } ] }
english
GET _analyze { "analyzer": "english", "text": "2 running Quick brown-foxes leap over lazy dogs in the summer evening." }
输出结果如下
{ "tokens" : [ { "token" : "2", "start_offset" : 0, "end_offset" : 1, "type" : "<NUM>", "position" : 0 }, { "token" : "run", "start_offset" : 2, "end_offset" : 9, "type" : "<ALPHANUM>", "position" : 1 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "<ALPHANUM>", "position" : 2 }, { "token" : "brown", "start_offset" : 16, "end_offset" : 21, "type" : "<ALPHANUM>", "position" : 3 }, { "token" : "fox", "start_offset" : 22, "end_offset" : 27, "type" : "<ALPHANUM>", "position" : 4 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "<ALPHANUM>", "position" : 5 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "<ALPHANUM>", "position" : 6 }, { "token" : "lazi", "start_offset" : 38, "end_offset" : 42, "type" : "<ALPHANUM>", "position" : 7 }, { "token" : "dog", "start_offset" : 43, "end_offset" : 47, "type" : "<ALPHANUM>", "position" : 8 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "<ALPHANUM>", "position" : 11 }, { "token" : "even", "start_offset" : 62, "end_offset" : 69, "type" : "<ALPHANUM>", "position" : 12 } ] }
icu_analyzer
POST _analyze { "analyzer": "icu_analyzer", "text": "他说的确实在理”" }
输出结果如下
{ "tokens" : [ { "token" : "他", "start_offset" : 0, "end_offset" : 1, "type" : "<IDEOGRAPHIC>", "position" : 0 }, { "token" : "说的", "start_offset" : 1, "end_offset" : 3, "type" : "<IDEOGRAPHIC>", "position" : 1 }, { "token" : "确实", "start_offset" : 3, "end_offset" : 5, "type" : "<IDEOGRAPHIC>", "position" : 2 }, { "token" : "在", "start_offset" : 5, "end_offset" : 6, "type" : "<IDEOGRAPHIC>", "position" : 3 }, { "token" : "理", "start_offset" : 6, "end_offset" : 7, "type" : "<IDEOGRAPHIC>", "position" : 4 } ] }
中文 stand
POST _analyze { "analyzer": "standard", "text": "他说的确实在理”" }
输出结果如下
{ "tokens" : [ { "token" : "他", "start_offset" : 0, "end_offset" : 1, "type" : "<IDEOGRAPHIC>", "position" : 0 }, { "token" : "说", "start_offset" : 1, "end_offset" : 2, "type" : "<IDEOGRAPHIC>", "position" : 1 }, { "token" : "的", "start_offset" : 2, "end_offset" : 3, "type" : "<IDEOGRAPHIC>", "position" : 2 }, { "token" : "确", "start_offset" : 3, "end_offset" : 4, "type" : "<IDEOGRAPHIC>", "position" : 3 }, { "token" : "实", "start_offset" : 4, "end_offset" : 5, "type" : "<IDEOGRAPHIC>", "position" : 4 }, { "token" : "在", "start_offset" : 5, "end_offset" : 6, "type" : "<IDEOGRAPHIC>", "position" : 5 }, { "token" : "理", "start_offset" : 6, "end_offset" : 7, "type" : "<IDEOGRAPHIC>", "position" : 6 } ] }
相关阅读
-
https://www.elastic.co/guide/en/elasticsearch/reference/7.1/indices-analyze.html
-
https://www.elastic.co/guide/en/elasticsearch/reference/current/analyzer-anatomy.html
Search APl
URI Search
- 在URL中使用查询参数
Request Body Search
- 使 Elasticsearch用提供的,基于JSON格式的更加完备的 Query Domain Specific Language (DSL)
在索引kibana_sample_data_ecommerce查询customer_first_name:Eddie = Eddie
GET kibana_sample_data_ecommerce/_search?q=customer_first_name:Eddie
查询以kibana开头的索引查询customer_first_name:Eddie = Eddie
GET kibana*/_search?q=customer_first_name:Eddie
在所有索引中查找customer_first_name:Eddie = Eddie
GET /_all/_search?q=customer_first_name:Eddie
响应结果说明
衡量相关性
Information Retrieval
-
Precision(查准率)-尽可能返回较少的无关文档
-
Recall(查全率)-尽量返回较多的相关文档
-
Ranking--是否能够按照相关度进行排序?
相关阅读
-
https://www.elastic.co/guide/en/elasticsearch/reference/7.1/search-search.html
-
https://searchenginewatch.com/sew/news/2065080/search-engines-101
-
https://www.huffpost.com/entry/search-engines-101-part-i_b_1104525
-
https://baike.baidu.com/item/%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E%E5%8F%91%E5%B1%95%E5%8F%B2/2422574
URI Search详解
URI SearchURI-通过 query实现搜索
GET /movies/_search?q=2012&df=title&sort=year:desc&from=0&size=10&timeout=1s
-
q指定查询语句,使用 Query String Syntax
-
df默认字段,不指定时,会对所有字段进行查询
-
Sort排序/from和size用于分页
-
Profile可以查看查询是如何被执行的
指定字段vs泛查询 q=title:2012/q=2012
基本查询 (查title中包含2012的数据)
GET /movies/_search?q=2012&df=title { "profile":"true" }
泛查询,正对_all,所有字段
GET /movies/_search?q=2012 { "profile": "true" }
指定字段查询(不用指定df和q联合写法)
GET /movies/_search?q=title:2012&sort=year:desc&from=0&size=10&timeout=1s { "profile": "true" }
执行过程如下
Term v. Phrase (同时出现顺序一致)
-
Beautiful Mind等效于 Beautiful OR Mind
-
“Beautiful Mind”,等效于 Beautiful AND Mind. Phrase查询,还要求前后顺序保持一致(不拆分都要有)
GET /movies/_search?q=title:Beautiful Mind { "profile": "true" }
可以看到会把在title中包含 beautiful or Mind的结果都查出来
使用引号,Phrase查询
GET /movies/_search?q=title:"Beautiful Mind" { "profile":"true" }
把title中包含 beautiful Mind的结果查出来
分组与引号
-
title: (Beautiful AND Mind)
-
title=“Beautiful Mind”
分组泛查询
GET /movies/_search?q=title:(Beautiful Mind) { "profile": "true" }
布尔查询
AND/OR/NOT或者&&/!
- 必须大写
分组
-
+表示must
-
-表示must_not
GET /movies/_search?q=title:(Beautiful AND Mind) { "profile":"true" }
NOT 不包含
GET /movies/_search?q=title:(Beautiful NOT Mind) { "profile":"true" }
包含 Beautiful 不包含Mind
范围查询
区间表示:[闭区间,{}开区间
-
year:2019to2018]
-
year:[* TO 2018]
算数符号
-
year:>2010
-
year(>2010&&<=2018
-
year:+>2010+<=2018)
查询电影大于等于1980年
GET /movies/_search?q=year>=1980 { "profile":"true" }
通配符查询
查询title中包含以b开头的单词
GET /movies/_search?q=title:b* { "profile":"true" }
模糊匹配&近似度匹配 ~加数字可以指定运行输错几个字符
GET /movies/_search?q=title:beautifl~1 { "profile":"true" }
相关阅读
Request Body 与 Query DSL
ignore_unavailable=true,可以忽略尝试访问不存在的索引“404_idx”导致的报错查询movies分页
日期和hello拼接样式如下
使用查询表达式 —— Match
返回包含 last or christmas 的数据
POST movies/_search { "query": { "match": { "title": "last christmas" } } }
返回包含 last christmas 的数据
POST movies/_search { "query": { "match": { "title": { "query": "last christmas", "operator": "and" } } } }
短语搜索 - Match Phrase 在一条语句中搜索短语
适用于在一条语句中用短语进行查询
POST movies/_search { "query": { "match_phrase": { "title":{ "query": "one love" } } } }
比如我们想在 title中搜索 one Love 但中间有个词不记得了,可以加入 slop参数
POST movies/_search { "query": { "match_phrase": { "title":{ "query": "one love", "slop": 1 } } } }
多字段短语匹配
POST movies/_search { "query": { "multi_match": { "query": "one love", "type": "phrase", "fields": ["title", "id"], "slop": 1 } } }
相关阅读
Query & Simple Query String Query
数据准备
PUT /users/_doc/1 { "name":"Ruan Yiming", "about":"java, golang, node, swift, elasticsearch" } PUT /users/_doc/2 { "name":"Li Yiming", "about":"Hadoop" } POST users/_search { "query": { "query_string": { "default_field": "name", "query": "Ruan AND Yiming" } } } POST users/_search { "query": { "query_string": { "fields":["name","about"], "query": "(Ruan AND Yiming) OR (Java AND Elasticsearch)" } } }
Simple Query 默认的operator是 Or 不支持bool 表达式
POST users/_search { "query": { "simple_query_string": { "query": "Ruan AND Yiming", "fields": ["name"] } } }
operator 设置成AND
POST users/_search { "query": { "simple_query_string": { "query": "Ruan Yiming", "fields": ["name"], "default_operator": "AND" } } }
query_string 支持复杂的表达式
查询同时出现(AND)
POST /movies/_search { "profile": true, "query": { "query_string": { "default_field": "title", "query": "Beautiful AND Mind" } } }
查询同时出现(+)
GET /movies/_search { "profile": true, "query": { "simple_query_string": { "query": "Beautiful +mind", "fields": [ "title" ] } } }
多fields中查询包含2012
GET /movies/_search { "profile": true, "query": { "query_string": { "fields": [ "title", "year" ], "query": "2012" } } }
Dynamic Mapping 和常见字段类型
相关demo演示
写入文档,查看 Mapping PUT mapping_test/_doc/1 { "firstName": "Chan", "lastName": "Jackie", "loginDate": "2018-07-24T10:29:48.103Z" } #查看 Mapping文件 GET mapping_test/_mapping #Delete index DELETE mapping_test #dynamic mapping,推断字段的类型 PUT mapping_test/_doc/1 { "uid": "123", "isVip": false, "isAdmin": "true", "age": 19, "heigh": 180 } #查看 Dynamic GET mapping_test/_mapping #默认Mapping支持dynamic,写入的文档中加入新的字段 PUT dynamic_mapping_test/_doc/1 { "newField":"someValue" } #该字段可以被搜索,数据也在_source中出现 POST dynamic_mapping_test/_search { "query": { "match": { "newField": "someValue" } } } #修改为dynamic false PUT dynamic_mapping_test/_mapping { "dynamic": false } #新增 anotherField PUT dynamic_mapping_test/_doc/10 { "anotherField": "someValue" } #该字段不可以被搜索,因为dynamic已经被设置为false POST dynamic_mapping_test/_search { "query": { "match": { "anotherField": "someValue" } } } get dynamic_mapping_test/_doc/10 #修改为strict PUT dynamic_mapping_test/_mapping { "dynamic": "strict" } #写入数据出错,HTTP Code 400 PUT dynamic_mapping_test/_doc/12 { "lastField": "value" } DELETE dynamic_mapping_test
相关阅读
显式Mapping设置与常见参数介绍
如何显示定义一个 Mapping
自定义 Mapping 的建议
控制当前字段是否被索引
Index Options
null value
DELETE users PUT users { "mappings": { "properties": { "firstName": { "type": "text" }, "lastName": { "type": "text" }, "mobile": { "type": "keyword", "null_value": "NULL" } } } } PUT users/_doc/1 { "firstName": "Ruan", "lastName": "Yiming", "mobile": null } PUT users/_doc/2 { "firstName": "Ruan2", "lastName": "Yiming2" } GET users/_search { "query": { "match": { "mobile": "NULL" } } }
copy to
DELETE users PUT users { "mappings": { "properties": { "firstName": { "type": "text", "copy_to": "fullName" }, "lastName": { "type": "text", "copy_to": "fullName" } } } } PUT users/_doc/1 { "firstName": "Ruan", "lastName": "Yiming" } GET users/_search?q=fullName:(Ruan Yiming) POST users/_search { "query": { "match": { "fullName": { "query": "Ruan Yiming", "operator": "and" } } } }
数组类型
PUT users/_doc/1 { "name": "onebird", "interests": "reading" } PUT users/_doc/1 { "name": "twobirds", "interests": [ "reading", "music" ] } POST users/_search { "query": { "match_all": {} } } GET users/_mapping
补充阅读
多字段特性及Mapping中配置自定义Analyzer
多字段类型
Exact Values Vs Full text
Exact Values 不需要被分词
自定义分词
Character Filter
POST _analyze { "tokenizer": "keyword", "char_filter": [ "html_strip" ], "text": "<b>hello world</b>" }
自定义分词器过滤 HTMl标签
PUT my_index { "mappings": { "properties": { "text": { "type": "text", "analyzer": "my_analyzer" } } }, "settings": { "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "char_filter": [ "my_char_filter" ] } }, "char_filter": { "my_char_filter": { "type": "html_strip" } } } } } PUT my_index/_doc/1 { "text":"<b>hello world</b>" } PUT my_index/_doc/2 { "text":"<good>hello world</good>" } PUT my_index/_doc/3 { "text":"<hello>test world</hello>" } PUT my_index/_doc/4 { "text":"<hello world='1'>test 1</hello>" } GET my_index/_search { "query": { "match_all": {} } } GET my_index/_search { "query":{ "query_string":{ "query":"world" } } }
Tokenizer
POST _analyze { "tokenizer": "path_hierarchy", "text": "/user/ymruan/a/b/c/d/e" }
Tokenizer Filter
使用char filter进行替换
POST _analyze { "tokenizer": "standard", "char_filter": [ { "type": "mapping", "mappings": [ "- => _" ] } ], "text": "123-456, I-test! test-990 650-555-1234" }
char filter 替换表情符号
POST _analyze { "tokenizer": "standard", "char_filter": [ { "type": "mapping", "mappings": [ ":) => happy", ":( => sad" ] } ], "text": [ "I am felling :)", "Feeling :( today" ] }
white space and snowball
GET _analyze { "tokenizer": "whitespace", "filter": [ "stop", "snowball" ], "text": [ "The gilrs in China are playing this game!" ] }
输错结果如下
{ "tokens" : [ { "token" : "The", "start_offset" : 0, "end_offset" : 3, "type" : "word", "position" : 0 }, { "token" : "gilr", "start_offset" : 4, "end_offset" : 9, "type" : "word", "position" : 1 }, { "token" : "China", "start_offset" : 13, "end_offset" : 18, "type" : "word", "position" : 3 }, { "token" : "play", "start_offset" : 23, "end_offset" : 30, "type" : "word", "position" : 5 }, { "token" : "game!", "start_offset" : 36, "end_offset" : 41, "type" : "word", "position" : 7 } ] }
whitespace与stop
GET _analyze { "tokenizer": "whitespace", "filter": [ "stop", "snowball" ], "text": [ "The rain in Spain falls mainly on the plain." ] }
输错结果如下
GET _analyze { "tokenizer": "whitespace", "filter": [ "stop", "snowball" ], "text": [ "The rain in Spain falls mainly on the plain." ] }
输出结果下 大写的 The 还存在
{ "tokens" : [ { "token" : "The", "start_offset" : 0, "end_offset" : 3, "type" : "word", "position" : 0 }, { "token" : "rain", "start_offset" : 4, "end_offset" : 8, "type" : "word", "position" : 1 }, { "token" : "Spain", "start_offset" : 12, "end_offset" : 17, "type" : "word", "position" : 3 }, { "token" : "fall", "start_offset" : 18, "end_offset" : 23, "type" : "word", "position" : 4 }, { "token" : "main", "start_offset" : 24, "end_offset" : 30, "type" : "word", "position" : 5 }, { "token" : "plain.", "start_offset" : 38, "end_offset" : 44, "type" : "word", "position" : 8 } ] }
GET _analyze { "tokenizer": "whitespace", "filter": [ "lowercase", "stop", "snowball" ], "text": [ "The gilrs in China are playing this game!" ] }
正则表达式
GET _analyze { "tokenizer": "standard", "char_filter": [ { "type": "pattern_replace", "pattern": "http://(.*)", "replacement": "$1" } ], "text": "http://www.elastic.co" }
Dynamic Template和Index Template
什么是 Index Template
Index Template 的工作方式
Demo
创建索引模板
PUT _template/template_default { "index_patterns": [ "*" ], "order": 0, "version": 1, "settings": { "number_of_shards": 1, "number_of_replicas": 1 } } PUT /_template/template_test { "index_patterns": [ "test*" ], "order": 1, "settings": { "number_of_shards": 1, "number_of_replicas": 2 }, "mappings": { "date_detection": false, "numeric_detection": true } }
查看template信息
GET /_template/template_default GET /_template/temp*
写入新的数据,index以test开
PUT testtemplate/_doc/1 { "someNumber":"1", "someDate":"2019/01/01" } GET testtemplate/_mapping
可以看到日期没有转换成时间格式,字符串转换成数字类型
GET testtemplate/_settings
PUT testmy { "settings": { "number_of_replicas": 5 } } PUT testmy/_doc/1 { "key": "value" } GET testmy/_settings
DELETE testmy DELETE /_template/template_default DELETE /_template/template_test
什么是Dynaminc Mapping
Dynaminc Template
PUT my_index { "mappings": { "dynamic_templates": [ { "strings_as_boolean": { "match_mapping_type": "string", "match": "is*", "mapping": { "type": "boolean" } } }, { "strings_as_keywords": { "match_mapping_type": "string", "mapping": { "type": "text" } } } ] } } PUT my_index/_doc/2 { "firstName":"Ruan", "isVIP":"true" } GET my_index/_mapping
is开头的字段名修改成bool类,字符串的字段名设置为keyword
匹配规则参数
以name开头的字段,不以 middle结尾的
DELETE my_index #结合路径 PUT my_index { "mappings": { "dynamic_templates": [ { "full_name": { "path_match": "name.*", "path_unmatch": "*.middle", "mapping": { "type": "text", "copy_to": "full_name" } } } ] } } PUT my_index/_doc/1 { "name": { "first": "John", "middle": "Winston", "last": "Lennon" } } PUT my_index/_doc/2 { "namesamiddle": { "first": "asd", "middle": "dasd", "last": "Lennon" } } GET my_index/_search?q=full_name:John
相关阅读
Elasticsearch聚合分析简介
什么是聚合
集合的分类
Metric & Bucket
Bucket
Metric
一个Bucket的例子
按照目的地进行分组统计
GET kibana_sample_data_flights/_search { "size": 0, "aggs": { "flight_dest": { "terms": { "field": "DestCountry" } } } }
加入Metric 查看航班目的地的统计信息,增加平均,最高最低价格
GET kibana_sample_data_flights/_search { "size": 0, "aggs": { "flight_dest": { "terms": { "field": "DestCountry" }, "aggs": { "avg_price": { "avg": { "field": "AvgTicketPrice" } }, "max_price": { "max": { "field": "AvgTicketPrice" } }, "min_price": { "min": { "field": "AvgTicketPrice" } } } } } }
嵌套查看价格统计信息+天气信息
GET kibana_sample_data_flights/_search { "size": 0, "aggs": { "flight_dest": { "terms": { "field": "DestCountry" }, "aggs": { "stats_price": { "stats": { "field": "AvgTicketPrice" } }, "wather": { "terms": { "field": "DestWeather", "size": 5 } } } } } }
elasticsearch 添加索引字段
PUT legislation-qa/_mapping { "properties": { "repeal_date": { "type": "date", "fields": { "keyword": { "type": "keyword" } } } } }
查询数量超过 10000的问题
POST legislation/_search { "track_total_hits": true, "query": { "match_all": {} } }
自我测试
答案
测试2
答案
批量更新出现错误
Too many dynamic script compilations within, max: [75/5m]; please use indexed, or scripts with parameters instead; this limit can be changed by the [script.context.update.max_compilations_rate] setting')
大概意思就是每分钟编译的内容太多了,默认是75/5m,
那把这个设置大点就可以了:
PUT _cluster/settings { "transient" : { "script.max_compilations_rate" : "1000/1m" } }
修改 结果返回最大数量
PUT legislation/_settings { "index":{ "max_result_window":1000000 } }