es7.9.0

elk各版本下载地址 https://elasticsearch.cn/download/

官方操作文档 https://www.elastic.co/guide/en/elasticsearch/reference/7.x/analyzer-anatomy.html

参考博客 https://www.jianshu.com/p/29e5b1a7ce61

# 查询所有cluster健康度
GET _cat/health?v

# 查看所有index
GET _cat/indices?v

# 创建mapping
PUT /index_demo
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "name":{
        "type": "text"
      }
    }
  }
}
# Create the ecommerce index with IK (ik_max_word) as its default analyzer.
# Requires the IK analysis plugin to be installed on every node.
# NOTE(review): "stopwords" is a standard-analyzer style option; confirm whether IK honors it.
PUT /ecommerce
{
  "settings": {
    "analysis": {
      "analyzer": {
        "default": {
          "type": "ik_max_word",
          "stopwords": "_english_"
        }
      }
    }
  }
}

# Test the built-in standard analyzer (named explicitly, so the index default is not used here)
GET /ecommerce/_analyze
{
  "analyzer": "standard",
  "text": "a pig is in the house"
}

# Custom analyzer: strip HTML tags, map & to and, lowercase the tokens, and remove the stop words a and the
PUT /index_demo
{
  "settings": {
    "analysis": {
      "char_filter": {
        "&_to_and": {
          "type": "mapping",
          "mappings": ["&=> and"]
        }
      },
      "filter": {
        "my_stopwords":{
          "type": "stop",
          "stopwords": ["the", "a"]
        }
      },
      "analyzer": {
        "my_analyzer":{
          "type":"custom",
          "char_filter": ["html_strip", "&_to_and"],
          "tokenizer":"standard",
          "filter":["lowercase","my_stopwords"]
        }
      }
    }
  }
}    
# 新增 有则更新 删除原有再新增  
POST /ecommerce/_doc/1
{
  "name":"小米手机",
  "desc":"支持5G、全面屏6.4",
  "price":3000,
  "producer":"小米",
  "tags":["mobile","5G"]
}

# 新增 有则更新 删除原有再新增  
PUT /ecommerce/_doc/2
{
  "name":"华为MacBook",
  "desc":"支持5G、全面屏15.2寸",
  "price":8000,
  "producer":"Huawei",
  "tags":["笔记本电脑","huawei"]
}

# Explicit create: fails with an error if the id already exists
POST /ecommerce/_create/3
{
  "name":"华为P40 pro",
  "desc":"支持5G、超清摄像",
  "price":12000,
  "producer":"Huawei 成都",
  "tags":["mobile","huawei","5G"]
}

# 添加 可重复添加
POST /ecommerce/_doc
{
  "name":"Ipad mini 5",
  "desc":"7.9英寸",
  "price":4000,
  "producer":"apple",
  "tags":["笔记本电脑","apple"]
}

# 更新 如果id不存在则报错
POST /ecommerce/_update/1
{
  "doc": {
    "price":2000
  }
}

# 查询所有document
GET /ecommerce/_search
# 查不到不会报错
GET /ecommerce/_doc/399

# 删除 查不到不会报错
DELETE /ecommerce/_doc/455

# 查询名称包含华为的商品,并且按照售价降序排序 sort默认升序
GET /ecommerce/_search?q=name:华为&sort=price:desc

# 只返回source指定字段
GET /ecommerce/_search?_source=name,price

# 获取index动态映射
GET /ecommerce/_mapping

# 将一个field索引两次来解决字符串排序问题 一个分词,用来进行搜索;一个不分词,用来进行排序
GET /ecommerce/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "tags.keyword": {
        "order": "desc"
      }
    }
  ]
}

# 同时包含两个keyword
GET /ecommerce/_search
{
  "query": {
    "match": {
      "name": {
        "query": "小米 Huawei",
        "operator": "and"
      }
    }
  }
}

# 至少包含75%
GET /ecommerce/_search
{
  "query": {
    "match": {
      "name": {
        "query": "小米 Huawei apple vivo",
        "minimum_should_match": "75%"
      }
    }
  }
}

# full-text search 只要有一个keyword匹配即可 constant_score通过boost指定score值 如果缺失boost字段 默认score=1.0 只过滤不计算score constant_score字段可缺失 缺失时score为0
GET /ecommerce/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "match": {
          "name": "华为 小米"
        }        
      },
      "boost": 1.2
    }
  }
}

# 查询所有并分页返回指定字段 从0开始
GET /ecommerce/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "match_all": {}
      },
      "boost": 1.2
    }
  },
  "from": 0,
  "size": 3,
  "_source": ["name", "price"]
}

# bool query. filter keeps only 5000 <= price <= 8000 and does not contribute to the score.
# should clauses add to the score of the hits that match them; minimum_should_match is the
# number of should clauses a hit has to satisfy in order to be returned.
# bool queries can be nested (see the second must_not clause).
GET /ecommerce/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "华为"
          }
        }
      ],
      "should": [
        {
          "match": {
            "desc": {
              "query": "5G",
              "boost": 2
            }
          }
        },
        {
          "match": {
            "price": "4000"
          }
        },
        {
          "match": {
            "producer": "伯明翰"
          }
        }
      ],
      "must_not": [
        {
          "match": {
            "tags": "apple"
          }
        },
        {
          "bool": {
            "must": {
              "match": {
                "desc": "xxx"
              }
            }
          }
        }
      ],
      "minimum_should_match": 2,
      "filter": [
        {
          "range": {
            "price": {
              "gte": 5000,
              "lte": 8000
            }
          }
        }
      ]
    }
  }
}

# Same range clause moved out of filter into a scoring clause (must): the score rises by 1.
# filter does not compute a score, which is cheaper; compare this range with the one in filter above.
GET /ecommerce/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "华为"
          }
        },
        {
          "range": {
            "price": {
              "gte": 5000,
              "lte": 8000
            }
          }
        }
      ],
      "should": [
        {
          "match": {
            "desc": "5G"
          }
        }
      ],
      "must_not": [
        {
          "match": {
            "tags": "apple"
          }
        }
      ]
    }
  }
}

# must and filter take arrays; with a single clause the array brackets can be omitted
GET /ecommerce/_search
{
  "query": {
    "bool": {
      "must": {
        "match": {
          "name": "华为"
        }
      },
      "filter": {
        "range": {
          "price": {
            "gte": 5000,
            "lte": 8000
          }
        }
      }
    }
  }
}

# Phrase search: the terms of the phrase must all appear, in the given order
GET /ecommerce/_search
{
  "query": {
    "match_phrase": {
      "producer": "华为 小米"
    }
  }
}

# Highlight the matches. Without settings the <em> tag is used; here the plain highlighter and
# custom tags are requested. fragment_size is the size of each highlighted fragment in characters.
# Keys under "fields" must be field names, so the shared options live at the highlight level.
GET /ecommerce/_search
{
  "query": {
    "match": {
      "producer": "Huawei"
    }
  },
  "highlight": {
    "pre_tags": ["<tag1>"],
    "post_tags": ["<tag2/>"],
    "type": "plain",
    "fragment_size": 5,
    "number_of_fragments": 2,
    "no_match_size": 150,
    "fields": {
      "producer": {}
    }
  }
}

# Aggregations: a terms aggregation on a text field fails unless fielddata is enabled on that
# field first (alternatively aggregate on its .keyword sub-field).
# Top-level size=0 suppresses the hits so only the buckets are returned;
# size inside terms is the number of buckets to return.
PUT /ecommerce/_mapping
{
  "properties": {
    "tags": {
      "type": "text",
      "fielddata": true
    }
  }
}

# Count documents per tag; return only the top 2 buckets and no hits
GET /ecommerce/_search
{
  "size": 0,
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags",
        "size": 2
      }
    }
  }
}
# 验证
GET /ecommerce/_validate/query?explain
{
  "query": {
    "match": {
      "nam": "TEXT"
    }
  }
}

# 多字段匹配
GET /ecommerce/_search
{
  "query": {
    "multi_match": {
      "query": "ttt",
      "fields": ["name", "desc"]
    }
  }
}
# 计算每个tag下的商品数量 分词后注意tags与tags.keyword的区别 terms中的字段必须是数组类型 否则报错 非数组类型必须使用term 聚合操作只能用terms不能用term
GET /ecommerce/_search
{
  "query": {
    "match": {
      "name": "华为"
    }
  },
  "aggs": {
    "all_tags": {
      "terms": {
        "field": "tags.keyword",
        "size": 10
      }
    }
  }
}

# 计算每个tag下的商品的平均价格,并且按照平均价格降序排序
GET /ecommerce/_search
{
  "size": 0,
  "aggs": {
    "all_tags": {
      "terms": {
        "field": "tags",
        "order": {
          "avg_price": "desc"
        }
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

# 按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格 按区间分组 0-2000/2000-5000/5000-
GET /ecommerce/_search
{
  "size": 0,
  "aggs": {
    "group_by_price": {
      "range": {
        "field": "price",
        "ranges": [
          {
            "to": 2000
          },
          {
            "from": 2000,
            "to": 5000
          },
          {
            "from": 5000
          }
        ]
      },
      "aggs": {
        "group_by_tags": {
          "terms": {
            "field": "tags"
          },
          "aggs": {
            "avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}

# Optimistic concurrency control. Since 7.0 the internal version can no longer be used for this;
# pass the _seq_no and _primary_term of the copy you read (GET /ecommerce/_doc/2 returns both).
# The update is rejected with a version conflict if the document changed in the meantime.
# The numbers below are sample values - substitute the ones returned by the GET.
POST /ecommerce/_update/2?if_seq_no=3&if_primary_term=1
{
  "doc": {
    "tags": ["laptop", "Huawei"]
  }
}

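# version_type=external: the version number is maintained by the caller; the write succeeds only if the supplied version is strictly greater than the stored one (external_gte also accepts an equal version)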
PUT /ecommerce/_doc/1?version=2&version_type=external
{
    "name" : "小米10Pro",
    "desc" : "支持5G、全面屏6.4",
    "price" : 3000,
    "producer" : "小米",
    "tags" : [
      "xiaomi",
      "mobile",
      "5G"
    ]
}

# 在修改冲突时指定重试的次数
POST /ecommerce/_update/1?retry_on_conflict=2
{
  "doc": {
    "price": 3000
  }
}

# mget批量操作 不同的index 并对返回结果过滤
GET /_mget
{
  "docs": [
    {
      "_index": "ecommerce",
      "_id": 1,
      "_source": "price"
    },
    {
      "_index": "goods",
      "_id": 2,
      "_source": ["name", "price"]
    }
  ]
}

# 同一个index不同的field
GET /ecommerce/_mget
{
  "docs": [
    {
      "_id": 1
    },
    {
      "_id": 2
    }
  ]
}

# 同一个index相同的field ids获取时不能指定_source
GET /ecommerce/_mget
{
  "ids": [1,2]
}

# bulk批量操作 括号不能换行 update需要带上doc
POST /_bulk
{"delete": {"_index": "ecommerce", "_id": 3}}
{"create": {"_index": "ecommerce", "_id": 3}}
{"price":5000}
{"update":{"_index":"ecommerce","_id":3}}
{"doc":{"price":6000}}

POST /ecommerce/_bulk
{"delete": {"_id": 3}}
{"create": {"_id": 3}}
{"price": 5000}
{"update": {"_id": 3}}
{"doc": {"price": 6000}}

# Scroll search: fetch the first page of 3 hits and keep the search context alive for 1 minute.
# (A keep-alive of 1ms would expire before the follow-up request could be sent.)
GET /ecommerce/_search?scroll=1m
{
  "query": {
    "match_all": {}
  },
  "size": 3
}

# Fetch the next page; scroll_id is the _scroll_id returned by the previous response
# (the value below is a sample), and scroll renews the keep-alive.
GET /_search/scroll
{
  "scroll": "1m",
  "scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFEtFNnVaWGNCTkZUUmY3VFdxZ1VIAAAAAAAACCwWVFhicjJ5dFpSRENxVVJPdDROcnpkQQ=="
}

# Average price per color, plus the average price of each brand within each color
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color"
      },
      "aggs": {
        "color_avg_price": {
          "avg": {
            "field": "price"
          }
        },
        "group_by_brand": {
          "terms": {
            "field": "brand"
          },
          "aggs": {
            "brand_avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}

# histogram: bucket by fixed price intervals and sum the price in each bucket
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "price": {
      "histogram": {
        "field": "price",
        "interval": 2000
      },
      "aggs": {
        "revenue": {
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

# date_histogram: number of TVs sold per month
# (min_doc_count 0 with extended_bounds also returns the empty months)
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "sales": {
      "date_histogram": {
        "field": "sold_date",
        "calendar_interval": "month",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2019-01-01",
          "max": "2020-12-31"
        }
      }
    }
  }
}

# Drill-down: revenue per brand for every quarter, plus the quarter's total revenue
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "group_by_sold_date": {
      "date_histogram": {
        "field": "sold_date",
        "calendar_interval": "quarter",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2016-01-01",
          "max": "2017-12-31"
        }
      },
      "aggs": {
        "group_by_brand": {
          "terms": {
            "field": "brand"
          },
          "aggs": {
            "sum_price": {
              "sum": {
                "field": "price"
              }
            }
          }
        },
        "total_sum_price": {
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

# global bucket: compare one brand's average price with the average price over all brands
# (the global aggregation ignores the query)
GET /tvs/_search
{
  "size": 0,
  "query": {
    "term": {
      "brand": {
        "value": "长虹"
      }
    }
  },
  "aggs": {
    "single_brand_avg_price": {
      "avg": {
        "field": "price"
      }
    },
    "all": {
      "global": {},
      "aggs": {
        "all_brand_avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

# filter bucket: a brand's average price over the last 150, 140 and 130 days
GET /tvs/_search
{
  "size": 0,
  "query": {
    "term": {
      "brand": {
        "value": "长虹"
      }
    }
  },
  "aggs": {
    "recent_150d": {
      "filter": {
        "range": {
          "sold_date": {
            "gte": "now-150d"
          }
        }
      },
      "aggs": {
        "recent_150d_avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    },
    "recent_140d": {
      "filter": {
        "range": {
          "sold_date": {
            "gte": "now-140d"
          }
        }
      },
      "aggs": {
        "recent_140d_avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    },
    "recent_130d": {
      "filter": {
        "range": {
          "sold_date": {
            "gte": "now-130d"
          }
        }
      },
      "aggs": {
        "recent_130d_avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

# cardinality (approximate distinct count): number of distinct brands sold per month.
# Below precision_threshold distinct values the count is expected to be close to exact;
# the memory cost is roughly precision_threshold * 8 bytes (100 * 8 here).
# NOTE(review): the aggregation is named distinct_colors although it counts brand values.
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "months": {
      "date_histogram": {
        "field": "sold_date",
        "calendar_interval": "month"
      },
      "aggs": {
        "distinct_colors": {
          "cardinality": {
            "field": "brand",
            "precision_threshold": 100
          }
        }
      }
    }
  }
}

# 多字段搜索之best fields:主要是将某一个field匹配尽可能多的关键词的doc优先返回回来

# dis_max只取某一个query最大的分数返回,完全不考虑其他query的分数 tie_breaker将其他query的分数,乘以tie_breaker,然后综合那个最高分的query,一起计算并返回 0<tie_breaker<1 minimum_should_match 去长尾 只有匹配至少这个值的字段才返回

GET /forum/_search
{
  "query": {
    "multi_match": {
        "query":                "java solution",
        "type":                 "best_fields", 
        "fields":               [ "title^2", "content" ],
        "tie_breaker":          0.3,
        "minimum_should_match": "50%" 
    }
  } 
}

GET /forum/_search
{
  "query": {
    "dis_max": {
      "queries":  [
        {
          "match": {
            "title": {
              "query": "java beginner",
              "minimum_should_match": "50%",
              "boost": 2
            }
          }
        },
        {
          "match": {
            "body": {
              "query": "java beginner",
              "minimum_should_match": "30%"
            }
          }
        }
      ],
      "tie_breaker": 0.3
    }
  } 
}
# 多字段搜索之most fields:主要是尽可能返回更多field匹配到某个关键词的doc,优先返回回来
GET /forum/_search
{
   "query": {
        "multi_match": {
            "query":  "learning courses",
            "type":   "most_fields", 
            "fields": [ "sub_title", "sub_title.std" ]
        }
    }
}

# most fields 与 best fields 各有优缺点 可使用原生cross fields
GET /forum/_search
{
  "query": {
    "multi_match": {
      "query": "Peter Smith",
      "type": "cross_fields", 
      "operator": "and",
      "fields": ["author_first_name", "author_last_name"]
    }
  }
} 

# 近似匹配 包含java或elasticsearch或两者之间距离不超过50的前50条数据
GET /forum/_search 
{
  "query": {
    "match": {
      "content": "java elasticsearch"
    }
  },
  "rescore": {
    "window_size": 50,
    "query": {
      "rescore_query": {
        "match_phrase": {
          "content": {
            "query": "java elasticsearch",
            "slop": 50
          }
        }
      }
    }
  }
}

# 前缀搜索 和filter一样不计算score 单不会像filter一样利用缓存 而是在document中逐一比对 prefix越短 性能越差 此外还有通配符搜索、正则搜索 性能都差
GET /forum/_search
{
  "query": {
    "prefix": {
      "articleID.keyword": {
        "value": "X"
      }
    }
  }
}

# fuzzy 模糊搜索 自动将拼写错误的搜索文本,进行纠正,纠正以后去尝试匹配索引中的数据 fuzziness 指定的修订最大次数,默认为2
GET /forum/_search
{
  "query": {
    "fuzzy": {
      "title": {
        "value": "hell",
        "fuzziness": 2
      }
    }
  }
}
GET /forum/_search
{
  "query": {
    "match": {
      "title": {
        "query": "helio",
        "fuzziness": "AUTO",
        "operator": "and"
      }
    }
  }
}

# percentiles 分别统计latency字段50%、95%、99%的百分位数
GET /website/_search 
{
  "size": 0,
  "aggs": {
    "latency_percentiles": {
      "percentiles": {
        "field": "latency",
        "percents": [
          50,
          95,
          99
        ]
      }
    }
  }
}

# percentile ranks 计算指定field值在200内占比、600内占比
GET /website/_search 
{
  "size": 0,
  "aggs": {
    "group_by_province": {
      "terms": {
        "field": "province"
      },
      "aggs": {
        "latency_percentile_ranks": {
          "percentile_ranks": {
            "field": "latency",
            "values": [
              200,
              600
            ]
          }
        }
      }
    }
  }
}

# nested object 类似这样的嵌套对象结构
PUT /website/_doc/6
{
  "title": "花无缺发表的一篇帖子",
  "content":  "我是花无缺,大家要不要考虑一下投资房产和买股票的事情啊。。。",
  "tags":  [ "投资", "理财" ],
  "comments": [ 
    {
      "name":    "小鱼儿",
      "comment": "什么股票啊?推荐一下呗",
      "age":     28,
      "stars":   4,
      "date":    "2016-09-01"
    },
    {
      "name":    "黄药师",
      "comment": "我喜欢投资房产,风,险大收益也大",
      "age":     31,
      "stars":   5,
      "date":    "2016-10-22"
    }
  ]
}
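# Searching several fields of the same inner object: with the default object mapping the query below does not behave as intended - it matches document 6 although no single comment has both name 黄药师 and age 28, because the inner objects are flattened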
GET /website/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "comments.name": "黄药师" }},
        { "match": { "comments.age":  28      }} 
      ]
    }
  }
}
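# comments has to be mapped as nested. An existing index cannot be re-created and an object field cannot be switched to nested in place, so apply this mapping to a fresh index (delete or rename the old one first) and re-index the documents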
PUT /website
{
  "mappings": {
      "properties": {
        "comments": {
          "type": "nested", 
          "properties": {
            "name":    { "type": "text"  },
            "comment": { "type": "text"  },
            "age":     { "type": "short"   },
            "stars":   { "type": "short"   },
            "date":    { "type": "date"    }
          }
        }
      }
    }
}
# 并重新组织DSL
GET /website/_search 
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "title": "花无缺"
          }
        },
        {
          "nested": {
            "path": "comments",
            "query": {
              "bool": {
                "must": [
                  {
                    "match": {
                      "comments.name": "黄药师"
                    }
                  },
                  {
                    "match": {
                      "comments.age": 31
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  }
}

# nested object 聚合操作
GET /website/_search 
{
  "size": 0, 
  "aggs": {
    "comments_path": {
      "nested": {
        "path": "comments"
      }, 
      "aggs": {
        "group_by_comments_date": {
          "date_histogram": {
            "field": "comments.date",
            "calendar_interval": "month",
            "format": "yyyy-MM"
          },
          "aggs": {
            "avg_stars": {
              "avg": {
                "field": "comments.stars"
              }
            }
          }
        }
      }
    }
  }
}

# 在nested object字段分组中进行非nested object字段分组
GET /website/_search 
{
  "size": 0,
  "aggs": {
    "comments_path": {
      "nested": {
        "path": "comments"
      },
      "aggs": {
        "group_by_comments_age": {
          "histogram": {
            "field": "comments.age",
            "interval": 10
          },
          "aggs": {
            "reverse_path": {
              "reverse_nested": {}, 
              "aggs": {
                "group_by_tags": {
                  "terms": {
                    "field": "tags.keyword"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

 

posted @ 2021-02-04 11:07  agasha  阅读(149)  评论(0编辑  收藏  举报