ES基础

一、概念

1.NRT 近实时,基本上是实时的 2.Cluster 集群 3.Node 节点 4.document&field 5.Index 6.Type 7.shard 8.replica

 

二、简单的集群操作和crud

查看集群健康状态
GET /_cat/health?v
查看索引 
GET /_cat/indices?v
删除索引
DELETE /test_index
添加索引和记录
PUT /test_index/test_type/1
{ "name":"zhangsan", "age":12, "gender":"man" }
获取记录
GET /test_index/test_type/1
#替换必须带上所有字段
PUT /test_index/test_type/1
{ "name":"lisi", "age":12, "gender":"man" }

 

三、搜索

PUT /ecommerce/product/1
{
    "name" : "gaolujie yagao",
    "desc" :  "gaoxiao meibai",
    "price" :  30,
    "producer" :      "gaolujie producer",
    "tags": [ "meibai", "fangzhu" ]
}

PUT /ecommerce/product/2
{
    "name" : "jiajieshi yagao",
    "desc" :  "youxiao fangzhu",
    "price" :  25,
    "producer" :      "jiajieshi producer",
    "tags": [ "fangzhu" ]
}

PUT /ecommerce/product/3
{
    "name" : "zhonghua yagao",
    "desc" :  "caoben zhiwu",
    "price" :  40,
    "producer" :      "zhonghua producer",
    "tags": [ "qingxin" ]
}

GET /ecommerce/product/_search
{
  "query": {"match_all": {}}
}
#page from 0 
GET ecommerce/product/_search
{
  "query": {"match": {
    "name": "yagao"
  }},
  "_source": ["name","price","producer"], 
  "sort": [
    {
      "price": {
        "order": "desc"
      }
    }
  ],"from": 0,"size": 2
}
#搜索商品名称包含yagao,而且售价大于25元的商品
GET /ecommerce/product/_search
{
    "query" : {
        "bool" : {
            "must" : {
                "match" : {
                    "name" : "yagao" 
                }
            },
            "filter" : {
                "range" : {
                    "price" : { "gt" : 25 } 
                }
            }
        }
    }
}

GET /ecommerce/product/_search
{
  "query": {
    "bool": {
      "must": {
        "match": {
          "name": "yagao"
        }
      },
      
      "filter":{
        "range":{
          "price":{"gt":25}
        }
      }
      
    }
  }
}


GET /ecommerce/product/_search
{
  "query": {
    "match": {
      "producer": "zhonghua producer"
    }
  }
}
GET /ecommerce/product/_search
{
  "query": {
    "match_phrase": {
      "producer": "gaolujie producer"
    }
  }
}


GET /ecommerce/product/_search
{
  "query": {
    "match": {
      "producer": "zhonghua producer"
    }
  },
  "highlight": {"fields": {"producer": {}}}
}

 四、聚合搜索

GET /ecommerce/product/_search

PUT /ecommerce/_mapping/product
{
  "properties": {"tags":{"type":"text","fielddata": true}}
}


GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
       
      }
    }
  }
}


GET /ecommerce/product/_search
{
  "query": {"match_phrase": {
    "name": "yagao"
  }}, 
  "aggs": {
    "groupbyprice":{
      "range": {
        "field": "price",
        "ranges": [
          {
            "from": 0,
            "to": 20
          },{
            "from": 20,
            "to": 40
          },{
            "from": 40,
            "to": 50
          }
        ]
      }
    },
    
    "groupbytags": {
      "terms": {
        "field": "tags","order": {
          "avg_price": "asc"
        }
      },"aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

 

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 0.25811607,
    "hits": [
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "2",
        "_score": 0.25811607,
        "_source": {
          "name": "jiajieshi yagao",
          "desc": "youxiao fangzhu",
          "price": 25,
          "producer": "jiajieshi producer",
          "tags": [
            "fangzhu"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "1",
        "_score": 0.25811607,
        "_source": {
          "name": "gaolujie yagao",
          "desc": "gaoxiao meibai",
          "price": 30,
          "producer": "gaolujie producer",
          "tags": [
            "meibai",
            "fangzhu"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "3",
        "_score": 0.25811607,
        "_source": {
          "name": "zhonghua yagao",
          "desc": "caoben zhiwu",
          "price": 40,
          "producer": "zhonghua producer",
          "tags": [
            "qingxin"
          ]
        }
      }
    ]
  },
  "aggregations": {
    "groupbyprice": {
      "buckets": [
        {
          "key": "0.0-20.0",
          "from": 0,
          "to": 20,
          "doc_count": 0
        },
        {
          "key": "20.0-40.0",
          "from": 20,
          "to": 40,
          "doc_count": 2
        },
        {
          "key": "40.0-50.0",
          "from": 40,
          "to": 50,
          "doc_count": 1
        }
      ]
    },
    "groupbytags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fangzhu",
          "doc_count": 2,
          "avg_price": {
            "value": 27.5
          }
        },
        {
          "key": "meibai",
          "doc_count": 1,
          "avg_price": {
            "value": 30
          }
        },
        {
          "key": "qingxin",
          "doc_count": 1,
          "avg_price": {
            "value": 40
          }
        }
      ]
    }
  }
}

 五、字段和乐观锁

  _id可以指定,不指定的话自动生成。

  _source,可以指定显示字段

  _type,_index索引和type

  _version控制锁

 

#返回结果只包含name,price字段
GET /ecommerce/product/1?_source=name,price
#创建索引
PUT /my_index/my_type/1
{ "test_field":"abc" }

    #使用es内部版本号:文档当前_version=1时,请求必须带?version=1才能更新成功,即两个版本号必须相同
    #使用外部版本号:文档当前_version=1时,请求必须带?version>1&version_type=external才能成功,比如说?version=2&version_type=external,即版本号必须大于当前版本号

PUT /my_index/my_type/1?version=7&version_type=external
{ "test_field":"dad"}

 六、脚本更新

GET /test_index/test_type/10

PUT /test_index/test_type/11/
{
  "num":0,"tags":[],"tag":""
}
#新建test-add-tags.groovy文件,里面内容为 ctx._source.tag+=new_tag
#上面脚本中不加引号的new_tag为参数,通过params传入
POST /test_index/test_type/11/_update
{ "script": { "lang": "groovy", "file": "test-add-tags", "params": { "new_tag":"tag1" } } }
#如果存在id为10的数据,就执行脚本;不存在则插入upsert中的内容。
POST /test_index/test_type/10/_update
{ "script": "ctx._source.num+=1" ,"upsert": {"num":0,"tag":[]} }

  七、批量查找和删除

#mget   
GET /test_index/_mget
{
  "docs":[
    { "_id":11},{ "_id":10}
    
    ]
}

PUT /_bulk
{"delete":{"_index":"test_index","_type":"test_type","_id":11}}
{"index":{"_index":"test_index","_type":"test_type","_id":"15"}}
{"num":10,"tag":[]}


PUT /test_index/test_type/_bulk
{"index":{"_id":15}}
{"num":15,"tag":[1,2]}
{"update":{"_id":15,"_retry_on_conflict":3}}
{"doc":{"num":16,"tag":[]}}

  八、index的shard个数一经指定,不能改变

  一般情况下路由公式是 hash(id) % shardNum = position,当shardNum改变时,同一个id计算出的position会发生变化,就找不到document原来所在的位置。

 九、es的数据写一致性如何保证的

  在写请求时可以指定其活跃数来保证。put /index/type/id?consistency=quorum

   consistency的枚举:one ,all(all shard),quorum=int( (primary + number_of_replicas) / 2 ) + 1,默认为quorum

  当设置为quorum时,如果活跃的shard数小于quorum,写请求就会等待,直到活跃数恢复到quorum;可以指定等待的超时时间(建议带上时间单位,如30s)。put /index/type/id?consistency=quorum&timeout=30s

  十、mapping

   es  search?q=单词,es  search?q=字段:单词 

    GET /test_index/test_type/_search?q=单词 ,当单词中包含特殊符号(如 - 等)时,也会对单词进行分词,所以这是full text检索,可以理解为部分匹配。

原理是在保存document时,会建立一个_all 字段,去匹配_all字段即可。

 

{

"name": "jack",
"age": 26,
"email": "jack@sina.com",
"address": "guangzhou"
}

_all :"jack 26 jack@sina.com guangzhou",

有些字段类型,比如日期,在保存时,已经指定了为日期类型,不会分词,在es  search?q=字段:单词时必须精确匹配。

PUT /website/article/1
{
"post_date": "2017-01-01",
"title": "my first article",
"content": "this is my first article in this website",
"author_id": 11400
}

PUT /website/article/2
{
"post_date": "2017-01-02",
"title": "my second article",
"content": "this is my second article in this website",
"author_id": 11400
}

PUT /website/article/3
{
"post_date": "2017-01-03",
"title": "my third article",
"content": "this is my third article in this website",
"author_id": 11400
}

GET /website/article/_search?q=2017-01-01 3个结果 (部分匹配,因为对2017-01-01进行拆分了)
GET /website/article/_search?q=post_date:2017-01-01 1 (时间为特殊类型,必须全量匹配)

  

GET /website/_mapping/article
{
"website": { "mappings": { "article": { "properties": { "author_id": { "type": "long" }, "content": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "post_date": { "type": "date" }, "title": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } } } } }

 

 mapping,就是index的type的元数据,每个type都有一个自己的mapping,决定了数据类型,建立倒排索引的行为,还有进行搜索的行为

PUT /website
{
  "mappings": {
    "article":{
      "properties": {
        "author_id":{"type":"long"},
        "title":{"type":"text","analyzer": "english"},
        "content":{"type":"text","analyzer": "standard"},
        "post_date":{"type":"date"},
        "publisher_id":{"type":"keyword"}
      }
    }
  }
}

只能新建mapping,和添加字段指定mapping,不能更新原有字段mapping。

object类型创建

{
  "company": {
    "mappings": {
      "employee": {
        "properties": {
          "address": {
            "properties": {
              "city": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "country": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "province": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "age": {
            "type": "long"
          },
          "join_date": {
            "type": "date"
          },
          "name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    }
  }
}

本小节主要通过查询例子,然后引出mapping中的字段类型和倒排索引,full text和精确查询,来加深mapping的感性的认识,最后说明mapping如何创建。

十一、Query DSL

GET /website/article/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "content": "this"
          }
        },{
          "match": {
            "author_id": 11400
          }
        }
      ] 
    }
  }
}

GET /website/article/_search
{"_source": "title", 
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "author_id": 11400
          }
        }
      ],
      "should": [
        {
          "match": {
            "content": "this"
          }
        }
      ],
      "must_not": [
        {"match": {
          "_id": 3
        }}
      ]
    }
  },"sort": [
    {
      "post_date": {
        "order": "asc"
      }
    }
  ]
}
GET /test_index/test_type/_search
{
  "query": {
    "term": {
      "field1": "test2"
    }
  }
}

GET /test_index/test_type/_search
{
  "query": {
    "terms": 
      {
        "field": ["test2","test"]
      } 
  }
}

bool
must,must_not,should,filter

每个子查询都会计算一个document针对它的相关度分数,然后bool综合所有分数,合并为一个分数,当然filter是不会计算分数的

 

{
    "bool": {
        "must":     { "match": { "title": "how to make millions" }},
        "must_not": { "match": { "tag":   "spam" }},
        "should": [
            { "match": { "tag": "starred" }}
        ],
        "filter": {
          "bool": { 
              "must": [
                  { "range": { "date": { "gte": "2014-01-01" }}},
                  { "range": { "price": { "lte": 29.99 }}}
              ],
              "must_not": [
                  { "term": { "category": "ebooks" }}
              ]
          }
        }
    }
}

 十二、scroll批量查询和使用场景(reindex)

批量查询指定页数,类似分段查找。

GET /my_index/_search/?scroll=1m
{
"query":{
"match_all": {}
},
"sort":["_doc"],
"size":1
}
GET /_search/scroll
{
"scroll": "1m", 
"scroll_id" : "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAAAr4Fi1qYWV6ZUtxVC1tX1B2U0J5YTNEa1EAAAAAAAAK9BYtamFlemVLcVQtbV9QdlNCeWEzRGtRAAAAAAAACvYWLWphZXplS3FULW1fUHZTQnlhM0RrUQAAAAAAAAr3Fi1qYWV6ZUtxVC1tX1B2U0J5YTNEa1EAAAAAAAAK9RYtamFlemVLcVQtbV9QdlNCeWEzRGtR"
}

如果想全部index重新建立新索引。可以根据上面的查询,然后批量插入到一个新的索引中,最终通过批量操作,把原来索引的别名指向新索引即可。

 

PUT my_index_new/my_type/1
{
"type":"2017-01-02"
}
PUT my_index_new/my_type/2
{
"type":"2017-01-03"
}


POST /_bulk
{"index":{ "_index": "my_index_new", "_type": "my_type", "_id": "2" }}
{"title":"2017-01-02"}

 


POST /_aliases
{
"actions": [
{ "remove": { "index": "my_index", "alias": "good_index" }},
{ "add": { "index": "my_index_new", "alias": "good_index" }}
]
}

 

es流程

 

 上图中,当机器故障时,os cache中的数据会丢失,可以从translog回放,恢复到os cache中。disk保存的是上一个commit点的数据。目前的数据=disk+os cache数据。

posted @ 2020-05-30 21:26  javabeginer  阅读(238)  评论(0编辑  收藏  举报