ElasticSearch入门笔记

1 常用API

1 基础概念

  • ES与关系型数据库对应关系
关系型数据库 Elasticsearch 备注
Database Index
Table Type 6.x版本一个Index只有一个Type,7.x删除.
Record document
Schema mapping
Column Field
  • ES数据结构
    • 核心类型
      • 字符串:string、text、keyword
      • 整型:integer、long、short、byte
      • 浮点型:double、float、half_float、scaled_float
      • 布尔型:boolean
      • 日期:date
      • 范围类型:range(integer、long、float、double、date)
      • 二进制:binary
    • 复合类型
      • 对象:object
      • 嵌套:nested

2 HTTP常用API

(1)大纲

# 搜索
GET /_search

# 新增或替换,例:PUT /ecommerce/product/1
PUT /indexName/typeName/id值

# 新增
PUT /indexName/typeName/id值/_create

# 根据主键查询
GET /indexName/typeName/id

# 更新
POST /indexName/typeName/id/_update

# 删除
DELETE /indexName/typeName/id

(2)获取所有数据

GET /_search?timeout=10ms
{
  "query": {
    "match_all": {}
  }
}

{
  "took": 51, # 时间花费
  "timed_out": false,
  "_shards": { # 分片信息
    "total": 259,
    "successful": 259,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 723164, # 总数
    "max_score": 1, # 分值
    "hits": [ # 默认返回前十条
    	# ...
    ]
  }
}

(3)新增或替换Document

PUT /ecommerce/_doc/1
{
  "name": "gaolujie yagao",
  "desc": "gaoxiao meibai",
  "price": 30,
  "producer": "gaolujie producer",
  "tags": ["meibai","gangzhu"]
}

{
  "_index": "ecommerce",
  "_type": "_doc",
  "_id": "1",
  "_version": 3, # document的版本号,每次替换、更新、删除都会增长,用来保证文档的变更能够正确顺序执行,避免乱序造成数据丢失
  "result": "created", # 新增为created,更新为updated
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 2, # 严格递增的顺序号,每个文档一个,Shard级别严格递增,保证后写入的doc的_seq_no大于先写入的doc的_seq_no;
  "_primary_term": 1 # 每当primary shard发生重新分配,如重启、primary选举等,该值会递增;
}

(4) 根据主键查询

GET /ecommerce/_doc/1

{
  "_index": "ecommerce",
  "_type": "product",
  "_id": "1",
  "_version": 5, # 版本号
  "found": true,
  "_source": { # 原始数据
    "name": "gaolujie yagao3",
    "desc": "gaoxiao meibai",
    "price": 30,
    "producer": "gaolujie producer",
    "tags": [
      "meibai",
      "gangzhu"
    ]
  }
}

# 查询不存在的Document
{
  "_index": "ecommerce",
  "_type": "product",
  "_id": "2",
  "found": false
}

(5)更新Document

# 当前版本为4才允许更新(乐观锁),6.7后的版本为if_seq_no和if_primary_term
POST /ecommerce/_doc/1/_update?version=4
{
  "doc":{
    "name":"gaolujie yagao2"
  }
}

# 正常执行成功
{
  "_index": "ecommerce",
  "_type": "product",
  "_id": "1",
  "_version": 5,
  "result": "updated",
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 4,
  "_primary_term": 1
}

# 再次执行相同的更新,实际并未更新
{
  "_index": "ecommerce",
  "_type": "product",
  "_id": "1",
  "_version": 5,
  "result": "noop",
  "_shards": {
    "total": 0,
    "successful": 0,
    "failed": 0
  }
}

# 更新不存在的数据
{
  "error": {
    "root_cause": [
      {
        "type": "document_missing_exception",
        "reason": "[product][1]: document missing",
        "index_uuid": "xuKznBF0QDiMzkoPOulFhg",
        "shard": "2",
        "index": "ecommerce"
      }
    ],
    "type": "document_missing_exception",
    "reason": "[product][1]: document missing",
    "index_uuid": "xuKznBF0QDiMzkoPOulFhg",
    "shard": "2",
    "index": "ecommerce"
  },
  "status": 404
}

(6) 删除Document

DELETE /ecommerce/product/1

{
  "_index": "ecommerce",
  "_type": "product",
  "_id": "1",
  "_version": 6,
  "result": "deleted",
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 5,
  "_primary_term": 1
}

# 再次执行删除
{
  "_index": "ecommerce",
  "_type": "product",
  "_id": "1",
  "_version": 7, # 即使文档不存在,执行删除后版本号仍会递增
  "result": "not_found",
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 6,
  "_primary_term": 1
}

(7)注意点

  • document是不可变的,每次操作都存一个新的版本,老的版本先标记为deleted(逻辑删除),再新增一个新的版本,es会自动在适当的时机在后台自动删除(物理删除)标记为deleted的document;
  • partial update相比全量替换的优点:查询、修改、写回操作都发生在es的一个shard内部,避免了网络数据传输的开销,减少并发冲突的情况;

3 Java常用API

(1)准备

  • pom依赖
<!-- maven版本与待连接的ES版本要一致 -->
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>6.4.3</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-client</artifactId>
    <version>6.4.3</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>6.4.3</version>
</dependency>
  • RestHighLevelClient:Java操作ES的类
/**
 * Test scaffold that owns one shared {@link RestHighLevelClient} for all test methods.
 */
@Slf4j
public class EsTest {

    // Comma-separated node addresses, e.g. http://ip1:port1,http://ip2:port2
    private static String hostName = "";
    private static String userName = "";
    private static String password = "";

    // Created during class initialization, after the connection settings above are assigned.
    private static RestHighLevelClient client = elasticClient();

    /**
     * Builds the client: one {@link HttpHost} per entry in {@code hostName}, with
     * {@code userName}/{@code password} registered as default credentials for any auth scope.
     *
     * @return a new high-level REST client
     */
    private static RestHighLevelClient elasticClient() {
        HttpHost[] nodes = Arrays.stream(StringUtils.split(hostName, ","))
            .map(HttpHost::create)
            .toArray(HttpHost[]::new);
        RestClientBuilder builder = RestClient.builder(nodes);

        CredentialsProvider credentials = new BasicCredentialsProvider();
        credentials.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(userName, password));

        builder.setHttpClientConfigCallback(http -> http.setDefaultCredentialsProvider(credentials));
        return new RestHighLevelClient(builder);
    }

}

(2)大纲

// Outline of the basic document APIs. `index`, `type` and `id` are placeholders for the
// target index name, type name and document id; each result gets its own variable name
// so the snippet is valid as a single scope.

// Search
SearchResponse searchResponse = client.search(new SearchRequest());

// Index a document (create or replace); the string argument stands in for the object to serialize
IndexRequest indexRequest = new IndexRequest(index, type, id);
indexRequest.source(new ObjectMapper().writeValueAsBytes("某个对象"), XContentType.JSON);
IndexResponse indexResponse = client.index(indexRequest);

// Get by id
GetResponse getResponse = client.get(new GetRequest(index, type, id));

// Partial update; the string argument stands in for the key/value pairs to change
UpdateRequest updateRequest = new UpdateRequest(index, type, id);
updateRequest.doc("key:value数据对");
UpdateResponse updateResponse = client.update(updateRequest);

// Delete
DeleteResponse deleteResponse = client.delete(new DeleteRequest(index, type, id));

(3)获取所有数据

/**
 * Runs a search with an empty {@link SearchRequest} and logs the returned hits.
 */
@Test
public void search() throws IOException {
    SearchResponse result = client.search(new SearchRequest());
    log.info("hits:{}", result.getHits());
}

(4)新增或替换Document

/**
 * Indexes document ecommerce/product/1 and logs whether it was created or replaced.
 */
@Test
public void put() throws IOException {
    List<String> tagList = new ArrayList<>();
    tagList.add("meibai");
    tagList.add("fangzhu");

    Ecommerce product = new Ecommerce();
    product.setName("gaolujie yagao");
    product.setDesc("gaoxiao meibai");
    product.setPrice(30D);
    product.setProducer("gaolujie producer");
    product.setTags(tagList);

    IndexRequest indexRequest = new IndexRequest("ecommerce", "product", "1");
    indexRequest.source(new ObjectMapper().writeValueAsBytes(product), XContentType.JSON);

    // To allow creation only (never replace an existing document), set opType to create:
    // indexRequest.create(true);

    IndexResponse response = client.index(indexRequest);
    boolean created = CREATED.equals(response.getResult());
    boolean replaced = !created && UPDATED.equals(response.getResult());
    if (created) {
        log.info("创建 response:{}", response);
    } else if (replaced) {
        log.info("替换 response:{}", response);
    }
}

(5)根据主键查询

/**
 * Fetches document ecommerce/product/1 by id and logs its source when it exists.
 */
@Test
public void get() throws IOException {
    GetResponse response = client.get(new GetRequest("ecommerce", "product", "1"));
    // isExists() is the Java counterpart of the "found" field in the HTTP response.
    if (!response.isExists()) {
        return;
    }
    Map<String, Object> fields = response.getSource();
    log.info("source:{}", fields);
}

(6)更新Document

/**
 * Partially updates the name of document ecommerce/product/1, requesting version 2
 * on the update, and logs the response when the result is UPDATED.
 */
@Test
public void update() throws IOException {
    Map<String,Object> changes = new HashMap<>();
    changes.put("name","gaolujie yagao2");

    UpdateRequest request = new UpdateRequest("ecommerce", "product", "1").version(2);
    request.doc(changes);

    UpdateResponse response = client.update(request);
    if (!UPDATED.equals(response.getResult())) {
        return;
    }
    log.info("response:{}", response);
}

(7)删除Document

/**
 * Deletes document ecommerce/product/1 and logs the response when the result is DELETED.
 */
@Test
public void delete() throws IOException {
    DeleteResponse response = client.delete(new DeleteRequest("ecommerce", "product", "1"));
    if (!DELETED.equals(response.getResult())) {
        return;
    }
    log.info("response:{}", response);
}

2 常用检索API

1 基础检索概念

  • query string search:参数拼接在url,不带json参数;
  • query DSL:请求使用json格式来构建查询语法;

2 HTTP常用检索API

(1)大纲

# 搜索请求地址
GET /index/type/_search

# 查询所有
{
    "query":{ "match_all":{} }
}

# 分页
{
    "from":0, # 第一页
  	"size":1, # 每一页的大小
}

# 排序
{
    "sort":[
    	{fieldName: {"order": "asc"}} # 排序规则
  	],
}

# _source返回字段限定
{
 	"_source": [fieldName1,fieldName2,fieldName3]
}

# 全文检索,中间有空格将拆分为多个单词匹配
{
	"query":{ 
        "match":{
        	"name":"gaolujie yagao"
        } 
    }
}

# 短语检索,作为整个单词匹配,hits默认按照score排序
{
	"query":{ 
        "match_phrase":{
        	"name":"gaolujie yagao"
        } 
    }
}

# 多条件查询
{
  "query":{
    "bool":{
      "must":[
      ],
      "should":[
      ],
      "must_not":[
      ]
    }
  }
}

# 数组包含匹配
{
	"query":{ 
        "terms": {
            "tags": [
              "meibai",
              "qingxin"
            ]
        }
    }
}

(2)全量搜索、分页、排序、_source字段限定返回

GET /ecommerce/product/_search
{
  
  "query":{"match_all": {}}, # 全量搜索
  "from":0, # 第一页
  "size":1, # 每一页的大小
  "sort":[
    {"price": {"order": "asc"}} # 排序规则
  ],
  "_source": ["name","price","tags"] # source只返回限定字段
}

(3)全文检索、短语检索、条件过滤

GET /ecommerce/product/_search
{
  "query":{
    "bool":{
      "must":{
        "match":{ # 全文检索匹配
          "name":"gaolujie yagao" 
        }
      },
      "filter": { # filter下可以用bool进行嵌套多个过滤条件
        "range":{
          "price":{"gt": 25} # 价格大于25
        }
      }
    }
  }
}

# 上面的filter为简写,下面为完整写法
"filter": { 
    "bool": {
        "must":{
            "range":{
           	 	"price":{"gt": 25} 
            }
        }
    }
}

{
  "query":{
    "bool":{
      "must":{
        "match_phrase":{ # 短语检索匹配
          "name":"gaolujie yagao" 
        }
      }
    }
  }
}

{
  "query":{
    "bool":{
      "must":{
        "match_phrase_prefix":{ # 短语检索前缀匹配
          "name":"ya" 
        }
      }
    }
  }
}

(4)多条件、数组包含

  • must:必须满足;
  • should:可以满足、也可以不满足;
  • must_not:不能满足;
GET /ecommerce/product/_search
{
  "query":{
    "bool":{
      "must":[
        {
          "match":{
            "name":"yagao"
          }
        },
        {
          "terms": { # 数组包含meibai或qingxin
            "tags": [
              "meibai",
              "qingxin"
            ]
          }
        }
      ],
      "should": [ # 当minimum_should_match为0时,should只会影响score。
        {
          "prefix": { # name字段的前缀为gaolu
            "name": {
              "value": "gaolu"
            }
          }
        }
      ],
      "must_not": [
        {
          "match": {
            "price": 25
          }
        }
      ]
    }
  }
}

3 Java常用检索API

(1)全量搜索、分页、排序、_source字段限定返回

/**
 * match_all search on ecommerce/product: first page of size 1, sorted by price ascending,
 * returning only the name, price and tags fields of the source.
 */
@Test
public void test() throws IOException {
    SearchSourceBuilder body = new SearchSourceBuilder()
        .query(QueryBuilders.matchAllQuery())
        .from(0)
        .size(1)
        .sort("price", SortOrder.ASC)
        .fetchSource(new String[]{"name", "price", "tags"}, null);

    SearchRequest req = new SearchRequest("ecommerce").types("product");
    req.source(body);

    SearchResponse res = client.search(req, RequestOptions.DEFAULT);
    log.info("hits:{}", res.getHits());
}

(2)全文检索、短语检索、条件过滤

/**
 * Bool query on ecommerce/product: a match query on name in the must clause,
 * plus a filter clause requiring price greater than 25.
 */
@Test
public void match() throws IOException {
    // Must clause: full-text match on the name field.
    // Alternatives that can be used in its place:
    //   QueryBuilders.matchPhraseQuery("name", "gaolujie yagao")   - phrase match
    //   QueryBuilders.matchPhrasePrefixQuery("name", "ya")         - phrase-prefix match
    MatchQueryBuilder nameMatch = QueryBuilders.matchQuery("name", "gaolujie yagao");

    // Filter clause: the range condition wrapped in its own bool query.
    BoolQueryBuilder priceFilter = QueryBuilders.boolQuery()
        .must(QueryBuilders.rangeQuery("price").gt(25));

    BoolQueryBuilder root = QueryBuilders.boolQuery()
        .must(nameMatch)
        .filter(priceFilter);

    SearchRequest req = new SearchRequest("ecommerce").types("product");
    req.source(new SearchSourceBuilder().query(root));

    SearchResponse res = client.search(req, RequestOptions.DEFAULT);
    log.info("hist:{}",res.getHits());
}

(3)多条件、数组包含

/**
 * Bool query on ecommerce/product combining must, should and must_not clauses:
 * name matches "yagao" and tags contains meibai or qingxin (must), name has the
 * prefix "gaolu" (should), and price does not match 25 (must_not).
 */
@Test
public void multi() throws IOException {
    BoolQueryBuilder conditions = QueryBuilders.boolQuery()
        .must(QueryBuilders.matchQuery("name", "yagao"))
        .must(QueryBuilders.termsQuery("tags", "meibai", "qingxin"))
        .should(QueryBuilders.prefixQuery("name", "gaolu"))
        .mustNot(QueryBuilders.matchQuery("price", 25));

    SearchRequest req = new SearchRequest("ecommerce").types("product");
    req.source(new SearchSourceBuilder().query(conditions));

    SearchResponse res = client.search(req, RequestOptions.DEFAULT);
    log.info("hist:{}",res.getHits());
}

3 常用批量操作API

1 概述

  • 批量操作能够大大减少网络的请求次数,缩减网络开销。

2 常用批量操作HTTP API

(1)大纲

# 批量查询
GET /_mget
{
  "docs":[
    {
      "_index":index1,
      "_type":type1,
      "_id":id1
    },
    {
      "_index":index2,
      "_type":type2,
      "_id":id2
    }
  ]
}

# 限定index和type
GET /index/type/_mget
{
  "ids":[id1,id2]
}

{
  "docs":[
  	{
      "_id":id1
  	},
  	{
      "_id":id2
  	}
  ]
}

# 批量新增
POST /_bulk
{"create":{"_index":index1,"_type":type1,"_id":id1}}#create代表创建时存在即报错;
{} # 数据1
{"index":{"_index":index2,"_type":type2,"_id":id2}}#index代表新增或替换;id为空则自动分配
{} # 数据2

# 批量更新
POST /_bulk
{"update":{"_index":index1,"_type":type1,"_id":id1}}
{"doc":{field1:value1}} # partial update

# 批量删除
POST /_bulk
{"delete":{"_index":index1,"_type":type1,"_id":id1}}
{"delete":{"_index":index2,"_type":type2,"_id":id2}}

(2)批量操作

GET /_mget
{
  "docs":[
    {
      "_index":"ecommerce",
      "_type":"product",
      "_id":1
    },
    {
      "_index":"ecommerce",
      "_type":"product",
      "_id":2
    }
  ]
}

POST /_bulk
{"create":{"_index":"ecommerce","_type":"product","_id":4}} # 新增
{"name": "gaolujie yagao"}
{"index":{"_index":"ecommerce","_type":"product","_id":4}} # 新增或替换
{"name": "gaolujie yagao1"}
{"update":{"_index":"ecommerce","_type":"product","_id":4}} # 更新
{"doc":{"name":"test"}}
{"delete":{"_index":"ecommerce","_type":"product","_id":4}} # 最后删除掉

# 返回结果:每个操作都会返回对应的结果
{
  "took": 18,
  "errors": false,
  "items": [
    {
      "create": {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "4",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 2,
          "failed": 0
        },
        "_seq_no": 25,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "4",
        "_version": 2,
        "result": "updated",
        "_shards": {
          "total": 2,
          "successful": 2,
          "failed": 0
        },
        "_seq_no": 26,
        "_primary_term": 1,
        "status": 200
      }
    },
    {
      "update": {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "4",
        "_version": 3,
        "result": "updated", # 假设更新失败为NOOP
        "_shards": {
          "total": 2,
          "successful": 2,
          "failed": 0
        },
        "_seq_no": 27,
        "_primary_term": 1,
        "status": 200
      }
    },
    {
      "delete": {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "4",
        "_version": 4,
        "result": "deleted", # 假设删除失败为NOT_FOUND
        "_shards": {
          "total": 2,
          "successful": 2,
          "failed": 0
        },
        "_seq_no": 28,
        "_primary_term": 1,
        "status": 200
      }
    }
  ]
}

# 下面例子为新建失败的情况,HTTP状态码为409。
{
  "took": 4,
  "errors": true,
  "items": [
    {
      "create": {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "4",
        "status": 409,
        "error": {
          "type": "version_conflict_engine_exception",
          "reason": "[product][4]: version conflict, document already exists (current version [1])",
          "index_uuid": "U69yO3BCRwSs3Or9qjrpVA",
          "shard": "0",
          "index": "ecommerce"
        }
      }
    }
  ]
}

3 常用批量操作JAVA API

// mget: supported by RestHighLevelClient since version 6.3.
/**
 * Fetches ecommerce/product documents 1 and 22 in one multi-get request and logs
 * the ones that exist.
 */
@Test
public void mget() throws IOException {
    MultiGetRequest multiGetRequest = new MultiGetRequest();
    MultiGetRequest.Item item1 = new MultiGetRequest.Item("ecommerce", "product", "1");
    MultiGetRequest.Item item2 = new MultiGetRequest.Item("ecommerce", "product", "22");
    multiGetRequest.add(item1).add(item2);

    MultiGetResponse mgetRes = client.mget(multiGetRequest, RequestOptions.DEFAULT);
    List<GetResponse> hits = Arrays.stream(mgetRes.getResponses())
        // A failed item has no GetResponse (getResponse() returns null), so drop it
        // before dereferencing to avoid a NullPointerException.
        .filter(item -> !item.isFailed())
        .map(MultiGetItemResponse::getResponse)
        .filter(GetResponse::isExists)
        .collect(Collectors.toList());
    log.info("hits:{}", hits);
}

// bulk
/**
 * Sends one bulk request against ecommerce/product/4 containing, in order:
 * create, index, partial update, delete, and a second delete; then logs every item result.
 */
@Test
public void bulk() throws IOException {
    ObjectMapper json = new ObjectMapper();
    BulkRequest batch = new BulkRequest();

    // create: index request restricted to creation only
    batch.add(new IndexRequest("ecommerce", "product", "4")
        .create(true)
        .source(json.writeValueAsBytes(sampleProduct()), XContentType.JSON));

    // index: create or replace
    batch.add(new IndexRequest("ecommerce", "product", "4")
        .source(json.writeValueAsBytes(sampleProduct()), XContentType.JSON));

    // update: partial document carrying only the name
    Ecommerce nameOnly = new Ecommerce();
    nameOnly.setName("gaolujie yagao");
    batch.add(new UpdateRequest("ecommerce", "product", "4")
        .doc(json.writeValueAsBytes(nameOnly), XContentType.JSON));

    // delete, then delete the same id a second time
    batch.add(new DeleteRequest("ecommerce", "product", "4"));
    batch.add(new DeleteRequest("ecommerce", "product", "4"));

    BulkResponse bulkRes = client.bulk(batch, RequestOptions.DEFAULT);
    List<Object> itemResults = Arrays.stream(bulkRes.getItems()).collect(Collectors.toList());
    log.info("results:{}", itemResults);
}

/** Builds a fresh, fully populated sample product used by the create and index actions. */
private static Ecommerce sampleProduct() {
    List<String> tagList = new ArrayList<>();
    tagList.add("meibai");
    tagList.add("fangzhu");

    Ecommerce product = new Ecommerce();
    product.setName("gaolujie yagao");
    product.setDesc("gaoxiao meibai");
    product.setPrice(30D);
    product.setProducer("gaolujie producer");
    product.setTags(tagList);
    return product;
}

4 特殊情况

  • create:新增数据重复,可能抛出主键冲突异常,status为409;
  • update:更新不存在的数据,将抛出文档缺失的异常,status为404;
  • delete:删除不存在的文档,不抛出异常,result为not_found,status为404;
  • 操作数据成功时,status为200(新建成功时为201);300以上的返回码一般判定为错误;

4 嵌套聚合、下钻分析、聚合分析

1 概述

  • 聚合框架用于基于搜索查询提供聚合数据。
  • 有许多不同类型的聚合:
    • buckets:生成存储桶的一组聚合,每个存储桶都与一个键(key)和一个文档条件相关联。汇总结束时每个桶将获得一组文档;
    • metric:追踪和计算一组文档的指标的聚合;
    • matrix:对多个字段进行操作,并根据从文档字段中提取的值生成矩阵结果的一组聚合;
    • pipeline:聚合其他聚合的输出及其相关度量的聚合;
  • 聚合可以嵌套,存储桶聚合可以具有子聚合(存储桶或指标);

2 常用聚合操作HTTP API

(1)准备

PUT /ecommerce/_mapping/product
{
  "properties": {
    "tags":{
      "type": "text",
      "fielddata": true # 正排索引加载进内存,才能对分词的field执行聚合操作
    }
  }
}

(2)常规聚合

GET ecommerce/product/_search
{
  "size": 0, # hits结果不返回
  "aggs": {
    "all_tags": { # 任意取名称
      "terms": { # 根据字段分组
        "field": "tags", # 字段名
        "order": { # 排序规则,默认为降序
          "_key": "desc"
        }
      }
    }
  }
}

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 6,
    "max_score": 0,
    "hits": [] # 指定size为0,所以不返回数据
  },
  "aggregations": {
    "all_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "qingxin",# 每个bucket对应的值
          "doc_count": 1 # bucket内的数据量
        },
        {
          "key": "meibai",
          "doc_count": 5
        },
        {
          "key": "fangzhu",
          "doc_count": 5
        }
      ]
    }
  }
}

# 获取匹配的前几个:top_hits,包含或过滤:_source
GET ecommerce/product/_search
{
  "size": 0,
  "aggs": {
    "all_tags": {
      "terms": {
        "field": "tags"
      },
      "aggs":{
        "top_tags":{
          "top_hits": {
            "_source": {
              "includes": ["name"]
            }, 
            "size": 1
          }
        }
      }
    }
  }
}

# 此处为buckets中第一个值
{
    "key": "fangzhu",
    "doc_count": 5,
    "top_tags": { # top_hits的值
        "hits": {
            "total": 5,
            "max_score": 1,
            "hits": [ # 实际有5条数据,但size为1,所以返回1条数据。
                {
                    "_index": "ecommerce",
                    "_type": "product",
                    "_id": "2",
                    "_score": 1,
                    "_source": {
                    	"name": "gaolujie yagao"
                    }
                }
            ]
        }
    }
}

(3)指标聚合:聚合为一个指标值

  • 常见:平均、加权平均、最大、最小、百分位数、百分位排名、总数、值聚合;
  • 复合:统计(min、max、sum、count、avg);
  • 复杂:脚本式指标聚合、热门聚合;
GET ecommerce/product/_search
{
  "size": 0,
  "aggs": {
    "group_by_tags":{
      "terms": {
        "field": "tags"
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        },
        "max_price": {
          "max": {
            "field": "price"
          }
        },
        "min_price": {
          "min": {
            "field": "price"
          }
        },
        "sum_price":{
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

(4)桶聚合:聚合为一个数据集

  • 子聚合
    • depth_first:直接进行子聚合的计算;
    • breadth_first:计算出当前聚合的结果,针对结果对子聚合进行计算;
  • 范围分组、直方图分组、过滤器;
# 子聚合
GET /ecommerce/product/_search
{
  "size":0,
  "aggs": {
    "all_tags": {
      "terms": {
        "field": "tags",
        "collect_mode": "breadth_first", 
        "order": { # 根据聚合结果(平均价格)排序
          "avg_price": "desc"
        }
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}          

# 只展示aggs部分
"aggregations": {
    "all_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fangzhu",
          "doc_count": 7,
          "avg_price": {
            "value": 55
          }
        },
        {
          "key": "qingxin",
          "doc_count": 1,
          "avg_price": {
            "value": 40
          }
        },
        {
          "key": "meibai",
          "doc_count": 6,
          "avg_price": {
            "value": 26.666666666666668
          }
        }
      ]
    }
}

# 范围聚合
GET ecommerce/product/_search
{
  "size":0,
  "aggs": {
    "group_by_price": {
      "range": {
        "field": "price",
        "ranges": [
           {"from": 0,"to": 50},
           {"from": 50}
        ]
      },
      "aggs": {
        "avg_price": {
          "avg": {"field": "price"}
        }
      }
    }
  }
}

"aggregations": {
    "group_by_price": {
      "buckets": [
        {
          "key": "0.0-50.0",
          "from": 0,
          "to": 50,
          "doc_count": 7,
          "avg_price": {
            "value": 28.571428571428573
          }
        },
        {
          "key": "50.0-*",
          "from": 50,
          "doc_count": 1,
          "avg_price": {
            "value": 225
          }
        }
      ]
   }
}

# 直方图,其实就是固定间隔
GET ecommerce/product/_search
{
  "size":0,
  "aggs": {
    "group_by_price": {
      "histogram": {
        "field": "price",
        "interval": 50
      }, 
      "aggs": {
        "avg_price": {
          "avg": {"field": "price"}
        }
      }
    }
  }
}

# 日期固定间隔
GET sales/sale/_search
{
  "size": 0,
  "aggs": {
    "sales": {
      "date_histogram": {
        "field": "sale_date",
        "interval": "month", # 月度为间隔
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2021-04-01",
          "max": "2021-04-30"
        }
      }
    }
  }
}

3 常用聚合操作JAVA API

(1)常规聚合

/**
 * Terms aggregation "all_tags" on the tags field of ecommerce/product, with buckets
 * ordered by key descending; no hits are returned (size 0). Logs the resulting buckets.
 */
@Test
public void termsAggregation() throws IOException {
    SearchRequest searchRequest = new SearchRequest("ecommerce").types("product");

    SearchSourceBuilder source = new SearchSourceBuilder();
    source.size(0);
    TermsAggregationBuilder termsAgg = AggregationBuilders.terms("all_tags")
        .field("tags")
        // Order by the bucket key itself; BucketOrder.aggregation(...) is intended for
        // sub-aggregation paths, so use the dedicated key ordering (false = descending).
        .order(BucketOrder.key(false));

    source.aggregation(termsAgg);
    searchRequest.source(source);
    SearchResponse res = client.search(searchRequest, RequestOptions.DEFAULT);
    log.info("buckets:{}", ((Terms)res.getAggregations().get("all_tags")).getBuckets());
}

(2)指标聚合

/**
 * Terms aggregation "all_tags" on tags with avg/max/min/sum sub-aggregations over price;
 * no hits are returned (size 0). Logs the resulting buckets.
 */
@Test
public void test() throws IOException {
    TermsAggregationBuilder byTag = AggregationBuilders.terms("all_tags")
        .field("tags")
        .subAggregation(AggregationBuilders.avg("avg_price").field("price"))
        .subAggregation(AggregationBuilders.max("max_price").field("price"))
        .subAggregation(AggregationBuilders.min("min_price").field("price"))
        .subAggregation(AggregationBuilders.sum("sum_price").field("price"));

    SearchSourceBuilder body = new SearchSourceBuilder().size(0);
    body.aggregation(byTag);

    SearchRequest searchRequest = new SearchRequest("ecommerce").types("product");
    searchRequest.source(body);

    SearchResponse res = client.search(searchRequest, RequestOptions.DEFAULT);
    Terms allTags = res.getAggregations().get("all_tags");
    log.info("buckets:{}", allTags.getBuckets());
}

(3)桶聚合

// Sub-aggregation, with buckets ordered by the sub-aggregation's value
/**
 * Terms aggregation "all_tags" on tags using breadth-first collection, ordered by the
 * "avg_price" sub-aggregation descending; no hits are returned (size 0). Logs the buckets.
 */
@Test
public void subAggregation() throws IOException {
    TermsAggregationBuilder byTag = AggregationBuilders.terms("all_tags")
        .field("tags")
        .collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST)
        .order(BucketOrder.aggregation("avg_price", false))
        .subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    SearchSourceBuilder body = new SearchSourceBuilder().size(0);
    body.aggregation(byTag);

    SearchRequest searchRequest = new SearchRequest("ecommerce").types("product");
    searchRequest.source(body);

    SearchResponse res = client.search(searchRequest, RequestOptions.DEFAULT);
    Terms allTags = res.getAggregations().get("all_tags");
    log.info("buckets:{}", allTags.getBuckets());
}

// Range aggregation
/**
 * Range aggregation "group_by_price" on price with the ranges [0, 50) and [50, *),
 * each carrying an "avg_price" sub-aggregation; no hits are returned (size 0).
 * Logs the resulting buckets.
 */
@Test
public void rangeAggregation() throws IOException {
    SearchRequest searchRequest = new SearchRequest("ecommerce").types("product");

    SearchSourceBuilder source = new SearchSourceBuilder().size(0);

    RangeAggregationBuilder rangeAgg = AggregationBuilders.range("group_by_price")
        .field("price")
        .addRange(0.0, 50)
        .addRange(new RangeAggregator.Range("50.0-*", 50D, null));

    rangeAgg.subAggregation(AggregationBuilders.avg("avg_price").field("price"));

    searchRequest.source(source.aggregation(rangeAgg));
    SearchResponse res = client.search(searchRequest, RequestOptions.DEFAULT);
    // A range aggregation is returned as a Range, not a Terms; casting to Terms would
    // throw ClassCastException. Fully qualified because the name is not otherwise used here.
    org.elasticsearch.search.aggregations.bucket.range.Range priceRanges =
        res.getAggregations().get("group_by_price");
    log.info("buckets:{}", priceRanges.getBuckets());
}

// 直方图类似,不写出来了。

// Date histogram with a fixed calendar interval
/**
 * Date histogram aggregation named "date_histogram" on sale_date of the sales index:
 * monthly interval, keys formatted as yyyy-MM-dd, empty buckets included (min doc count 0),
 * with extended bounds 2021-04-01 to 2021-04-30; no hits are returned (size 0).
 * Logs the resulting buckets.
 */
@Test
public void dateRangeAggregation() throws IOException {
    SearchRequest searchRequest = new SearchRequest("sales");

    SearchSourceBuilder source = new SearchSourceBuilder().size(0);

    DateHistogramAggregationBuilder dateHistogramAgg = AggregationBuilders.dateHistogram("date_histogram")
        .field("sale_date")
        .dateHistogramInterval(DateHistogramInterval.MONTH)
        .format("yyyy-MM-dd")
        .minDocCount(0)
        .extendedBounds(new ExtendedBounds("2021-04-01", "2021-04-30"));

    searchRequest.source(source.aggregation(dateHistogramAgg));
    SearchResponse res = client.search(searchRequest, RequestOptions.DEFAULT);
    // A date_histogram aggregation is returned as a Histogram, not a Terms; casting to Terms
    // would throw ClassCastException. Fully qualified because the name is not otherwise used here.
    org.elasticsearch.search.aggregations.bucket.histogram.Histogram salesByMonth =
        res.getAggregations().get("date_histogram");
    log.info("buckets:{}", salesByMonth.getBuckets());
}
posted @ 2022-07-07 19:53  月下小魔王  阅读(136)  评论(0编辑  收藏  举报