Elasticsearch 字段去重统计:实现 SQL 中 COUNT(DISTINCT field) 的效果

 1 public long countDistinctField(String esIndex, String countField, SearchSourceBuilder sourceBuilder) {
 2          long count = 0;
 3          if (StringUtils.isBlank(esIndex) || StringUtils.isBlank(countField)) {
 4              return count;
 5          }
 6  
 7          SearchRequest searchRequest = new SearchRequest();
 8          searchRequest.indices(esIndex);
 9          String identifier = UUID.randomUUID().toString();
10          AggregationBuilder aggregationBuilder = AggregationBuilders.cardinality(identifier).field(countField);
11          sourceBuilder.aggregation(aggregationBuilder);
12          sourceBuilder.size(0);
13          searchRequest.source(sourceBuilder);
14          try {
15              SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
16              Histogram histogram = (Histogram) result.getAggregations().asMap().get(countField);
17              long total_value = 0;
18              for (Histogram.Bucket t : histogram.getBuckets()) {
19                  Cardinality cardinality = t.getAggregations().get(identifier);
20                  long value = cardinality.getValue();
21                  total_value = total_value + value;
22              }
23              return total_value;
24          } catch (Exception e) {
25              log.error("Count field failed!", e);
26          }
27          return 0;
28      }

 

posted @ 2020-06-18 18:47  seufelix  阅读(3124)  评论(0编辑  收藏  举报