【ElasticSearch】聚合Aggregation
【ElasticSearch】聚合Aggregation
text 类型的字段默认禁止聚合/排序操作,可以通过在 mapping 中设置 fielddata=true 开启(注意:fielddata 会占用较多堆内存,条件允许时建议优先使用 keyword 类型或 keyword 子字段):
PUT cms_search_inside_0d1a60ff-654d-4c1d-9d92-795ff0f9/_mapping { "properties": { "mc_0_pubOrg": { "type": "text", "fielddata": true } } }
1、分组聚合,无子聚合
GET myindex/_search
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"siteId": [
"1298113079338340354"
],
"boost": 1
}
},
{
"range": {
"resourcePublicationDate": {
"from": "2023-01-01 00:00:00",
"to": "2023-12-31 00:00:00",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"aggregations": {
"groupByFieldId": {
"terms": {
"field": "catalogId",
"size": 100,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "asc"
},
{
"_key": "asc"
}
]
}
}
}
}
结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 17,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"groupByFieldId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1461259099665141761",
"doc_count" : 2
},
{
"key" : "1460860333350993921",
"doc_count" : 3
},
{
"key" : "1460860104652374017",
"doc_count" : 12
}
]
}
}
}
Java实现
// Terms aggregation without sub-aggregations: counts matching documents per "catalogId".
// Names that are not declared here (siteIdList, startTime, endTime, sort, number, list,
// restHighLevelClient) are expected to come from the enclosing method/class.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.from(0);
// size(0): no hits are requested, only the aggregation section of the response is used.
searchSourceBuilder.size(0);

// Every must(...) clause has to match: site filter, publication-date range and three flag filters.
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.must(QueryBuilders.termsQuery(ElasticsearchConstants.ES_QUERY_SITE_ID, siteIdList));
RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery(ElasticsearchConstants.ES_QUERY_RESOURCE_PUBLICATION_DATE);
rangeQueryBuilder.gte(startTime);
rangeQueryBuilder.lte(endTime);
boolQueryBuilder.must(rangeQueryBuilder);
boolQueryBuilder.must(QueryBuilders.termQuery(ElasticsearchConstants.ES_QUERY_STATUS, 3));
boolQueryBuilder.must(QueryBuilders.termQuery(ElasticsearchConstants.ES_QUERY_ENABLE, 1));
boolQueryBuilder.must(QueryBuilders.termQuery(ElasticsearchConstants.ES_QUERY_TEMPLATE_STATUS, 30));

// Buckets are ordered by document count: ascending only when sort is exactly "asc",
// descending for any other value (including null).
BucketOrder order = BucketOrder.count("asc".equals(sort));
TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("groupByFieldId")
        .field("catalogId")
        .size(number)
        .order(order);
searchSourceBuilder.aggregation(termsAggregationBuilder);
searchSourceBuilder.query(boolQueryBuilder);

String[] indices = new String[]{"myindex"};
SearchRequest request = Requests.searchRequest(indices).source(searchSourceBuilder);
SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
if (response.status() == RestStatus.OK) {
    // NOTE: option 1 and option 2 are two ALTERNATIVE ways of reading the same aggregation.
    // Keep only one of them in real code; executing both adds every bucket to `list` twice.

    // Option 1: cast the aggregation directly. Shorter, but throws ClassCastException
    // when the returned aggregation is not a ParsedStringTerms.
    ParsedStringTerms groupByFieldId = (ParsedStringTerms) response.getAggregations().asMap().get("groupByFieldId");
    List<? extends Terms.Bucket> buckets = groupByFieldId.getBuckets();
    if (CollectionUtil.isNotEmpty(buckets)) {
        for (Terms.Bucket bucket : buckets) {
            // Bucket key is the catalogId, doc count is the number of matching documents.
            StatisticResultVo vo = new StatisticResultVo();
            vo.setId(bucket.getKeyAsString());
            vo.setNumber(bucket.getDocCount());
            list.add(vo);
        }
    }

    // Option 2: look the aggregation up and check its type before casting, so an
    // unexpected aggregation type is skipped instead of raising ClassCastException.
    Map<String, Aggregation> aggregationMap = response.getAggregations().asMap();
    Aggregation aggregation = aggregationMap.get("groupByFieldId");
    if (aggregation instanceof ParsedStringTerms) {
        ParsedStringTerms parsedStringTerms = (ParsedStringTerms) aggregation;
        List<? extends Terms.Bucket> groupBucketList = parsedStringTerms.getBuckets();
        if (CollectionUtil.isNotEmpty(groupBucketList)) {
            for (Terms.Bucket groupBucket : groupBucketList) {
                StatisticResultVo vo = new StatisticResultVo();
                vo.setId(groupBucket.getKeyAsString());
                vo.setNumber(groupBucket.getDocCount());
                list.add(vo);
            }
        }
    }
}
2、分组聚合,有子聚合
GET myindex/_search
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"siteId": [
"1298113079338340354"
],
"boost": 1
}
},
{
"range": {
"accessTime": {
"from": "2023-01-01 00:00:00",
"to": "2023-12-31 00:00:00",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"aggregations": {
"groupByFieldId": {
"terms": {
"field": "catId",
"size": 100,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "asc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"uvCount": {
"cardinality": {
"field": "accessIp"
}
}
}
}
}
}
结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"groupByFieldId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1460860104652374017",
"doc_count" : 3,
"uvCount" : {
"value" : 1
}
},
{
"key" : "1460860333350993921",
"doc_count" : 3,
"uvCount" : {
"value" : 1
}
}
]
}
}
}
Java实现
// Terms aggregation with a sub-aggregation: per "catId" bucket, the document count is used
// as PV and the cardinality (distinct count) of "accessIp" as UV.
// Names that are not declared here (siteIdList, startTime, endTime, sort, number, list, log,
// restHighLevelClient) are expected to come from the enclosing method/class.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.from(0);
// size(0): no hits are requested, only the aggregation section of the response is used.
searchSourceBuilder.size(0);

// Filter by site and by access-time range (both bounds inclusive).
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.must(QueryBuilders.termsQuery("siteId", siteIdList));
RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("accessTime");
rangeQueryBuilder.gte(startTime);
rangeQueryBuilder.lte(endTime);
boolQueryBuilder.must(rangeQueryBuilder);

// Buckets are ordered by document count: ascending only when sort is exactly "asc",
// descending for any other value (including null).
BucketOrder order = BucketOrder.count("asc".equals(sort));
searchSourceBuilder.aggregation(
        AggregationBuilders.terms("groupByFieldId")
                .field("catId")
                .size(number)
                .order(order)
                .subAggregation(AggregationBuilders.cardinality("uvCount").field("accessIp")));
searchSourceBuilder.query(boolQueryBuilder);

String[] indices = new String[]{"myindex"};
log.info("DSL:" + searchSourceBuilder.toString());
SearchRequest request = Requests.searchRequest(indices).source(searchSourceBuilder);
SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
if (response.status() == RestStatus.OK) {
    ParsedStringTerms groupByFieldId = (ParsedStringTerms) response.getAggregations().asMap().get("groupByFieldId");
    List<?> buckets = groupByFieldId.getBuckets();
    if (CollectionUtil.isNotEmpty(buckets)) {
        for (Object bucket : buckets) {
            ParsedStringTerms.ParsedBucket parsedBucket = (ParsedStringTerms.ParsedBucket) bucket;
            ParsedCardinality cardinalityValue = (ParsedCardinality) parsedBucket.getAggregations().asMap().get("uvCount");
            // Plain narrowing casts replace the deprecated `new Long(x).intValue()`;
            // the resulting int value is the same.
            int pvCount = (int) parsedBucket.getDocCount();
            int uvCount = (int) cardinalityValue.getValue();

            StatisticResultVo vo = new StatisticResultVo();
            vo.setId(parsedBucket.getKeyAsString());
            vo.setNumber(pvCount);
            vo.setNumber2(uvCount);
            list.add(vo);
        }
    }
}
3、多重分组聚合
GET myindex/_search
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"siteId": {
"value": "1298113079338340354",
"boost": 1
}
}
},
{
"range": {
"accessTime": {
"from": "2023-01-01 00:00:00",
"to": "2023-12-31 23:59:59",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"aggregations": {
"groupByTime": {
"terms": {
"field": "timeym",
"size": 9999999,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": {
"_key": "asc"
}
},
"aggregations": {
"groupByIp": {
"terms": {
"field": "accessIp",
"size": 9999999,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"uvCount": {
"cardinality": {
"field": "accessIp"
}
}
}
}
}
}
}
}
结果
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.13/security-minimal-setup.html to enable security.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"groupByTime" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "202309",
"doc_count" : 1,
"groupByIp" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "0:0:0:0:0:0:0:1",
"doc_count" : 1,
"uvCount" : {
"value" : 1
}
}
]
}
},
{
"key" : "202310",
"doc_count" : 7,
"groupByIp" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "0:0:0:0:0:0:0:1",
"doc_count" : 4,
"uvCount" : {
"value" : 1
}
},
{
"key" : "192.168.100.21",
"doc_count" : 2,
"uvCount" : {
"value" : 1
}
},
{
"key" : "10.25.62.4",
"doc_count" : 1,
"uvCount" : {
"value" : 1
}
}
]
}
}
]
}
}
}
Java实现
// Nested terms aggregations: group by a time-bucket field, then by "accessIp" inside each
// time bucket, with a cardinality sub-aggregation on "accessIp" at the innermost level.
// Names that are not declared here (statisticsVo, tableName, list, log, restHighLevelClient)
// are expected to come from the enclosing method/class.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.from(0);
// size(0): no hits are requested, only the aggregation section of the response is used.
searchSourceBuilder.size(0);

BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
// Site filter (optional)
if (StrUtil.isNotEmpty(statisticsVo.getSiteId())) {
    boolQueryBuilder.must(QueryBuilders.termQuery("siteId", statisticsVo.getSiteId()));
}
// Access-time range (both bounds inclusive)
RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("accessTime");
rangeQueryBuilder.gte(statisticsVo.getStartTime());
rangeQueryBuilder.lte(statisticsVo.getEndTime());
boolQueryBuilder.must(rangeQueryBuilder);
searchSourceBuilder.query(boolQueryBuilder);

// Aggregation: dateType 3 groups on "timeym", 2 on "timeymd", anything else on "timeymdh".
// The two flags are evaluated once here and reused when formatting bucket keys below.
boolean groupByMonth = statisticsVo.getDateType().equals(3);
boolean groupByDay = statisticsVo.getDateType().equals(2);
String field;
if (groupByMonth) {
    field = "timeym";
} else if (groupByDay) {
    field = "timeymd";
} else {
    field = "timeymdh";
}
// NOTE(review): "uvCount" is the cardinality of accessIp inside a bucket that is already keyed
// by accessIp, so it can only ever be 1 (see the sample response). If a per-period UV is what
// is wanted, the cardinality probably belongs directly under "groupByTime" - confirm.
TermsAggregationBuilder groupByIpAggregation = AggregationBuilders.terms("groupByIp")
        .field("accessIp")
        .size(9999999)
        .order(BucketOrder.count(false))
        .subAggregation(AggregationBuilders.cardinality("uvCount").field("accessIp"));
searchSourceBuilder.aggregation(
        AggregationBuilders.terms("groupByTime")
                .field(field)
                .size(9999999)
                .order(BucketOrder.key(true))
                .subAggregation(groupByIpAggregation));
log.info("大屏访问量 DSL:" + searchSourceBuilder.toString());

SearchRequest request = Requests.searchRequest(tableName).source(searchSourceBuilder);
SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
if (response.status() == RestStatus.OK) {
    ParsedStringTerms groupByTime = (ParsedStringTerms) response.getAggregations().asMap().get("groupByTime");
    List<?> timeList = groupByTime.getBuckets();
    if (CollectionUtil.isNotEmpty(timeList)) {
        for (Object timeObject : timeList) {
            ParsedStringTerms.ParsedBucket timeBucket = (ParsedStringTerms.ParsedBucket) timeObject;
            String timeKey = timeBucket.getKeyAsString();
            // Plain narrowing cast replaces the deprecated `new Long(x).intValue()`.
            int timeTotal = (int) timeBucket.getDocCount();

            TemplateAccessVo item = new TemplateAccessVo();
            // Re-format the bucket key for display. Month keys look like "202309" in the sample
            // response; day/hour keys are presumably yyyyMMdd / yyyyMMddHH - verify against the index.
            if (groupByMonth) {
                item.setCountTime(timeKey.substring(0, 4) + "-" + timeKey.substring(4, 6));
            } else if (groupByDay) {
                item.setCountTime(timeKey.substring(0, 4) + "-" + timeKey.substring(4, 6) + "-" + timeKey.substring(6, 8));
            } else {
                item.setCountTime(timeKey.substring(8, 10) + ":00");
            }
            item.setPvCount(timeTotal);

            ParsedStringTerms groupByIp = (ParsedStringTerms) timeBucket.getAggregations().asMap().get("groupByIp");
            List<?> ipList = groupByIp.getBuckets();
            if (CollectionUtil.isNotEmpty(ipList)) {
                List<TemplateAccessVo> ipStatistics = new ArrayList<>();
                item.setIpStatistics(ipStatistics);
                for (Object ipObject : ipList) {
                    ParsedStringTerms.ParsedBucket ipBucket = (ParsedStringTerms.ParsedBucket) ipObject;
                    ParsedCardinality cardinalityValue = (ParsedCardinality) ipBucket.getAggregations().asMap().get("uvCount");

                    TemplateAccessVo sub = new TemplateAccessVo();
                    sub.setCountIP(ipBucket.getKeyAsString());
                    sub.setPvCount((int) ipBucket.getDocCount());
                    sub.setUvCount((int) cardinalityValue.getValue());
                    ipStatistics.add(sub);
                }
            }
            list.add(item);
        }
    }
}
4、日期聚合
按天聚合
"aggregations": {
"dateHistogram": {
"date_histogram": {
"field": "myDate",
"format": "yyyy-MM-dd",
"interval": "day",
"offset": 0,
"order": {
"_key": "asc"
},
"keyed": false,
"min_doc_count": 0
}
}
}
按月聚合
"aggregations": {
"dateHistogram": {
"date_histogram": {
"field": "myDate",
"format": "yyyy-MM",
"interval": "month",
"offset": 0,
"order": {
"_key": "asc"
},
"keyed": false,
"min_doc_count": 0
}
}
}
按年聚合
"aggregations": {
"dateHistogram": {
"date_histogram": {
"field": "myDate",
"format": "yyyy",
"interval": "year",
"offset": 0,
"order": {
"_key": "asc"
},
"keyed": false,
"min_doc_count": 0
}
}
}
// Builds a date_histogram aggregation on "myDate" whose interval, key format and extended
// bounds depend on `type`: 1 = by day, 2 = by month, anything else = by year.
int type = 1;
DateHistogramInterval interval;
String format;
String start;
String end;
// `type` is a primitive int, so it is compared with == (the original `type.equals(1)`
// does not compile).
if (type == 1) {
    interval = DateHistogramInterval.DAY;
    format = "yyyy-MM-dd";
    start = "2023-10-01";
    end = "2023-10-31";
} else if (type == 2) {
    interval = DateHistogramInterval.MONTH;
    format = "yyyy-MM";
    start = "2022-09";
    end = "2023-10";
} else {
    interval = DateHistogramInterval.YEAR;
    format = "yyyy";
    start = "2022";
    end = "2023";
}
// Aggregation
// calendarInterval(...) replaces dateHistogramInterval(...), which is deprecated since
// Elasticsearch 7.2. minDocCount(0) together with extendedBounds(...) requests buckets for
// the whole start..end range, including periods that contain no documents; the bound
// strings are written in the same pattern as `format`.
DateHistogramAggregationBuilder aggregationBuilder = AggregationBuilders.dateHistogram("myDate")
        .field("myDate")
        .calendarInterval(interval)
        .format(format)
        .minDocCount(0)
        .order(BucketOrder.key(true))
        .extendedBounds(new LongBounds(start, end));
GET cms_search_inside_0d1a60ff-654d-4c1d-9d92-795ff0f9/_search { "query": { "bool": { "must": [ { "term": { "siteId": { "value": "1298113079338340354", "boost": 1 } } }, { "wildcard": { "searchType": { "wildcard": "*wenjian*", "boost": 1 } } }, { "multi_match": { "query": "西安市未央区", "fields": [ "resourceSummary^1.0", "title^1.0" ], "type": "best_fields", "operator": "OR", "analyzer": "ik_smart", "slop": 0, "prefix_length": 0, "max_expansions": 50, "minimum_should_match": "75%", "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "fuzzy_transpositions": true, "boost": 1 } } ], "must_not": [ { "terms": { "cmsCatalogId": [ "1531451844967424002", "1460863730129059841", "1531451955084681217", "1484063351423234049" ], "boost": 1 } } ], "adjust_pure_negative": true, "boost": 1 } }, "aggregations": { "group": { "terms": { "field": "mc_0_pubOrg", "size": 2000, "min_doc_count": 1, "shard_min_doc_count": 0, "show_term_doc_count_error": false, "order": [ { "_count": "desc" }, { "_key": "asc" } ] } } }, "highlight": { "fields": { "title": { "fragment_size": 800000, "number_of_fragments": 0 }, "resourceSummary": { "fragment_size": 800000, "number_of_fragments": 0 } } }, "collapse": { "field": "templateId", "inner_hits": { "name": "collapse", "ignore_unmapped": true, "from": 0, "size": 0, "version": false, "seq_no_primary_term": false, "explain": false, "track_scores": true, "sort": [ { "_score": { "order": "desc" } } ] } } }
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· PowerShell开发游戏 · 打蜜蜂
· 在鹅厂做java开发是什么体验
· 百万级群聊的设计实践
· WPF到Web的无缝过渡:英雄联盟客户端的OpenSilver迁移实战
· 永远不要相信用户的输入:从 SQL 注入攻防看输入验证的重要性
2022-01-06 【Rust】String