【ES】ElasticSearch中"distinct","count"和"group by"的实现
distinct
实现 1:
SELECT DISTINCT(user_id) FROM table WHERE user_id_type = 3;
{
"query": {
"term": {
"user_id_type": 3
}
},
"collapse": {
"field": "user_id"
}
}
注:有时候这个 user_id 不是keyword的话,就需要在它后面加一个keyword,不然不支持的
报错大概是这个样子的:
{
"error": {
"root_cause": [
{
"type": "search_context_exception",
"reason": "unknown type for collapse field `user_id`, only keywords and numbers are accepted"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "event-*",
"node": "98_Cvv7KT8m9jmN8",
"reason": {
"type": "search_context_exception",
"reason": "unknown type for collapse field `user_id`, only keywords and numbers are accepted"
}
}
]
},
"status": 500
}
加上keyword就是这样子:
{
"query": {
"term": {
"user_id_type": 3
}
},
"collapse": {
"field": "user_id.keyword"
}
}
输出:
{
...
"hits": {
"hits": [
{
"_index": "es_qd_mkt_visitor_packet_dev_v1_20180621",
"_type": "ad_crowd",
"_source": {
"user_id": "wx2af8414b502d4ca2_oHtrD0Vxv-_8c678figJNHmtaVQQ",
"user_id_type": 3
},
"fields": {
"user_id": [
"wx2af8414b502d4ca2_oHtrD0Vxv-_8c678figJNHmtaVQQ"
]
}
}
]
}
}
count + distinct
SELECT COUNT(DISTINCT(user_id)) FROM table WHERE user_id_type = 3;
去重后统计总数
{
"query": {
"term": {
"user_id_type": 3
}
},
"aggs": {
"count": {
"cardinality": {
"field": "user_id.keyword"
}
}
}
}
结果:
{
...
"hits": {
...
},
"aggregations": {
"count": {
"value": 7
}
}
}
这里的 hits 是匹配的其中一条数据,是做示例用的。如果只需要总数,这个 hits 感觉没啥用。
真正的结果是在 aggregations.count.value
count + group by
SELECT COUNT(user_id) FROM table GROUP BY user_id_type;
{
"aggs": {
"user_type": {
"terms": {
"field": "user_id_type.keyword"
}
}
}
}
结果:
{
...
"hits": {
...
},
"aggregations": {
"user_type": {
...
"buckets": [
{
"key": 4,
"doc_count": 1220
},
{
"key": 3,
"doc_count": 488
}
]
}
}
}
count + distinct + group by
SELECT COUNT(DISTINCT(user_id)) FROM table GROUP BY user_id_type;
{
"aggs": {
"user_type": {
"terms": {
"field": "user_id_type.keyword"
},
"aggs": {
"count": {
"cardinality": {
"field": "user_id.keyword"
}
}
}
}
}
}
** 总结:对于既有group by又有distinct的查询要求,需要在aggs中嵌套子aggs **
{
...
"hits": {
...
},
"aggregations": {
"user_type": {
...
"buckets": [
{
"key": 4,
"doc_count": 1220, //去重前数据1220条
"count": {
"value": 276 //去重后数据276条
}
},
{
"key": 3,
"doc_count": 488, //去重前数据488条
"count": {
"value": 121 //去重后数据121条
}
}
]
}
}
}