druid:timeseries,topn,groupby查询
查询案例汇总
- druid支持三种数据查询类型。timeseries,topn,groupby
- timeseries是无维度,topn支持单维度,groupby支持多维度。
- post http://120.xx.22.xx:18082/druid/v2
基本差别
"queryType": "timeseries"
区分
"queryType": "topN",
"threshold": 100000,
"dimension": "app_ver",
"metric": "access_duration"
区分
"queryType": "groupBy",
"threshold": 100000,
"dimensions": ["app_ver", "channel"],
"metric": "access_duration"
可以很明显的看出:
topN拥有dimension和metric
groupBy拥有dimensions和metric
当然threshold也是必须的
timeseries
{
"aggregations": [
{
"fieldName": "unique_user",
"type": "thetaSketch",
"name": "new_user",
"isInputThetaSketch": false,
"size": 16384
}
],
"filter": {
"fields": [
{
"type": "selector",
"dimension": "event",
"value": "register"
},
{
"type": "selector",
"dimension": "register_days",
"value": 0
},
{
"type": "selector",
"dimension": "appkey",
"value": "apprenrenwang"
},
{
"type": "selector",
"dimension": "platform",
"value": "ios"
}
],
"type": "and"
},
"intervals": "2019-10-22T16:00:00.000Z/2019-10-23T16:00:00.000Z",
"dataSource": "apprenrenwang-app_statistic",
"granularity": "all",
"postAggregations": [],
"queryType": "timeseries"
}
topN
{
"aggregations": [
{
"fieldName": "count",
"type": "longSum", //算法类型,求和
"name": "pv"
},
{
"fieldName": "unique_user",
"type": "thetaSketch",
"name": "uv",
"isInputThetaSketch": false,
"size": 16384
},
{
"fieldName": "unique_ip", //字段名
"type": "thetaSketch", //算法结构, 去重后数量
"name": "ip", //输出名,相当于count(distinct unique_ip)
"isInputThetaSketch": false, //
"size": 16384 //默认大小
},
{
"fieldName": "unique_short_cookie",
"type": "thetaSketch",
"name": "access_times", //访问次数
"isInputThetaSketch": false,
"size": 16384
}
],
"filter": {
"fields": [
{
"type": "selector", //event=startup事件
"dimension": "event",
"value": "startup"
},
{
"type": "selector",
"dimension": "appkey",
"value": "weba9vgweb"
},
{
"type": "selector",
"dimension": "platform",
"value": "wap"
}
],
"type": "and"
},
"intervals": "2019-07-09T16:00:00.000Z/2019-07-17T16:00:00.000Z",
"dataSource": "weba9vgweb-web_statistic", // 数据源, 类似于表
"granularity": "all", //时间划分粒度month year day
"threshold": 100000, //门槛,阀门topn, 上限
"postAggregations": [], //对aggregations二次聚合
"queryType": "topN", //查询类型,即timeseries,topN,groupBy等;
"dimension": "os", //维度, 查询哪些维度作为聚合条件
"metric": "pv" //公制单位,用来做计算或者统计的列
}
简化格式后的topN
topN带metric
{
"aggregations": [], //是关于已经定义好字典的aggregations的列表
"filter": {
"fields": [], //是关于已经定义好filter的字典的列表
"type": "and"
},
"intervals": "2019-07-09T16:00:00.000Z/2019-07-17T16:00:00.000Z",
"dataSource": "weba9vgweb-web_statistic",
"granularity": "all", //划分粒度month year day
"threshold": 100000, //门槛,阀门
"postAggregations": [], // 二次聚合
"queryType": "topN",
"dimension": "os", //维度
"metric": "pv" //公制单位
}
groupBy
{
"aggregations": [
{
"fieldName": "dmn1_sum",
"type": "longSum",
"name": "access_duration"
}
],
"filter": {
"fields": [
{
"type": "selector",
"dimension": "event",
"value": "startup"
},
{
"type": "selector",
"dimension": "appkey",
"value": "apprenrenwang"
}
],
"type": "and"
},
"intervals": "2019-10-22T16:00:00.000Z/2019-10-23T16:00:00.000Z",
"dataSource": "apprenrenwang-app_statistic",
"granularity": "all",
"queryType": "groupBy",
"threshold": 100000,
"dimensions": ["app_ver", "channel"],
"metric": "access_duration"
}
返回结果对比
timeseries:
[{
"timestamp": "2019-10-22T16:00:00.000Z",
"result": {
"new_user": 0.0
}
}]
topN
[
{
"timestamp": "2019-11-11T16:00:00.000Z",
"result": [
{
"access_duration": 2005260,
"app_ver": "1.0.1"
},
{
"access_duration": 89423,
"app_ver": "1.0.3"
},
{
"access_duration": 4552,
"app_ver": "1.0"
}
]
}
]
groupBy:
[{
"version": "v1",
"timestamp": "2019-10-22T16:00:00.000Z",
"event": {
"channel": "null",
"access_duration": 140042,
"app_ver": "1.0.0"
}
},
{
"version": "v1",
"timestamp": "2019-10-22T16:00:00.000Z",
"event": {
"channel": "qijian",
"access_duration": 96193,
"app_ver": "1.0.0"
}
}
]
结果分析和处理
- 首先返回的都是列表, 要将需要信息组织成列表
- timeseries列表里只有一个字典, 取ret[0].get("result").get("xxx")
- topN列表里只有一个字典, 取ret[0].get("result")然后for循环遍历
- groupBy列表有很多字典, for循环遍历然后item.get("event").get("xxx")
if self._query_type == "timeseries":
result = resp_data
elif self._query_type == "topN":
result = resp_data[0].get("result")
else:
result = [item.get("event") for item in resp_data]
只有处理后基本就是[{}, {}, {}], 就可取数据了