08 ES基本的聚合查询
按protocol聚合
{
"_source": "protocol",
"size": 1,
"aggs": {
"agg_protocols": {
"terms": {
"field": "protocol.raw",
"size": 1000
}
}
}
}
指定地区,按port聚合
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"geoip.country_code2.raw": "ID"
}
}
}
}
]
}
}
}
},
"size": 0,
"aggs": {
"agg_port": {
"terms": {
"field": "port",
"size": 9999
}
}
}
}
指定地区和时间段,按ip聚合(独立ip 即ip去重)
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"geoip3.country_code2": "ID"
}
}
}
},
{
"range": {
"lastupdatetime": {
"gte": "2020-01-01 00:00:00",
"lt": "2021-01-01 00:00:00"
}
}
}
]
}
}
}
},
"_source": ["ip", "port", "protocol"],
"size": 0,
"aggs": {
"distinct_ips": {
"cardinality": {
"field": "ip",
"precision_threshold": 40000
}
}
}
}
并且
port=80 并且country_code2=VN , 结果按独立ip聚合
例一:
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"port": "80"
}
}
}
},
{
"bool": {
"filter": {
"term": {
"geoip2.country_code2": "VN"
}
}
}
}
]
}
}
}
},
"_source": [],
"size": 1,
"aggs": {
"distinct_ips": {
"cardinality": {
"field": "ip",
"precision_threshold": 40000
}
}
}
}
例二:
(ip="106.75.96.205" && port=8082) && (product="其他基础软件" ||product="NGINX" )
{
"query": {
"bool": {
"must": [
{
"term": {
"ip.ip_raw": "106.75.96.205"
}
},
{
"term": {
"port.port_raw": "8082"
}
},
{
"bool": {
"should": [
{
"term": {
"product.raw": "其他基础软件"
}
},
{
"term": {
"product.raw": "NGINX"
}
}
]
}
}
],
"must_not": [],
"should": []
}
},
"from": 0,
"size": 10,
"sort": [],
"aggs": {}
}
例三:
(ip="106.75.96.205" && port=8082) && (product="其他基础软件" ||product="NGINX" )
{
"query": {
"bool": {
"must": [
{
"term": {
"ip.ip_raw": "106.75.96.205"
}
},
{
"term": {
"port.port_raw": "8082"
}
},
{
"terms": {
"product.raw": [
"其他基础软件",
"NGINX"
]
}
}
],
"must_not": [],
"should": []
}
},
"from": 0,
"size": 10,
"sort": [],
"aggs": {}
}
product是个数组:
例四:
port=80 && (geoip3.country_code2="ID" || geoip3.country_code2="VN") , 结果按独立ip聚合
cardinality : 独立IP数
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"port": "80"
}
}
}
},
{
"bool": {
"should": [
{
"match": {
"geoip3.country_code2": "ID"
}
},
{
"match": {
"geoip3.country_code2": "VN"
}
}
]
}
}
]
}
}
}
},
"_source": [
"ip",
"port",
"geoip3"
],
"size": 2,
"aggs": {
"distinct_ips": {
"cardinality": {
"field": "ip",
"precision_threshold": 40000
}
}
}
}
聚合再求独立ip数
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"term": {
"asn.as_organization": "Sen"
}
},
{
"term": {
"asn.as_organization": "Tt"
}
}
]
}
}
]
}
}
}
},
"_source": [
"ip"
],
"size": 0,
"aggs": {
"agg_country_name": {
"terms": {
"field": "geoip.country_name.raw",
"size": 10
},
"aggs": {
"agg_country_name_ip": {
"cardinality": {
"field": "ip"
}
}
}
}
}
}
效果:
cardinality 即去重计算,类似sql中 count(distinct),先去重再求和,计算指定field值的种类数。
例子:
https://blog.csdn.net/u010503427/article/details/81592468
参考:
https://www.jianshu.com/p/1b430a637971
聚合后将聚合结果进行分页的解决办法
原数据:
{
"took":0,
"timed_out":false,
"_shards":{
"total":6,
"successful":6,
"skipped":0,
"failed":0
},
"hits":{
"total":{
"value":25,
"relation":"eq"
},
"max_score":1,
"hits":[
{
"_index":"certdata-domaindetection",
"_type":"_doc",
"_id":"d4eea6ea5ee801301f11b1bf68770a2e",
"_score":1,
"_source":{
"beginAt":"2022-10-18 21:58:12",
"data":{
"ipv4":[
"66.102.251.24"
],
"spentTime":76
},
"endAt":"2022-10-18 21:59:55",
"errorMsg":"",
"nodeID":"domainDetection_1",
"parentTaskID":"1019",
"storageAt":"2022-10-18 21:59:55",
"success":false,
"targetHost":"sina.com",
"taskID":"1879969334"
}
}
]
}
}
query
POST certdata-domaindetection/_search
{
"query":{
"bool":{
"must":[
{
"term":{
"data.ipv4":{
"value":"47.103.24.173"
}
}
}
]
}
},
"size":1,
"aggs":{
"agg_targetHost":{
"terms":{
"field":"targetHost",
"size":999999
},
"aggs":{
"myBucketSort":{
"bucket_sort":{
"from":0,
"size":2,
"gap_policy":"skip"
}
}
}
},
"termsCount":{
"cardinality":{
"field":"targetHost",
"precision_threshold":30000
}
}
}
}
结果:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 6,
"successful" : 6,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 2.140244,
"hits" : [
{
"_index" : "certdata-domaindetection",
"_type" : "_doc",
"_id" : "a1db3d40a4088d61c7526cc272fa1b82",
"_score" : 2.140244,
"_source" : {
"beginAt" : "2022-10-19 18:06:47",
"data" : {
"ipv4" : [
"139.159.241.37",
"8.134.50.24",
"47.103.24.173",
"119.3.70.188",
"120.92.78.97"
],
"spentTime" : 41
},
"endAt" : "2022-10-19 18:08:22",
"nodeID" : "domainDetection_1",
"parentTaskID" : "1031",
"storageAt" : "2022-10-19 18:08:23",
"targetHost" : "bilibili.com",
"taskID" : "1879969356"
}
}
]
},
"aggregations" : {
"agg_targetHost" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bilibili.com",
"doc_count" : 2
}
]
},
"termsCount" : {
"value" : 1
}
}
}
go代码:
type AggStatKeyStrMap map[string]int64
// blockChainStatWithParamInput ... 入参
type blockChainStatWithParamInput struct {
model.PublicChainType
Status model.BlockchainStatus
Country []string
Province string
City string
Page int
Size int
}
/*
按搜索条件聚合指定字段
isStatusSearch 是否按状态搜索
aggField 聚合的字段
*/
func (d *blockChainService) GetBlockChainStatWithParam(ctx context.Context, in blockChainStatWithParamInput, isStatusSearch bool, aggField string) (aggStatMap model.AggStatKeyStrMap, count int64, err error) {
ctx, span := gtrace.NewSpan(ctx, "GetBlockChainStatWithParam")
defer span.End()
span.SetAttributes(
attribute.String("GetBlockChainStatWithParam.AggField", aggField),
)
query := elasticCore.NewBoolQuery()
if in.Type > 0 {
query.Must(elasticCore.NewTermQuery("type", in.Type))
}
if isStatusSearch {
query.Must(elasticCore.NewTermQuery("status", in.Status))
}
if len(in.Country) > 0 {
tmpInter := make([]interface{}, 0)
for _, v := range in.Country {
tmpInter = append(tmpInter, v)
}
query.Must(elasticCore.NewTermsQuery("country", tmpInter...))
}
if len(in.Province) > 0 {
query.Must(elasticCore.NewTermQuery("province", in.Province))
}
if len(in.City) > 0 {
query.Must(elasticCore.NewTermQuery("city", in.City))
}
sourceQ, _ := query.Source()
fofacore.PrintQuery(sourceQ)
//该单位备案的其他域名
pageIndex := 1
if in.Page != 0 {
pageIndex = in.Page
}
pageSize := 10
if in.Size != 0 {
pageSize = in.Size
}
ctgAgg := elasticCore.NewTermsAggregation().Field(aggField)
source, _ := ctgAgg.Source()
marshalSource, _ := json.Marshal(source)
glog.Debugf(ctx, "GetBlockChainAddrStat source:%v \n", string(marshalSource))
span.SetAttributes(
attribute.String("ast.EsIndexName", string(ast.EsBlockchainNodedetect)),
)
// 获取去重的字段数量(通用)
cardinalityAgg := elasticCore.NewCardinalityAggregation().
Field(aggField).
PrecisionThreshold(3000)
//Rehash(true)
//打印query语句
sourceCardinalityAgg, _ := cardinalityAgg.Source()
fofacore.PrintQuery(sourceCardinalityAgg)
res, err := EsClient(string(ast.EsBlockchainNodedetect)).
Query(query).
From((pageIndex-1)*pageSize).
Size(pageSize).
Aggregation("ctg", ctgAgg).Pretty(true).
Aggregation("cardinalityCount", cardinalityAgg).
Do(context.Background())
if err != nil {
glog.Errorf(ctx, "[GetBlockChainAddrStat] es error: %s", err)
return
}
// 去重后的总数
term, _ := res.Aggregations.Cardinality("cardinalityCount")
for _, bucket := range term.Aggregations {
b, _ := bucket.MarshalJSON()
t, _ := strconv.Atoi(string(b))
count = int64(t)
}
if count == 0 {
return
}
ctg, found := res.Aggregations.Terms("ctg")
if !found {
glog.Errorf(ctx, "[GetBlockChainAddrStat] es can not get res")
return
}
if len(ctg.Buckets) == 0 {
return
}
var aggStatKeyStrMap = make(model.AggStatKeyStrMap, 0)
for _, item := range ctg.Buckets {
aggStatKeyStrMap[item.Key.(string)] = item.DocCount
}
return aggStatKeyStrMap, count, nil
}
参考文档:
https://wenku.baidu.com/view/da561c28954bcf84b9d528ea81c758f5f61f291d.html?wkts=1667355955869&bdQuery=elasticsearch+bucket_sort
子聚合
例子:
POST test/_search
{
"size": 0,
"aggs": {
"agg_country_protocol_type": {
"terms": {
"field": "geoip.country_name.raw",
"size": 999999,
"order": {
"_key": "asc"
}
},
"aggs": {
"protocol_agg": {
"terms": {
"field": "protocol.raw",
"size": 999999,
"order": {
"_key": "asc"
}
}
}
}
},
"agg_compony_type": {
"terms": {
"field": "company.keyword",
"size": 999999,
"order": {
"_key": "asc"
}
},
"aggs": {
"agg_compony_protocol": {
"terms": {
"field": "protocol.raw",
"size": 999999,
"order": {
"_key": "asc"
}
}
}
}
}
}
}
[Haima的博客]
http://www.cnblogs.com/haima/