08 ES基本的聚合查询

按protocol聚合

{
  "_source": "protocol", 
  "size": 1,
  "aggs": {
    "agg_protocols": {
      "terms": {
        "field": "protocol.raw",
        "size": 1000
      }
    }
  }
}

指定地区,按port聚合

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "geoip.country_code2.raw": "ID"
                  }
                }
              }
            }
          ]
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "agg_port": {
      "terms": {
        "field": "port",
        "size": 9999
      }
    }
  }
}

指定地区和时间段,按ip聚合(独立ip 即ip去重)

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "geoip3.country_code2": "ID"
                  }
                }
              }
            },
            {
              "range": {
                "lastupdatetime": {
                  "gte": "2020-01-01 00:00:00",
                  "lt": "2021-01-01 00:00:00"
                }
              }
            }
          ]
        }
      }
    }
  },
  "_source": ["ip", "port", "protocol"],
  "size": 0,
  "aggs": {
    "distinct_ips": {
      "cardinality": {
        "field": "ip",
        "precision_threshold": 40000
      }
    }
  }
}

并且

port=80 并且country_code2=VN , 结果按独立ip聚合

例一:

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "port": "80"
                  }
                }
              }
            },
            {
              "bool": {
                "filter": {
                  "term": {
                    "geoip2.country_code2": "VN"
                  }
                }
              }
            }
          ]
        }
      }
    }
  },
  "_source": [],
  "size": 1,
  "aggs": {
    "distinct_ips": {
      "cardinality": {
        "field": "ip",
        "precision_threshold": 40000
      }
    }
  }
}

例二:
(ip="106.75.96.205" && port=8082) && (product="其他基础软件" ||product="NGINX" )

{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "ip.ip_raw": "106.75.96.205"
          }
        },
        {
          "term": {
            "port.port_raw": "8082"
          }
        },
        {
          "bool": {
            "should": [
              {
                "term": {
                  "product.raw": "其他基础软件"
                }
              },
              {
                "term": {
                  "product.raw": "NGINX"
                }
              }
            ]
          }
        }
      ],
      "must_not": [],
      "should": []
    }
  },
  "from": 0,
  "size": 10,
  "sort": [],
  "aggs": {}
}

例三:
(ip="106.75.96.205" && port=8082) && (product="其他基础软件" ||product="NGINX" )

{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "ip.ip_raw": "106.75.96.205"
          }
        },
        {
          "term": {
            "port.port_raw": "8082"
          }
        },
        {
          "terms": {
            "product.raw": [
              "其他基础软件",
              "NGINX"
            ]
          }
        }
      ],
      "must_not": [],
      "should": []
    }
  },
  "from": 0,
  "size": 10,
  "sort": [],
  "aggs": {}
}

product是个数组:

例四:
port=80 && (geoip3.country_code2="ID" || geoip3.country_code2="VN") , 结果按独立ip聚合
cardinality : 独立IP数

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "port": "80"
                  }
                }
              }
            },
            {
              "bool": {
                "should": [
                  {
                    "match": {
                      "geoip3.country_code2": "ID"
                    }
                  },
                  {
                    "match": {
                      "geoip3.country_code2": "VN"
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "_source": [
    "ip",
    "port",
    "geoip3"
  ],
  "size": 2,
  "aggs": {
    "distinct_ips": {
      "cardinality": {
        "field": "ip",
        "precision_threshold": 40000
      }
    }
  }
}

聚合再求独立ip数

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "should": [
                  {
                    "term": {
                      "asn.as_organization": "Sen"
                    }
                  },
                  {
                    "term": {
                      "asn.as_organization": "Tt"
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "_source": [
    "ip"
  ],
  "size": 0,
  "aggs": {
    "agg_country_name": {
      "terms": {
        "field": "geoip.country_name.raw",
        "size": 10
      },
      "aggs": {
        "agg_country_name_ip": {
          "cardinality": {
            "field": "ip"
          }
        }
      }
    }
  }
}

效果:

cardinality 即去重计算,类似sql中 count(distinct),先去重再求和,计算指定field值的种类数。

例子:
https://blog.csdn.net/u010503427/article/details/81592468

参考:
https://www.jianshu.com/p/1b430a637971

聚合后将聚合结果进行分页的解决办法

原数据:

{
    "took":0,
    "timed_out":false,
    "_shards":{
        "total":6,
        "successful":6,
        "skipped":0,
        "failed":0
    },
    "hits":{
        "total":{
            "value":25,
            "relation":"eq"
        },
        "max_score":1,
        "hits":[
            {
                "_index":"certdata-domaindetection",
                "_type":"_doc",
                "_id":"d4eea6ea5ee801301f11b1bf68770a2e",
                "_score":1,
                "_source":{
                    "beginAt":"2022-10-18 21:58:12",
                    "data":{
                        "ipv4":[
                            "66.102.251.24"
                        ],
                        "spentTime":76
                    },
                    "endAt":"2022-10-18 21:59:55",
                    "errorMsg":"",
                    "nodeID":"domainDetection_1",
                    "parentTaskID":"1019",
                    "storageAt":"2022-10-18 21:59:55",
                    "success":false,
                    "targetHost":"sina.com",
                    "taskID":"1879969334"
                }
            }
        ]
    }
}

query

POST certdata-domaindetection/_search
{
    "query":{
        "bool":{
            "must":[
                {
                    "term":{
                        "data.ipv4":{
                            "value":"47.103.24.173"
                        }
                    }
                }
            ]
        }
    },
    "size":1,
    "aggs":{
        "agg_targetHost":{
            "terms":{
                "field":"targetHost",
                "size":999999
            },
            "aggs":{
                "myBucketSort":{
                    "bucket_sort":{
                        "from":0,
                        "size":2,
                        "gap_policy":"skip"
                    }
                }
            }
        },
        "termsCount":{
            "cardinality":{
                "field":"targetHost",
                "precision_threshold":30000
            }
        }
    }
}

结果:

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 6,
    "successful" : 6,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 2.140244,
    "hits" : [
      {
        "_index" : "certdata-domaindetection",
        "_type" : "_doc",
        "_id" : "a1db3d40a4088d61c7526cc272fa1b82",
        "_score" : 2.140244,
        "_source" : {
          "beginAt" : "2022-10-19 18:06:47",
          "data" : {
            "ipv4" : [
              "139.159.241.37",
              "8.134.50.24",
              "47.103.24.173",
              "119.3.70.188",
              "120.92.78.97"
            ],
            "spentTime" : 41
          },
          "endAt" : "2022-10-19 18:08:22",
          "nodeID" : "domainDetection_1",
          "parentTaskID" : "1031",
          "storageAt" : "2022-10-19 18:08:23",
          "targetHost" : "bilibili.com",
          "taskID" : "1879969356"
        }
      }
    ]
  },
  "aggregations" : {
    "agg_targetHost" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "bilibili.com",
          "doc_count" : 2
        }
      ]
    },
    "termsCount" : {
      "value" : 1
    }
  }
}

go代码:


type AggStatKeyStrMap map[string]int64

// blockChainStatWithParamInput ...  入参
type blockChainStatWithParamInput struct {
	model.PublicChainType
	Status   model.BlockchainStatus
	Country  []string
	Province string
	City     string
	Page     int
	Size     int
}


/*
    按搜索条件聚合指定字段
	isStatusSearch 是否按状态搜索
	aggField 聚合的字段
*/

func (d *blockChainService) GetBlockChainStatWithParam(ctx context.Context, in blockChainStatWithParamInput, isStatusSearch bool, aggField string) (aggStatMap model.AggStatKeyStrMap, count int64, err error) {
	ctx, span := gtrace.NewSpan(ctx, "GetBlockChainStatWithParam")
	defer span.End()
	span.SetAttributes(
		attribute.String("GetBlockChainStatWithParam.AggField", aggField),
	)

	query := elasticCore.NewBoolQuery()

	if in.Type > 0 {
		query.Must(elasticCore.NewTermQuery("type", in.Type))
	}
	if isStatusSearch {
		query.Must(elasticCore.NewTermQuery("status", in.Status))
	}
	if len(in.Country) > 0 {
		tmpInter := make([]interface{}, 0)
		for _, v := range in.Country {
			tmpInter = append(tmpInter, v)
		}
		query.Must(elasticCore.NewTermsQuery("country", tmpInter...))
	}
	if len(in.Province) > 0 {
		query.Must(elasticCore.NewTermQuery("province", in.Province))
	}
	if len(in.City) > 0 {
		query.Must(elasticCore.NewTermQuery("city", in.City))
	}
	sourceQ, _ := query.Source()
	fofacore.PrintQuery(sourceQ)

	//该单位备案的其他域名
	pageIndex := 1
	if in.Page != 0 {
		pageIndex = in.Page
	}
	pageSize := 10
	if in.Size != 0 {
		pageSize = in.Size
	}
	ctgAgg := elasticCore.NewTermsAggregation().Field(aggField)
	source, _ := ctgAgg.Source()
	marshalSource, _ := json.Marshal(source)
	glog.Debugf(ctx, "GetBlockChainAddrStat source:%v \n", string(marshalSource))

	span.SetAttributes(
		attribute.String("ast.EsIndexName", string(ast.EsBlockchainNodedetect)),
	)

	// 获取去重的字段数量(通用)
	cardinalityAgg := elasticCore.NewCardinalityAggregation().
		Field(aggField).
		PrecisionThreshold(3000)
	//Rehash(true)

	//打印query语句
	sourceCardinalityAgg, _ := cardinalityAgg.Source()
	fofacore.PrintQuery(sourceCardinalityAgg)

	res, err := EsClient(string(ast.EsBlockchainNodedetect)).
		Query(query).
		From((pageIndex-1)*pageSize).
		Size(pageSize).
		Aggregation("ctg", ctgAgg).Pretty(true).
		Aggregation("cardinalityCount", cardinalityAgg).
		Do(context.Background())
	if err != nil {
		glog.Errorf(ctx, "[GetBlockChainAddrStat] es error: %s", err)
		return
	}

	// 去重后的总数
	term, _ := res.Aggregations.Cardinality("cardinalityCount")
	for _, bucket := range term.Aggregations {
		b, _ := bucket.MarshalJSON()
		t, _ := strconv.Atoi(string(b))
		count = int64(t)
	}
	if count == 0 {
		return
	}

	ctg, found := res.Aggregations.Terms("ctg")
	if !found {
		glog.Errorf(ctx, "[GetBlockChainAddrStat] es can not get res")
		return
	}
	if len(ctg.Buckets) == 0 {
		return
	}
	var aggStatKeyStrMap = make(model.AggStatKeyStrMap, 0)
	for _, item := range ctg.Buckets {
		aggStatKeyStrMap[item.Key.(string)] = item.DocCount
	}

	return aggStatKeyStrMap, count, nil
}

参考文档:
https://wenku.baidu.com/view/da561c28954bcf84b9d528ea81c758f5f61f291d.html?wkts=1667355955869&bdQuery=elasticsearch+bucket_sort

子聚合

例子:

POST test/_search
{
  "size": 0,
  "aggs": {
    "agg_country_protocol_type": {
      "terms": {
        "field": "geoip.country_name.raw",
        "size": 999999,
        "order": {
          "_key": "asc"
        }
      },
      "aggs": {
        "protocol_agg": {
          "terms": {
            "field": "protocol.raw",
            "size": 999999,
            "order": {
              "_key": "asc"
            }
          }
        }
      }
    },
    "agg_compony_type": {
      "terms": {
        "field": "company.keyword",
        "size": 999999,
        "order": {
          "_key": "asc"
        }
      },
      "aggs": {
        "agg_compony_protocol": {
          "terms": {
            "field": "protocol.raw",
            "size": 999999,
            "order": {
              "_key": "asc"
            }
          }
        }
      }
    }
  }
}

posted @ 2021-07-27 00:31  HaimaBlog  阅读(579)  评论(0编辑  收藏  举报