elasticsearch group by sum avg max min

我博客中所有ES相关的资料对应的ES版本为7.2.4

一:对单个字段进行分组求和

1、表结构图片:

根据任务id分组,分别统计出每个任务id下有多少个文字标题


1.SQL:select id, count(*) as sum from task group by taskid;  
java ES连接工具类

public class ESClientConnectionUtil {
public static TransportClient client=null;
public final static String HOST = "192.168.200.211"; //服务器部署
public final static Integer PORT = 9301; //端口

public static TransportClient getESClient(){
System.setProperty("es.set.netty.runtime.available.processors", "false");
if (client == null) {
synchronized (ESClientConnectionUtil.class) {
try {
//设置集群名称
Settings settings = Settings.builder().put("cluster.name", "es5").put("client.transport.sniff", true).build();
//创建client
client = new PreBuiltTransportClient(settings).addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(HOST), PORT));
} catch (Exception ex) {
ex.printStackTrace();

System.out.println(ex.getMessage());
}
}
}
return client;
}
public static TransportClient getESClientConnection(){
if (client == null) {
System.setProperty("es.set.netty.runtime.available.processors", "false");
try {
//设置集群名称
Settings settings = Settings.builder().put("cluster.name", "es5").put("client.transport.sniff", true).build();
//创建client
client = new PreBuiltTransportClient(settings).addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(HOST), PORT));
} catch (Exception ex) {
ex.printStackTrace();
System.out.println(ex.getMessage());
}
}
return client;
}

//判断索引是否存在
public static boolean judgeIndex(String index){
client= getESClientConnection();
IndicesAdminClient adminClient;
//查询索引是否存在
adminClient= client.admin().indices();
IndicesExistsRequest request = new IndicesExistsRequest(index);
IndicesExistsResponse responses = adminClient.exists(request).actionGet();

if (responses.isExists()) {
return true;
}
return false;
}
}
java ES语句(根据单列进行分组求和)

//根据 任务id分组进行求和
SearchRequestBuilder sbuilder = client.prepareSearch("hottopic").setTypes("hot");
//根据taskid进行分组统计,统计出的列别名叫sum
TermsAggregationBuilder termsBuilder = AggregationBuilders.terms("sum").field("taskid");
sbuilder.addAggregation(termsBuilder);
SearchResponse responses= sbuilder.execute().actionGet();
//得到这个分组的数据集合
Terms terms = responses.getAggregations().get("sum");
List<BsKnowledgeInfoDTO> lists = new ArrayList<>();
for(int i=0;i<terms.getBuckets().size();i++){
//statistics
String id =terms.getBuckets().get(i).getKey().toString();//id
Long sum =terms.getBuckets().get(i).getDocCount();//数量
System.out.println("=="+terms.getBuckets().get(i).getDocCount()+"------"+terms.getBuckets().get(i).getKey());
}
//分别打印出统计的数量和id值
根据多列进行分组求和

//根据 任务id分组进行求和
SearchRequestBuilder sbuilder = client.prepareSearch("hottopic").setTypes("hot");
//根据taskid进行分组统计,统计出的列别名叫sum
TermsAggregationBuilder termsBuilder = AggregationBuilders.terms("sum").field("taskid");
//根据第二个字段进行分组
TermsAggregationBuilder aAggregationBuilder2 = AggregationBuilders.terms("region_count").field("birthplace");
//如果存在第三个,以此类推;
sbuilder.addAggregation(termsBuilder.subAggregation(aAggregationBuilder2));
SearchResponse responses= sbuilder.execute().actionGet();
//得到这个分组的数据集合
Terms terms = responses.getAggregations().get("sum");
List<BsKnowledgeInfoDTO> lists = new ArrayList<>();
for(int i=0;i<terms.getBuckets().size();i++){
//statistics
String id =terms.getBuckets().get(i).getKey().toString();//id
Long sum =terms.getBuckets().get(i).getDocCount();//数量
System.out.println("=="+terms.getBuckets().get(i).getDocCount()+"------"+terms.getBuckets().get(i).getKey());
}
//分别打印出统计的数量和id值



对多个field求max/min/sum/avg

SearchRequestBuilder requestBuilder = client.prepareSearch("hottopic").setTypes("hot");
//根据taskid进行分组统计,统计别名为sum
TermsAggregationBuilder aggregationBuilder1 = AggregationBuilders.terms("sum").field("taskid")
//根据tasktatileid进行升序排列
.order(Order.aggregation("tasktatileid", true));
// 求tasktitleid 进行求平均数 别名为avg_title
AggregationBuilder aggregationBuilder2 = AggregationBuilders.avg("avg_title").field("tasktitleid");
//
AggregationBuilder aggregationBuilder3 = AggregationBuilders.sum("sum_taskid").field("taskid");
requestBuilder.addAggregation(aggregationBuilder1.subAggregation(aggregationBuilder2).subAggregation(aggregationBuilder3));
SearchResponse response = requestBuilder.execute().actionGet();

Terms aggregation = response.getAggregations().get("sum");
Avg terms2 = null;
Sum term3 = null;
for (Terms.Bucket bucket : aggregation.getBuckets()) {
terms2 = bucket.getAggregations().get("avg_title"); // org.elasticsearch.search.aggregations.metrics.avg.InternalAvg
term3 = bucket.getAggregations().get("sum_taskid"); // org.elasticsearch.search.aggregations.metrics.sum.InternalSum
System.out.println("编号=" + bucket.getKey() + ";平均=" + terms2.getValue() + ";总=" + term3.getValue());
}








(1)统计某个字段的数量
ValueCountBuilder vcb= AggregationBuilders.count("count_uid").field("uid");
(2)去重统计某个字段的数量(有少量误差)
CardinalityBuilder cb= AggregationBuilders.cardinality("distinct_count_uid").field("uid");
(3)聚合过滤
FilterAggregationBuilder fab= AggregationBuilders.filter("uid_filter").filter(QueryBuilders.queryStringQuery("uid:001"));
(4)按某个字段分组
TermsBuilder tb= AggregationBuilders.terms("group_name").field("name");
(5)求和
SumBuilder sumBuilder= AggregationBuilders.sum("sum_price").field("price");
(6)求平均
AvgBuilder ab= AggregationBuilders.avg("avg_price").field("price");
(7)求最大值
MaxBuilder mb= AggregationBuilders.max("max_price").field("price");
(8)求最小值
MinBuilder min= AggregationBuilders.min("min_price").field("price");
(9)按日期间隔分组
DateHistogramBuilder dhb= AggregationBuilders.dateHistogram("dh").field("date");
(10)获取聚合里面的结果
TopHitsBuilder thb= AggregationBuilders.topHits("top_result");
(11)嵌套的聚合
NestedBuilder nb= AggregationBuilders.nested("negsted_path").path("quests");
(12)反转嵌套
AggregationBuilders.reverseNested("res_negsted").path("kps ");

 先贴出Es的代码

{

// 这里是对hits的数据进行限制只返回一条数据,因为我不要这里的数据,所以避免数据过多就直接返回最小
    "from":0,
    "size":1,
    "query":{
        "match_all":{
            "boost":1
        }
    },
    "aggregations":{
        "agg":{
            "terms":{
                "field":"pid",

   //这里代表聚合查询出多少条数据,注意这里的size要比最下面分页的size要大,因为是对聚合后的数据分页,如果不写的话默认是10
                "size":10
            },
            "aggregations":{
                "top":{
                    "top_hits":{
                        "from":0,

// 这里的size表示重复的返回几条,这里我们返回1
                        "size":1,
                        "version":false,
                        "seq_no_primary_term":false,
                        "explain":false,
                        "_source":{
                            "includes":[

// 这个是需要返回的字段
                                "pid"
                            ],
                            "excludes":[

                            ]
                        }
                    }
                },
                "bucket_field":{
                    "bucket_sort":{
                        "sort":[

                        ],

// 这里是聚合分页从第几页开始,每页多少条(举个列子第一页(0,10),第二页就是(10,20))
                        "from":0,
                        "size":10
                    }
                }
            }
        }
    }
}

Java 代码

//构造查询器

SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

//查询条件查询所有
QueryBuilder queryBuilders = QueryBuilders.matchAllQuery();

// 需要返回字段的集合
String[] param= {"pid"};

// 对需要返回的数据包括哪些,不包括哪些,重复的只返回1条
TopHitsAggregationBuilder top1 = AggregationBuilders.topHits("top").fetchSource(param, Strings.EMPTY_ARRAY).size(1);

// 通过pid聚合并且聚合后返回10条数据,注意这里的size(这里代表聚合查询出多少条数据,注意这里的size要比最下面分页的size要大,因为是对聚合后的数据分页,如果不写的话默认是10)
TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("agg").field("pid").subAggregation(top1).size(10);

// 聚合分页
termsAggregationBuilder.subAggregation(new BucketSortPipelineAggregationBuilder("bucket_field",null).from(0).size(10));

// 这里的.from(0).size(1) 表示最外层hits返回的数据

searchSourceBuilder.query(queryBuilders).aggregation(termsAggregationBuilder).from(0).size(1);

//解析返回的数据
SearchResponse response = getSearchResponse(searchSourceBuilder);
Terms agg = response.getAggregations().get("agg");
for (Terms.Bucket bucket : agg.getBuckets()) {
    TopHits top = bucket.getAggregations().get("top");
    for (SearchHit searchHit : top.getHits()) {
        System.out.println(searchHit.getSourceAsMap());
    }
}

// 执行查询并且返回response
private SearchResponse getSearchResponse(SearchSourceBuilder searchSourceBuilder) {
// 注入自己的es进行查询
    SearchResponse response = esTemplate.query("distinct_test_es","dgg_doc", searchSourceBuilder);
return response;
}


最后结果为已去重,并且可以分页

参考文章

https://copyfuture.com/blogs-details/20200426182136741urzeidk5edmsnj0

es group by max并显示非group by 的其他字段

https://blog.csdn.net/freedom_zzc/article/details/123114794

聚合排序

https://jingyan.baidu.com/article/72ee561a132395e16138dff4.html

分组 聚合 之后 分页 (es对分组聚合之后的数据 没有10000的限制。)

https://blog.csdn.net/weixin_45239670/article/details/122431901

posted @ 2022-06-15 11:22  方东信  阅读(779)  评论(0编辑  收藏  举报