查询示例(聚合查询)

bucket:

数据分组,一些数据按照某个字段进行bucket划分,这个字段值相同的数据放到一个bucket中。类似于Mysql中的group by后的查询结果。

metric:

对一个数据分组执行的统计,比如计算最大值,最小值,平均值等

类似于Mysql中的max(),min(),avg()函数的值,都是在group by后使用的。

数据准备

PUT /music
{
      "mappings": {
        "properties": {
          "id": {
            "type": "keyword"
          },
          "author": {
            "type": "text",
            "analyzer": "english",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "content": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "language": {
            "type": "text",
            "analyzer": "english",
            "fielddata": true
          },
          "tags": {
            "type": "text",
            "analyzer": "english"
          },
          "length": {
            "type": "long"
          },
          "likes": {
            "type": "long"
          },
          "isRelease": {
            "type": "boolean"
          },
          "releaseDate": {
            "type": "date"
          }
        }
      }
  }
View Code
POST /music/_doc/_bulk
{ "index": {}}
{ "author" : "zhangsan", "name" : "red", "content" : "honda", "language":"ch","tags":"tags","length":3,"likes":10,"isRelease":true,"releaseDate" : "2021-10-28" }
{ "index": {}}
{ "author" : "lisi", "name" : "blue", "content" : "honda", "language":"en","tags":"tags","length":3,"likes":100,"isRelease":true,"releaseDate" : "2021-10-28" }
{ "index": {}}
{ "author" : "zhangsan1", "name" : "red", "content" : "honda", "language":"ch","tags":"tags","length":30,"likes":10,"isRelease":true,"releaseDate" : "2021-11-28" }
{ "index": {}}
{ "author" : "lisi1", "name" : "blue", "content" : "honda", "language":"en","tags":"tags","length":30,"likes":100,"isRelease":true,"releaseDate" : "2021-11-28" }
{ "index": {}}
{ "author" : "zhangsan2", "name" : "red", "content" : "honda", "language":"ch","tags":"tags","length":80,"likes":10,"isRelease":true,"releaseDate" : "2021-11-28" }
{ "index": {}}
{ "author" : "lisi2", "name" : "blue", "content" : "honda", "language":"en","tags":"tags","length":80,"likes":100,"isRelease":true,"releaseDate" : "2022-10-28" }
{ "index": {}}
{ "author" : "zhangsan2", "name" : "red", "content" : "honda", "language":"ch","tags":"tags","length":80,"likes":10,"isRelease":true,"releaseDate" : "2022-10-28" }
{ "index": {}}
{ "author" : "lisi2", "name" : "blue", "content" : "honda", "language":"en","tags":"tags","length":80,"likes":100,"isRelease":true,"releaseDate" : "2022-10-28" }
View Code

1、统计目前收录的每种语言的歌曲数量

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "song_qty_by_language": {
      "terms": {
        "field": "language"
      }
    }
  }
} 
//返回结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
  "took" : 25,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "song_qty_by_language" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "ch",
          "doc_count" : 4
        },
        {
          "key" : "en",
          "doc_count" : 4
        }
      ]
    }
  }
}
View Code

size:0 表示只要统计后的结果,原始数据不展现,如果是大于0的,则会返回原数据

aggs:固定语法 ,聚合分析都要声明aggs

song_qty_by_language:聚合的名称,可以随便写,建议规范命名

terms:按什么字段进行分组

field:具体的字段名称 

2、按语种统计每种语种歌曲的平均时长

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "lang": {
      "terms": {
        "field": "language"
      },
      "aggs": {
        "length_avg": {
          "avg": {
            "field": "length"
          }
        }
      }
    }
  }
} 
//返回
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "lang" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "ch",
          "doc_count" : 4,
          "length_avg" : {
            "value" : 48.25
          }
        },
        {
          "key" : "en",
          "doc_count" : 4,
          "length_avg" : {
            "value" : 48.25
          }
        }
      ]
    }
  }
}
View Code

两层aggs聚合查询,先按语种统计,得到数据分组,再在数据分组里算平均时长。

3、统计最长时长、最短时长等的歌曲

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "color": {
      "terms": {
        "field": "language"
      },
      "aggs": {
        "length_avg": {
          "avg": {
            "field": "length"
          }
        },
        "length_max": {
          "max": {
            "field": "length"
          }
        },
        "length_min": {
          "min": {
            "field": "length"
          }
        },
        "length_sum": {
          "sum": {
            "field": "length"
          }
        }
      }
    }
  }
}
//返回
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
  "took" : 59,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "color" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "ch",
          "doc_count" : 4,
          "length_sum" : {
            "value" : 193.0
          },
          "length_max" : {
            "value" : 80.0
          },
          "length_min" : {
            "value" : 3.0
          },
          "length_avg" : {
            "value" : 48.25
          }
        },
        {
          "key" : "en",
          "doc_count" : 4,
          "length_sum" : {
            "value" : 193.0
          },
          "length_max" : {
            "value" : 80.0
          },
          "length_min" : {
            "value" : 3.0
          },
          "length_avg" : {
            "value" : 48.25
          }
        }
      ]
    }
  }
}
View Code

4、按时长分段统计歌曲平均时长

以30秒为一段,看各段区间的平均值。

histogram语法位置跟terms一样,作范围分区,搭配interval参数一起使用

interval:30表示分的区间段为[0,30),[30,60),[60,90),[90,120)

段的闭合关系是左开右闭,如果数据在某段区间内没有,也会返回空的区间。

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "sales_price_range": {
      "histogram": {
        "field": "length",
        "interval": 30
      },
      "aggs": {
        "length_avg": {
          "avg": {
            "field": "length"
          }
        }
      }
    }
  }
}
View Code

5、按上架日期分段统计新歌数量

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "sales": {
      "date_histogram": {
        "field": "releaseDate",
        "interval": "month",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2000-10-01",
          "max": "2088-12-31"
        }
      }
    }
  }
} 
View Code

6、统计今年每个季度新发布歌曲的点赞数量,以及每个语种的点赞数量

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "sales": {
      "date_histogram": {
        "field": "releaseDate",
        "interval": "quarter",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2022-01-01",
          "max": "2022-08-31"
        }
      },
      "aggs": {
        "lang_qty": {
          "terms": {
            "field": "language"
          },
          "aggs": {
            "like_sum": {
              "sum": {
                "field": "likes"
              }
            }
          }
        },
        "total" :{
          "sum": {
            "field": "likes"
          }
        }
      }
    }
  }
} 
View Code

7、查询某种语种的歌曲数量

相当于mysql中where与group by联合使用

GET /music/_doc/_search
{
  "size": 0,
  "query": {
    "match": {
      "language": "en"
    }
  },
  "aggs": {
    "sales": {
      "terms": {
        "field": "language"
      }
    }
  }
} 
View Code

8、指定作者的歌与全部歌曲的点赞数量对比

GET /music/_doc/_search
{
  "size": 0,
  "query": {
    "match": {
      "author": "zhangsan"
    }
  },
  "aggs": {
    "likes": {
      "sum": {
        "field": "likes"
      }
    },
    "all": {
      "global": {},
      "aggs": {
        "all_likes": {
          "sum": {
            "field": "likes"
          }
        }
      }
    }
  }
} 
View Code

9、统计近2月,近1月的点赞数

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "recent_60d": {
      "filter": {
        "range": {
          "releaseDate": {
            "gte": "now-60d"
          }
        }
      },
      "aggs": {
        "recent_60d_likes_sum": {
          "sum": {
            "field": "likes"
          }
        }
      }
    },
    "recent_30d": {
      "filter": {
        "range": {
          "releaseDate": {
            "gte": "now-30d"
          }
        }
      },
      "aggs": {
        "recent_30d_likes_sum": {
          "avg": {
            "field": "likes"
          }
        }
      }
    }
  }
} 
View Code

10、统计排序

默认按doc_count降序排序,排序规则可以改,order里面可以指定aggs的别名,如length_avg,类似于mysql的order by cnt asc。

计算每种语言的歌曲数量,和每种语言的歌曲平均时长

GET /music/_doc/_search
{
  "size": 0,
  "aggs": {
    "group_by_lang": {
      "terms": {
        "field": "language",
        "order": {
          "length_avg": "desc"
        }
      },
      "aggs": {
        "length_avg": {
          "avg": {
            "field": "length"
          }
        }
      }
    }
  }
} 
View Code

 

bucket:
2
数据分组,一些数据按照某个字段进行bucket划分,这个字段值相同的数据放到一个bucket中。类似于Mysql中的group by后的查询结果。
3
4
metric:
5
对一个数据分组执行的统计,比如计算最大值,最小值,平均值等
6
类似于Mysql中的max(),min(),avg()函数的值,都是在group by后使用的。
posted @ 2021-06-05 17:36  都市之夜  阅读(177)  评论(0编辑  收藏  举报