elasticsearch 常见查询及聚合的JAVA API
ES 常见查询
(1)根据ID 进行单个查询
// Fetch a single document: index "accounts", type "person", id "1".
// setOperationThreaded(false) is passed through exactly as in the original call.
GetResponse response = client
        .prepareGet("accounts", "person", "1")
        .setOperationThreaded(false)
        .get();
相当于 sql 的 select * from accounts.person where id=1;
(2)分页查询所有记录
// Match every document and request the first page (offset 0, up to 100 hits).
QueryBuilder qb = new MatchAllQueryBuilder();
SearchResponse response = client.prepareSearch("accounts")
        .setTypes("person")
        .setQuery(qb)
        .setFrom(0)
        .setSize(100)
        .get();

// Log the JSON source of each returned hit.
SearchHit[] pageHits = response.getHits().getHits();
for (int i = 0; i < pageHits.length; i++) {
    logger.log(Level.INFO, pageHits[i].getSourceAsString());
}
response.getHits() 是本次请求返回的命中记录(这里最多 100 条) 相当于 sql select * from accounts.person limit 100;
(3)根据多条件组合与查询
// Two term clauses combined with must(): a document has to satisfy both.
QueryBuilder titleClause = QueryBuilders.termQuery("title", "JAVA开发工程师");
QueryBuilder ageClause = QueryBuilders.termQuery("age", 30);
QueryBuilder qb = QueryBuilders.boolQuery()
        .must(titleClause)
        .must(ageClause);

SearchRequestBuilder sv = client.prepareSearch("accounts")
        .setTypes("person")
        .setQuery(qb)
        .setFrom(0)
        .setSize(100);
// Log the generated request before executing it.
logger.log(Level.INFO, sv.toString());

SearchResponse response = sv.get();
for (SearchHit hit : response.getHits().getHits()) {
    logger.log(Level.INFO, hit.getSourceAsString());
}
must 就像sql里的and 相当于sql select * from accounts.person where title='JAVA开发工程师' and age=30
(4)多条件或查询
// Two alternative term clauses; should() combines them so either one may match.
QueryBuilder qb = QueryBuilders.termQuery("user", "kimchy14");
QueryBuilder qb1 = QueryBuilders.termQuery("user", "kimchy15");
QueryBuilder s = QueryBuilders.boolQuery().should(qb).should(qb1);

// Sort the hits by "age", descending. Declared with a wildcard instead of the
// raw SortBuilder type so the compiler keeps generic type checking.
SortBuilder<?> sortBuilder = SortBuilders.fieldSort("age").order(SortOrder.DESC);

SearchRequestBuilder sv = client.prepareSearch("accounts")
        .setTypes("person")
        .setQuery(s)
        .addSort(sortBuilder)
        .setFrom(0)
        .setSize(100);
// Log the generated request before executing it.
logger.log(Level.INFO, sv.toString());

SearchResponse response = sv.get();
SearchHits searchHits = response.getHits();
for (SearchHit hit : searchHits.getHits()) {
    logger.log(Level.INFO, hit.getSourceAsString());
}
should 就像sql里的or SortBuilder 的作用不言而喻就是用来排序 以上代码相当于sql select * from accounts.person where user='kimchy14' or user='kimchy15' order by age desc;
(5)范围查询
// RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30,true).to(30,true); // RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").gt(30 ); RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").gte(30 ); QueryBuilder s=QueryBuilders.boolQuery().must(rangeQueryBuilder);//.must(qb5); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(s).setFrom(0) .setSize(100); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); SearchHits searchHits = response.getHits(); for(SearchHit hit:searchHits.getHits()){ logger.log(Level.INFO , hit.getSourceAsString()); }
范围查询 rangeQuery 的 from(30,true) 设置下界为 30 第二个参数表示是否包含边界值 为 true 就是大于等于 30 为 false 就是大于 30 to 设置上界 第二个参数为 true 就是小于等于 为 false 就是小于 gt 是大于 gte 是大于等于 lt 是小于 lte 是小于等于 第一句(被注释掉的)builder 就相当于 select * from accounts.person where age >=30 and age<=30;
(6)包含查询
// Candidate values for the "user" field; a document matches if it has any of them.
List<String> userNames = new ArrayList<>();
userNames.add("kimchy14");
userNames.add("kimchy15");
userNames.add("kimchy16");
QueryBuilder qb = QueryBuilders.termsQuery("user", userNames);

// setFetchSource(include, exclude): include "age", no exclude pattern.
SearchRequestBuilder sv = client.prepareSearch("accounts")
        .setTypes("person")
        .setQuery(qb)
        .setFetchSource("age", null)
        .setFrom(0)
        .setSize(100);
// Log the generated request before executing it.
logger.log(Level.INFO, sv.toString());

SearchResponse response = sv.get();
for (SearchHit hit : response.getHits().getHits()) {
    logger.log(Level.INFO, hit.getSourceAsString());
}
包含查询使用termsQuery 可以传列表 也可以传多个参数 或者数组 setFetchSource有两个参数 第一个参数是要包含哪些字段 第二个参数是要排除哪些字段 以上这段代码就相当于sql select age from accounts.person where user in ('kimchy14','kimchy15','kimchy16');
(7)专门按id进行的包含查询
// Ids query: match documents whose _id is in the given list.
// QueryBuilders.idsQuery(String...) takes document *types*, not ids, so the
// original idsQuery(0+"") built a query for type "0" with no ids at all.
// The ids themselves have to be supplied through addIds(...).
QueryBuilder qb = QueryBuilders.idsQuery().addIds("0");

// The type is restricted via setTypes; only the "age" field of _source is fetched.
SearchRequestBuilder sv = client.prepareSearch("accounts")
        .setTypes("person")
        .setQuery(qb)
        .setFetchSource("age", null)
        .setFrom(0)
        .setSize(100);
// Log the generated request before executing it.
logger.log(Level.INFO, sv.toString());

SearchResponse response = sv.get();
SearchHits searchHits = response.getHits();
for (SearchHit hit : searchHits.getHits()) {
    logger.log(Level.INFO, hit.getSourceAsString());
}
(8)按通配符查询
QueryBuilder qb = QueryBuilders.wildcardQuery("user", "k*hy17*"); //Fuzziness fuzziness=Fuzziness.fromEdits(2); // QueryBuilder qb = QueryBuilders.fuzzyQuery("user","mchy2").fuzziness(fuzziness); //QueryBuilder qb = QueryBuilders.prefixQuery("user", "kimchy2"); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(qb).setFetchSource("user",null).setFrom(0) .setSize(100); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); SearchHits searchHits = response.getHits(); for(SearchHit hit:searchHits.getHits()){ logger.log(Level.INFO , hit.getSourceAsString()); }
通配符查询像我们sql里的like 但是还不一样 like的百分号可以加到前后 elasticsearch技术解析与实战中有一句话 是这么说的 为了避免极端缓慢的通配符查询 通配符索引词不应该以一个通配符开头 通配符查询应该避免以通配符开头
常见统计 统计分为指标 和 桶 桶就是我们统计的样本 指标就是我们平时所查的count sum 与sql不一样的是 我们还可以将统计的样本拿到 就是response.getHits
(9)统计count
AggregationBuilder termsBuilder = AggregationBuilders.count("ageCount").field("age"); RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30,true).to(30,true); QueryBuilder s=QueryBuilders.boolQuery().must(rangeQueryBuilder);//.must(qb5); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(s).setFrom(0).setSize(100).addAggregation(termsBuilder); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); SearchHits searchHits = response.getHits(); for(SearchHit hit:searchHits.getHits()){ logger.log(Level.INFO , hit.getSourceAsString()); } ValueCount valueCount= response.getAggregations().get("ageCount"); long value=valueCount.getValue();
这段代码就相当于 sql select count(age) ageCount from accounts.person where age >=30 and age<=30
(10)查询最大值
AggregationBuilder termsBuilder = AggregationBuilders.max("max").field("age"); RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30,true).to(30,true); QueryBuilder s=QueryBuilders.boolQuery().must(rangeQueryBuilder);//.must(qb5); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(s).setFrom(0).setSize(100).addAggregation(termsBuilder); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); SearchHits searchHits = response.getHits(); for(SearchHit hit:searchHits.getHits()){ logger.log(Level.INFO , hit.getSourceAsString()); } Max valueCount= response.getAggregations().get("max"); double value=valueCount.getValue();
(11)统计总和
AggregationBuilder termsBuilder = AggregationBuilders.sum("sum").field("age"); RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30,true).to(30,true); QueryBuilder s=QueryBuilders.boolQuery().must(rangeQueryBuilder);//.must(qb5); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(s).setFrom(0).setSize(100).addAggregation(termsBuilder); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); SearchHits searchHits = response.getHits(); for(SearchHit hit:searchHits.getHits()){ logger.log(Level.INFO , hit.getSourceAsString()); } Sum valueCount= response.getAggregations().get("sum"); double value=valueCount.getValue();
(12)平均数
AggregationBuilder termsBuilder = AggregationBuilders.avg("avg").field("age"); RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30,true).to(30,true); QueryBuilder s=QueryBuilders.boolQuery().must(rangeQueryBuilder);//.must(qb5); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(s).setFrom(0).setSize(100).addAggregation(termsBuilder); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); SearchHits searchHits = response.getHits(); for(SearchHit hit:searchHits.getHits()){ logger.log(Level.INFO , hit.getSourceAsString()); } Avg valueCount= response.getAggregations().get("avg"); double value=valueCount.getValue();
(13)统计样本基本指标
AggregationBuilder termsBuilder = AggregationBuilders.stats("stats").field("age"); RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30,true).to(30,true); QueryBuilder s=QueryBuilders.boolQuery().must(rangeQueryBuilder);//.must(qb5); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(s).setFrom(0).setSize(100).addAggregation(termsBuilder); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); SearchHits searchHits = response.getHits(); for(SearchHit hit:searchHits.getHits()){ logger.log(Level.INFO , hit.getSourceAsString()); } Stats valueCount= response.getAggregations().get("stats"); logger.log(Level.INFO,"max"+valueCount.getMaxAsString()); logger.log(Level.INFO,"avg"+valueCount.getAvgAsString()); logger.log(Level.INFO,"sum"+valueCount.getSumAsString()); logger.log(Level.INFO,"min"+valueCount.getMinAsString()); logger.log(Level.INFO,"count"+valueCount.getCount());
分组统计 相当于group by 后拿各组指标进行统计
(14)分组求各组数据
// Bucket documents by "age" (the GROUP BY part).
AggregationBuilder termsBuilder = AggregationBuilders.terms("by_age").field("age");
// Per-bucket metrics: sum, average and value count of "age".
AggregationBuilder sumBuilder = AggregationBuilders.sum("ageSum").field("age");
AggregationBuilder avgBuilder = AggregationBuilders.avg("ageAvg").field("age");
AggregationBuilder countBuilder = AggregationBuilders.count("ageCount").field("age");
termsBuilder.subAggregation(sumBuilder);
termsBuilder.subAggregation(avgBuilder);
termsBuilder.subAggregation(countBuilder);
//TermsAggregationBuilder all = AggregationBuilders.terms("age").field("age");
//all.subAggregation(termsBuilder);

// Restrict the sample to 30 <= age <= 36 (both bounds inclusive).
RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30, true).to(36, true);
QueryBuilder s = QueryBuilders.boolQuery().must(rangeQueryBuilder);

// setFetchSource(include, exclude): no include pattern, exclude "gender".
SearchRequestBuilder sv = client.prepareSearch("accounts")
        .setTypes("person")
        .setQuery(s)
        .setFetchSource(null, "gender")
        .setFrom(0)
        .setSize(100)
        .addAggregation(termsBuilder);
// Log the generated request before executing it.
logger.log(Level.INFO, sv.toString());

SearchResponse response = sv.get();
Aggregations terms = response.getAggregations();
for (Aggregation topLevel : terms) {
    // Each top-level aggregation is cast to LongTerms, as the bucket keys come from "age".
    LongTerms ageTerms = (LongTerms) topLevel;
    for (LongTerms.Bucket bucket : ageTerms.getBuckets()) {
        String key = bucket.getKeyAsString();
        long docCount = bucket.getDocCount();
        Aggregations metrics = bucket.getAggregations();
        double ageSum = ((Sum) metrics.asMap().get("ageSum")).getValue();
        double ageAvg = ((Avg) metrics.asMap().get("ageAvg")).getValue();
        long ageCount = ((ValueCount) metrics.asMap().get("ageCount")).getValue();
        // One line per bucket: key, doc count, sum, average, value count.
        logger.info(key + " " + docCount + " " + ageSum + " " + ageAvg + " " + ageCount);
    }
}
第一行 termsBuilder 就相当于根据年龄对数据进行分组 group by 后面的 sumBuilder avgBuilder countBuilder 等就是在组内 求和 求平均数 求数量
(15)多分组求各组数据
TermsAggregationBuilder all = AggregationBuilders.terms("by_gender").field("gender"); AggregationBuilder age = AggregationBuilders.terms("by_age").field("age"); AggregationBuilder sumBuilder=AggregationBuilders.sum("ageSum").field("age"); //AggregationBuilder avgBuilder=AggregationBuilders.avg("ageAvg").field("age"); // AggregationBuilder countBuilder=AggregationBuilders.count("ageCount").field("age"); all.subAggregation(age.subAggregation(sumBuilder)); RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("age").from(30,true).to(32,true); QueryBuilder s=QueryBuilders.boolQuery().must(rangeQueryBuilder);//.must(qb5); SearchRequestBuilder sv=client.prepareSearch("accounts").setTypes("person").setQuery(rangeQueryBuilder).addAggregation(all); logger.log(Level.INFO,sv.toString()); SearchResponse response= sv.get(); Aggregations terms= response.getAggregations(); for (Aggregation a:terms){ StringTerms stringTerms= (StringTerms)a; for(StringTerms.Bucket bucket:stringTerms.getBuckets()){ // logger.info(bucket.getKeyAsString()); Aggregation aggs=bucket.getAggregations().getAsMap().get("by_age"); LongTerms terms1= (LongTerms)aggs; for (LongTerms.Bucket bu:terms1.getBuckets()){ logger.info(bucket.getKeyAsString()+" "+bu.getKeyAsString()+" "+bu.getDocCount()+" "+((Sum)bu.getAggregations().asMap().get("ageSum")).getValue()); } } }
每增加一个分组指标就需要多加一个termsBuilder 其他等一切跟普通分组一样 每次拿到上一层的桶之后 再从桶的子聚合里按名称取出下一层分组 继续遍历它的桶即可
以上就是我总结的基本的查询 聚合 等常见功能 其他等诸如 求各组前多少数据是用topHits 这些基本够我们日常操作了 。
最后我们总结下 精确查询用term 组合查询用bool 范围用range and查询用must or查询用should not查询用mustNot 常见的接收聚合返回结果的类型 ValueCount Avg Sum Max Min 按照英文意义就可以理解 分组聚合查询时候还需要根据实际情况看是返回哪种terms