ElasticSearch Aggs的一些使用方法
这段代码是关于多层聚合和嵌套域的聚合,来源:https://github.com/elasticsearch/elasticsearch/blob/master/src/test/java/org/elasticsearch/search/aggregations/bucket/NestedTests.java
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.aggregations.bucket;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
import org.elasticsearch.search.aggregations.bucket.terms.LongTerms;
import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
import org.elasticsearch.search.aggregations.metrics.max.Max;
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.hamcrest.Matchers;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.core.IsNull.notNullValue;

/**
 * Integration tests for the {@code nested} bucket aggregation: used on its own,
 * with metric and terms sub-aggregations, as a sub-aggregation of {@code terms}
 * and {@code histogram}, and nested inside another {@code nested} aggregation.
 *
 * <p>All tests share the indices populated once by {@link #setupSuiteScopeCluster()}.
 */
@ElasticsearchIntegrationTest.SuiteScopeTest
public class NestedTests extends ElasticsearchIntegrationTest {

    /** Number of parent documents indexed into {@code idx}. */
    static int numParents;
    /** {@code numChildren[i]} is the number of nested children of parent {@code i}. */
    static int[] numChildren;
    /** Randomly chosen collection mode applied to every terms aggregation in this suite. */
    static SubAggCollectionMode aggCollectionMode;

    /**
     * Creates and fills the three indices used by the tests.
     *
     * <ul>
     *   <li>{@code idx}: {@code numParents} parents; parent {@code i} has
     *       {@code value = i + 1} and {@code numChildren[i]} nested children whose
     *       values are {@code i + 1 + j}.</li>
     *   <li>{@code empty_bucket_idx}: two parents with values 0 and 2, five nested
     *       children each.</li>
     *   <li>{@code idx_nested_nested_aggs}: one document with two {@code nested1}
     *       objects, each holding a single {@code nested2} object.</li>
     * </ul>
     */
    @Override
    public void setupSuiteScopeCluster() throws Exception {

        assertAcked(prepareCreate("idx")
                .addMapping("type", "nested", "type=nested"));

        List<IndexRequestBuilder> builders = new ArrayList<>();

        numParents = randomIntBetween(3, 10);
        numChildren = new int[numParents];
        aggCollectionMode = randomFrom(SubAggCollectionMode.values());
        logger.info("AGG COLLECTION MODE: " + aggCollectionMode);
        int totalChildren = 0;
        for (int i = 0; i < numParents; ++i) {
            if (i == numParents - 1 && totalChildren == 0) {
                // we need at least one child overall
                numChildren[i] = randomIntBetween(1, 5);
            } else {
                numChildren[i] = randomInt(5);
            }
            totalChildren += numChildren[i];
        }
        assertTrue(totalChildren > 0);

        for (int i = 0; i < numParents; i++) {
            XContentBuilder source = jsonBuilder()
                    .startObject()
                    .field("value", i + 1)
                    .startArray("nested");
            for (int j = 0; j < numChildren[i]; ++j) {
                source = source.startObject().field("value", i + 1 + j).endObject();
            }
            source = source.endArray().endObject();
            // The id is the numeric i + 1. The previous `"" + i + 1` concatenated
            // left to right and produced "01", "11", ... instead.
            builders.add(client().prepareIndex("idx", "type", Integer.toString(i + 1)).setSource(source));
        }

        prepareCreate("empty_bucket_idx")
                .addMapping("type", "value", "type=integer", "nested", "type=nested")
                .execute().actionGet();
        for (int i = 0; i < 2; i++) {
            builders.add(client().prepareIndex("empty_bucket_idx", "type", Integer.toString(i)).setSource(jsonBuilder()
                    .startObject()
                    .field("value", i * 2)
                    .startArray("nested")
                    .startObject().field("value", i + 1).endObject()
                    .startObject().field("value", i + 2).endObject()
                    .startObject().field("value", i + 3).endObject()
                    .startObject().field("value", i + 4).endObject()
                    .startObject().field("value", i + 5).endObject()
                    .endArray()
                    .endObject()));
        }

        assertAcked(prepareCreate("idx_nested_nested_aggs")
                .addMapping("type", jsonBuilder().startObject().startObject("type").startObject("properties")
                        .startObject("nested1")
                            .field("type", "nested")
                            .startObject("properties")
                                .startObject("nested2")
                                    .field("type", "nested")
                                .endObject()
                            .endObject()
                        .endObject()
                        .endObject().endObject().endObject()));

        builders.add(
                client().prepareIndex("idx_nested_nested_aggs", "type", "1")
                        .setSource(jsonBuilder().startObject()
                                .startArray("nested1")
                                    .startObject()
                                        .field("a", "a")
                                        .startArray("nested2")
                                            .startObject()
                                                .field("b", 2)
                                            .endObject()
                                        .endArray()
                                    .endObject()
                                    .startObject()
                                        .field("a", "b")
                                        .startArray("nested2")
                                            .startObject()
                                                .field("b", 2)
                                            .endObject()
                                        .endArray()
                                    .endObject()
                                .endArray()
                                .endObject())
        );
        indexRandom(true, builders);
        ensureSearchable();
    }

    /**
     * A {@code nested} aggregation with a {@code stats} sub-aggregation must see every
     * nested child of {@code idx} and report min/max/count/sum/avg over their values.
     */
    @Test
    public void simple() throws Exception {
        SearchResponse response = client().prepareSearch("idx")
                .addAggregation(nested("nested").path("nested")
                        .subAggregation(stats("nested_value_stats").field("nested.value")))
                .execute().actionGet();

        assertSearchResponse(response);

        // Recompute the expected statistics from the randomly generated layout.
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        long sum = 0;
        long count = 0;
        for (int i = 0; i < numParents; ++i) {
            for (int j = 0; j < numChildren[i]; ++j) {
                final long value = i + 1 + j;
                min = Math.min(min, value);
                max = Math.max(max, value);
                sum += value;
                ++count;
            }
        }

        Nested nested = response.getAggregations().get("nested");
        assertThat(nested, notNullValue());
        assertThat(nested.getName(), equalTo("nested"));
        assertThat(nested.getDocCount(), equalTo(count));
        assertThat(nested.getAggregations().asList().isEmpty(), is(false));

        Stats stats = nested.getAggregations().get("nested_value_stats");
        assertThat(stats, notNullValue());
        assertThat(stats.getMin(), equalTo(min));
        assertThat(stats.getMax(), equalTo(max));
        assertThat(stats.getCount(), equalTo(count));
        assertThat(stats.getSum(), equalTo((double) sum));
        assertThat(stats.getAvg(), equalTo((double) sum / count));
    }

    /**
     * Pointing a {@code nested} aggregation at the non-nested field {@code value}
     * must make the search fail with an {@link ElasticsearchException}.
     */
    @Test
    public void onNonNestedField() throws Exception {
        try {
            client().prepareSearch("idx")
                    .addAggregation(nested("nested").path("value")
                            .subAggregation(stats("nested_value_stats").field("nested.value")))
                    .execute().actionGet();

            fail("expected execution to fail - an attempt to nested facet on non-nested field/path");

        } catch (ElasticsearchException expected) {
            // Expected: this exception is exactly what the test is checking for.
        }
    }

    /**
     * A {@code terms} sub-aggregation under {@code nested} must produce one bucket per
     * distinct child value, each with the number of children carrying that value.
     */
    @Test
    public void nestedWithSubTermsAgg() throws Exception {
        SearchResponse response = client().prepareSearch("idx")
                .addAggregation(nested("nested").path("nested")
                        .subAggregation(terms("values").field("nested.value").size(100)
                                .collectMode(aggCollectionMode)))
                .execute().actionGet();

        assertSearchResponse(response);

        long docCount = 0;
        // Child values are i + 1 + j with i < numParents and j < 5, so the largest
        // possible value is numParents + 4; numParents + 6 slots is enough.
        long[] counts = new long[numParents + 6];
        for (int i = 0; i < numParents; ++i) {
            for (int j = 0; j < numChildren[i]; ++j) {
                final int value = i + 1 + j;
                ++counts[value];
                ++docCount;
            }
        }
        int uniqueValues = 0;
        for (long count : counts) {
            if (count > 0) {
                ++uniqueValues;
            }
        }

        Nested nested = response.getAggregations().get("nested");
        assertThat(nested, notNullValue());
        assertThat(nested.getName(), equalTo("nested"));
        assertThat(nested.getDocCount(), equalTo(docCount));
        assertThat(nested.getAggregations().asList().isEmpty(), is(false));

        LongTerms values = nested.getAggregations().get("values");
        assertThat(values, notNullValue());
        assertThat(values.getName(), equalTo("values"));
        assertThat(values.getBuckets(), notNullValue());
        assertThat(values.getBuckets().size(), equalTo(uniqueValues));
        for (int i = 0; i < counts.length; ++i) {
            final String key = Long.toString(i);
            if (counts[i] == 0) {
                assertNull(values.getBucketByKey(key));
            } else {
                Bucket bucket = values.getBucketByKey(key);
                assertNotNull(bucket);
                assertEquals(counts[i], bucket.getDocCount());
            }
        }
    }

    /**
     * {@code nested} used below a {@code terms} aggregation on the parent field: every
     * parent value gets its own bucket whose nested {@code max} is the largest child value.
     */
    @Test
    public void nestedAsSubAggregation() throws Exception {
        SearchResponse response = client().prepareSearch("idx")
                .addAggregation(terms("top_values").field("value").size(100)
                        .collectMode(aggCollectionMode)
                        .subAggregation(nested("nested").path("nested")
                                .subAggregation(max("max_value").field("nested.value"))))
                .execute().actionGet();

        assertSearchResponse(response);

        LongTerms values = response.getAggregations().get("top_values");
        assertThat(values, notNullValue());
        assertThat(values.getName(), equalTo("top_values"));
        assertThat(values.getBuckets(), notNullValue());
        assertThat(values.getBuckets().size(), equalTo(numParents));

        for (int i = 0; i < numParents; i++) {
            String topValue = "" + (i + 1);
            assertThat(values.getBucketByKey(topValue), notNullValue());
            Nested nested = values.getBucketByKey(topValue).getAggregations().get("nested");
            assertThat(nested, notNullValue());
            Max max = nested.getAggregations().get("max_value");
            assertThat(max, notNullValue());
            // Children hold i + 1 + j for j < numChildren[i], so the largest is
            // i + numChildren[i]; with no children the test expects -Infinity.
            assertThat(max.getValue(), equalTo(numChildren[i] == 0 ? Double.NEGATIVE_INFINITY : (double) i + numChildren[i]));
        }
    }

    /**
     * Two levels of {@code nested}: {@code nested1} -> terms on {@code nested1.a} ->
     * {@code nested1.nested2} -> sum of {@code b}. Each of the two {@code nested1}
     * objects owns exactly one {@code nested2} object with {@code b = 2}.
     */
    @Test
    public void nestNestedAggs() throws Exception {
        SearchResponse response = client().prepareSearch("idx_nested_nested_aggs")
                .addAggregation(nested("level1").path("nested1")
                        .subAggregation(terms("a").field("nested1.a")
                                .collectMode(aggCollectionMode)
                                .subAggregation(nested("level2").path("nested1.nested2")
                                        .subAggregation(sum("sum").field("nested1.nested2.b")))))
                .get();
        assertSearchResponse(response);

        Nested level1 = response.getAggregations().get("level1");
        assertThat(level1, notNullValue());
        assertThat(level1.getName(), equalTo("level1"));
        assertThat(level1.getDocCount(), equalTo(2L));

        StringTerms a = level1.getAggregations().get("a");

        Terms.Bucket aBucket = a.getBucketByKey("a");
        assertThat(aBucket.getDocCount(), equalTo(1L));

        Nested level2 = aBucket.getAggregations().get("level2");
        assertThat(level2.getDocCount(), equalTo(1L));
        Sum sum = level2.getAggregations().get("sum");
        assertThat(sum.getValue(), equalTo(2d));

        Terms.Bucket bBucket = a.getBucketByKey("b");
        assertThat(bBucket.getDocCount(), equalTo(1L));

        level2 = bBucket.getAggregations().get("level2");
        assertThat(level2.getDocCount(), equalTo(1L));
        sum = level2.getAggregations().get("sum");
        assertThat(sum.getValue(), equalTo(2d));
    }

    /**
     * With {@code minDocCount(0)} the histogram emits the empty bucket at key 1 (the two
     * parents have values 0 and 2); its {@code nested} sub-aggregation must exist and
     * report a doc count of zero.
     */
    @Test
    public void emptyAggregation() throws Exception {
        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
                .addAggregation(histogram("histo").field("value").interval(1L).minDocCount(0)
                        .subAggregation(nested("nested").path("nested")))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));
        Histogram histo = searchResponse.getAggregations().get("histo");
        assertThat(histo, Matchers.notNullValue());
        Histogram.Bucket bucket = histo.getBucketByKey(1L);
        assertThat(bucket, Matchers.notNullValue());

        Nested nested = bucket.getAggregations().get("nested");
        assertThat(nested, Matchers.notNullValue());
        assertThat(nested.getName(), equalTo("nested"));
        assertThat(nested.getDocCount(), is(0L));
    }
}
上面的代码来自上述链接,下面是我自己在项目中的应用。
1 public static Map<String, Object> GetRegionInfo(Client client, 2 RequestSignal requestSignal, Set<String> set) { 3 4 Map<String, Object> result = new HashMap<String, Object>(); 5 6 AggregationBuilder aggs1 = AggregationBuilders.nested("level1").path( 7 "nna_regions"); 8 AggregationBuilder aggs2 = AggregationBuilders.filter("level2").filter( 9 ConstValue.AGGS_FILTERBUILDER); 10 AggregationBuilder aggs3 = AggregationBuilders.terms("level3") 11 .field("nna_regions.sca_region").size(5000); 12 SumBuilder aggs4 = AggregationBuilders.sum("level4").field( 13 "nna_regions.dna_score"); 14 15 SearchResponse response = client 16 .prepareSearch("flume-*-content-*") 17 .setQuery(ConstValue.queryBuilder_statAction(requestSignal)) 18 .setSearchType("count") 19 .addAggregation( 20 aggs1.subAggregation(aggs2.subAggregation(aggs3 21 .subAggregation(aggs4)))).execute().actionGet(); 22 23 Nested level1 = response.getAggregations().get("level1"); 24 Filter level2 = level1.getAggregations().get("level2"); 25 26 Terms level3 = level2.getAggregations().get("level3"); 27 Collection<Terms.Bucket> collection = level3.getBuckets(); 28 29 for (Terms.Bucket bucket : collection) { 30 String region = bucket.getKey(); 31 long count = bucket.getDocCount(); 32 double score = 0; 33 if (set.contains(region)) { 34 Sum sum = bucket.getAggregations().get("level4"); 35 36 if (sum == null) { 37 System.out.println("null"); 38 } else { 39 score = sum.getValue(); 40 } 41 Map<String, Object> tmp = new HashMap<String, Object>(); 42 tmp.put("count", count); 43 tmp.put("score", score); 44 result.put(region, tmp); 45 } 46 } 47 return result; 48 }
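aggs1 用来指定嵌套域的路径(这里是 nna_regions);aggs2 在嵌套文档上做过滤,aggs3 按 nna_regions.sca_region 分桶,aggs4 对 nna_regions.dna_score 求和。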
下面是另一段代码:先按 inp_type 分组,再按日期域 tfp_save_time 做 date histogram 聚合,并取出每个时间桶的文档数。
1 private String statRequest(Client esClient) { 2 FilteredQueryBuilder builder = QueryBuilders.filteredQuery( 3 QueryBuilders.matchAllQuery(), 4 FilterBuilders.rangeFilter("tfp_save_time").from(begTime) 5 .to(endTime).includeLower(true).includeUpper(true)); 6 7 AggregationBuilder aggs1 = AggregationBuilders.terms("inp_type").field( 8 "inp_type"); 9 10 AggregationBuilder aggs = AggregationBuilders.dateHistogram("By_Date") 11 .field("tfp_save_time").format("yyyy-MM-dd HH:mm:ss") 12 .extendedBounds(begTime, endTime).interval(statType); 13 14 15 SearchResponse response = esClient.prepareSearch("flume-*-content*") 16 .setQuery(builder).setSearchType("count") 17 .addAggregation(aggs1.subAggregation(aggs)).execute() 18 .actionGet(); 19 20 Terms terms = response.getAggregations().get("inp_type"); 21 Collection<Terms.Bucket> inp_type = terms.getBuckets(); 22 Iterator<Bucket> inp_type_It = inp_type.iterator(); 23 // Gson gson = new GsonBuilder().disableHtmlEscaping().create(); 24 25 StatResult statResult = new StatResult(); // result. 26 27 while (inp_type_It.hasNext()) { 28 29 HashMap<String, Integer> test = new HashMap<String, Integer>();// result 30 // nested. 31 Bucket inpBucket = inp_type_It.next(); 32 // System.out.println(inpBucket.getKey()); 33 // System.out.println(inpBucket.getDocCount()); 34 DateHistogram dateHistogram = (DateHistogram) inpBucket 35 .getAggregations().get("By_Date"); 36 Collection<DateHistogram.Bucket> by_date = (Collection<DateHistogram.Bucket>) dateHistogram 37 .getBuckets(); 38 39 Iterator<DateHistogram.Bucket> by_date_It = by_date.iterator(); 40 41 while (by_date_It.hasNext()) { 42 DateHistogram.Bucket bucket = by_date_It.next(); 43 44 int count = Integer.parseInt(String.valueOf(bucket 45 .getDocCount())); 46 String newdate = postDate(bucket.getKey()); 47 48 test.put(newdate, count); 49 } 50 if (!test.isEmpty()) { 51 statResult.add(inpBucket.getKey(), test); 52 } 53 } 54 return statResult.toString(); 55 }