Elasticsearch 之(48) Java API 基于scroll实现月度销售数据批量下载
比如说,现在要从es中下载大批量的数据,放到excel中。例如月度或者年度的销售记录,数据量很多,可能有几千条、几万条,甚至几十万条。
就是要看宝马的销售记录
这其实就要用到我们之前在《 Elasticsearch 之(5)kibana多种搜索方式》中讲解的es scroll api,对大量数据进行批量地获取和处理。
本例的需求就是要看宝马的销售记录:目前共有2条数据,做一个演示,每个批次下载一条宝马的销售记录,分2个批次把它下载完。
package com.es.app; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.get.MultiGetItemResponse; import org.elasticsearch.action.get.MultiGetResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.transport.client.PreBuiltTransportClient; import java.net.InetAddress; public class ScrollCarInfoApp { public static void main(String[] args) throws Exception { Settings settings = Settings.builder() .put("cluster.name", "elasticsearch") .put("client.transport.sniff", true) .build(); TransportClient client = new PreBuiltTransportClient(settings) .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("localhost"), 9300)); SearchResponse searchResponse = client.prepareSearch("car_shop") .setTypes("sales") .setQuery(QueryBuilders.termQuery("brand.keyword", "宝马")) .setScroll(new TimeValue(60000)) .setSize(1) .get(); int batchCount = 0; do { for(SearchHit searchHit : searchResponse.getHits().getHits()) { System.out.println("batch: " + ++batchCount); System.out.println(searchHit.getSourceAsString()); // 每次查询一批数据,比如1000行,然后写入本地的一个excel文件中 // 如果说你一下子查询几十万条数据,不现实,jvm内存可能都会爆掉 } searchResponse = client.prepareSearchScroll(searchResponse.getScrollId()) .setScroll(new TimeValue(60000)) .execute() .actionGet(); } while(searchResponse.getHits().getHits().length != 0); client.close(); } }
运行结果:
batch: 1 { "brand": "宝马", "name": "宝马320", "price": 320000, "produce_date": "2017-01-01", "sale_price": 280000, "sale_date": "2017-01-25" }
batch: 2 { "brand": "宝马", "name": "宝马310", "price": 320000, "produce_date": "2017-01-01", "sale_price": 280000, "sale_date": "2017-01-25" }