es分页查询 scroll

分页代码1:

public long queryByScroll(QueryBuilder query, int size, HitFunction function) {

try {


            String[] includeFields = new String[]{"venderId"};
            SearchRequestBuilder builder = client
                    .prepareSearch(INDEX)
                    .setTypes(TYPE)
                    // .addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC) // Scroll requests have optimizations that make them faster when the sort order is _doc. If you want to iterate over all documents regardless of the order, this is the most efficient option:
                    .setScroll(TimeValue.timeValueSeconds(DEFAULT_TIME_VALUE_IN_SECONDS))
                    .setQuery(query)
                    .setFetchSource(includeFields, null)
                    .setSize(size); // max of {size} hits will be returned for each scroll


            SearchResponse response = builder.get();
            long totalHits = response.getHits().getTotalHits();

            log.info("get from es, size:{}", totalHits);
            // Scroll until no hits are returned
            AtomicInteger counter = new AtomicInteger(0);
            scroll:
            do {
                log.info("enter while, response:{}, totalSize:{}",  JSON.toJSONString(response), response.getHits().getTotalHits());
                for (SearchHit hit : response.getHits().getHits()) {
                    //对查询到的结果处理

                    if (!function.apply(counter.getAndIncrement(), hit)) {
                        if (log.isInfoEnabled()) {
                            log.info("index scroll break at index: {}, id: {}", counter.get(), hit.getId());
                        }
                        break scroll;
                    }
                }
                response = client
                        .prepareSearchScroll(response.getScrollId())
                        .setScroll(TimeValue.timeValueSeconds(DEFAULT_TIME_VALUE_IN_SECONDS))
                        .execute()
                        .actionGet();
                log.info("before exit while, response:{}",  JSON.toJSONString(response));
            } while (response.getHits().getHits().length != 0); // Zero hits mark the end of the scroll and the while loop.

            // Search context are automatically removed when the scroll timeout has been exceeded. However keeping scrolls open has a cost, as discussed in the previous section so scrolls should be explicitly cleared as soon as the scroll is not being used anymore using the clear-scroll API:
            ClearScrollResponse clearScrollResponse = client
                    .prepareClearScroll()
                    .addScrollId(response.getScrollId())
                    .get();

            if (log.isInfoEnabled()) {
                log.info("Clear scroll response:{}", clearScrollResponse.isSucceeded());
            }
            return totalHits;
        } catch (Exception e) {
            log.error("queryByScroll error", e);
        }
        return 0;
    }

调用处：

venderCrowdEsDao.queryByScroll(QueryBuilders.boolQuery().must(QueryBuilders.termQuery("venderMode", "POP")),
                200,  (index, hit) -> {
                    //获取venderId，通过venderId调用外呼获取电话、邮箱，首先获取运营负责人的电话，若为空，则使用店铺负责人的电话，若都为空则不存储。
                    Long venderId = Long.valueOf(String.valueOf(hit.getSourceAsMap().get("venderId")));
                    log.info("function applyL:{}", venderId);
                    VenderContactPO contactResult = venderServiceRpc.getContactsInfoByVenderId(venderId);
                    VenderContactPO contactPO = new VenderContactPO();//最终要存储到数据表中的对象
                    //加密，插入vender_contact_info表
                    if(contactResult != null){
                        contactPO.setVenderId(contactResult.getVenderId());
                        contactPO.setEmail(contactResult.getEmail());
                        contactPO.setPhoneNum(contactResult.getPhoneNum());
                    }else {
                        contactPO.setEmail("");
                        contactPO.setPhoneNum("");
                        contactPO.setVenderId(venderId);
                    }
                    contactPO.setShopName(venderServiceRpc.getShopNameByVenderId(venderId));

                    venderContactService.insertIfNotPresent(contactPO);
                    return true;
                })

分页代码2:

分页查询并批量删除

public long queryByScroll(QueryBuilder builder, int size, HitFunction function) {
        try {
            SearchRequestBuilder requestBuilder = getClient()
                    .prepareSearch(getIndex())
                    .setTypes(getType())
                    .setScroll(TimeValue.timeValueSeconds(120))
                    .setQuery(builder)
                    .setSize(size); // max of {size} hits will be returned for each scroll

            SearchResponse response = requestBuilder.get();
            long totalHits = response.getHits().getTotalHits();

            log.info("get from es, size:{}", totalHits);
            AtomicInteger count = new AtomicInteger(1);
            // Scroll until no hits are returned
            scroll:
            do {
                log.info("enter while, totalSize:{}",  JSON.toJSONString(response), response.getHits().getTotalHits());
                //对每页数据处理
                function.apply(response.getHits().getHits());

                response = getClient()
                        .prepareSearchScroll(response.getScrollId())
                        .setScroll(TimeValue.timeValueSeconds(120))
                        .execute()
                        .actionGet();
                log.info("before exit while");
                count.addAndGet(1);
            } while (response.getHits().getHits().length != 0); // Zero hits mark the end of the scroll and the while loop.

            // Search context are automatically removed when the scroll timeout has been exceeded. However keeping scrolls open has a cost, as discussed in the previous section so scrolls should be explicitly cleared as soon as the scroll is not being used anymore using the clear-scroll API:
            ClearScrollResponse clearScrollResponse = getClient()
                    .prepareClearScroll()
                    .addScrollId(response.getScrollId())
                    .get();

            if (log.isInfoEnabled()) {
                log.info("Clear scroll response:{}", clearScrollResponse.isSucceeded());
            }
            return totalHits;
        } catch (Exception e) {
            log.error("queryByScroll error", e);
        }
        return 0;
    }

调用处2:

@Override
    public long queryByScrollAndProcess(int size) {
    taskRecordGateway.queryByScroll(QueryBuilders.matchAllQuery()，size,
        (hits) -> {
            delTaskRecordByHits(hits);
            log.info("--->queryByScrollAndMove");
            return true;
        });
        return 0;
    }

@Override
    public void delTaskRecordByHits(SearchHit[] searchHits) {
        BulkRequestBuilder bulkRequest = getClient().prepareBulk();

        for(SearchHit hit : searchHits){
            String sourceAsString = hit.getSourceAsString();
            TaskRecordDomain taskRecordDomain = com.jd.fastjson.JSON.parseObject(sourceAsString, TaskRecordDomain.class);
            String id = hit.getId();
            bulkRequest.add(getClient().prepareDelete(getIndex(), getType(), id).setRouting(taskRecordDomain.getVenderIdentity()).request());
        }

        BulkResponse bulkResponse = bulkRequest.get();
        if (bulkResponse.hasFailures()) {
            for(BulkItemResponse item : bulkResponse.getItems()){
                log.info("删除失败, id:{}, failureMessage:{}", item.getId(), item.getFailureMessage());
            }
        }else {
            log.info("删除成功");
        }
    }

posted on 2020-11-26 15:36 芝麻＿糊阅读(3093) 评论(0) 编辑收藏举报

刷新页面返回顶部

芝麻＿糊

导航

公告

es分页查询 scroll