SpringBoot: A Spring Boot + Elasticsearch Integration Utility Class

Preface

Spring Boot and Elasticsearch have strict version constraints; unless you have no other choice, develop against the matching versions.

The Elasticsearch client jar version is likewise strictly coupled to the Elasticsearch server version; the two must correspond exactly.
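If the client jars managed by the Spring Boot BOM do not match your server, one option (a sketch of mine, not part of the original setup) is to pin the client explicitly in dependencyManagement; entries declared directly in the POM take precedence over the imported BOM:

    <dependencyManagement>
        <dependencies>
            <!-- Assumption: pinning the high-level client to the 7.16.1 server used below -->
            <dependency>
                <groupId>org.elasticsearch.client</groupId>
                <artifactId>elasticsearch-rest-high-level-client</artifactId>
                <version>7.16.1</version>
            </dependency>
        </dependencies>
    </dependencyManagement>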

Maven Dependencies

<!-- Dependency management -->
    <dependencyManagement>
        <dependencies>
            <!-- Spring Boot dependency BOM -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>2.5.9</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <!-- Elasticsearch dependency -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
            <!-- <version>2.5.9</version> -->
            <!-- matches Elasticsearch server version 7.16.1 -->
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Excel utilities -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.2</version>
        </dependency>

        <!-- Jars required by JSONObject (json-lib) -->
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.9.3</version>
        </dependency>
        <dependency>
            <groupId>commons-collections</groupId>
            <artifactId>commons-collections</artifactId>
            <version>3.2.1</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.6</version>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1.1</version>
        </dependency>
        <dependency>
            <groupId>net.sf.ezmorph</groupId>
            <artifactId>ezmorph</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.2.3</version>
            <classifier>jdk15</classifier><!-- JDK variant; json-lib publishes jdk13/jdk15 classifiers -->
        </dependency>
        <!-- commons-lang3 (used by the utility class below for StringUtils) -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>
        <!-- Alibaba JSON parser -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.79</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

Configuration File

server:
  port: 8088

spring:
  data:
    elasticsearch:
      client:
        reactive:
          # ES node list (comma-separated for a cluster). Port 9200 serves HTTP queries, 9300 is the inter-node transport port; 9200 is used here
          endpoints: 192.168.126.200:9200
          # username
          username: es
          # password
          password: 123456

Defining the Configuration Class

package com.java.hz.config;

import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.ArrayList;
import java.util.List;

/**
 * @Program: Demo
 * @Description: ES configuration class
 * @Author: zhaoyue
 * @Create: 2022-08-04
 */

@Configuration
public class EsConfig {
    public static final RequestOptions COMMON_OPTIONS;

    @Value("${spring.data.elasticsearch.client.reactive.endpoints}")
    private String urls;
    @Value("${spring.data.elasticsearch.client.reactive.username}")
    private String account;
    @Value("${spring.data.elasticsearch.client.reactive.password}")
    private String password;

    static {
        RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
        COMMON_OPTIONS = builder.build();
    }

    // Register the client with the IoC container
    @Bean("restHighLevelClient")
    public RestHighLevelClient restHighLevelClient() {
        // Collect every configured node so that a comma-separated cluster list
        // works (the original loop kept only the last node)
        List<HttpHost> hosts = new ArrayList<>();
        if (!StringUtils.isBlank(urls)) {
            for (String url : urls.split(",")) {
                String[] urlArr = url.split(":");
                String ipAddr = urlArr[0];
                // default to 9200 when no port is given
                int port = (urlArr.length < 2 ? 9200 : Integer.parseInt(urlArr[1]));
                hosts.add(new HttpHost(ipAddr, port, "http"));
            }
        }
        RestClientBuilder builder = RestClient.builder(hosts.toArray(new HttpHost[0]));
        // Basic authentication
        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(account, password));
        builder.setHttpClientConfigCallback(f -> f.setDefaultCredentialsProvider(credentialsProvider));
        return new RestHighLevelClient(builder);
    }
}
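To sanity-check the wiring, a minimal sketch (my addition, not part of the original post) asks the cluster for its info through the injected client:

    // Assumes the EsConfig above is active and the client has been injected,
    // e.g. via @Resource RestHighLevelClient restHighLevelClient;
    // org.elasticsearch.client.core.MainResponse carries the cluster name and version
    MainResponse info = restHighLevelClient.info(RequestOptions.DEFAULT);
    System.out.println("Connected to cluster: " + info.getClusterName());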

Defining a Fixed-Field Entity Class

If the index's fields are fixed, you can define an entity class to work with them. If the fields are not fixed, a Map or a JSONObject works too.

import lombok.Getter;
import lombok.Setter;
import com.fasterxml.jackson.annotation.JsonFormat;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.DateFormat;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;

import java.util.Date;

/**
 * @Program: bioinfo
 * ES index name: gene_snp_data
 * @Description: Entity class for the ES index
 * @Author: zhaoyue
 * @Create: 2022-08-05
 */
@Document(indexName = "gene_snp_data")
// Lombok annotations generating getters and setters
@Getter
@Setter
public class GeneSnpData {

    /**
     * ID
     */
    @Id
    private String id;
    /**
     * File ID
     */
    @Field(type = FieldType.Keyword)
    private String file_id;
    /**
     * Chromosome identifier (used by the fuzzy-match condition in the utility class below)
     */
    @Field(type = FieldType.Keyword)
    private String chr_ind;
    /**
     * Position
     */
    @Field(type = FieldType.Long)
    private Long position;
    /**
     * Creation time
     */
    @Field(type = FieldType.Date,
            format = DateFormat.date_hour_minute_second)
    @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd'T'HH:mm:ss", timezone = "GMT+8")
    private Date create_time;
    /**
     * Start time (for time-range queries)
     */
    private Date startTime;
    /**
     * End time (for time-range queries)
     */
    private Date endTime;
    /**
     * Begin position (for range queries)
     */
    private Long beginPosition;
    /**
     * End position (for range queries)
     */
    private Long endPosition;
}

FieldType declares the Elasticsearch data type of a field; three types deserve special attention (see the sketch after this list).

  • FieldType.Text
    A string type whose queries are analyzed (tokenized), so matching behaves roughly like like '%%'
  • FieldType.Keyword
    Also a string type, but queries against it are not analyzed; matching behaves like an exact ==
  • FieldType.Date
    A date type. The value is automatically converted to UTC for storage (effectively -8 hours for China Standard Time); to store the local wall-clock time you have to add 8 hours yourself
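A minimal sketch of the practical difference between the two string types, using the high-level client's query builders (my example; field names are illustrative):

    // Text field: the query string is analyzed, so this behaves like a
    // "contains these tokens" search, roughly SQL like '%%'
    QueryBuilders.matchQuery("description", "gene sequence");

    // Keyword field: the value is compared as-is, roughly SQL =
    // (for dynamically mapped strings the raw variant lives under "field.keyword")
    QueryBuilders.termQuery("file_id", "80");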

Elasticsearch Utility Class

import com.alibaba.fastjson.JSONObject;
import com.wiserice.gene.domain.GeneSnpData;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.admin.indices.alias.Alias;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.open.OpenIndexRequest;
import org.elasticsearch.action.admin.indices.open.OpenIndexResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.client.core.CountResponse;
import org.elasticsearch.client.indices.CloseIndexRequest;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.index.reindex.DeleteByQueryRequest;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.io.IOException;
import java.util.*;

/**
 * @Program: bioinfo
 * @Description: Elasticsearch utility class
 * @Author: zhaoyue
 * @Create: 2022-08-08
 */
@Component
public class ElasticsearchUtils {
    // Logger
    private Logger logger = LoggerFactory.getLogger(ElasticsearchUtils.class);

    // Injected high-level REST client
    @Resource
    RestHighLevelClient restHighLevelClient;

    /**
     * Create an index
     *
     * @param index index name
     * @param max   maximum result window for a single query
     */
    public void createIndex(String index, Long max) {
        CreateIndexRequest request = new CreateIndexRequest(index);
        // Settings (see the IndexMetadata class for the available keys)
        request.settings(Settings.builder()
                // number of primary shards
                .put("index.number_of_shards", 10)
                // number of replicas
                .put("index.number_of_replicas", 1)
                // maximum result window for a single query, default 10000
                .put("index.max_result_window", (max == null || max == 0) ? 10000 : max)
                // index.routing_partition_size routes documents to a group of shards; the
                // default is 1, i.e. a single shard. Setting it to a value greater than 1
                // and less than the total number of shards routes to a shard group; larger
                // values spread data more evenly
                .put("index.routing_partition_size", 2)
        );
        try {
            // Mapping parameters
            request.mapping(XContentFactory.jsonBuilder()
                    // Force every (CRUD) operation on this index to carry a routing
                    // parameter, failing otherwise; comment this out if your use
                    // case does not need routing
                    .startObject()
                    .startObject("_routing")
                    .field("required", true)
                    .endObject()
                    .endObject());
        } catch (IOException e) {
            e.printStackTrace();
        }
        request.alias(new Alias(index + "_alias"));// alias
        request.setTimeout(TimeValue.timeValueMinutes(2));// creation timeout: 2 minutes
        // Synchronous request
        try {
            CreateIndexResponse createIndexResponse = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
            // Handle the response
            boolean acknowledged = createIndexResponse.isAcknowledged();
            boolean shardsAcknowledged = createIndexResponse.isShardsAcknowledged();
            logger.info(" = = = >>> index {} created: {}", index, acknowledged + "-" + shardsAcknowledged);
        } catch (IOException e) {
            logger.info(" = = = >>> index {} creation failed: " + e.getMessage(), index);
        }
    }

    /**
     * Delete an index
     */
    public boolean deleteIndex(String index) {
        boolean acknowledged = isExistIndex(index);
        if (acknowledged) {
            // Build the delete-index request
            DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest(index);
            // Execute
            AcknowledgedResponse delete = null;
            try {
                delete = restHighLevelClient.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
            } catch (IOException e) {
                e.printStackTrace();
            }
            // Read the response
            acknowledged = delete.isAcknowledged();
        }
        logger.info(" = = = >>> delete index {} : {}", index, acknowledged);
        return acknowledged;
    }

    /**
     * Check whether an index exists
     */
    public boolean isExistIndex(String index) {
        GetIndexRequest request = new GetIndexRequest(index);
        // Parameters
        request.local(false);// query the master node rather than the local one
        request.humanReadable(true);// return values in a human-readable format
        request.includeDefaults(false);// whether to return all default settings
        boolean exists = false;
        try {
            exists = restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> index {} exists: {}", index, exists);
        return exists;
    }

    /**
     * Close an index
     */
    public void closeIndex(String index) {
        CloseIndexRequest request = new CloseIndexRequest(index);
        AcknowledgedResponse close = null;
        try {
            close = restHighLevelClient.indices().close(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        boolean acknowledged = close.isAcknowledged();
        logger.info(" = = = >>> close index {} : {}", index, acknowledged);
    }

    /**
     * Open an index
     */
    public void openIndex(String index) {
        OpenIndexRequest request = new OpenIndexRequest(index);
        OpenIndexResponse open = null;
        try {
            open = restHighLevelClient.indices().open(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        boolean acknowledged = open.isAcknowledged();
        logger.info(" = = = >>> open index {} : {}", index, acknowledged);
    }
    
    /**
     * Count the documents in an index
     *
     * @param index            index name
     * @param boolQueryBuilder query conditions
     */
    public Long searchCount(String index, BoolQueryBuilder boolQueryBuilder) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> counting documents in index {}", index);
        CountRequest countRequest = new CountRequest();
        countRequest.indices(index);
        if (boolQueryBuilder != null) {
            countRequest.query(boolQueryBuilder);
        }
        CountResponse count = null;
        try {
            count = restHighLevelClient.count(countRequest, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> index {} total count: {}, took: {}s", index,
                count.getCount(), (System.currentTimeMillis() - start) / 1000);
        return count.getCount();
    }

    /**
     * Query data
     *
     * @param index            index name
     * @param pageable         paging object
     * @param routing          routing value
     * @param clazz            target type for conversion
     * @param boolQueryBuilder query conditions
     * @param showField        fields to include (may be an empty array)
     * @param shieldField      fields to exclude (may be an empty array)
     * @param orderField       sort field
     * @param bl               true = ascending, false = descending
     * @param <T>
     * @return
     */
    public <T> List<T> searchData(String index, Pageable pageable, String routing, Class<T> clazz, BoolQueryBuilder boolQueryBuilder,
                                  String[] showField, String[] shieldField, String orderField, boolean bl) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> conditional query on index {}", index);
        List<T> list = new ArrayList<>();
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Query through a specific routing value
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Request parameters
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        if (pageable != null && !pageable.isUnpaged()) {
            // Zero-based page number
            int start = (pageable.getPageNumber() == 0 ? 1 : pageable.getPageNumber()) - 1;
            // from is the offset of the first record to return (inclusive)
            int from = start * pageable.getPageSize();
            ssb.from(from).size(pageable.getPageSize())
                    // If omitted or false, the reported total is capped at 10000;
                    // true returns the real total (up to index.max_result_window
                    // configured at index creation)
                    .trackTotalHits(true);
        } else {
            ssb.size(20000).trackTotalHits(true);
        }
        // Sorting
        if (StringUtils.isNotEmpty(orderField)) {
            if (bl) {
                // ascending
                ssb.sort(orderField, SortOrder.ASC);
            } else {
                // descending
                ssb.sort(orderField, SortOrder.DESC);
            }
        }
        // Included and excluded fields
        ssb.fetchSource(showField, shieldField);
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        SearchResponse response = null;
        try {
            // Get the response
            response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Collect the hits
        SearchHit[] searchHits = response.getHits().getHits();
        for (SearchHit hit : searchHits) {
            // Each hit's _source as a JSON string
            String jsonStr = hit.getSourceAsString();
            list.add(JSONObject.parseObject(jsonStr, clazz));
        }
        logger.info(" = = = >>> index {} conditional query hits: {}, took: {}s", index,
                list.size(), (System.currentTimeMillis() - start_time) / 1000);
        return list;
    }

    /**
     * Group and sort
     *
     * @param index            index name
     * @param routing          routing value
     * @param clazz            target type for conversion
     * @param boolQueryBuilder query conditions
     * @param showField        fields to include (may be an empty array)
     * @param shieldField      fields to exclude (may be an empty array)
     * @param groupField       grouping field; must be a string field
     * @param sortField        sort field
     * @param bl               sort order: true = asc, false = desc
     * @param <T>
     * @return
     */
    public <T> List<T> searchDataByGroup(String index, String routing, Class<T> clazz, BoolQueryBuilder boolQueryBuilder,
                                         String[] showField, String[] shieldField, String groupField, String sortField,
                                         boolean bl) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> conditional query on index {}", index);
        List<T> list = new ArrayList<>();
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Query through a specific routing value
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Request parameters
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        // Included and excluded fields
        ssb.fetchSource(showField, shieldField);
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Group, then sort within each group
        TermsAggregationBuilder partJobAggBuilder = AggregationBuilders.terms("terms")
                .field(groupField + ".keyword");
        if (bl) {
            // ascending
            partJobAggBuilder.subAggregation(AggregationBuilders.min("min").field(sortField));
        } else {
            // descending
            partJobAggBuilder.subAggregation(AggregationBuilders.max("max").field(sortField));
        }
        ssb.aggregation(partJobAggBuilder);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        SearchResponse response = null;
        try {
            response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Collect the hits
        SearchHit[] searchHits = response.getHits().getHits();
        for (SearchHit hit : searchHits) {
            // Each hit's _source as a JSON string
            String jsonStr = hit.getSourceAsString();
            list.add(JSONObject.parseObject(jsonStr, clazz));
        }
        logger.info(" = = = >>> index {} group-and-sort query hits: {}, took: {}s", index,
                list.size(), (System.currentTimeMillis() - start_time) / 1000);
        return list;
    }

    /**
     * Group and count
     *
     * @param index            index name
     * @param boolQueryBuilder query conditions
     * @param groupField       grouping field; must be a string field
     * @return
     */
    public JSONObject searchDataByGroupNum(String index, BoolQueryBuilder boolQueryBuilder, String groupField) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> conditional query on index {}", index);
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Request parameters
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Terms aggregation on a single field; the aggregation is named "sum"
        TermsAggregationBuilder termsBuilder = AggregationBuilders.terms("sum").field(groupField + ".keyword");
        ssb.aggregation(termsBuilder);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        SearchResponse response = null;
        try {
            response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Buckets of the aggregation
        Terms terms = response.getAggregations().get("sum");
        JSONObject json = new JSONObject();
        for (int i = 0; i < terms.getBuckets().size(); i++) {
            String key = terms.getBuckets().get(i).getKey().toString(); // group value
            Long sum = terms.getBuckets().get(i).getDocCount();// document count for that value
            json.put(key, sum);
            logger.info(" = = = >>> key:{} count:{}", terms.getBuckets().get(i).getKey(), terms.getBuckets().get(i).getDocCount());
        }
        logger.info(" = = = >>> index {} group-count buckets: {}, took: {}s", index,
                json.size(), (System.currentTimeMillis() - start_time) / 1000);
        return json;
    }

    /**
     * Deduplicated query
     *
     * @param index            index name
     * @param clazz            target type for conversion
     * @param boolQueryBuilder query conditions
     * @param showField        fields to include (may be an empty array)
     * @param shieldField      fields to exclude (may be an empty array)
     * @param distinctField    field to deduplicate on
     * @param <T>
     * @return
     */
    public <T> List<T> searchDataByDistinct(String index, Class<T> clazz, BoolQueryBuilder boolQueryBuilder,
                                            String[] showField, String[] shieldField, String distinctField) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> conditional query on index {}", index);
        List<T> list = new ArrayList<>();
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Request parameters
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        // Included and excluded fields
        ssb.fetchSource(showField, shieldField);
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Collapse on the deduplication field
        ssb.collapse(new CollapseBuilder(distinctField)).trackTotalHits(true);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        SearchResponse response = null;
        try {
            response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Collect the hits
        SearchHit[] searchHits = response.getHits().getHits();
        for (SearchHit hit : searchHits) {
            // Each hit's _source as a JSON string
            String jsonStr = hit.getSourceAsString();
            list.add(JSONObject.parseObject(jsonStr, clazz));
        }
        logger.info(" = = = >>> index {} deduplicated query hits: {}, took: {}s", index,
                list.size(), (System.currentTimeMillis() - start_time) / 1000);
        return list;
    }

    /**
     * Save or update data (fixed-field object)
     *
     * @param fileEntity entity object
     * @param index      index name
     * @param routing    routing value
     */
    public void saveDataByObject(GeneSnpData fileEntity, String index, String routing) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> storing data into index {}", index);
        // When the ID already exists, the new document overwrites the old one,
        // which effectively makes this an update
        // Set the document ID
        IndexRequest request = new IndexRequest(index).id(fileEntity.getId());
        // Store through a specific routing value
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Pass the document as a JSON string
        request.source(JSONObject.toJSONString(fileEntity), XContentType.JSON);
        IndexResponse response = null;
        try {
            response = restHighLevelClient.index(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> index {} store took: {}s", index,
                (System.currentTimeMillis() - start) / 1000);
    }
    
    /**
     * Save or update data (free-form fields)
     *
     * @param map     data (the "id" key is required)
     * @param index   index name
     * @param routing routing value
     */
    public void saveData(Map<String, Object> map, String index, String routing) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> storing data into index {}", index);
        // When the ID already exists, the new document overwrites the old one,
        // which effectively makes this an update
        // Set the document ID
        IndexRequest request = new IndexRequest(index).id(map.get("id").toString());
        // Store through a specific routing value
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Pass the document as a JSON string
        request.source(JSONObject.toJSONString(map), XContentType.JSON);
        IndexResponse response = null;
        try {
            response = restHighLevelClient.index(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> index {} store took: {}s", index,
                (System.currentTimeMillis() - start) / 1000);
    }

    /**
     * Delete all data in an index
     *
     * @param indices index name
     * @return number of deleted documents
     */
    public Long deleteAllDataByIndices(String indices) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> deleting all data in index {}", indices);
        DeleteByQueryRequest request = new DeleteByQueryRequest(indices);
        // Continue on version conflicts
        request.setConflicts("proceed");
        // Match every document
        request.setQuery(QueryBuilders.matchAllQuery());
        BulkByScrollResponse response = null;
        try {
            response = restHighLevelClient.deleteByQuery(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> index {} all data deleted, count: {}, took: {}s", indices,
                response.getDeleted(), (System.currentTimeMillis() - start) / 1000);
        return response.getDeleted();
    }

    /**
     * Delete data by condition
     *
     * @param indices          index name
     * @param routing          routing value
     * @param boolQueryBuilder query conditions
     * @return number of deleted documents
     */
    public Long deleteDataByCondition(String indices, String routing, BoolQueryBuilder boolQueryBuilder) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> conditional delete on index {}", indices);
        long deleted = 0L;
        DeleteByQueryRequest request = new DeleteByQueryRequest(indices);
        if (StringUtils.isNotBlank(routing)) {
            request.setRouting(routing);
        }
        // Continue on version conflicts
        request.setConflicts("proceed");
        request.setQuery(boolQueryBuilder);
        try {
            BulkByScrollResponse response = restHighLevelClient.deleteByQuery(request, RequestOptions.DEFAULT);
            deleted = response.getDeleted();
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> index {} conditional delete, count: {}, took: {}s", indices,
                deleted, (System.currentTimeMillis() - start) / 1000);
        return deleted;
    }

    /**
     * Build query conditions (GeneSnpData, fixed-field object)
     *
     * @return
     */
    public BoolQueryBuilder judgeConditionByGeneSnpData(GeneSnpData fileEntity) {
        logger.info(" = = = >>> building gene_snp_data query conditions");
        // Query condition object
        BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
        /**
         * ID
         */
        if (StringUtils.isNotBlank(fileEntity.getId())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("id.keyword", fileEntity.getId()));
        }
        /**
         * File ID
         */
        if (StringUtils.isNotBlank(fileEntity.getFile_id())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("file_id.keyword", fileEntity.getFile_id()));
        }
        /**
         * Chromosome, fuzzy match
         */
        if (StringUtils.isNotBlank(fileEntity.getChr_ind())) {
            boolQueryBuilder.filter(QueryBuilders.wildcardQuery("chr_ind.keyword", "*" + fileEntity.getChr_ind() + "*"));
        }
        /**
         * Position
         */
        if (fileEntity.getPosition() != null) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("position", fileEntity.getPosition()));
        }
        /**
         * Position, range condition
         */
        if (fileEntity.getBeginPosition() != null && fileEntity.getEndPosition() != null) {
            boolQueryBuilder.filter(QueryBuilders
                    .rangeQuery("position")
                    // greater than or equal to the begin position
                    .gte(fileEntity.getBeginPosition())
                    // less than or equal to the end position
                    .lte(fileEntity.getEndPosition()));
        }
        /**
         * Creation time (time-range condition)
         */
        if (fileEntity.getStartTime() != null
                && fileEntity.getEndTime() != null) {
            boolQueryBuilder.filter(QueryBuilders
                    // expects timestamps such as 2020-01-02T03:17:37.638Z
                    .rangeQuery("create_time")
                    // FieldType.Date values are stored converted to UTC (-8 hours),
                    // so add 8 hours via getUTCTime()
                    // greater than or equal to the start time
                    .from(getUTCTime(fileEntity.getStartTime()))
                    // less than or equal to the end time
                    .to(getUTCTime(fileEntity.getEndTime())));
        }
        logger.info(" = = = >>> gene_snp_data query conditions: {}", boolQueryBuilder.toString()
                .replace(" ", "").replace("\n", ""));
        return boolQueryBuilder;
    }

    /**
     * Build query conditions (free-form fields)
     *
     * @return
     */
    public BoolQueryBuilder judgeConditionByMap(Map<String, Object> map) {
        logger.info(" = = = >>> building map query conditions");
        // Query condition object
        BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
        /**
         * ID
         */
        if (map.get("id") != null && StringUtils.isNotBlank(map.get("id").toString())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("id.keyword", map.get("id").toString()));
        }
        /**
         * File ID
         */
        if (map.get("file_id") != null && StringUtils.isNotBlank(map.get("file_id").toString())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("file_id.keyword", map.get("file_id").toString()));
        }
        /**
         * Chromosome, fuzzy match
         */
        if (map.get("chr_ind") != null && StringUtils.isNotBlank(map.get("chr_ind").toString())) {
            boolQueryBuilder.filter(QueryBuilders.wildcardQuery("chr_ind.keyword", "*" + map.get("chr_ind") + "*"));
        }
        /**
         * Creation time (time-range condition)
         */
        if (map.get("startTime") != null
                && map.get("endTime") != null) {
            boolQueryBuilder.filter(QueryBuilders
                    // expects timestamps such as 2020-01-02T03:17:37.638Z
                    .rangeQuery("create_time")
                    .from(getUTCTime((Date) map.get("startTime")))
                    .to(getUTCTime((Date) map.get("endTime"))));
        }
        logger.info(" = = = >>> map query conditions: {}", boolQueryBuilder.toString()
                .replace(" ", "").replace("\n", ""));
        return boolQueryBuilder;
    }

    /**
     * Add 8 hours to a time (to compensate for the UTC conversion)
     *
     * @param date time
     */
    public Date getUTCTime(Date date) {
        Calendar cal = Calendar.getInstance();
        cal.setTime(date);
        cal.add(Calendar.HOUR, 8);
        return cal.getTime();
    }
}
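As a usage sketch for the conditional delete helper (my example; index name, field names and values are illustrative, routing omitted):

    // Delete one file's documents within a position range
    BoolQueryBuilder condition = new BoolQueryBuilder()
            .filter(QueryBuilders.matchQuery("file_id.keyword", "80"))
            .filter(QueryBuilders.rangeQuery("position").gte(0L).lte(1000000L));
    Long deleted = elasticsearchUtils.deleteDataByCondition("test_index", null, condition);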

Key Point

Elasticsearch is a distributed search engine. Every index can have multiple shards, which split one large logical index into several smaller physical ones and thereby avoid the performance problems of a single oversized index; each shard can also be given replicas for high reliability and better concurrent read capacity. Splitting an index into shards is fine most of the time, but once an index spans multiple shards, group (terms) aggregations can return inaccurate results. The official documentation describes this very clearly: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_shard_size_3

For example, group-and-sort aggregation (group order) goes wrong when the index has multiple shards.

As an example, I created an index with three shards.

First, index a batch of data:

# contains data of types LG01, LG02, LG03 and LG04
{"type":"LG01","number":21896}
{"type":"LG01","number":144166}
{"type":"LG04","number":512086}
{"type":"LG01","number":448631}
{"type":"LG01","number":638216}
{"type":"LG01","number":1226413}
{"type":"LG02","number":3241037}
{"type":"LG01","number":390360}
{"type":"LG01","number":484768}
{"type":"LG01","number":821269}
{"type":"LG01","number":46390360}
{"type":"LG01","number":46484768}
{"type":"LG03","number":270662}
{"type":"LG01","number":46637427}
{"type":"LG01","number":46912605}
{"type":"LG03","number":10863477}
{"type":"LG02","number":388297}
{"type":"LG02","number":437768}
{"type":"LG02","number":753599}
{"type":"LG02","number":988960}
{"type":"LG02","number":1039091}
{"type":"LG02","number":4053373}
{"type":"LG04","number":18844340}
{"type":"LG01","number":637427}
{"type":"LG02","number":5877374}
{"type":"LG02","number":6189273}
{"type":"LG04","number":367479}
{"type":"LG02","number":989001}
{"type":"LG02","number":1332246}
{"type":"LG02","number":2253486}
{"type":"LG02","number":2465302}
{"type":"LG02","number":4053384}
{"type":"LG03","number":270664}
{"type":"LG03","number":311950}
{"type":"LG02","number":316947}
{"type":"LG03","number":422620}
{"type":"LG03","number":572404}
{"type":"LG03","number":10730243}
{"type":"LG04","number":99338}
{"type":"LG03","number":11395636}
{"type":"LG03","number":11571674}
{"type":"LG03","number":11571799}
{"type":"LG04","number":18844339}
{"type":"LG03","number":4820414}
{"type":"LG03","number":4897138}
{"type":"LG01","number":46821269}
{"type":"LG01","number":912605}
{"type":"LG03","number":5099274}
{"type":"LG03","number":5131482}
{"type":"LG03","number":5459684}
{"type":"LG04","number":18815419}
{"type":"LG04","number":18951531}
{"type":"LG04","number":19293490}
{"type":"LG04","number":25252}
{"type":"LG04","number":277971}
{"type":"LG04","number":958033}
{"type":"LG04","number":2513320}
{"type":"LG03","number":11331084}
{"type":"LG04","number":2926072}
{"type":"LG04","number":3047003}
{"type":"LG04","number":3187459}

The data returned after the group-and-sort aggregation:

{"type":"LG01","number":21896}
{"type":"LG01","number":144166}
{"type":"LG01","number":448631}
{"type":"LG01","number":638216}
{"type":"LG01","number":1226413}
{"type":"LG02","number":1039091}
{"type":"LG02","number":3241037}
{"type":"LG02","number":4053373}
{"type":"LG02","number":5877374}
{"type":"LG02","number":6189273}
{"type":"LG03","number":10730243}
{"type":"LG03","number":10863477}
{"type":"LG03","number":11331084}
{"type":"LG03","number":11395636}
{"type":"LG03","number":11571674}
{"type":"LG03","number":11571799}
{"type":"LG04","number":18815419}
{"type":"LG04","number":18844339}
{"type":"LG04","number":18844340}
{"type":"LG04","number":18951531}
{"type":"LG04","number":19293490}
# ---------- separator for readability ----------
{"type":"LG01","number":46390360}
{"type":"LG01","number":46484768}
{"type":"LG01","number":46637427}
{"type":"LG01","number":46821269}
{"type":"LG01","number":46912605}
{"type":"LG02","number":316947}
{"type":"LG02","number":388297}
{"type":"LG02","number":437768}
{"type":"LG02","number":753599}
{"type":"LG02","number":988960}
{"type":"LG03","number":270662}
{"type":"LG03","number":270664}
{"type":"LG03","number":311950}
{"type":"LG03","number":422620}
{"type":"LG03","number":572404}
{"type":"LG04","number":25252}
{"type":"LG04","number":99338}
{"type":"LG04","number":277971}
{"type":"LG04","number":367479}
{"type":"LG04","number":512086}
# ---------- separator for readability ----------
{"type":"LG01","number":390360}
{"type":"LG01","number":484768}
{"type":"LG01","number":637427}
{"type":"LG01","number":821269}
{"type":"LG01","number":912605}
{"type":"LG02","number":989001}
{"type":"LG02","number":1332246}
{"type":"LG02","number":2253486}
{"type":"LG02","number":2465302}
{"type":"LG02","number":4053384}
{"type":"LG03","number":4820414}
{"type":"LG03","number":4897138}
{"type":"LG03","number":5099274}
{"type":"LG03","number":5131482}
{"type":"LG03","number":5459684}
{"type":"LG04","number":958033}
{"type":"LG04","number":2513320}
{"type":"LG04","number":2926072}
{"type":"LG04","number":3047003}
{"type":"LG04","number":3187459}

The data was automatically split into three parts, and each part was grouped and then sorted independently; the result we actually want is:

{"type":"LG01","number":21896}
{"type":"LG01","number":144166}
{"type":"LG01","number":390360}
{"type":"LG01","number":448631}
{"type":"LG01","number":484768}
{"type":"LG01","number":637427}
{"type":"LG01","number":638216}
{"type":"LG01","number":821269}
{"type":"LG01","number":912605}
{"type":"LG01","number":1226413}
{"type":"LG01","number":46390360}
{"type":"LG01","number":46484768}
{"type":"LG01","number":46637427}
{"type":"LG01","number":46821269}
{"type":"LG01","number":46912605}
# ---------- separator for readability ----------
{"type":"LG02","number":316947}
{"type":"LG02","number":388297}
{"type":"LG02","number":437768}
{"type":"LG02","number":753599}
{"type":"LG02","number":988960}
{"type":"LG02","number":989001}
{"type":"LG02","number":1039091}
{"type":"LG02","number":1332246}
{"type":"LG02","number":2253486}
{"type":"LG02","number":2465302}
{"type":"LG02","number":3241037}
{"type":"LG02","number":4053373}
{"type":"LG02","number":4053384}
{"type":"LG02","number":5877374}
{"type":"LG02","number":6189273}
# ---------- separator for readability ----------
{"type":"LG03","number":270662}
{"type":"LG03","number":270664}
{"type":"LG03","number":311950}
{"type":"LG03","number":422620}
{"type":"LG03","number":572404}
{"type":"LG03","number":4820414}
{"type":"LG03","number":4897138}
{"type":"LG03","number":5099274}
{"type":"LG03","number":5131482}
{"type":"LG03","number":5459684}
{"type":"LG03","number":10730243}
{"type":"LG03","number":10863477}
{"type":"LG03","number":11331084}
{"type":"LG03","number":11395636}
{"type":"LG03","number":11571674}
{"type":"LG03","number":11571799}
# ---------- separator for readability ----------
{"type":"LG04","number":25252}
{"type":"LG04","number":99338}
{"type":"LG04","number":277971}
{"type":"LG04","number":367479}
{"type":"LG04","number":512086}
{"type":"LG04","number":958033}
{"type":"LG04","number":2513320}
{"type":"LG04","number":2926072}
{"type":"LG04","number":3047003}
{"type":"LG04","number":3187459}
{"type":"LG04","number":18815419}
{"type":"LG04","number":18844339}
{"type":"LG04","number":18844340}
{"type":"LG04","number":18951531}
{"type":"LG04","number":19293490}

So is there a way to avoid this inaccurate aggregation?

The official ES documentation mentions two solutions.

  1. Aggregations over a single shard are exact: if all of an index's data is stored in one shard, its aggregation results are accurate.
  2. When indexing, use a routing field to place all the data that will be aggregated together on the same shard; aggregations are then exact as well.

For more on routing, see my article: https://www.cnblogs.com/nhdlb/p/16738914.html

The first approach suits scenarios with modest data volumes, where all the data can simply live in one shard and be operated on there.

The second suits larger data volumes: route documents that share a business attribute to the same shard, then operate on them there.

The code in this article already wires routing through (both at index creation and in the query conditions), so you can apply it as your use case requires; a short sketch follows.
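For instance, a minimal sketch of the routed write-then-read path with the utility class above (my example; index name, routing value and field values are all illustrative):

    // Route all documents of one file to the same shard by using the file id
    // as the routing value
    Map<String, Object> doc = new HashMap<>();
    doc.put("id", UUID.randomUUID().toString());
    doc.put("file_id", "80");
    doc.put("type", "LG01");
    doc.put("number", 21896L);
    doc.put("create_time", new Date());
    elasticsearchUtils.saveData(doc, "test_index", "80");

    // Aggregate through the same routing value, so the terms aggregation only
    // touches the single shard holding this file's data and stays exact
    List<GeneSnpData> rows = elasticsearchUtils.searchDataByGroup(
            "test_index", "80", GeneSnpData.class, new BoolQueryBuilder(),
            new String[]{}, new String[]{}, "type", "number", true);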

Test Functions

import com.alibaba.fastjson.JSONArray;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

/**
 * Unit tests
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class ES_Demo {
    // Inject the utility class
    @Autowired
    private ElasticsearchUtils elasticsearchUtils;

    /**
     * Create an index
     */
    @Test
    public void createIndex() {
        String index = "test_index";
        Long max = 200000L;
        elasticsearchUtils.createIndex(index, max);
    }

    /**
     * Save or update data
     */
    @Test
    public void save_updateData() {
        String index = "test_index";
        // Hand-crafted test data
        for (int i = 0; i < 10; i++) {
            Map<String, Object> map = new HashMap<>();
            // When the ID is the same, the new document overwrites the old one,
            // which makes this an update
            map.put("id", UUID.randomUUID().toString());
            map.put("file_id", String.valueOf(80 + i));
            map.put("crop_id", "1");
            map.put("position", 10606360L + i);
            map.put("create_user_id", "6");
            map.put("create_time", new Date());
            // createIndex above marks _routing as required, so pass a routing
            // value (the file id here)
            elasticsearchUtils.saveData(map, index, map.get("file_id").toString());
        }
    }

    /**
     * Query data
     */
    @Test
    public void selectData() {
        String index = "test_index";
        Pageable pageable = PageRequest.of(1, 1);
        // Conditions built from an (empty) entity match everything
        GeneSnpData geneSnpData = new GeneSnpData();
        List<GeneSnpData> list = elasticsearchUtils.searchData(index, pageable, null, GeneSnpData.class,
                elasticsearchUtils.judgeConditionByGeneSnpData(geneSnpData),
                new String[]{}, new String[]{}, "position", true);
        System.out.println(" = = = >>> list " + JSONArray.toJSONString(list));
    }

}
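Two more test sketches along the same lines could exercise the count and full-delete helpers (my additions, not in the original post):

    /**
     * Count documents (a null condition counts everything)
     */
    @Test
    public void countData() {
        Long total = elasticsearchUtils.searchCount("test_index", null);
        System.out.println(" = = = >>> total " + total);
    }

    /**
     * Delete all documents in the index
     */
    @Test
    public void deleteAllData() {
        Long deleted = elasticsearchUtils.deleteAllDataByIndices("test_index");
        System.out.println(" = = = >>> deleted " + deleted);
    }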

 
