SpringBoot: A SpringBoot + Elasticsearch Integration Utility Class
Preface
Spring Boot and Elasticsearch have strict version constraints; unless you have a compelling reason not to, develop against the matching versions.
The Elasticsearch client jar is likewise strictly tied to the Elasticsearch server version and must match it exactly.
Maven Dependencies
<!-- Dependency declarations -->
<dependencyManagement>
<dependencies>
<!-- Spring Boot dependency management -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>2.5.9</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<!-- Elasticsearch dependency -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
<!-- <version>2.5.9</version> -->
<!-- compatible with Elasticsearch server 7.16.1 -->
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- Excel tooling -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- jars required by json-lib's JSONObject -->
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
<version>1.9.3</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>net.sf.ezmorph</groupId>
<artifactId>ezmorph</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>net.sf.json-lib</groupId>
<artifactId>json-lib</artifactId>
<version>2.2.3</version>
<classifier>jdk15</classifier><!-- JDK-targeted classifier; json-lib publishes jdk13/jdk15 variants -->
</dependency>
<!-- Alibaba fastjson JSON parser -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.79</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
</plugins>
</build>
Configuration File
server:
  port: 8088
spring:
  data:
    elasticsearch:
      client:
        reactive:
          # ES node list (comma-separated for a cluster); port 9200 serves HTTP
          # queries, 9300 is for inter-node transport; 9200 is used here
          endpoints: 192.168.126.200:9200
          # username
          username: es
          # password
          password: 123456
Defining the Configuration Class
package com.java.hz.config;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * @Program: Demo
 * @Description: Elasticsearch configuration class
 * @Author: zhaoyue
 * @Create: 2022-08-04
 */
@Configuration
public class EsConfig {
    public static final RequestOptions COMMON_OPTIONS;
    @Value("${spring.data.elasticsearch.client.reactive.endpoints}")
    private String urls;
    @Value("${spring.data.elasticsearch.client.reactive.username}")
    private String account;
    @Value("${spring.data.elasticsearch.client.reactive.password}")
    private String password;
    static {
        RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
        COMMON_OPTIONS = builder.build();
    }
    // Register the client in the IoC container
    @Bean("restHighLevelClient")
    public RestHighLevelClient restHighLevelClient() {
        if (StringUtils.isBlank(urls)) {
            throw new IllegalStateException("spring.data.elasticsearch.client.reactive.endpoints is not configured");
        }
        // Support a comma-separated cluster list by collecting every node into one
        // HttpHost array (re-creating the builder per node would keep only the last node)
        String[] urlsArr = urls.split(",");
        HttpHost[] hosts = new HttpHost[urlsArr.length];
        for (int i = 0; i < urlsArr.length; i++) {
            String[] urlArr = urlsArr[i].split(":");
            String ipAddr = urlArr[0];
            // default to 9200 when no port is given
            int port = urlArr.length < 2 ? 9200 : Integer.parseInt(urlArr[1]);
            hosts[i] = new HttpHost(ipAddr, port, "http");
        }
        RestClientBuilder builder = RestClient.builder(hosts);
        // Basic authentication
        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(account, password));
        builder.setHttpClientConfigCallback(f -> f.setDefaultCredentialsProvider(credentialsProvider));
        return new RestHighLevelClient(builder);
    }
}
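A quick way to confirm the client bean is wired correctly is to ping the cluster after startup. A minimal sketch (illustrative, not part of the original configuration), placed in any bean where the client is injected:
// Sanity check: ping() returns true when the cluster answers on the configured endpoints
@Resource
private RestHighLevelClient restHighLevelClient;

public boolean esIsUp() throws IOException {
    return restHighLevelClient.ping(RequestOptions.DEFAULT);
}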
Defining an Entity Class with Fixed Fields
If the index's fields are fixed, you can define an entity class to work with it. If the fields are not fixed, a Map or a JSONObject works just as well.
import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.Getter;
import lombok.Setter;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.DateFormat;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
import java.util.Date;
/**
 * @Program: bioinfo
 * ES index name: gene_snp_data
 * @Description: document type stored in Elasticsearch
 * @Author: zhaoyue
 * @Create: 2022-08-05
 */
@Document(indexName = "gene_snp_data")
// Lombok generates the getters and setters
@Getter
@Setter
public class GeneSnpData {
    /**
     * ID
     */
    @Id
    private String id;
    /**
     * File ID
     */
    @Field(type = FieldType.Keyword)
    private String file_id;
    /**
     * Chromosome (queried with a wildcard in the utility class below)
     */
    @Field(type = FieldType.Keyword)
    private String chr_ind;
    /**
     * Position
     */
    @Field(type = FieldType.Long)
    private Long position;
    /**
     * Creation time
     */
    @Field(type = FieldType.Date,
            format = DateFormat.date_hour_minute_second)
    @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd'T'HH:mm:ss", timezone = "UTC-8")
    private Date create_time;
    /**
     * Start time (for time-range queries)
     */
    private Date startTime;
    /**
     * End time (for time-range queries)
     */
    private Date endTime;
    /**
     * Start position (for range queries)
     */
    private Long beginPosition;
    /**
     * End position (for range queries)
     */
    private Long endPosition;
}
FieldType declares a field's data type in the mapping. Three types deserve special attention:
- FieldType.Text: the field holds a string, but queries against it are analyzed (tokenized), roughly SQL's like '%...%'.
- FieldType.Keyword: also a string, but queried without analysis, roughly an exact == comparison.
- FieldType.Date: a date type. Elasticsearch stores the value converted to UTC (8 hours behind China Standard Time), so to store the intended local time you must add 8 hours yourself.
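The practical difference between Text and Keyword shows up at query time. A small illustrative sketch, assuming a Text field name with the usual name.keyword sub-field:
// Analyzed match on a Text field: hits any document containing the token "rice"
QueryBuilders.matchQuery("name", "rice");
// Exact match on the Keyword sub-field: hits only documents whose whole value is "rice gene"
QueryBuilders.termQuery("name.keyword", "rice gene");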
The Elasticsearch Utility Class
import com.alibaba.fastjson.JSONObject;
import com.wiserice.gene.domain.GeneSnpData;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.admin.indices.alias.Alias;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.open.OpenIndexRequest;
import org.elasticsearch.action.admin.indices.open.OpenIndexResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.client.core.CountResponse;
import org.elasticsearch.client.indices.CloseIndexRequest;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.index.reindex.DeleteByQueryRequest;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.*;
/**
 * @Program: bioinfo
 * @Description: Elasticsearch utility class
 * @Author: zhaoyue
 * @Create: 2022-08-08
 */
@Component
public class ElasticsearchUtils {
    // Logger
    private final Logger logger = LoggerFactory.getLogger(ElasticsearchUtils.class);
    // Injected high-level REST client
    @Resource
    private RestHighLevelClient restHighLevelClient;
    /**
     * Create an index
     *
     * @param index index name
     * @param max   maximum result window for a single query
     */
    public void createIndex(String index, Long max) {
        CreateIndexRequest request = new CreateIndexRequest(index);
        // Index settings (see the IndexMetadata class for the available keys)
        request.settings(Settings.builder()
                // number of primary shards
                .put("index.number_of_shards", 10)
                // number of replicas per shard
                .put("index.number_of_replicas", 1)
                // maximum result window for a single query; defaults to 10000
                .put("index.max_result_window", max == null || max == 0 ? 10000 : max)
                // index.routing_partition_size routes one routing value to a group of
                // shards. The default is 1 (a single shard); any value greater than 1
                // and less than the shard count routes to a group. Larger values
                // spread the data more evenly.
                .put("index.routing_partition_size", 2)
        );
        try {
            // Mapping settings
            request.mapping(XContentFactory.jsonBuilder()
                    // Force every (create/read/update/delete) operation on this index
                    // to carry a routing parameter; requests without one are rejected.
                    // Comment this out if your use case does not need it.
                    .startObject()
                    .startObject("_routing")
                    .field("required", true)
                    .endObject()
                    .endObject());
        } catch (IOException e) {
            e.printStackTrace();
        }
        request.alias(new Alias(index + "_alias"));// index alias
        request.setTimeout(TimeValue.timeValueMinutes(2));// creation timeout: 2 minutes
        // Synchronous request
        try {
            CreateIndexResponse createIndexResponse = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
            // Handle the response
            boolean acknowledged = createIndexResponse.isAcknowledged();
            boolean shardsAcknowledged = createIndexResponse.isShardsAcknowledged();
            logger.info(" = = = >>> Index {} created: {}", index, acknowledged + "-" + shardsAcknowledged);
        } catch (IOException e) {
            logger.info(" = = = >>> Index {} creation failed: " + e.getMessage(), index);
        }
    }
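    /*
     * Because the mapping above sets _routing.required=true, the server rejects any
     * write that omits a routing value. A compliant write looks like this (sketch,
     * not part of the original class):
     *
     * IndexRequest request = new IndexRequest("test_index")
     *         .id("1")
     *         .routing("file-80")   // omitting this triggers a routing_missing_exception
     *         .source("{\"file_id\":\"80\"}", XContentType.JSON);
     * restHighLevelClient.index(request, RequestOptions.DEFAULT);
     */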
    /**
     * Delete an index
     */
    public boolean deleteIndex(String index) {
        boolean acknowledged = isExistIndex(index);
        if (acknowledged) {
            // Build the delete-index request
            DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest(index);
            // Execute
            try {
                AcknowledgedResponse delete = restHighLevelClient.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
                // Read the response
                acknowledged = delete.isAcknowledged();
            } catch (IOException e) {
                e.printStackTrace();
                acknowledged = false;
            }
        }
        logger.info(" = = = >>> Delete index {}: {}", index, acknowledged);
        return acknowledged;
    }
    /**
     * Check whether an index exists
     */
    public boolean isExistIndex(String index) {
        GetIndexRequest request = new GetIndexRequest(index);
        // Parameters
        request.local(false);// read the index state from the master node rather than the local node
        request.humanReadable(true);// return values in a human-readable format
        request.includeDefaults(false);// whether to return all default settings of the index
        boolean exists = false;
        try {
            exists = restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} exists: {}", index, exists);
        return exists;
    }
    /**
     * Close an index
     */
    public void closeIndex(String index) {
        CloseIndexRequest request = new CloseIndexRequest(index);
        try {
            AcknowledgedResponse close = restHighLevelClient.indices().close(request, RequestOptions.DEFAULT);
            logger.info(" = = = >>> Close index {}: {}", index, close.isAcknowledged());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /**
     * Open an index
     */
    public void openIndex(String index) {
        OpenIndexRequest request = new OpenIndexRequest(index);
        try {
            OpenIndexResponse open = restHighLevelClient.indices().open(request, RequestOptions.DEFAULT);
            logger.info(" = = = >>> Open index {}: {}", index, open.isAcknowledged());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /**
     * Count documents in an index
     *
     * @param index            index name
     * @param boolQueryBuilder query conditions
     */
    public Long searchCount(String index, BoolQueryBuilder boolQueryBuilder) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> Counting documents in index {}", index);
        CountRequest countRequest = new CountRequest();
        countRequest.indices(index);
        if (boolQueryBuilder != null) {
            countRequest.query(boolQueryBuilder);
        }
        long count = 0L;
        try {
            CountResponse response = restHighLevelClient.count(countRequest, RequestOptions.DEFAULT);
            count = response.getCount();
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} document count: {}, took: {}s", index,
                count, (System.currentTimeMillis() - start) / 1000);
        return count;
    }
    /**
     * Query documents
     *
     * @param index            index name
     * @param pageable         paging object
     * @param routing          routing value
     * @param clazz            target type for deserialization
     * @param boolQueryBuilder query conditions
     * @param showField        fields to include; may be an empty array
     * @param shieldField      fields to exclude; may be an empty array
     * @param orderField       sort field
     * @param bl               true = ascending, false = descending
     * @param <T>
     * @return
     */
    public <T> List<T> searchData(String index, Pageable pageable, String routing, Class<T> clazz, BoolQueryBuilder boolQueryBuilder,
                                  String[] showField, String[] shieldField, String orderField, boolean bl) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> Conditional query on index {}", index);
        List<T> list = new ArrayList<>();
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Route the query to specific shards
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Build the request body
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        if (pageable != null && !pageable.isUnpaged()) {
            // Zero-based page number (page 0 and page 1 both map to the first page)
            int start = (pageable.getPageNumber() == 0 ? 1 : pageable.getPageNumber()) - 1;
            // from is the offset of the first document to return (inclusive)
            int from = start * pageable.getPageSize();
            ssb.from(from).size(pageable.getPageSize())
                    // Without trackTotalHits(true) the reported total is capped at 10000;
                    // with it the real total is returned (up to index.max_result_window)
                    .trackTotalHits(true);
        } else {
            ssb.size(20000).trackTotalHits(true);
        }
        // Sorting
        if (StringUtils.isNotEmpty(orderField)) {
            ssb.sort(orderField, bl ? SortOrder.ASC : SortOrder.DESC);
        }
        // Included and excluded fields
        ssb.fetchSource(showField, shieldField);
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        try {
            SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
            for (SearchHit hit : response.getHits().getHits()) {
                // Deserialize each hit's JSON source
                list.add(JSONObject.parseObject(hit.getSourceAsString(), clazz));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} conditional query returned {} documents, took: {}s", index,
                list.size(), (System.currentTimeMillis() - start_time) / 1000);
        return list;
    }
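    /*
     * Usage sketch (illustrative, not part of the original class): fetch the first
     * ten documents of one file, ascending by position, with the entity defined
     * earlier; the empty arrays mean no field filtering.
     *
     * GeneSnpData condition = new GeneSnpData();
     * condition.setFile_id("80");
     * List<GeneSnpData> page = elasticsearchUtils.searchData(
     *         "gene_snp_data", PageRequest.of(1, 10), null, GeneSnpData.class,
     *         elasticsearchUtils.judgeConditionByGeneSnpData(condition),
     *         new String[]{}, new String[]{}, "position", true);
     */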
    /**
     * Grouped query with per-group sorting
     *
     * @param index            index name
     * @param routing          routing value
     * @param clazz            target type for deserialization
     * @param boolQueryBuilder query conditions
     * @param showField        fields to include; may be an empty array
     * @param shieldField      fields to exclude; may be an empty array
     * @param groupField       grouping field; must be a string field
     * @param sortField        sort field
     * @param bl               sort order: true = asc, false = desc
     * @param <T>
     * @return
     */
    public <T> List<T> searchDataByGroup(String index, String routing, Class<T> clazz, BoolQueryBuilder boolQueryBuilder,
                                         String[] showField, String[] shieldField, String groupField, String sortField,
                                         boolean bl) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> Grouped query on index {}", index);
        List<T> list = new ArrayList<>();
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Route the query to specific shards
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Build the request body
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        // Included and excluded fields
        ssb.fetchSource(showField, shieldField);
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Terms aggregation on the grouping field, with a min/max sub-aggregation
        TermsAggregationBuilder partJobAggBuilder = AggregationBuilders.terms("terms")
                .field(groupField + ".keyword");
        if (bl) {
            // ascending: track the smallest value of the sort field per group
            partJobAggBuilder.subAggregation(AggregationBuilders.min("min").field(sortField));
        } else {
            // descending: track the largest value of the sort field per group
            partJobAggBuilder.subAggregation(AggregationBuilders.max("max").field(sortField));
        }
        ssb.aggregation(partJobAggBuilder);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        try {
            SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
            for (SearchHit hit : response.getHits().getHits()) {
                // Deserialize each hit's JSON source
                list.add(JSONObject.parseObject(hit.getSourceAsString(), clazz));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} grouped query returned {} documents, took: {}s", index,
                list.size(), (System.currentTimeMillis() - start_time) / 1000);
        return list;
    }
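    /*
     * Note: searchDataByGroup returns the plain hits; the grouping itself lives in
     * the response's aggregation tree. To read the per-group min/max values, walk
     * the buckets instead (sketch; Terms.Bucket and Min/Max live under
     * org.elasticsearch.search.aggregations):
     *
     * Terms terms = response.getAggregations().get("terms");
     * for (Terms.Bucket bucket : terms.getBuckets()) {
     *     Min min = bucket.getAggregations().get("min"); // use Max/"max" when bl is false
     *     logger.info("{} -> {}", bucket.getKeyAsString(), min.getValue());
     * }
     */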
    /**
     * Group and count
     *
     * @param index            index name
     * @param boolQueryBuilder query conditions
     * @param groupField       grouping field; must be a string field
     * @return
     */
    public JSONObject searchDataByGroupNum(String index, BoolQueryBuilder boolQueryBuilder, String groupField) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> Group-and-count query on index {}", index);
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Build the request body
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Terms aggregation on a single field; the aggregation is named "sum"
        TermsAggregationBuilder termsBuilder = AggregationBuilders.terms("sum").field(groupField + ".keyword");
        ssb.aggregation(termsBuilder);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        SearchResponse response = null;
        try {
            response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Read the aggregation buckets
        Terms terms = response.getAggregations().get("sum");
        JSONObject json = new JSONObject();
        for (int i = 0; i < terms.getBuckets().size(); i++) {
            String key = terms.getBuckets().get(i).getKey().toString(); // group value
            Long sum = terms.getBuckets().get(i).getDocCount();         // document count for the group
            json.put(key, sum);
            logger.info(" = = = >>> key:{} count:{}", key, sum);
        }
        logger.info(" = = = >>> Index {} group-and-count query returned {} groups, took: {}s", index,
                json.size(), (System.currentTimeMillis() - start_time) / 1000);
        return json;
    }
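    /*
     * Usage sketch: count documents per file_id; the returned JSONObject maps each
     * group value to its document count, e.g. {"80":12,"81":9}.
     *
     * JSONObject counts = elasticsearchUtils.searchDataByGroupNum(
     *         "gene_snp_data", new BoolQueryBuilder(), "file_id");
     */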
    /**
     * Deduplicated query
     *
     * @param index            index name
     * @param clazz            target type for deserialization
     * @param boolQueryBuilder query conditions
     * @param showField        fields to include; may be an empty array
     * @param shieldField      fields to exclude; may be an empty array
     * @param distinctField    field to deduplicate on
     * @param <T>
     * @return
     */
    public <T> List<T> searchDataByDistinct(String index, Class<T> clazz, BoolQueryBuilder boolQueryBuilder,
                                            String[] showField, String[] shieldField, String distinctField) {
        long start_time = System.currentTimeMillis();
        logger.info(" = = = >>> Deduplicated query on index {}", index);
        List<T> list = new ArrayList<>();
        // 1. Build the request
        SearchRequest request = new SearchRequest();
        // Target index
        request.indices(index);
        // Build the request body
        SearchSourceBuilder ssb = new SearchSourceBuilder();
        // Included and excluded fields
        ssb.fetchSource(showField, shieldField);
        // Query conditions
        ssb.query(boolQueryBuilder);
        // Deduplicate with field collapsing
        ssb.collapse(new CollapseBuilder(distinctField)).trackTotalHits(true);
        // Attach the query JSON to the request
        request.source(ssb);
        // Execute the query
        try {
            SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
            for (SearchHit hit : response.getHits().getHits()) {
                // Deserialize each hit's JSON source
                list.add(JSONObject.parseObject(hit.getSourceAsString(), clazz));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} deduplicated query returned {} documents, took: {}s", index,
                list.size(), (System.currentTimeMillis() - start_time) / 1000);
        return list;
    }
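    /*
     * Field collapsing only works on single-valued keyword or numeric fields, so
     * pass the .keyword sub-field for strings. Usage sketch:
     *
     * List<GeneSnpData> distinct = elasticsearchUtils.searchDataByDistinct(
     *         "gene_snp_data", GeneSnpData.class, new BoolQueryBuilder(),
     *         new String[]{}, new String[]{}, "file_id.keyword");
     */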
    /**
     * Save or update a document (fixed-field entity)
     *
     * @param fileEntity entity object
     * @param index      index name
     * @param routing    routing value
     */
    public void saveDataByObject(GeneSnpData fileEntity, String index, String routing) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> Saving document to index {}", index);
        // If the ID already exists, the new document overwrites the old one, i.e. an update.
        // Set the document ID
        IndexRequest request = new IndexRequest(index).id(fileEntity.getId());
        // Route the write to specific shards
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Pass the document as a JSON string
        request.source(JSONObject.toJSONString(fileEntity), XContentType.JSON);
        try {
            IndexResponse response = restHighLevelClient.index(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} save took: {}s", index,
                (System.currentTimeMillis() - start) / 1000);
    }
    /**
     * Save or update a document (arbitrary fields)
     *
     * @param map     document data ("id" is required)
     * @param index   index name
     * @param routing routing value
     */
    public void saveData(Map<String, Object> map, String index, String routing) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> Saving document to index {}", index);
        // If the ID already exists, the new document overwrites the old one, i.e. an update.
        // Set the document ID
        IndexRequest request = new IndexRequest(index).id(map.get("id").toString());
        // Route the write to specific shards
        if (StringUtils.isNotBlank(routing)) {
            request.routing(routing);
        }
        // Pass the document as a JSON string
        request.source(JSONObject.toJSONString(map), XContentType.JSON);
        try {
            IndexResponse response = restHighLevelClient.index(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} save took: {}s", index,
                (System.currentTimeMillis() - start) / 1000);
    }
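    /*
     * Both save methods issue one request per document. For larger loads the
     * client's BulkRequest (org.elasticsearch.action.bulk) batches many writes
     * into one round trip; a sketch, assuming map-shaped documents like those above:
     *
     * BulkRequest bulk = new BulkRequest();
     * for (Map<String, Object> doc : docs) {
     *     bulk.add(new IndexRequest("test_index")
     *             .id(doc.get("id").toString())
     *             .source(JSONObject.toJSONString(doc), XContentType.JSON));
     * }
     * BulkResponse bulkResponse = restHighLevelClient.bulk(bulk, RequestOptions.DEFAULT);
     * if (bulkResponse.hasFailures()) {
     *     logger.warn(bulkResponse.buildFailureMessage());
     * }
     */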
    /**
     * Delete all documents in an index
     *
     * @param indices index name
     * @return number of deleted documents
     */
    public Long deleteAllDataByIndices(String indices) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> Deleting all documents from index {}", indices);
        DeleteByQueryRequest request = new DeleteByQueryRequest(indices);
        // Continue on version conflicts
        request.setConflicts("proceed");
        // Match every document
        request.setQuery(QueryBuilders.matchAllQuery());
        long deleted = 0L;
        try {
            BulkByScrollResponse response = restHighLevelClient.deleteByQuery(request, RequestOptions.DEFAULT);
            deleted = response.getDeleted();
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} deleted all documents, count: {}, took: {}s", indices,
                deleted, (System.currentTimeMillis() - start) / 1000);
        return deleted;
    }
    /**
     * Delete documents by condition
     *
     * @param indices          index name
     * @param routing          routing value
     * @param boolQueryBuilder query conditions
     * @return number of deleted documents
     */
    public Long deleteDataByCondition(String indices, String routing, BoolQueryBuilder boolQueryBuilder) {
        long start = System.currentTimeMillis();
        logger.info(" = = = >>> Conditional delete on index {}", indices);
        long deleted = 0L;
        DeleteByQueryRequest request = new DeleteByQueryRequest(indices);
        if (StringUtils.isNotBlank(routing)) {
            request.setRouting(routing);
        }
        // Continue on version conflicts
        request.setConflicts("proceed");
        request.setQuery(boolQueryBuilder);
        try {
            BulkByScrollResponse response = restHighLevelClient.deleteByQuery(request, RequestOptions.DEFAULT);
            deleted = response.getDeleted();
        } catch (IOException e) {
            e.printStackTrace();
        }
        logger.info(" = = = >>> Index {} conditional delete, count: {}, took: {}s", indices,
                deleted, (System.currentTimeMillis() - start) / 1000);
        return deleted;
    }
    /**
     * Build query conditions (fixed-field GeneSnpData entity)
     *
     * @return
     */
    public BoolQueryBuilder judgeConditionByGeneSnpData(GeneSnpData fileEntity) {
        logger.info(" = = = >>> Building gene_snp_data query conditions");
        // Query condition object
        BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
        /**
         * ID
         */
        if (StringUtils.isNotBlank(fileEntity.getId())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("id.keyword", fileEntity.getId()));
        }
        /**
         * File ID
         */
        if (StringUtils.isNotBlank(fileEntity.getFile_id())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("file_id.keyword", fileEntity.getFile_id()));
        }
        /**
         * Chromosome, fuzzy match
         */
        if (StringUtils.isNotBlank(fileEntity.getChr_ind())) {
            boolQueryBuilder.filter(QueryBuilders.wildcardQuery("chr_ind.keyword", "*" + fileEntity.getChr_ind() + "*"));
        }
        /**
         * Position
         */
        if (fileEntity.getPosition() != null) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("position", fileEntity.getPosition()));
        }
        /**
         * Position range
         */
        if (fileEntity.getBeginPosition() != null && fileEntity.getEndPosition() != null) {
            boolQueryBuilder.filter(QueryBuilders
                    .rangeQuery("position")
                    // greater than or equal to the start position
                    .gte(fileEntity.getBeginPosition())
                    // less than or equal to the end position
                    .lte(fileEntity.getEndPosition()));
        }
        /**
         * Creation time (time-range search)
         */
        if (fileEntity.getStartTime() != null
                && fileEntity.getEndTime() != null) {
            boolQueryBuilder.filter(QueryBuilders
                    // expects timestamps such as 2020-01-02T03:17:37.638Z
                    .rangeQuery("create_time")
                    // FieldType.Date values are stored as UTC (-8 hours), so add
                    // 8 hours via getUTCTime() before comparing
                    // greater than or equal to the start time
                    .from(getUTCTime(fileEntity.getStartTime()))
                    // less than or equal to the end time
                    .to(getUTCTime(fileEntity.getEndTime())));
        }
        logger.info(" = = = >>> gene_snp_data query conditions: {}", boolQueryBuilder.toString()
                .replace(" ", "").replace("\n", ""));
        return boolQueryBuilder;
    }
    /**
     * Build query conditions (arbitrary fields)
     *
     * @return
     */
    public BoolQueryBuilder judgeConditionByMap(Map<String, Object> map) {
        logger.info(" = = = >>> Building map query conditions");
        // Query condition object
        BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
        /**
         * ID
         */
        if (map.get("id") != null && StringUtils.isNotBlank(map.get("id").toString())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("id.keyword", map.get("id").toString()));
        }
        /**
         * File ID
         */
        if (map.get("file_id") != null && StringUtils.isNotBlank(map.get("file_id").toString())) {
            boolQueryBuilder.filter(QueryBuilders.matchQuery("file_id.keyword", map.get("file_id").toString()));
        }
        /**
         * Chromosome, fuzzy match
         */
        if (map.get("chr_ind") != null && StringUtils.isNotBlank(map.get("chr_ind").toString())) {
            boolQueryBuilder.filter(QueryBuilders.wildcardQuery("chr_ind.keyword", "*" + map.get("chr_ind") + "*"));
        }
        /**
         * Creation time (time-range search)
         */
        if (map.get("startTime") != null
                && map.get("endTime") != null) {
            boolQueryBuilder.filter(QueryBuilders
                    // expects timestamps such as 2020-01-02T03:17:37.638Z
                    .rangeQuery("create_time")
                    .from(getUTCTime((Date) map.get("startTime")))
                    .to(getUTCTime((Date) map.get("endTime"))));
        }
        logger.info(" = = = >>> map query conditions: {}", boolQueryBuilder.toString()
                .replace(" ", "").replace("\n", ""));
        return boolQueryBuilder;
    }
    /**
     * Add 8 hours to a date (CST offset from UTC)
     *
     * @param date the date to shift
     */
    public Date getUTCTime(Date date) {
        Calendar cal = Calendar.getInstance();
        cal.setTime(date);
        cal.add(Calendar.HOUR, 8);
        return cal.getTime();
    }
}
Key Point
Elasticsearch is a distributed search engine. Each index can have multiple shards, which split one large logical index into several smaller physical ones and avoid the performance problems of a single oversized index; each shard can also carry replicas for reliability and better concurrent reads. Splitting an index into shards is usually fine, but once an index has more than one shard, grouped (terms) aggregations can produce inaccurate results. The official documentation describes this clearly: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_shard_size_3
For example, a grouped aggregation with sorting (group order) goes wrong as soon as there are multiple shards.
As an illustration, I created an index with three shards.
First, insert a batch of data into the index:
# data containing the types LG01, LG02, LG03, and LG04
{"type":"LG01","number":21896}
{"type":"LG01","number":144166}
{"type":"LG04","number":512086}
{"type":"LG01","number":448631}
{"type":"LG01","number":638216}
{"type":"LG01","number":1226413}
{"type":"LG02","number":3241037}
{"type":"LG01","number":390360}
{"type":"LG01","number":484768}
{"type":"LG01","number":821269}
{"type":"LG01","number":46390360}
{"type":"LG01","number":46484768}
{"type":"LG03","number":270662}
{"type":"LG01","number":46637427}
{"type":"LG01","number":46912605}
{"type":"LG03","number":10863477}
{"type":"LG02","number":388297}
{"type":"LG02","number":437768}
{"type":"LG02","number":753599}
{"type":"LG02","number":988960}
{"type":"LG02","number":1039091}
{"type":"LG02","number":4053373}
{"type":"LG04","number":18844340}
{"type":"LG01","number":637427}
{"type":"LG02","number":5877374}
{"type":"LG02","number":6189273}
{"type":"LG04","number":367479}
{"type":"LG02","number":989001}
{"type":"LG02","number":1332246}
{"type":"LG02","number":2253486}
{"type":"LG02","number":2465302}
{"type":"LG02","number":4053384}
{"type":"LG03","number":270664}
{"type":"LG03","number":311950}
{"type":"LG02","number":316947}
{"type":"LG03","number":422620}
{"type":"LG03","number":572404}
{"type":"LG03","number":10730243}
{"type":"LG04","number":99338}
{"type":"LG03","number":11395636}
{"type":"LG03","number":11571674}
{"type":"LG03","number":11571799}
{"type":"LG04","number":18844339}
{"type":"LG03","number":4820414}
{"type":"LG03","number":4897138}
{"type":"LG01","number":46821269}
{"type":"LG01","number":912605}
{"type":"LG03","number":5099274}
{"type":"LG03","number":5131482}
{"type":"LG03","number":5459684}
{"type":"LG04","number":18815419}
{"type":"LG04","number":18951531}
{"type":"LG04","number":19293490}
{"type":"LG04","number":25252}
{"type":"LG04","number":277971}
{"type":"LG04","number":958033}
{"type":"LG04","number":2513320}
{"type":"LG03","number":11331084}
{"type":"LG04","number":2926072}
{"type":"LG04","number":3047003}
{"type":"LG04","number":3187459}
The data returned after the grouped, sorted aggregation:
{"type":"LG01","number":21896}
{"type":"LG01","number":144166}
{"type":"LG01","number":448631}
{"type":"LG01","number":638216}
{"type":"LG01","number":1226413}
{"type":"LG02","number":1039091}
{"type":"LG02","number":3241037}
{"type":"LG02","number":4053373}
{"type":"LG02","number":5877374}
{"type":"LG02","number":6189273}
{"type":"LG03","number":10730243}
{"type":"LG03","number":10863477}
{"type":"LG03","number":11331084}
{"type":"LG03","number":11395636}
{"type":"LG03","number":11571674}
{"type":"LG03","number":11571799}
{"type":"LG04","number":18815419}
{"type":"LG04","number":18844339}
{"type":"LG04","number":18844340}
{"type":"LG04","number":18951531}
{"type":"LG04","number":19293490}
# ---------- separator for readability ----------
{"type":"LG01","number":46390360}
{"type":"LG01","number":46484768}
{"type":"LG01","number":46637427}
{"type":"LG01","number":46821269}
{"type":"LG01","number":46912605}
{"type":"LG02","number":316947}
{"type":"LG02","number":388297}
{"type":"LG02","number":437768}
{"type":"LG02","number":753599}
{"type":"LG02","number":988960}
{"type":"LG03","number":270662}
{"type":"LG03","number":270664}
{"type":"LG03","number":311950}
{"type":"LG03","number":422620}
{"type":"LG03","number":572404}
{"type":"LG04","number":25252}
{"type":"LG04","number":99338}
{"type":"LG04","number":277971}
{"type":"LG04","number":367479}
{"type":"LG04","number":512086}
# ---------- separator for readability ----------
{"type":"LG01","number":390360}
{"type":"LG01","number":484768}
{"type":"LG01","number":637427}
{"type":"LG01","number":821269}
{"type":"LG01","number":912605}
{"type":"LG02","number":989001}
{"type":"LG02","number":1332246}
{"type":"LG02","number":2253486}
{"type":"LG02","number":2465302}
{"type":"LG02","number":4053384}
{"type":"LG03","number":4820414}
{"type":"LG03","number":4897138}
{"type":"LG03","number":5099274}
{"type":"LG03","number":5131482}
{"type":"LG03","number":5459684}
{"type":"LG04","number":958033}
{"type":"LG04","number":2513320}
{"type":"LG04","number":2926072}
{"type":"LG04","number":3047003}
{"type":"LG04","number":3187459}
The data was automatically split into three parts (one per shard), and each part was grouped and then sorted independently. The result we actually wanted is:
{"type":"LG01","number":21896}
{"type":"LG01","number":144166}
{"type":"LG01","number":390360}
{"type":"LG01","number":448631}
{"type":"LG01","number":484768}
{"type":"LG01","number":637427}
{"type":"LG01","number":638216}
{"type":"LG01","number":821269}
{"type":"LG01","number":912605}
{"type":"LG01","number":1226413}
{"type":"LG01","number":46390360}
{"type":"LG01","number":46484768}
{"type":"LG01","number":46637427}
{"type":"LG01","number":46821269}
{"type":"LG01","number":46912605}
# ---------- separator for readability ----------
{"type":"LG02","number":316947}
{"type":"LG02","number":388297}
{"type":"LG02","number":437768}
{"type":"LG02","number":753599}
{"type":"LG02","number":988960}
{"type":"LG02","number":989001}
{"type":"LG02","number":1039091}
{"type":"LG02","number":1332246}
{"type":"LG02","number":2253486}
{"type":"LG02","number":2465302}
{"type":"LG02","number":3241037}
{"type":"LG02","number":4053373}
{"type":"LG02","number":4053384}
{"type":"LG02","number":5877374}
{"type":"LG02","number":6189273}
# ---------- separator for readability ----------
{"type":"LG03","number":270662}
{"type":"LG03","number":270664}
{"type":"LG03","number":311950}
{"type":"LG03","number":422620}
{"type":"LG03","number":572404}
{"type":"LG03","number":4820414}
{"type":"LG03","number":4897138}
{"type":"LG03","number":5099274}
{"type":"LG03","number":5131482}
{"type":"LG03","number":5459684}
{"type":"LG03","number":10730243}
{"type":"LG03","number":10863477}
{"type":"LG03","number":11331084}
{"type":"LG03","number":11395636}
{"type":"LG03","number":11571674}
{"type":"LG03","number":11571799}
# ---------- separator for readability ----------
{"type":"LG04","number":25252}
{"type":"LG04","number":99338}
{"type":"LG04","number":277971}
{"type":"LG04","number":367479}
{"type":"LG04","number":512086}
{"type":"LG04","number":958033}
{"type":"LG04","number":2513320}
{"type":"LG04","number":2926072}
{"type":"LG04","number":3047003}
{"type":"LG04","number":3187459}
{"type":"LG04","number":18815419}
{"type":"LG04","number":18844339}
{"type":"LG04","number":18844340}
{"type":"LG04","number":18951531}
{"type":"LG04","number":19293490}
So, is there a way to avoid these inaccurate statistics?
The official Elasticsearch documentation offers two approaches:
1. Aggregations over a single shard are exact, so if all of an index's data is written into one shard, its aggregation results are accurate.
2. When indexing, use a routing field so that all data to be aggregated together lands on the same shard; aggregations over it are then exact as well.
For more on routing, see my earlier article: https://www.cnblogs.com/nhdlb/p/16738914.html
The first approach suits smaller data sets: keep all the data in a single shard and operate on it directly.
The second suits larger data sets: use a business field to route records that share an attribute to the same shard, then operate on them there.
This article's code already wires in the routing settings (at index creation and as a query parameter); apply them as your scenario requires, as shown in the sketch below.
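Concretely, that means passing the same business key as the routing argument to both the save and the search methods. A minimal sketch, assuming the documents are grouped by file_id:
// Write: route every document of file 80 to the same shard (group)
elasticsearchUtils.saveData(map, "gene_snp_data", "80");
// Read: query with the same routing value, so the aggregation sees all of
// file 80's documents on the shards it searches and the result is exact
elasticsearchUtils.searchDataByGroup("gene_snp_data", "80", GeneSnpData.class,
        new BoolQueryBuilder(), new String[]{}, new String[]{},
        "file_id", "position", true);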
Test Functions
/**
 * Unit tests
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class ES_Dome {
    // Inject the utility class
    @Autowired
    private ElasticsearchUtils elasticsearchUtils;
    /**
     * Create an index
     */
    @Test
    public void createIndex() {
        String index = "test_index";
        Long max = 200000L;
        elasticsearchUtils.createIndex(index, max);
    }
    /**
     * Insert or update documents
     */
    @Test
    public void save_updateData() {
        String index = "test_index";
        // Build some test documents
        for (int i = 0; i < 10; i++) {
            Map<String, Object> map = new HashMap<>();
            // Reusing an existing ID overwrites the old document, i.e. an update
            map.put("id", UUID.randomUUID().toString());
            map.put("file_id", 80 + i);
            map.put("crop_id", "1");
            map.put("position", 10606360L + i);
            map.put("create_user_id", "6");
            map.put("create_time", new Date());
            // if the index was created with _routing.required=true, pass a routing
            // value here instead of null
            elasticsearchUtils.saveData(map, index, null);
        }
    }
    /**
     * Query documents
     */
    @Test
    public void selectData() {
        String index = "test_index";
        Pageable pageable = PageRequest.of(1, 1);
        GeneSnpData geneSnpData = new GeneSnpData();
        List<GeneSnpData> list = elasticsearchUtils.searchData(index, pageable, null, GeneSnpData.class,
                elasticsearchUtils.judgeConditionByGeneSnpData(geneSnpData),
                new String[]{}, new String[]{}, "position", true);
        System.out.println(" = = = >>> list: " + JSONArray.toJSONString(list));
    }
}