Spring Boot + Elasticsearch 实现索引的日常维护
全文检索的应用越来越广泛,几乎成了互联网应用的标配:商品搜索、日志分析、历史数据归档等场景都会涉及大批量的数据。在全文检索方面,应用较为广泛的方案无外乎Lucene、Solr、Elasticsearch三种。es、solr的底层都依托于Lucene,但es比solr学习成本更低,其提供的RESTful API简单快捷,对互联网应用开发而言更是如虎添翼。
下面结合实际案例,通过Java API的形式操作es数据集。
框架选型基础是Spring Boot + Spring-data-elasticsearch + elasticsearch。
使用ElasticsearchRepository的形式来连接、维护ES数据集,ElasticsearchRepository中提供了简单的操作索引数据的方法集合,继承自ElasticsearchCrudRepository,涵盖了CRUD、排序、分页等常见的基本操作功能。
/**
 * Generic repository contract for Elasticsearch-backed entities, layered on top of
 * {@code ElasticsearchCrudRepository}.
 *
 * <p>Only the signatures are visible here; the behavioral notes on the individual
 * methods are inferred from their names and should be confirmed against the
 * Spring Data Elasticsearch reference.
 *
 * @param <T>  the entity type handled by the repository
 * @param <ID> the serializable identifier type of the entity
 */
@NoRepositoryBean
public interface ElasticsearchRepository<T, ID extends Serializable>
        extends ElasticsearchCrudRepository<T, ID> {

    /**
     * Accepts an entity and returns an instance of the same type
     * (presumably the indexed entity — confirm).
     */
    <S extends T> S index(S entity);

    /** Runs the given query and returns the matching entities. */
    Iterable<T> search(QueryBuilder query);

    /** Runs the given query and returns one page of matches as described by {@code pageable}. */
    Page<T> search(QueryBuilder query, Pageable pageable);

    /** Runs the given search query and returns a page of matches. */
    Page<T> search(SearchQuery searchQuery);

    /**
     * Returns a page of entities similar to {@code entity}.
     * The string array presumably names the fields to compare — confirm.
     */
    Page<T> searchSimilar(T entity, String[] fields, Pageable pageable);

    /** Refresh operation; takes no arguments and returns nothing. */
    void refresh();

    /** Returns the class object of the entity type {@code T}. */
    Class<T> getEntityClass();
}
从基本的pom配置开始
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Coordinates of this module -->
    <groupId>com.esp.index.data</groupId>
    <artifactId>esp-cube</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <!-- Inherit dependency and plugin management from Spring Boot -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.5.2.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.7</java.version>
    </properties>

    <dependencies>
        <!-- JDBC starter, with the tomcat-jdbc artifact excluded -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.tomcat</groupId>
                    <artifactId>tomcat-jdbc</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Spring Data Elasticsearch starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>

        <!-- Web starter, with the log4j-over-slf4j artifact excluded -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>log4j-over-slf4j</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Core starter, with the default logging starter excluded (a log4j starter is declared below) -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
            <exclusions>
                <exclusion>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-starter-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Test support, test scope only -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <!-- log4j starter; NOTE(review): version is pinned explicitly and differs from the parent's 1.5.2.RELEASE -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-log4j</artifactId>
            <version>1.3.1.RELEASE</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>esp-cube</finalName>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
编写自己的Repository操作类
/**
 * Repository for {@link Article} documents identified by a {@code Long} id.
 *
 * <p>All generic operations come from {@code ElasticsearchRepository}; this interface
 * adds a single finder on top.
 */
public interface ArticleSearchRepository extends ElasticsearchRepository<Article, Long> {

    /**
     * Finds articles by their abstracts and content.
     *
     * <p>No implementation is declared here; presumably Spring Data derives the query
     * from the method name (abstracts AND content) — confirm.
     *
     * @param abstracts value to match against the {@code abstracts} property
     * @param content   value to match against the {@code content} property
     * @return the matching articles
     */
    List<Article> findByAbstractsAndContent(String abstracts, String content);
}
其中Article是与elasticsearch连接的实体类,类似于PO的概念,其中指定了索引名称、类型名称及分片、副本数量等要素。
/**
 * Entity stored in the {@code article_index} index under the {@code article} type.
 *
 * <p>The {@code @Document} settings below declare 5 shards, 1 replica, the {@code fs}
 * index store type and a refresh interval of {@code -1}. Accessors are generated by
 * Lombok's {@code @Data}.
 */
@Data
@Document(
        indexName = "article_index",
        type = "article",
        shards = 5,
        replicas = 1,
        indexStoreType = "fs",
        refreshInterval = "-1")
public class Article implements Serializable {

    /**
     * serialVersionUID.
     *
     * @since JDK 1.6
     */
    private static final long serialVersionUID = 1L;

    /** Document identifier. */
    @Id
    private Long id;

    /** Title. */
    private String title;

    /** Abstract / summary. */
    private String abstracts;

    /** Body content. */
    private String content;

    /**
     * Publication time.
     *
     * <p>Mapped as a date field: the {@code date_time} format only applies to a
     * date-typed field, so the field type is {@code FieldType.Date} rather than
     * {@code FieldType.Object}. The value is stored but not indexed.
     */
    @Field(
            format = DateFormat.date_time,
            index = FieldIndex.no,
            store = true,
            type = FieldType.Date)
    private Date postTime;

    /** Click count. */
    private Long clickCount;
}
我们需要定义域的实体和一个Spring Data的基本CRUD支持库类。用@Id注解定义标识符字段,如果没有指定ID字段,Elasticsearch将无法索引你的文档。同时需要指定索引名称和类型,@Document注解也有助于我们设置分片和副本数量。
接口类
/**
 * Service-level operations on {@link Article} documents.
 *
 * @author guooo (2017-09-27)
 * @since JDK 1.6
 */
public interface ArticleService {

    /**
     * Writes an article.
     *
     * @param article the article to write
     * @return a {@code long} value for the written article (the implementation in this
     *         file returns the saved article's id)
     */
    long saveArticle(Article article);

    /**
     * Deletes an article. Per the original author's note, the document is not physically
     * removed; it merely stops being returned by queries.
     *
     * @param id id of the article to delete
     */
    void deleteArticle(long id);

    /**
     * Looks up a single article.
     *
     * @param id id of the article
     * @return the article for that id
     */
    Article findArticle(long id);

    /**
     * Returns articles using paging.
     *
     * @return the articles of one page
     */
    List<Article> findArticlePageable();

    /**
     * Returns all articles.
     *
     * @return every article
     */
    List<Article> findArticleAll();

    /**
     * Returns articles in sorted order.
     *
     * @return the sorted articles
     */
    List<Article> findArticleSort();

    /**
     * Searches articles by content.
     *
     * @param content the text to search for
     * @return the matching articles
     */
    List<Article> search(String content);

    /**
     * Updates an article. Per the original author's note, Elasticsearch has no modify
     * operation, so the update is carried out in combination with a save.
     *
     * @param id id of the article to update
     * @return a {@code long} value for the updated article (the implementation in this
     *         file returns its id)
     */
    long update(long id);
}
接口实现
/**
 * {@link ArticleService} implementation that delegates every operation to
 * {@link ArticleSearchRepository}.
 */
@Service
public class ArticleServiceImpl implements ArticleService {

    /** Page index used by {@link #findArticlePageable()}. */
    final int page = 0;

    /** Page size used by {@link #findArticlePageable()}. */
    final int size = 10;

    /* Search mode: weight scores are summed. */
    String SCORE_MODE_SUM = "sum";

    // Unrelated hits score 1 by default, so the minimum weight score is set to 10.
    Float MIN_SCORE = 10.0F;

    /** Fixed page request built from {@link #page} and {@link #size}. */
    Pageable pageable = new PageRequest(page, size);

    @Autowired
    ArticleSearchRepository repository;

    /**
     * Saves the article through the repository.
     *
     * @param article the article to save
     * @return the id of the saved article; unboxing throws a
     *         {@code NullPointerException} if the saved article carries no id
     */
    @Override
    public long saveArticle(Article article) {
        Article saved = repository.save(article);
        return saved.getId();
    }

    /**
     * Deletes the article with the given id through the repository.
     *
     * @param id id of the article to delete
     */
    @Override
    public void deleteArticle(long id) {
        repository.delete(id);
    }

    /**
     * Looks up one article by id.
     *
     * @param id id of the article
     * @return whatever the repository's {@code findOne} returns for that id
     */
    @Override
    public Article findArticle(long id) {
        return repository.findOne(id);
    }

    /**
     * Returns the content of the page described by {@link #pageable}.
     *
     * @return the articles on that page
     */
    @Override
    public List<Article> findArticlePageable() {
        return repository.findAll(pageable).getContent();
    }

    /**
     * Returns every article the repository yields, copied into a list.
     *
     * @return a new list holding all articles
     */
    @Override
    public List<Article> findArticleAll() {
        return toList(repository.findAll());
    }

    /**
     * Returns all articles ordered by {@code clickCount} ascending.
     *
     * @return a new list holding the sorted articles
     */
    @Override
    public List<Article> findArticleSort() {
        Sort byClickCount = new Sort(new Order(Direction.ASC, "clickCount"));
        return toList(repository.findAll(byClickCount));
    }

    /**
     * Searches using the same text for both the abstracts and the content argument
     * of the repository finder.
     *
     * @param content the text to search for
     * @return the articles returned by the repository
     */
    @Override
    public List<Article> search(String content) {
        return repository.findByAbstractsAndContent(content, content);
    }

    /**
     * Loads the article, sets its title to {@code "test"} and saves it again.
     *
     * @param id id of the article to update
     * @return the id of the saved article
     * @throws IllegalArgumentException if no article exists for {@code id}
     */
    @Override
    public long update(long id) {
        Article article = repository.findOne(id);
        if (article == null) {
            // Fail with a clear message instead of a bare NullPointerException below.
            throw new IllegalArgumentException("No article found with id " + id);
        }
        article.setTitle("test");
        Article saved = repository.save(article);
        System.out.println(saved.getId() + "更新的数据");
        return saved.getId();
    }

    /** Copies the elements of an iterable into a new mutable list, preserving order. */
    private static List<Article> toList(Iterable<Article> source) {
        List<Article> result = new ArrayList<>();
        for (Article item : source) {
            result.add(item);
        }
        return result;
    }
}
是不是与JPA、hibernate操作数据集的手法很类似?
controller方法类:
/**
 * REST endpoints under {@code /article} that forward to {@link ArticleService}.
 * Every mapping accepts POST only.
 */
@RestController
@RequestMapping(value = "/article")
public class APIArticleController {

    @Autowired
    ArticleService articleService;

    /**
     * Generates and saves sample articles for the sequence numbers 10000 (inclusive)
     * to 12000 (exclusive), printing the value returned by each save.
     *
     * @return always the constant {@code 23}
     */
    @RequestMapping(value = "save", method = RequestMethod.POST)
    public long save() {
        for (int seq = 10000; seq < 12000; seq++) {
            Article sample = buildSampleArticle(seq);
            long savedId = articleService.saveArticle(sample);
            System.out.println(savedId);
        }
        return 23;
    }

    /** Deletes the article with the given id. */
    @RequestMapping(value = "delete", method = RequestMethod.POST)
    public void deleteArticle(long id) {
        articleService.deleteArticle(id);
    }

    /** Returns the article with the given id. */
    @RequestMapping(value = "findOne", method = RequestMethod.POST)
    public Article findArticle(long id) {
        Article found = articleService.findArticle(id);
        return found;
    }

    /** Returns the paged article listing provided by the service. */
    @RequestMapping(value = "findArticlePageable", method = RequestMethod.POST)
    public List<Article> findArticlePageable() {
        List<Article> pageContent = articleService.findArticlePageable();
        return pageContent;
    }

    /** Returns all articles provided by the service. */
    @RequestMapping(value = "findArticleAll", method = RequestMethod.POST)
    public List<Article> findArticleAll() {
        List<Article> all = articleService.findArticleAll();
        return all;
    }

    /** Returns the sorted article listing provided by the service. */
    @RequestMapping(value = "findArticleSort", method = RequestMethod.POST)
    public List<Article> findArticleSort() {
        List<Article> sorted = articleService.findArticleSort();
        return sorted;
    }

    /** Searches articles for the given content text. */
    @RequestMapping(value = "search", method = RequestMethod.POST)
    public List<Article> search(String content) {
        List<Article> hits = articleService.search(content);
        return hits;
    }

    /** Triggers the service's update for the given id and returns its result. */
    @RequestMapping(value = "update", method = RequestMethod.POST)
    public long update(long id) {
        long updatedId = articleService.update(id);
        return updatedId;
    }

    /** Builds one sample article for the given sequence number. */
    private Article buildSampleArticle(int seq) {
        Article sample = new Article();
        // Click count: the sequence number plus a random int drawn with bounds (23, seq).
        sample.setClickCount(Long.valueOf(seq + RandomUtils.nextInt(23, seq)));
        sample.setAbstracts("我的一个测试" + seq);
        sample.setContent(seq + "这是第一个测试的内容@spring-data-elasticsearch");
        sample.setPostTime(new Date());
        // NOTE(review): both bounds are the same value, so the id presumably always
        // equals seq — confirm against the RandomUtils documentation.
        sample.setId(Long.valueOf(RandomUtils.nextLong(seq, seq)));
        return sample;
    }
}
Spring Boot的启动类及配置项,这里略过。项目启动后,可通过controller暴露出来的方法进行Article数据索引的CRUD操作。
成长的乐趣,在于分享!
|
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
· 周边上新:园子的第一款马克杯温暖上架