ElasticSearch入门及使用java api操作ES
分布式的搜索引擎
ELK的一个组成部分
E:ElasticSearch数据搜索和分析的功能
L:Logstash日志搜集系统
K:Kibana数据可视化分析,数据可视化平台
使用示例
假如一个分布式系统有100台服务器,系统出现故障时,要查看错误日志就得一台机器一台机器地登录上去看,非常麻烦。
但是如果使用了ELK架构,在系统出现运行异常时,直接在Kibana页面就能看到日志情况。如果再根据业务接入一些实时计算模块,还能做实时报警功能。
全文检索
概念:全文检索是指计算机索引程序通过扫描文章中的每一个词,对每一个词建立一个索引,指明该词在文章中出现的位置与次数,当用户进行查询时,检索程序就根据事先建立的索引进行查找,并将查找的结果反馈给用户的检索方式。(类似于字典的目录)
主要分为两种:按字检索与按词检索
按字检索
对于文章中的每一个字都建立索引,检索时将词分解为字的组合
按词检索
对中文中的词,即语义单位建立索引(如乔碧罗殿下,唱跳rap),检索时按词进行检索。
倒排索引
像之前的mysql数据库,一般都是根据前端传来的id,然后去数据库里匹配这个id所对应的内容。
而倒排索引正好相反:先对内容分词并建立“词 → 文档id”的映射,搜索时根据关键词找到对应的id,最后拿着id去查询真正需要的内容。
Lucene
Lucene就是一个Jar包,里面包含了各种建立倒排索引的方法,java开发的时候只需要导入这个jar包就可以开发了。
ES和Lucene的区别
ES的底层就是Lucene,区别在于ES是分布式的。
Lucene难以解决的问题
-
数据量越来越大,单台服务器存不下,就需要多台服务器来存数据。但是Lucene本身不支持分布式,只能部署多个Lucene实例,再自己写代码来合并各实例的搜索结果,开发和维护成本都很高。
-
要考虑安全性,一台服务器挂了,那么上面的数据就消失了。
ES则是分布式集群,每一个节点其实就是Lucene,当用户搜索时,会随机挑一台,然后这台机器自己知道数据在哪,不用我们管这些底层。
ES的优点
-
分布式的功能
-
数据高可用、集群高可用
-
API更简单(基于Lucene,隐藏了Lucene的复杂性,提供简单api)
-
支持的语言很多
-
支持PB级别的数据
-
完成搜索和分析的功能
ES核心概念
-
NRT(near realtime)近实时
-
cluster集群
-
Node节点
-
index索引
-
type类型
-
document文档
-
field字段
-
shard分片
-
replica副本
使用示例
比如一首诗,有诗题、作者、朝代、字数、诗内容等字段,那么首先,我们可以建立一个名叫 poems 的索引(ES 的索引名必须全部小写),然后创建一个名叫 Poem 的类型,类型是通过 Mapping 来定义每个字段的类型。
比如诗题、作者、朝代都是 Keyword 类型,诗内容是 Text 类型,而字数是 Integer 类型,最后就是把数据组织成 Json 格式存放进去了。
注意:Keyword类型是不会分词的,直接根据字符串内容建立倒排索引;Text类型在存入ES的时候,会先分词,然后根据分词后的内容建立倒排索引。
使用Java API调用ES
maven项目(不整合springboot)
引入对应依赖
<dependencies>
<!-- Elasticsearch core library; its version matches the REST client declared below -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.17.6</version>
</dependency>
<!-- Elasticsearch high-level REST client (provides RestHighLevelClient used by the examples) -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.17.6</version>
</dependency>
<!-- Elasticsearch depends on log4j 2.x; both the API and the core implementation are declared -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.19.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.19.0</version>
</dependency>
<!-- Jackson databind; the document-insert example imports ObjectMapper from it -->
<!-- NOTE(review): 2.14.0-rc1 is a release-candidate version; consider pinning a final release -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.14.0-rc1</version>
</dependency>
<!-- JUnit for unit tests -->
<!-- NOTE(review): no <scope>test</scope> is set, so JUnit ends up on the compile classpath; confirm this is intended -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
</dependency>
</dependencies>
客户端建立连接
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import java.io.IOException;
/**
* @Description 创建ES客户端:完成与ES服务端的连接
* @Author LH
* @Date 2022/10/9 9:24
**/
public class ESTestClient {
创建索引(相当于mysql的创建数据库)
import org.apache.http.HttpHost;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import java.io.IOException;
/**
* @Description 创建索引(相当于mysql的创建数据库)
* @Author LH
* @Date 2022/10/9 9:30
**/
public class ESTestIndexCreate {
查看索引
import org.apache.http.HttpHost;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.client.indices.GetIndexResponse;
import java.io.IOException;
/**
* @Description 查看索引
* @Author LH
* @Date 2022/10/9 10:16
**/
public class ESTestIndexSearch {
删除索引
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import java.io.IOException;
/**
* @Description 删除索引
* @Author LH
* @Date 2022/10/9 10:31
**/
public class ESTestIndexDelete {
插入文档(相当于mysql的插入一行数据)
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.xcontent.XContentType;
import org.example.es.model.User;
import java.io.IOException;
/**
* @Description 插入文档(相当于mysql的插入一行数据)
* @Author LH
* @Date 2022/10/9 10:50
**/
public class ESTestDocInsert {
查询文档
import org.apache.http.HttpHost;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import java.io.IOException;
/**
* @Description 查询文档
* @Author LH
* @Date 2022/10/9 11:19
**/
public class ESTestDocSearch {
修改文档
import org.apache.http.HttpHost;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.xcontent.XContentType;
import java.io.IOException;
/**
* @Description 修改文档
* @Author LH
* @Date 2022/10/9 11:10
**/
public class ESTestDocUpdate {
删除文档
import org.apache.http.HttpHost;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import java.io.IOException;
/**
* @Description 删除文档
* @Author LH
* @Date 2022/10/9 11:27
**/
public class ESTestDocDelete {
批量插入文档
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.xcontent.XContentType;
import java.io.IOException;
/**
* @Description 批量插入文档
* @Author LH
* @Date 2022/10/9 13:32
**/
public class ESTestDocInsertBatch {
批量删除文档
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import java.io.IOException;
/**
* @Description 批量删除文档
* @Author LH
* @Date 2022/10/9 13:43
**/
public class ESTestDocDeleteBatch {
全量查询
/**
* @Description 全量查询
* @Author LH
* @Date 2022/10/9 13:57
**/
public class ESTestDocAllQuery {
组合条件查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 组合条件查询
* @Author LH
* @Date 2022/10/9 15:21
**/
public class ESTestDocComQuery {
按年龄做降序查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder;
import java.io.IOException;
/**
* @Description 按年龄做降序查询
* @Author LH
* @Date 2022/10/9 14:35
**/
public class ESTestDocDescQuery {
过滤查询(过滤掉不需要展示的字段)
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 过滤查询(过滤掉不需要展示的字段)
* @Author LH
* @Date 2022/10/9 14:45
**/
public class ESTestDocExcludeQuery {
模糊查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.query.FuzzyQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 模糊查询
* @Author LH
* @Date 2022/10/9 16:00
**/
public class ESTestDocFuzzyQuery {
分页查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 分页查询
* 做全量查询,对查询的结果进行分页显示,每页两条数据,查询第一页
* 查看第几页:(页码 - 1) * 每页条数
* @Author LH
* @Date 2022/10/9 14:22
**/
public class ESTestDocPageQuery {
条件查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 条件查询
* @Author LH
* @Date 2022/10/9 14:16
**/
public class ESTestDocQuery {
范围查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 范围查询
* @Author LH
* @Date 2022/10/9 15:32
**/
public class ESTestDocRangeQuery {
聚合查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 聚合查询
* @Author LH
* @Date 2022/10/9 16:24
**/
public class ESTestDocAggreQuery {
分组查询
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
* @Description 分组查询
* @Author LH
* @Date 2022/10/9 16:43
**/
public class ESTestDocGroupQuery {