PHP使用ES搜索
版本说明
注意自己的 PHP 版本和 Elasticsearch 版本的对应关系,选择合适的 PHP Elasticsearch 客户端版本。
Elasticsearch版本 | PHP ES客户端版本 | PHP版本 |
---|---|---|
>= 7.0, < 8.0 | >= 7.0 | >= 7.1 |
>= 6.6,<= 6.7 | 6.7.x | > 7.0 |
>= 6.0,<= 6.5 | 6.5.x | > 7.0 |
>= 5.0,< 6.0 | 5.5.x | > 5.6,< 7.0 |
安装依赖
使用 Composer 方式安装
composer require elasticsearch/elasticsearch
封装
单例模式封装
/**
 * Singleton wrapper that owns the Elasticsearch client handle.
 *
 * The handle is built once, on first use, against ES_HOST_NAME and is then
 * shared by every caller of getEsInstance().
 *
 * NOTE(review): `ClientBuilder` is expected to be imported by the enclosing
 * file (elasticsearch/elasticsearch package) - confirm the `use` statement.
 */
class EsClient
{
    /** Host passed to the client builder. */
    const ES_HOST_NAME = '192.168.0.242';

    /**
     * The shared instance, created lazily by getEsInstance().
     *
     * @var self|null
     */
    private static $instance;

    /**
     * Elasticsearch client handle.
     *
     * @var object|null
     */
    private $elastic;

    /**
     * Private so the class cannot be instantiated from outside.
     */
    private function __construct()
    {
        $this->elastic = ClientBuilder::create()->setHosts([self::ES_HOST_NAME])->build();
    }

    /**
     * Private so the singleton cannot be cloned from outside.
     */
    private function __clone()
    {
    }

    /**
     * Sole public entry point: returns the shared instance, creating it on first call.
     *
     * @return self
     */
    public static function getEsInstance()
    {
        if (!(self::$instance instanceof self)) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Returns the Elasticsearch client handle held by this instance.
     *
     * @return \Elasticsearch\Client|object
     */
    public function getEsConn()
    {
        return $this->elastic;
    }

    /**
     * Releases the client handle when the object is destroyed.
     *
     * The previous implementation called close() on the client through
     * self::$instance. The elasticsearch-php client exposes no close() method,
     * and the static reference may already be null while the destructor runs,
     * so that call failed during teardown. Dropping our own reference is
     * enough for the handle to be garbage-collected.
     */
    public function __destruct()
    {
        $this->elastic = null;
    }
}
ES客户端操作封装
/**
 * Thin convenience layer over the Elasticsearch client: index management plus
 * document CRUD and search. Every method builds the request parameter array
 * and forwards it to the matching client call.
 */
class EsService
{
    /**
     * Elasticsearch client handle obtained from the EsClient singleton.
     *
     * @var object
     */
    private $client;

    /**
     * Fetches the shared client handle from EsClient.
     */
    public function __construct()
    {
        $this->client = EsClient::getEsInstance()->getEsConn();
    }

    /**
     * Checks whether an index exists.
     *
     * @param string $index_name
     * @return bool
     */
    public function existsIndex(string $index_name = 'gyx_ik'): bool
    {
        return $this->client->indices()->exists(['index' => $index_name]);
    }

    /**
     * Creates an index with 5 shards and 1 replica.
     *
     * @param string $index_name
     * @param array $mapping Mapping definition; when empty, no mappings section is sent.
     * @return array
     * @throws \think\Exception When the index already exists.
     */
    public function createIndex(string $index_name = 'gyx_ik', array $mapping = []): array
    {
        // An index can only be created once.
        if ($this->existsIndex($index_name)) {
            throw new Exception('该索引已存在:' . $index_name);
        }

        $body = [
            'settings' => [
                'number_of_shards' => 5,
                'number_of_replicas' => 1
            ]
        ];

        // A nested empty PHP array is JSON-encoded as a list ("[]"), not an
        // object, so an empty mapping is left out of the request entirely.
        if ($mapping !== []) {
            $body['mappings'] = $mapping;
        }

        return $this->client->indices()->create([
            'index' => $index_name,
            'body' => $body
        ]);
    }

    /**
     * Deletes an index.
     *
     * @param string $index_name Must be non-empty.
     * @return array
     * @throws \InvalidArgumentException When no index name is given.
     */
    public function deleteIndex(string $index_name = ''): array
    {
        // Refuse to send a delete request that names no target.
        if (trim($index_name) === '') {
            throw new \InvalidArgumentException('deleteIndex() requires a non-empty index name');
        }

        return $this->client->indices()->delete(['index' => $index_name]);
    }

    /**
     * Indexes (adds or replaces) a document.
     *
     * @param mixed $id
     * @param mixed $doc Document fields, e.g. ['id'=>100, 'title'=>'phone']
     * @param string $index_name
     * @param string $type_name
     * @return array
     */
    public function addDoc($id, $doc, string $index_name = 'gyx_ik', string $type_name = '_doc'): array
    {
        return $this->client->index([
            'index' => $index_name,
            'type' => $type_name,
            'id' => $id,
            'body' => $doc
        ]);
    }

    /**
     * Checks whether a document exists.
     *
     * @param int $id
     * @param string $index_name
     * @param string $type_name
     * @return bool
     */
    public function existsDoc(int $id = 1, string $index_name = 'gyx_ik', string $type_name = '_doc'): bool
    {
        return $this->client->exists([
            'index' => $index_name,
            'type' => $type_name,
            'id' => $id
        ]);
    }

    /**
     * Fetches a document by id.
     *
     * @param int $id
     * @param string $index_name
     * @param string $type_name
     * @return array
     */
    public function getDoc(int $id = 1, string $index_name = 'gyx_ik', string $type_name = '_doc'): array
    {
        return $this->client->get([
            'index' => $index_name,
            'type' => $type_name,
            'id' => $id
        ]);
    }

    /**
     * Updates a document.
     *
     * The body is forwarded unchanged, so it must already be shaped the way the
     * Elasticsearch Update API expects (for a partial update: ['doc' => [...]]).
     * New fields can be added this way, but doing so carelessly is discouraged.
     *
     * @param int $id
     * @param array $body
     * @param string $index_name
     * @param string $type_name
     * @return array
     */
    public function updateDoc(int $id = 1, array $body = [], string $index_name = 'gyx_ik', string $type_name = '_doc'): array
    {
        return $this->client->update([
            'index' => $index_name,
            'type' => $type_name,
            'id' => $id,
            'body' => $body
        ]);
    }

    /**
     * Deletes a document by id.
     *
     * @param int $id
     * @param string $index_name
     * @param string $type_name
     * @return array
     */
    public function deleteDoc(int $id = 1, string $index_name = 'gyx_ik', string $type_name = '_doc'): array
    {
        return $this->client->delete([
            'index' => $index_name,
            'type' => $type_name,
            'id' => $id
        ]);
    }

    /**
     * Runs a search; paging, sorting, boosting and filtering are all expressed in $body.
     *
     * @param array $body
     * @param string $index_name
     * @param string $type_name
     * @return array
     */
    public function searchDoc(array $body = [], string $index_name = "gyx_ik", string $type_name = "_doc"): array
    {
        return $this->client->search([
            'index' => $index_name,
            'type' => $type_name,
            'body' => $body
        ]);
    }
}
使用示例封装
/**
 * Example business wrapper: keeps goods-son rows from the database in sync with
 * the `gyx_goods_son` Elasticsearch index and searches that index.
 */
class GoodsEsLogic
{
    /**
     * Service used for all Elasticsearch calls.
     *
     * @var EsService
     */
    private $es_service;

    /**
     * Name of the index every method of this class operates on.
     *
     * @var string
     */
    private $index_name = 'gyx_goods_son';

    public function __construct()
    {
        $this->es_service = new EsService();
    }

    /**
     * Loads the goods-son row (joined with its parent goods name) that is stored as the ES document.
     *
     * @Authors Webster
     * @Email Webster@gmail.com
     * @Date 2023-06-29
     * @param int $son_id
     * @return array|\PDOStatement|string|\think\Model|null
     * @throws \think\db\exception\DataNotFoundException
     * @throws \think\db\exception\ModelNotFoundException
     * @throws \think\exception\DbException
     */
    public function getGoodsSonInfoToDoc(int $son_id = 0)
    {
        return Db::table('gyx_shop_goods_son')
            ->alias('a')
            ->leftJoin('gyx_shop_goods b', 'b.id=a.goods_id')
            ->where('a.id', $son_id)
            ->field('a.id,a.goods_id,b.goods_name,a.gc_id,a.son_name,a.son_state,a.type,a.goods_type,a.is_son_disabled')
            ->field('a.attr_json,a.spec_value,a.son_seo_keywords,a.brand_name')
            ->find();
    }

    /**
     * Creates the goods index: exact-match fields are `keyword`, free-text
     * fields are `text` analysed with `ik_smart`.
     *
     * @Authors Webster
     * @Email Webster@gmail.com
     * @Date 2023-06-29
     */
    public function createGoodsIndex()
    {
        $keyword = ['type' => 'keyword'];
        $ik_text = ['type' => 'text', 'analyzer' => 'ik_smart'];

        $mapping = [
            'properties' => [
                'id' => $keyword,
                'goods_id' => $keyword,
                'goods_name' => $keyword,
                'gc_id' => $keyword,
                'son_name' => $ik_text,
                'son_state' => $keyword,
                'type' => $keyword,
                'goods_type' => $keyword,
                'is_son_disabled' => $keyword,
                'attr_json' => $ik_text,
                'spec_value' => $ik_text,
                'son_seo_keywords' => $ik_text,
                'brand_name' => $keyword,
            ]
        ];

        $this->es_service->createIndex($this->index_name, $mapping);
    }

    /**
     * Re-syncs an existing ES document from the database row.
     *
     * @Authors Webster
     * @Email Webster@gmail.com
     * @Date 2023-06-29
     * @param int $son_id
     * @throws \RuntimeException When the row does not exist.
     */
    public function updateGoodsSonDoc(int $son_id = 0)
    {
        $son_info = $this->loadSonDocOrFail($son_id);

        // The Update API expects the changed fields under the "doc" key,
        // not as the top-level request body.
        $this->es_service->updateDoc($son_id, ['doc' => $son_info], $this->index_name);
    }

    /**
     * Indexes the database row as a new ES document.
     *
     * @Authors Webster
     * @Email Webster@gmail.com
     * @Date 2023-06-29
     * @param int $son_id
     * @throws \RuntimeException When the row does not exist.
     */
    public function createGoodsSonDoc(int $son_id = 0)
    {
        $son_info = $this->loadSonDocOrFail($son_id);
        $this->es_service->addDoc($son_id, $son_info, $this->index_name);
    }

    /**
     * Searches enabled, on-sale goods.
     *
     * Every whitespace-separated keyword must match at least one of the
     * searched fields, and every attribute filter must match at least one of
     * its `|`-separated values.
     *
     * @Authors Webster
     * @Email Webster@gmail.com
     * @Date 2023-06-29
     * @param string $keywords Whitespace-separated search terms.
     * @param array $attr_arr List of ['key' => ..., 'val' => 'v1|v2'] filters; malformed entries are skipped.
     * @param int $page 1-based page number; values below 1 are treated as 1.
     * @param int $size Page size.
     * @return array ['count' => total hits, 'list' => document sources]
     */
    public function searchGoodsSonDoc(string $keywords = '', array $attr_arr = [], int $page = 1, int $size = 10): array
    {
        $must = [
            ["match_phrase" => ["son_state" => ["query" => "1"]]],
            ["match_phrase" => ["is_son_disabled" => ["query" => "0"]]]
        ];

        foreach ($this->splitKeywords($keywords) as $term) {
            $must[] = [
                'bool' => [
                    'should' => [
                        ['match_phrase' => ["son_name" => ["query" => $term]]],
                        ['match_phrase' => ["attr_json" => ["query" => $term]]],
                        ['match_phrase' => ["son_seo_keywords" => ["query" => $term]]],
                        ['match_phrase' => ["brand_name" => ["query" => $term]]],
                        ['match_phrase' => ["id" => ["query" => $term]]]
                    ]
                ]
            ];
        }

        foreach ($attr_arr as $attr) {
            // Skip entries that do not carry both parts of the filter.
            if (!is_array($attr) || !isset($attr['key'], $attr['val'])) {
                continue;
            }

            $should = [];
            foreach (explode('|', (string) $attr['val']) as $value) {
                $should[] = ['match_phrase' => ["attr_json" => ["query" => $attr['key'] . '-' . $value]]];
            }
            $must[] = ['bool' => ['should' => $should]];
        }

        $body = [
            'query' => ['bool' => ['must' => $must]],
            // "from" is a hit offset, not a page index.
            'from' => (max($page, 1) - 1) * $size,
            'size' => $size
        ];

        $res = $this->es_service->searchDoc($body, $this->index_name)['hits'];

        return ['count' => $res['total']['value'], 'list' => array_column($res['hits'], '_source')];
    }

    /**
     * Loads the row for $son_id and fails loudly when it is missing, so a
     * null/empty document is never sent to Elasticsearch.
     *
     * @param int $son_id
     * @return mixed The non-empty row as returned by getGoodsSonInfoToDoc().
     * @throws \RuntimeException When no row is found.
     */
    private function loadSonDocOrFail(int $son_id)
    {
        $son_info = $this->getGoodsSonInfoToDoc($son_id);
        if (empty($son_info)) {
            throw new \RuntimeException('物料不存在:' . $son_id);
        }

        return $son_info;
    }

    /**
     * Splits the raw keyword string on runs of whitespace and drops empty terms.
     *
     * @param string $keywords
     * @return string[]
     */
    private function splitKeywords(string $keywords): array
    {
        $terms = preg_split('/\s+/u', $keywords, -1, PREG_SPLIT_NO_EMPTY);
        if ($terms === false) {
            // Pattern failed (e.g. invalid UTF-8): fall back to a plain space split.
            $terms = explode(' ', $keywords);
        }

        $clean = [];
        foreach ($terms as $term) {
            if ($term !== '') {
                $clean[] = $term;
            }
        }

        return $clean;
    }
}
MySQL 数据同步ES
同步双写
这是能想到的最直接的方式,在写入MySQL时,直接也同步往ES里写一份数据。
- 优点:
- 实现简单
- 缺点:
- 业务耦合,商品管理中耦合大量数据同步代码
- 影响性能,写入两个存储,响应时间变长
- 不便扩展:搜索可能有一些个性化需求,需要对数据进行聚合,这种方式不方便实现
异步双写
修改商品的时候,先把商品数据丢进MQ,为了解耦合,我们一般会拆分一个搜索服务,由搜索服务去订阅商品变动的消息,来完成同步。
- 优点:
- 解耦合,商品服务无需关注数据同步
- 时效性较好,使用MQ,正常情况下,同步完成在秒级
- 缺点:
- 引入新的组件和服务,增加了复杂度
定时任务
如果数据量没有那么大,定时任务也是一种选择。
定时任务,最麻烦的一点是频率不好选,频率高的话,会非自然的形成业务的波峰,导致存储的CPU、内存占用波峰式上升,频率低的话实时性比较差,而且也有波峰的情况。
- 优点:
- 实现比较简单
- 缺点:
- 实时性难以保证
- 对存储的压力较大
数据订阅
MySQL通过binlog订阅实现主从同步,各路数据订阅框架比如:canal 就依据这个原理,将client组件伪装成从库,来实现数据订阅,可以实现零代码介入。
这种方式看起来和异步双写比较像,但是它降低了商品服务的耦合,同时数据的实时性更好
- 优点:
- 业务侵入性较低
- 时效性较好
- 缺点:
- 对数据订阅框架得熟悉
实际上MySQL同步到其他的数据存储,大体上都是类似的几种方案
待人友善是修养,
独往独来是性格。
--qq:2986957136