Sphinx全文索引引擎

一、什么是sphinx

原理:Sphinx 先为数据库表中的数据建立全文索引;PHP 查询 Sphinx 时,Sphinx 用查询关键字在索引中进行匹配,返回匹配记录的 id 列表,PHP 再根据这些 id 到数据库中查询完整数据。

二、下载

链接:https://pan.baidu.com/s/1ic3JTra4NKbEgxV0bAjXsw
提取码:b436

重要文件说明:

三、使用sphinx

 1、复制csft_mysql.conf文件到sphinx根目录,并修改名称为sphinx.conf

2、配置sphinx.conf

#MySQL数据源配置,详情请查看:http://www.coreseek.cn/products-install/mysql/
#请先将var/test/documents.sql导入数据库,并配置好以下的MySQL用户密码数据库

#源定义
source exhibit
{
    type                    = mysql

    # MySQL connection settings -- adjust to your environment.
    sql_host                = localhost
    sql_user                = root
    sql_pass                = root
    sql_db                  = test
    sql_port                = 3306
    # Run before the main query so MySQL returns UTF-8 data.
    sql_query_pre           = SET NAMES utf8

    # The first column of sql_query must be the document ID: a unique, non-zero
    # integer. A key with a different name is aliased to id (here: itemid AS id).
    # title, keyword and address are not declared as attributes below, so they
    # are full-text indexed. addtime is selected so it can be stored as an attribute.
    # NOTE(review): assumes destoon_exhibit has an integer addtime column (the
    # sample query result later in this article shows an "addtime" attribute) -- confirm.
    sql_query               = SELECT itemid AS id, title, keyword, address, addtime FROM destoon_exhibit

    # An attribute must name a column that sql_query actually returns. The
    # previous value (itemid) is not a result column: it is aliased to id, which
    # is the document ID and is always returned with each match anyway.
    # The value read from SQL must be an integer (UNIX timestamp).
    sql_attr_timestamp      = addtime

    # Character set to use for command-line queries.
    sql_query_info_pre      = SET NAMES utf8
    # Optional: fetch the original row for command-line queries.
    #sql_query_info         = SELECT * FROM destoon_exhibit WHERE itemid=$id
}

#index定义
index exhibit
{
    # Name of the source block that feeds this index.
    source           = exhibit
    # Path prefix of the index files; change to a real absolute path,
    # e.g. /usr/local/coreseek/var/...
    path             = G:/phpstudy/Sphinx/var/data/exhibit
    docinfo          = extern
    mlock            = 0
    morphology       = none
    min_word_len     = 1
    html_strip       = 0

    # Chinese word segmentation settings; details:
    # http://www.coreseek.cn/products-install/coreseek_mmseg/
    # BSD/Linux variant (must end with "/"):
    #charset_dictpath = /usr/local/mmseg3/etc/
    # Windows variant: must end with "/", absolute path preferred,
    # e.g. C:/usr/local/coreseek/etc/...
    charset_dictpath = G:/phpstudy/Sphinx/etc/
    charset_type     = zh_cn.utf-8
}

#indexer全局设置
indexer
{
    # Memory limit for the indexer.
    mem_limit = 128M
}

#searchd服务定义
searchd
{
    # Port searchd listens on; the PHP client connects to this port.
    listen          = 9312
    read_timeout    = 5
    max_children    = 30

    # Upper bound on returned records: even if more documents match,
    # only this many are returned.
    max_matches     = 1000

    seamless_rotate = 0
    preopen_indexes = 0
    unlink_old      = 1

    # Runtime files; change these to real absolute paths on your machine,
    # e.g. /usr/local/coreseek/var/...
    pid_file        = G:/phpstudy/Sphinx/var/log/searchd_mysql.pid
    log             = G:/phpstudy/Sphinx/var/log/searchd_mysql.log
    query_log       = G:/phpstudy/Sphinx/var/log/query_mysql.log
}

3、安装sphinx服务器

4、创建索引

5、开启sphinx服务

6、安装php扩展

下载地址:

链接:https://pan.baidu.com/s/1kVInF6Whk2y0Vwsv9rwhvA
提取码:vd97
复制这段内容后打开百度网盘手机App,操作更方便哦

将php_sphinx.dll文件复制到如图地址:

重启服务器:

四、PHP操作Sphinx

<?php
/**
 * Minimal full-text query against a running searchd via the Sphinx PHP extension.
 *
 * Sends one keyword to searchd, dumps the raw matches, and prints the matched
 * document IDs as a comma-separated list (ready to be used to look the rows up
 * in the database).
 */

$sphinx = new SphinxClient();
// Host name and TCP port of searchd.
$sphinx->SetServer("localhost", 9312);
// Connection timeout.
$sphinx->SetConnectTimeout(3);
// Return matches as a plain list of arrays, each carrying its own "id" key.
$sphinx->SetArrayResult(true);
// Full-text matching mode. Available modes:
/*
SPH_MATCH_ALL        Match all query words (default mode).
SPH_MATCH_ANY        Match any of the query words.
SPH_MATCH_PHRASE     Treat the query as a phrase; requires a complete in-order match.
SPH_MATCH_BOOLEAN    Treat the query as a boolean expression.
SPH_MATCH_EXTENDED   Treat the query as an expression in Sphinx's internal query language.
SPH_MATCH_FULLSCAN   Full scan; the query words are ignored.
SPH_MATCH_EXTENDED2  Like SPH_MATCH_EXTENDED, with ranking and weighting support.
*/
$sphinx->SetMatchMode(SPH_MATCH_ANY);
$q = "2017";
$result = $sphinx->Query($q);

// Query() returns false on failure (searchd down, bad index, ...); report the
// reason instead of silently indexing into a non-array.
if ($result === false) {
    exit("Sphinx query failed: " . $sphinx->GetLastError() . PHP_EOL);
}

// The "matches" key is absent when nothing matched, so default to an empty list.
$matches = isset($result['matches']) ? $result['matches'] : array();
var_dump($matches);
$id_array = array_column($matches, "id");
$ids = implode(",", $id_array);
echo $ids;
array(20) {
  [0]=>
  array(3) {
    ["id"]=>
    string(4) "9388"
    ["weight"]=>
    int(2574)
    ["attrs"]=>
    array(1) {
      ["addtime"]=>
      string(10) "1488729600"
    }
  }
  [1]=>
  array(3) {
    ["id"]=>
    string(5) "24571"
    ["weight"]=>
    int(2574)
    ["attrs"]=>
    array(1) {
      ["addtime"]=>
      string(10) "1488729600"
    }
  }
  [2]=>
  array(3) {
    ["id"]=>
    string(2) "68"
    ["weight"]=>
    int(2569)
    ["attrs"]=>
    array(1) {
      ["addtime"]=>
      string(10) "1504195200"
    }
  }
  [3]=>
  array(3) {
    ["id"]=>
    string(2) "81"
    ["weight"]=>
    int(2569)
    ["attrs"]=>
    array(1) {
      ["addtime"]=>
      string(10) "1504195200"
    }
  }
...
} 9388,24571,68,81,3186,3213,3278,3444,3470,3645,3785,3843,3890,3907,4120,4164,4182,4212,4235,4568

注意:数据库中的数据更新后,索引不会自动更新,需要通过定时任务(例如 Windows 计划任务或 Linux 的 crontab)定期执行 indexer 重新建立索引。

五、Linux下安装Sphinx

链接:https://pan.baidu.com/s/1tUF8Y5imp-ryHxoDDHH2MQ
提取码:4pav 

# Unpack the downloaded Sphinx 3.0.3 Linux (amd64) archive into the current directory.
tar -zxvf sphinx-3.0.3-facc3fb-linux-amd64.tar.gz

 

 解压出来不用安装,像windows下一样配置即可。

 linux配置

source destoon
{
    type                = mysql

    # Connection settings are intentionally blank here -- fill in your own.
    sql_host            =
    sql_user            =
    sql_pass            =
    sql_db              =
    sql_port            = 3306    # optional, default is 3306

    # Pre-queries, listed in the same order as before.
    sql_query_pre       = SET NAMES utf8
    # NOTE(review): this statement depends on the MySQL server still having the
    # query_cache_type variable -- confirm against your MySQL version.
    sql_query_pre       = SET SESSION query_cache_type=OFF

    # First column is the document ID (itemid aliased to id); title is the
    # full-text field. Only rows with status = 3 are indexed.
    sql_query           = SELECT itemid as id,title FROM destoon_sell_5 where status = 3
    sql_ranged_throttle = 0
}

index sell
{
    # Fed by the "destoon" source block.
    source          = destoon
    path            = /www/server/sphinx/var/data/sell
    docinfo         = none
    dict            = keywords

    mlock           = 0
    morphology      = none
    min_word_len    = 1
    min_prefix_len  = 1
    expand_keywords = 1

    # Character-level (length 1) n-gram indexing for the characters listed in
    # ngram_chars. The list folds full-width digits and letters to their ASCII
    # forms and includes CJK ranges.
    ngram_len       = 1
    #ngram_chars    = U+3000..U+2FA1F
    ngram_chars     = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,A..Z->a..z, a..z,U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF,U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF

    # U+23 is the "#" character, written as a code point because a literal "#"
    # would start a comment in this file.
    blend_chars     = +, &, U+23
    blend_mode      = trim_tail, skip_pure

    html_strip      = 0
}

indexer
{
    # Memory limit for the indexer.
    mem_limit = 128M
}

#############################################################################
## searchd settings
#############################################################################

searchd
{
    # Listeners: 9312 for the native API (used by the PHP client),
    # 9306 for the MySQL wire protocol.
    listen                       = 9312
    listen                       = 9306:mysql41

    # Runtime files.
    pid_file                     = /www/server/sphinx/var/log/searchd.pid
    log                          = /www/server/sphinx/var/log/searchd.log
    query_log                    = /www/server/sphinx/var/log/query.log

    # Timeouts and connection limits.
    read_timeout                 = 5
    client_timeout               = 300
    max_children                 = 30
    persistent_connections_limit = 30

    # Index rotation behaviour.
    seamless_rotate              = 1
    preopen_indexes              = 1
    unlink_old                   = 1

    # Request size limits.
    max_packet_size              = 8M
    max_filters                  = 256
    max_filter_values            = 4096
    max_batch_queries            = 32

    workers                      = threads # for RT to work
}

建立索引

# Run from the Sphinx root directory: build every index defined in ./bin/sphinx.conf (--all).
# --rotate asks an already-running searchd to switch to the freshly built index files.
./bin/indexer -c ./bin/sphinx.conf --all --rotate

启动服务

# Run from the Sphinx root directory, with the same config file that is passed to the indexer.
./bin/searchd --config ./bin/sphinx.conf

 

posted @ 2019-08-14 12:27  样子2018  阅读(1638)  评论(0)  编辑  收藏  举报