linux中sphinx及coreseek及mmseg3的安装

1、在root目录下,下载sphinx安装包sphinx-2.2.11-release.tar.gz

 

2、解压安装包

tar zxvf sphinx-2.2.11-release.tar.gz

 

3、检查当前系统是否满足sphinx的安装条件,并指定安装目录

cd sphinx-2.2.11-release

./configure --prefix=/usr/local/sphinx

 

4、编译和安装sphinx

make && make install

 

5、备份配置文件

cd /usr/local/sphinx/etc

cp sphinx.conf.dist sphinx.conf

 

6、修改配置文件

 

1)、导入sphinx准备的测试数据会导入test库和两张表(确保apache mysql服务已经打开)

mysql -uroot -p</usr/local/sphinx/etc/example.sql

 

2)、修改sphinx配置文件

vim /usr/local/sphinx/etc/sphinx.conf

在vim下搜索    /sql_host   (告诉sphinx mysql链接信息)

        sql_host                = localhost     # 服务器名

        sql_user                = root          # 数据库账户

        sql_pass                = qaz5788943    # 数据库密码(示例,请替换为自己的密码)

        sql_db                  = test          # sphinx使用的库名

        sql_port                = 3306  # optional, default is 3306

 

在vim下搜索   /sql_query_pre

 

打开下面配置的注释

  sql_query_pre          = SET NAMES utf8 

 

在vim下搜索 /exceptions.txt 注释下面配置

#       exceptions              = /data/exceptions.txt

 

 

7、创建索引文件

cd /usr/local/sphinx/bin 

 ./indexer --all  #创建索引   

./searchd -c /usr/local/sphinx/etc/sphinx.conf #启动searchd搜索服务

 

8、测试索引

mysql -h0 -P9306

select * from test1 WHERE MATCH('my document');

 

安装coreseek中文分词 (其实就是一个sphinx+中文词库)

 

wget http://www.coreseek.cn/uploads/csft/4.0/coreseek-4.1-beta.tar.gz

tar xzvf coreseek-4.1-beta.tar.gz

cd coreseek-4.1-beta

##安装mmseg

cd mmseg-3.2.14

./bootstrap #输出的warning信息可以忽略,如果出现error则需要解决

./configure --prefix=/usr/local/mmseg3

make && make install

cd ..

##安装coreseek

cd csft-4.1

sh buildconf.sh #输出的warning信息可以忽略,如果出现error则需要解决

./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql

make && make install

cd ..

##测试mmseg分词,coreseek搜索

cd testpack

cat var/test/test.xml #此时应该正确显示中文 

/usr/local/mmseg3/bin/mmseg -d /usr/local/mmseg3/etc var/test/test.xml

 

 

--------------------------------------------------------------------------------------------------------------------------------------------------------

运行indexer建立索引:

/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/sphinx.conf --rotate --all

 

 

代理文件(sphinx.master.conf):

# Indexer settings
indexer
{
    # Memory limit for an indexing run
    mem_limit = 128M
}

# searchd settings for the master instance described by this file
searchd
{
    # Listeners: port 9312 with the default protocol, plus the MySQL
    # wire protocol (mysql41) on localhost:9412
    listen = 9312
    listen = localhost:9412:mysql41

    # Connection and query limits
    read_timeout = 5
    max_children = 30
    max_matches = 4000
    max_packet_size = 64M
    max_filters = 256
    max_filter_values = 4096
    max_batch_queries = 256
    dist_threads = 16

    # Index rotation and file handling
    seamless_rotate = 1
    preopen_indexes = 0
    unlink_old = 1

    # PID file and logs
    pid_file = /usr/local/coreseek/log/searchd_mysql_master9312.pid
    log = /usr/local/coreseek/log/searchd_mysql_master9312.log
    query_log = /usr/local/coreseek/log/query_mysql_master9312.log

    binlog_path = # left empty to turn the binlog off
}



# Distributed index: queries against "job" are forwarded to the agent below
index job
{
    type = distributed

    # Remote agent given as host:port:index
    agent = 127.0.0.1:9721:search_job

    # Agent timeouts (milliseconds, per the Sphinx documentation)
    agent_connect_timeout = 30000
    agent_query_timeout = 30000
}

 

 

本地索引文件(job.conf):

# Sphinx configuration template
# Indexer settings
indexer
{
    # Memory limit for an indexing run
    mem_limit = 128M
}

# searchd settings for the instance that serves the local index
searchd
{
    # Listeners: port 9721 with the default protocol, plus the MySQL
    # wire protocol (mysql41) on localhost:9821
    listen = 9721
    listen = localhost:9821:mysql41

    # Connection and query limits
    read_timeout = 5
    max_children = 30
    max_matches = 4000
    max_packet_size = 64M
    max_filters = 256
    max_filter_values = 4096
    max_batch_queries = 256

    # Index rotation and file handling
    seamless_rotate = 1
    preopen_indexes = 0
    unlink_old = 1

    # PID file and logs
    pid_file = /usr/local/coreseek/log/job_mysql.pid
    log = /usr/local/coreseek/log/searchd_job_mysql.log
    query_log = /usr/local/coreseek/log/query_job_mysql.log

    binlog_path = # left empty to turn the binlog off
    attr_flush_period = 900 # persist updates to disk every 15 minutes
}


#=============全局sphinx设置完成======================================

# Base MySQL source: connection settings plus the attribute/field schema.
# sql_query is left empty here; the "search_job" source inherits from this
# one and supplies the actual queries.
source text_base
{
    type            = mysql

    # Connection settings (placeholders - replace with real values)
    sql_host        = host
    sql_user        = root
    sql_pass        = 密码
    sql_db          = database_name
    sql_port        = 17611    # optional, default is 3306

    sql_query_pre   = SET NAMES utf8
    sql_query_pre   = set session group_concat_max_len=1000000
    sql_range_step  = 10000
    sql_query       =

    # Integer attributes. A ":N" suffix packs the value into N bits, so it is
    # kept only where the value range is known to be small. payment and the
    # *_id attributes can exceed 4095 (the 12-bit maximum), so they use the
    # default width.
    # NOTE(review): job_id is bigint in the table and is also the document
    # id column - confirm it is needed as a separate attribute.
    sql_attr_uint        = job_id
    sql_field_string     = job_name
    sql_field_string     = labour_type
    sql_attr_uint        = payment
    sql_attr_uint        = commission:12
    sql_field_string     = description
    sql_attr_uint        = sex:12
    sql_attr_uint        = age:12

    # longitude/latitude are double(10,6) columns; an unsigned integer
    # attribute would drop the fractional part, so store them as floats.
    sql_attr_float       = longitude
    sql_attr_float       = latitude

    sql_attr_uint        = tag_id
    sql_attr_uint        = tag_type:12
    sql_attr_uint        = agent_id

    sql_field_string     = address
    sql_field_string     = tag_name
    # NOTE(review): create_time is not in the table definition shown in this
    # document - confirm the column exists.
    sql_field_string     = create_time

    # Multi-value attributes parsed from the fetched column text
    sql_attr_multi = uint city_code from field city_code
    sql_attr_multi = uint district_code from field district_code

    sql_ranged_throttle  = 0
}
# Concrete source for the job index; inherits connection settings and the
# attribute/field schema from text_base.
source search_job : text_base
{
    sql_query_pre = set session group_concat_max_len=1000000
    sql_query_pre = SET NAMES utf8

    # Id bounds for ranged fetching; $start/$end in sql_query are substituted
    # from this range in sql_range_step-sized slices.
    sql_query_range = select MIN(job_id),MAX(job_id) from recruit_job_tag

    # job.* stays first so that job_id is the leading (document id) column.
    # dictionary.tag_id is selected because text_base declares a tag_id attribute.
    # The dictionary table is joined on the tag id, not the job id.
    # NOTE(review): assumes recruit_job_tag has a tag_id column - confirm.
    # NOTE(review): a job with several tags yields several rows sharing one
    # job_id, while Sphinx expects unique document ids - confirm intended.
    sql_query     =     SELECT job.*,dictionary.tag_id,dictionary.tag_name,dictionary.tag_type,dictionary.agent_id FROM recruit_job job \
                        INNER JOIN recruit_job_tag tag ON job.job_id = tag.job_id \
                        INNER JOIN dictionary_job_tag dictionary ON dictionary.tag_id = tag.tag_id \
                        where job.job_id >=$start and job.job_id<=$end
}

# Index built from the search_job source
index search_job
{
    source = search_job
    path = /usr/local/coreseek/var/data/search_job

    # Character set handling: coreseek charset type plus the mmseg
    # dictionary directory
    charset_type = zh_cn.utf-8
    charset_dictpath = /usr/local/mmseg3/etc/

    # Storage and loading
    docinfo = extern
    mlock = 0
    preopen = 1

    # Text processing
    morphology = none
    min_word_len = 2
    html_strip = 1
    index_sp = 1
    #min_infix_len = 1
}

 

 

 

表结构:

-- Job posting table read by the Sphinx source query (aliased there as "job").
-- Named recruit_job to match that query; `NewTable` was a tool placeholder.
-- job_id is the first column, which the indexing query relies on when it
-- selects job.* and uses the leading column as the document id.
CREATE TABLE `recruit_job` (
    `job_id`        bigint(20)   NOT NULL AUTO_INCREMENT COMMENT '职位ID',
    `job_name`      varchar(50)  CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '职位名称',
    `labour_type`   varchar(1)   CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '劳务类型,取值参考通用字典表labour_type字段',
    `payment`       int(11)      NULL DEFAULT NULL COMMENT '薪酬',
    -- percentage value, 0 to 100
    `commission`    int(3)       NULL DEFAULT NULL COMMENT '佣金,取值0~100,表示百分比数值',
    `description`   varchar(500) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '描述',
    -- '0' = female, '1' = male
    `sex`           enum('0','1') CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '性别,0-女,1-男',
    `age`           int(2)       NULL DEFAULT NULL COMMENT '年龄要求,由性别决定,男默认填写60,女默认填写55,',
    `city_code`     varchar(6)   CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '所在城市',
    `district_code` varchar(6)   CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '区域',
    `address`       varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '详细地址',
    `longitude`     double(10,6) NULL DEFAULT NULL COMMENT '经度',
    `latitude`      double(10,6) NULL DEFAULT NULL COMMENT '纬度',
    PRIMARY KEY (`job_id`)
)
ENGINE=InnoDB
DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci
COMMENT='招聘职位表'
AUTO_INCREMENT=5
ROW_FORMAT=DYNAMIC
;

 

 

启动命令:

/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/job.conf --rotate --all
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/job.conf

 

/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/sphinx.master.conf


mysql -h127.0.0.1 -P9412

 

posted on 2017-11-12 13:13  ziyi_ang  阅读(258)  评论(0)  编辑  收藏  举报

导航