linux中sphinx及coreseek及mmseg3的安装
1、在root目录下,下载sphinx安装包sphinx-2.2.11-release.tar.gz
2、解压安装包
tar zxvf sphinx-2.2.11-release.tar.gz
3、检查当前系统是否满足安装sphinx的条件,并指定安装目录
cd sphinx-2.2.11-release
./configure --prefix=/usr/local/sphinx
4、编译和安装sphinx
make && make install
5、备份配置文件
cd /usr/local/sphinx/etc
cp sphinx.conf.dist sphinx.conf
6、修改配置文件
1)、导入sphinx准备的测试数据会导入test库和两张表(确保apache mysql服务已经打开)
mysql -uroot -p</usr/local/sphinx/etc/example.sql
2)、修改sphinx配置文件
vim /usr/local/sphinx/etc/sphinx.conf
在vim下搜索 /sql_host (告诉sphinx mysql链接信息)
sql_host = localhost # 服务器名
sql_user = root # 数据库账户
sql_pass = your_password # 数据库密码(请替换为实际密码,不要在文档中保存真实密码)
sql_db = test # sphinx使用的库名
sql_port = 3306 # optional, default is 3306
在vim下搜索 /sql_query_pre
打开下面配置的注释
sql_query_pre = SET NAMES utf8
在vim下搜索 /exceptions.txt 注释下面配置
# exceptions = /data/exceptions.txt
7、创建索引文件
cd /usr/local/sphinx/bin
./indexer --all #创建索引
./searchd -c /usr/local/sphinx/etc/sphinx.conf #启动搜索服务(searchd)
8、测试索引
mysql -h0 -P9306
select * from test1 WHERE MATCH('my document');
安装coreseek中文分词 (其实就是一个sphinx+中文词库)
wget http://www.coreseek.cn/uploads/csft/4.0/coreseek-4.1-beta.tar.gz
tar xzvf coreseek-4.1-beta.tar.gz
cd coreseek-4.1-beta
##安装mmseg
cd mmseg-3.2.14
./bootstrap #输出的warning信息可以忽略,如果出现error则需要解决
./configure --prefix=/usr/local/mmseg3
make && make install
cd ..
##安装coreseek
cd csft-4.1
sh buildconf.sh #输出的warning信息可以忽略,如果出现error则需要解决
./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql
make && make install
cd ..
##测试mmseg分词,coreseek搜索
cd testpack
cat var/test/test.xml #此时应该正确显示中文
/usr/local/mmseg3/bin/mmseg -d /usr/local/mmseg3/etc var/test/test.xml
--------------------------------------------------------------------------------------------------------------------------------------------------------
启动indexer服务:
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/sphinx.conf --rotate --all
代理文件(sphinx.master.conf):
# sphinx.master.conf -- front-end searchd that holds no local data and only
# forwards queries to the agent searchd listed in the distributed index below.
# One directive per line: a '#' starts a comment that runs to end of line.

# Global indexer settings
indexer
{
    mem_limit = 128M
}

# searchd daemon settings
searchd
{
    # Native API port
    listen = 9312
    # MySQL-protocol port (connect with: mysql -h127.0.0.1 -P9412)
    listen = localhost:9412:mysql41
    read_timeout = 5
    max_children = 30
    max_matches = 4000
    seamless_rotate = 1
    max_packet_size = 64M
    preopen_indexes = 0
    unlink_old = 1
    max_filters = 256
    max_filter_values = 4096
    max_batch_queries = 256
    pid_file = /usr/local/coreseek/log/searchd_mysql_master9312.pid
    log = /usr/local/coreseek/log/searchd_mysql_master9312.log
    query_log = /usr/local/coreseek/log/query_mysql_master9312.log
    binlog_path = # empty value disables binlog
    dist_threads = 16
}

# Distributed proxy index -- intended to speed up searching by delegating
# to the remote index `search_job` served on 127.0.0.1:9721.
index job
{
    type = distributed
    agent = 127.0.0.1:9721:search_job
    agent_query_timeout = 30000
    agent_connect_timeout = 30000
}
本地索引配置文件(job.conf):
# Sphinx configuration template (job.conf)
# Defines the local searchd instance, the MySQL data source and the
# `search_job` index. One directive per line: a '#' starts a comment that
# runs to end of line.

# Global indexer settings
indexer
{
    mem_limit = 128M
}

# searchd daemon settings
searchd
{
    # Native API port (the address used by the `agent` entry in sphinx.master.conf)
    listen = 9721
    # MySQL-protocol port
    listen = localhost:9821:mysql41
    read_timeout = 5
    max_children = 30
    max_matches = 4000
    seamless_rotate = 1
    max_packet_size = 64M
    preopen_indexes = 0
    unlink_old = 1
    max_filters = 256
    max_filter_values = 4096
    max_batch_queries = 256
    pid_file = /usr/local/coreseek/log/job_mysql.pid
    log = /usr/local/coreseek/log/searchd_job_mysql.log
    query_log = /usr/local/coreseek/log/query_job_mysql.log
    binlog_path = # empty value disables binlog
    attr_flush_period = 900 # persist updates to disk every 15 minutes
}

#============= end of global Sphinx settings =============

# Base source: connection settings and attribute/field declarations that
# concrete sources inherit. sql_host / sql_pass / sql_db are placeholders.
source text_base
{
    type = mysql
    sql_host = host
    sql_user = root
    sql_pass = 密码
    sql_db = database_name
    sql_port = 17611 # optional, default is 3306

    sql_query_pre = SET NAMES utf8
    sql_query_pre = set session group_concat_max_len=1000000;
    sql_range_step = 10000

    # Left empty here; the inheriting source supplies the real query.
    sql_query =

    # NOTE(review): ":12" requests a 12-bit attribute; if the Sphinx docs'
    # bit-count semantics apply, only 0..4095 fits -- confirm this is enough
    # for payment, the *_id columns, and longitude/latitude (DOUBLE in MySQL).
    # NOTE(review): tag_id and create_time are declared below but are not
    # visibly selected by search_job's sql_query -- confirm against the schema.
    sql_attr_uint = job_id:12
    sql_field_string = job_name
    sql_field_string = labour_type
    sql_attr_uint = payment:12
    sql_attr_uint = commission:12
    sql_field_string = description
    sql_attr_uint = sex:12
    sql_attr_uint = age:12
    sql_attr_uint = longitude:12
    sql_attr_uint = latitude:12
    sql_attr_uint = tag_id:12
    sql_attr_uint = tag_type:12
    sql_attr_uint = agent_id:12
    sql_field_string = address
    sql_field_string = tag_name
    sql_field_string = create_time
    sql_attr_multi = uint city_code from field city_code
    sql_attr_multi = uint district_code from field district_code
    sql_ranged_throttle = 0
}

# Concrete source for job search, fetched in job_id ranges
# ($start/$end come from sql_query_range, stepped by sql_range_step).
source search_job : text_base
{
    sql_query_pre = set session group_concat_max_len=1000000
    sql_query_pre = SET NAMES utf8
    sql_query_range = select MIN(job_id),MAX(job_id) from recruit_job_tag

    # NOTE(review): the second join matches dictionary.tag_id against
    # tag.job_id; this looks like it was meant to be tag.tag_id -- confirm
    # against the recruit_job_tag schema before relying on the results.
    sql_query = SELECT job.*,dictionary.tag_name,dictionary.tag_type,dictionary.agent_id FROM recruit_job job \
        INNER JOIN recruit_job_tag tag ON job.job_id = tag.job_id \
        INNER JOIN dictionary_job_tag dictionary ON dictionary.tag_id = tag.job_id \
        where job.job_id >=$start and job.job_id<=$end ;
}

# Local index built from the search_job source, using the mmseg dictionary
# under /usr/local/mmseg3/etc/ for Chinese word segmentation.
index search_job
{
    source = search_job
    path = /usr/local/coreseek/var/data/search_job
    docinfo = extern
    mlock = 0
    morphology = none
    min_word_len = 2
    html_strip = 1
    preopen = 1
    #min_infix_len = 1
    index_sp = 1
    charset_dictpath = /usr/local/mmseg3/etc/
    charset_type = zh_cn.utf-8
}
表结构:
-- Job posting table (table comment: '招聘职位表'), MySQL / InnoDB, utf8.
-- NOTE(review): `NewTable` looks like an export-tool placeholder name; the
-- Sphinx source in job.conf reads from `recruit_job` -- confirm the real name.
CREATE TABLE `NewTable` (
    -- Primary key
    `job_id`        bigint(20)   NOT NULL AUTO_INCREMENT COMMENT '职位ID',
    `job_name`      varchar(50)  CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '职位名称',
    `labour_type`   varchar(1)   CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '劳务类型,取值参考通用字典表labour_type字段',
    `payment`       int(11)      NULL DEFAULT NULL COMMENT '薪酬',
    -- Percentage value, 0..100 per the column comment
    `commission`    int(3)       NULL DEFAULT NULL COMMENT '佣金,取值0~100,表示百分比数值',
    `description`   varchar(500) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '描述',
    -- '0' = female, '1' = male per the column comment
    `sex`           enum('0','1') CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '性别,0-女,1-男',
    `age`           int(2)       NULL DEFAULT NULL COMMENT '年龄要求,由性别决定,男默认填写60,女默认填写55,',
    `city_code`     varchar(6)   CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '所在城市',
    `district_code` varchar(6)   CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '区域',
    `address`       varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '详细地址',
    `longitude`     double(10,6) NULL DEFAULT NULL COMMENT '经度',
    `latitude`      double(10,6) NULL DEFAULT NULL COMMENT '纬度',
    PRIMARY KEY (`job_id`)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8
COLLATE = utf8_general_ci
COMMENT = '招聘职位表'
AUTO_INCREMENT = 5
ROW_FORMAT = DYNAMIC;
启动命令:
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/job.conf --rotate --all
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/job.conf
/usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/sphinx.master.conf
mysql -h127.0.0.1 -P9412