discuz sphinx全文检索搜索引擎方案

基于discuz的索引配置文件,这个配置文件比较灵活,可以根据不同的需求来配置

#
# linuxTone full index search configure file
#
# Main data source: rows of cdb_posts with first=1, read from the lt_bbs MySQL database.
# NOTE(review): first=1 presumably marks the opening post of a thread in Discuz - confirm.
source lt_posts
{
    type = mysql
    sql_host = 127.0.0.1
    sql_user = root
    sql_pass =
    sql_db = lt_bbs
    sql_port = 3306
    # Pre-query: switch the connection to UTF-8 before the main fetch query runs.
    sql_query_pre = SET NAMES utf8
    sql_query = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 # The index is built on the posts table so that the subject, message and author fields can all be searched at once
    # fid and dateline are declared as attributes; the first column (pid) is the document id.
    sql_attr_uint = fid
    sql_attr_timestamp = dateline
    # Used by the command-line search tool only, to display the matched row.
    sql_query_info = SELECT * FROM cdb_posts WHERE pid=$id
}
# Main full-text index built from the lt_posts source.
index lt_posts
{
    source = lt_posts
    # Base file name (without extension) of the index files on disk.
    path = /data/sphinx/data/lt_posts
    docinfo = extern
    mlock = 0
    morphology = none
    # Words shorter than 2 characters are not indexed.
    min_word_len = 2
    # Strip HTML markup from the indexed text.
    html_strip = 1
    # NOTE(review): charset_dictpath and the zh_cn.utf-8 charset type look like
    # Coreseek/mmseg extensions for Chinese word segmentation - confirm against the installed build.
    charset_dictpath = /usr/local/mmseg-3.2.13/etc/
    charset_type = zh_cn.utf-8
    # 0 disables n-gram indexing.
    ngram_len = 0
}
########## 增量索引 ##################
# Delta data source: same columns as lt_posts, restricted to rows whose dateline
# falls within the last 3600*10 seconds (10 hours).
source delta
{
    type = mysql
    sql_host = 127.0.0.1
    sql_user = root
    sql_pass =
    sql_db = lt_bbs
    sql_port = 3306 # optional, default is 3306
    # Pre-query: switch the connection to UTF-8 before the main fetch query runs.
    sql_query_pre = SET NAMES utf8
    sql_query = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 and dateline > unix_timestamp()-3600*10 # The delta index selects newly added rows by subtracting the desired interval from the current timestamp

    # Same attributes as the main source so both indexes can be searched and merged together.
    sql_attr_uint = fid
    sql_attr_timestamp = dateline
    # Used by the command-line search tool only, to display the matched row.
    sql_query_info = SELECT * FROM cdb_posts WHERE pid=$id
}
# Delta index built from the delta source; settings mirror the lt_posts index
# except for the on-disk path.
index delta
{
    source = delta
    # Base file name (without extension) of the index files on disk.
    path = /data/sphinx/data/lt_delta
    docinfo = extern
    mlock = 0
    morphology = none
    # Words shorter than 2 characters are not indexed.
    min_word_len = 2
    # Strip HTML markup from the indexed text.
    html_strip = 1
    # NOTE(review): charset_dictpath and the zh_cn.utf-8 charset type look like
    # Coreseek/mmseg extensions for Chinese word segmentation - confirm against the installed build.
    charset_dictpath = /usr/local/mmseg-3.2.13/etc/
    charset_type = zh_cn.utf-8
    # 0 disables n-gram indexing.
    ngram_len = 0
}
# Settings for the indexer program.
indexer
{
    # Upper bound on the RAM the indexer may use while building an index.
    mem_limit = 32M
}
# Settings for the searchd daemon that the PHP client connects to.
searchd
{
    # TCP port searchd listens on; the PHP script connects to 127.0.0.1:9312.
    port = 9312
    log = /data/sphinx/var/log/searchd.log
    query_log = /data/sphinx/var/log/query.log
    # Network read timeout, in seconds.
    read_timeout = 5
    # Maximum number of concurrent search children.
    max_children = 30
    pid_file = /data/sphinx/var/log/searchd.pid
    # Maximum number of matches kept in memory per query and returned to the client.
    max_matches = 10000
    # Keep serving from the old index while a rotated one is being loaded.
    seamless_rotate = 1
    preopen_indexes = 0
    # Remove the old index files after a successful rotation.
    unlink_old = 1
}

Sphinx 最主要的就是这个配置文件。增量索引部分可以写一个脚本放到 crontab 里定时执行。
下面介绍 Discuz 中调用 Sphinx 的 PHP 部分。Sphinx 的接口采用 PHP 扩展,可以通过 pecl 命令(pecl install sphinx)安装,或者从 http://pecl.php.net/package/sphinx 下载后手动安装。

<?php
/**
 * Full-text search service.
 *
 * Reads the keyword (`q`) and the page number (`page`) from the query string,
 * asks searchd for the matching post ids, loads those posts from cdb_posts,
 * highlights the keyword in subject/message and renders the lt_search template.
 *
 * Variables left for the template: $q, $page, $pages, $query_totals,
 * $query_time, $data and $multipage.
 */
define('IN_DISCUZ', true);
require_once './include/common.inc.php';

// Accept only a string keyword: q[]=... would otherwise reach strip_tags() as an array.
$q = isset($_GET['q']) && is_string($_GET['q']) ? $_GET['q'] : '';
$q = str_replace(array('<', '>', ' ', '\'', ','), array('', '', ' ', '', ''), strip_tags($q));

$page = isset($_GET['page']) && intval($_GET['page']) > 0 ? intval($_GET['page']) : 1;
$perNum = 20;
$offset = ($page - 1) * $perNum;

$search = new SphinxClient();
$search->setServer('127.0.0.1', 9312);
$search->setConnectTimeout(2);
$search->setArrayResult(true);
$search->setMatchMode(SPH_MATCH_ANY);
$search->setRankingMode(SPH_RANK_PROXIMITY_BM25);
$search->setSortMode(SPH_SORT_EXTENDED, '@relevance desc,@weight desc');
$search->setLimits($offset, $perNum);
$search->setFieldWeights(array('subject' => 2000, 'message' => 0));

$rs = array();
$pages = 0;
$multipage = '';
$query_totals = $query_time = 0;
if ($q !== '') {
	$result = $search->query($q, "*");
	// query() returns false when searchd is unreachable or rejects the request;
	// in that case keep the empty defaults instead of indexing into false.
	if (is_array($result)) {
		$rs = $result;
		$pages = ceil($rs['total'] / $perNum);

		$query_totals = $rs['total_found'];
		$query_time = $rs['time'];
	}
}

$data = $title = $content = array();

if (!empty($rs['matches']) && $page <= $pages) {
	// Document ids come back in relevance order; force them to integers
	// before they are interpolated into the SQL statement.
	$pids = array();
	foreach ($rs['matches'] as $match) {
		$pids[] = intval($match['id']);
	}
	$sql = "select pid,tid,author,authorid,subject,message,dateline from cdb_posts where pid IN(" . implode(',', $pids) . ") and status ='0' and invisible='0'";

	// IN() returns rows in storage order, so index them by pid first ...
	$rows = array();
	$query = $db->query($sql);
	while ($row = $db->fetch_array($query)) {
		$rows[intval($row['pid'])] = $row;
	}

	// ... then walk the ids in the order searchd ranked them.
	foreach ($pids as $pid) {
		if (!isset($rows[$pid])) {
			// Filtered out by the status/invisible condition.
			continue;
		}
		$row = $rows[$pid];
		$data[] = $row;
		$title[] = $row['subject'];
		// [^\]]* stops at the first closing bracket, so text between two tags is kept.
		$content[] = preg_replace('/\[[\/]?(b|img|url|color|s|hr|p|list|i|align|email|u|font|code|hide|table|tr|td|th|attach|list|indent|float)[^\]]*\]/', '', strip_tags($row['message']));
	}

	if (!empty($data)) {
		// Highlight the search keyword.
		$opts = array();
		$opts['before_match'] = '<em>';
		$opts['after_match'] = '</em>';
		$highlighted = $search->buildExcerpts($title, 'lt_posts', $q, $opts);
		if (is_array($highlighted)) {
			$title = $highlighted;
		}
		$highlighted = $search->buildExcerpts($content, 'lt_posts', $q, $opts);
		if (is_array($highlighted)) {
			$content = $highlighted;
		}

		// If excerpt building failed, $title/$content still hold the plain text.
		foreach ($data as $k => $v) {
			$data[$k]['subject'] = $title[$k];
			$data[$k]['message'] = $content[$k];
		}
	}

	$url = "s.php?q=" . urlencode($q);
	$multipage = multi($rs['total'], $perNum, $page, $url);
}

include template("lt_search");

?>

 跑主索引的shell脚本search-index.sh

#!/bin/bash
#
# Rebuild the full lt_posts index with --rotate and append the indexer's
# standard output to a log file named after the current date and hour.
#
csft_home=/usr/local/csft-3.2.13
log_file="/data/sphinx/var/$(date '+%Y-%m-%d-%H').log"

"${csft_home}/bin/indexer" --config "${csft_home}/etc/lt_posts.conf" --rotate lt_posts >> "${log_file}"

 跑增量索引

# Rebuild only the delta index, using the same config file, with --rotate.
/usr/local/csft-3.2.13/bin/indexer -c /usr/local/csft-3.2.13/etc/lt_posts.conf --rotate delta

 合并主索引和增量索引

#/usr/local/csft-3.2.13/bin/indexer --config /usr/local/csft-3.2.13/etc/lt_posts.conf --rotate --merge lt_posts delta

 

  1 <?php
  2 
  3 /*
  4     [UCenter] (C)2001-2099 Comsenz Inc.
  5     This is NOT a freeware, use is subject to license terms
  6 
  7     $Id: db.class.php 1059 2011-03-01 07:25:09Z monkey $
  8 */
  9 
 10 
 11 class ucclient_db {
 12     var $querynum = 0;
 13     var $link;
 14     var $histories;
 15 
 16     var $dbhost;
 17     var $dbuser;
 18     var $dbpw;
 19     var $dbcharset;
 20     var $pconnect;
 21     var $tablepre;
 22     var $time;
 23 
 24     var $goneaway = 5;
 25 
 26     function connect($dbhost, $dbuser, $dbpw, $dbname = '', $dbcharset = '', $pconnect = 0, $tablepre='', $time = 0) {
 27         $this->dbhost = $dbhost;
 28         $this->dbuser = $dbuser;
 29         $this->dbpw = $dbpw;
 30         $this->dbname = $dbname;
 31         $this->dbcharset = $dbcharset;
 32         $this->pconnect = $pconnect;
 33         $this->tablepre = $tablepre;
 34         $this->time = $time;
 35 
 36         if($pconnect) {
 37             if(!$this->link = mysql_pconnect($dbhost, $dbuser, $dbpw)) {
 38                 $this->halt('Can not connect to MySQL server');
 39             }
 40         } else {
 41             if(!$this->link = mysql_connect($dbhost, $dbuser, $dbpw)) {
 42                 $this->halt('Can not connect to MySQL server');
 43             }
 44         }
 45 
 46         if($this->version() > '4.1') {
 47             if($dbcharset) {
 48                 mysql_query("SET character_set_connection=".$dbcharset.", character_set_results=".$dbcharset.", character_set_client=binary", $this->link);
 49             }
 50 
 51             if($this->version() > '5.0.1') {
 52                 mysql_query("SET sql_mode=''", $this->link);
 53             }
 54         }
 55 
 56         if($dbname) {
 57             mysql_select_db($dbname, $this->link);
 58         }
 59 
 60     }
 61 
 62     function fetch_array($query, $result_type = MYSQL_ASSOC) {
 63         return mysql_fetch_array($query, $result_type);
 64     }
 65 
 66     function result_first($sql) {
 67         $query = $this->query($sql);
 68         return $this->result($query, 0);
 69     }
 70 
 71     function fetch_first($sql) {
 72         $query = $this->query($sql);
 73         return $this->fetch_array($query);
 74     }
 75 
 76     function fetch_all($sql, $id = '') {
 77         $arr = array();
 78         $query = $this->query($sql);
 79         while($data = $this->fetch_array($query)) {
 80             $id ? $arr[$data[$id]] = $data : $arr[] = $data;
 81         }
 82         return $arr;
 83     }
 84 
 85     function cache_gc() {
 86         $this->query("DELETE FROM {$this->tablepre}sqlcaches WHERE expiry<$this->time");
 87     }
 88 
 89     function query($sql, $type = '', $cachetime = FALSE) {
 90         $func = $type == 'UNBUFFERED' && @function_exists('mysql_unbuffered_query') ? 'mysql_unbuffered_query' : 'mysql_query';
 91         if(!($query = $func($sql, $this->link)) && $type != 'SILENT') {
 92             $this->halt('MySQL Query Error', $sql);
 93         }
 94         $this->querynum++;
 95         $this->histories[] = $sql;
 96         return $query;
 97     }
 98 
 99     function affected_rows() {
100         return mysql_affected_rows($this->link);
101     }
102 
103     function error() {
104         return (($this->link) ? mysql_error($this->link) : mysql_error());
105     }
106 
107     function errno() {
108         return intval(($this->link) ? mysql_errno($this->link) : mysql_errno());
109     }
110 
111     function result($query, $row) {
112         $query = @mysql_result($query, $row);
113         return $query;
114     }
115 
116     function num_rows($query) {
117         $query = mysql_num_rows($query);
118         return $query;
119     }
120 
121     function num_fields($query) {
122         return mysql_num_fields($query);
123     }
124 
125     function free_result($query) {
126         return mysql_free_result($query);
127     }
128 
129     function insert_id() {
130         return ($id = mysql_insert_id($this->link)) >= 0 ? $id : $this->result($this->query("SELECT last_insert_id()"), 0);
131     }
132 
133     function fetch_row($query) {
134         $query = mysql_fetch_row($query);
135         return $query;
136     }
137 
138     function fetch_fields($query) {
139         return mysql_fetch_field($query);
140     }
141 
142     function version() {
143         return mysql_get_server_info($this->link);
144     }
145 
146     function close() {
147         return mysql_close($this->link);
148     }
149 
150     function halt($message = '', $sql = '') {
151         $error = mysql_error();
152         $errorno = mysql_errno();
153         if($errorno == 2006 && $this->goneaway-- > 0) {
154             $this->connect($this->dbhost, $this->dbuser, $this->dbpw, $this->dbname, $this->dbcharset, $this->pconnect, $this->tablepre, $this->time);
155             $this->query($sql);
156         } else {
157             $s = '';
158             if($message) {
159                 $s = "<b>UCenter info:</b> $message<br />";
160             }
161             if($sql) {
162                 $s .= '<b>SQL:</b>'.htmlspecialchars($sql).'<br />';
163             }
164             $s .= '<b>Error:</b>'.$error.'<br />';
165             $s .= '<b>Errno:</b>'.$errorno.'<br />';
166             $s = str_replace(UC_DBTABLEPRE, '[Table]', $s);
167             exit($s);
168         }
169     }
170 }
171 
172 ?>
View Code

 

本文转自:http://www.lvtao.net/dev/sphinx-discuzx-search.html

 

posted @ 2014-07-26 16:52  小孟文摘  阅读(1421)  评论(0)  编辑  收藏  举报