discuz sphinx全文检索搜索引擎方案
下面是基于 Discuz 的 Sphinx 索引配置文件。这个配置文件比较灵活,可以根据不同的需求进行调整。
#
# linuxTone full index search configure file
#

# ---------- main (full) index ----------

source lt_posts
{
    type                = mysql

    sql_host            = 127.0.0.1
    sql_user            = root
    sql_pass            =
    sql_db              = lt_bbs
    sql_port            = 3306

    sql_query_pre       = SET NAMES utf8

    # The index is built from the posts table so that the subject, message
    # and author fields can all be searched at the same time.
    sql_query           = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1

    sql_attr_uint       = fid
    sql_attr_timestamp  = dateline

    sql_query_info      = SELECT * FROM cdb_posts WHERE pid=$id
}

index lt_posts
{
    source              = lt_posts
    path                = /data/sphinx/data/lt_posts
    docinfo             = extern
    mlock               = 0
    morphology          = none
    min_word_len        = 2
    html_strip          = 1
    charset_dictpath    = /usr/local/mmseg-3.2.13/etc/
    charset_type        = zh_cn.utf-8
    ngram_len           = 0
}

########## delta (incremental) index ##################

source delta
{
    type                = mysql

    sql_host            = 127.0.0.1
    sql_user            = root
    sql_pass            =
    sql_db              = lt_bbs
    # optional, default is 3306
    sql_port            = 3306

    sql_query_pre       = SET NAMES utf8

    # The delta index only picks up rows newer than "now minus an interval"
    # (here 3600*10 seconds), i.e. the recently added posts.
    sql_query           = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 and dateline > unix_timestamp()-3600*10

    sql_attr_uint       = fid
    sql_attr_timestamp  = dateline

    sql_query_info      = SELECT * FROM cdb_posts WHERE pid=$id
}

index delta
{
    source              = delta
    path                = /data/sphinx/data/lt_delta
    docinfo             = extern
    mlock               = 0
    morphology          = none
    min_word_len        = 2
    html_strip          = 1
    charset_dictpath    = /usr/local/mmseg-3.2.13/etc/
    charset_type        = zh_cn.utf-8
    ngram_len           = 0
}

# ---------- indexer / searchd settings ----------

indexer
{
    mem_limit           = 32M
}

searchd
{
    port                = 9312
    log                 = /data/sphinx/var/log/searchd.log
    query_log           = /data/sphinx/var/log/query.log
    read_timeout        = 5
    max_children        = 30
    pid_file            = /data/sphinx/var/log/searchd.pid
    max_matches         = 10000
    seamless_rotate     = 1
    preopen_indexes     = 0
    unlink_old          = 1
}
Sphinx 最主要的就是这个配置文件。增量索引部分可以写成一个脚本,放到 crontab 里定时执行。
下面介绍在 Discuz 中通过 PHP 调用 Sphinx 的部分。Sphinx 的接口采用 PHP 扩展,可以通过 pecl 命令安装,或者从 http://pecl.php.net/package/sphinx 下载安装。
<?php
/**
 * Full-text search service.
 *
 * Reads the search words from $_GET['q'] and the page number from
 * $_GET['page'], asks searchd for the matching post ids, loads the posts
 * from cdb_posts, highlights the search words and renders the
 * "lt_search" template.
 *
 * Variables left in scope for the template: $q, $page, $pages, $data,
 * $query_totals, $query_time and $multipage.
 */
define('IN_DISCUZ', true);
require_once './include/common.inc.php';

// Only accept a scalar string: ?q[]=x would otherwise hand an array to strip_tags().
$q = isset($_GET['q']) && is_string($_GET['q']) && !empty($_GET['q']) ? $_GET['q'] : '';
// NOTE(review): the third pair replaces an ordinary space with an ordinary
// space as written; presumably the original searched for a full-width or
// non-breaking space -- confirm against the upstream script.
$q = str_replace(array('<', '>', ' ', '\'', ','), array('', '', ' ', '', ''), strip_tags($q));

$page = isset($_GET['page']) && intval($_GET['page']) > 0 ? intval($_GET['page']) : 1;
$perNum = 20;
$offset = ($page - 1) * $perNum;

$search = new SphinxClient();
$search->setServer('127.0.0.1', 9312);
$search->setConnectTimeout(2);
$search->setArrayResult(true);
$search->setMatchMode(SPH_MATCH_ANY);
$search->setRankingMode(SPH_RANK_PROXIMITY_BM25);
$search->setSortMode(SPH_SORT_EXTENDED, '@relevance desc,@weight desc');
$search->setLimits($offset, $perNum);
$search->setFieldWeights(array('subject' => 2000, 'message' => 0));

$rs = array();
$pages = 0;
$multipage = '';
$query_totals = $query_time = 0;

if (!empty($q)) {
    $rs = $search->Query($q, "*");
    if (is_array($rs)) {
        $pages = ceil($rs['total'] / $perNum);
        $query_totals = $rs['total_found'];
        $query_time = $rs['time'];
    } else {
        // Query() returns false on failure (searchd down, offset out of
        // range, ...); treat that as "no results" instead of indexing false.
        $rs = array();
    }
}

$data = $title = $content = array();

if (!empty($rs['matches']) && $page <= $pages) {
    $pids = array();
    foreach ($rs['matches'] as $v) {
        $pids[] = $v['id'];
    }
    $pid = implode(',', $pids);

    $sql = "select pid,tid,author,authorid,subject,message,dateline from cdb_posts where pid IN($pid) and status ='0' and invisible='0'";
    $query = $db->query($sql);

    // IN(...) returns rows in table order, so index them by pid first ...
    $rowsByPid = array();
    while ($row = $db->fetch_array($query)) {
        $rowsByPid[$row['pid']] = $row;
    }

    // ... and then walk the ids in the order searchd ranked them.
    foreach ($pids as $matchPid) {
        if (!isset($rowsByPid[$matchPid])) {
            // Filtered out by the status/invisible conditions, or deleted since indexing.
            continue;
        }
        $row = $rowsByPid[$matchPid];
        $data[] = $row;
        $title[] = $row['subject'];
        // Strip BBCode tags. [^\]]* stops at the tag's own closing bracket;
        // a greedy .* would also eat all text up to the last "]" on the line.
        $content[] = preg_replace('/\[[\/]?(b|img|url|color|s|hr|p|list|i|align|email|u|font|code|hide|table|tr|td|th|attach|list|indent|float)[^\]]*\]/', '', strip_tags($row['message']));
    }

    // Highlight the search words.
    if (!empty($data)) {
        $opts = array();
        $opts['before_match'] = '<em>';
        $opts['after_match'] = '</em>';

        $titleExcerpts = $search->BuildExcerpts($title, 'lt_posts', $q, $opts);
        $contentExcerpts = $search->BuildExcerpts($content, 'lt_posts', $q, $opts);

        // BuildExcerpts() returns false on failure; keep the plain text then.
        if (is_array($titleExcerpts)) {
            $title = $titleExcerpts;
        }
        if (is_array($contentExcerpts)) {
            $content = $contentExcerpts;
        }

        foreach ($data as $k => $v) {
            $data[$k]['subject'] = $title[$k];
            $data[$k]['message'] = $content[$k];
        }
    }

    $url = "s.php?q=" . urlencode($q);
    $multipage = multi($rs['total'], $perNum, $page, $url);
}

include template("lt_search");
?>
重建主索引的 shell 脚本 search-index.sh:
#!/bin/bash
#
# The BBS search exec full index
#
# Rebuilds the lt_posts index with --rotate and appends the indexer's
# standard output to a log file named after the current date and hour.

INDEXER=/usr/local/csft-3.2.13/bin/indexer
CONFIG=/usr/local/csft-3.2.13/etc/lt_posts.conf
LOG_FILE="/data/sphinx/var/$(date "+%Y-%m-%d-%H").log"

"$INDEXER" -c "$CONFIG" --rotate lt_posts >> "$LOG_FILE"
跑增量索引
# Run the indexer for the "delta" index only, using the same config file as
# the full index and passing --rotate.
/usr/local/csft-3.2.13/bin/indexer -c /usr/local/csft-3.2.13/etc/lt_posts.conf --rotate delta
合并主索引和增量索引
# Merge command for the two indexes: --merge is given lt_posts first and
# delta second, i.e. delta is merged into lt_posts.
# NOTE(review): the command is commented out as published; remove the
# leading '#' to actually run it.
#/usr/local/csft-3.2.13/bin/indexer --config /usr/local/csft-3.2.13/etc/lt_posts.conf --rotate --merge lt_posts delta
<?php

/*
    [UCenter] (C)2001-2099 Comsenz Inc.
    This is NOT a freeware, use is subject to license terms

    $Id: db.class.php 1059 2011-03-01 07:25:09Z monkey $
*/

/**
 * Thin wrapper around the legacy mysql_* functions used by the UCenter client.
 *
 * NOTE(review): the mysql_* extension was removed in PHP 7.0, so this class
 * only runs on PHP 5.x (or with a compatibility shim). It is kept on that
 * API here because the public interface exposes MYSQL_* constants.
 */
class ucclient_db {
    /** Number of queries counted by query(). */
    var $querynum = 0;
    /** Connection handle returned by mysql_connect()/mysql_pconnect(). */
    var $link;
    /** SQL text of every query counted by query(), in order. */
    var $histories = array();

    // Connection parameters, remembered so halt() can reconnect.
    var $dbhost;
    var $dbuser;
    var $dbpw;
    var $dbname;
    var $dbcharset;
    var $pconnect;
    var $tablepre;
    var $time;

    /** Remaining reconnect attempts after a "server has gone away" (errno 2006). */
    var $goneaway = 5;

    /**
     * Open the connection, set the connection charset and select the database.
     *
     * @param string $dbhost    server host
     * @param string $dbuser    user name
     * @param string $dbpw      password
     * @param string $dbname    database to select; skipped when empty
     * @param string $dbcharset connection/result charset; skipped when empty
     * @param int    $pconnect  non-zero to use a persistent connection
     * @param string $tablepre  table prefix (used by cache_gc())
     * @param int    $time      timestamp compared against expiry in cache_gc()
     */
    function connect($dbhost, $dbuser, $dbpw, $dbname = '', $dbcharset = '', $pconnect = 0, $tablepre='', $time = 0) {
        $this->dbhost = $dbhost;
        $this->dbuser = $dbuser;
        $this->dbpw = $dbpw;
        $this->dbname = $dbname;
        $this->dbcharset = $dbcharset;
        $this->pconnect = $pconnect;
        $this->tablepre = $tablepre;
        $this->time = $time;

        if($pconnect) {
            if(!$this->link = mysql_pconnect($dbhost, $dbuser, $dbpw)) {
                $this->halt('Can not connect to MySQL server');
            }
        } else {
            if(!$this->link = mysql_connect($dbhost, $dbuser, $dbpw)) {
                $this->halt('Can not connect to MySQL server');
            }
        }

        // Compare versions numerically: as plain strings '10.4.x' sorts
        // before '4.1' and the charset would never be set on such servers.
        $version = $this->version();
        if(version_compare($version, '4.1', '>')) {
            if($dbcharset) {
                mysql_query("SET character_set_connection=".$dbcharset.", character_set_results=".$dbcharset.", character_set_client=binary", $this->link);
            }

            if(version_compare($version, '5.0.1', '>')) {
                mysql_query("SET sql_mode=''", $this->link);
            }
        }

        if($dbname) {
            mysql_select_db($dbname, $this->link);
        }
    }

    /** Fetch the next row of a result as an array (associative by default). */
    function fetch_array($query, $result_type = MYSQL_ASSOC) {
        return mysql_fetch_array($query, $result_type);
    }

    /** Run $sql and return the first column of its first row. */
    function result_first($sql) {
        $query = $this->query($sql);
        return $this->result($query, 0);
    }

    /** Run $sql and return its first row. */
    function fetch_first($sql) {
        $query = $this->query($sql);
        return $this->fetch_array($query);
    }

    /**
     * Run $sql and return all rows.
     *
     * @param string $sql
     * @param string $id  when non-empty, rows are keyed by this column's value
     * @return array
     */
    function fetch_all($sql, $id = '') {
        $arr = array();
        $query = $this->query($sql);
        while($data = $this->fetch_array($query)) {
            if($id) {
                $arr[$data[$id]] = $data;
            } else {
                $arr[] = $data;
            }
        }
        return $arr;
    }

    /** Delete rows of {tablepre}sqlcaches whose expiry is before $this->time. */
    function cache_gc() {
        $this->query("DELETE FROM {$this->tablepre}sqlcaches WHERE expiry<$this->time");
    }

    /**
     * Execute a statement.
     *
     * @param string $sql
     * @param string $type      'UNBUFFERED' to use mysql_unbuffered_query(),
     *                          'SILENT' to return false on failure instead of halting
     * @param mixed  $cachetime unused; kept for interface compatibility
     * @return mixed result of the mysql query function
     */
    function query($sql, $type = '', $cachetime = FALSE) {
        $func = $type == 'UNBUFFERED' && @function_exists('mysql_unbuffered_query') ? 'mysql_unbuffered_query' : 'mysql_query';
        $query = $func($sql, $this->link);
        if(!$query && $type != 'SILENT') {
            // halt() either exits, or reconnects and hands back the result of
            // the retried statement (which has already been counted/logged).
            return $this->halt('MySQL Query Error', $sql);
        }
        $this->querynum++;
        $this->histories[] = $sql;
        return $query;
    }

    /** Number of rows affected by the last statement on this connection. */
    function affected_rows() {
        return mysql_affected_rows($this->link);
    }

    /** Last error message, read from this connection when there is one. */
    function error() {
        return (($this->link) ? mysql_error($this->link) : mysql_error());
    }

    /** Last error number, read from this connection when there is one. */
    function errno() {
        return intval(($this->link) ? mysql_errno($this->link) : mysql_errno());
    }

    /** First field of row $row of a result; warnings are suppressed. */
    function result($query, $row) {
        $query = @mysql_result($query, $row);
        return $query;
    }

    /** Number of rows in a result. */
    function num_rows($query) {
        $query = mysql_num_rows($query);
        return $query;
    }

    /** Number of fields in a result. */
    function num_fields($query) {
        return mysql_num_fields($query);
    }

    /** Free a result. */
    function free_result($query) {
        return mysql_free_result($query);
    }

    /** Id generated by the last INSERT, with SELECT last_insert_id() as fallback. */
    function insert_id() {
        return ($id = mysql_insert_id($this->link)) >= 0 ? $id : $this->result($this->query("SELECT last_insert_id()"), 0);
    }

    /** Fetch the next row of a result as a numerically indexed array. */
    function fetch_row($query) {
        $query = mysql_fetch_row($query);
        return $query;
    }

    /** Fetch the next field descriptor of a result. */
    function fetch_fields($query) {
        return mysql_fetch_field($query);
    }

    /** Server version string. */
    function version() {
        return mysql_get_server_info($this->link);
    }

    /** Close the connection. */
    function close() {
        return mysql_close($this->link);
    }

    /**
     * Error handler for connect() and query().
     *
     * On errno 2006 it reconnects (at most $goneaway times in total) and
     * re-runs $sql; on any other error it prints the details and exits.
     *
     * @param string $message short description shown to the user
     * @param string $sql     statement that failed, if any
     * @return mixed result of the retried statement, or false when there was
     *               nothing to retry; does not return on the exit path
     */
    function halt($message = '', $sql = '') {
        // Read the error from this object's own link, not from whichever
        // connection happened to be opened last.
        $error = $this->error();
        $errorno = $this->errno();
        if($errorno == 2006 && $this->goneaway-- > 0) {
            $this->connect($this->dbhost, $this->dbuser, $this->dbpw, $this->dbname, $this->dbcharset, $this->pconnect, $this->tablepre, $this->time);
            // Called from connect() there is no statement to retry; running
            // an empty query would itself fail and abort the request.
            if($sql) {
                return $this->query($sql);
            }
            return false;
        } else {
            $s = '';
            if($message) {
                $s = "<b>UCenter info:</b> $message<br />";
            }
            if($sql) {
                $s .= '<b>SQL:</b>'.htmlspecialchars($sql).'<br />';
            }
            // The server error text can quote parts of the statement, so escape it too.
            $s .= '<b>Error:</b>'.htmlspecialchars($error).'<br />';
            $s .= '<b>Errno:</b>'.$errorno.'<br />';
            $s = str_replace(UC_DBTABLEPRE, '[Table]', $s);
            exit($s);
        }
    }
}

?>
本文转自:http://www.lvtao.net/dev/sphinx-discuzx-search.html