php实现关键词过滤
在php中,可以通过trie_filter扩展实现关键词的过滤,具体操作如下
1.安装libdatrie的依赖库 libiconv
wget https://ftp.gnu.org/pub/gnu/libiconv/libiconv-1.14.tar.gz && tar zxvf libiconv-1.14.tar.gz && cd libiconv-1.14 && ./configure && make && make install
2. 安装:libdatrie(需要最低版本为0.2.4)
wget ftp://linux.thai.net/pub/ThaiLinux/software/libthai/libdatrie-0.2.4.tar.gz && tar zxf libdatrie-0.2.4.tar.gz && cd libdatrie-0.2.4 && ./configure --prefix=/usr/local && make && make install
如果编译时出现错误 trietool.c:125: undefined reference to `libiconv'
解决办法为:改用 ./configure LDFLAGS=-L/usr/local/lib LIBS=-liconv 重新配置,然后再执行 make && make install
3.安装trie_filter 扩展
从 https://github.com/wulijun/php-ext-trie-filter 下载源码包,解压后进入源码目录执行:
phpize && ./configure --with-php-config=/usr/local/bin/php-config && make && make install
注意:如果是php7的版本扩展源码请移步至https://github.com/zzjin/php-ext-trie-filter/tree/php7
4.修改 php.ini 文件,添加 trie_filter 扩展:extension=trie_filter.so,重启PHP。
如何在项目中使用关键词过滤
1、做一个后台来录入关键词,将文件保存在服务器,并且生成相对应的tree文件
/**
 * Admin action for maintaining the keyword blacklist.
 *
 * POST: writes the submitted keywords (one per line) to keywords.txt, builds a
 * trie from every non-empty line and saves it as blackword.tree in the same
 * directory, then answers with a JSON response.
 * GET: renders the Index/keywords view; when type=update the form is
 * pre-filled with the current contents of keywords.txt.
 *
 * @return mixed The JSON response on POST; nothing on GET.
 */
public function keywordsAction()
{
    $file_path = APP_PATH."public/filter/";
    $keywords_file = $file_path.'keywords.txt';

    if (!$this->request->isPost()) {
        $type = $this->request->get('type','trim','add');
        $keywords = '';
        if ($type == 'update' && is_readable($keywords_file)) {
            $stored = file_get_contents($keywords_file);
            // Never hand `false` to the view when the read fails.
            if ($stored !== false) {
                $keywords = $stored;
            }
        }
        $this->view->setVar('keywords',$keywords)->pick('Index/keywords');
        return;
    }

    $keywords = $this->request->get('keywords','trim');

    // A falsy result covers both a failed write and an empty submission
    // (0 bytes written); both are reported as a failure.
    if (!file_put_contents($keywords_file, $keywords)) {
        return $this->responseJson(300,'操作失败');
    }

    $handle = fopen($keywords_file, 'r');
    if ($handle === false) {
        return $this->responseJson(300,'操作失败');
    }

    // Start from an empty trie and add the keywords one by one.
    $resTrie = trie_filter_new();
    while (($line = fgets($handle)) !== false) {
        $item = trim($line);
        // Compare against '' explicitly: empty() would also discard the keyword "0".
        if ($item === '') {
            continue;
        }
        trie_filter_store($resTrie, $item);
    }
    // Release the file handle; it is no longer needed once the trie is built.
    fclose($handle);

    // Persist the trie so that request handlers can load it later.
    $blackword_tree = $file_path.'blackword.tree';
    if (trie_filter_save($resTrie, $blackword_tree) === false) {
        return $this->responseJson(300,'操作失败');
    }

    return $this->responseJson();
}
<?php
/**
 * Static helper that keeps one loaded trie_filter dictionary per PHP process
 * and uses it to find and replace blacklisted keywords in a string.
 */
class FilterHelper
{
    /** @var mixed Handle returned by trie_filter_load(), or null when no trie is loaded. */
    private static $_resTrie = null;

    /** @var mixed Modification time of the tree file the cached trie was loaded for. */
    private static $_mtime = null;

    /**
     * Not instantiable: the class only exposes static methods.
     */
    private function __construct() {}

    /**
     * Not cloneable.
     */
    private function __clone() {}

    /**
     * Returns the cached trie, (re)loading it from disk when nothing is cached
     * yet or when the supplied modification time differs from the cached one.
     *
     * @param string $tree_file Path of the trie tree file.
     * @param mixed  $new_mtime Modification time of the tree file at call time.
     * @return mixed The loaded trie handle, or null when loading failed.
     */
    public static function getResTrie($tree_file, $new_mtime)
    {
        if (self::$_resTrie === null || $new_mtime !== self::$_mtime) {
            $trie = trie_filter_load($tree_file);
            // Store any falsy load result as null so a failed load is never
            // mistaken for a usable trie and is retried on the next call.
            self::$_resTrie = $trie ? $trie : null;
            self::$_mtime = $new_mtime;
        }

        return self::$_resTrie;
    }

    /**
     * Replaces every keyword found in $content with $replace_str.
     *
     * Each matched keyword is replaced as a whole by one $replace_str,
     * regardless of the keyword's length. When no trie is loaded the content
     * comes back unchanged.
     *
     * @param string $content     Original text.
     * @param string $replace_str Replacement for each matched keyword.
     * @return string
     */
    public static function filter($content, $replace_str = "*")
    {
        $badwords = [];
        foreach (self::isHasKeywords($content) as $ret) {
            // Byte-wise substr() is deliberate: each match is read as
            // [offset, length] — presumably byte positions reported by the
            // extension (TODO confirm against the extension's README).
            $word = substr($content, $ret[0], $ret[1]);
            if ($word === false || $word === '') {
                continue;
            }
            $badwords[] = $word;
        }

        $badwords = array_unique($badwords);
        // Replace longer keywords first; otherwise a shorter keyword contained
        // in a longer one is replaced first and the longer one no longer matches.
        usort($badwords, function ($a, $b) {
            return strlen($b) - strlen($a);
        });

        return str_replace($badwords, $replace_str, $content);
    }

    /**
     * Searches $content for all keywords of the loaded trie.
     *
     * @param string $content Text to scan.
     * @return array List of matches as returned by trie_filter_search_all();
     *               empty when no trie is loaded or the search did not return an array.
     */
    public static function isHasKeywords($content)
    {
        // Without a loaded trie there is nothing to search with.
        if (self::$_resTrie === null) {
            return [];
        }

        $matches = trie_filter_search_all(self::$_resTrie, $content);

        return is_array($matches) ? $matches : [];
    }
}
2、在项目相应位置调用关键词过滤替换即可
/**
 * Demo action: makes sure the keyword trie is loaded and prints a sample
 * sentence after running it through the keyword filter.
 */
public function testAction()
{
    $treePath = APP_PATH."public/filter/blackword.tree";
    $sample = '你果真是个贱人啊';

    // Drop PHP's cached stat data so filemtime() reflects the file's current state.
    clearstatcache();
    $treeMtime = filemtime($treePath);

    // Loads the trie into FilterHelper; it is reloaded when the mtime differs
    // from the one it was cached with.
    \FilterHelper::getResTrie($treePath, $treeMtime);

    echo \FilterHelper::filter($sample);
}
以上就是使用 PHP 扩展 trie_filter 实现关键词过滤的方法。