PHP 查找/过滤一段文字中的违禁词、敏感词
<?php
// Declare the response encoding.
header( 'Content-Type:text/html;charset=utf-8 ');

// Demo: build a pattern from a small word list and check one sentence.
$words = array('我', '你', '他');
$content = "测一测我是不是违禁词";
$banned = generateRegularExpression($words);
// Look for banned words in the content.
$res_banned = check_words($banned, $content);
write_html($content, $res_banned);

/**
 * Build a case-insensitive, UTF-8 aware regular expression that matches any
 * of the given words.
 *
 * Every word is escaped with preg_quote() using "/" as the delimiter, so
 * words containing "/" or other regex metacharacters are matched literally.
 * Empty words are skipped because an empty alternative matches at every
 * position and would hide the real words.
 *
 * @param array $words List of banned words.
 * @return string Pattern of the form "/word1|word2/iu"; a never-matching
 *                pattern when the list contains no usable word.
 */
function generateRegularExpression($words)
{
    $quoted = array();
    foreach ($words as $word) {
        $word = (string) $word;
        if ($word === '') {
            continue;
        }
        $quoted[] = preg_quote($word, '/');
    }

    if (!$quoted) {
        // "(?!)" is an empty negative lookahead: it can never match.
        return '/(?!)/iu';
    }

    return '/' . implode('|', $quoted) . '/iu';
}

/**
 * Escape a single string so it can be embedded literally in a "/"-delimited
 * regular expression.
 *
 * @param string $string Text to escape.
 * @return string Escaped text.
 */
function generateRegularExpressionString($string)
{
    return preg_quote((string) $string, '/');
}

/**
 * Find every distinct banned word that occurs in a string.
 *
 * The pattern is expected in the form produced by generateRegularExpression()
 * ("/a|b|c/flags"). Each alternative is tested on its own, so overlapping
 * words (e.g. "ab" and "abc") are all reported and there is no cap on the
 * number of words found. Patterns that cannot be split safely are applied
 * as a whole and yield at most one result.
 *
 * @param string $banned Regular expression describing the banned words.
 * @param string $string Text to inspect.
 * @return array Matched texts as they appear in $string, ordered by the
 *               position of their first occurrence; empty array when
 *               nothing matched.
 */
function check_words($banned, $string)
{
    $banned = (string) $banned;
    $string = (string) $string;
    if ($banned === '' || $string === '') {
        return array();
    }

    // With the "u" flag PCRE refuses subjects that are not valid UTF-8 and
    // reports "no match". Detect that up front and fall back to byte-wise
    // matching so broken input cannot bypass the filter.
    $byteMode = preg_match('//u', $string) === false;

    $seen = array();
    $hits = array();
    foreach (check_words_expand_patterns($banned, $byteMode) as $order => $pattern) {
        $matches = array();
        if (preg_match($pattern, $string, $matches, PREG_OFFSET_CAPTURE) !== 1) {
            continue;
        }

        list($text, $offset) = $matches[0];
        // An empty match carries no word; a repeated text is already reported.
        if ($text === '' || isset($seen[$text])) {
            continue;
        }

        $seen[$text] = true;
        $hits[] = array('text' => $text, 'offset' => $offset, 'order' => $order);
    }

    // Report words in the order they first appear in the text; words that
    // start at the same position keep the order they have in the pattern.
    usort($hits, function ($left, $right) {
        if ($left['offset'] !== $right['offset']) {
            return $left['offset'] < $right['offset'] ? -1 : 1;
        }
        if ($left['order'] === $right['order']) {
            return 0;
        }
        return $left['order'] < $right['order'] ? -1 : 1;
    });

    $found = array();
    foreach ($hits as $hit) {
        $found[] = $hit['text'];
    }

    return $found;
}

/**
 * Split a flat "delimiter a|b|c delimiter flags" pattern into one complete
 * pattern per alternative (helper for check_words()).
 *
 * @param string $banned   Non-empty regular expression.
 * @param bool   $byteMode When true the "u" flag is removed from the flags.
 * @return array List of complete patterns; the pattern as a whole when it
 *               cannot be split safely.
 */
function check_words_expand_patterns($banned, $byteMode)
{
    $delimiter = $banned[0];
    $end = strrpos($banned, $delimiter);

    // Only the "same character on both sides" delimiter form is understood;
    // anything else (bracket delimiters, malformed input) is left untouched.
    if ($end === 0 || preg_match('/^[A-Za-z0-9\\\\\s]/', $banned)) {
        return array($banned);
    }

    $body = (string) substr($banned, 1, $end - 1);
    $flags = (string) substr($banned, $end + 1);
    if ($byteMode) {
        $flags = str_replace('u', '', $flags);
    }
    $whole = $delimiter . $body . $delimiter . $flags;

    // Groups or character classes may contain "|" that is not a top-level
    // separator, so such patterns are not split.
    $unescaped = (string) preg_replace('/\\\\./s', '', $body);
    if (strpbrk($unescaped, '([') !== false) {
        return array($whole);
    }

    // An alternative is a run of escaped pairs and characters other than "|".
    $alternatives = array();
    if (!preg_match_all('/(?:\\\\.|[^|\\\\])+/s', $body, $alternatives)) {
        return array($whole);
    }

    $patterns = array();
    foreach (array_unique($alternatives[0]) as $alternative) {
        $patterns[] = $delimiter . $alternative . $delimiter . $flags;
    }

    return $patterns;
}

/**
 * Print the checked text and, when banned words were found, their count and
 * the words themselves to the page.
 *
 * Both the text and the words are HTML-escaped before output because they
 * may originate from user input.
 *
 * @param string $content    Text that was checked.
 * @param array  $res_banned Banned words found in $content.
 * @return void
 */
function write_html($content, $res_banned)
{
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    echo htmlspecialchars((string) $content, $flags, 'UTF-8');
    if ($res_banned) {
        echo " <font color='red'>违禁词(" . count($res_banned) . "):</font>"
            . htmlspecialchars(implode('|', $res_banned), $flags, 'UTF-8');
    }
    echo "<br>";
}