PHP 搜索分词实现代码
<?php
/**
 * @author: xiaojiang 2014-01-08
 *
 * Dictionary-based word segmentation for PHP: builds a character tree
 * (trie) from a list of words and reports the dictionary words found in
 * a piece of text.
 */

/**
 * Splits a string into its individual UTF-8 characters.
 *
 * Characters of any encoded width (1 to 4 bytes) are handled. If the input
 * is not valid UTF-8 the string is split byte by byte as a best effort.
 *
 * @param string $str Text to split.
 * @return array List of single-character strings (empty for an empty input).
 */
function splitUtf8Chars($str)
{
    $str = (string) $str;
    if ($str === '') {
        return array();
    }

    $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
        // preg_split() fails on malformed UTF-8; fall back to raw bytes.
        return str_split($str);
    }

    return $chars;
}

/**
 * One node of the dictionary tree. The root node carries an empty character;
 * every other node carries exactly one character.
 */
class Tree
{
    /** @var string Character stored in this node ('' for the root). */
    public $w = '';

    /** @var array Child nodes (list of Tree). */
    public $subT = array();

    /** @var bool True when the path from the root to this node spells a complete word. */
    public $isEnd = false;

    /**
     * @param string $w     Character held by the node.
     * @param bool   $isEnd Whether a word ends at this node.
     */
    public function __construct($w = '', $isEnd = false)
    {
        // Compare against '' explicitly: empty() would also reject the character '0'.
        if ($w !== '' && $w !== null) {
            $this->w = $w;
            $this->isEnd = $isEnd;
        }
    }

    /**
     * Adds a word to the dictionary, creating any missing nodes.
     *
     * @param string $str Word to add; an empty string is ignored.
     * @return void
     */
    public function insert($str)
    {
        $chars = splitUtf8Chars($str);
        if (!$chars) {
            return;
        }

        $scope = $this;
        foreach ($chars as $ch) {
            $scope = $scope->insertNode($ch);
        }
        $scope->isEnd = true;
    }

    /**
     * Returns the child node for a character, creating it when absent.
     *
     * @param string $w Single character.
     * @return Tree
     */
    private function insertNode($w)
    {
        $t = $this->hasTree($w);
        if (!$t) {
            $t = new Tree($w);
            $this->subT[] = $t;
        }

        return $t;
    }

    /**
     * Looks up the direct child node that holds the given character.
     *
     * @param string|null $w Single character (null never matches).
     * @return Tree|false The child node, or false when there is none.
     */
    public function hasTree($w)
    {
        foreach ($this->subT as $t) {
            if ($t->w === $w) {
                return $t;
            }
        }

        return false;
    }
}

/**
 * A string pre-split into characters so it can be indexed per character
 * rather than per byte.
 */
class myStr
{
    /** @var string Original text. */
    private $str = '';

    /** @var array Characters of the text. */
    private $arr = array();

    /** @var int Number of characters. */
    private $len = 0;

    /**
     * @param string $str Text to wrap.
     */
    public function __construct($str)
    {
        $this->str = $str;
        $this->arr = splitUtf8Chars($str);
        $this->len = count($this->arr);
    }

    /**
     * Returns the character at a position.
     *
     * @param int $idx Zero-based character index.
     * @return string|null The character, or null when the index is out of range.
     */
    public function getIndex($idx)
    {
        return isset($this->arr[$idx]) ? $this->arr[$idx] : null;
    }

    /**
     * @return int Number of characters in the text.
     */
    public function getLength()
    {
        return $this->len;
    }
}

/**
 * Scans a text and collects the dictionary words it contains.
 *
 * Every character position is tried as a possible word start. From each
 * start the longest complete dictionary word is reported, even when a longer
 * dictionary entry begins to match and then fails. The scan does not skip
 * over a reported word, so words that overlap are all reported.
 *
 * @param Tree   $dict Root of the dictionary tree.
 * @param string $text Text to scan.
 * @return array Matched words in order of their start position.
 */
function findWords($dict, $text)
{
    $chars = new myStr($text);
    $total = $chars->getLength();
    $found = array();

    for ($i = 0; $i < $total; $i++) {
        $node = $dict->hasTree($chars->getIndex($i));
        $current = '';
        $longest = '';
        $j = $i;

        while ($node) {
            $current .= $node->w;
            if ($node->isEnd) {
                // Remember the longest complete word seen on this path.
                $longest = $current;
            }
            $j++;
            $node = $j < $total ? $node->hasTree($chars->getIndex($j)) : false;
        }

        if ($longest !== '') {
            $found[] = $longest;
        }
    }

    return $found;
}

// Demo: build a small dictionary and print the words found in a sample text.
$tIns = new Tree();
$tIns->insert('中华');
$tIns->insert('人民');
$tIns->insert('共和国');
$tIns->insert('baidu');

foreach (findWords($tIns, "cc中华的人民共和国和中国啊啊www.baidua.com") as $sw) {
    echo $sw."<br>";
}
?>
输出:
中华
人民
共和国
baidu
生命只有一次。