php字典分词,广告违禁词模块

2023年2月23日13:37:54

PHP 版本 8.0,Laravel 8

https://github.com/lizhichao/VicWord

composer require lizhichao/word

数据库存储违禁词

// 生成字典
/**
 * Rebuild the word dictionary file from the AdDict table.
 *
 * Any existing file at self::getPath() is removed and replaced by an empty
 * one, a VicDict is opened on that path, and every non-empty `name` of the
 * AdDict rows whose `is_delete` equals GlobalCode::NORMAL is added to it
 * with 'n' as second argument (presumably the part-of-speech tag; confirm
 * against the VicDict documentation).
 *
 * The dictionary is only saved when the query returned at least one row;
 * otherwise the freshly created empty file is left in place.
 *
 * @return void
 *
 * @throws \RuntimeException When the empty dictionary file cannot be created.
 */
public static function generateDict()
    {
        $path = self::getPath();

        // Start from a brand-new file: drop the previous dictionary (if any)
        // and create an empty one for VicDict to open.
        if (file_exists($path)) {
            unlink($path);
        }
        // Fail loudly here instead of letting VicDict work on a missing file.
        if (file_put_contents($path, '') === false) {
            throw new \RuntimeException(sprintf('Unable to create dictionary file: %s', $path));
        }

        $dict = new VicDict($path);

        $ad = AdDict::where('is_delete', GlobalCode::NORMAL)->get(['name']);
        if ($ad->count() === 0) {
            // Nothing to add; keep the empty file and skip save().
            return;
        }

        foreach ($ad as $row) {
            if (!empty($row->name)) {
                $dict->add($row->name, 'n');
            }
        }

        // Persist the word list to the dictionary file.
        $dict->save();
    }
	
	// 分词匹配字典
    /**
     * Segment a text with VicWord and highlight every segment flagged as a
     * dictionary hit by wrapping it in <font color='red'>…</font>.
     *
     * The dictionary file at self::getPath() is generated on demand when it
     * does not exist yet. Every occurrence of a matched word in the text is
     * wrapped exactly once, even when the segmenter reports the same word
     * several times or when matched words overlap each other.
     *
     * NOTE(review): the text is returned with HTML markup added but is not
     * HTML-escaped here; confirm that callers escape or trust the input.
     *
     * @param string $word Text to check; must not be the empty string.
     *
     * @return string The text with matched words highlighted, or the
     *                unchanged text when nothing matched.
     *
     * @throws Exception When $word is the empty string.
     */
    public static function mateWord(string $word = '')
    {
        // Strict check: empty() would also reject the legitimate text "0".
        if ($word === '') {
            throw new Exception('待匹配文字不能为空');
        }

        $path = self::getPath();
        if (!file_exists($path)) {
            self::generateDict();
        }
        $fc = new VicWord($path);

        $data = $fc->getWord($word);
        if (empty($data)) {
            return $word;
        }

        // Map each matched word to its highlighted form. Keying by the word
        // de-duplicates repeated segments, so no word is wrapped twice.
        // Index 0 is the segment text; index 3 equal to 1 marks a dictionary
        // hit (per the VicWord README; confirm for the installed version).
        $replacePairs = [];
        foreach ($data as $segment) {
            if ((int) ($segment[3] ?? 0) !== 1) {
                continue;
            }
            $hit = (string) ($segment[0] ?? '');
            if ($hit === '') {
                continue;
            }
            $replacePairs[$hit] = sprintf("<font color='red'>%s</font>", $hit);
        }

        if ($replacePairs === []) {
            return $word;
        }

        // strtr() with an array replaces in a single pass, longest key first,
        // and never rescans text it has already replaced. str_replace() with
        // arrays runs sequentially and would re-match inside inserted markup.
        return strtr($word, $replacePairs);
    }

posted on 2023-02-24 16:48  zh7314  阅读(69)  评论(0)  编辑  收藏  举报