java敏感词检测 简单实现

模仿网上的 DFA 算法实现的。感觉自己写的处理方法效率不太行,但总归是自己写的,在这里记录一下。

下面是工具类(包含测试用的 main 方法)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package com.htht.business.utils;
 
 
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
 
/**
 * @author linjunwei
 * @version 2024/12/6 9:14
 */
public class SentiveUtils {
 
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        list.add("死全家");
        list.add("死全家了");
        SentiveObj sentiveObj = buildMap(list);
        System.out.println(sentiveObj.toString());
 
        String s = scanSentiveKey("死光光,死全了", sentiveObj);
        System.out.println(s);
    }
 
    public static SentiveObj buildMap(List<String> sentiveList) {
        //手动创建根节点
        SentiveObj result = new SentiveObj();
        result.setSentiveKey("");
        result.setBeforeSentiveKey("");
        result.setEndFlag("1");
        result.setChildMap(new HashMap<>());
 
        for (String s : sentiveList) {
            buildMap(result, s);
        }
        return result;
    }
 
    /**
     * 根据传入的字符 在map中组成敏感词树
     * @param sentiveObj
     * @param key
     */
    public static void buildMap(SentiveObj sentiveObj, String key) {
        SentiveObj sentiveObj1 = sentiveObj;
        for (int i = 0; i < key.length(); i++) {
            String keyChar = String.valueOf(key.charAt(i));
            if (!sentiveObj1.getChildMap().containsKey(keyChar)) {
                Map<String, SentiveObj> childMap = sentiveObj1.getChildMap();
                SentiveObj sentiveObj2 = new SentiveObj();
                sentiveObj2.setChildMap(new HashMap<>());
                sentiveObj2.setSentiveKey(keyChar);
                sentiveObj2.setBeforeSentiveKey(key.substring(0, i + 1));
                childMap.put(keyChar, sentiveObj2);
                sentiveObj1 = sentiveObj2;
            }else{
                sentiveObj1 = sentiveObj1.getChildMap().get(keyChar);
            }
            if (i==key.length()-1){
                sentiveObj1.setEndFlag("0");
            }
        }
    }
 
    /**
     * 扫描句子中的敏感词
     * @param jvzi
     * @param sentiveObj
     * @return
     */
    public static String scanSentiveKey(String jvzi,SentiveObj sentiveObj){
 
        String result = null;
        for (int i = 0; i < jvzi.length(); i++) {
            String keyChar = String.valueOf(jvzi.charAt(i));
            if (sentiveObj.getChildMap().containsKey(keyChar)) {
                //匹配到敏感词开头了,进行循环匹配是否完整敏感词
                result = matchSentiveKey(sentiveObj, jvzi, i);
            }
        }
 
        return result; // 如果没有找到敏感词,返回 null
    }
 
    /**
     * 匹配句子中的敏感词
     * @param sentiveObj
     * @param s
     * @param i
     * @return
     */
    public static String matchSentiveKey(SentiveObj sentiveObj,String s,int i){
        for (; i < s.length(); i++) {
            System.out.println(s.charAt(i));
            System.out.println(sentiveObj.getChildMap());
            System.out.println(sentiveObj.getChildMap().get("习"));
            Map<String,SentiveObj> map = sentiveObj.getChildMap();
            sentiveObj = map.get(String.valueOf(s.charAt(i)));
            if (sentiveObj == null){
                return null;
            }else if (sentiveObj.getEndFlag().equals("0")){
                return "敏感词:"+sentiveObj.getBeforeSentiveKey();
            }
        }
        return null;
    }
}

  下面是实体类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
package com.htht.business.utils;
 
import lombok.Data;
 
import java.util.Map;
 
/**
 * @author linjunwei
 * @version 2024/12/6 9:17
 */
@Data
public class SentiveObj {
 
    /**
     * 当前敏感词的key
     */
    private String sentiveKey;
 
    /**
     * 敏感词前缀
     */
    private String beforeSentiveKey;
 
    /**
     * 敏感词字map
     */
    private Map<String,SentiveObj> childMap;
 
    /**
     * 是否敏感词最后一个字 0:是 1:否
     */
    private String endFlag;
 
    public SentiveObj(){
        this.endFlag = "1";
    }
}

  

posted @   霸王龙168  阅读(64)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
点击右上角即可分享
微信分享提示