java敏感词检测 简单实现
模仿网上的 DFA 算法实现的,感觉自己写的处理方法效率不高,先记录一下,毕竟是自己写的。
下面是工具类(包含用于测试的 main 方法)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | package com.htht.business.utils; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * @author linjunwei * @version 2024/12/6 9:14 */ public class SentiveUtils { public static void main(String[] args) { List<String> list = new ArrayList<>(); list.add( "死全家" ); list.add( "死全家了" ); SentiveObj sentiveObj = buildMap(list); System.out.println(sentiveObj.toString()); String s = scanSentiveKey( "死光光,死全了" , sentiveObj); System.out.println(s); } public static SentiveObj buildMap(List<String> sentiveList) { //手动创建根节点 SentiveObj result = new SentiveObj(); result.setSentiveKey( "" ); result.setBeforeSentiveKey( "" ); result.setEndFlag( "1" ); result.setChildMap( new HashMap<>()); for (String s : sentiveList) { buildMap(result, s); } return result; } /** * 根据传入的字符 在map中组成敏感词树 * @param sentiveObj * @param key */ public static void buildMap(SentiveObj sentiveObj, String key) { SentiveObj sentiveObj1 = sentiveObj; for ( int i = 0 ; i < key.length(); i++) { String keyChar = String.valueOf(key.charAt(i)); if (!sentiveObj1.getChildMap().containsKey(keyChar)) { Map<String, SentiveObj> childMap = sentiveObj1.getChildMap(); SentiveObj sentiveObj2 = new SentiveObj(); sentiveObj2.setChildMap( new HashMap<>()); sentiveObj2.setSentiveKey(keyChar); sentiveObj2.setBeforeSentiveKey(key.substring( 0 , i + 1 )); childMap.put(keyChar, sentiveObj2); sentiveObj1 = sentiveObj2; } else { sentiveObj1 = sentiveObj1.getChildMap().get(keyChar); } if (i==key.length()- 1 ){ sentiveObj1.setEndFlag( "0" ); } } } /** * 扫描句子中的敏感词 * @param jvzi * @param sentiveObj * @return */ public static String scanSentiveKey(String jvzi,SentiveObj 
sentiveObj){ String result = null ; for ( int i = 0 ; i < jvzi.length(); i++) { String keyChar = String.valueOf(jvzi.charAt(i)); if (sentiveObj.getChildMap().containsKey(keyChar)) { //匹配到敏感词开头了,进行循环匹配是否完整敏感词 result = matchSentiveKey(sentiveObj, jvzi, i); } } return result; // 如果没有找到敏感词,返回 null } /** * 匹配句子中的敏感词 * @param sentiveObj * @param s * @param i * @return */ public static String matchSentiveKey(SentiveObj sentiveObj,String s, int i){ for (; i < s.length(); i++) { System.out.println(s.charAt(i)); System.out.println(sentiveObj.getChildMap()); System.out.println(sentiveObj.getChildMap().get( "习" )); Map<String,SentiveObj> map = sentiveObj.getChildMap(); sentiveObj = map.get(String.valueOf(s.charAt(i))); if (sentiveObj == null ){ return null ; } else if (sentiveObj.getEndFlag().equals( "0" )){ return "敏感词:" +sentiveObj.getBeforeSentiveKey(); } } return null ; } } |
下面是敏感词节点的实体类
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | package com.htht.business.utils; import lombok.Data; import java.util.Map; /** * @author linjunwei * @version 2024/12/6 9:17 */ @Data public class SentiveObj { /** * 当前敏感词的key */ private String sentiveKey; /** * 敏感词前缀 */ private String beforeSentiveKey; /** * 敏感词字map */ private Map<String,SentiveObj> childMap; /** * 是否敏感词最后一个字 0:是 1:否 */ private String endFlag; public SentiveObj(){ this .endFlag = "1" ; } } |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律