Java 敏感字过滤
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Filters sensitive words out of a text by replacing every character of each
 * matched word with {@code '*'}.
 *
 * <p>The dictionary is taken from the static {@link #keys} array at construction
 * time and compiled into a character trie owned by the instance. The trie is
 * never modified after the constructor returns.
 */
public class SensitiveWordUtils {

    /** Sensitive-word dictionary; individual words are separated by {@code '@'}. */
    static final String keysContent = "@要过滤的字";

    /**
     * The dictionary split into words. It may contain empty entries (for
     * example when {@link #keysContent} starts with the separator); those are
     * skipped when the trie is built.
     */
    static String[] keys = null;

    /*
     * The static initializer runs once and registers the sensitive words.
     */
    static {
        keys = keysContent.split("@");
    }

    /** One node of the dictionary trie. */
    private static final class TrieNode {
        /** Child nodes keyed by the next character of a word. */
        final Map<Character, TrieNode> children = new HashMap<Character, TrieNode>();
        /** True when the path from the root to this node spells a complete word. */
        boolean terminal;
    }

    /** Root of this instance's dictionary trie. */
    private final TrieNode root = new TrieNode();

    /**
     * Builds the filter from the current contents of {@link #keys}.
     *
     * @param tContent
     *            unused; kept so existing callers that pass the text to be
     *            filtered keep compiling
     */
    public SensitiveWordUtils(String tContent) {
        String[] words = keys;
        if (words == null) {
            return;
        }
        for (String word : words) {
            // split() keeps a leading empty string when the dictionary starts
            // with the separator; an empty word can never match anything.
            if (word == null || word.length() == 0) {
                continue;
            }
            addWord(word);
        }
    }

    /** Inserts one word into the trie and marks its last node as a word end. */
    private void addWord(String word) {
        TrieNode node = root;
        for (int i = 0; i < word.length(); i++) {
            Character ch = Character.valueOf(word.charAt(i));
            TrieNode next = node.children.get(ch);
            if (next == null) {
                next = new TrieNode();
                node.children.put(ch, next);
            }
            node = next;
        }
        node.terminal = true;
    }

    /**
     * Replaces every sensitive word in {@code content} with asterisks, one
     * {@code '*'} per replaced character.
     *
     * <p>The text is scanned left to right. At each position the longest
     * dictionary word starting there is masked and scanning resumes after it.
     *
     * @param content
     *            the text to filter; may be {@code null} or empty
     * @return the filtered text, or {@code content} itself when it is
     *         {@code null} or empty
     */
    public String replace(String content) {
        if (content == null || content.length() == 0) {
            return content;
        }
        char[] masked = content.toCharArray();
        int i = 0;
        while (i < masked.length) {
            int end = longestMatchEnd(content, i);
            if (end > i) {
                Arrays.fill(masked, i, end, '*');
                i = end;
            } else {
                i++;
            }
        }
        return new String(masked);
    }

    /**
     * Returns the exclusive end index of the longest dictionary word that
     * starts at {@code start}, or {@code -1} when no word starts there.
     */
    private int longestMatchEnd(String text, int start) {
        TrieNode node = root;
        int end = -1;
        for (int pos = start; pos < text.length(); pos++) {
            node = node.children.get(Character.valueOf(text.charAt(pos)));
            if (node == null) {
                break;
            }
            // Only a terminal node counts: a bare prefix of a longer word is not a hit.
            if (node.terminal) {
                end = pos + 1;
            }
        }
        return end;
    }
}
1 import java.util.ArrayList; 2 import java.util.List; 3 4 5 public class TestMGC { 6 7 public static void main(String[] args) { 8 //被过滤内容 9 String contentStr = "法律严禁传播色情"; 10 11 SensitiveWordUtils swu = new SensitiveWordUtils(contentStr); 12 System.out.println(swu.replace(contentStr)); 13 } 14 }
原文地址: http://www.oschina.net/code/snippent_813213_14355