java 敏感字过滤

  1 import java.util.ArrayList;
  2 import java.util.Arrays;
  3 import java.util.HashMap;
  4 import java.util.Iterator;
  5 import java.util.List;
  6 import java.util.Map;
  7 import java.util.Map.Entry;
  8 /**
  9  * 过滤敏感词,并把敏感词替换成*
 10  * 
 11  */
 12 public class SensitiveWordUtils {
 13 
 14     //敏感词库
 15     static final String keysContent = "@要过滤的字";
 49     static String[] keys = null;
 50 
 51     static ArrayList<String> first = new ArrayList<String>();
 52     static String[] sortFirst;
 53     static char[] charFirst;
 54     static HashMap<String, ArrayList<String>> map = new HashMap<String, ArrayList<String>>();
 55     static HashMap<String, String[]> sortMap = new HashMap<String, String[]>();
 56     static HashMap<String, char[]> charMap = new HashMap<String, char[]>();
 57 
 58     static ArrayList<String> temp;
 59     static String key, value;
 60     int length;
 61 
 62 
 63     /*
 64      * 静态代码块只会被执行一次 用来注册敏感词
 65      */
 66     static {
 67         keys = keysContent.split("@");
 68     }
 69 
 70     /**
 71      * 带参数的构造函数
 72      * 
 73      * @param keys
 74      *            敏感词
 75      * @param tContent
 76      *            需要过滤的内容
 77      */
 78     public SensitiveWordUtils(String tContent) {
 79         for (String k : keys) {
 80             if (!first.contains(k.substring(0, 1))) {
 81                 first.add(k.substring(0, 1));
 82             }
 83             length = k.length();
 84             for (int i = 1; i < length; i++) {
 85                 key = k.substring(0, i);
 86                 value = k.substring(i, i + 1);
 87                 if (i == 1 && !first.contains(key)) {
 88                     first.add(key);
 89                 }
 90 
 91                 // 有,添加
 92                 if (map.containsKey(key)) {
 93                     if (!map.get(key).contains(value)) {
 94                         map.get(key).add(value);
 95                     }
 96                 }
 97                 // 没有添加
 98                 else {
 99                     temp = new ArrayList<String>();
100                     temp.add(value);
101                     map.put(key, temp);
102                 }
103             }
104         }
105         sortFirst = first.toArray(new String[first.size()]);
106         Arrays.sort(sortFirst); // 排序
107 
108         charFirst = new char[first.size()];
109         for (int i = 0; i < charFirst.length; i++) {
110             charFirst[i] = first.get(i).charAt(0);
111         }
112         Arrays.sort(charFirst); // 排序
113 
114         String[] sortValue;
115         ArrayList<String> v;
116         Map.Entry<String, ArrayList<String>> entry;
117         Iterator<Entry<String, ArrayList<String>>> iter = map.entrySet()
118         .iterator();
119         while (iter.hasNext()) {
120             entry = (Map.Entry<String, ArrayList<String>>) iter.next();
121             v = (ArrayList<String>) entry.getValue();
122             sortValue = v.toArray(new String[v.size()]);
123             Arrays.sort(sortValue); // 排序
124             sortMap.put(entry.getKey(), sortValue);
125         }
126 
127         char[] charValue;
128         iter = map.entrySet().iterator();
129         while (iter.hasNext()) {
130             entry = (Map.Entry<String, ArrayList<String>>) iter.next();
131             v = (ArrayList<String>) entry.getValue();
132             charValue = new char[v.size()];
133             for (int i = 0; i < charValue.length; i++) {
134                 charValue[i] = v.get(i).charAt(0);
135             }
136             Arrays.sort(charValue); // 排序
137             charMap.put(entry.getKey(), charValue);
138         }
139     }
140     /**
141      * 把敏感词替换成*
142      * 
143      * @param content
144      *            需要过滤的内容
145      * @return 过滤完后的符合要求的内容
146      */
147     public String replace(String content) {
148         String r = null, f, c = content;
149         String replacedword = content;
150         char g;
151         char[] temps;
152         int length = c.length();
153         for (int i = 0; i < length - 1; i++) {
154             g = c.charAt(i);
155             // 二分查找
156             if (Arrays.binarySearch(charFirst, g) > -1) {
157                 tag : for (int j = i + 1; j < length; j++) {
158                     f = c.substring(i, j);
159                     g = c.charAt(j);
160                     temps = charMap.get(f);
161                     if (temps == null) { // 找到了
162                         //System.out.println("ok");
163                         r = f;
164                         String str = "";
165                         for (int m = 1; m <= r.length(); m++) {
166                             str = str + "*";
167                         }
168                         replacedword = c.replace(r, str);
169                         c = replacedword;
170                         break tag;
171                     }
172                     // 二分查找
173                     if (Arrays.binarySearch(temps, g) > -1) {
174                         if (j == length - 1) {
175                             // print("find!");
176                             //System.out.println("find!");
177                             r = c.substring(i, j + 1);
178                             String str = "";
179                             for (int m = 1; m <= r.length(); m++) {
180                                 str = str + "*";
181                             }
182                             replacedword = c.replace(r, str);
183                             c = replacedword;
184                             break tag;
185                         }
186                     } else { // 没有找到了
187                         break;
188                     }
189                 }
190             }
191         }
192         return replacedword;
193     }
194 }
 1 import java.util.ArrayList;
 2 import java.util.List;
 3 
 4 
 5 public class TestMGC {
 6 
 7     public static void main(String[] args) {
 8         //被过滤内容
 9         String contentStr = "法律严禁传播色情";
10         
11         SensitiveWordUtils swu = new SensitiveWordUtils(contentStr);
12         System.out.println(swu.replace(contentStr));
13     }
14 }

原文地址: http://www.oschina.net/code/snippent_813213_14355

posted @ 2012-10-19 11:11  依稀|.мīss.чou  阅读(560)  评论(1)  编辑  收藏  举报