过滤广告(只能发布 [a-zA-Z0-9及汉字,;?.]),排除其他特殊符号
/**
 * Simple advertisement filter for user-submitted text.
 *
 * <p>{@link #clearNotChinese(String)} strips every character that is not an ASCII letter, an
 * ASCII digit, a CJK ideograph in U+4E00..U+9FA5, or one of the punctuation marks
 * {@code , ; . ?} and their full-width counterparts (U+FF0C, U+FF1B, U+FF1F, U+3002).
 * {@link #replaceill(String)} then builds a digit mask of the text and reports long digit
 * sequences (such as QQ numbers) on standard output.
 *
 * <p>Created by 1 on 2015/8/19.
 */
public class FilterAD {

    /** Matches every character that {@link #clearNotChinese(String)} removes. */
    private static final java.util.regex.Pattern DISALLOWED =
            java.util.regex.Pattern.compile(
                    "[^a-zA-Z0-9\\u4E00-\\u9FA5,;.?\\uFF0C\\uFF1B\\uFF1F\\u3002]");

    /**
     * Matches every character that {@link #replaceill(String)} treats as "digit-like": anything
     * that is not a letter, a CJK ideograph or permitted punctuation (so ASCII digits and any
     * other symbol), plus the Chinese numerals one to ten (U+4E00 U+4E8C U+4E09 U+56DB U+4E94
     * U+516D U+4E03 U+516B U+4E5D U+5341) and the formal numerals one to nine (U+58F9 U+8D30
     * U+53C1 U+8086 U+4F0D U+9646 U+67D2 U+634C U+7396).
     */
    private static final java.util.regex.Pattern DIGIT_LIKE =
            java.util.regex.Pattern.compile(
                    "[^a-zA-Z\\u4E00-\\u9FA5,;.?\\uFF0C\\uFF1B\\uFF1F\\u3002]"
                            + "|[\\u4E00\\u4E8C\\u4E09\\u56DB\\u4E94\\u516D\\u4E03\\u516B\\u4E5D\\u5341"
                            + "\\u58F9\\u8D30\\u53C1\\u8086\\u4F0D\\u9646\\u67D2\\u634C\\u7396]");

    /** Matches every character of the intermediate mask that is not the digit marker. */
    private static final java.util.regex.Pattern NOT_DIGIT_MARK =
            java.util.regex.Pattern.compile("[^#]");

    /** More than this many digit-like characters close together is reported as an ad. */
    private static final int MAX_DIGITS = 5;

    /** Once more than this many empty gaps have been seen, the running digit total restarts. */
    private static final int MAX_GAPS = 2;

    /**
     * Demo entry point: prints the cleaned text and the digit mask for two sample inputs.
     *
     * @param args unused
     * @throws java.io.UnsupportedEncodingException never thrown by the current body; the clause
     *     is kept so the signature stays the same, and is fully qualified because this file has
     *     no import for it
     */
    public static void main(String[] args) throws java.io.UnsupportedEncodingException {
        String str = "afas141541d1221fs三df大法师⒉一二三一四①⑶112·2312,;?.,;?。";
        String cleaned = clearNotChinese(str);
        System.out.println(cleaned);
        System.out.println(replaceill(cleaned));

        str = "非农白银裙,55,193,05在/线/指/导、验证【730】";
        cleaned = clearNotChinese(str);
        System.out.println(cleaned);
        System.out.println(replaceill(cleaned));
    }

    /**
     * Removes every character that is not an ASCII letter, an ASCII digit, a CJK ideograph in
     * U+4E00..U+9FA5, or one of {@code , ; . ?} / U+FF0C U+FF1B U+FF1F U+3002.
     *
     * @param buff text to clean; must not be {@code null}
     * @return {@code buff} with all other characters deleted
     * @throws NullPointerException if {@code buff} is {@code null}
     */
    public static String clearNotChinese(String buff) {
        return DISALLOWED.matcher(buff).replaceAll("");
    }

    /**
     * Builds a digit mask of {@code buff} and prints a warning for suspicious digit sequences.
     *
     * <p>Every digit-like character (see {@link #DIGIT_LIKE}) becomes {@code '#'} and every
     * other character becomes {@code ','}. The mask is printed to standard output, then split
     * on {@code ','} and scanned:
     * <ul>
     *   <li>a single run of more than {@value #MAX_DIGITS} marks prints the warning;</li>
     *   <li>shorter runs are summed, and the warning is printed whenever the running total
     *       exceeds {@value #MAX_DIGITS};</li>
     *   <li>empty tokens (produced by consecutive non-digit characters) are counted as gaps;
     *       when a short run is reached after more than {@value #MAX_GAPS} gaps, the running
     *       total and the gap count restart from zero.</li>
     * </ul>
     *
     * @param buff text to inspect; must not be {@code null}
     * @return the mask, one {@code '#'} or {@code ','} per character of {@code buff}
     * @throws NullPointerException if {@code buff} is {@code null}
     */
    public static String replaceill(String buff) {
        String mask = DIGIT_LIKE.matcher(buff).replaceAll("#");
        mask = NOT_DIGIT_MARK.matcher(mask).replaceAll(",");
        // Diagnostic output kept from the original implementation.
        System.out.println(mask);

        int digitTotal = 0; // digit-like characters accumulated across short runs
        int gapCount = 0;   // empty tokens seen since the last reset
        for (String run : mask.split(",")) {
            if (run.length() > MAX_DIGITS) {
                // One uninterrupted run is already long enough; counters are left untouched.
                System.out.println("存在qq广告");
                continue;
            }
            if (run.isEmpty()) {
                gapCount++;
                continue;
            }
            if (gapCount > MAX_GAPS) {
                // Too much ordinary text since the previous digits: start a new sequence.
                digitTotal = 0;
                gapCount = 0;
            }
            digitTotal += run.length();
            if (digitTotal > MAX_DIGITS) {
                System.out.println("存在qq广告");
                gapCount = 0;
            }
        }
        return mask;
    }
}