过滤广告(只能发布 [a-zA-Z0-9及汉字,;?.]),排除其他特殊符号

 
/**
 * Advertisement filter for user-submitted text.
 *
 * <p>Only ASCII letters ({@code a-z}, {@code A-Z}), ASCII digits, CJK ideographs in the
 * range U+4E00..U+9FA5 and the punctuation {@code , ; ? .} (together with the full-width
 * comma, semicolon, question mark and the ideographic full stop) may be published;
 * every other symbol is stripped by {@link #clearNotChinese(String)}.
 * {@link #replaceill(String)} then masks everything that can stand for a digit so that
 * long digit sequences (typically QQ numbers in spam) can be spotted.
 *
 * Created by 1 on 2015/8/19.
 */
public class FilterAD {

    /** Publishable punctuation: ASCII {@code , ; . ?} plus U+FF0C, U+FF1B, U+FF1F and U+3002. */
    private static final String ALLOWED_PUNCTUATION = ",;.?\uFF0C\uFF1B\uFF1F\u3002";

    /**
     * Chinese characters that are treated as digits: the everyday numerals one..ten
     * followed by the formal (financial) numerals one..nine.
     *
     * <p>Listing them explicitly replaces the hand-maintained exclusion ranges, which
     * had let U+516D ("six") through and had used U+59F9 where the formal "one",
     * U+58F9, was meant.
     */
    private static final String CHINESE_NUMERALS =
            "\u4E00\u4E8C\u4E09\u56DB\u4E94\u516D\u4E03\u516B\u4E5D\u5341"
                    + "\u58F9\u8D30\u53C1\u8086\u4F0D\u9646\u67D2\u634C\u7396";

    /** A digit count above this value is reported as an advertisement. */
    private static final int MAX_DIGIT_COUNT = 5;

    /** Once more than this many empty tokens have been seen, the running digit count restarts. */
    private static final int MAX_GAP_COUNT = 2;

    /**
     * Demo entry point: prints the cleaned text, the digit mask and any findings for two
     * sample messages.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String str = "afas141541d1221fs三df大法师⒉一二三一四①⑶112·2312,;?.,;?。";
        System.out.println(clearNotChinese(str));
        System.out.println(replaceill(clearNotChinese(str)));

        str = "非农白银裙,55,193,05在/线/指/导、验证【730】";
        System.out.println(clearNotChinese(str));
        System.out.println(replaceill(clearNotChinese(str)));
    }

    /**
     * Removes every symbol that is not allowed to be published.
     *
     * @param buff text to clean; must not be {@code null}
     * @return {@code buff} without any code point other than ASCII letters, ASCII digits,
     *         CJK ideographs U+4E00..U+9FA5 and the allowed punctuation
     */
    public static String clearNotChinese(String buff) {
        StringBuilder kept = new StringBuilder(buff.length());
        // Walk by code point so that a surrogate pair is handled (and dropped) as one symbol.
        for (int i = 0; i < buff.length(); ) {
            int cp = buff.codePointAt(i);
            i += Character.charCount(cp);
            if (isAsciiLetter(cp) || isAsciiDigit(cp) || isCjk(cp) || isAllowedPunctuation(cp)) {
                kept.appendCodePoint(cp);
            }
        }
        return kept.toString();
    }

    /**
     * Builds a digit mask of the text and reports suspicious digit sequences.
     *
     * <p>Every code point that is an ASCII letter, allowed punctuation or a CJK ideograph
     * other than a Chinese numeral becomes {@code ','}; every other code point (ASCII
     * digits, Chinese numerals, any remaining symbol) becomes {@code '#'}. The mask is
     * printed to standard output, then scanned: each run of more than five {@code '#'},
     * and each point at which short runs lying close together add up to more than five,
     * prints the message "存在qq广告".
     *
     * @param buff text to inspect; must not be {@code null}
     * @return the mask, consisting only of {@code '#'} and {@code ','}, one character per
     *         input code point
     */
    public static String replaceill(String buff) {
        String str = buildDigitMask(buff);

        System.out.println(str);
        String[] nums = str.split(",");

        int numcount = 0; // digits accumulated over neighbouring short runs
        int specount = 0; // empty tokens (extra separators) seen since the last reset
        for (String num : nums) {
            if (num.length() > MAX_DIGIT_COUNT) {
                System.out.println("存在qq广告");
            } else if (num.length() > 0) {
                // Too much text since the previous runs: start a fresh group.
                if (specount > MAX_GAP_COUNT) {
                    numcount = 0;
                    specount = 0;
                }
                numcount += num.length();
                if (numcount > MAX_DIGIT_COUNT) {
                    System.out.println("存在qq广告");
                    specount = 0;
                }
            } else {
                // split() yields an empty token for each additional consecutive separator.
                specount++;
            }
        }

        return str;
    }

    /** Maps each code point of {@code text} to {@code ','} (ordinary text) or {@code '#'} (digit-like). */
    private static String buildDigitMask(String text) {
        StringBuilder mask = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); ) {
            int cp = text.codePointAt(i);
            i += Character.charCount(cp);
            boolean ordinaryText = isAsciiLetter(cp)
                    || isAllowedPunctuation(cp)
                    || (isCjk(cp) && !isChineseNumeral(cp));
            mask.append(ordinaryText ? ',' : '#');
        }
        return mask.toString();
    }

    /** Returns whether {@code cp} is one of {@code a-z} or {@code A-Z}. */
    private static boolean isAsciiLetter(int cp) {
        return (cp >= 'a' && cp <= 'z') || (cp >= 'A' && cp <= 'Z');
    }

    /** Returns whether {@code cp} is one of {@code 0-9}. */
    private static boolean isAsciiDigit(int cp) {
        return cp >= '0' && cp <= '9';
    }

    /** Returns whether {@code cp} lies in the CJK ideograph range U+4E00..U+9FA5. */
    private static boolean isCjk(int cp) {
        return cp >= 0x4E00 && cp <= 0x9FA5;
    }

    /** Returns whether {@code cp} is one of the publishable punctuation marks. */
    private static boolean isAllowedPunctuation(int cp) {
        return ALLOWED_PUNCTUATION.indexOf(cp) >= 0;
    }

    /** Returns whether {@code cp} is a Chinese numeral listed in {@link #CHINESE_NUMERALS}. */
    private static boolean isChineseNumeral(int cp) {
        return CHINESE_NUMERALS.indexOf(cp) >= 0;
    }

}


来自为知笔记(Wiz)


posted on 2015-08-20 10:38  知识铺  阅读(386)  评论(0)  编辑  收藏  举报