[Java SE]Unicode解码

文由

将 ASCII、其他非 Unicode 字符与 Unicode 转义序列混合的"脏的、不规范的"编码文本转为正常文本。

源码

unicodetoString(String unicode)

public class StringUtil {
    /** Two-character marker (a backslash followed by 'u') that introduces an escape sequence. */
    private static final String ESCAPE_PREFIX = "\\u";

    /** Number of hexadecimal digits that must follow the marker. */
    private static final int HEX_DIGIT_COUNT = 4;

    /**
     * Decodes escape sequences of the form "backslash, 'u', four hex digits" that are
     * embedded in otherwise plain text.
     *
     * <p>Every well-formed escape is replaced by the UTF-16 code unit it denotes; all
     * other characters are copied through unchanged, so "dirty" input that mixes plain
     * text with escapes is supported. An escape marker that is not followed by exactly
     * four hexadecimal digits (truncated input, non-hex characters, or a sign character)
     * is treated as ordinary text and copied literally instead of raising
     * {@link NumberFormatException}.
     *
     * <p>Example: the Java literal {@code "AB\\u0041\\u0042CD"} decodes to {@code "ABABCD"}.
     *
     * @param unicode the text to decode; may be {@code null}
     * @return the decoded text, or {@code null} when {@code unicode} is {@code null} or
     *         empty (this return value is kept for backward compatibility)
     */
    public static String unicodetoString(String unicode) {
        if (unicode == null || unicode.isEmpty()) {
            return null;
        }
        final int length = unicode.length();
        StringBuilder decoded = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            // startsWith inspects only the current position; indexOf would rescan the
            // remainder of the string on every iteration.
            if (!unicode.startsWith(ESCAPE_PREFIX, pos)) {
                decoded.append(unicode.charAt(pos));
                pos++;
                continue;
            }
            int codeUnit = parseHexQuad(unicode, pos + ESCAPE_PREFIX.length());
            if (codeUnit >= 0) {
                decoded.append((char) codeUnit);
                pos += ESCAPE_PREFIX.length() + HEX_DIGIT_COUNT;
            } else {
                // Malformed or truncated escape: keep the marker as literal text and
                // let the following characters be handled as ordinary input.
                decoded.append(ESCAPE_PREFIX);
                pos += ESCAPE_PREFIX.length();
            }
        }
        return decoded.toString();
    }

    /**
     * Parses four hexadecimal digits of {@code text} starting at {@code start}.
     *
     * @return the parsed value in the range 0..0xFFFF, or -1 when fewer than four
     *         characters remain or any of them is not a hexadecimal digit
     */
    private static int parseHexQuad(String text, int start) {
        if (start + HEX_DIGIT_COUNT > text.length()) {
            return -1;
        }
        int value = 0;
        for (int i = start; i < start + HEX_DIGIT_COUNT; i++) {
            int digit = Character.digit(text.charAt(i), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}

Test

// Sample input: quoted ASCII text with four escaped CJK characters embedded in it.
final String encoded = "\"2ABRT3425\\u884C\\u653F\\u590D\\u8BAE436FDGDSD\"";
System.out.println("Unicode:" + encoded);
// Decode the escape sequences and print the readable result.
final String decoded = StringUtil.unicodetoString(encoded);
System.out.println("解码:" + decoded);

[result]

Unicode:"2ABRT3425\u884C\u653F\u590D\u8BAE436FDGDSD"
解码:"2ABRT3425行政复议436FDGDSD"
posted @ 2021-01-21 11:52  千千寰宇  阅读(846)  评论(0)  编辑  收藏  举报