UTF-8 解码

// Constants used by getStringRepresentation below. They are listed here
// without type declarations; the decoder uses them as int values.

// A first byte below this value (0xC0) is decoded as a one-byte character.
TWO_BYTE_CONSTANT1 = 192;
// A first byte below this value (0xE0) starts a two-byte character; any
// larger first byte is decoded as the start of a three-byte character.
THREE_BYTE_CONSTANT1 = 224;
// Two-byte character: mask (0x1F) selecting the payload bits of the first byte.
TWO_BYTE_MASK1 = 31;
// Two-byte character: left shift applied to the first byte's payload.
TWO_BYTE_SHIFT1 = 6;
// Two-byte character: mask (0x3F) selecting the payload bits of the second byte.
TWO_BYTE_MASK2 = 63;
// Three-byte character: mask (0x0F) selecting the payload bits of the first byte.
THREE_BYTE_MASK1 = 15;
// Three-byte character: left shift applied to the first byte's payload.
THREE_BYTE_SHIFT1 = 12;
// Three-byte character: left shift applied to the second byte's payload.
THREE_BYTE_SHIFT2 = 6;
// Three-byte character: mask (0x3F) selecting the payload bits of the third byte.
THREE_BYTE_MASK3 = 63;
// Three-byte character: mask (0x3F) selecting the payload bits of the second byte.
THREE_BYTE_MASK2 = 63;

/**
 * Decodes a byte array into a string, reading each character from one, two
 * or three bytes depending on the value of its first byte.
 *
 * The decoding is done by hand because, per the original author's note,
 * "new String(bytes)" doesn't honor the special treatment of the 0
 * character in JRE 1.6_u11. (Presumably the input is the class-file style
 * "modified UTF-8" -- confirm against the caller.)
 *
 * Only the first byte of a character is inspected to pick its length; the
 * redundant flag bits of the second and third bytes are not validated.
 *
 * @param bytes the encoded bytes; must not be null.
 * @return the decoded string.
 * @throws UnsupportedEncodingException if the input ends before a two-byte
 *         or three-byte character is complete.
 */
private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException
    {
        // Every character occupies at least one byte, so the byte count is
        // an upper bound for the number of decoded characters.
        char[] chars = new char[bytes.length];

        int charIndex = 0;
        int byteIndex = 0;
        while (byteIndex < bytes.length)
        {
            int b = bytes[byteIndex++] & 0xff;

            int c;
            if (b < TWO_BYTE_CONSTANT1)
            {
                // Single-byte character: the byte is the character value.
                c = b;
            }
            else
            {
                // The first byte alone decides how many more bytes follow.
                int trailingCount = b < THREE_BYTE_CONSTANT1 ? 1 : 2;

                // Check for truncated input explicitly rather than relying on
                // an ArrayIndexOutOfBoundsException. charIndex has not been
                // advanced yet, so the message shows exactly the characters
                // decoded so far, without a stray trailing NUL.
                if (bytes.length - byteIndex < trailingCount)
                {
                    throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]");
                }

                if (trailingCount == 1)
                {
                    // Two-byte character.
                    c = ((b                  & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) |
                        ((bytes[byteIndex++] & TWO_BYTE_MASK2)                   );
                }
                else
                {
                    // Three-byte character.
                    c = ((b                  & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) |
                        ((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) |
                        ((bytes[byteIndex++] & THREE_BYTE_MASK3)                     );
                }
            }

            chars[charIndex++] = (char)c;
        }

        // Only the first charIndex slots were filled.
        return new String(chars, 0, charIndex);
    }
}

 

posted @ 2013-03-07 11:22  道以万计  阅读(911)  评论(0编辑  收藏  举报