utf8 to unicode

JSON UTF-8 转 Unicode(由 STM32 发烧群友提供),仅留作参考,不保证其准确及可用。

/*
 * Convert a NUL-terminated UTF-8 string into UTF-16 little-endian bytes.
 *
 * dst  Output buffer. Each decoded character is stored as one 16-bit code
 *      unit (low byte first); characters above U+FFFF are stored as a
 *      surrogate pair (two code units, 4 bytes). A single 0 byte is written
 *      after the last code unit. The largest expansion is 2 output bytes per
 *      input byte, so 2 * strlen(src) + 1 bytes are always sufficient.
 * src  NUL-terminated UTF-8 input.
 *
 * Returns the number of bytes stored in dst, not counting the trailing 0.
 *
 * Behaviour worth knowing:
 *  - Space characters (U+0020) at the very start of the input are dropped;
 *    spaces after the first emitted character are kept.
 *  - Conversion stops at the first malformed sequence (bad lead byte, missing
 *    continuation byte, or a value above U+10FFFF). Everything converted up
 *    to that point is kept and its length is returned.
 *  - Overlong encodings and UTF-8 encoded surrogates are not rejected.
 *  - If dst is NULL nothing is written; if src is NULL an empty result is
 *    produced. Both cases return 0.
 */
u32 UTF8_to_Unicode(char *dst, char *src)
{
    u32 written = 0;    /* bytes stored in dst so far */
    u32 unicode = 0;    /* code point decoded in the current iteration */
    int codeLen = 0;    /* length in bytes of the current UTF-8 sequence */

    if (!dst)
    {
        return 0;
    }
    if (!src)
    {
        *dst = 0;
        return 0;
    }

    while (*src)
    {
        /* 1. UTF-8 -> code point. The && chains stop at the first byte that
         *    is not a continuation byte, so the terminating NUL is never
         *    read past. */
        if (0 == (src[0] & 0x80))
        {
            /* one byte: 0xxxxxxx */
            codeLen = 1;
            unicode = (u32)src[0];
        }
        else if (0xC0 == (src[0] & 0xE0) && 0x80 == (src[1] & 0xC0))
        {
            /* two bytes: 110xxxxx 10xxxxxx */
            codeLen = 2;
            unicode = (((u32)src[0] & 0x1F) << 6)
                    |  ((u32)src[1] & 0x3F);
        }
        else if (0xE0 == (src[0] & 0xF0) && 0x80 == (src[1] & 0xC0)
                 && 0x80 == (src[2] & 0xC0))
        {
            /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
            codeLen = 3;
            unicode = (((u32)src[0] & 0x0F) << 12)
                    | (((u32)src[1] & 0x3F) << 6)
                    |  ((u32)src[2] & 0x3F);
        }
        else if (0xF0 == (src[0] & 0xF8) && 0x80 == (src[1] & 0xC0)
                 && 0x80 == (src[2] & 0xC0) && 0x80 == (src[3] & 0xC0))
        {
            /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
             * (mask 0xF8 so that lead bytes 0xF8..0xFF are rejected) */
            codeLen = 4;
            unicode = (((u32)src[0] & 0x07) << 18)
                    | (((u32)src[1] & 0x3F) << 12)
                    | (((u32)src[2] & 0x3F) << 6)
                    |  ((u32)src[3] & 0x3F);
            if (unicode > 0x10FFFF)
            {
                /* beyond the Unicode range: not representable in UTF-16 */
                break;
            }
        }
        else
        {
            /* malformed input: stop and return what has been converted */
            break;
        }
        src += codeLen;

        /* Drop spaces that precede the first emitted character. */
        if (written == 0 && unicode == 0x20)
        {
            continue;
        }

        /* 2. code point -> UTF-16LE */
        if (unicode >= 0x10000)
        {
            /* Supplementary plane: split into a high/low surrogate pair. */
            u32 offset = unicode - 0x10000;
            u32 high = 0xD800 | (offset >> 10);
            u32 low = 0xDC00 | (offset & 0x3FF);

            *dst++ = (u8)(high & 0xff);
            *dst++ = (u8)((high >> 8) & 0xff);
            *dst++ = (u8)(low & 0xff);
            *dst++ = (u8)((low >> 8) & 0xff);
            written += 4;
        }
        else
        {
            *dst++ = (u8)(unicode & 0xff);
            *dst++ = (u8)((unicode >> 8) & 0xff);
            written += 2;
        }
    } // end while

    /* Single terminating byte, kept as-is so buffers sized for the
     * original contract (2 * strlen(src) + 1) cannot overflow. */
    *dst = 0;

    return written;
}

 

posted @ 2020-01-13 15:44  酒醉的Tiger  阅读(459)  评论(0)  编辑  收藏  举报