/* Converting UTF-8 encoding to Unicode encoding (C) */
/**
 * Convert a NUL-terminated UTF-8 string into 16-bit Unicode (UTF-16) code
 * units, each stored low byte first.
 *
 * Behavior worth knowing:
 *  - Leading ASCII spaces (0x20) are dropped: a space is skipped for as long
 *    as nothing has been written to dst yet.
 *  - Code points above U+FFFF (4-byte UTF-8 sequences) are written as a
 *    UTF-16 surrogate pair (4 output bytes). Values above U+10FFFF, which
 *    UTF-16 cannot represent, are replaced by U+FFFD.
 *  - Conversion stops at the first byte that does not start a well-formed
 *    1- to 4-byte lead/continuation pattern, after reporting it through
 *    INFOBOX_Show(). Everything converted up to that point is kept.
 *
 * @param dst  Output buffer. Receives the converted bytes followed by ONE
 *             zero byte. Twice the byte length of src plus one byte is always
 *             enough room.
 * @param src  NUL-terminated UTF-8 input.
 * @return     Number of bytes written to dst, not counting the trailing zero
 *             byte.
 *
 * NOTE(review): a UTF-16 string terminator is normally two zero bytes; only
 * one is written here, as before, so that existing caller buffer sizes stay
 * valid. Confirm what the consumers of dst expect.
 */
WORD UTF8_to_Unicode(BYTE *dst, BYTE *src)
{
    WORD i = 0; /* number of bytes written to dst so far */

    while (*src)
    {
        unsigned long cp; /* decoded code point; may need up to 21 bits */
        int codeLen;

        /*
         * 1. UTF-8 ---> code point.
         * The continuation-byte tests are evaluated left to right and
         * short-circuit, so a NUL terminator inside a truncated sequence
         * fails its test before any byte past the terminator is read.
         */
        if (0 == (src[0] & 0x80))
        {
            /* One byte: 0xxxxxxx */
            codeLen = 1;
            cp = src[0];
        }
        else if (0xC0 == (src[0] & 0xE0) && 0x80 == (src[1] & 0xC0))
        {
            /* Two bytes: 110xxxxx 10xxxxxx */
            codeLen = 2;
            cp = ((unsigned long)(src[0] & 0x1F) << 6)
               |  (unsigned long)(src[1] & 0x3F);
        }
        else if (0xE0 == (src[0] & 0xF0) && 0x80 == (src[1] & 0xC0)
                 && 0x80 == (src[2] & 0xC0))
        {
            /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
            codeLen = 3;
            cp = ((unsigned long)(src[0] & 0x0F) << 12)
               | ((unsigned long)(src[1] & 0x3F) << 6)
               |  (unsigned long)(src[2] & 0x3F);
        }
        else if (0xF0 == (src[0] & 0xF8) && 0x80 == (src[1] & 0xC0)
                 && 0x80 == (src[2] & 0xC0) && 0x80 == (src[3] & 0xC0))
        {
            /*
             * Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
             * The lead mask is 0xF8 so that 0xF8..0xFF are rejected instead
             * of being mistaken for a 4-byte lead.
             */
            codeLen = 4;
            cp = ((unsigned long)(src[0] & 0x07) << 18)
               | ((unsigned long)(src[1] & 0x3F) << 12)
               | ((unsigned long)(src[2] & 0x3F) << 6)
               |  (unsigned long)(src[3] & 0x3F);
        }
        else
        {
            /*
             * Not a recognizable sequence. The message reads "Unicode code
             * longer than 4 bytes"; the meaning of the second argument is
             * defined by INFOBOX_Show (presumably a display time - confirm).
             */
            INFOBOX_Show("超出4字节的Unicode码", 100);
            break;
        }
        src += codeLen;

        /* Drop spaces that appear before any output has been produced. */
        if (i == 0 && cp == 0x20UL)
        {
            continue;
        }

        /* UTF-16 cannot encode anything above U+10FFFF. */
        if (cp > 0x10FFFFUL)
        {
            cp = 0xFFFDUL;
        }

        /* 2. Code point ---> 16-bit units, low byte first. */
        if (cp >= 0x10000UL)
        {
            /* Outside the BMP: split the 20-bit offset into a surrogate pair. */
            unsigned long offset = cp - 0x10000UL;
            WORD high = (WORD)(0xD800UL | (offset >> 10));
            WORD low  = (WORD)(0xDC00UL | (offset & 0x3FFUL));

            *dst++ = (BYTE)(high & 0xff);
            *dst++ = (BYTE)((high >> 8) & 0xff);
            *dst++ = (BYTE)(low & 0xff);
            *dst++ = (BYTE)((low >> 8) & 0xff);
            i += 4;
        }
        else
        {
            *dst++ = (BYTE)(cp & 0xff);
            *dst++ = (BYTE)((cp >> 8) & 0xff);
            i += 2;
        }
    } /* end while */

    *dst = 0;
    return i;
}