几个多字节和UNICODE及UTF-8之间相互转化的函数
做 VC++ 开发免不了在多字节、UNICODE、UTF-8 之间转来转去,下面贴出我写的几个字符转化函数。
1、TCHAR 和 char 之间的转化。TCHAR 在 UNICODE 和多字节编译配置下分别对应 WCHAR 和 char,函数如下:
需要注意:*dest 是在函数内部用 new[] 分配的,调用方使用完后需要用 delete[] 释放。
//char 字符串转化为tchar字符串 void C2T(TCHAR** dest, const char* src) { #ifdef _UNICODE if (src == NULL) { return ; } size_t alen = strlen(src) + 1; size_t ulen = (size_t)MultiByteToWideChar(CP_ACP, 0, src,alen,NULL, 0 )+1; *dest = new WCHAR[ulen]; ::MultiByteToWideChar(CP_ACP, 0, src, alen, *dest, ulen); #else //多字节TCHAR就是 char int len = strlen(src)+1; *dest = new char[len]; strcpy(*dest, src); #endif }
2、TCHAR转化为多字节,同样要注意*dest指针释放
// Converts a NUL-terminated TCHAR string into a newly allocated ANSI
// (CP_ACP) string.
//
// dest: receives a buffer allocated with new[]; the caller must release it
//       with delete[].
// src:  NUL-terminated TCHAR string to convert.
//
// If dest or src is NULL, or the conversion fails, *dest is left untouched.
// Initialise *dest to NULL before the call to be able to detect failure.
void T2C(char** dest, const TCHAR* src)
{
    if (dest == NULL || src == NULL)
    {
        return;
    }

#ifdef _UNICODE
    // With an input length of -1 the returned byte count includes the
    // terminating NUL.
    const int ansiLen = ::WideCharToMultiByte(CP_ACP, 0, src, -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
    {
        return;
    }

    char* buffer = new char[ansiLen];
    if (::WideCharToMultiByte(CP_ACP, 0, src, -1, buffer, ansiLen, NULL, NULL) == 0)
    {
        // Never hand an uninitialised, unterminated buffer to the caller.
        delete[] buffer;
        return;
    }
    *dest = buffer;
#else
    // In a multi-byte build TCHAR is char, so a plain copy is enough.
    const size_t len = _tcslen(src) + 1;
    char* buffer = new char[len];
    memcpy(buffer, src, len);
    *dest = buffer;
#endif
}
3、下面两个函数跟上面的有点重复但是后面会用到,也贴出来
//多字节转化为宽字节 void C2W(WCHAR** dest, const char* src) { if (src == NULL) { return ; } size_t alen = strlen(src) + 1; size_t ulen = (size_t)MultiByteToWideChar(CP_ACP, 0, src,alen,NULL, 0 )+1; *dest = new WCHAR[ulen]; ::MultiByteToWideChar(CP_ACP, 0, src, alen, *dest, ulen); } //宽字节转化为多字节 void W2C(char** dest, const WCHAR *src) { if(src == NULL) return ; size_t len = WideCharToMultiByte(CP_ACP, 0, src, -1, NULL, 0 , NULL, NULL); if (len == 0) { return; } *dest = new char[len]; WideCharToMultiByte( CP_ACP, 0, src, -1, *dest, len, NULL, NULL ); }
4、UTF-8和多字节及宽字节之间的转化
//UNICODE可以直接转化为UTF-8 void UnicodeToUtf8(char** dest , const WCHAR* src) { ASSERT(dest!= NULL || src != NULL); int len = -1; len = WideCharToMultiByte(CP_UTF8, 0, src, -1, 0, 0, 0, 0)+1; *dest = new char[len+1]; ::WideCharToMultiByte(CP_UTF8, 0, src, -1,*dest, len, 0, 0); } //多字节要先转化为宽字节在转化为UTF-8 void AnsiToUtf8(char** dest, const char* src) { ASSERT(dest!= NULL || src != NULL); WCHAR* pwszStr = NULL; C2W(&pwszStr, src); UnicodeToUtf8(dest, pwszStr); SAFE_ARRYDELETE(pwszStr); }
UTF-8转化为多字节或者UNICODE:
void Utf8ToAnsi(char** dest, const char* src) { ASSERT(dest!= NULL || src != NULL); WCHAR* str = NULL; Utf8ToUnicode(&str, src); W2C(dest, str); SAFE_ARRYDELETE(str); } void Utf8ToUnicode(WCHAR** dest,const char* src) { ASSERT(dest!= NULL || src != NULL); int unicodeLen = ::MultiByteToWideChar( CP_UTF8, 0, src, -1, NULL, 0 ) + 1; *dest = new WCHAR[unicodeLen]; //memset(*dest, 0x0, (unicodeLen + 1)*sizeof(WCHAR)); MultiByteToWideChar(CP_UTF8, 0, src, -1, *dest, unicodeLen); }
SAFE_ARRYDELETE 是一个内存释放宏,定义如下:
// Releases an array allocated with new[] and resets the pointer to NULL.
// The do { ... } while (0) wrapper makes the macro behave as one statement,
// so it is safe inside an unbraced if/else. delete[] on a null pointer is a
// no-op, so no explicit null check is needed.
// Note: x is evaluated more than once; pass a plain pointer lvalue.
#define SAFE_ARRYDELETE(x) do { delete[] (x); (x) = NULL; } while (0)

// Releases a single object allocated with new and resets the pointer to
// NULL. Same statement-safety and evaluation notes as SAFE_ARRYDELETE.
#define SAFE_DELETE(x) do { delete (x); (x) = NULL; } while (0)
有了以上函数,基本上不同编码之间可以随意转换,大家也可以对以上这些函数进行扩展。需要注意的是,上面这些函数通过 dest 返回的缓冲区都是用 new[] 分配的,调用方使用完后需要用 delete[] 释放。