Windows下编码转换相关(UTF-8 & UTF-16)
一、ANSI转换为UTF-16
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
/**
 * Converts a NUL-terminated string in the system ANSI code page (CP_ACP)
 * to a newly allocated, NUL-terminated UTF-16 string.
 *
 * @param str  Source string; may be NULL.
 * @return     A buffer allocated with new[] that the caller must release
 *             with AnsiToUnicodeEnd. The result is never NULL: when str is
 *             NULL or the conversion fails, an empty string is returned.
 */
wchar_t* AnsiToUnicodeBegin(const char* str)
{
    // Measuring pass: with an input length of -1 and no output buffer, the
    // API reports the required size in wchar_t units, terminator included.
    const int wLen = (str != NULL) ? MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0) : 0;
    if (wLen <= 0)
    {
        wchar_t* empty = new wchar_t[1];
        empty[0] = L'\0';
        return empty;
    }

    wchar_t* wBuf = new wchar_t[wLen];
    if (MultiByteToWideChar(CP_ACP, 0, str, -1, wBuf, wLen) == 0)
    {
        // The buffer contents are unspecified after a failure; hand back a
        // well-formed empty string instead of unterminated garbage.
        wBuf[0] = L'\0';
    }
    return wBuf;
}
/**
 * Frees a wchar_t array that was allocated with new[], such as the buffer
 * returned by AnsiToUnicodeBegin.
 *
 * @param wstr_chaged  Buffer to free; NULL is accepted and ignored.
 */
void AnsiToUnicodeEnd(wchar_t* wstr_chaged)
{
    // delete[] on a null pointer is a no-op, so no guard is required. The
    // pointer is received by value, so the caller's copy is not modified.
    delete[] wstr_chaged;
}
二、UTF-8转换为UTF-16
/**
 * Converts a NUL-terminated UTF-8 string to a newly allocated,
 * NUL-terminated UTF-16 string.
 *
 * @param str  Source string; may be NULL.
 * @return     A buffer allocated with new[] that the caller must release
 *             with UTF8ToUnicodeEnd. The result is never NULL: when str is
 *             NULL or the conversion fails, an empty string is returned.
 */
wchar_t* UTF8ToUnicodeBegin(const char* str)
{
    // Measuring pass: with an input length of -1 and no output buffer, the
    // API reports the required size in wchar_t units, terminator included.
    const int wLen = (str != NULL) ? MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0) : 0;
    if (wLen <= 0)
    {
        wchar_t* empty = new wchar_t[1];
        empty[0] = L'\0';
        return empty;
    }

    wchar_t* wBuf = new wchar_t[wLen];
    if (MultiByteToWideChar(CP_UTF8, 0, str, -1, wBuf, wLen) == 0)
    {
        // The buffer contents are unspecified after a failure; hand back a
        // well-formed empty string instead of unterminated garbage.
        wBuf[0] = L'\0';
    }
    return wBuf;
}
/**
 * Frees a wchar_t array that was allocated with new[], such as the buffer
 * returned by UTF8ToUnicodeBegin.
 *
 * @param wstr_chaged  Buffer to free; NULL is accepted and ignored.
 */
void UTF8ToUnicodeEnd(wchar_t* wstr_chaged)
{
    // delete[] on a null pointer is a no-op, so no guard is required. The
    // pointer is received by value, so the caller's copy is not modified.
    delete[] wstr_chaged;
}
三、UTF-16转换为UTF-8
/**
 * Converts a NUL-terminated UTF-16 string to a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * @param wstr  Source string; may be NULL.
 * @return      A buffer allocated with new[] that the caller must release
 *              with UnicodeToUTF8End. The result is never NULL: when wstr
 *              is NULL or the conversion fails, an empty string is returned.
 */
char* UnicodeToUTF8Begin(const wchar_t* wstr)
{
    // Measuring pass: with an input length of -1 and no output buffer, the
    // API reports the required size in bytes, terminator included. The two
    // trailing arguments must be NULL when the target code page is CP_UTF8.
    const int len = (wstr != NULL)
        ? WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL)
        : 0;
    if (len <= 0)
    {
        char* empty = new char[1];
        empty[0] = '\0';
        return empty;
    }

    char* buf = new char[len];
    if (WideCharToMultiByte(CP_UTF8, 0, wstr, -1, buf, len, NULL, NULL) == 0)
    {
        // The buffer contents are unspecified after a failure; hand back a
        // well-formed empty string instead of unterminated garbage.
        buf[0] = '\0';
    }
    return buf;
}
/**
 * Frees a char array that was allocated with new[], such as the buffer
 * returned by UnicodeToUTF8Begin.
 *
 * @param str_chaged  Buffer to free; NULL is accepted and ignored.
 */
void UnicodeToUTF8End(const char* str_chaged)
{
    // delete[] on a null pointer is a no-op, so no guard is required. The
    // pointer is received by value, so the caller's copy is not modified.
    delete[] str_chaged;
}
四、读写UTF-8格式文件,转换为UTF-16处理,最后转回UTF-8写入
/**
 * Demo: reads the 3-byte header and the first line of c:\test.txt, converts
 * the line UTF-8 -> UTF-16 -> UTF-8, and writes the header plus the converted
 * line to c:\outputUTF-8.txt.
 *
 * @return 0 on success, 1 when either file cannot be opened.
 */
int main()
{
    char lineBuf[256] = {0};
    char headFlag[3] = {0};

    FILE* fpr = fopen("c:\\test.txt", "rb");
    if (fpr == NULL)
    {
        fprintf(stderr, "cannot open c:\\test.txt for reading\n");
        return 1;
    }

    // Binary mode, matching the input: text mode would translate '\n' into
    // "\r\n" on output and corrupt line endings that were read untranslated.
    FILE* fpw = fopen("c:\\outputUTF-8.txt", "wb");
    if (fpw == NULL)
    {
        fprintf(stderr, "cannot open c:\\outputUTF-8.txt for writing\n");
        fclose(fpr);
        return 1;
    }

    // The first three bytes are copied through unverified; presumably they
    // are the UTF-8 byte-order mark (EF BB BF) -- confirm against the input.
    const size_t headLen = fread(headFlag, 1, sizeof(headFlag), fpr);
    if (fgets(lineBuf, sizeof(lineBuf), fpr) == NULL)
    {
        // Nothing after the header: convert an empty line.
        lineBuf[0] = '\0';
    }
    fclose(fpr);

    wchar_t* wstr = UTF8ToUnicodeBegin(lineBuf);
    char* str = UnicodeToUTF8Begin(wstr);

    // Write back only the header bytes that were actually read.
    fwrite(headFlag, 1, headLen, fpw);
    fwrite(str, 1, strlen(str), fpw);
    fclose(fpw);

    UnicodeToUTF8End(str);
    UTF8ToUnicodeEnd(wstr);

    system("pause");
    return 0;
}
下面是我用C++写的一个转换类。功能是读取UTF8文件到wstring中处理,然后再转换为UTF8编码的字符串,最后写回文件中。
#ifndef CHARACTERCONVERT_H_
#define CHARACTERCONVERT_H_

#include <string>

namespace MyLIB
{

/**
 * Static-only helper for converting text between UTF-8 (std::string) and
 * UTF-16 (std::wstring). The constructor and destructor are private, so the
 * class cannot be instantiated.
 */
class CharacterConvert
{
public:
    /**
     * Converts a UTF-8 string to UTF-16.
     * @param strUtf8   Input text.
     * @param strUtf16  Receives the converted text.
     */
    static void ConvertUTF8ToUnicode(const std::string& strUtf8, std::wstring& strUtf16);

    /**
     * Converts a UTF-16 string to UTF-8.
     * @param strUtf16  Input text.
     * @param strUtf8   Receives the converted text.
     */
    static void ConvertUnicodeToUTF8(const std::wstring& strUtf16, std::string& strUtf8);

private:
    CharacterConvert(void);
    ~CharacterConvert(void);
};

}

#endif
1 #include "StdAfx.h" 2 #include "CharacterConvert.h" 3 #include <Windows.h> 4 5 6 using namespace MyLIB; 7 8 void CharacterConvert::ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16) 9 { 10 int wLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0); 11 wchar_t* wBuf = new wchar_t[wLen+1]; 12 if(wBuf==NULL) 13 { 14 return; 15 } 16 MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, wBuf, wLen); 17 strUtf16.assign(wBuf); 18 if(wBuf!=NULL) 19 { 20 delete[] wBuf; 21 wBuf = NULL; 22 } 23 } 24 25 void CharacterConvert::ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8) 26 { 27 int len = WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,NULL,0,0,0); 28 char *buf = new char[len+1]; 29 if(buf==NULL) 30 { 31 return; 32 } 33 WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,buf,len,NULL,NULL); 34 strUtf8.assign(buf); 35 if(buf!=NULL) 36 { 37 delete[] buf; 38 buf=NULL; 39 } 40 }
1 // STLTest.cpp :
2 // 3 4 #include "stdafx.h" 5 #include <iostream> 6 #include <fstream> 7 #include <string> 8 #include <algorithm> 9 #include "CharacterConvert.h" 10 using namespace std; 11 12 13 14 int _tmain(int argc, _TCHAR* argv[]) 15 { 16 string input; 17 wstring output; 18 string utf8; 19 ifstream fin("testUTF8.txt",ios_base::in|ios_base::binary); 20 if(!fin.is_open()) 21 { 22 return -1; 23 } 24 ofstream fout("UTF8Output.txt",ios_base::out|ios_base::binary); 25 if(!fout.is_open()) 26 { 27 return -1; 28 } 29 30 while(getline(fin,input)) 31 { 32 MyLIB::CharacterConvert::ConvertUTF8ToUnicode(input,output); 33 MyLIB::CharacterConvert::ConvertUnicodeToUTF8(output,utf8); 34 fout << utf8 << endl; 35 } 36 37 return 0; 38 }