VC编码转换
VC编码转换函数
很多人在为 VC 中的编码转换犯愁,有的人甚至根据编码规范自己动手实现转换。我查阅资料后发现,根本不需要这么做,下面总结一下实现方式。
CodePages 枚举的定义(参考了 .NET 中的定义)
CodePages
// Numeric code page identifiers, named after the .NET encoding names.
// The conversion functions in this document pass these values directly as the
// CodePage argument of WideCharToMultiByte / MultiByteToWideChar.
//
// Note: some enumerators differ only by letter case (EUC_JP = 20932 versus
// euc_jp = 51932); they are distinct identifiers with distinct values.
enum CodePages
{
    IBM037 = 37,
    IBM437 = 437,
    IBM500 = 500,
    ASMO_708 = 708,
    DOS_720 = 720,
    ibm737 = 737,
    ibm775 = 775,
    ibm850 = 850,
    ibm852 = 852,
    IBM855 = 855,
    ibm857 = 857,
    IBM00858 = 858,
    IBM860 = 860,
    ibm861 = 861,
    DOS_862 = 862,
    IBM863 = 863,
    IBM864 = 864,
    IBM865 = 865,
    cp866 = 866,
    ibm869 = 869,
    IBM870 = 870,
    windows_874 = 874,
    cp875 = 875,
    shift_jis = 932,
    gb2312 = 936,
    ks_c_5601_1987 = 949,
    big5 = 950,
    IBM1026 = 1026,
    IBM01047 = 1047,
    IBM01140 = 1140,
    IBM01141 = 1141,
    IBM01142 = 1142,
    IBM01143 = 1143,
    IBM01144 = 1144,
    IBM01145 = 1145,
    IBM01146 = 1146,
    IBM01147 = 1147,
    IBM01148 = 1148,
    IBM01149 = 1149,
    utf_16 = 1200,
    unicodeFFFE = 1201,
    windows_1250 = 1250,
    windows_1251 = 1251,
    Windows_1252 = 1252,
    windows_1253 = 1253,
    windows_1254 = 1254,
    windows_1255 = 1255,
    windows_1256 = 1256,
    windows_1257 = 1257,
    windows_1258 = 1258,
    Johab = 1361,
    macintosh = 10000,
    x_mac_japanese = 10001,
    x_mac_chinesetrad = 10002,
    x_mac_korean = 10003,
    x_mac_arabic = 10004,
    x_mac_hebrew = 10005,
    x_mac_greek = 10006,
    x_mac_cyrillic = 10007,
    x_mac_chinesesimp = 10008,
    x_mac_romanian = 10010,
    x_mac_ukrainian = 10017,
    x_mac_thai = 10021,
    x_mac_ce = 10029,
    x_mac_icelandic = 10079,
    x_mac_turkish = 10081,
    x_mac_croatian = 10082,
    utf_32 = 12000,
    utf_32BE = 12001,
    x_Chinese_CNS = 20000,
    x_cp20001 = 20001,
    x_Chinese_Eten = 20002,
    x_cp20003 = 20003,
    x_cp20004 = 20004,
    x_cp20005 = 20005,
    x_IA5 = 20105,
    x_IA5_German = 20106,
    x_IA5_Swedish = 20107,
    x_IA5_Norwegian = 20108,
    us_ascii = 20127,
    x_cp20261 = 20261,
    x_cp20269 = 20269,
    IBM273 = 20273,
    IBM277 = 20277,
    IBM278 = 20278,
    IBM280 = 20280,
    IBM284 = 20284,
    IBM285 = 20285,
    IBM290 = 20290,
    IBM297 = 20297,
    IBM420 = 20420,
    IBM423 = 20423,
    IBM424 = 20424,
    x_EBCDIC_KoreanExtended = 20833,
    IBM_Thai = 20838,
    koi8_r = 20866,
    IBM871 = 20871,
    IBM880 = 20880,
    IBM905 = 20905,
    IBM00924 = 20924,
    EUC_JP = 20932,
    x_cp20936 = 20936,
    x_cp20949 = 20949,
    cp1025 = 21025,
    koi8_u = 21866,
    iso_8859_1 = 28591,
    iso_8859_2 = 28592,
    iso_8859_3 = 28593,
    iso_8859_4 = 28594,
    iso_8859_5 = 28595,
    iso_8859_6 = 28596,
    iso_8859_7 = 28597,
    iso_8859_8 = 28598,
    iso_8859_9 = 28599,
    iso_8859_13 = 28603,
    iso_8859_15 = 28605,
    x_Europa = 29001,
    iso_8859_8_i = 38598,
    iso_2022_jp = 50220,
    csISO2022JP = 50221,
    iso_2022_kr = 50225,
    x_cp50227 = 50227,
    euc_jp = 51932,
    EUC_CN = 51936,
    euc_kr = 51949,
    hz_gb_2312 = 52936,
    GB18030 = 54936,
    x_iscii_de = 57002,
    x_iscii_be = 57003,
    x_iscii_ta = 57004,
    x_iscii_te = 57005,
    x_iscii_as = 57006,
    x_iscii_or = 57007,
    x_iscii_ka = 57008,
    x_iscii_ma = 57009,
    x_iscii_gu = 57010,
    x_iscii_pa = 57011,
    utf_7 = 65000,
    utf_8 = 65001
};
// (Article text) The conversion functions are easy to follow; they mainly call
// WideCharToMultiByte and MultiByteToWideChar.
//
// NOTE(review): this listing repeats the enum CodePages definition that already
// appears earlier in this document; keep a single copy when compiling.
//
// Numeric code page identifiers, named after the .NET encoding names.
// Some enumerators differ only by letter case (EUC_JP = 20932 versus
// euc_jp = 51932); they are distinct identifiers with distinct values.
enum CodePages
{
    IBM037 = 37,
    IBM437 = 437,
    IBM500 = 500,
    ASMO_708 = 708,
    DOS_720 = 720,
    ibm737 = 737,
    ibm775 = 775,
    ibm850 = 850,
    ibm852 = 852,
    IBM855 = 855,
    ibm857 = 857,
    IBM00858 = 858,
    IBM860 = 860,
    ibm861 = 861,
    DOS_862 = 862,
    IBM863 = 863,
    IBM864 = 864,
    IBM865 = 865,
    cp866 = 866,
    ibm869 = 869,
    IBM870 = 870,
    windows_874 = 874,
    cp875 = 875,
    shift_jis = 932,
    gb2312 = 936,
    ks_c_5601_1987 = 949,
    big5 = 950,
    IBM1026 = 1026,
    IBM01047 = 1047,
    IBM01140 = 1140,
    IBM01141 = 1141,
    IBM01142 = 1142,
    IBM01143 = 1143,
    IBM01144 = 1144,
    IBM01145 = 1145,
    IBM01146 = 1146,
    IBM01147 = 1147,
    IBM01148 = 1148,
    IBM01149 = 1149,
    utf_16 = 1200,
    unicodeFFFE = 1201,
    windows_1250 = 1250,
    windows_1251 = 1251,
    Windows_1252 = 1252,
    windows_1253 = 1253,
    windows_1254 = 1254,
    windows_1255 = 1255,
    windows_1256 = 1256,
    windows_1257 = 1257,
    windows_1258 = 1258,
    Johab = 1361,
    macintosh = 10000,
    x_mac_japanese = 10001,
    x_mac_chinesetrad = 10002,
    x_mac_korean = 10003,
    x_mac_arabic = 10004,
    x_mac_hebrew = 10005,
    x_mac_greek = 10006,
    x_mac_cyrillic = 10007,
    x_mac_chinesesimp = 10008,
    x_mac_romanian = 10010,
    x_mac_ukrainian = 10017,
    x_mac_thai = 10021,
    x_mac_ce = 10029,
    x_mac_icelandic = 10079,
    x_mac_turkish = 10081,
    x_mac_croatian = 10082,
    utf_32 = 12000,
    utf_32BE = 12001,
    x_Chinese_CNS = 20000,
    x_cp20001 = 20001,
    x_Chinese_Eten = 20002,
    x_cp20003 = 20003,
    x_cp20004 = 20004,
    x_cp20005 = 20005,
    x_IA5 = 20105,
    x_IA5_German = 20106,
    x_IA5_Swedish = 20107,
    x_IA5_Norwegian = 20108,
    us_ascii = 20127,
    x_cp20261 = 20261,
    x_cp20269 = 20269,
    IBM273 = 20273,
    IBM277 = 20277,
    IBM278 = 20278,
    IBM280 = 20280,
    IBM284 = 20284,
    IBM285 = 20285,
    IBM290 = 20290,
    IBM297 = 20297,
    IBM420 = 20420,
    IBM423 = 20423,
    IBM424 = 20424,
    x_EBCDIC_KoreanExtended = 20833,
    IBM_Thai = 20838,
    koi8_r = 20866,
    IBM871 = 20871,
    IBM880 = 20880,
    IBM905 = 20905,
    IBM00924 = 20924,
    EUC_JP = 20932,
    x_cp20936 = 20936,
    x_cp20949 = 20949,
    cp1025 = 21025,
    koi8_u = 21866,
    iso_8859_1 = 28591,
    iso_8859_2 = 28592,
    iso_8859_3 = 28593,
    iso_8859_4 = 28594,
    iso_8859_5 = 28595,
    iso_8859_6 = 28596,
    iso_8859_7 = 28597,
    iso_8859_8 = 28598,
    iso_8859_9 = 28599,
    iso_8859_13 = 28603,
    iso_8859_15 = 28605,
    x_Europa = 29001,
    iso_8859_8_i = 38598,
    iso_2022_jp = 50220,
    csISO2022JP = 50221,
    iso_2022_kr = 50225,
    x_cp50227 = 50227,
    euc_jp = 51932,
    EUC_CN = 51936,
    euc_kr = 51949,
    hz_gb_2312 = 52936,
    GB18030 = 54936,
    x_iscii_de = 57002,
    x_iscii_be = 57003,
    x_iscii_ta = 57004,
    x_iscii_te = 57005,
    x_iscii_as = 57006,
    x_iscii_or = 57007,
    x_iscii_ka = 57008,
    x_iscii_ma = 57009,
    x_iscii_gu = 57010,
    x_iscii_pa = 57011,
    utf_7 = 65000,
    utf_8 = 65001
};
几个转换函数
// Converts a wide (UTF-16) string to a multi-byte string in the given code page.
//
// src      - wide-character input; all src.size() characters are converted
//            (no reliance on a terminating NUL).
// result   - receives the converted bytes; left empty when src is empty or the
//            conversion fails.
// codePage - target code page, passed straight to WideCharToMultiByte.
void UnicodeToOther(const wstring &src, string& result, CodePages codePage)
{
    result.clear();

    // WideCharToMultiByte takes an int length and rejects a length of 0, so stop
    // here for empty input and for sizes that do not survive the cast to int.
    const int srcLen = static_cast<int>(src.size());
    if (srcLen <= 0 || static_cast<wstring::size_type>(srcLen) != src.size())
        return;

    // First call only measures. Passing the explicit length instead of -1 keeps
    // the terminating NUL out of the count, so result.size() ends up being the
    // real text length rather than length + 1.
    const int needed = ::WideCharToMultiByte(codePage, 0, src.data(), srcLen, NULL, 0, NULL, NULL);
    if (needed <= 0)
        return;

    // Second call writes into the string's own storage through &result[0]
    // instead of casting away the const of c_str().
    result.resize(needed);
    const int written = ::WideCharToMultiByte(codePage, 0, src.data(), srcLen, &result[0], needed, NULL, NULL);

    // Trim to what was actually produced; a failed call leaves result empty.
    result.resize(written > 0 ? written : 0);
}
// Converts a multi-byte string in the given code page to a wide (UTF-16) string.
//
// src      - input bytes; all src.size() bytes are converted (no reliance on a
//            terminating NUL).
// result   - receives the converted wide characters; left empty when src is
//            empty or the conversion fails.
// codePage - code page src is encoded in, passed straight to MultiByteToWideChar.
void OtherToUnicode(const string& src, wstring& result, CodePages codePage)
{
    result.clear();

    // MultiByteToWideChar takes an int length and rejects a length of 0, so stop
    // here for empty input and for sizes that do not survive the cast to int.
    const int srcLen = static_cast<int>(src.size());
    if (srcLen <= 0 || static_cast<string::size_type>(srcLen) != src.size())
        return;

    // First call only measures. Passing the explicit length instead of -1 keeps
    // the terminating NUL out of the count, so result.size() ends up being the
    // real text length rather than length + 1.
    const int needed = ::MultiByteToWideChar(codePage, 0, src.data(), srcLen, NULL, 0);
    if (needed <= 0)
        return;

    // The output buffer must be wchar_t*. The previous (TCHAR*) cast becomes
    // char* in non-UNICODE builds and then no longer matches the API parameter.
    result.resize(needed);
    const int written = ::MultiByteToWideChar(codePage, 0, src.data(), srcLen, &result[0], needed);

    // Trim to what was actually produced; a failed call leaves result empty.
    result.resize(written > 0 ? written : 0);
}
// Re-encodes a multi-byte string from one code page to another by going
// through an intermediate wide-character string.
//
// src            - input bytes encoded in srcCodePage.
// srcCodePage    - code page of src.
// result         - receives the bytes encoded in resultCodePage.
// resultCodePage - code page to encode result in.
void EncodingConvert(const string& src, CodePages srcCodePage, string& result, CodePages resultCodePage)
{
    // Step 1: source code page -> wide characters.
    wstring buffer;
    OtherToUnicode(src, buffer, srcCodePage);

    // Step 2: wide characters -> target code page.
    UnicodeToOther(buffer, result, resultCodePage);
}
// NOTE(review): this listing repeats a definition that already appears earlier
// in this document; keep a single copy when compiling.
//
// Converts a wide (UTF-16) string to a multi-byte string in the given code page.
//
// src      - wide-character input; all src.size() characters are converted
//            (no reliance on a terminating NUL).
// result   - receives the converted bytes; left empty when src is empty or the
//            conversion fails.
// codePage - target code page, passed straight to WideCharToMultiByte.
void UnicodeToOther(const wstring &src, string& result, CodePages codePage)
{
    result.clear();

    // The API takes an int length and rejects 0: bail out on empty input and on
    // sizes that are not representable as int.
    const int wideCount = static_cast<int>(src.size());
    if (wideCount <= 0 || static_cast<wstring::size_type>(wideCount) != src.size())
        return;

    // Measure first. An explicit length (not -1) excludes the terminating NUL
    // from the count, so the output string does not end with a stray '\0'.
    const int byteCount = ::WideCharToMultiByte(codePage, 0, src.data(), wideCount, NULL, 0, NULL, NULL);
    if (byteCount <= 0)
        return;

    // Convert into the string's own storage via &result[0] rather than a
    // const-stripping cast of c_str().
    result.resize(byteCount);
    const int produced = ::WideCharToMultiByte(codePage, 0, src.data(), wideCount, &result[0], byteCount, NULL, NULL);

    // Keep only what was produced; on failure the result stays empty.
    result.resize(produced > 0 ? produced : 0);
}
// NOTE(review): this listing repeats a definition that already appears earlier
// in this document; keep a single copy when compiling.
//
// Converts a multi-byte string in the given code page to a wide (UTF-16) string.
//
// src      - input bytes; all src.size() bytes are converted (no reliance on a
//            terminating NUL).
// result   - receives the converted wide characters; left empty when src is
//            empty or the conversion fails.
// codePage - code page src is encoded in, passed straight to MultiByteToWideChar.
void OtherToUnicode(const string& src, wstring& result, CodePages codePage)
{
    result.clear();

    // The API takes an int length and rejects 0: bail out on empty input and on
    // sizes that are not representable as int.
    const int byteCount = static_cast<int>(src.size());
    if (byteCount <= 0 || static_cast<string::size_type>(byteCount) != src.size())
        return;

    // Measure first. An explicit length (not -1) excludes the terminating NUL
    // from the count, so the output string does not end with a stray L'\0'.
    const int wideCount = ::MultiByteToWideChar(codePage, 0, src.data(), byteCount, NULL, 0);
    if (wideCount <= 0)
        return;

    // The destination must be wchar_t*. A (TCHAR*) cast is char* in non-UNICODE
    // builds and does not match the API parameter there.
    result.resize(wideCount);
    const int produced = ::MultiByteToWideChar(codePage, 0, src.data(), byteCount, &result[0], wideCount);

    // Keep only what was produced; on failure the result stays empty.
    result.resize(produced > 0 ? produced : 0);
}
// NOTE(review): this listing repeats a definition that already appears earlier
// in this document; keep a single copy when compiling.
//
// Re-encodes a multi-byte string from one code page to another by going
// through an intermediate wide-character string.
//
// src            - input bytes encoded in srcCodePage.
// srcCodePage    - code page of src.
// result         - receives the bytes encoded in resultCodePage.
// resultCodePage - code page to encode result in.
void EncodingConvert(const string& src, CodePages srcCodePage, string& result, CodePages resultCodePage)
{
    wstring wide;  // intermediate wide-character form of src

    OtherToUnicode(src, wide, srcCodePage);         // source code page -> wide
    UnicodeToOther(wide, result, resultCodePage);   // wide -> target code page
}
本文基于署名 2.5 中国大陆许可协议发布,欢迎转载,演绎或用于商业目的,但是必须保留本文的署名小橋流水(包含链接)。如您有任何疑问或者授权方面的协商,请给我发邮件。