指定存储文件的编码格式(下)

指定存储文件编码格式共两篇。

此为下篇。

 

本篇对上篇的代码进行了优化,大大提高了代码的通用性和可重用性。

有兴趣的可以都下载下来对比参考。

   1 // test__EncodeAFileToEncodeBFile.cpp : 定义控制台应用程序的入口点。
   2 //
   3 
   4 #include "stdafx.h"
   5 #include <windows.h>
   6 #include <string>
   7 #include <iostream>
   8 
   9 
  10 #ifndef IN
  11 #define IN
  12 #endif
  13 
  14 #ifndef OUT
  15 #define OUT
  16 #endif
  17 
  18 #ifndef INOUT
  19 #define INOUT
  20 #endif
  21 
  22 #ifndef OPTION
  23 #define OPTION
  24 #endif
  25 
  26 #define UTF8_SIGN 3
  27 #define UTF16_SIGN 2
  28 #define FILE_HEADER 6
  29 
  30 enum FileEncodeType
  31 {
  32     OTHER = 0,
  33     UTF8,
  34     UTF8_NO_BOM,
  35     UTF16LE,
  36     UTF16LE_NO_BOM,
  37     UTF16BE,
  38     UTF16BE_NO_BOM
  39 };
  40 
  41 
  42 //************************************
  43 // Method:    IsUTF8EncodeText
  44 // FullName:  IsUTF8EncodeText
  45 // Access:    public 
  46 // Returns:   BOOL
  47 // Qualifier:判断输入内容是否时UTF8编码格式(可以判断不带BOM的UTF8编码)
  48 // Parameter: BYTE * lpText:判断是否时UTF8的内容
  49 // Parameter: INT cchText:UTF8的内容长度(按个数)
  50 //************************************
  51 BOOL IsUTF8EncodeText(IN CONST BYTE* lpText, IN INT cchText)
  52 {
  53     //    UTF8判断规则
  54     //  0000 0000 - 0000 007F 0xxxxxxx
  55     //    0000 0080 - 0000 07FF 110xxxxx 10xxxxxx
  56     //    0000 0800 - 0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
  57     //    0001 0000 - 001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  58     //    0020 0000 - 03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  59     //    0400 0000 - 7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
  60 
  61 
  62     int i;
  63     DWORD nBytes = 0;//UFT8可用1-6个字节编码,ASCII用一个字节
  64     BYTE lpByte;
  65     BOOL bAllAscii = TRUE; //如果全部都是ASCII, 说明不是UTF-8
  66     for (i = 0; i < cchText; i++)
  67     {
  68         lpByte = *(lpText + i);
  69         if ((lpByte & 0x80) != 0) // 判断是否ASCII编码,如果不是,说明有可能是UTF-8,ASCII用7位编码,但用一个字节存,最高位标记为0,o0xxxxxxx
  70             bAllAscii = FALSE;
  71         if (nBytes == 0) //如果不是ASCII码,应该是多字节符,计算字节数
  72         {
  73             if (lpByte >= 0x80)
  74             {
  75                 if (lpByte >= 0xFC && lpByte <= 0xFD)
  76                     nBytes = 6;
  77                 else if (lpByte >= 0xF8)
  78                     nBytes = 5;
  79                 else if (lpByte >= 0xF0)
  80                     nBytes = 4;
  81                 else if (lpByte >= 0xE0)
  82                     nBytes = 3;
  83                 else if (lpByte >= 0xC0)
  84                     nBytes = 2;
  85                 else
  86                 {
  87                     return FALSE;
  88                 }
  89                 nBytes--;
  90             }
  91         }
  92         else //多字节符的非首字节,应为 10xxxxxx
  93         {
  94             if ((lpByte & 0xC0) != 0x80)
  95             {
  96                 return FALSE;
  97             }
  98             nBytes--;
  99         }
 100     }
 101     if (nBytes > 0) //违返规则
 102     {
 103         return FALSE;
 104     }
 105     if (bAllAscii) //如果全部都是ASCII, 说明不是UTF-8
 106     {
 107         return FALSE;
 108     }
 109 
 110     return TRUE;
 111 }
 112 
 113 //************************************
 114 // Method:    GetEncodeType
 115 // FullName:  GetEncodeType
 116 // Access:    public 
 117 // Returns:   FileEncodeType
 118 // Qualifier:识别指定文件编码
 119 // Parameter: IN CONST LPTSTR lpFileName:指定文件名称
 120 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE
 121 //************************************
 122 FileEncodeType GetEncodeType(IN CONST LPTSTR lpFileName)
 123 {
 124     FileEncodeType lFileEncodeType = OTHER;
 125     HANDLE hFile = NULL;
 126     LPBYTE lpFileHeader = NULL;
 127     INT cbFileHeader = FILE_HEADER;
 128     INT cchFileHeader = FILE_HEADER;
 129 
 130 
 131     hFile = ::CreateFile(lpFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
 132     if (INVALID_HANDLE_VALUE == hFile)
 133     {
 134         int errLogNumber = GetLastError();
 135         printf_s("error number:%d\n", errLogNumber);
 136         return lFileEncodeType;
 137     }
 138 
 139     lpFileHeader = (BYTE *)malloc(cbFileHeader);
 140     if (NULL == lpFileHeader)
 141     {
 142         printf_s("malloc error\n");
 143         ::CloseHandle(hFile);
 144         return lFileEncodeType;
 145     }
 146     ZeroMemory(lpFileHeader, cbFileHeader);
 147 
 148     SetFilePointer(hFile, 0, NULL, FILE_BEGIN);
 149     if (FALSE == ReadFile(hFile, lpFileHeader, FILE_HEADER, NULL, NULL))
 150     {
 151         int errLogNumber = GetLastError();
 152         printf_s("error number:%d\n", errLogNumber);
 153         free(lpFileHeader);
 154         ::CloseHandle(hFile);
 155         return lFileEncodeType;
 156     }
 157 
 158 
 159 
 160     if (0xef == lpFileHeader[0] && 0xbb == lpFileHeader[1] && 0xbf == lpFileHeader[2])
 161     {
 162         free(lpFileHeader);
 163         ::CloseHandle(hFile);
 164         return lFileEncodeType = UTF8;
 165     }
 166     else if (0xff == lpFileHeader[0] && 0xfe == lpFileHeader[1])
 167     {
 168         free(lpFileHeader);
 169         ::CloseHandle(hFile);
 170         return lFileEncodeType = UTF16BE;
 171     }
 172     else if (0xfe == lpFileHeader[0] && 0xff == lpFileHeader[1])
 173     {
 174         free(lpFileHeader);
 175         ::CloseHandle(hFile);
 176         return lFileEncodeType = UTF16LE;
 177     }
 178     else
 179     {
 180         free(lpFileHeader);
 181 
 182         cbFileHeader = SetFilePointer(hFile, 0, NULL, FILE_END);
 183         if (INVALID_SET_FILE_POINTER == cbFileHeader)
 184         {
 185             int errLogNumber = GetLastError();
 186             printf_s("error number:%d\n", errLogNumber);
 187             ::CloseHandle(hFile);
 188             return lFileEncodeType = OTHER;
 189         }
 190 
 191         lpFileHeader = (BYTE *)malloc(cbFileHeader);
 192         if (NULL == lpFileHeader)
 193         {
 194             printf_s("malloc error\n");
 195             ::CloseHandle(hFile);
 196             return lFileEncodeType = OTHER;
 197         }
 198         ZeroMemory(lpFileHeader, cbFileHeader);
 199 
 200         SetFilePointer(hFile, 0, NULL, FILE_BEGIN);
 201         if (FALSE == ReadFile(hFile, lpFileHeader, cbFileHeader, NULL, NULL))
 202         {
 203             int errLogNumber = GetLastError();
 204             printf_s("error number:%d\n", errLogNumber);
 205             free(lpFileHeader);
 206             ::CloseHandle(hFile);
 207             return lFileEncodeType = OTHER;
 208         }
 209 
 210 
 211 
 212         if (TRUE == IsUTF8EncodeText(lpFileHeader, cbFileHeader))
 213         {
 214             free(lpFileHeader);
 215             ::CloseHandle(hFile);
 216             return lFileEncodeType = UTF8_NO_BOM;
 217         }
 218 
 219 
 220         lFileEncodeType = OTHER;
 221     }
 222 
 223     free(lpFileHeader);
 224     ::CloseHandle(hFile);
 225     return lFileEncodeType;
 226 }
 227 
 228 //************************************
 229 // Method:    GetEncodeType
 230 // FullName:  GetEncodeType
 231 // Access:    public 
 232 // Returns:   FileEncodeType
 233 // Qualifier:识别指定字节流编码
 234 // Parameter: IN CONST BYTE * lpBytes:指定字节流
 235 // Parameter: IN CONST INT cchBytes:指定字节流长度
 236 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE
 237 //************************************
 238 FileEncodeType GetEncodeType(IN CONST BYTE *lpBytes, IN CONST INT cchBytes)
 239 {
 240     if (0xef == lpBytes[0] && 0xbb == lpBytes[1] && 0xbf == lpBytes[2])
 241     {
 242         return UTF8;
 243     }
 244     else if (0xff == lpBytes[0] && 0xfe == lpBytes[1])
 245     {
 246         return UTF16LE;
 247     }
 248     else if (0xfe == lpBytes[0] && 0xff == lpBytes[1])
 249     {
 250         return UTF16BE;
 251     }
 252     else
 253     {
 254         if (TRUE == IsUTF8EncodeText(lpBytes, cchBytes))
 255         {
 256             return UTF8_NO_BOM;
 257         }
 258     }
 259 
 260     return OTHER;
 261 }
 262 
 263 //************************************
 264 // Method:    Utf16leAndUtf16beTransfers
 265 // FullName:  Utf16leAndUtf16beTransfers
 266 // Access:    public 
 267 // Returns:   BOOL
 268 // Qualifier:相互转换UTF16LE 和 UTF16BE
 269 // Parameter: INOUT LPBYTE lpUtf16String:指定UTF16编码字节流
 270 // Parameter: IN CONST INT cchUtf16String:指定UTF16编码字节流长度
 271 //************************************
 272 BOOL Utf16leAndUtf16beTransfers(INOUT LPBYTE lpUtf16String, IN CONST INT cchUtf16String)
 273 {
 274     if (NULL == lpUtf16String || cchUtf16String < 0)
 275     {
 276         return FALSE;
 277     }
 278 
 279     for (INT i = 0; i < cchUtf16String; i += 2)//每两值交换
 280     {
 281         lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1];
 282         lpUtf16String[i + 1] = lpUtf16String[i + 1] ^ lpUtf16String[i];
 283         lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1];
 284     }
 285 
 286     return TRUE;
 287 }
 288 
 289 //************************************
 290 // Method:    Utf8ToUtf8NoBOM
 291 // FullName:  Utf8ToUtf8NoBOM
 292 // Access:    public 
 293 // Returns:   BOOL
 294 // Qualifier:UTF8编码转换到UTF8 without BOM编码
 295 // Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串
 296 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度
 297 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串
 298 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度
 299 //************************************
 300 BOOL Utf8ToUtf8NoBOM(IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String, INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString)
 301 {
 302     if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0)
 303     {
 304         return FALSE;
 305     }
 306     if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN)
 307     {
 308         return FALSE;
 309     }
 310 
 311     ::CopyMemory(lpUtf8NoBOMString, lpUtf8String + UTF8_SIGN, cchUtf8String - UTF8_SIGN);
 312 
 313     return TRUE;
 314 }
 315 //************************************
 316 // Method:    Utf8NoBOMToUtf8
 317 // FullName:  Utf8NoBOMToUtf8
 318 // Access:    public 
 319 // Returns:   BOOL
 320 // Qualifier:UTF8 without BOM编码转换到UTF8编码
 321 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串
 322 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度
 323 // Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串
 324 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度
 325 //************************************
 326 BOOL Utf8NoBOMToUtf8(INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString, IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String)
 327 {
 328     if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0)
 329     {
 330         return FALSE;
 331     }
 332     if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN)
 333     {
 334         return FALSE;
 335     }
 336 
 337     lpUtf8String[0] = 0xef;
 338     lpUtf8String[1] = 0xbb;
 339     lpUtf8String[2] = 0xbf;
 340     ::CopyMemory(lpUtf8String + UTF8_SIGN, lpUtf8NoBOMString, cchUtf8NoBOMString);
 341 
 342     return TRUE;
 343 }
 344 
 345 //************************************
 346 // Method:    BYTETOUTF8
 347 // FullName:  BYTETOUTF8
 348 // Access:    public 
 349 // Returns:   BOOL
 350 // Qualifier:指定编码转换为UTF8(或without BOM)编码
 351 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的字节流
 352 // Parameter: IN CONST INT cbBytes:指定需要转换的字节流长度(字节单位)
 353 // Parameter: IN CONST FileEncodeType tpBytes:指定需要转换的字节流的实际编码格式
 354 // Parameter: INOUT LPSTR lpText:指定输出的编码流
 355 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位)
 356 // Parameter: IN CONST BOOL bWithBOM:指定输出的编码是否有BOM
 357 //************************************
 358 BOOL BYTETOUTF8(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPSTR lpText, INOUT LPINT lpcbText, IN CONST BOOL bWithBOM = TRUE)
 359 {
 360     BOOL bResult = FALSE;
 361     LPSTR lpSourceA = NULL;
 362     INT cbSourceA = 0;
 363     INT cchSourceA = 0;
 364     LPWSTR lpSourceW = NULL;
 365     INT cbSourceW = 0;
 366     INT cchSourceW = 0;
 367 
 368 
 369     if (NULL == lpBytes || 
 370         NULL == lpText || 
 371         !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) ||
 372         cbBytes < 0 || 
 373         NULL == lpcbText)
 374     {
 375         return FALSE;
 376     }
 377 
 378     switch (tpBytes)
 379     {
 380         case UTF8_NO_BOM:
 381         {
 382             if (FALSE == bWithBOM)
 383             {
 384                 return FALSE;
 385             }
 386             if (*lpcbText < cbBytes + UTF8_SIGN)
 387             {
 388                 return FALSE;
 389             }
 390 
 391             cbSourceA = cbBytes;
 392             cchSourceA = cbSourceA;
 393             lpSourceA = (CHAR *)malloc(cbSourceA);
 394             if (NULL == lpSourceA)
 395             {
 396                 printf_s("malloc error\n");
 397                 return FALSE;
 398             }
 399             ZeroMemory(lpSourceA, cbSourceA);
 400             CopyMemory(lpSourceA, lpBytes, cbSourceA);
 401 
 402             bResult = Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpText, *lpcbText);
 403             free(lpSourceA);
 404         }
 405             break;
 406         case UTF8:
 407         {
 408             if (TRUE == bWithBOM)
 409             {
 410                 return FALSE;
 411             }
 412             if (*lpcbText < cbBytes - UTF8_SIGN)
 413             {
 414                 return FALSE;
 415             }
 416 
 417             cbSourceA = cbBytes;
 418             cchSourceA = cbSourceA;
 419             lpSourceA = (CHAR *)malloc(cbSourceA);
 420             if (NULL == lpSourceA)
 421             {
 422                 printf_s("malloc error\n");
 423                 return FALSE;
 424             }
 425             ZeroMemory(lpSourceA, cbSourceA);
 426             CopyMemory(lpSourceA, lpBytes, cbSourceA);
 427 
 428             bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText);
 429             free(lpSourceA);
 430         }
 431         break;
 432         case UTF16LE:
 433         {
 434             cbSourceW = cbBytes;
 435             cchSourceW = cbSourceW / sizeof(WCHAR);
 436             lpSourceW = (WCHAR *)malloc(cbSourceW);
 437             if (NULL == lpSourceW)
 438             {
 439                 printf_s("malloc error\n");
 440                 return FALSE;
 441             }
 442             ZeroMemory(lpSourceW, cbSourceW);
 443             ::CopyMemory(lpSourceW, lpBytes, cbBytes);
 444 
 445             *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小
 446 
 447             if (TRUE == bWithBOM)
 448             {
 449                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL))
 450                 {
 451                     free(lpSourceW);
 452                     return FALSE;
 453                 }
 454                 bResult = TRUE;
 455             }
 456             else
 457             {
 458                 cbSourceA = *lpcbText;
 459                 cchSourceA = cbSourceA;
 460                 lpSourceA = (CHAR *)malloc(cbSourceA);
 461                 if (NULL == lpSourceA)
 462                 {
 463                     printf_s("malloc error\n");
 464                     free(lpSourceW);
 465                     return FALSE;
 466                 }
 467                 ZeroMemory(lpSourceA, cbSourceA);
 468                 CopyMemory(lpSourceA, lpBytes, cbSourceA);
 469                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL))
 470                 {
 471                     free(lpSourceW);
 472                     free(lpSourceA);
 473                     return FALSE;
 474                 }
 475                 bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText);
 476                 *lpcbText -= UTF8_SIGN;
 477                 free(lpSourceA);
 478             }
 479 
 480             free(lpSourceW);
 481         }
 482             break;
 483         case UTF16BE:
 484         {
 485             LPBYTE lpBytesTemp = NULL;
 486             INT cbBytesTemp = 0;
 487             INT cchBytesTemp = 0;
 488 
 489             cbBytesTemp = cbBytes;
 490             lpBytesTemp = (BYTE *)malloc(cbBytesTemp);
 491             if (NULL == lpBytesTemp)
 492             {
 493                 printf_s("malloc error\n");
 494                 return FALSE;
 495             }
 496             ZeroMemory(lpBytesTemp, cbBytesTemp);
 497             ::CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp);
 498 
 499             if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp))
 500             {
 501                 free(lpBytesTemp);
 502                 return FALSE;
 503             }
 504 
 505             cbSourceW = cbBytes;
 506             cchSourceW = cbSourceW / sizeof(WCHAR);
 507             lpSourceW = (WCHAR *)malloc(cbSourceW);
 508             if (NULL == lpSourceW)
 509             {
 510                 printf_s("malloc error\n");
 511                 free(lpBytesTemp);
 512                 return FALSE;
 513             }
 514             ZeroMemory(lpSourceW, cbSourceW);
 515             ::CopyMemory(lpSourceW, lpBytesTemp, cbSourceW);
 516             free(lpBytesTemp);
 517             lpBytesTemp = NULL;
 518 
 519             *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小
 520 
 521             if (TRUE == bWithBOM)
 522             {
 523                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL))
 524                 {
 525                     free(lpSourceW);
 526                     return FALSE;
 527                 }
 528                 bResult = TRUE;
 529             }
 530             else
 531             {
 532                 cbSourceA = *lpcbText;
 533                 cchSourceA = cbSourceA;
 534                 lpSourceA = (CHAR *)malloc(cbSourceA);
 535                 if (NULL == lpSourceA)
 536                 {
 537                     printf_s("malloc error\n");
 538                     free(lpSourceW);
 539                     return FALSE;
 540                 }
 541                 ZeroMemory(lpSourceA, cbSourceA);
 542                 CopyMemory(lpSourceA, lpBytes, cbSourceA);
 543                 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL))
 544                 {
 545                     free(lpSourceW);
 546                     free(lpSourceA);
 547                     return FALSE;
 548                 }
 549                 bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText);
 550                 *lpcbText -= UTF8_SIGN;
 551                 free(lpSourceA);
 552             }
 553 
 554             free(lpSourceW);
 555         }
 556             break;
 557         default:
 558             return FALSE;
 559     }
 560 
 561     return bResult;
 562 }
 563 
 564 //************************************
 565 // Method:    BYTETOUTF16LE
 566 // FullName:  BYTETOUTF16LE
 567 // Access:    public 
 568 // Returns:   BOOL
 569 // Qualifier:指定编码流转换为UTF16LE编码格式
 570 // Parameter: IN CONST LPBYTE lpBytes:指定的编码流
 571 // Parameter: IN CONST INT cbBytes:指定的编码流长度(字节单位)
 572 // Parameter: IN CONST FileEncodeType tpBytes:指定的编码流的实际编码格式
 573 // Parameter: INOUT LPWSTR lpText:指定输出的编码流
 574 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位)
 575 //************************************
 576 BOOL BYTETOUTF16LE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText)
 577 {
 578     BOOL bResult = FALSE;
 579     LPSTR lpSourceA = NULL;
 580     INT cbSourceA = 0;
 581     INT cchSourceA = 0;
 582     LPWSTR lpSourceW = NULL;
 583     INT cbSourceW = 0;
 584     INT cchSourceW = 0;
 585 
 586 
 587     if (NULL == lpBytes ||
 588         NULL == lpText ||
 589         !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) ||
 590         cbBytes < 0 ||
 591         NULL == lpcbText)
 592     {
 593         return FALSE;
 594     }
 595 
 596     switch (tpBytes)
 597     {
 598         case UTF8_NO_BOM:
 599         {
 600             LPSTR lpSourceATemp = NULL;
 601             INT cbSourceATemp = 0;
 602             INT cchSourceATemp = 0;
 603 
 604 
 605             if (*lpcbText < cbBytes + UTF8_SIGN)
 606             {
 607                 return FALSE;
 608             }
 609 
 610             cbSourceA = cbBytes;
 611             cchSourceA = cbSourceA;
 612             lpSourceA = (CHAR *)malloc(cbSourceA);
 613             if (NULL == lpSourceA)
 614             {
 615                 printf_s("malloc error\n");
 616                 return FALSE;
 617             }
 618             ZeroMemory(lpSourceA, cbSourceA);
 619             CopyMemory(lpSourceA, lpBytes, cbSourceA);
 620 
 621             cbSourceATemp = cbBytes + UTF8_SIGN;
 622             cchSourceATemp = cbSourceATemp;
 623             lpSourceATemp = (CHAR *)malloc(cbSourceATemp);
 624             if (NULL == lpSourceATemp)
 625             {
 626                 printf_s("malloc error\n");
 627                 return FALSE;
 628             }
 629             ZeroMemory(lpSourceATemp, cbSourceATemp);
 630 
 631             if (FALSE == Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpSourceATemp, cchSourceATemp))
 632             {
 633                 free(lpSourceA);
 634                 free(lpSourceATemp);
 635                 return FALSE;
 636             }
 637             free(lpSourceA);
 638             lpSourceA = NULL;
 639 
 640             *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, 0);
 641             if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, *lpcbText))
 642             {
 643                 free(lpSourceATemp);
 644                 return FALSE;
 645             }
 646 
 647             *lpcbText *= sizeof(WCHAR);
 648         
 649             free(lpSourceATemp);
 650             bResult = TRUE;
 651         }
 652         break;
 653         case UTF8:
 654         {
 655             if (*lpcbText < cbBytes + UTF8_SIGN)
 656             {
 657                 return FALSE;
 658             }
 659 
 660             cbSourceA = cbBytes;
 661             cchSourceA = cbSourceA;
 662             lpSourceA = (CHAR *)malloc(cbSourceA);
 663             if (NULL == lpSourceA)
 664             {
 665                 printf_s("malloc error\n");
 666                 return FALSE;
 667             }
 668             ZeroMemory(lpSourceA, cbSourceA);
 669             CopyMemory(lpSourceA, lpBytes, cbSourceA);
 670 
 671 
 672             *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, 0);
 673             if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, *lpcbText))
 674             {
 675                 free(lpSourceA);
 676                 return FALSE;
 677             }
 678 
 679             *lpcbText *= sizeof(WCHAR);
 680 
 681             free(lpSourceA);
 682             bResult = TRUE;
 683         }
 684         break;
 685         case UTF16LE:
 686         {
 687             CopyMemory(lpText, lpBytes, cbBytes);
 688             *lpcbText = cbBytes;
 689             return TRUE;
 690         }
 691         break;
 692         case UTF16BE:
 693         {
 694             if (*lpcbText < cbBytes)
 695             {
 696                 return FALSE;
 697             }
 698 
 699             LPBYTE lpBytesTemp = NULL;
 700             INT cbBytesTemp = 0;
 701 
 702             cbBytesTemp = cbBytes;
 703             lpBytesTemp = (BYTE *)malloc(cbBytesTemp);
 704             if (NULL == lpBytesTemp)
 705             {
 706                 printf_s("malloc error\n");
 707                 return FALSE;
 708             }
 709             ZeroMemory(lpBytesTemp, cbBytesTemp);
 710             CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp);
 711             if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp))
 712             {
 713                 free(lpSourceW);
 714                 return FALSE;
 715             }
 716             CopyMemory(lpText, lpBytesTemp, cbBytesTemp);
 717             *lpcbText = cbBytesTemp;
 718             free(lpBytesTemp);
 719 
 720             bResult = TRUE;
 721         }
 722         break;
 723         default:
 724             return FALSE;
 725     }
 726 
 727     return bResult;
 728 }
 729 //************************************
 730 // Method:    BYTETOUTF16BE
 731 // FullName:  BYTETOUTF16BE
 732 // Access:    public 
 733 // Returns:   BOOL
 734 // Qualifier:指定编码流转换为UTF16BE编码格式
 735 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的编码流
 736 // Parameter: IN CONST INT cbBytes:指定需要转换的编码流长度(字节单位)
 737 // Parameter: IN CONST FileEncodeType tpBytes::指定需要转换的编码流的实际编码格式
 738 // Parameter: INOUT LPWSTR lpText:指定输出流
 739 // Parameter: INOUT LPINT lpcbText:指定输出流的长度(字节单位)
 740 //************************************
 741 BOOL BYTETOUTF16BE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText)
 742 {
 743     if (FALSE == BYTETOUTF16LE(lpBytes, cbBytes, tpBytes, lpText, lpcbText))
 744     {
 745         return FALSE;
 746     }
 747     return Utf16leAndUtf16beTransfers((LPBYTE)lpText, *lpcbText);
 748 }
 749 
 750 
 751 //************************************
 752 // Method:    FileAToFileB
 753 // FullName:  FileAToFileB
 754 // Access:    public 
 755 // Returns:   BOOL
 756 // Qualifier:指定文件A中内容转换为指定编码存入指定文件B中
 757 // Parameter: CONST LPTSTR lpFileA:输入文件A(只读文件)
 758 // Parameter: CONST LPSTR lpFileB:输出文件B(总是创建文件)
 759 // Parameter: FileEncodeType tpFileB:指定输出文件编码(UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE)
 760 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE
 761 //************************************
 762 BOOL FileAToFileB(IN CONST LPTSTR lpFileA, IN CONST LPTSTR lpFileB, FileEncodeType tpFileB)
 763 {
 764     BOOL bResult = FALSE;
 765     FileEncodeType tpFileA = OTHER;
 766     HANDLE hFileA = NULL;
 767     HANDLE hFileB = NULL;
 768     LPBYTE lpReadFileBytes = NULL;
 769     INT cbReadFileBytes = 0;
 770     INT cchReadFileBytes = 0;
 771     LPSTR lpWriteFileString = NULL;
 772     INT cbWriteFileString = 0;
 773     INT cchWriteFileString = 0;
 774     LPWSTR lpWriteFileWString = NULL;
 775     INT cbWriteFileWString = 0;
 776     INT cchWriteFileWString = 0;
 777 
 778 
 779     hFileA = ::CreateFile(lpFileA, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
 780     if (INVALID_HANDLE_VALUE == hFileA)
 781     {
 782         int errLogNumber = GetLastError();
 783         printf_s("error number:%d\n", errLogNumber);
 784         return FALSE;
 785     }
 786 
 787     cbReadFileBytes = SetFilePointer(hFileA, 0, NULL, FILE_END);
 788     if (INVALID_SET_FILE_POINTER == cbReadFileBytes)
 789     {
 790         int errLogNumber = GetLastError();
 791         printf_s("error number:%d\n", errLogNumber);
 792         ::CloseHandle(hFileA);
 793         return FALSE;
 794     }
 795 
 796     if (1 != sizeof(BYTE))
 797     {
 798         printf_s("byte cell width error\n");
 799         return FALSE;
 800     }
 801 
 802     lpReadFileBytes = (BYTE *)malloc(cbReadFileBytes);
 803     if (NULL == lpReadFileBytes)
 804     {
 805         printf_s("malloc error\n");
 806         ::CloseHandle(hFileA);
 807         return FALSE;
 808     }
 809     ZeroMemory(lpReadFileBytes, cbReadFileBytes);
 810 
 811     SetFilePointer(hFileA, 0, NULL, FILE_BEGIN);
 812     if (FALSE == ReadFile(hFileA, lpReadFileBytes, cbReadFileBytes, NULL, NULL))
 813     {
 814         int errLogNumber = GetLastError();
 815         printf_s("error number:%d\n", errLogNumber);
 816         free(lpReadFileBytes);
 817         ::CloseHandle(hFileA);
 818         return FALSE;
 819     }
 820     ::CloseHandle(hFileA);
 821     hFileA = NULL;
 822 
 823 
 824     if (!(UTF8 == tpFileB || UTF8_NO_BOM == tpFileB || UTF16LE == tpFileB || UTF16BE == tpFileB))
 825     {
 826         printf_s("Unable to identify type error\n");
 827         free(lpReadFileBytes);
 828         return FALSE;
 829     }
 830 
 831     hFileB = ::CreateFile(lpFileB, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
 832     if (INVALID_HANDLE_VALUE == hFileB)
 833     {
 834         int errLogNumber = GetLastError();
 835         printf_s("error number:%d\n", errLogNumber);
 836         free(lpReadFileBytes);
 837         return FALSE;
 838     }
 839 
 840 
 841     tpFileA = GetEncodeType(lpReadFileBytes, cbReadFileBytes);
 842     switch (tpFileA)
 843     {
 844         case UTF8:
 845         {
 846             switch (tpFileB)
 847             {
 848             case UTF8_NO_BOM:
 849                 cbWriteFileString = cbReadFileBytes - UTF8_SIGN;
 850                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
 851                 ZeroMemory(lpWriteFileString, cbWriteFileString);
 852                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileString, &cbWriteFileString, FALSE);
 853                 free(lpReadFileBytes);
 854                 lpReadFileBytes = NULL;
 855                 break;
 856             case UTF16LE:
 857                 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR);
 858                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
 859                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
 860                 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString);
 861                 free(lpReadFileBytes);
 862                 lpReadFileBytes = NULL;
 863                 break;
 864             case UTF16BE:
 865                 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR);
 866                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
 867                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
 868                 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString);
 869                 free(lpReadFileBytes);
 870                 lpReadFileBytes = NULL;
 871                 break;
 872             default:;
 873             }
 874         }
 875             break;
 876         case UTF8_NO_BOM:
 877         {
 878             switch (tpFileB)
 879             {
 880             case UTF8:
 881                 cbWriteFileString = cbReadFileBytes + UTF8_SIGN;
 882                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
 883                 ZeroMemory(lpWriteFileString, cbWriteFileString);
 884                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileString, &cbWriteFileString, TRUE);
 885                 free(lpReadFileBytes);
 886                 lpReadFileBytes = NULL;
 887                 break;
 888             case UTF16LE:
 889                 cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR);
 890                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
 891                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
 892                 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString);
 893                 free(lpReadFileBytes);
 894                 lpReadFileBytes = NULL;
 895                 break;
 896             case UTF16BE:
 897                 cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR);
 898                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
 899                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
 900                 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString);
 901                 free(lpReadFileBytes);
 902                 lpReadFileBytes = NULL;
 903                 break;
 904             default:;
 905             }
 906         }
 907             break;
 908         case UTF16LE:
 909         {
 910             switch (tpFileB)
 911             {
 912             case UTF8:
 913                 cbWriteFileString = cbReadFileBytes;
 914                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
 915                 ZeroMemory(lpWriteFileString, cbWriteFileString);
 916                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, TRUE);
 917                 free(lpReadFileBytes);
 918                 lpReadFileBytes = NULL;
 919                 break;
 920             case UTF8_NO_BOM:
 921                 cbWriteFileString = cbReadFileBytes;
 922                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
 923                 ZeroMemory(lpWriteFileString, cbWriteFileString);
 924                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, FALSE);
 925                 free(lpReadFileBytes);
 926                 lpReadFileBytes = NULL;
 927                 break;
 928             case UTF16BE:
 929                 cbWriteFileWString = cbReadFileBytes;
 930                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
 931                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
 932                 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileWString, &cbWriteFileWString);
 933                 free(lpReadFileBytes);
 934                 lpReadFileBytes = NULL;
 935                 break;
 936             default:;
 937             }
 938         }
 939             break;
 940         case UTF16BE:
 941         {
 942             switch (tpFileB)
 943             {
 944             case UTF8:
 945                 cbWriteFileString = cbReadFileBytes;
 946                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
 947                 ZeroMemory(lpWriteFileString, cbWriteFileString);
 948                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, TRUE);
 949                 free(lpReadFileBytes);
 950                 lpReadFileBytes = NULL;
 951                 break;
 952             case UTF8_NO_BOM:
 953                 cbWriteFileString = cbReadFileBytes;
 954                 lpWriteFileString = (CHAR *)malloc(cbWriteFileString);
 955                 ZeroMemory(lpWriteFileString, cbWriteFileString);
 956                 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, FALSE);
 957                 free(lpReadFileBytes);
 958                 lpReadFileBytes = NULL;
 959                 break;
 960             case UTF16LE:
 961                 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR);
 962                 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString);
 963                 ZeroMemory(lpWriteFileWString, cbWriteFileWString);
 964                 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileWString, &cbWriteFileWString);
 965                 free(lpReadFileBytes);
 966                 lpReadFileBytes = NULL;
 967                 break;
 968             default:;
 969             }
 970         }
 971             break;
 972         default:
 973             printf("file encode unable to identify.\n");
 974             free(lpReadFileBytes);
 975             return FALSE;
 976     }
 977 
 978 
 979     if (NULL != lpWriteFileString)
 980     {
 981         if (FALSE == WriteFile(hFileB, lpWriteFileString, cbWriteFileString, NULL, NULL))
 982         {
 983             free(lpWriteFileString);
 984             return FALSE;
 985         }
 986         free(lpWriteFileString);
 987         lpWriteFileString = NULL;
 988     }
 989     if (NULL != lpWriteFileWString)
 990     {
 991         if (FALSE == WriteFile(hFileB, lpWriteFileWString, cbWriteFileWString, NULL, NULL))
 992         {
 993             free(lpWriteFileWString);
 994             return FALSE;
 995         }
 996         free(lpWriteFileWString);
 997         lpWriteFileWString = NULL;
 998     }
 999     ::CloseHandle(hFileB);
1000     hFileB = NULL;
1001 
1002     return bResult;
1003 }
1004 
1005 
1006 int _tmain(int argc, _TCHAR* argv[])
1007 {
1008     LPTSTR lpFileA_utf8 = TEXT("Input-utf8.txt");
1009     LPTSTR lpFileA_utf8_no_bom = TEXT("Input-utf8-no-bom.txt");
1010     LPTSTR lpFileA_utf16le = TEXT("Input-utf16le.txt");
1011     LPTSTR lpFileA_utf16be = TEXT("Input-utf16be.txt");
1012 
1013     LPTSTR lpFileB_utf8 = TEXT("Output-utf8.txt");
1014     LPTSTR lpFileB_utf8_no_bom = TEXT("Output-utf8-no-bom.txt");
1015     LPTSTR lpFileB_utf16le = TEXT("Output-utf16le.txt");
1016     LPTSTR lpFileB_utf16be = TEXT("Output-utf16be.txt");
1017 
1018 
1019 
1020 
1021     //FileAToFileB(lpFileA_utf8, lpFileB_utf8_no_bom, UTF8_NO_BOM);
1022     //FileAToFileB(lpFileA_utf8, lpFileB_utf16le, UTF16LE);
1023     //FileAToFileB(lpFileA_utf8, lpFileB_utf16be, UTF16BE);
1024 
1025     FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf8, UTF8);
1026     //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16le, UTF16LE);
1027     //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16be, UTF16BE);
1028 
1029     //FileAToFileB(lpFileA_utf16le, lpFileB_utf8, UTF8);
1030     //FileAToFileB(lpFileA_utf16le, lpFileB_utf8_no_bom, UTF8_NO_BOM);
1031     //FileAToFileB(lpFileA_utf16le, lpFileB_utf16be, UTF16BE);
1032 
1033     //FileAToFileB(lpFileA_utf16be, lpFileB_utf8, UTF8);
1034     //FileAToFileB(lpFileA_utf16be, lpFileB_utf8_no_bom, UTF8_NO_BOM);
1035     //FileAToFileB(lpFileA_utf16be, lpFileB_utf16le, UTF16LE);
1036 
1037     return 0;
1038 }

*注:源码下载地址,请点击这里

 

posted @ 2015-01-25 20:18  仙人球球  Views(490)  Comments(0)  Edit  收藏  举报