指定存储文件的编码格式(下)
指定存储文件编码格式共两篇。
此为下篇。
本篇对上篇的内容进行了优化,大大提高了代码的通用性和可重用性。
有兴趣的可以都下载下来对比参考。
1 // test__EncodeAFileToEncodeBFile.cpp : 定义控制台应用程序的入口点。 2 // 3 4 #include "stdafx.h" 5 #include <windows.h> 6 #include <string> 7 #include <iostream> 8 9 10 #ifndef IN 11 #define IN 12 #endif 13 14 #ifndef OUT 15 #define OUT 16 #endif 17 18 #ifndef INOUT 19 #define INOUT 20 #endif 21 22 #ifndef OPTION 23 #define OPTION 24 #endif 25 26 #define UTF8_SIGN 3 27 #define UTF16_SIGN 2 28 #define FILE_HEADER 6 29 30 enum FileEncodeType 31 { 32 OTHER = 0, 33 UTF8, 34 UTF8_NO_BOM, 35 UTF16LE, 36 UTF16LE_NO_BOM, 37 UTF16BE, 38 UTF16BE_NO_BOM 39 }; 40 41 42 //************************************ 43 // Method: IsUTF8EncodeText 44 // FullName: IsUTF8EncodeText 45 // Access: public 46 // Returns: BOOL 47 // Qualifier:判断输入内容是否时UTF8编码格式(可以判断不带BOM的UTF8编码) 48 // Parameter: BYTE * lpText:判断是否时UTF8的内容 49 // Parameter: INT cchText:UTF8的内容长度(按个数) 50 //************************************ 51 BOOL IsUTF8EncodeText(IN CONST BYTE* lpText, IN INT cchText) 52 { 53 // UTF8判断规则 54 // 0000 0000 - 0000 007F 0xxxxxxx 55 // 0000 0080 - 0000 07FF 110xxxxx 10xxxxxx 56 // 0000 0800 - 0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 57 // 0001 0000 - 001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 58 // 0020 0000 - 03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 59 // 0400 0000 - 7FFF FFFF 1111110x 10xxxxxx ... 
10xxxxxx 60 61 62 int i; 63 DWORD nBytes = 0;//UFT8可用1-6个字节编码,ASCII用一个字节 64 BYTE lpByte; 65 BOOL bAllAscii = TRUE; //如果全部都是ASCII, 说明不是UTF-8 66 for (i = 0; i < cchText; i++) 67 { 68 lpByte = *(lpText + i); 69 if ((lpByte & 0x80) != 0) // 判断是否ASCII编码,如果不是,说明有可能是UTF-8,ASCII用7位编码,但用一个字节存,最高位标记为0,o0xxxxxxx 70 bAllAscii = FALSE; 71 if (nBytes == 0) //如果不是ASCII码,应该是多字节符,计算字节数 72 { 73 if (lpByte >= 0x80) 74 { 75 if (lpByte >= 0xFC && lpByte <= 0xFD) 76 nBytes = 6; 77 else if (lpByte >= 0xF8) 78 nBytes = 5; 79 else if (lpByte >= 0xF0) 80 nBytes = 4; 81 else if (lpByte >= 0xE0) 82 nBytes = 3; 83 else if (lpByte >= 0xC0) 84 nBytes = 2; 85 else 86 { 87 return FALSE; 88 } 89 nBytes--; 90 } 91 } 92 else //多字节符的非首字节,应为 10xxxxxx 93 { 94 if ((lpByte & 0xC0) != 0x80) 95 { 96 return FALSE; 97 } 98 nBytes--; 99 } 100 } 101 if (nBytes > 0) //违返规则 102 { 103 return FALSE; 104 } 105 if (bAllAscii) //如果全部都是ASCII, 说明不是UTF-8 106 { 107 return FALSE; 108 } 109 110 return TRUE; 111 } 112 113 //************************************ 114 // Method: GetEncodeType 115 // FullName: GetEncodeType 116 // Access: public 117 // Returns: FileEncodeType 118 // Qualifier:识别指定文件编码 119 // Parameter: IN CONST LPTSTR lpFileName:指定文件名称 120 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE 121 //************************************ 122 FileEncodeType GetEncodeType(IN CONST LPTSTR lpFileName) 123 { 124 FileEncodeType lFileEncodeType = OTHER; 125 HANDLE hFile = NULL; 126 LPBYTE lpFileHeader = NULL; 127 INT cbFileHeader = FILE_HEADER; 128 INT cchFileHeader = FILE_HEADER; 129 130 131 hFile = ::CreateFile(lpFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 132 if (INVALID_HANDLE_VALUE == hFile) 133 { 134 int errLogNumber = GetLastError(); 135 printf_s("error number:%d\n", errLogNumber); 136 return lFileEncodeType; 137 } 138 139 lpFileHeader = (BYTE *)malloc(cbFileHeader); 140 if (NULL == lpFileHeader) 141 { 142 printf_s("malloc error\n"); 143 ::CloseHandle(hFile); 144 return lFileEncodeType; 
145 } 146 ZeroMemory(lpFileHeader, cbFileHeader); 147 148 SetFilePointer(hFile, 0, NULL, FILE_BEGIN); 149 if (FALSE == ReadFile(hFile, lpFileHeader, FILE_HEADER, NULL, NULL)) 150 { 151 int errLogNumber = GetLastError(); 152 printf_s("error number:%d\n", errLogNumber); 153 free(lpFileHeader); 154 ::CloseHandle(hFile); 155 return lFileEncodeType; 156 } 157 158 159 160 if (0xef == lpFileHeader[0] && 0xbb == lpFileHeader[1] && 0xbf == lpFileHeader[2]) 161 { 162 free(lpFileHeader); 163 ::CloseHandle(hFile); 164 return lFileEncodeType = UTF8; 165 } 166 else if (0xff == lpFileHeader[0] && 0xfe == lpFileHeader[1]) 167 { 168 free(lpFileHeader); 169 ::CloseHandle(hFile); 170 return lFileEncodeType = UTF16BE; 171 } 172 else if (0xfe == lpFileHeader[0] && 0xff == lpFileHeader[1]) 173 { 174 free(lpFileHeader); 175 ::CloseHandle(hFile); 176 return lFileEncodeType = UTF16LE; 177 } 178 else 179 { 180 free(lpFileHeader); 181 182 cbFileHeader = SetFilePointer(hFile, 0, NULL, FILE_END); 183 if (INVALID_SET_FILE_POINTER == cbFileHeader) 184 { 185 int errLogNumber = GetLastError(); 186 printf_s("error number:%d\n", errLogNumber); 187 ::CloseHandle(hFile); 188 return lFileEncodeType = OTHER; 189 } 190 191 lpFileHeader = (BYTE *)malloc(cbFileHeader); 192 if (NULL == lpFileHeader) 193 { 194 printf_s("malloc error\n"); 195 ::CloseHandle(hFile); 196 return lFileEncodeType = OTHER; 197 } 198 ZeroMemory(lpFileHeader, cbFileHeader); 199 200 SetFilePointer(hFile, 0, NULL, FILE_BEGIN); 201 if (FALSE == ReadFile(hFile, lpFileHeader, cbFileHeader, NULL, NULL)) 202 { 203 int errLogNumber = GetLastError(); 204 printf_s("error number:%d\n", errLogNumber); 205 free(lpFileHeader); 206 ::CloseHandle(hFile); 207 return lFileEncodeType = OTHER; 208 } 209 210 211 212 if (TRUE == IsUTF8EncodeText(lpFileHeader, cbFileHeader)) 213 { 214 free(lpFileHeader); 215 ::CloseHandle(hFile); 216 return lFileEncodeType = UTF8_NO_BOM; 217 } 218 219 220 lFileEncodeType = OTHER; 221 } 222 223 free(lpFileHeader); 224 
::CloseHandle(hFile); 225 return lFileEncodeType; 226 } 227 228 //************************************ 229 // Method: GetEncodeType 230 // FullName: GetEncodeType 231 // Access: public 232 // Returns: FileEncodeType 233 // Qualifier:识别指定字节流编码 234 // Parameter: IN CONST BYTE * lpBytes:指定字节流 235 // Parameter: IN CONST INT cchBytes:指定字节流长度 236 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE 237 //************************************ 238 FileEncodeType GetEncodeType(IN CONST BYTE *lpBytes, IN CONST INT cchBytes) 239 { 240 if (0xef == lpBytes[0] && 0xbb == lpBytes[1] && 0xbf == lpBytes[2]) 241 { 242 return UTF8; 243 } 244 else if (0xff == lpBytes[0] && 0xfe == lpBytes[1]) 245 { 246 return UTF16LE; 247 } 248 else if (0xfe == lpBytes[0] && 0xff == lpBytes[1]) 249 { 250 return UTF16BE; 251 } 252 else 253 { 254 if (TRUE == IsUTF8EncodeText(lpBytes, cchBytes)) 255 { 256 return UTF8_NO_BOM; 257 } 258 } 259 260 return OTHER; 261 } 262 263 //************************************ 264 // Method: Utf16leAndUtf16beTransfers 265 // FullName: Utf16leAndUtf16beTransfers 266 // Access: public 267 // Returns: BOOL 268 // Qualifier:相互转换UTF16LE 和 UTF16BE 269 // Parameter: INOUT LPBYTE lpUtf16String:指定UTF16编码字节流 270 // Parameter: IN CONST INT cchUtf16String:指定UTF16编码字节流长度 271 //************************************ 272 BOOL Utf16leAndUtf16beTransfers(INOUT LPBYTE lpUtf16String, IN CONST INT cchUtf16String) 273 { 274 if (NULL == lpUtf16String || cchUtf16String < 0) 275 { 276 return FALSE; 277 } 278 279 for (INT i = 0; i < cchUtf16String; i += 2)//每两值交换 280 { 281 lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1]; 282 lpUtf16String[i + 1] = lpUtf16String[i + 1] ^ lpUtf16String[i]; 283 lpUtf16String[i] = lpUtf16String[i] ^ lpUtf16String[i + 1]; 284 } 285 286 return TRUE; 287 } 288 289 //************************************ 290 // Method: Utf8ToUtf8NoBOM 291 // FullName: Utf8ToUtf8NoBOM 292 // Access: public 293 // Returns: BOOL 294 // Qualifier:UTF8编码转换到UTF8 without BOM编码 295 // 
Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串 296 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度 297 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串 298 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度 299 //************************************ 300 BOOL Utf8ToUtf8NoBOM(IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String, INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString) 301 { 302 if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0) 303 { 304 return FALSE; 305 } 306 if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN) 307 { 308 return FALSE; 309 } 310 311 ::CopyMemory(lpUtf8NoBOMString, lpUtf8String + UTF8_SIGN, cchUtf8String - UTF8_SIGN); 312 313 return TRUE; 314 } 315 //************************************ 316 // Method: Utf8NoBOMToUtf8 317 // FullName: Utf8NoBOMToUtf8 318 // Access: public 319 // Returns: BOOL 320 // Qualifier:UTF8 without BOM编码转换到UTF8编码 321 // Parameter: INOUT CONST LPSTR lpUtf8NoBOMString:UTF8 without BOM编码字符串 322 // Parameter: IN CONST INT cchUtf8NoBOMString:UTF8 without BOM编码字符串长度 323 // Parameter: IN CONST LPSTR lpUtf8String:UTF8编码字符串 324 // Parameter: IN CONST INT cchUtf8String:UTF8编码字符串长度 325 //************************************ 326 BOOL Utf8NoBOMToUtf8(INOUT CONST LPSTR lpUtf8NoBOMString, IN CONST INT cchUtf8NoBOMString, IN CONST LPSTR lpUtf8String, IN CONST INT cchUtf8String) 327 { 328 if (NULL == lpUtf8String || cchUtf8String < 0 || NULL == lpUtf8NoBOMString || cchUtf8NoBOMString < 0) 329 { 330 return FALSE; 331 } 332 if (cchUtf8NoBOMString < cchUtf8String - UTF8_SIGN) 333 { 334 return FALSE; 335 } 336 337 lpUtf8String[0] = 0xef; 338 lpUtf8String[1] = 0xbb; 339 lpUtf8String[2] = 0xbf; 340 ::CopyMemory(lpUtf8String + UTF8_SIGN, lpUtf8NoBOMString, cchUtf8NoBOMString); 341 342 return TRUE; 343 } 344 345 //************************************ 346 // Method: BYTETOUTF8 347 // FullName: BYTETOUTF8 348 // Access: 
public 349 // Returns: BOOL 350 // Qualifier:指定编码转换为UTF8(或without BOM)编码 351 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的字节流 352 // Parameter: IN CONST INT cbBytes:指定需要转换的字节流长度(字节单位) 353 // Parameter: IN CONST FileEncodeType tpBytes:指定需要转换的字节流的实际编码格式 354 // Parameter: INOUT LPSTR lpText:指定输出的编码流 355 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位) 356 // Parameter: IN CONST BOOL bWithBOM:指定输出的编码是否有BOM 357 //************************************ 358 BOOL BYTETOUTF8(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPSTR lpText, INOUT LPINT lpcbText, IN CONST BOOL bWithBOM = TRUE) 359 { 360 BOOL bResult = FALSE; 361 LPSTR lpSourceA = NULL; 362 INT cbSourceA = 0; 363 INT cchSourceA = 0; 364 LPWSTR lpSourceW = NULL; 365 INT cbSourceW = 0; 366 INT cchSourceW = 0; 367 368 369 if (NULL == lpBytes || 370 NULL == lpText || 371 !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) || 372 cbBytes < 0 || 373 NULL == lpcbText) 374 { 375 return FALSE; 376 } 377 378 switch (tpBytes) 379 { 380 case UTF8_NO_BOM: 381 { 382 if (FALSE == bWithBOM) 383 { 384 return FALSE; 385 } 386 if (*lpcbText < cbBytes + UTF8_SIGN) 387 { 388 return FALSE; 389 } 390 391 cbSourceA = cbBytes; 392 cchSourceA = cbSourceA; 393 lpSourceA = (CHAR *)malloc(cbSourceA); 394 if (NULL == lpSourceA) 395 { 396 printf_s("malloc error\n"); 397 return FALSE; 398 } 399 ZeroMemory(lpSourceA, cbSourceA); 400 CopyMemory(lpSourceA, lpBytes, cbSourceA); 401 402 bResult = Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpText, *lpcbText); 403 free(lpSourceA); 404 } 405 break; 406 case UTF8: 407 { 408 if (TRUE == bWithBOM) 409 { 410 return FALSE; 411 } 412 if (*lpcbText < cbBytes - UTF8_SIGN) 413 { 414 return FALSE; 415 } 416 417 cbSourceA = cbBytes; 418 cchSourceA = cbSourceA; 419 lpSourceA = (CHAR *)malloc(cbSourceA); 420 if (NULL == lpSourceA) 421 { 422 printf_s("malloc error\n"); 423 return FALSE; 424 } 425 ZeroMemory(lpSourceA, cbSourceA); 426 
CopyMemory(lpSourceA, lpBytes, cbSourceA); 427 428 bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText); 429 free(lpSourceA); 430 } 431 break; 432 case UTF16LE: 433 { 434 cbSourceW = cbBytes; 435 cchSourceW = cbSourceW / sizeof(WCHAR); 436 lpSourceW = (WCHAR *)malloc(cbSourceW); 437 if (NULL == lpSourceW) 438 { 439 printf_s("malloc error\n"); 440 return FALSE; 441 } 442 ZeroMemory(lpSourceW, cbSourceW); 443 ::CopyMemory(lpSourceW, lpBytes, cbBytes); 444 445 *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小 446 447 if (TRUE == bWithBOM) 448 { 449 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL)) 450 { 451 free(lpSourceW); 452 return FALSE; 453 } 454 bResult = TRUE; 455 } 456 else 457 { 458 cbSourceA = *lpcbText; 459 cchSourceA = cbSourceA; 460 lpSourceA = (CHAR *)malloc(cbSourceA); 461 if (NULL == lpSourceA) 462 { 463 printf_s("malloc error\n"); 464 free(lpSourceW); 465 return FALSE; 466 } 467 ZeroMemory(lpSourceA, cbSourceA); 468 CopyMemory(lpSourceA, lpBytes, cbSourceA); 469 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL)) 470 { 471 free(lpSourceW); 472 free(lpSourceA); 473 return FALSE; 474 } 475 bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText); 476 *lpcbText -= UTF8_SIGN; 477 free(lpSourceA); 478 } 479 480 free(lpSourceW); 481 } 482 break; 483 case UTF16BE: 484 { 485 LPBYTE lpBytesTemp = NULL; 486 INT cbBytesTemp = 0; 487 INT cchBytesTemp = 0; 488 489 cbBytesTemp = cbBytes; 490 lpBytesTemp = (BYTE *)malloc(cbBytesTemp); 491 if (NULL == lpBytesTemp) 492 { 493 printf_s("malloc error\n"); 494 return FALSE; 495 } 496 ZeroMemory(lpBytesTemp, cbBytesTemp); 497 ::CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp); 498 499 if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp)) 500 { 501 free(lpBytesTemp); 502 return FALSE; 503 } 504 505 cbSourceW = cbBytes; 506 cchSourceW = 
cbSourceW / sizeof(WCHAR); 507 lpSourceW = (WCHAR *)malloc(cbSourceW); 508 if (NULL == lpSourceW) 509 { 510 printf_s("malloc error\n"); 511 free(lpBytesTemp); 512 return FALSE; 513 } 514 ZeroMemory(lpSourceW, cbSourceW); 515 ::CopyMemory(lpSourceW, lpBytesTemp, cbSourceW); 516 free(lpBytesTemp); 517 lpBytesTemp = NULL; 518 519 *lpcbText = ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, 0, NULL, NULL);//获取所需存储大小 520 521 if (TRUE == bWithBOM) 522 { 523 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpText, *lpcbText, NULL, NULL)) 524 { 525 free(lpSourceW); 526 return FALSE; 527 } 528 bResult = TRUE; 529 } 530 else 531 { 532 cbSourceA = *lpcbText; 533 cchSourceA = cbSourceA; 534 lpSourceA = (CHAR *)malloc(cbSourceA); 535 if (NULL == lpSourceA) 536 { 537 printf_s("malloc error\n"); 538 free(lpSourceW); 539 return FALSE; 540 } 541 ZeroMemory(lpSourceA, cbSourceA); 542 CopyMemory(lpSourceA, lpBytes, cbSourceA); 543 if (0 == ::WideCharToMultiByte(CP_UTF8, 0, lpSourceW, cchSourceW, lpSourceA, cbSourceA, NULL, NULL)) 544 { 545 free(lpSourceW); 546 free(lpSourceA); 547 return FALSE; 548 } 549 bResult = Utf8ToUtf8NoBOM(lpSourceA, cchSourceA, lpText, *lpcbText); 550 *lpcbText -= UTF8_SIGN; 551 free(lpSourceA); 552 } 553 554 free(lpSourceW); 555 } 556 break; 557 default: 558 return FALSE; 559 } 560 561 return bResult; 562 } 563 564 //************************************ 565 // Method: BYTETOUTF16LE 566 // FullName: BYTETOUTF16LE 567 // Access: public 568 // Returns: BOOL 569 // Qualifier:指定编码流转换为UTF16LE编码格式 570 // Parameter: IN CONST LPBYTE lpBytes:指定的编码流 571 // Parameter: IN CONST INT cbBytes:指定的编码流长度(字节单位) 572 // Parameter: IN CONST FileEncodeType tpBytes:指定的编码流的实际编码格式 573 // Parameter: INOUT LPWSTR lpText:指定输出的编码流 574 // Parameter: INOUT LPINT lpcbText:指定输出的编码流长度(字节单位) 575 //************************************ 576 BOOL BYTETOUTF16LE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT 
LPINT lpcbText) 577 { 578 BOOL bResult = FALSE; 579 LPSTR lpSourceA = NULL; 580 INT cbSourceA = 0; 581 INT cchSourceA = 0; 582 LPWSTR lpSourceW = NULL; 583 INT cbSourceW = 0; 584 INT cchSourceW = 0; 585 586 587 if (NULL == lpBytes || 588 NULL == lpText || 589 !(UTF8_NO_BOM == tpBytes || UTF8 == tpBytes || UTF16LE == tpBytes || UTF16BE == tpBytes) || 590 cbBytes < 0 || 591 NULL == lpcbText) 592 { 593 return FALSE; 594 } 595 596 switch (tpBytes) 597 { 598 case UTF8_NO_BOM: 599 { 600 LPSTR lpSourceATemp = NULL; 601 INT cbSourceATemp = 0; 602 INT cchSourceATemp = 0; 603 604 605 if (*lpcbText < cbBytes + UTF8_SIGN) 606 { 607 return FALSE; 608 } 609 610 cbSourceA = cbBytes; 611 cchSourceA = cbSourceA; 612 lpSourceA = (CHAR *)malloc(cbSourceA); 613 if (NULL == lpSourceA) 614 { 615 printf_s("malloc error\n"); 616 return FALSE; 617 } 618 ZeroMemory(lpSourceA, cbSourceA); 619 CopyMemory(lpSourceA, lpBytes, cbSourceA); 620 621 cbSourceATemp = cbBytes + UTF8_SIGN; 622 cchSourceATemp = cbSourceATemp; 623 lpSourceATemp = (CHAR *)malloc(cbSourceATemp); 624 if (NULL == lpSourceATemp) 625 { 626 printf_s("malloc error\n"); 627 return FALSE; 628 } 629 ZeroMemory(lpSourceATemp, cbSourceATemp); 630 631 if (FALSE == Utf8NoBOMToUtf8(lpSourceA, cchSourceA, lpSourceATemp, cchSourceATemp)) 632 { 633 free(lpSourceA); 634 free(lpSourceATemp); 635 return FALSE; 636 } 637 free(lpSourceA); 638 lpSourceA = NULL; 639 640 *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, 0); 641 if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceATemp, cbSourceATemp, lpText, *lpcbText)) 642 { 643 free(lpSourceATemp); 644 return FALSE; 645 } 646 647 *lpcbText *= sizeof(WCHAR); 648 649 free(lpSourceATemp); 650 bResult = TRUE; 651 } 652 break; 653 case UTF8: 654 { 655 if (*lpcbText < cbBytes + UTF8_SIGN) 656 { 657 return FALSE; 658 } 659 660 cbSourceA = cbBytes; 661 cchSourceA = cbSourceA; 662 lpSourceA = (CHAR *)malloc(cbSourceA); 663 if (NULL == lpSourceA) 664 { 665 printf_s("malloc 
error\n"); 666 return FALSE; 667 } 668 ZeroMemory(lpSourceA, cbSourceA); 669 CopyMemory(lpSourceA, lpBytes, cbSourceA); 670 671 672 *lpcbText = MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, 0); 673 if (0 == MultiByteToWideChar(CP_UTF8, 0, lpSourceA, cchSourceA, lpText, *lpcbText)) 674 { 675 free(lpSourceA); 676 return FALSE; 677 } 678 679 *lpcbText *= sizeof(WCHAR); 680 681 free(lpSourceA); 682 bResult = TRUE; 683 } 684 break; 685 case UTF16LE: 686 { 687 CopyMemory(lpText, lpBytes, cbBytes); 688 *lpcbText = cbBytes; 689 return TRUE; 690 } 691 break; 692 case UTF16BE: 693 { 694 if (*lpcbText < cbBytes) 695 { 696 return FALSE; 697 } 698 699 LPBYTE lpBytesTemp = NULL; 700 INT cbBytesTemp = 0; 701 702 cbBytesTemp = cbBytes; 703 lpBytesTemp = (BYTE *)malloc(cbBytesTemp); 704 if (NULL == lpBytesTemp) 705 { 706 printf_s("malloc error\n"); 707 return FALSE; 708 } 709 ZeroMemory(lpBytesTemp, cbBytesTemp); 710 CopyMemory(lpBytesTemp, lpBytes, cbBytesTemp); 711 if (FALSE == Utf16leAndUtf16beTransfers(lpBytesTemp, cbBytesTemp)) 712 { 713 free(lpSourceW); 714 return FALSE; 715 } 716 CopyMemory(lpText, lpBytesTemp, cbBytesTemp); 717 *lpcbText = cbBytesTemp; 718 free(lpBytesTemp); 719 720 bResult = TRUE; 721 } 722 break; 723 default: 724 return FALSE; 725 } 726 727 return bResult; 728 } 729 //************************************ 730 // Method: BYTETOUTF16BE 731 // FullName: BYTETOUTF16BE 732 // Access: public 733 // Returns: BOOL 734 // Qualifier:指定编码流转换为UTF16BE编码格式 735 // Parameter: IN CONST LPBYTE lpBytes:指定需要转换的编码流 736 // Parameter: IN CONST INT cbBytes:指定需要转换的编码流长度(字节单位) 737 // Parameter: IN CONST FileEncodeType tpBytes::指定需要转换的编码流的实际编码格式 738 // Parameter: INOUT LPWSTR lpText:指定输出流 739 // Parameter: INOUT LPINT lpcbText:指定输出流的长度(字节单位) 740 //************************************ 741 BOOL BYTETOUTF16BE(IN CONST LPBYTE lpBytes, IN CONST INT cbBytes, IN CONST FileEncodeType tpBytes, INOUT LPWSTR lpText, INOUT LPINT lpcbText) 742 { 743 if (FALSE == 
BYTETOUTF16LE(lpBytes, cbBytes, tpBytes, lpText, lpcbText)) 744 { 745 return FALSE; 746 } 747 return Utf16leAndUtf16beTransfers((LPBYTE)lpText, *lpcbText); 748 } 749 750 751 //************************************ 752 // Method: FileAToFileB 753 // FullName: FileAToFileB 754 // Access: public 755 // Returns: BOOL 756 // Qualifier:指定文件A中内容转换为指定编码存入指定文件B中 757 // Parameter: CONST LPTSTR lpFileA:输入文件A(只读文件) 758 // Parameter: CONST LPSTR lpFileB:输出文件B(总是创建文件) 759 // Parameter: FileEncodeType tpFileB:指定输出文件编码(UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE) 760 // *注:目前只能识别UTF8/UTF8 WITHOUT BOM/UTF16LE/UTF16BE 761 //************************************ 762 BOOL FileAToFileB(IN CONST LPTSTR lpFileA, IN CONST LPTSTR lpFileB, FileEncodeType tpFileB) 763 { 764 BOOL bResult = FALSE; 765 FileEncodeType tpFileA = OTHER; 766 HANDLE hFileA = NULL; 767 HANDLE hFileB = NULL; 768 LPBYTE lpReadFileBytes = NULL; 769 INT cbReadFileBytes = 0; 770 INT cchReadFileBytes = 0; 771 LPSTR lpWriteFileString = NULL; 772 INT cbWriteFileString = 0; 773 INT cchWriteFileString = 0; 774 LPWSTR lpWriteFileWString = NULL; 775 INT cbWriteFileWString = 0; 776 INT cchWriteFileWString = 0; 777 778 779 hFileA = ::CreateFile(lpFileA, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 780 if (INVALID_HANDLE_VALUE == hFileA) 781 { 782 int errLogNumber = GetLastError(); 783 printf_s("error number:%d\n", errLogNumber); 784 return FALSE; 785 } 786 787 cbReadFileBytes = SetFilePointer(hFileA, 0, NULL, FILE_END); 788 if (INVALID_SET_FILE_POINTER == cbReadFileBytes) 789 { 790 int errLogNumber = GetLastError(); 791 printf_s("error number:%d\n", errLogNumber); 792 ::CloseHandle(hFileA); 793 return FALSE; 794 } 795 796 if (1 != sizeof(BYTE)) 797 { 798 printf_s("byte cell width error\n"); 799 return FALSE; 800 } 801 802 lpReadFileBytes = (BYTE *)malloc(cbReadFileBytes); 803 if (NULL == lpReadFileBytes) 804 { 805 printf_s("malloc error\n"); 806 ::CloseHandle(hFileA); 807 return FALSE; 808 } 809 
ZeroMemory(lpReadFileBytes, cbReadFileBytes); 810 811 SetFilePointer(hFileA, 0, NULL, FILE_BEGIN); 812 if (FALSE == ReadFile(hFileA, lpReadFileBytes, cbReadFileBytes, NULL, NULL)) 813 { 814 int errLogNumber = GetLastError(); 815 printf_s("error number:%d\n", errLogNumber); 816 free(lpReadFileBytes); 817 ::CloseHandle(hFileA); 818 return FALSE; 819 } 820 ::CloseHandle(hFileA); 821 hFileA = NULL; 822 823 824 if (!(UTF8 == tpFileB || UTF8_NO_BOM == tpFileB || UTF16LE == tpFileB || UTF16BE == tpFileB)) 825 { 826 printf_s("Unable to identify type error\n"); 827 free(lpReadFileBytes); 828 return FALSE; 829 } 830 831 hFileB = ::CreateFile(lpFileB, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); 832 if (INVALID_HANDLE_VALUE == hFileB) 833 { 834 int errLogNumber = GetLastError(); 835 printf_s("error number:%d\n", errLogNumber); 836 free(lpReadFileBytes); 837 return FALSE; 838 } 839 840 841 tpFileA = GetEncodeType(lpReadFileBytes, cbReadFileBytes); 842 switch (tpFileA) 843 { 844 case UTF8: 845 { 846 switch (tpFileB) 847 { 848 case UTF8_NO_BOM: 849 cbWriteFileString = cbReadFileBytes - UTF8_SIGN; 850 lpWriteFileString = (CHAR *)malloc(cbWriteFileString); 851 ZeroMemory(lpWriteFileString, cbWriteFileString); 852 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileString, &cbWriteFileString, FALSE); 853 free(lpReadFileBytes); 854 lpReadFileBytes = NULL; 855 break; 856 case UTF16LE: 857 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR); 858 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); 859 ZeroMemory(lpWriteFileWString, cbWriteFileWString); 860 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString); 861 free(lpReadFileBytes); 862 lpReadFileBytes = NULL; 863 break; 864 case UTF16BE: 865 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR); 866 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); 867 ZeroMemory(lpWriteFileWString, cbWriteFileWString); 868 bResult = 
BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8, lpWriteFileWString, &cbWriteFileWString); 869 free(lpReadFileBytes); 870 lpReadFileBytes = NULL; 871 break; 872 default:; 873 } 874 } 875 break; 876 case UTF8_NO_BOM: 877 { 878 switch (tpFileB) 879 { 880 case UTF8: 881 cbWriteFileString = cbReadFileBytes + UTF8_SIGN; 882 lpWriteFileString = (CHAR *)malloc(cbWriteFileString); 883 ZeroMemory(lpWriteFileString, cbWriteFileString); 884 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileString, &cbWriteFileString, TRUE); 885 free(lpReadFileBytes); 886 lpReadFileBytes = NULL; 887 break; 888 case UTF16LE: 889 cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR); 890 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); 891 ZeroMemory(lpWriteFileWString, cbWriteFileWString); 892 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString); 893 free(lpReadFileBytes); 894 lpReadFileBytes = NULL; 895 break; 896 case UTF16BE: 897 cbWriteFileWString = (cbReadFileBytes + UTF8_SIGN) * sizeof(WCHAR); 898 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); 899 ZeroMemory(lpWriteFileWString, cbWriteFileWString); 900 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF8_NO_BOM, lpWriteFileWString, &cbWriteFileWString); 901 free(lpReadFileBytes); 902 lpReadFileBytes = NULL; 903 break; 904 default:; 905 } 906 } 907 break; 908 case UTF16LE: 909 { 910 switch (tpFileB) 911 { 912 case UTF8: 913 cbWriteFileString = cbReadFileBytes; 914 lpWriteFileString = (CHAR *)malloc(cbWriteFileString); 915 ZeroMemory(lpWriteFileString, cbWriteFileString); 916 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, TRUE); 917 free(lpReadFileBytes); 918 lpReadFileBytes = NULL; 919 break; 920 case UTF8_NO_BOM: 921 cbWriteFileString = cbReadFileBytes; 922 lpWriteFileString = (CHAR *)malloc(cbWriteFileString); 923 ZeroMemory(lpWriteFileString, 
cbWriteFileString); 924 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileString, &cbWriteFileString, FALSE); 925 free(lpReadFileBytes); 926 lpReadFileBytes = NULL; 927 break; 928 case UTF16BE: 929 cbWriteFileWString = cbReadFileBytes; 930 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); 931 ZeroMemory(lpWriteFileWString, cbWriteFileWString); 932 bResult = BYTETOUTF16BE(lpReadFileBytes, cbReadFileBytes, UTF16LE, lpWriteFileWString, &cbWriteFileWString); 933 free(lpReadFileBytes); 934 lpReadFileBytes = NULL; 935 break; 936 default:; 937 } 938 } 939 break; 940 case UTF16BE: 941 { 942 switch (tpFileB) 943 { 944 case UTF8: 945 cbWriteFileString = cbReadFileBytes; 946 lpWriteFileString = (CHAR *)malloc(cbWriteFileString); 947 ZeroMemory(lpWriteFileString, cbWriteFileString); 948 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, TRUE); 949 free(lpReadFileBytes); 950 lpReadFileBytes = NULL; 951 break; 952 case UTF8_NO_BOM: 953 cbWriteFileString = cbReadFileBytes; 954 lpWriteFileString = (CHAR *)malloc(cbWriteFileString); 955 ZeroMemory(lpWriteFileString, cbWriteFileString); 956 bResult = BYTETOUTF8(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileString, &cbWriteFileString, FALSE); 957 free(lpReadFileBytes); 958 lpReadFileBytes = NULL; 959 break; 960 case UTF16LE: 961 cbWriteFileWString = cbReadFileBytes * sizeof(WCHAR); 962 lpWriteFileWString = (WCHAR *)malloc(cbWriteFileWString); 963 ZeroMemory(lpWriteFileWString, cbWriteFileWString); 964 bResult = BYTETOUTF16LE(lpReadFileBytes, cbReadFileBytes, UTF16BE, lpWriteFileWString, &cbWriteFileWString); 965 free(lpReadFileBytes); 966 lpReadFileBytes = NULL; 967 break; 968 default:; 969 } 970 } 971 break; 972 default: 973 printf("file encode unable to identify.\n"); 974 free(lpReadFileBytes); 975 return FALSE; 976 } 977 978 979 if (NULL != lpWriteFileString) 980 { 981 if (FALSE == WriteFile(hFileB, lpWriteFileString, cbWriteFileString, 
NULL, NULL)) 982 { 983 free(lpWriteFileString); 984 return FALSE; 985 } 986 free(lpWriteFileString); 987 lpWriteFileString = NULL; 988 } 989 if (NULL != lpWriteFileWString) 990 { 991 if (FALSE == WriteFile(hFileB, lpWriteFileWString, cbWriteFileWString, NULL, NULL)) 992 { 993 free(lpWriteFileWString); 994 return FALSE; 995 } 996 free(lpWriteFileWString); 997 lpWriteFileWString = NULL; 998 } 999 ::CloseHandle(hFileB); 1000 hFileB = NULL; 1001 1002 return bResult; 1003 } 1004 1005 1006 int _tmain(int argc, _TCHAR* argv[]) 1007 { 1008 LPTSTR lpFileA_utf8 = TEXT("Input-utf8.txt"); 1009 LPTSTR lpFileA_utf8_no_bom = TEXT("Input-utf8-no-bom.txt"); 1010 LPTSTR lpFileA_utf16le = TEXT("Input-utf16le.txt"); 1011 LPTSTR lpFileA_utf16be = TEXT("Input-utf16be.txt"); 1012 1013 LPTSTR lpFileB_utf8 = TEXT("Output-utf8.txt"); 1014 LPTSTR lpFileB_utf8_no_bom = TEXT("Output-utf8-no-bom.txt"); 1015 LPTSTR lpFileB_utf16le = TEXT("Output-utf16le.txt"); 1016 LPTSTR lpFileB_utf16be = TEXT("Output-utf16be.txt"); 1017 1018 1019 1020 1021 //FileAToFileB(lpFileA_utf8, lpFileB_utf8_no_bom, UTF8_NO_BOM); 1022 //FileAToFileB(lpFileA_utf8, lpFileB_utf16le, UTF16LE); 1023 //FileAToFileB(lpFileA_utf8, lpFileB_utf16be, UTF16BE); 1024 1025 FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf8, UTF8); 1026 //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16le, UTF16LE); 1027 //FileAToFileB(lpFileA_utf8_no_bom, lpFileB_utf16be, UTF16BE); 1028 1029 //FileAToFileB(lpFileA_utf16le, lpFileB_utf8, UTF8); 1030 //FileAToFileB(lpFileA_utf16le, lpFileB_utf8_no_bom, UTF8_NO_BOM); 1031 //FileAToFileB(lpFileA_utf16le, lpFileB_utf16be, UTF16BE); 1032 1033 //FileAToFileB(lpFileA_utf16be, lpFileB_utf8, UTF8); 1034 //FileAToFileB(lpFileA_utf16be, lpFileB_utf8_no_bom, UTF8_NO_BOM); 1035 //FileAToFileB(lpFileA_utf16be, lpFileB_utf16le, UTF16LE); 1036 1037 return 0; 1038 }
*注:源码下载地址,请点击这里。
煮酒论英雄