c++ 读取 utf-8 文件到 string
// Reads a UTF-8 (with BOM) text file into a std::string. On Windows the
// content is additionally converted to the system ANSI code page.

#include <assert.h>
#include <string.h>

#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#ifdef _WIN32
#include <Windows.h>
#endif

// Encoding guessed from the byte-order mark at the start of a file.
typedef enum FileType
{
    FileType_ANSI = 0,  // no recognised BOM (also used when the file cannot be opened)
    FileType_UNICODE,   // starts with FF FE
    FileType_UTF8       // starts with EF BB BF
} FILETYPE;

#ifdef _WIN32
std::string UTF8ToGB(const char* str);
#endif
FILETYPE GetTextFileType(const std::string & strFileName);
std::string ReadTextFile(const std::string & strFileName);

// Entry point: loads a fixed sample file, then waits for a key press so a
// console window stays open.
int main()
{
    std::string json = ReadTextFile("/tmp/a.json");
    (void)json;  // the content is only loaded here, not used further
    std::getchar();
    return 0;
}

// Inspects the first bytes of the file for a byte-order mark.
//
// Returns FileType_UNICODE for a leading FF FE, FileType_UTF8 for a leading
// EF BB BF, and FileType_ANSI otherwise, including when the file cannot be
// opened or is shorter than the BOM being tested.
FILETYPE GetTextFileType(const std::string & strFileName)
{
    // Binary mode: the BOM bytes must be read untranslated.
    std::ifstream file(strFileName.c_str(), std::ios_base::in | std::ios_base::binary);
    if (!file.good())
    {
        return FileType_ANSI;
    }

    unsigned char bom[3] = { 0, 0, 0 };
    file.read(reinterpret_cast<char*>(bom), sizeof(bom));
    const std::streamsize bytesRead = file.gcount();

    if (bytesRead >= 2 && bom[0] == 0xFF && bom[1] == 0xFE)
    {
        return FileType_UNICODE;
    }
    if (bytesRead >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
    {
        return FileType_UTF8;
    }
    return FileType_ANSI;
}

// Reads the whole content of a UTF-8 file that starts with a BOM.
//
// The three BOM bytes are skipped. If the file does not start with a UTF-8
// BOM, "UTF-8 file needed!" is printed to stdout and an empty string is
// returned. An empty string is also returned if the file cannot be reopened
// for reading. On Windows the content is passed through UTF8ToGB before being
// returned.
std::string ReadTextFile(const std::string & strFileName)
{
    if (GetTextFileType(strFileName) != FileType_UTF8)
    {
        std::cout << "UTF-8 file needed!" << std::endl;
        return "";
    }

    std::ifstream file(strFileName.c_str(), std::ios_base::in | std::ios_base::binary);
    if (!file.good())
    {
        // The file vanished or became unreadable after the type check.
        return "";
    }

    // Skip the 3-byte UTF-8 BOM; the type check above guarantees it is there.
    static const std::streamoff kUtf8BomSize = 3;
    file.seekg(kUtf8BomSize, std::ios_base::beg);

    // Read everything that follows; the stream closes itself on scope exit.
    std::string result((std::istreambuf_iterator<char>(file)),
                       std::istreambuf_iterator<char>());

#ifdef _WIN32
    result = UTF8ToGB(result.c_str());
#endif
    return result;
}

#ifdef _WIN32
// Converts a NUL-terminated UTF-8 string to the system ANSI code page
// (CP_ACP) by going through UTF-16.
//
// Returns an empty string when str is NULL or when either conversion step
// fails.
// NOTE(review): the name suggests GB/GBK output, but the result depends on the
// machine's active code page -- confirm that is intended.
std::string UTF8ToGB(const char* str)
{
    if (str == NULL)
    {
        return std::string();
    }

    // With a source length of -1 the returned sizes include the terminating NUL.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }
    std::vector<WCHAR> wide(static_cast<size_t>(wideLen));
    if (MultiByteToWideChar(CP_UTF8, 0, str, -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
    {
        return std::string();
    }
    std::vector<char> ansi(static_cast<size_t>(ansiLen));
    if (WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, &ansi[0], ansiLen, NULL, NULL) <= 0)
    {
        return std::string();
    }

    return std::string(&ansi[0]);
}
#endif
桂棹兮兰桨,击空明兮溯流光。