C++ 读取 UTF-8 文件到 string

#include <iostream>
#include <assert.h>
#include <fstream>
#include <string>
#include <string.h>
using namespace std;

#ifdef _WIN32
#include <Windows.h>
#endif

// Encoding categories reported by GetTextFileType(), which decides purely
// from the first bytes (byte-order mark) of the file.
enum FileType
{
    FileType_ANSI    = 0,   // default: neither of the BOMs below was found
    FileType_UNICODE = 1,   // file starts with FF FE
    FileType_UTF8    = 2    // file starts with EF BB BF
};

// Upper-case alias used throughout this file.
typedef enum FileType FILETYPE;

#ifdef _WIN32
// Windows only: converts a NUL-terminated UTF-8 string to the system ANSI
// code page (CP_ACP) by going through UTF-16.
string UTF8ToGB(const char* str);
#endif

// Classifies the file by the byte-order mark found in its first bytes;
// reports FileType_ANSI when no known mark is present.
FILETYPE GetTextFileType(const std::string & strFileName);
// Returns the content of a BOM-prefixed UTF-8 file without the BOM; prints a
// message and returns "" when the file is not detected as UTF-8.
string ReadTextFile(const std::string & strFileName);

int main()
{
    string json = ReadTextFile("/tmp/a.json");

    getchar();

    return 0;
}

/**
 * Detects a text file's encoding from the byte-order mark (BOM) at its start.
 *
 * @param strFileName  Path of the file to inspect.
 * @return FileType_UNICODE when the file starts with FF FE,
 *         FileType_UTF8 when it starts with EF BB BF,
 *         FileType_ANSI otherwise (including when the file cannot be opened
 *         or is too short to hold a mark).
 *
 * Only the BOM is examined; the remaining content is never validated.
 */
FILETYPE GetTextFileType(const std::string & strFileName)
{
    // Binary mode: the raw leading bytes are inspected, so no text-mode
    // translation must be applied to them.
    std::ifstream file(strFileName.c_str(),
                       std::ios_base::in | std::ios_base::binary);
    if (!file.is_open())
    {
        return FileType_ANSI;
    }

    unsigned char bom[3] = { 0, 0, 0 };
    file.read(reinterpret_cast<char*>(bom), sizeof(bom));
    // A short file makes read() fail, but gcount() still tells how many bytes
    // were actually stored in the buffer.
    const std::streamsize got = file.gcount();

    if (got >= 2 && bom[0] == 0xFF && bom[1] == 0xFE)
    {
        return FileType_UNICODE;
    }
    if (got >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
    {
        return FileType_UTF8;
    }
    return FileType_ANSI;
    // The stream closes itself when `file` goes out of scope.
}

string ReadTextFile(const std::string & strFileName)
{
    FILETYPE fileType = GetTextFileType(strFileName);
    if (fileType != FileType_UTF8)
    {
        cout << "UTF-8 file needed!" << endl;
        return "";
    }

    FILE * fp = NULL;
    fp = fopen(strFileName.c_str(), "rb");
    fseek(fp, 0, SEEK_END);
    size_t size = ftell(fp);
    fseek(fp, 0, SEEK_SET);

    std::string result;

    if (fp != NULL)
    {
        // UTF-8 file should offset 3 byte from start position.
        fseek(fp, sizeof(char) * 3, 0);
        int buferSize = (int)size - 3;
        char* szBuf = new char[buferSize + 1];
        memset(szBuf, 0, sizeof(char) * (buferSize + 1));
        fread(szBuf, sizeof(char), buferSize, fp);
        result.append(szBuf);
        delete szBuf;
    }

    fclose(fp);

#ifdef _WIN32
    result = UTF8ToGB(result.c_str());
#endif

    return result;
}

#ifdef _WIN32
/**
 * Converts a NUL-terminated UTF-8 string to the system ANSI code page
 * (CP_ACP), using UTF-16 as the intermediate representation.
 *
 * @param str  NUL-terminated UTF-8 input; may be NULL.
 * @return The converted string, or an empty string when str is NULL or
 *         either Win32 conversion call fails.
 */
std::string UTF8ToGB(const char* str)
{
    if (str == NULL)
    {
        return std::string();
    }

    // First call with a zero-sized buffer returns the required length in
    // WCHARs, terminating NUL included (because the input length is -1).
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, str, -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Same two-step pattern for UTF-16 -> ANSI; the length again includes
    // the terminating NUL.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1,
                                            NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
    {
        return std::string();
    }
    std::string result(static_cast<std::string::size_type>(ansiLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1,
                            &result[0], ansiLen, NULL, NULL) <= 0)
    {
        return std::string();
    }

    // Drop the terminating NUL that the API wrote into the buffer.
    result.resize(static_cast<std::string::size_type>(ansiLen - 1));
    return result;
}
#endif

 

posted on 2019-12-18 11:45  空明流光  阅读(5693)  评论(0)  编辑  收藏  举报

导航