C++ 读写带 BOM 的 UTF-8 文本文件

FileHelper.h
#pragma once
#include <iostream>
#include <fstream>
#include <string>
#include <string.h>
using namespace std;

#ifdef _WIN32
#include <Windows.h>
#endif

// Encoding category of a text file, as reported by FileHelper::GetTextFileType
// from the byte-order mark (BOM) found at the start of the file.
enum TextFileType
{
    // No recognised BOM was found (also the result when the file cannot be opened).
    TextFileType_ANSI    = 0,
    // The file starts with the bytes FF FE.
    TextFileType_UNICODE = 1,
    // The file starts with the bytes EF BB BF.
    TextFileType_UTF8    = 2
};

// Upper-case alias used throughout the FileHelper interface.
typedef enum TextFileType TEXTFILETYPE;

// Collection of static helpers for reading and writing text files that carry
// a UTF-8 byte-order mark (BOM). The class holds no state.
class FileHelper
{
public:
    // Inspects the first bytes of strFileName and classifies the file by its
    // BOM. Yields TextFileType_ANSI when no known BOM is present.
    static TEXTFILETYPE GetTextFileType(const std::string & strFileName);

    // Returns the content of a UTF-8-with-BOM file, BOM excluded. When the
    // file is not detected as UTF-8, prints a message to stdout and returns
    // an empty string. On Windows the content is passed through UTF8ToGB.
    static std::string ReadTextFile(const std::string & strFileName);

    // Writes a UTF-8 BOM followed by the bytes of strContent to strFileName,
    // replacing any existing file.
    static void WriteTextFile(const std::string & strFileName, const std::string & strContent);

#ifdef _WIN32
    // Windows only: converts a NUL-terminated UTF-8 string to the system
    // ANSI code page (CP_ACP).
    static std::string UTF8ToGB(const char* str);
#endif
};

FileHelper.cpp
#include "FileHelper.h"

// Classifies a file by the byte-order mark (BOM) at its start.
//
// @param strFileName  Path of the file to inspect.
// @return TextFileType_UNICODE if the file starts with FF FE,
//         TextFileType_UTF8 if it starts with EF BB BF,
//         TextFileType_ANSI otherwise (including when the file cannot be
//         opened or is shorter than the signature).
TEXTFILETYPE FileHelper::GetTextFileType(const std::string & strFileName)
{
    TEXTFILETYPE fileType = TextFileType_ANSI;

    // Open in binary mode: we are comparing raw signature bytes, so no
    // platform text-mode translation should be applied to them.
    std::ifstream file(strFileName.c_str(), std::ios_base::in | std::ios_base::binary);
    if (!file.good())
    {
        return fileType;
    }

    unsigned char bom[3] = { 0, 0, 0 };
    file.read(reinterpret_cast<char*>(bom), sizeof(bom));
    // A short file sets failbit but gcount() still tells how many bytes arrived.
    const std::streamsize bytesRead = file.gcount();

    if (bytesRead >= 2 && bom[0] == 0xFF && bom[1] == 0xFE)
    {
        fileType = TextFileType_UNICODE;
    }
    else if (bytesRead >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
    {
        fileType = TextFileType_UTF8;
    }

    // The ifstream destructor closes the file.
    return fileType;
}

// Reads the content of a UTF-8 file that starts with a BOM.
//
// @param strFileName  Path of the file to read.
// @return The bytes following the 3-byte BOM. On Windows the content is
//         additionally converted with UTF8ToGB. Returns an empty string when
//         the file is not detected as UTF-8 with BOM (a message is printed to
//         stdout in that case) or when it cannot be opened or read.
string FileHelper::ReadTextFile(const std::string & strFileName)
{
    if (FileHelper::GetTextFileType(strFileName) != TextFileType_UTF8)
    {
        cout << "UTF-8 file needed...!" << endl;
        return "";
    }

    FILE * fp = fopen(strFileName.c_str(), "rb");
    if (fp == NULL)
    {
        // The file may have disappeared or become unreadable since the type check.
        return "";
    }

    // UTF-8 BOM length; the payload starts right after it.
    const long kBomSize = 3;
    std::string result;

    if (fseek(fp, 0, SEEK_END) == 0)
    {
        // ftell returns -1 on failure, which the comparison below rejects.
        const long fileSize = ftell(fp);
        if (fileSize > kBomSize && fseek(fp, kBomSize, SEEK_SET) == 0)
        {
            result.resize(static_cast<size_t>(fileSize - kBomSize));
            // Read straight into the string; no manual new[]/delete[] needed.
            const size_t bytesRead = fread(&result[0], sizeof(char), result.size(), fp);
            // Keep only what was actually read if the read came up short.
            result.resize(bytesRead);
        }
    }

    fclose(fp);

#ifdef _WIN32
    result = UTF8ToGB(result.c_str());
#endif

    return result;
}

// Writes strContent to strFileName, preceded by a UTF-8 BOM. An existing file
// is overwritten.
//
// @param strFileName  Path of the file to create or replace.
// @param strContent   Bytes to write after the BOM. They are written verbatim;
//                     no encoding conversion is performed here.
//                     NOTE(review): on Windows, ReadTextFile returns text
//                     converted to the ANSI code page, so content obtained
//                     from it is presumably not UTF-8 any more - confirm
//                     callers pass UTF-8.
//
// Failures (file cannot be opened, short write) are reported on stderr because
// the void return type cannot signal them.
void FileHelper::WriteTextFile(const std::string & strFileName, const std::string & strContent)
{
    // unsigned char: the values 0xEF/0xBB/0xBF do not fit in a signed char.
    static const unsigned char kUtf8Bom[] = { 0xEF, 0xBB, 0xBF };

    FILE * pFile = fopen(strFileName.c_str(), "wb");
    if (pFile == NULL)
    {
        std::cerr << "Failed to open file for writing: " << strFileName << std::endl;
        return;
    }

    bool ok = fwrite(kUtf8Bom, 1, sizeof(kUtf8Bom), pFile) == sizeof(kUtf8Bom);
    if (ok && !strContent.empty())
    {
        ok = fwrite(strContent.data(), 1, strContent.size(), pFile) == strContent.size();
    }

    // Always close the handle; fclose also flushes, so its result matters too.
    if (fclose(pFile) != 0)
    {
        ok = false;
    }

    if (!ok)
    {
        std::cerr << "Failed to write file: " << strFileName << std::endl;
    }
}

#ifdef _WIN32
// Converts a NUL-terminated UTF-8 string to the system ANSI code page
// (CP_ACP), going through UTF-16 as the intermediate form.
//
// @param str  NUL-terminated UTF-8 input; may be NULL.
// @return The converted string, or an empty string when str is NULL or
//         either conversion step fails.
//
// NOTE(review): the name says "GB", but the target is CP_ACP, i.e. whatever
// ANSI code page the system is configured with - presumably GBK/GB2312 on
// the author's machine; confirm if a fixed code page is required.
string FileHelper::UTF8ToGB(const char* str)
{
    string result;
    if (str == NULL)
    {
        return result;
    }

    // Get the size of the temporary wide buffer (includes the terminating NUL
    // because the input length is given as -1).
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return result;
    }
    std::wstring wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, str, -1, &wide[0], wideLen) <= 0)
    {
        return result;
    }

    // Get the size of the temporary ANSI buffer (again including the NUL).
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
    {
        return result;
    }
    std::string ansi(static_cast<size_t>(ansiLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &ansi[0], ansiLen, NULL, NULL) <= 0)
    {
        return result;
    }

    // Copy up to the terminating NUL written by the conversion.
    result.assign(ansi.c_str());
    return result;
}
#endif

 

posted on 2021-04-25 17:28  空明流光  阅读(1918)  评论(0)  编辑  收藏  举报

导航