VC++ 读取UTF-8和ANSI编码文件

 判断是否是UTF-8文件:

// Heuristically decides whether a byte buffer contains UTF-8 encoded text.
//
// pBuffer: first byte of the data to inspect (never modified).
// size:    number of bytes available at pBuffer.
//
// Returns true when every complete sequence in the buffer is a structurally
// valid 1- to 4-byte UTF-8 sequence (correct lead byte followed by the right
// number of 10xxxxxx continuation bytes), and false as soon as a malformed
// byte is found. Empty and pure-ASCII buffers return true.
//
// A multi-byte sequence that is cut off by the end of the buffer is NOT
// treated as an error: scanning simply stops, so a buffer that is only a
// prefix of a larger text still passes.
bool IsUTF8Text(const void* pBuffer, long size)
{
    if (size <= 0)
    {
        return true; // nothing to contradict UTF-8
    }

    const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
    const unsigned char* const end = cur + size;

    while (cur < end)
    {
        const unsigned char lead = *cur;

        // Classify the lead byte to learn how long the sequence must be.
        long seqLen = 0;
        if (lead < 0x80)        // 0xxxxxxx: plain ASCII
        {
            seqLen = 1;
        }
        else if (lead < 0xC0)   // 10xxxxxx: continuation byte without a lead byte
        {
            return false;
        }
        else if (lead < 0xE0)   // 110xxxxx: 2-byte sequence
        {
            seqLen = 2;
        }
        else if (lead < 0xF0)   // 1110xxxx: 3-byte sequence
        {
            seqLen = 3;
        }
        else if (lead < 0xF5)   // 11110xxx: 4-byte sequence (U+10000..U+10FFFF)
        {
            seqLen = 4;
        }
        else                    // 0xF5..0xFF never occur in UTF-8
        {
            return false;
        }

        // Sequence runs past the end of the buffer: stop without failing.
        if (end - cur < seqLen)
        {
            break;
        }

        // Every byte after the lead byte must look like 10xxxxxx.
        for (long i = 1; i < seqLen; ++i)
        {
            if ((cur[i] & 0xC0) != 0x80)
            {
                return false;
            }
        }

        cur += seqLen;
    }

    return true;
}

// Reports whether the file at pFileName looks like UTF-8 text.
//
// The whole file is read into memory in binary mode and handed to
// IsUTF8Text(). Returns false when the file cannot be opened, its size cannot
// be determined, or it cannot be read completely; otherwise returns the
// result of IsUTF8Text() on the file's bytes.
bool IsUTF8File(const char* pFileName)
{
    FILE *f = NULL;
    fopen_s(&f, pFileName, "rb");
    if (NULL == f)
    {
        return false;
    }

    // Measure the file by seeking to its end. Every step is checked so that a
    // failed ftell() (-1) never reaches the allocation or the read below.
    long lSize = -1;
    if (fseek(f, 0, SEEK_END) == 0)
    {
        lSize = ftell(f);
    }
    if (lSize < 0 || fseek(f, 0, SEEK_SET) != 0)
    {
        fclose(f);
        return false;
    }

    const size_t nBytes = static_cast<size_t>(lSize);
    char *pBuff = new char[nBytes + 1];

    // Element size 1 makes fread() return the number of bytes actually read.
    const size_t nRead = fread(pBuff, 1, nBytes, f);
    fclose(f);

    // Only judge the content when the whole file was read; a short read would
    // otherwise be evaluated on partial data.
    bool bIsUTF8 = false;
    if (nRead == nBytes)
    {
        bIsUTF8 = IsUTF8Text(pBuff, lSize);
    }

    delete[] pBuff;
    pBuff = NULL;

    return bIsUTF8;
}

读取文件:

// Reads an entire file and converts its bytes to a wide-character CString.
//
// filename: path of the file to open.
// CodePage: code page passed to MultiByteToWideChar to interpret the bytes
//           (e.g. CP_UTF8 or CP_ACP).
//
// A leading UTF-8 byte order mark (EF BB BF) is skipped. Returns an empty
// string when the file cannot be opened, is empty, is too large to convert in
// one call, memory cannot be allocated, or the conversion fails. As with any
// NUL-terminated construction, text after an embedded NUL is not included.
CString GetFile(CString filename, UINT CodePage)
{
    CString strFile = L"";

    CFile fileR;
    if (!fileR.Open(filename, CFile::modeRead | CFile::typeBinary))
    {
        return strFile;
    }

    // Probe for a UTF-8 BOM. The buffer is zero-initialised and the read count
    // is checked so files shorter than three bytes are handled correctly.
    BYTE head[3] = { 0, 0, 0 };
    const UINT headRead = fileR.Read(head, 3);
    const bool hasBom = (headRead == 3 &&
                         head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF);
    if (!hasBom)
    {
        fileR.SeekToBegin(); // the probed bytes are content, read them again
    }

    // Number of content bytes left to read after the optional BOM.
    const ULONGLONG totalSize = fileR.GetLength();
    const ULONGLONG payloadSize = hasBom ? totalSize - 3 : totalSize;

    // MultiByteToWideChar takes an int byte count; refuse anything that does
    // not fit rather than silently truncating the length.
    const ULONGLONG maxPayload = 0x7FFFFFFE;
    if (payloadSize == 0 || payloadSize > maxPayload)
    {
        fileR.Close();
        return strFile;
    }

    char* pContent = (char*)calloc((size_t)payloadSize + 1, sizeof(char));
    if (pContent == NULL)
    {
        fileR.Close();
        return strFile;
    }

    const UINT bytesRead = fileR.Read(pContent, (UINT)payloadSize);
    fileR.Close();

    if (bytesRead > 0)
    {
        // First call measures the required number of wide characters.
        const int n = MultiByteToWideChar(CodePage, 0, pContent, (int)bytesRead, NULL, 0);
        if (n > 0)
        {
            // One extra zeroed element guarantees NUL termination.
            wchar_t* pWideChar = (wchar_t*)calloc((size_t)n + 1, sizeof(wchar_t));
            if (pWideChar != NULL)
            {
                MultiByteToWideChar(CodePage, 0, pContent, (int)bytesRead, pWideChar, n);
                strFile = CString(pWideChar);
                free(pWideChar);
            }
        }
    }

    free(pContent);

    return strFile;
}
posted @ 2020-01-08 16:21  余生以学  阅读(1203)  评论(0)  编辑  收藏  举报