使用不同编码读取文本

// Converts `length` UTF-16 code units starting at `text` into a string encoded
// in the system ANSI code page (CP_ACP). Returns an empty string when there is
// nothing to convert or when the conversion fails.
static string ConvertWideToAnsi(const wchar_t* text, int length)
{
    string ansi;
    if (text == NULL || length <= 0)
    {
        return ansi;
    }

    // First call only measures; an explicit length is passed so the input does
    // not have to be NUL-terminated.
    const int byteCount = WideCharToMultiByte(CP_ACP, 0, text, length, NULL, 0, NULL, NULL);
    if (byteCount <= 0)
    {
        return ansi;
    }

    ansi.resize(static_cast<size_t>(byteCount));
    WideCharToMultiByte(CP_ACP, 0, text, length, &ansi[0], byteCount, NULL, NULL);
    return ansi;
}

// Reads the whole file at `strFilePath` and returns its text as a string in the
// system ANSI code page (CP_ACP).
//
// The encoding is chosen from the byte-order mark at the start of the file:
//   EF BB BF  -> UTF-8     (sets m_isUTF_8Code)
//   FF FE     -> UTF-16 LE (sets m_isUnicode)
//   FE FF     -> UTF-16 BE (sets m_isUnicode; code units are byte-swapped)
//   otherwise -> the bytes are returned unchanged, starting at offset 0.
// The BOM itself is never part of the returned text, and line endings are
// preserved as stored in the file.
//
// If the file cannot be opened, an error box is shown, WM_QUIT is posted and an
// empty string is returned. Files larger than INT_MAX bytes are not supported
// and also yield an empty string, because the Win32 conversion APIs take `int`
// lengths.
string COfficeControlTestToolDlg::ReadFile(CString strFilePath)
{
    m_isUnicode = FALSE;
    m_isUTF_8Code = FALSE;

    string strContents;

    CFile mFile;
    if (!mFile.Open(strFilePath, CFile::modeRead | CFile::typeBinary))
    {
        MessageBox(_T("无法打开文件:") + strFilePath, _T("错误"), MB_ICONERROR | MB_OK);
        PostQuitMessage(0);
        // Nothing can be read from a file that failed to open.
        return strContents;
    }

    const ULONGLONG fileLength = mFile.GetLength();
    if (fileLength > 0x7FFFFFFF)
    {
        mFile.Close();
        return strContents;
    }

    // Read the file once into an owning buffer; BOM detection and conversion
    // then work on memory, so no bytes are lost to an earlier probe read.
    string raw;
    raw.resize(static_cast<size_t>(fileLength));
    UINT bytesRead = 0;
    if (!raw.empty())
    {
        bytesRead = mFile.Read(&raw[0], static_cast<UINT>(raw.size()));
    }
    raw.resize(bytesRead);
    mFile.Close();

    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(raw.data());
    const size_t size = raw.size();

    // Length checks keep short files from being matched against missing bytes.
    // UTF-8 has no byte order, so only EF BB BF is a valid UTF-8 signature.
    const bool hasUtf8Bom = size >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF;
    const bool hasUtf16LeBom = size >= 2 && bytes[0] == 0xFF && bytes[1] == 0xFE;
    const bool hasUtf16BeBom = size >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF;

    if (hasUtf16LeBom || hasUtf16BeBom)
    {
        m_isUnicode = true;
    }
    if (hasUtf8Bom)
    {
        m_isUTF_8Code = true;
    }

    if (hasUtf8Bom)
    {
        // UTF-8 -> UTF-16 -> ANSI, skipping the 3-byte BOM.
        const char* text = raw.data() + 3;
        const int textLength = static_cast<int>(size - 3);
        if (textLength > 0)
        {
            const int wideLength = MultiByteToWideChar(CP_UTF8, 0, text, textLength, NULL, 0);
            if (wideLength > 0)
            {
                std::wstring wide(static_cast<size_t>(wideLength), L'\0');
                MultiByteToWideChar(CP_UTF8, 0, text, textLength, &wide[0], wideLength);
                strContents = ConvertWideToAnsi(wide.data(), wideLength);
            }
        }
    }
    else if (hasUtf16LeBom || hasUtf16BeBom)
    {
        // Assemble code units byte by byte so both byte orders are handled and
        // no unaligned wchar_t access is needed. A trailing odd byte is ignored.
        const size_t unitCount = (size - 2) / 2;
        std::wstring wide(unitCount, L'\0');
        for (size_t i = 0; i < unitCount; ++i)
        {
            const unsigned int first = bytes[2 + 2 * i];
            const unsigned int second = bytes[3 + 2 * i];
            const unsigned int unit = hasUtf16BeBom ? ((first << 8) | second)
                                                    : ((second << 8) | first);
            wide[i] = static_cast<wchar_t>(unit);
        }
        strContents = ConvertWideToAnsi(wide.data(), static_cast<int>(unitCount));
    }
    else
    {
        // No BOM: treat the bytes as ANSI text and return them unchanged.
        strContents = raw;
    }

    return strContents;
}

posted on 2018-01-09 21:29 程序员乌鸦阅读(223) 评论(0) 编辑收藏举报

刷新页面返回顶部

程序员乌鸦

使用不同编码读取文本

导航

公告