C++ Stream与编码转换
1. 自己动手改造 codecvt 来读写 Unicode(LE)(即 UTF-16LE)编码的文件,代码如下:
#include <iostream> #include <fstream> #include <locale> #include "TextCodeChange.h" using namespace std; //#define endl L"\r\n" int main() { locale loc(locale::classic(), new class NullCodecvt); wstring wstr; wifstream wfin(L"test.txt", ios_base::binary); wofstream wfout(L"test1.txt", ios_base::binary);; wfout.imbue(loc); wchar_t header[1] = { 0xFEFF }; wfout.write(header, 1); wfin.imbue(loc); wcout.imbue(locale("")); while (std::getline(wfin, wstr)) //while (wfin >> wstr) { wcout << wstr << endl; wfout << wstr << endl; } wfin.close(); wfout.close(); return 0; }
头文件 TextCodeChange.h:
#ifndef TEXT_CODE_CHANGE_H
#define TEXT_CODE_CHANGE_H

#include <algorithm>
#include <cstddef>
#include <cwchar>
#include <locale>

using std::codecvt;
typedef codecvt<wchar_t, char, std::mbstate_t> NullCodecvtBase;

// A codecvt facet that performs no conversion at all: every conversion hook
// returns `noconv` and always_noconv() is true.
//
// Every hook is marked `override`, so a signature that drifts away from the
// base class is a compile error instead of a silently unused function.
class NullCodecvt : public NullCodecvtBase
{
public:
    // Kept for source compatibility with existing users of these names; new
    // code should use the inherited intern_type / extern_type / state_type.
    typedef wchar_t _E;
    typedef char _To;
    typedef std::mbstate_t _St;

    // refs is forwarded unchanged to the base facet constructor.
    explicit NullCodecvt(std::size_t refs = 0) : NullCodecvtBase(refs) {}

protected:
    // External -> internal: nothing is consumed or produced.
    result do_in(state_type& /*state*/,
                 const extern_type* from, const extern_type* /*from_end*/,
                 const extern_type*& from_next,
                 intern_type* to, intern_type* /*to_end*/,
                 intern_type*& to_next) const override
    {
        // Leave the "next" pointers at the start of both ranges so callers
        // never read indeterminate out-parameters.
        from_next = from;
        to_next = to;
        return noconv;
    }

    // Internal -> external: nothing is consumed or produced.
    result do_out(state_type& /*state*/,
                  const intern_type* from, const intern_type* /*from_end*/,
                  const intern_type*& from_next,
                  extern_type* to, extern_type* /*to_end*/,
                  extern_type*& to_next) const override
    {
        from_next = from;
        to_next = to;
        return noconv;
    }

    // There is no shift state to terminate.
    result do_unshift(state_type& /*state*/,
                      extern_type* to, extern_type* /*to_end*/,
                      extern_type*& to_next) const override
    {
        to_next = to;
        return noconv;
    }

    // Returns the smaller of `max` and the number of external characters in
    // [from, from_end).
    int do_length(state_type& /*state*/,
                  const extern_type* from, const extern_type* from_end,
                  std::size_t max) const noexcept override
    {
        const std::size_t available = static_cast<std::size_t>(from_end - from);
        return static_cast<int>(std::min(max, available));
    }

    bool do_always_noconv() const noexcept override
    {
        return true;
    }

    // NOTE(review): 2 presumably stands for the size of a 16-bit wchar_t -
    // confirm if this is ever built where wchar_t is wider.
    int do_max_length() const noexcept override
    {
        return 2;
    }

    int do_encoding() const noexcept override
    {
        return 2;
    }
};

#endif // TEXT_CODE_CHANGE_H
2.利用C++11标准中提供的 codecvt_utf16来读写UTF-16LE文件,读出的UTF-16LE字符放入wchar_t中处理。
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>

// Reads whitespace-separated words from the UTF-16LE file test.txt, echoes
// each one to the console and writes it, followed by CR LF, to the UTF-16LE
// file test1.txt.
//
// Returns 0 on success and 1 when either file cannot be opened.
// Note: <codecvt> is deprecated since C++17.
int main()
{
    // Reader: little-endian unless a BOM says otherwise. consume_header makes
    // the facet swallow a leading BOM, so it neither becomes part of the first
    // word nor gets copied into the output behind the BOM written below.
    using Utf16LeReader = std::codecvt_utf16<
        wchar_t, 0x10ffff,
        static_cast<std::codecvt_mode>(std::consume_header | std::little_endian)>;
    // Writer: plain little-endian; the BOM is written explicitly below.
    using Utf16LeWriter = std::codecvt_utf16<wchar_t, 0x10ffff, std::little_endian>;

    const wchar_t kBom = 0xFEFF; // encoded by the writer facet as bytes FF FE

    // Both streams are opened in binary mode: UTF-16 is a byte-level encoding
    // and text-mode newline translation would corrupt it.
    std::wifstream wfin("test.txt", std::ios::binary);
    std::wofstream wfout("test1.txt", std::ios::binary);
    if (!wfin || !wfout)
    {
        std::wcerr << L"cannot open test.txt for reading or test1.txt for writing\n";
        return 1;
    }

    // The locale takes ownership of each facet created with new.
    wfin.imbue(std::locale(std::locale(), new Utf16LeReader));
    wfout.imbue(std::locale(std::locale(), new Utf16LeWriter));
    std::wcout.imbue(std::locale(""));

    wfout.write(&kBom, 1);

    std::wstring word;
    while (wfin >> word)
    {
        std::wcout << word << std::endl;
        wfout << word << L"\r\n";
    }

    // Both file streams are closed by their destructors.
    return 0;
}
3.利用C++11标准中提供的 codecvt_utf16来读写UTF-16BE文件,读出的UTF-16BE字符放入wchar_t中处理。
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>

// Reads whitespace-separated words from the UTF-16BE file test.txt, echoes
// each one to the console and writes it, followed by CR LF, to the UTF-16BE
// file test1.txt.
//
// Returns 0 on success and 1 when either file cannot be opened.
// Note: <codecvt> is deprecated since C++17.
int main()
{
    // Reader: big-endian is the facet's default byte order; consume_header
    // makes it swallow a leading BOM instead of handing it over as part of the
    // first word.
    using Utf16BeReader = std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>;
    // Writer: generate_header makes the facet emit the big-endian BOM (bytes
    // FE FF) by itself. Nothing is written by hand: pushing the value 0xFFFE
    // through the stream would encode the noncharacter U+FFFE, not a BOM.
    using Utf16BeWriter = std::codecvt_utf16<wchar_t, 0x10ffff, std::generate_header>;

    // Both streams are opened in binary mode: UTF-16 is a byte-level encoding
    // and text-mode newline translation would corrupt it.
    std::wifstream wfin("test.txt", std::ios::binary);
    std::wofstream wfout("test1.txt", std::ios::binary);
    if (!wfin || !wfout)
    {
        std::wcerr << L"cannot open test.txt for reading or test1.txt for writing\n";
        return 1;
    }

    // The locale takes ownership of each facet created with new.
    wfin.imbue(std::locale(std::locale(), new Utf16BeReader));
    wfout.imbue(std::locale(std::locale(), new Utf16BeWriter));
    std::wcout.imbue(std::locale(""));

    std::wstring word;
    while (wfin >> word)
    {
        std::wcout << word << std::endl;
        wfout << word << L"\r\n";
    }

    // Both file streams are closed by their destructors.
    return 0;
}
4.利用C++11标准中提供的codecvt_utf8来读写UTF-8文件,读出的UTF-8字符放入wchar_t中处理。
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>

// Reads whitespace-separated words from the UTF-8 file test.txt, echoes each
// one to the console and writes it on its own line to the UTF-8 file
// test1.txt.
//
// Returns 0 on success and 1 when either file cannot be opened.
// Note: <codecvt> is deprecated since C++17.
int main()
{
    // Reader: consume_header skips an optional UTF-8 BOM (EF BB BF) so it does
    // not show up as U+FEFF at the front of the first word.
    using Utf8Reader = std::codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>;
    // Writer: plain UTF-8, no BOM (same as the default-mode facet).
    using Utf8Writer = std::codecvt_utf8<wchar_t>;

    std::wifstream wfin("test.txt");
    std::wofstream wfout("test1.txt");
    if (!wfin || !wfout)
    {
        std::wcerr << L"cannot open test.txt for reading or test1.txt for writing\n";
        return 1;
    }

    // The locale takes ownership of each facet created with new.
    wfin.imbue(std::locale(std::locale(), new Utf8Reader));
    wfout.imbue(std::locale(std::locale(), new Utf8Writer));
    std::wcout.imbue(std::locale(""));

    std::wstring word;
    while (wfin >> word)
    {
        std::wcout << word << std::endl;
        // Plain L'\n' instead of endl: same character, no flush per word.
        wfout << word << L'\n';
    }

    // Both file streams are closed by their destructors.
    return 0;
}
5. UTF-8 编码与 UTF-16 编码相互转换的应用
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <stdexcept>
#include <string>

// Reads the first whitespace-separated word of the UTF-8 file test.txt as
// UTF-16, converts it to a UTF-8 byte string and back again, and prints both
// forms.
//
// Returns 0 on success and 1 when the file cannot be opened or the text
// cannot be converted.
// Note: <codecvt> and std::wstring_convert are deprecated since C++17.
int main()
{
    // One facet type for both the stream and the converter, so text decoded
    // from the file (UTF-16 code units, surrogate pairs included) is exactly
    // what the converter expects.
    using Utf8Utf16 = std::codecvt_utf8_utf16<wchar_t>;

    // Read the UTF-8 file and decode its content into a wstring.
    std::wifstream wfin("test.txt");
    if (!wfin)
    {
        std::cerr << "cannot open test.txt for reading\n";
        return 1;
    }
    wfin.imbue(std::locale(wfin.getloc(), new Utf8Utf16));

    std::wstring wstr;
    wfin >> wstr;

    // std::wstring_convert is declared in <locale>; no vendor header needed.
    std::wstring_convert<Utf8Utf16> converter;
    try
    {
        // UTF-16 -> UTF-8
        const std::string utf8 = converter.to_bytes(wstr);
        std::cout << utf8;

        // UTF-8 -> UTF-16
        wstr = converter.from_bytes(utf8);
    }
    catch (const std::range_error& error)
    {
        // Thrown by wstring_convert when a conversion fails.
        std::cerr << "conversion failed: " << error.what() << '\n';
        return 1;
    }

    std::wcout.imbue(std::locale(""));
    std::wcout << wstr;

    // The file stream is closed by its destructor.
    return 0;
}