基于Poco的UTF8、UTF16、GBK、Hex之间的转换
2014-02-17 09:41 DVwei 阅读(3491) 评论(0) 编辑 收藏 举报/******Encoding.h*******/ #include "Poco/UnicodeConverter.h" #include "Poco/Exception.h" #include "Poco/DigestEngine.h" #define MyLib_API Foundation_API using namespace Poco; POCO_DECLARE_EXCEPTION(MyLib_API, EncodeException, Exception) class Encoding { public: enum ByteOrderType { BIG_ENDIAN_BYTE_ORDER, LITTLE_ENDIAN_BYTE_ORDER, UNKNOW }; static void GBKToUTF16(const std::string& gbkString, std::wstring& utf16String) throw(EncodeException); static void UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString) throw(EncodeException); static void UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String) throw(EncodeException); static void UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String) throw(EncodeException); static void UTF8ToGBK(const std::string& utf8String, std::string& gbkString) throw(EncodeException); static void GBKToUTF8(const std::string& gbkString, std::string& utf8String) throw(EncodeException); static void EncodeHexString(const std::string& bytes, std::string& hexString); static void DecodeHexString(const std::string& hexString, std::string& bytes); static void EncodeHexString(const std::wstring& bytes, std::string& hexString); static void DecodeHexString(const std::string& hexString, std::wstring& bytes); static ByteOrderType GetCurrentByteOrder(); private: static Poco::UnicodeConverter _unicodeConverter; static ByteOrderType _currentByteOrder; };
/********Encoding.cpp********/
#include "Encoding.h"
#include "Poco/NumberParser.h"

#include <windows.h>

#include <climits>
#include <string>
#include <vector>

// Definitions for the static members declared by the Encoding class.
Poco::UnicodeConverter Encoding::_unicodeConverter;
// No longer read by GetCurrentByteOrder(); the definition stays because the
// class declares the member.
Encoding::ByteOrderType Encoding::_currentByteOrder;

POCO_IMPLEMENT_EXCEPTION(EncodeException, Poco::Exception, "Encoding error")

namespace
{
    // Code page passed to the Win32 conversion calls for GBK text.
    const UINT GBK_CODE_PAGE = 936;

    // Inspects the in-memory layout of a 16-bit value to determine byte order.
    Encoding::ByteOrderType detectByteOrder()
    {
        const char16_t probe = 0x0102;
        // Reading an object through unsigned char* is well defined, unlike
        // reading the inactive member of a union.
        const unsigned char* raw = reinterpret_cast<const unsigned char*>(&probe);

        if (raw[0] == 0x01 && raw[1] == 0x02)
            return Encoding::BIG_ENDIAN_BYTE_ORDER;
        if (raw[0] == 0x02 && raw[1] == 0x01)
            return Encoding::LITTLE_ENDIAN_BYTE_ORDER;
        return Encoding::UNKNOW;
    }
}

/// Converts a GBK string to UTF-16 with MultiByteToWideChar.
///
/// The explicit input length is passed instead of -1, so the result carries
/// no trailing NUL and embedded NUL bytes do not truncate the input.
/// `utf16String` is only modified when the conversion succeeds.
///
/// @throws EncodeException carrying the Win32 error code on failure.
void Encoding::GBKToUTF16(const std::string& gbkString, std::wstring& utf16String)
{
    if (gbkString.empty())
    {
        utf16String.clear();
        return;
    }
    // The Win32 API takes the length as int.
    if (gbkString.size() > static_cast<std::string::size_type>(INT_MAX))
        throw EncodeException("GBK convert to UTF16 failed", ERROR_INVALID_PARAMETER);

    const int sourceLength = static_cast<int>(gbkString.size());

    // First call: query how many wide characters are required.
    const int required = MultiByteToWideChar(GBK_CODE_PAGE, 0, gbkString.data(), sourceLength, nullptr, 0);
    if (required <= 0)
        throw EncodeException("GBK convert to UTF16 failed", static_cast<int>(GetLastError()));

    std::vector<wchar_t> buffer(static_cast<std::size_t>(required));
    const int written = MultiByteToWideChar(GBK_CODE_PAGE, 0, gbkString.data(), sourceLength, buffer.data(), required);
    if (written <= 0)
        throw EncodeException("GBK convert to UTF16 failed", static_cast<int>(GetLastError()));

    utf16String.assign(buffer.data(), static_cast<std::size_t>(written));
}

/// Converts a UTF-16 string to GBK with WideCharToMultiByte.
///
/// The explicit input length is passed instead of -1, so the result carries
/// no trailing NUL and embedded NUL characters do not truncate the input.
/// `gbkString` is only modified when the conversion succeeds.
///
/// @throws EncodeException carrying the Win32 error code on failure.
void Encoding::UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString)
{
    if (utf16String.empty())
    {
        gbkString.clear();
        return;
    }
    // The Win32 API takes the length as int.
    if (utf16String.size() > static_cast<std::wstring::size_type>(INT_MAX))
        throw EncodeException("UTF16 convert to GBK failed", ERROR_INVALID_PARAMETER);

    const int sourceLength = static_cast<int>(utf16String.size());

    // First call: query how many bytes are required.
    const int required = WideCharToMultiByte(GBK_CODE_PAGE, 0, utf16String.data(), sourceLength, nullptr, 0, nullptr, nullptr);
    if (required <= 0)
        throw EncodeException("UTF16 convert to GBK failed", static_cast<int>(GetLastError()));

    std::vector<char> buffer(static_cast<std::size_t>(required));
    const int written = WideCharToMultiByte(GBK_CODE_PAGE, 0, utf16String.data(), sourceLength, buffer.data(), required, nullptr, nullptr);
    if (written <= 0)
        throw EncodeException("UTF16 convert to GBK failed", static_cast<int>(GetLastError()));

    gbkString.assign(buffer.data(), static_cast<std::size_t>(written));
}

/// Converts UTF-8 to UTF-16 through Poco::UnicodeConverter.
///
/// @throws EncodeException wrapping the message and code of any Poco
///         exception raised by the converter.
void Encoding::UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String)
{
    try
    {
        _unicodeConverter.toUTF16(utf8String, utf16String);
    }
    catch (const Poco::Exception& e)
    {
        // Report the Poco error code; GetLastError() is unrelated to a
        // failure raised by Poco.
        throw EncodeException("UTF8 convert to UTF16 failed, " + e.message(), e.code());
    }
}

/// Converts UTF-16 to UTF-8 through Poco::UnicodeConverter.
///
/// @throws EncodeException wrapping the message and code of any Poco
///         exception raised by the converter.
void Encoding::UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String)
{
    try
    {
        _unicodeConverter.toUTF8(utf16String, utf8String);
    }
    catch (const Poco::Exception& e)
    {
        // The message now names the direction that actually failed.
        throw EncodeException("UTF16 convert to UTF8 failed, " + e.message(), e.code());
    }
}

/// Converts UTF-8 to GBK by going through UTF-16.
///
/// @throws EncodeException if either step fails. The code is taken from the
///         exception that caused the failure.
void Encoding::UTF8ToGBK(const std::string& utf8String, std::string& gbkString)
{
    std::wstring utf16String;
    try
    {
        _unicodeConverter.toUTF16(utf8String, utf16String);
        UTF16ToGBK(utf16String, gbkString);
    }
    catch (const EncodeException& e)
    {
        // Raised by UTF16ToGBK; keep the Win32 code it captured.
        throw EncodeException("UTF8 convert to GBK failed", e.code());
    }
    catch (const Poco::Exception& e)
    {
        throw EncodeException("UTF8 convert to UTF16 failed, " + e.message(), e.code());
    }
}

/// Converts GBK to UTF-8 by going through UTF-16.
///
/// @throws EncodeException if either step fails. The code is taken from the
///         exception that caused the failure.
void Encoding::GBKToUTF8(const std::string& gbkString, std::string& utf8String)
{
    std::wstring utf16String;
    try
    {
        GBKToUTF16(gbkString, utf16String);
        _unicodeConverter.toUTF8(utf16String, utf8String);
    }
    catch (const EncodeException& e)
    {
        // Raised by GBKToUTF16; keep the Win32 code it captured.
        throw EncodeException("GBK convert to UTF8 failed", e.code());
    }
    catch (const Poco::Exception& e)
    {
        // The message now names the direction that actually failed.
        throw EncodeException("UTF16 convert to UTF8 failed, " + e.message(), e.code());
    }
}

/// Replaces `hexString` with the hexadecimal text Poco produces for the
/// bytes of `bytes`.
void Encoding::EncodeHexString(const std::string& bytes, std::string& hexString)
{
    const Poco::DigestEngine::Digest digest(bytes.begin(), bytes.end());
    hexString = Poco::DigestEngine::digestToHex(digest);
}

/// Replaces `bytes` with the values parsed from `hexString`, two characters
/// at a time.
///
/// Groups that fail to parse are skipped (best effort, as before). A trailing
/// single character is parsed on its own.
void Encoding::DecodeHexString(const std::string& hexString, std::string& bytes)
{
    bytes.clear();
    bytes.reserve(hexString.length() / 2 + 1);

    for (std::string::size_type pos = 0; pos < hexString.length(); pos += 2)
    {
        unsigned int value = 0;
        if (NumberParser::tryParseHex(hexString.substr(pos, 2), value))
            bytes.push_back(static_cast<char>(value));
    }
}

/// Replaces `hexString` with the hexadecimal text for `utf16String`, emitting
/// the high byte of each 16-bit code unit before the low byte.
///
/// Shifts are used instead of inspecting memory, so the output does not
/// depend on the machine's byte order.
void Encoding::EncodeHexString(const std::wstring& utf16String, std::string& hexString)
{
    Poco::DigestEngine::Digest digest;
    digest.reserve(utf16String.size() * 2);

    for (const wchar_t unit : utf16String)
    {
        // Only the low 16 bits form the code unit.
        const unsigned int value = static_cast<unsigned int>(unit) & 0xFFFFu;
        digest.push_back(static_cast<unsigned char>(value >> 8));
        digest.push_back(static_cast<unsigned char>(value & 0xFFu));
    }

    hexString = Poco::DigestEngine::digestToHex(digest);
}

/// Replaces `utf16String` with the values parsed from `hexString`, four
/// characters at a time.
///
/// Groups that fail to parse are skipped (best effort, as before). A trailing
/// short group is parsed on its own.
void Encoding::DecodeHexString(const std::string& hexString, std::wstring& utf16String)
{
    utf16String.clear();
    utf16String.reserve(hexString.length() / 4 + 1);

    for (std::string::size_type pos = 0; pos < hexString.length(); pos += 4)
    {
        unsigned int value = 0;
        if (NumberParser::tryParseHex(hexString.substr(pos, 4), value))
            utf16String.push_back(static_cast<wchar_t>(value));
    }
}

/// Returns the byte order of the current machine.
///
/// The detection runs once, on the first call, through a function-local
/// static; later calls return the cached result.
Encoding::ByteOrderType Encoding::GetCurrentByteOrder()
{
    static const ByteOrderType detected = detectByteOrder();
    return detected;
}