MLang(参见 MSDN 的 Internet Development SDK)随 IE4 及其后续版本提供。相对于 Platform SDK 提供的 WideCharToMultiByte 和 MultiByteToWideChar,它的特点是这些函数更加倾向于 Internet 程序设计,并且支持 Internet 上的任何字符集与 Unicode 之间的互相转换。更详细的内容可以查阅 MSDN 上的 MLang 参考文档。
1、首先创建IMultiLanguage2的COM对象
1//初始化字符集代码转换API
2hr = CoCreateInstance(CLSID_CMultiLanguage,
3 NULL,
4 CLSCTX_ALL,
5 IID_IMultiLanguage2,
6 (LPVOID*)&m_spMultiLang);
7
2、范例转换成UNICODE,注意这里当碰到MLang不支持的转换的时候会再次尝试使用Platform SDK的函数再次转换。
// Create the MLang COM object that the character-set conversion helpers use.
// hr must be tested with FAILED()/SUCCEEDED() before m_spMultiLang is used.
hr = CoCreateInstance(CLSID_CMultiLanguage,
                      NULL,
                      CLSCTX_ALL,
                      IID_IMultiLanguage2,
                      reinterpret_cast<LPVOID*>(&m_spMultiLang));
7
Code
1 ////////////////////////////////////////////////////////////////////////////
2 ///Convert current local string to UNICODE string
3 ///@param[out] lstrwstring
4 /// The buffer to save converted UNICODE string
5 ///@return
6 /// E_FAIL when fail or S_OK when success.
7 HRESULT ConvertToUnicode(CLocalString &lstrwstring)const
8 {
9 //Now calculate destinate UNICODE string length in character
10 size_t nCharSizeDst = m_string.length() + 1;
11 LPWSTR pwstr = new WCHAR[nCharSizeDst];
12
13 //Convert to UNICODE, because source is not UNICODE, so the quantity of
14 //char and byte are the same.
15 DWORD dwMode = 0;
16 size_t nByteSizeSrc = m_string.length();
17 HRESULT hr = m_spMultiLang->ConvertStringToUnicode(&dwMode,
18 m_dwCodePage,
19 const_cast<CHAR*>(m_string.c_str()),
20 &nByteSizeSrc,
21 pwstr,
22 &nCharSizeDst);
23 //Convert successfully, quit
24 if(S_OK == hr)
25 {
26 lstrwstring.Assign(pwstr, nCharSizeDst);
27 delete[] pwstr;
28
29 return S_OK;
30 }
31
32 //Now try to use MultiByteToWideChar to do transformation again
33 nByteSizeSrc = m_string.length() + 1;
34 int nRet = ::MultiByteToWideChar(m_dwCodePage,
35 0,
36 m_string.c_str(),
37 (int)nByteSizeSrc,
38 pwstr,
39 (int)nCharSizeDst);
40
41 if(ERROR_NO_UNICODE_TRANSLATION == nRet)
42 {
43 delete[] pwstr;
44 NWTRACE(NWEL_CRITICAL, TEXT("Convert to UNICODE fail with reason ERROR_NO_UNICODE_TRANSLATION."));
45 return E_FAIL;
46 }
47
48 if(0 == nRet)
49 {
50 delete[] pwstr;
51 DWORD dwErrID = ::GetLastError();
52 NWTRACE(NWEL_CRITICAL, TEXT("Convert to UNICODE fail with reason %d."), dwErrID);
53 return E_FAIL;
54 }
55
56 //Conversion success.
57 lstrwstring.Assign(pwstr, nCharSizeDst);
58 delete[] pwstr;
59 return S_OK;
60 }
3、获取字符集的索引和MIME命名
////////////////////////////////////////////////////////////////////////////
/// Converts the multi-byte text held in m_string (interpreted with the
/// encoding m_dwCodePage) to a wide-character string.
///
/// IMultiLanguage2::ConvertStringToUnicode is tried first. When it does not
/// return S_OK the conversion is retried with the Platform SDK function
/// MultiByteToWideChar.
///@param[out] lstrwstring
///    Receives the converted characters through Assign(). The length passed
///    to Assign() never includes a terminating NUL. Not modified on failure.
///@return
///    S_OK on success, E_FAIL when the text cannot be converted.
HRESULT ConvertToUnicode(CLocalString &lstrwstring)const
{
    // Both conversion APIs take 32-bit lengths; refuse input that cannot be
    // described to them instead of silently truncating the size.
    const size_t nMaxSourceLen = 0x7FFFFFFE;
    if(m_string.length() > nMaxSourceLen)
    {
        return E_FAIL;
    }

    // This routine sizes the destination at one wide character per source
    // byte plus one slot for a terminator. The wstring owns the memory, so
    // every exit path releases it.
    const UINT nCapacity = static_cast<UINT>(m_string.length() + 1);
    wstring wstrBuffer(nCapacity, L'\0');

    // MLang takes both sizes as in/out UINT values: on input the source byte
    // count and the destination capacity, on output the amounts processed.
    // The source is not UNICODE, so its byte count equals its char count.
    DWORD dwMode = 0;
    UINT nByteSizeSrc = static_cast<UINT>(m_string.length());
    UINT nCharSizeDst = nCapacity;
    const HRESULT hr = m_spMultiLang->ConvertStringToUnicode(&dwMode,
        m_dwCodePage,
        const_cast<CHAR*>(m_string.c_str()),
        &nByteSizeSrc,
        &wstrBuffer[0],
        &nCharSizeDst);

    // Converted successfully, quit.
    if(S_OK == hr)
    {
        lstrwstring.Assign(&wstrBuffer[0], static_cast<size_t>(nCharSizeDst));
        return S_OK;
    }

    // Retry with MultiByteToWideChar. The sizes are taken from the buffer
    // itself because the failed MLang call may have changed its counters.
    const int nRet = ::MultiByteToWideChar(m_dwCodePage,
        0,
        m_string.c_str(),
        static_cast<int>(m_string.length() + 1),
        &wstrBuffer[0],
        static_cast<int>(nCapacity));

    // MultiByteToWideChar signals failure by returning 0; the reason is only
    // available from GetLastError(), which is read before anything else can
    // overwrite it.
    if(0 == nRet)
    {
        const DWORD dwErrID = ::GetLastError();
        if(ERROR_NO_UNICODE_TRANSLATION == dwErrID)
        {
            NWTRACE(NWEL_CRITICAL, TEXT("Convert to UNICODE fail with reason ERROR_NO_UNICODE_TRANSLATION."));
        }
        else
        {
            NWTRACE(NWEL_CRITICAL, TEXT("Convert to UNICODE fail with reason %d."), dwErrID);
        }
        return E_FAIL;
    }

    // Conversion success. The source length included the terminating NUL, so
    // nRet counts it as well; drop it to hand over the same kind of length as
    // the MLang path does.
    lstrwstring.Assign(&wstrBuffer[0], static_cast<size_t>(nRet - 1));
    return S_OK;
}
Code
1 DWORD GetCodePage(LPCSTR pszCharset)
2 {
3 NWASSERT(pszCharset);
4 CComBSTR bstrCharSet(pszCharset);
5
6 MIMECSETINFO mi;
7 if(FAILED(m_spMultiLang->GetCharsetInfo(bstrCharSet, &mi)))
8 return NWCP_ANSI;
9 else
10 return mi.uiInternetEncoding;
11 }
12
13 HRESULT GetCharset(wstring& wstrCharset)const
14 {
15 MIMECPINFO mimeInfo;
16 HRESULT hr = m_spMultiLang->GetCodePageInfo(m_dwCodePage, 0, &mimeInfo);
17 if(SUCCEEDED(hr))
18 wstrCharset = mimeInfo.wszWebCharset;
19 return hr;
20 }
4、解码MIME字符串,注意这里面的BASE64与QuotePrintable的解码并没有列出来,需要的google一下吧
/// Looks up the encoding identifier that MLang associates with a charset name.
///@param[in] pszCharset
///    Charset name as an ANSI string; asserted to be non-NULL.
///@return
///    The uiInternetEncoding value reported by GetCharsetInfo(), or NWCP_ANSI
///    when the lookup fails.
DWORD GetCodePage(LPCSTR pszCharset)
{
    NWASSERT(pszCharset);

    // GetCharsetInfo() takes the name as a BSTR.
    MIMECSETINFO charsetInfo;
    CComBSTR bstrName(pszCharset);

    const HRESULT hrLookup = m_spMultiLang->GetCharsetInfo(bstrName, &charsetInfo);
    if(SUCCEEDED(hrLookup))
    {
        return charsetInfo.uiInternetEncoding;
    }

    // Charset not resolved by MLang: fall back to the ANSI identifier.
    return NWCP_ANSI;
}
12
13 HRESULT GetCharset(wstring& wstrCharset)const
14 {
15 MIMECPINFO mimeInfo;
16 HRESULT hr = m_spMultiLang->GetCodePageInfo(m_dwCodePage, 0, &mimeInfo);
17 if(SUCCEEDED(hr))
18 wstrCharset = mimeInfo.wszWebCharset;
19 return hr;
20 }
Code
1 ////////////////////////////////////////////////////////////////////////////
2 ///解码MIME字符串,此函数首先检测lpStr,如果它是以"=?"开始,那么就判定为传入的字符
3 ///串以MIME编码,并且进行解码。字符集由MIME指定。其格式为
4 ///"=?[CharSet]?[B/Q]?[encoded char]?=",参照RFC2045, 2046
5 ///@param[in] lpStr
6 ///必须是符合RFC标准的ANSI字符串
7 ///@return
8 ///如果格式不对或者出错返回E_FAIL,否则返回S_OK。
9 HRESULT AssignMIME(LPCSTR lpStr)
10 {
11 NWASSERT(lpStr);
12
13 //Make a copy
14 string strSrc(lpStr);
15 string::iterator it = strSrc.begin();
16
17 //The return value.
18 HRESULT hr = E_ABORT;
19
20 //7 来自于基本格式"=??=",B/Q,和另外两个"?", 考虑到还有字符集的信息,所以应该
21 //至少大于7.
22 if(strSrc.length() > 7)
23 {
24 if(*it == '=' && *(it + 1) == '?')
25 {
26 //获取字符集
27 size_t nOffset = strSrc.find_first_of('?', 2);
28 string strCharset = strSrc.substr(2, nOffset - 2);
29 m_dwCodePage = GetCodePage(strCharset.c_str());
30
31 //获取被编码的内容,注意可能为空。
32 if(strSrc.length() > (nOffset + 3) + 2)
33 {
34 //获取实际的字符串,注意可能为空
35 string strContent(lpStr + nOffset + 3, strSrc.length() - (nOffset + 3) - 2);
36
37 //获取编码方式
38 ++nOffset;
39 if(*(lpStr + nOffset) == 'B' || *(lpStr + nOffset) == 'b')
40 {
41 hr = m_spCharCodec->Decode(strContent, m_string, NWCC_BASE64);
42 }
43 else if(*(lpStr + nOffset) == 'Q' || *(lpStr + nOffset) == 'q')
44 {
45 hr = m_spCharCodec->Decode(strContent, m_string, NWCC_QUOTEPRINT);
46 }
47 else
48 {
49 hr = E_UNEXPECTED;
50 NWTRACE(NWEL_CRITICAL, TEXT("The character codec is unexpected"));
51 NWASSERT(FALSE);
52 }
53
54 //错误检测
55 ++nOffset;
56 if(*(lpStr + nOffset) != '?')
57 {
58 hr = E_UNEXPECTED;
59 NWTRACE(NWEL_CRITICAL, TEXT("The MIME string format is incorrect"));
60 NWASSERT(FALSE);
61 }
62 }
63 else //没有有效字符,空串
64 {
65 m_string.clear();
66 }
67 }
68 }
69 return hr;
70 }
5、此外还有一些其它的功能,诸如比较两个字符串、去掉字符串首尾的空格等。
1 ////////////////////////////////////////////////////////////////////////////
2 ///解码MIME字符串,此函数首先检测lpStr,如果它是以"=?"开始,那么就判定为传入的字符
3 ///串以MIME编码,并且进行解码。字符集由MIME指定。其格式为
4 ///"=?[CharSet]?[B/Q]?[encoded char]?=",参照RFC2045, 2046
5 ///@param[in] lpStr
6 ///必须是符合RFC标准的ANSI字符串
7 ///@return
8 ///如果格式不对或者出错返回E_FAIL,否则返回S_OK。
9 HRESULT AssignMIME(LPCSTR lpStr)
10 {
11 NWASSERT(lpStr);
12
13 //Make a copy
14 string strSrc(lpStr);
15 string::iterator it = strSrc.begin();
16
17 //The return value.
18 HRESULT hr = E_ABORT;
19
20 //7 来自于基本格式"=??=",B/Q,和另外两个"?", 考虑到还有字符集的信息,所以应该
21 //至少大于7.
22 if(strSrc.length() > 7)
23 {
24 if(*it == '=' && *(it + 1) == '?')
25 {
26 //获取字符集
27 size_t nOffset = strSrc.find_first_of('?', 2);
28 string strCharset = strSrc.substr(2, nOffset - 2);
29 m_dwCodePage = GetCodePage(strCharset.c_str());
30
31 //获取被编码的内容,注意可能为空。
32 if(strSrc.length() > (nOffset + 3) + 2)
33 {
34 //获取实际的字符串,注意可能为空
35 string strContent(lpStr + nOffset + 3, strSrc.length() - (nOffset + 3) - 2);
36
37 //获取编码方式
38 ++nOffset;
39 if(*(lpStr + nOffset) == 'B' || *(lpStr + nOffset) == 'b')
40 {
41 hr = m_spCharCodec->Decode(strContent, m_string, NWCC_BASE64);
42 }
43 else if(*(lpStr + nOffset) == 'Q' || *(lpStr + nOffset) == 'q')
44 {
45 hr = m_spCharCodec->Decode(strContent, m_string, NWCC_QUOTEPRINT);
46 }
47 else
48 {
49 hr = E_UNEXPECTED;
50 NWTRACE(NWEL_CRITICAL, TEXT("The character codec is unexpected"));
51 NWASSERT(FALSE);
52 }
53
54 //错误检测
55 ++nOffset;
56 if(*(lpStr + nOffset) != '?')
57 {
58 hr = E_UNEXPECTED;
59 NWTRACE(NWEL_CRITICAL, TEXT("The MIME string format is incorrect"));
60 NWASSERT(FALSE);
61 }
62 }
63 else //没有有效字符,空串
64 {
65 m_string.clear();
66 }
67 }
68 }
69 return hr;
70 }