UTF-8

0000-007F | 0xxxxxxx                   #UTF-8规定,若1字符=1字节,首位须为0
0080-07FF | 110xxxxx 10xxxxxx          #UTF-8规定,若1字符=2字节,高位前3位为110,低位前2位为10
0800-FFFF | 1110xxxx 10xxxxxx 10xxxxxx #UTF-8规定,若1字符=3字节,高位前4位为1110,后面低位前2位均为10
比如,张三的UTF-8编码为:
E5 BC A0 E4 B8 89
E5 ----- 1110 0101
BC ----- 1011 1100
A0 ----- 1010 0000

E4 ----- 1110 0100
B8 ----- 1011 1000
89 ----- 1000 1001
#include <windows.h>
#include <iostream>
using namespace std;

// Converts text encoded in the system ANSI code page (CP_ACP) to UTF-8.
//
// The conversion goes ANSI -> UTF-16 -> UTF-8 through the Win32
// MultiByteToWideChar / WideCharToMultiByte pair.
//
// Parameters:
//   str - bytes in the active ANSI code page; may be empty and may contain
//         embedded NUL bytes (the whole of str.size() is converted).
//
// Returns:
//   The UTF-8 encoded text, or an empty string when the input is empty, is
//   longer than INT_MAX bytes, or either Win32 conversion call fails.
std::string string_To_UTF8(const std::string & str)
{
    // The Win32 conversion functions take lengths as int; refuse input that
    // would not fit instead of silently truncating the length.
    if (str.empty() || str.size() > static_cast<std::string::size_type>(INT_MAX))
    {
        return std::string();
    }
    const int srcLen = static_cast<int>(str.size());

    // Pass 1: query the number of UTF-16 code units needed. An explicit
    // source length is used for both the query and the conversion so the two
    // calls always agree (no terminator is counted or written).
    const int wideLen = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, nullptr, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }

    // std::wstring owns the buffer, so nothing leaks if a later allocation throws.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    const int wideWritten = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, &wide[0], wideLen);
    if (wideWritten <= 0)
    {
        return std::string();
    }

    // Pass 2: UTF-16 -> UTF-8, again sizing first and then converting.
    const int utf8Len = ::WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideWritten, nullptr, 0, nullptr, nullptr);
    if (utf8Len <= 0)
    {
        return std::string();
    }

    std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
    const int utf8Written = ::WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideWritten, &utf8[0], utf8Len, nullptr, nullptr);
    if (utf8Written <= 0)
    {
        return std::string();
    }

    utf8.resize(static_cast<std::string::size_type>(utf8Written));
    return utf8;
}

std::string UTF8_To_string(const std::string & str)
{
    int nwLen = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0);

    wchar_t * pwBuf = new wchar_t[nwLen + 1];//一定要加1,不然会出现尾巴
    memset(pwBuf, 0, nwLen * 2 + 2);

    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), pwBuf, nwLen);

    int nLen = WideCharToMultiByte(CP_ACP, 0, pwBuf, -1, NULL, NULL, NULL, NULL);

    char * pBuf = new char[nLen + 1];
    memset(pBuf, 0, nLen + 1);

    WideCharToMultiByte(CP_ACP, 0, pwBuf, nwLen, pBuf, nLen, NULL, NULL);

    std::string retStr = pBuf;

    delete []pBuf;
    delete []pwBuf;

    pBuf = NULL;
    pwBuf = NULL;

    return retStr;
}

int main()
{
    string str1("迪丽热巴·阿凡提13800000000");
    string str2 = string_To_UTF8(str1);
    string str3 = UTF8_To_string(str2);
    cout<<str3.c_str()<<endl;
    system("pause");
}

 



posted @ 2019-02-07 23:06  长老猪  阅读(286)  评论(0编辑  收藏  举报