对中文进行 URL 编码

#include<string>
#ifdef _WIN32
#include <windows.h>
#endif // WIN32
#include<stdio.h>
#include<stdlib.h>

using namespace std;

////////////////////////////////////////////
/**
 * Decodes one 3-byte UTF-8 sequence into a single wide character.
 *
 * Only the 3-byte form (1110xxxx 10xxxxxx 10xxxxxx) is handled. The marker
 * bits of the three bytes are masked off without being validated, so the
 * caller must make sure pText really points at such a sequence.
 *
 * The result is computed arithmetically and assigned to the whole wchar_t,
 * so it does not depend on the host byte order and leaves no byte of *pOut
 * untouched when wchar_t is wider than 16 bits.
 *
 * @param pOut  Receives the decoded value (16 significant bits).
 * @param pText Pointer to at least three readable bytes.
 */
void UTF8ToUnicode(wchar_t* pOut, const char* pText) {
    // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
    const unsigned int lead = static_cast<unsigned char>(pText[0]) & 0x0Fu;
    const unsigned int mid  = static_cast<unsigned char>(pText[1]) & 0x3Fu;
    const unsigned int tail = static_cast<unsigned char>(pText[2]) & 0x3Fu;

    // 4 payload bits + 6 payload bits + 6 payload bits = 16-bit code point.
    *pOut = static_cast<wchar_t>((lead << 12) | (mid << 6) | tail);
}

/**
 * Encodes one wide character as a 3-byte UTF-8 sequence.
 *
 * Exactly three bytes are always written and no terminator is appended.
 * Only the low 16 bits of *pText are used. Values below 0x0800 would need a
 * shorter encoding to be valid UTF-8, which this function does not produce.
 *
 * The value is split with shifts and masks instead of reading the wchar_t
 * through a char pointer, so the result does not depend on the host byte
 * order.
 *
 * @param pOut  Destination buffer with room for at least three bytes.
 * @param pText Pointer to the wide character to encode.
 */
void UnicodeToUTF8(char* pOut, const wchar_t* pText) {
    const unsigned int cp = static_cast<unsigned int>(*pText);

    pOut[0] = static_cast<char>(0xE0u | ((cp >> 12) & 0x0Fu));  // 1110xxxx: bits 15..12
    pOut[1] = static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu));   // 10xxxxxx: bits 11..6
    pOut[2] = static_cast<char>(0x80u | (cp & 0x3Fu));          // 10xxxxxx: bits 5..0
}



///////////////////////////////windows专用///////////////////////////////////
#if defined(_WIN32)
/**
 * Converts a single wide character to the multi-byte encoding selected by
 * CP_ACP using WideCharToMultiByte.
 *
 * At most sizeof(wchar_t) bytes are written to pOut. The API's return value
 * is ignored, so the caller cannot tell how many bytes were produced or
 * whether the conversion failed.
 * NOTE(review): the output is GB2312/GBK only if the active ANSI code page
 * is a Chinese one - confirm for the target systems.
 *
 * @param pOut  Destination buffer with room for at least sizeof(wchar_t) bytes.
 * @param uData Wide character to convert.
 */
void UnicodeToGB2312(char* pOut, wchar_t uData)
{
    const wchar_t* const source   = &uData;
    const int            srcUnits = 1;
    const int            capacity = static_cast<int>(sizeof(wchar_t));

    WideCharToMultiByte(CP_ACP, 0, source, srcUnits, pOut, capacity, nullptr, nullptr);
}
/**
 * Converts one two-byte character in the CP_ACP encoding to a single wide
 * character using MultiByteToWideChar with MB_PRECOMPOSED.
 *
 * Exactly two bytes are read from gbBuffer and at most one wchar_t is
 * written to pOut. The API's return value is ignored, so *pOut is left
 * untouched if the conversion fails.
 * NOTE(review): the input is interpreted as GB2312/GBK only if the active
 * ANSI code page is a Chinese one - confirm for the target systems.
 *
 * @param pOut     Receives the converted wide character.
 * @param gbBuffer Pointer to at least two readable bytes.
 */
void Gb2312ToUnicode(wchar_t* pOut, const char* gbBuffer)
{
    const int bytesPerChar = 2;
    const int wideCapacity = 1;

    MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, bytesPerChar, pOut, wideCapacity);
}

/**
 * Converts a UTF-8 string to the multi-byte encoding produced by
 * UnicodeToGB2312 (the Windows CP_ACP code page).
 *
 * ASCII bytes are copied unchanged. Well-formed 2- and 3-byte UTF-8
 * sequences are decoded to one wide character and converted one at a time.
 * Input that cannot be converted is replaced by '?':
 *   - a stray or truncated byte sequence yields one '?' per offending byte;
 *   - a well-formed 4-byte sequence (outside the 16-bit range handled here)
 *     yields a single '?';
 *   - a character for which the conversion produced no output yields '?'.
 *
 * The result is built in a std::string, so no byte past the end of the
 * input is ever read, embedded NUL bytes are preserved and no manual buffer
 * management is needed.
 *
 * @param str UTF-8 encoded input.
 * @return The converted string.
 */
string UTF8ToGB2312(const string& str) {
    const size_t len = str.size();
    const char* const text = str.data();

    string out;
    out.reserve(len);

    size_t i = 0;
    while (i < len)
    {
        const unsigned char lead = static_cast<unsigned char>(text[i]);

        // Plain ASCII is identical in both encodings.
        if (lead < 0x80)
        {
            out.push_back(text[i]);
            ++i;
            continue;
        }

        // Sequence length announced by the lead byte; 0 means "not a lead byte".
        size_t seqLen = 0;
        if ((lead & 0xE0) == 0xC0)      seqLen = 2;
        else if ((lead & 0xF0) == 0xE0) seqLen = 3;
        else if ((lead & 0xF8) == 0xF0) seqLen = 4;

        // The whole sequence must lie inside the input and every trailing
        // byte must look like 10xxxxxx.
        bool wellFormed = (seqLen != 0) && (seqLen <= len - i);
        for (size_t k = 1; wellFormed && k < seqLen; ++k)
        {
            wellFormed = (static_cast<unsigned char>(text[i + k]) & 0xC0) == 0x80;
        }

        if (!wellFormed)
        {
            out.push_back('?');
            ++i;
            continue;
        }
        if (seqLen == 4)
        {
            // Does not fit into the single wide character handled below.
            out.push_back('?');
            i += 4;
            continue;
        }

        wchar_t wide = 0;
        if (seqLen == 3)
        {
            UTF8ToUnicode(&wide, text + i);
        }
        else
        {
            const unsigned int hi = lead & 0x1Fu;
            const unsigned int lo = static_cast<unsigned char>(text[i + 1]) & 0x3Fu;
            wide = static_cast<wchar_t>((hi << 6) | lo);
        }

        // UnicodeToGB2312 writes at most sizeof(wchar_t) bytes and no
        // terminator; the zero-filled extra byte keeps the buffer a valid
        // C string and prevents stale bytes from a previous character from
        // leaking into the output.
        char encoded[sizeof(wchar_t) + 1] = { 0 };
        UnicodeToGB2312(encoded, wide);
        if (encoded[0] == '\0')
        {
            out.push_back('?');
        }
        else
        {
            out.append(encoded);
        }
        i += seqLen;
    }
    return out;
}

/**
 * Converts a string in the multi-byte encoding understood by
 * Gb2312ToUnicode (the Windows CP_ACP code page) to UTF-8.
 *
 * ASCII bytes are copied unchanged. Every byte with the high bit set is
 * treated as the first byte of a two-byte character, which is converted to
 * one wide character and then encoded as UTF-8 using the shortest valid
 * form (1, 2 or 3 bytes). A high-bit byte at the very end of the input
 * (no second byte available) and a character whose conversion produced
 * nothing are replaced by '?'.
 *
 * The result is built in a std::string, so empty input and inputs whose
 * output exactly fills the estimated size are handled without reading past
 * any buffer, and embedded NUL bytes are preserved.
 *
 * @param str Input in the CP_ACP multi-byte encoding.
 * @return The UTF-8 encoded string.
 */
string GB2312ToUTF8(const string& str) {
    const size_t len = str.size();
    const char* const text = str.data();

    string out;
    out.reserve(len + len / 2);  // worst case: every 2 input bytes become 3

    size_t i = 0;
    while (i < len)
    {
        const unsigned char lead = static_cast<unsigned char>(text[i]);

        // ASCII can be copied as-is.
        if (lead < 0x80)
        {
            out.push_back(text[i]);
            ++i;
            continue;
        }

        // A lead byte without a partner cannot be converted.
        if (len - i < 2)
        {
            out.push_back('?');
            ++i;
            continue;
        }

        // Start from 0 so that a failed conversion is detectable instead of
        // leaving an indeterminate value behind.
        wchar_t wide = 0;
        Gb2312ToUnicode(&wide, text + i);
        const unsigned int cp = static_cast<unsigned int>(wide) & 0xFFFFu;

        if (cp == 0)
        {
            out.push_back('?');
        }
        else if (cp < 0x80)
        {
            out.push_back(static_cast<char>(cp));
        }
        else if (cp < 0x800)
        {
            // Two-byte form; the three-byte helper would produce an
            // overlong (invalid) encoding for these values.
            out.push_back(static_cast<char>(0xC0u | (cp >> 6)));
            out.push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
        }
        else
        {
            char utf8[3] = { 0 };
            UnicodeToUTF8(utf8, &wide);
            out.append(utf8, 3);
        }
        i += 2;
    }
    return out;
}
#endif//defined(_WIN32)

/**
 * Percent-encodes a byte string for use in a URL.
 *
 * The RFC 3986 "unreserved" characters (ASCII letters, ASCII digits and
 * '-', '_', '.', '~') are copied unchanged; every other byte is written as
 * '%' followed by two upper-case hexadecimal digits. The classification is
 * done on raw ASCII ranges, so it does not depend on the current C locale
 * and bytes >= 0x80 are always escaped.
 *
 * @param str Bytes to encode (e.g. UTF-8 text).
 * @return The percent-encoded string.
 */
string UrlEncode(const string& str) {
    static const char kHexDigits[] = "0123456789ABCDEF";

    string out;
    out.reserve(str.size());

    for (const char ch : str) {
        const unsigned char byte = static_cast<unsigned char>(ch);

        const bool isLetter = (byte >= 'A' && byte <= 'Z') || (byte >= 'a' && byte <= 'z');
        const bool isDigit  = (byte >= '0' && byte <= '9');
        const bool isMark   = (byte == '-' || byte == '_' || byte == '.' || byte == '~');

        if (isLetter || isDigit || isMark) {
            out.push_back(ch);
        }
        else {
            out.push_back('%');
            out.push_back(kHexDigits[byte >> 4]);
            out.push_back(kHexDigits[byte & 0x0F]);
        }
    }
    return out;
}



/**
 * Demo entry point: URL-encodes a short Chinese sample string and prints it.
 *
 * On Windows the literal is first passed through GB2312ToUTF8 before being
 * percent-encoded. On other platforms the literal is percent-encoded
 * directly, as the original note on this call suggested for Linux.
 * NOTE(review): the bytes of the literal depend on how the compiler encodes
 * narrow string literals - confirm it is GBK on the Windows build and UTF-8
 * elsewhere.
 */
int main(int argc, char* argv[])
{
    const string str = "中文";

#ifdef _WIN32
    const string utf8 = GB2312ToUTF8(str);
#else
    // Defined on every platform so the code below always compiles.
    const string utf8 = str;
#endif // _WIN32

    const string encoded = UrlEncode(utf8);
    printf("%s >>>>>>\n", encoded.data());
    return 0;
}

 

posted on 2024-11-26 17:41  邗影  阅读(1)  评论(0)  编辑  收藏  举报

导航