C++ 处理字符串编码问题

#include <codecvt>
#include <cwchar>
#include <exception>
#include <iostream>
#include <locale>
#include <memory>
#include <string>
#include <type_traits>

/// Encodes a wide string as UTF-8.
///
/// The wide string is interpreted as UTF-16 where wchar_t is 16 bits wide
/// and as UCS-4/UTF-32 otherwise, so characters outside the Basic
/// Multilingual Plane convert correctly on both kinds of platform.
///
/// @param wstr Wide string to encode.
/// @return The UTF-8 bytes, or an empty string if the conversion throws
///         (the exception text is written to std::cerr).
std::string UnicodeToUTF8(const std::wstring &wstr) {
    // codecvt_utf8<wchar_t> treats a 16-bit wchar_t as UCS-2 and cannot
    // handle surrogate pairs; codecvt_utf8_utf16 is the facet that can.
    using Facet = std::conditional<sizeof(wchar_t) == 2,
                                   std::codecvt_utf8_utf16<wchar_t>,
                                   std::codecvt_utf8<wchar_t> >::type;
    std::string ret;
    try {
        std::wstring_convert<Facet> wcv;
        ret = wcv.to_bytes(wstr);
    } catch (const std::exception &e) {
        // Best effort: report the failure and fall through with an empty result.
        std::cerr << e.what() << std::endl;
    }
    return ret;
}

/// Decodes a UTF-8 byte string into a wide string.
///
/// The result is UTF-16 where wchar_t is 16 bits wide and UCS-4/UTF-32
/// otherwise, so characters outside the Basic Multilingual Plane decode
/// correctly on both kinds of platform.
///
/// @param str UTF-8 encoded bytes.
/// @return The decoded wide string, or an empty string if the conversion
///         throws (the exception text is written to std::cerr).
std::wstring UTF8ToUnicode(const std::string &str) {
    // codecvt_utf8<wchar_t> treats a 16-bit wchar_t as UCS-2 and cannot
    // produce surrogate pairs; codecvt_utf8_utf16 is the facet that can.
    using Facet = std::conditional<sizeof(wchar_t) == 2,
                                   std::codecvt_utf8_utf16<wchar_t>,
                                   std::codecvt_utf8<wchar_t> >::type;
    std::wstring ret;
    try {
        std::wstring_convert<Facet> wcv;
        ret = wcv.from_bytes(str);
    } catch (const std::exception &e) {
        // Best effort: report the failure and fall through with an empty result.
        std::cerr << e.what() << std::endl;
    }
    return ret;
}

/// Converts a wide string to a multibyte string with std::wcsrtombs.
///
/// The target encoding is whatever the current C locale (LC_CTYPE) selects;
/// this function does not change the locale. Conversion stops at the first
/// L'\0' in the input, so anything after an embedded NUL is not converted.
///
/// @param wstr Wide string to convert.
/// @return The multibyte string, or an empty string if any character cannot
///         be represented (wcsrtombs reports failure as (size_t)-1).
std::string UnicodeToANSI(const std::wstring &wstr) {
    const std::size_t kFailed = static_cast<std::size_t>(-1);
    std::string ret;

    // Pass 1: measure the number of bytes required (no output buffer).
    std::mbstate_t measureState = std::mbstate_t();
    const wchar_t *src = wstr.data();
    const std::size_t needed = std::wcsrtombs(nullptr, &src, 0, &measureState);
    if (needed == kFailed) {
        return ret;
    }

    // Pass 2: convert straight into the result string. A fresh state and a
    // reset source pointer make this pass independent of the measuring pass.
    ret.resize(needed);
    std::mbstate_t convertState = std::mbstate_t();
    src = wstr.data();
    const std::size_t written =
        std::wcsrtombs(&ret[0], &src, needed, &convertState);
    if (written == kFailed) {
        ret.clear();
        return ret;
    }
    ret.resize(written);
    return ret;
}

/// Converts a multibyte string to a wide string with std::mbsrtowcs.
///
/// The source encoding is whatever the current C locale (LC_CTYPE) selects;
/// this function does not change the locale. Conversion stops at the first
/// '\0' in the input, so anything after an embedded NUL is not converted.
///
/// @param str Multibyte string to convert.
/// @return The wide string, or an empty string if the input contains an
///         invalid multibyte sequence (mbsrtowcs reports failure as
///         (size_t)-1).
std::wstring ANSIToUnicode(const std::string &str) {
    const std::size_t kFailed = static_cast<std::size_t>(-1);
    std::wstring ret;

    // Pass 1: measure the number of wide characters required (no output buffer).
    std::mbstate_t measureState = std::mbstate_t();
    const char *src = str.data();
    const std::size_t needed = std::mbsrtowcs(nullptr, &src, 0, &measureState);
    if (needed == kFailed) {
        return ret;
    }

    // Pass 2: convert straight into the result string. A fresh state and a
    // reset source pointer make this pass independent of the measuring pass.
    ret.resize(needed);
    std::mbstate_t convertState = std::mbstate_t();
    src = str.data();
    const std::size_t written =
        std::mbsrtowcs(&ret[0], &src, needed, &convertState);
    if (written == kFailed) {
        ret.clear();
        return ret;
    }
    ret.resize(written);
    return ret;
}

/// Converts UTF-8 bytes to a multibyte string by going through a wide
/// string: UTF8ToUnicode first, then UnicodeToANSI on its result.
std::string UTF8ToANSI(const std::string &str) {
    const std::wstring wide = UTF8ToUnicode(str);
    return UnicodeToANSI(wide);
}

/// Converts a multibyte string to UTF-8 by going through a wide string:
/// ANSIToUnicode first, then UnicodeToUTF8 on its result.
std::string ANSIToUTF8(const std::string &str) {
    const std::wstring wide = ANSIToUnicode(str);
    return UnicodeToUTF8(wide);
}

int main(int argc, char *argv[]) {
    // Demo, Unicode => UTF-8: encode a wide string literal and print the bytes.
    const std::wstring wideText = L"你好!";
    const std::string utf8Text = UnicodeToUTF8(wideText);
    std::cout << utf8Text << std::endl;

    // The UTF-8 => ANSI direction is not demonstrated here.
    return 0;
}

参考:

C++中字符编码的转换(Unicode、UTF-8、ANSI)

std::wstring_convert处理UTF8

posted @ 2023-02-02 19:02  黄河大道东  阅读(31)  评论(0)  编辑  收藏  举报