音标编码 转unicode输出,VC++

字典中的音标有的使用 Kingsoft Phonetic Plain 字体,有的使用 KK 字体,两者对应的编码也不相同。输出时可以切换到对应的字体,但这要求系统已安装该字体,而且程序中还要随之更改字体设置。

统一转换成 UTF-16 之后,就可以用同一种字体统一输出了;Windows 程序内部一般都使用 UTF-16,接口转换也比较方便。

对应的转换关系可以在网上找,不过网上的不一定准确,还是要测试调整一下:

/*
 * KingCodeToUTF16 - translate one character code used with the
 * "Kingsoft Phonetic Plain" font into the UTF-16 code unit of the phonetic
 * symbol it stands for.
 *
 * k       : one character of a King-encoded phonetic string.
 * returns : the mapped code unit; every code without a table entry is
 *           returned unchanged.
 */
wchar_t KingCodeToUTF16(wchar_t k)
{
    switch (k)
    {
    case '5':  return 0x02c8; // 'ˈ'
    case '7':  return 0x02cc; // 'ˌ' (original note: "none" - presumably this code was not seen in the data)
    case '9':  return 0x02cc; // 'ˌ'
    case 'A':  return 0x00e6; // 'æ'
    case 'B':  return 0x0251; // 'ɑ'
    case 'C':  return 0x0254; // 'ɔ'
    case 'E':  return 0x0259; // 'ə'
    case 'F':  return 0x0283; // 'ʃ'
    case 'I':  return 0x026a; // 'ɪ'
    case 'J':  return 0x028a; // 'ʊ'
    case 'N':  return 0x014b; // 'ŋ'
    case 'Q':  return 0x028c; // 'ʌ'
    case 'R':  return 0x0252; // 'ɒ' (IPA88 spelling of the sound written 'ɔ' for code 'C')
    case 'T':  return 0x00f0; // 'ð'
    case 'U':  return 0x028a; // 'ʊ' - NOTE(review): the original author was unsure whether this should be plain 'u'
    case 'V':  return 0x0292; // 'ʒ'
    case 'W':  return 0x03b8; // 'θ'
    case '\\': return 0x025c; // 'ɜ' - was 0x0292 ('ʒ'), a copy of the 'V' entry that contradicted its own comment
    case '^':  return 0x0261; // 'ɡ'
    case 'Z':  return 0x025b; // 'ɛ'
    default:   return k;      // not a phonetic code: pass through
    }
}


/*
 * kingtouc16 - convert a NUL-terminated King-encoded phonetic string to
 * UTF-16 by passing every character through KingCodeToUTF16.
 *
 * pking   : source string; it is only read. May be NULL.
 * returns : a newly allocated, NUL-terminated string of the same length,
 *           or NULL when pking is NULL. The buffer comes from new[], so the
 *           caller must release it with delete[].
 */
wchar_t* kingtouc16(wchar_t* pking)
{
    if (pking == NULL)
    {
        return NULL;
    }

    // size_t keeps the full length; the previous int could truncate it.
    const size_t len = wcslen(pking);
    wchar_t* puc = new wchar_t[len + 1];

    for (size_t i = 0; i < len; i++)
    {
        puc[i] = KingCodeToUTF16(pking[i]);
    }
    puc[len] = 0;

    return puc;
}


/*
 * KKToUTF16 - translate one character code used with the KK phonetic font
 * into the UTF-16 code unit of the phonetic symbol it stands for.
 *
 * kk      : one character of a KK-encoded phonetic string.
 * returns : the mapped code unit; every code without a table entry is
 *           returned unchanged. The King-only codes 'R', 'U' and '^' are
 *           deliberately not mapped here.
 */
wchar_t KKToUTF16(wchar_t kk)
{
    switch (kk)
    {
    case '!':  return 0x026a; // 'ɪ'
    case '"':  return 0x02c8; // 'ˈ'
    case '#':  return 0x00e6; // 'æ'
    case '$':  return 0x0251; // 'ɑ'
    case '%':  return 0x0254; // 'ɔ'
    case '&':  return 0x028a; // 'ʊ'
    case '(':  return 0x028c; // 'ʌ'
    case ')':  return 0x025c; // 'ɜ' - was 0x0292 ('ʒ'), which contradicted its own comment
    case '*':  return 0x0259; // 'ə' - was written '\*', an invalid escape sequence
    case '+':  return 0x0259; // 'ə'
    case '.':  return 0x0283; // 'ʃ'
    case '/':  return 0x0292; // 'ʒ' - was written '//', a multi-character constant that never matched '/'
    case '0':  return 0x014b; // 'ŋ'
    case '6':  return 0x02c8; // 'ˈ'
    case '7':  return 0x02cc; // 'ˌ'
    case '8':  return ':';    // length mark, emitted as a plain colon
    case '9':  return 0x0292; // 'ʒ'
    case '<':  return 0x02cc; // 'ˌ'
    case 'G':  return 0x03b8; // 'θ'
    case 'H':  return 0x00f0; // 'ð'
    case 'W':  return 0x025b; // 'ɛ'
    default:   return kk;     // not a phonetic code: pass through
    }
}


/*
 * kktouc16 - convert a NUL-terminated KK-encoded phonetic string to UTF-16
 * by passing every character through KKToUTF16.
 *
 * pking   : source string (KK encoded despite the parameter name); it is
 *           only read. May be NULL.
 * returns : a newly allocated, NUL-terminated string of the same length,
 *           or NULL when pking is NULL. The buffer comes from new[], so the
 *           caller must release it with delete[].
 */
wchar_t* kktouc16(wchar_t* pking)
{
    if (pking == NULL)
    {
        return NULL;
    }

    // size_t keeps the full length; the previous int could truncate it.
    const size_t len = wcslen(pking);
    wchar_t* puc = new wchar_t[len + 1];

    for (size_t i = 0; i < len; i++)
    {
        puc[i] = KKToUTF16(pking[i]);
    }
    puc[len] = 0;

    return puc;
}

测试程序运行效果:

 

 

完整测试程序如下(其中还包含一些其他的测试代码,不影响音标输出测试):

// TW32.cpp : Defines the entry point for the application.
//

#include "stdafx.h"
#include "resource.h"

#include<stdio.h>
#include <commctrl.h>
#include <richedit.h>

#define MAX_LOADSTRING 100

// Global Variables:
HINSTANCE hInst;                                // current instance
TCHAR szTitle[MAX_LOADSTRING] = _T("TW32");                                // The title bar text
TCHAR szWindowClass[MAX_LOADSTRING] =_T("TW32_XGZ_2023");                                // The window class name used by MyRegisterClass / InitInstance

// Forward declarations of functions included in this code module:
ATOM                MyRegisterClass(HINSTANCE hInstance);
BOOL                InitInstance(HINSTANCE, int);
LRESULT CALLBACK    WndProc(HWND, UINT, WPARAM, LPARAM);
LRESULT CALLBACK    About(HWND, UINT, WPARAM, LPARAM);


// File type codes returned by GetFileType
#define GFT_NULL        0   // the file could not be opened
#define GFT_ANSI        1   // ANSI
#define GFT_UTF16LE        2   // UTF-16 LE
#define GFT_UTF16BE        3   // UTF-16 BE
#define GFT_UTF8BOM        4   // UTF-8 with BOM: the 3-byte header marks the file as definitely UTF-8
#define GFT_UTF8        5   // UTF-8 without BOM: the content is plausibly UTF-8
//#define GFT_UTF8_0        6   // no multi-byte UTF-8 characters  // not needed
//#define GFT_UTF8_N        7   // contains multi-byte UTF-8 characters
#define GFT_CKLENMAX    1000000   // inspect at most 1,000,000 bytes so that large files are not scanned completely
bool IsUTF8Count(const void* pBuffer, long size, long& utf8num);
int GetFileType(const TCHAR* PathFileName);

unsigned int a_encode = CP_UTF8;   // UTF-8 is the recommended target when converting UTF-16 for storage; callers should preferably pass the code page explicitly
wchar_t* CharToWchar(const char* c, size_t m_encode = CP_ACP);  // defaults to CP_ACP because ANSI strings in the VS2022 IDE use the active code page
char* WcharToChar(const wchar_t* wp, size_t m_encode = CP_ACP); // reverse conversion, same default code page


// printf-style output appended to the plain edit control (hWndEdit)
int PRINT(const TCHAR* fmt, ...);
//int setFormat(int size, DWORD c);
// sets size, colour and face name for the current selection of a rich edit control
int setFormat(HWND hWndREdit, int size, DWORD c, TCHAR* szFaceName = _T("Tahoma"));
// printf-style output appended to the rich edit control (hWndRichEdit)
int RPRINT(const TCHAR* fmt, ...);

// Message handlers called from WndProc
int OnCreate(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
int OnPaint(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
int OnSize(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);

// Child control identifiers
#define IDC_RICHEDIT       1010 
#define IDC_EDIT           1011 
#define IDC_LIST           1012

// Child control handles, created in OnCreate
HWND hWndRichEdit;
HWND hWndEdit;
HWND hWndList;

/*
 * WinMain - application entry point: registers the window class, creates
 * the main window and runs the message loop.
 *
 * Returns FALSE when the main window cannot be created; otherwise the
 * wParam of the last retrieved message (the WM_QUIT exit code on a normal
 * shutdown).
 */
int APIENTRY WinMain(HINSTANCE hInstance,
                     HINSTANCE hPrevInstance,
                     LPSTR     lpCmdLine,
                     int       nCmdShow)
{
    // Zero-initialised so the return value is defined even if GetMessage
    // fails before delivering a single message.
    MSG msg = { 0 };
    HACCEL hAccelTable;

    MyRegisterClass(hInstance);

    // Perform application initialization:
    if (!InitInstance(hInstance, nCmdShow))
    {
        return FALSE;
    }

    hAccelTable = LoadAccelerators(hInstance, (LPCTSTR)IDC_TW32);

    // Main message loop. GetMessage returns 0 for WM_QUIT and -1 on error;
    // testing "> 0" stops on both, whereas a plain truth test would treat
    // the error value as "message available" and dispatch a stale MSG.
    while (GetMessage(&msg, NULL, 0, 0) > 0)
    {
        if (!TranslateAccelerator(msg.hwnd, hAccelTable, &msg))
        {
            TranslateMessage(&msg);
            DispatchMessage(&msg);
        }
    }

    return (int)msg.wParam;
}


/*
 * MyRegisterClass - fill in a WNDCLASSEX for the main window (class name
 * szWindowClass, window procedure WndProc) and register it.
 *
 * Returns the value of RegisterClassEx.
 */
ATOM MyRegisterClass(HINSTANCE hInstance)
{
    WNDCLASSEX wc = { 0 };

    wc.cbSize        = sizeof(WNDCLASSEX);

    // behaviour
    wc.style         = CS_HREDRAW | CS_VREDRAW;
    wc.lpfnWndProc   = (WNDPROC)WndProc;
    wc.hInstance     = hInstance;
    wc.lpszClassName = szWindowClass;
    wc.lpszMenuName  = (LPCTSTR)IDC_TW32;  //XGZ

    // no extra per-class or per-window bytes
    wc.cbClsExtra    = 0;
    wc.cbWndExtra    = 0;

    // appearance
    wc.hIcon         = LoadIcon(hInstance, (LPCTSTR)IDI_TW32);
    wc.hIconSm       = LoadIcon(wc.hInstance, (LPCTSTR)IDI_SMALL);
    wc.hCursor       = LoadCursor(NULL, IDC_ARROW);
    wc.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);

    return RegisterClassEx(&wc);
}

/*
 * InitInstance - remember the instance handle in the global hInst, then
 * create, show and update the main window.
 *
 * Returns TRUE on success, FALSE when CreateWindow fails.
 */
BOOL InitInstance(HINSTANCE hInstance, int nCmdShow)
{
   // Stored first so that handlers running during CreateWindow can use it.
   hInst = hInstance;

   HWND hMainWnd = CreateWindow(szWindowClass, szTitle, WS_OVERLAPPEDWINDOW,
      CW_USEDEFAULT, 0, CW_USEDEFAULT, 0, NULL, NULL, hInstance, NULL);
   if (hMainWnd == NULL)
   {
      return FALSE;
   }

   ShowWindow(hMainWnd, nCmdShow);
   UpdateWindow(hMainWnd);
   return TRUE;
}

/*
 * WndProc - window procedure of the main window.
 *
 * Handles the About/Exit menu commands, forwards WM_CREATE, WM_PAINT and
 * WM_SIZE to OnCreate / OnPaint / OnSize, posts the quit message on
 * WM_DESTROY and passes everything else to DefWindowProc.
 *
 * Returns 0 for handled messages, otherwise the DefWindowProc result.
 */
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    // The previous version loaded IDS_HELLO into an unused local buffer on
    // every single message; that dead work has been removed.
    switch (message)
    {
        case WM_COMMAND:
            // Parse the menu selections:
            switch (LOWORD(wParam))
            {
                case IDM_ABOUT:
                   DialogBox(hInst, (LPCTSTR)IDD_ABOUTBOX, hWnd, (DLGPROC)About);
                   break;
                case IDM_EXIT:
                   DestroyWindow(hWnd);
                   break;
                default:
                   return DefWindowProc(hWnd, message, wParam, lParam);
            }
            break;
        case WM_CREATE:
            OnCreate(hWnd, message, wParam, lParam);
            break;
        case WM_PAINT:
            OnPaint(hWnd, message, wParam, lParam);
            break;
        case WM_SIZE:
            OnSize(hWnd, message, wParam, lParam);
            break;
        case WM_DESTROY:
            PostQuitMessage(0);
            break;
        default:
            return DefWindowProc(hWnd, message, wParam, lParam);
   }
   return 0;
}


/*
 * KingCodeToUTF16 - translate one character code used with the
 * "Kingsoft Phonetic Plain" font into the UTF-16 code unit of the phonetic
 * symbol it stands for.
 *
 * k       : one character of a King-encoded phonetic string.
 * returns : the mapped code unit; every code without a table entry is
 *           returned unchanged.
 */
wchar_t KingCodeToUTF16(wchar_t k)
{
    switch (k)
    {
    case '5':  return 0x02c8; // 'ˈ'
    case '7':  return 0x02cc; // 'ˌ' (original note: "none" - presumably this code was not seen in the data)
    case '9':  return 0x02cc; // 'ˌ'
    case 'A':  return 0x00e6; // 'æ'
    case 'B':  return 0x0251; // 'ɑ'
    case 'C':  return 0x0254; // 'ɔ'
    case 'E':  return 0x0259; // 'ə'
    case 'F':  return 0x0283; // 'ʃ'
    case 'I':  return 0x026a; // 'ɪ'
    case 'J':  return 0x028a; // 'ʊ'
    case 'N':  return 0x014b; // 'ŋ'
    case 'Q':  return 0x028c; // 'ʌ'
    case 'R':  return 0x0252; // 'ɒ' (IPA88 spelling of the sound written 'ɔ' for code 'C')
    case 'T':  return 0x00f0; // 'ð'
    case 'U':  return 0x028a; // 'ʊ' - NOTE(review): the original author was unsure whether this should be plain 'u'
    case 'V':  return 0x0292; // 'ʒ'
    case 'W':  return 0x03b8; // 'θ'
    case '\\': return 0x025c; // 'ɜ' - was 0x0292 ('ʒ'), a copy of the 'V' entry that contradicted its own comment
    case '^':  return 0x0261; // 'ɡ'
    case 'Z':  return 0x025b; // 'ɛ'
    default:   return k;      // not a phonetic code: pass through
    }
}


/*
 * kingtouc16 - convert a NUL-terminated King-encoded phonetic string to
 * UTF-16 by passing every character through KingCodeToUTF16.
 *
 * pking   : source string; it is only read. May be NULL.
 * returns : a newly allocated, NUL-terminated string of the same length,
 *           or NULL when pking is NULL. The buffer comes from new[], so the
 *           caller must release it with delete[].
 */
wchar_t* kingtouc16(wchar_t* pking)
{
    if (pking == NULL)
    {
        return NULL;
    }

    // size_t keeps the full length; the previous int could truncate it.
    const size_t len = wcslen(pking);
    wchar_t* puc = new wchar_t[len + 1];

    for (size_t i = 0; i < len; i++)
    {
        puc[i] = KingCodeToUTF16(pking[i]);
    }
    puc[len] = 0;

    return puc;
}


/*
 * KKToUTF16 - translate one character code used with the KK phonetic font
 * into the UTF-16 code unit of the phonetic symbol it stands for.
 *
 * kk      : one character of a KK-encoded phonetic string.
 * returns : the mapped code unit; every code without a table entry is
 *           returned unchanged. The King-only codes 'R', 'U' and '^' are
 *           deliberately not mapped here.
 */
wchar_t KKToUTF16(wchar_t kk)
{
    switch (kk)
    {
    case '!':  return 0x026a; // 'ɪ'
    case '"':  return 0x02c8; // 'ˈ'
    case '#':  return 0x00e6; // 'æ'
    case '$':  return 0x0251; // 'ɑ'
    case '%':  return 0x0254; // 'ɔ'
    case '&':  return 0x028a; // 'ʊ'
    case '(':  return 0x028c; // 'ʌ'
    case ')':  return 0x025c; // 'ɜ' - was 0x0292 ('ʒ'), which contradicted its own comment
    case '*':  return 0x0259; // 'ə' - was written '\*', an invalid escape sequence
    case '+':  return 0x0259; // 'ə'
    case '.':  return 0x0283; // 'ʃ'
    case '/':  return 0x0292; // 'ʒ' - was written '//', a multi-character constant that never matched '/'
    case '0':  return 0x014b; // 'ŋ'
    case '6':  return 0x02c8; // 'ˈ'
    case '7':  return 0x02cc; // 'ˌ'
    case '8':  return ':';    // length mark, emitted as a plain colon
    case '9':  return 0x0292; // 'ʒ'
    case '<':  return 0x02cc; // 'ˌ'
    case 'G':  return 0x03b8; // 'θ'
    case 'H':  return 0x00f0; // 'ð'
    case 'W':  return 0x025b; // 'ɛ'
    default:   return kk;     // not a phonetic code: pass through
    }
}


/*
 * kktouc16 - convert a NUL-terminated KK-encoded phonetic string to UTF-16
 * by passing every character through KKToUTF16.
 *
 * pking   : source string (KK encoded despite the parameter name); it is
 *           only read. May be NULL.
 * returns : a newly allocated, NUL-terminated string of the same length,
 *           or NULL when pking is NULL. The buffer comes from new[], so the
 *           caller must release it with delete[].
 */
wchar_t* kktouc16(wchar_t* pking)
{
    if (pking == NULL)
    {
        return NULL;
    }

    // size_t keeps the full length; the previous int could truncate it.
    const size_t len = wcslen(pking);
    wchar_t* puc = new wchar_t[len + 1];

    for (size_t i = 0; i < len; i++)
    {
        puc[i] = KKToUTF16(pking[i]);
    }
    puc[len] = 0;

    return puc;
}

/*
 * OnCreate - WM_CREATE handler of the main window.
 *
 * 1. Creates the three child controls (rich edit, plain edit, list box).
 * 2. Prints two sample words with their King- and KK-encoded phonetics
 *    converted to UTF-16 into the rich edit control.
 * 3. Opens a UTF-8 dictionary file, reads the first ten lines and prints
 *    the text found between each "`3`(" marker and the following ')'.
 *
 * Returns 0 on success and 1 when the dictionary file cannot be opened.
 */
int OnCreate(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    // InitCommonControls();

    // Riched20.dll is loaded before the RICHEDIT_CLASS window is created.
    LoadLibrary(_T("Riched20.dll"));
    hWndRichEdit = CreateWindowEx(WS_EX_CLIENTEDGE, RICHEDIT_CLASS, NULL,// _T("RichEdit20W"), NULL,
        WS_CHILD | WS_VISIBLE | WS_VSCROLL | ES_MULTILINE,  // | ES_READONLY
        0, 0, 500, 500,
        hWnd, (HMENU)IDC_RICHEDIT, hInst, NULL);

    hWndEdit = CreateWindow(_T("edit"), NULL,
        WS_CHILD | WS_BORDER | WS_VISIBLE | ES_MULTILINE | WS_VSCROLL ,// | ES_READONLY
        0, 0, 0, 0, hWnd, (HMENU)IDC_EDIT, hInst, NULL);

    hWndList = CreateWindow(_T("listbox"), NULL,
        WS_CHILD | WS_BORDER | WS_VISIBLE | WS_VSCROLL | WS_HSCROLL | LBS_NOTIFY,// | LBS_SORT,
        0, 0, 0, 0, hWnd, (HMENU)IDC_LIST, hInst, NULL);

    PRINT(_T("=== [ Main OnCreate Test PRINT 圆周率 = %f ] ===\r\n"), 3.1415926);

    //=============== phonetic conversion demo
    RPRINT(_T("\r\nTest\r\n")); // write something first, then change the format

    setFormat(hWndRichEdit,5, 0x800000, _T("宋体"));

    wchar_t* pWord = _T("enthymeme");
    wchar_t *pking = _T("(`ZnWE9mim; 5enWE9mi:m)");
    wchar_t* pkk = _T("6WnG*7mim");
    wchar_t *pUc;

    // The converters allocate with new[], so the result is released with
    // delete[] (plain delete on an array is undefined behaviour).
    RPRINT(_T("\r\n\r\n %s"), pWord);
    pUc = kingtouc16(pking); RPRINT(_T("\r\nKing = %s"), pUc); delete[] pUc;
    pUc = kktouc16(pkk); RPRINT(_T("\r\nKK = %s"), pUc); delete[] pUc;

    wchar_t* pWord2 = _T("actual");
    wchar_t* pking2 = _T("(`AktFJEl; 5AktFuEl)");
    wchar_t* pkk2 = _T("6#kt.u*l");

    RPRINT(_T("\r\n\r\n %s"), pWord2);
    pUc = kingtouc16(pking2); RPRINT(_T("\r\nKing = %s"), pUc); delete[] pUc;
    pUc = kktouc16(pkk2); RPRINT(_T("\r\nKK = %s"), pUc); delete[] pUc;

    //============ dictionary file test
    TCHAR* PathFile = _T("R:\\21世纪UTF8.txt");
    int iFType = GetFileType(PathFile);
    PRINT(_T("\r\n%s"), PathFile);
    PRINT(_T("\r\nGetFileType = %d"), iFType);

    FILE* fp = _tfopen(PathFile, _T("r,ccs=UTF-8"));   //ftell == 3
    if (NULL == fp)
    {
        PRINT(_T("\r\n<FAIL> fopen failed!"));
        return 1;
    }

    LARGE_INTEGER t1, t2, tc;  // timing
    QueryPerformanceFrequency(&tc);
    QueryPerformanceCounter(&t1);

    // Report the start position, the end position and their midpoint.
    int f1 = (int)ftell(fp);  // position right after opening with ccs=UTF-8
    fseek(fp, 0, SEEK_END);
    int f2 = (int)ftell(fp);
    int fm = (f1 + f2) / 2;
    PRINT(_T("\r\nf1=%d,fm=%d,f2=%d"), f1, fm, f2);

    // The capacities are named so that the value handed to fgetws always
    // matches the allocation (it used to be ten times larger).
    const int kLineCap = 100000;
    const int kPhoneticCap = 100;
    TCHAR* buf = new TCHAR[kLineCap];
    TCHAR* bufyb = new TCHAR[kPhoneticCap];

    fseek(fp, 3, SEEK_SET);  // position 3 skips the BOM
    for (int line = 0; line < 10; line++)
    {
        if (fgetws(buf, kLineCap, fp) == NULL)
        {
            break;  // end of file or read error: buf holds nothing new
        }

        const int len = lstrlen(buf);
        // "j + 3 < len" keeps the four-character marker test inside the line.
        for (int j = 0; j + 3 < len; j++)
        {
            if (buf[j] != '`' || buf[j + 1] != '3' || buf[j + 2] != '`' || buf[j + 3] != '(')
            {
                continue;
            }

            // Copy up to the closing ')' into bufyb, indexing the
            // destination from 0 and stopping at its capacity and at the
            // end of the line.
            int n = 0;
            for (int k = j + 4; k < len && buf[k] != ')' && n < kPhoneticCap - 1; k++)
            {
                bufyb[n++] = buf[k];
            }
            bufyb[n] = 0;

            // File content is data, never a format string.
            PRINT(_T("%s"), bufyb);
        }

        //PRINT(_T("\r\n%s"), buf);
        PRINT(_T("\r\n"));
    }

    delete[] bufyb;
    delete[] buf;

    fclose(fp);

    QueryPerformanceCounter(&t2);
    double time = (double)(t2.QuadPart - t1.QuadPart) / (double)tc.QuadPart;
    PRINT(_T("\r\nRun time = %f"),  time);

    return 0;
}

/*
 * OnPaint - WM_PAINT handler of the main window.
 *
 * Draws a fixed mixed Chinese/phonetic sample string and three strings
 * read from the [Default] section of R:\tes中文1.ini, using a temporary
 * font. Always returns 0.
 */
int OnPaint(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    HDC hdc;
    PAINTSTRUCT ps;

    const wchar_t* wp = L"Chinese中文测试 =3.1415926  immethodical (ˌɪmə`θɑdɪkl; ˌimiˈθɔdikəl)形容词 无秩序的; 无规则的; 漫无章法的; 紊乱的";
    wchar_t  wbuf[1024];
    wchar_t  wbuf2[1024];
    wchar_t  wbuf3[1024];

    hdc = BeginPaint(hWnd, &ps);

    LOGFONT logfont;
    ZeroMemory(&logfont, sizeof(LOGFONT));
    logfont.lfCharSet = DEFAULT_CHARSET;
    logfont.lfHeight = -20;
    lstrcpy(logfont.lfFaceName, _T("华文中宋"));  // name the face explicitly; the default differs between systems

    // The font is created for this paint only. The previously selected
    // object is kept so it can be restored before the font is deleted;
    // without that, every WM_PAINT leaked one GDI font.
    HFONT hFont = CreateFontIndirect(&logfont);
    HGDIOBJ hOldFont = NULL;
    if (hFont != NULL)
    {
        hOldFont = SelectObject(hdc, hFont);
    }

    /*GetObject(hFont, sizeof(LOGFONT), &logfont);
    logfont.lfHeight = 16;
    _tcscpy(logfont.lfFaceName, _T("Times New Roman"));  // used alone, Chinese text falls back to the system default
    hFont = CreateFontIndirect(&logfont);
    SelectObject(hdc, hFont);*/

    SetTextColor(hdc, RGB(100, 0, 0));
    SetBkColor(hdc, RGB(255, 255, 0));  // highlight: bright yellow background

    //WritePrivateProfileString(_T("Default"), _T("Str中文1"), wp, _T("R:\\tes中文1.ini"));
    GetPrivateProfileString(_T("Default"), _T("Str中文1"), _T("NULL"), wbuf, 1024, _T("R:\\tes中文1.ini"));
    GetPrivateProfileString(_T("Default"), _T("Str中文2"), _T("NULL"), wbuf2, 1024, _T("R:\\tes中文1.ini"));
    GetPrivateProfileString(_T("Default"), _T("Str3"), _T("NULL"), wbuf3, 1024, _T("R:\\tes中文1.ini"));

    RECT rt;
    GetClientRect(hWnd, &rt);
    DrawText(hdc, wp, lstrlen(wp), &rt, DT_LEFT);
    TextOut(hdc, 10, 50, wbuf, lstrlen(wbuf));
    TextOut(hdc, 10, 150, wbuf2, lstrlen(wbuf2));
    TextOut(hdc, 10, 250, wbuf3, lstrlen(wbuf3));

    if (hFont != NULL)
    {
        SelectObject(hdc, hOldFont);  // a font must not be deleted while selected
        DeleteObject(hFont);
    }

    EndPaint(hWnd, &ps);
    return 0;
}

/*
 * OnSize - WM_SIZE handler: lays out the three child controls inside the
 * new client area (width/height taken from lParam) and then hands the
 * message to DefWindowProc, whose result is returned.
 */
int OnSize(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    const int kTop = 100;               // vertical offset of the top row
    const int clientW = LOWORD(lParam);
    const int clientH = HIWORD(lParam);

    // List box: fixed-width column on the left.
    //MoveWindow(hWndEditInput, 5, 0, 200, 30, TRUE);
    MoveWindow(hWndList, 5, kTop, 250, clientH - kTop - 20, TRUE);

    // Rich edit: right of the list, above the plain edit control.
    //MoveWindow(hWndMainToolbar, 210, 0, 400, 25, TRUE);
    MoveWindow(hWndRichEdit, 260, kTop, clientW - 265, clientH - kTop - 200 - 20, TRUE);

    // Plain edit: strip at the bottom right.
    MoveWindow(hWndEdit, 260, clientH - 200, clientW - 265, 170, TRUE);
    //SendMessage(hWndList, CB_SETHORIZONTALEXTENT, (WPARAM)10, (LPARAM)0);

    return DefWindowProc(hWnd, message, wParam, lParam);
}

// Message handler for the About box: reports WM_INITDIALOG as handled and
// closes the dialog when OK or Cancel is chosen; everything else is
// reported as not handled (FALSE).
LRESULT CALLBACK About(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam)
{
    if (message == WM_INITDIALOG)
    {
        return TRUE;
    }

    if (message == WM_COMMAND)
    {
        const WORD commandId = LOWORD(wParam);
        if (commandId == IDOK || commandId == IDCANCEL)
        {
            EndDialog(hDlg, commandId);
            return TRUE;
        }
    }

    return FALSE;
}

TCHAR buffer[10000];

int PRINT(const TCHAR* fmt, ...)
{
    va_list argptr;
    int cnt;

    int iEditTextLength;
    HWND hWnd = hWndEdit;

    if (NULL == hWnd) return 0;

    va_start(argptr, fmt);
    cnt = _vstprintf(buffer, fmt, argptr);  // A or W but ISO C
    //cnt = vswprintf(buffer, fmt, argptr);  // only W
    //cnt = wvsprintf(buffer, fmt, argptr);  // not %f
    
    va_end(argptr);

    iEditTextLength = GetWindowTextLength(hWnd);
    if (iEditTextLength + cnt > 30000)       // edit text max length is 30000
    {
        SendMessage(hWnd, EM_SETSEL, 0, 10000);
        SendMessage(hWnd, WM_CLEAR, 0, 0);
        iEditTextLength = iEditTextLength - 10000;
    }
    SendMessage(hWnd, EM_SETSEL, iEditTextLength, iEditTextLength);
    SendMessage(hWnd, EM_REPLACESEL, 0, (LPARAM)buffer);

    return(cnt);
}

/*
 * setFormat - apply character formatting to the current selection of a
 * rich edit control (EM_SETCHARFORMAT with SCF_SELECTION).
 *
 * hWndREdit  : the rich edit control.
 * size       : yHeight is set to size * 50; 0 leaves the size unchanged.
 * c          : text colour as 0xRRGGBB.
 * szFaceName : font face name; NULL leaves the face unchanged. Names
 *              longer than the CHARFORMAT field are truncated.
 * returns    : always 0.
 */
int setFormat( HWND hWndREdit, int size, DWORD c, TCHAR * szFaceName)
{
    CHARFORMAT cf;
    ZeroMemory(&cf, sizeof(CHARFORMAT));
    cf.cbSize = sizeof(CHARFORMAT);

    // Face name: lstrcpyn copies at most LF_FACESIZE - 1 characters and
    // always terminates, so an over-long name can no longer overrun
    // cf.szFaceName.
    if (szFaceName != NULL)
    {
        cf.dwMask |= CFM_FACE;
        lstrcpyn(cf.szFaceName, szFaceName, LF_FACESIZE);
    }

    // Size: only touched when the caller asked for one.
    if (size != 0)
    {
        cf.dwMask |= CFM_SIZE;
        cf.yHeight = size * 50;
    }

    // Colour: c is 0xRRGGBB, while RGB() builds a COLORREF from the
    // separate components.
    const int r = c >> 16 & 0xff;
    const int g = c >> 8 & 0xff;
    const int b = c & 0xff;
    cf.dwMask |= CFM_COLOR;
    cf.crTextColor = RGB(r, g, b);

    SendMessage(hWndREdit, EM_SETCHARFORMAT, SCF_SELECTION, (LPARAM)&cf);
    return 0;
}

int RPRINT(const TCHAR* fmt, ...)
{
    va_list argptr;
    int cnt;

    int iEditTextLength;
    HWND hWnd = hWndRichEdit;

    if (NULL == hWnd) return 0;

    va_start(argptr, fmt);
    cnt = _vstprintf(buffer, fmt, argptr);  // A or W but ISO C
    //cnt = vswprintf(buffer, fmt, argptr);  // only W
    //cnt = wvsprintf(buffer, fmt, argptr);  // not %f

    va_end(argptr);

    iEditTextLength = GetWindowTextLength(hWnd);
    if (iEditTextLength + cnt > 30000)       // edit text max length is 30000
    {
        SendMessage(hWnd, EM_SETSEL, 0, 10000);
        SendMessage(hWnd, WM_CLEAR, 0, 0);
        iEditTextLength = iEditTextLength - 10000;
    }
    SendMessage(hWnd, EM_SETSEL, iEditTextLength, iEditTextLength);
    SendMessage(hWnd, EM_REPLACESEL, 0, (LPARAM)buffer);

    return(cnt);
}


/*
 * IsUTF8Count - check whether a byte buffer follows the UTF-8 lead/
 * continuation byte pattern and count its multi-byte sequences.
 *
 * pBuffer : start of the data.
 * size    : number of bytes to inspect.
 * utf8num : receives the number of 2-, 3- and 4-byte lead bytes seen,
 *           including the one at which scanning stopped.
 * returns : false as soon as a byte contradicts the pattern (the data is
 *           certainly not UTF-8); true otherwise. A sequence cut off by
 *           the end of the buffer stops the scan without counting as an
 *           error.
 */
bool IsUTF8Count(const void* pBuffer, long size, long& utf8num)
{
    const unsigned char* cur = (const unsigned char*)pBuffer;
    const unsigned char* const stop = cur + size;

    long multiByteLeads = 0;
    bool plausible = true;

    while (cur < stop)
    {
        const unsigned char lead = *cur;

        // 0xxxxxxx: plain ASCII
        if (lead < 0x80)
        {
            ++cur;
            continue;
        }

        // 10xxxxxx cannot start a sequence; 11111xxx is not a valid lead.
        if (lead < 0xC0 || lead >= 0xF8)
        {
            plausible = false;
            break;
        }

        // 110xxxxx -> 2 bytes, 1110xxxx -> 3 bytes, 11110xxx -> 4 bytes
        int seqLen = 4;
        if (lead < 0xE0)
        {
            seqLen = 2;
        }
        else if (lead < 0xF0)
        {
            seqLen = 3;
        }

        ++multiByteLeads;

        // Sequence runs past the end of the buffer: stop, still plausible.
        if (stop - cur < seqLen)
        {
            break;
        }

        // Every trailing byte must look like 10xxxxxx.
        bool tailOk = true;
        for (int i = 1; i < seqLen; ++i)
        {
            if ((cur[i] & 0xC0) != 0x80)
            {
                tailOk = false;
                break;
            }
        }
        if (!tailOk)
        {
            plausible = false;
            break;
        }

        cur += seqLen;
    }

    utf8num = multiByteLeads;
    return plausible;
}

/*
 * GetFileType - guess the text encoding of a file.
 *
 * PathFileName : path of the file to inspect.
 * returns      : GFT_NULL     the file cannot be opened or its header
 *                             cannot be read
 *                GFT_ANSI     shorter than 3 bytes, or the content
 *                             contradicts UTF-8
 *                GFT_UTF16LE / GFT_UTF16BE / GFT_UTF8BOM
 *                             identified by the byte-order mark
 *                GFT_UTF8     no BOM, but the first GFT_CKLENMAX bytes
 *                             are consistent with UTF-8
 *
 * The file is closed on every return path.
 */
int GetFileType(const TCHAR* PathFileName)
{
    FILE* fp = _tfopen(PathFileName, _T("rb"));
    if (NULL == fp)
    {
        return GFT_NULL;  // file does not exist or cannot be opened
    }

    fseek(fp, 0, SEEK_END);
    long lSize = ftell(fp);
    if (lSize < 3)
    {
        fclose(fp);
        return GFT_ANSI;    // too short to carry a BOM: treat as ANSI
    }

    // Read the first three bytes and look for a byte-order mark.
    unsigned char Head[3] = { 0, 0, 0 };
    fseek(fp, 0, SEEK_SET);
    if (fread(Head, 1, 3, fp) != 3)
    {
        fclose(fp);
        return GFT_NULL;    // header unreadable: report as "no file"
    }

    int bomType = GFT_NULL;
    if ((Head[0] == 0xff) && (Head[1] == 0xfe))
    {
        bomType = GFT_UTF16LE;
    }
    else if ((Head[0] == 0xfe) && (Head[1] == 0xff))
    {
        bomType = GFT_UTF16BE;
    }
    else if ((Head[0] == 0xef) && (Head[1] == 0xbb) && (Head[2] == 0xbf))
    {
        bomType = GFT_UTF8BOM;
    }

    if (bomType != GFT_NULL)
    {
        fclose(fp);         // these paths used to leak the FILE handle
        return bomType;
    }

    // No BOM: inspect the content, limited to GFT_CKLENMAX bytes.
    if (lSize > GFT_CKLENMAX) lSize = GFT_CKLENMAX;
    fseek(fp, 0, SEEK_SET);

    char* pBuff = new char[lSize + 1];
    memset(pBuff, 0, lSize + 1);
    // Element size 1 makes fread report how many bytes actually arrived,
    // so only those bytes are examined.
    const size_t bytesRead = fread(pBuff, 1, (size_t)lSize, fp);
    fclose(fp);

    long lutf8num = 0;
    const bool bIsUTF8 = IsUTF8Count(pBuff, (long)bytesRead, lutf8num);

    delete[] pBuff;         // allocated with new[]

    if (!bIsUTF8)
    {
        return GFT_ANSI;    // certainly not UTF-8; decoding as UTF-8 would fail
    }

    //if (lutf8num == 0) return GFT_UTF8_0;  // UTF-8 without multi-byte characters
    //return GFT_UTF8_N;                     // UTF-8 with multi-byte characters

    return GFT_UTF8; // callers do not need to know whether multi-byte characters are present
}

/*
 * CharToWchar - convert a NUL-terminated multi-byte string to a wide
 * string with MultiByteToWideChar.
 *
 * c        : source string; nullptr yields nullptr.
 * m_encode : code page passed to MultiByteToWideChar.
 * returns  : a new[]-allocated, NUL-terminated wide string.
 */
wchar_t* CharToWchar(const char* c, size_t m_encode)
{
    if (c == nullptr)
    {
        return nullptr;
    }

    // First call measures, second call converts.
    const int wideLen = MultiByteToWideChar(m_encode, 0, c, strlen(c), NULL, 0);

    wchar_t* wide = new wchar_t[wideLen + 1];
    MultiByteToWideChar(m_encode, 0, c, strlen(c), wide, wideLen);
    wide[wideLen] = '\0';

    return wide;
}

/*
 * WcharToChar - convert a NUL-terminated wide string to a multi-byte
 * string with WideCharToMultiByte.
 *
 * wp       : source string; nullptr yields nullptr.
 * m_encode : code page passed to WideCharToMultiByte.
 * returns  : a new[]-allocated, NUL-terminated multi-byte string.
 */
char* WcharToChar(const wchar_t* wp, size_t m_encode)
{
    if (wp == nullptr)
    {
        return nullptr;
    }

    // First call measures, second call converts.
    const int byteLen = WideCharToMultiByte(m_encode, 0, wp, wcslen(wp), NULL, 0, NULL, NULL);

    char* narrow = new char[byteLen + 1];
    WideCharToMultiByte(m_encode, 0, wp, wcslen(wp), narrow, byteLen, NULL, NULL);
    narrow[byteLen] = '\0';

    return narrow;
}

 

posted @ 2023-07-08 11:06  XGZ21  阅读(105)  评论(0编辑  收藏  举报