C/C++ GBK和UTF8之间的转换

 

{

  

关于GBK和UTF-8之间的转换,很多初学者会很迷茫。

一般来说GBK和UTF-8是文字的编码方式,其对应的内码是不一样的,所以GBK和UTF-8的转换需要对内码进行一一映射,然后进行转换。

对于一般系统上的工程,一般使用libiconv即可,但是对于嵌入式或手机操作系统,libiconv显得就有点庞大了。

在这里提供GBK和UTF8转换以及全半角、大小写转换等函数,希望对手机开发的同学有所帮助,特别是在iOS上开发的同学。

strnormalize.h

strnormalize.c


全半角、大小写、简繁体转换的使用方法见如下代码(默认转换为小写和半角,也可通过第一个命令行参数指定转换选项):

 #include "strnormalize.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 int main(int argc, char **argv)
 {
     str_normalize_init();
     unsigned options = SNO_TO_LOWER | SNO_TO_HALF;
     if (argc > 1) options = atoi(argv[1]);
 
     char *buffer = (char *)malloc(65536);
     memset(buffer, 0, 65536);
     while (fgets(buffer, 65536, stdin))
     {   
         str_normalize_utf8(buffer, options);
         printf("%s", buffer);
     }   
     free(buffer);
 
     return 0;
 }

UTF-8和GBK转换使用方法如下:

#include "strnormalize.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/*
 * Conversion example: converts one UTF-8 sample to GBK and one GBK sample to
 * UTF-8, then prints each input next to its converted output and the length
 * value left in the corresponding *to_len variable.
 *
 * Returns 0 on success, EXIT_FAILURE if an output buffer cannot be allocated.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    str_normalize_init();

    const char *utf8 = "我是utf-8字符!";
    /* The GBK encoding of "我是GBK字符!" written as byte escapes, so the
     * sample survives any re-encoding of this source file. The literal is
     * split into pieces so no hex escape can swallow a following letter. */
    const char *gbk = "\xCE\xD2\xCA\xC7" "GBK" "\xD7\xD6\xB7\xFB\xA3\xA1";

    /* unsigned int matches the length types in the conversion prototypes. */
    unsigned int utf8_len = (unsigned int)strlen(utf8);
    unsigned int gbk_len = (unsigned int)strlen(gbk);

    /* Each output buffer is sized from the input that is converted INTO it,
     * with generous per-byte expansion factors plus room for the terminator. */
    unsigned int utf8buffer_len = gbk_len * 3 + 1;   /* receives GBK -> UTF-8 */
    unsigned int gbkbuffer_len = utf8_len * 2 + 1;   /* receives UTF-8 -> GBK */

    /* Zero-filled so the outputs are NUL-terminated as long as the
     * converters write fewer bytes than the buffer holds. */
    char *utf8buffer = calloc(utf8buffer_len, 1);
    char *gbkbuffer = calloc(gbkbuffer_len, 1);
    if (utf8buffer == NULL || gbkbuffer == NULL) {
        perror("calloc");
        free(utf8buffer);
        free(gbkbuffer);
        return EXIT_FAILURE;
    }

    /* NOTE(review): the return values are not checked because the success /
     * failure convention of these functions is not visible here — confirm
     * against strnormalize.c and add checks. */
    utf8_to_gbk(utf8, utf8_len, &gbkbuffer, &gbkbuffer_len);
    gbk_to_utf8(gbk, gbk_len, &utf8buffer, &utf8buffer_len);

    printf("utf8: %s<=>%u \t gbkbuffer: %s<=>%u\n", utf8, utf8_len, gbkbuffer, gbkbuffer_len);
    printf("gbk: %s<=>%u \t utf8buffer: %s<=>%u\n", gbk, gbk_len, utf8buffer, utf8buffer_len);

    free(utf8buffer);
    free(gbkbuffer);
    return 0;
}

 

strnormalize.h

 

/**
 * Copyright(c) 2012-2013, All Rights Reserved.
 *
 * @file strnormalize.h
 * @details To check whether a code is a GBK character, test:
 *     code >= 0x8000 && _pGbk2Utf16[code - 0x8000] != 0
 * @author cnangel
 * @version 1.0.0
 * @date 2012/10/09 11:44:58
 */

/* NOTE(review): identifiers containing "__" are reserved for the
 * implementation; the guard name is kept unchanged here for compatibility. */
#ifndef __STRNORMALIZE_H__
#define __STRNORMALIZE_H__

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Option bits for the `options` argument of str_normalize_gbk() and
 * str_normalize_utf8(). Each flag is a distinct bit, so flags can be combined
 * with bitwise OR. The meanings below are inferred from the flag names —
 * confirm against strnormalize.c.
 */
#define SNO_TO_LOWER        1   /* presumably: convert letters to lower case */
#define SNO_TO_UPPER        2   /* presumably: convert letters to upper case */
#define SNO_TO_HALF         4   /* presumably: full-width -> half-width forms */
#define SNO_TO_SIMPLIFIED   8   /* presumably: traditional -> simplified Chinese */

/*
 * Library initialisation. Declared with an explicit (void) parameter list so
 * that it is a real prototype in C and calls with arguments are rejected.
 * NOTE(review): presumably must be called once before the other functions.
 */
void str_normalize_init(void);

/*
 * Normalise `text` according to `options` (OR of SNO_* flags).
 * The buffer is non-const and nothing is returned, so the result is
 * presumably written back into `text` in place; `text` is presumably a
 * NUL-terminated string in the encoding named by the function — TODO confirm.
 */
void str_normalize_gbk(char *text, unsigned options);
void str_normalize_utf8(char *text, unsigned options);

/*
 * Encoding conversion between GBK and UTF-8.
 *
 * from      input bytes in the source encoding
 * from_len  number of input bytes
 * to        address of the caller's output-buffer pointer
 * to_len    address of the output length/capacity value
 *
 * NOTE(review): whether *to may be reallocated or advanced, what *to_len
 * holds on return, and the meaning of the int return value are not visible
 * from this header — confirm against strnormalize.c.
 */
int gbk_to_utf8(const char *from, unsigned int from_len, char **to, unsigned int *to_len);
int utf8_to_gbk(const char *from, unsigned int from_len, char **to, unsigned int *to_len);

#ifdef __cplusplus
}
#endif

#endif /* __STRNORMALIZE_H__ */

 

strnormalize.c

 

 

 

}

posted @ 2019-12-05 15:12  YZFHKMS-X  阅读(8610)  评论(0)  编辑  收藏  举报