随笔 - 170  文章 - 0  评论 - 16  阅读 - 35908 

g++ charset.cpp -licui18n -licuuc

 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
#include <string>

#include <unicode/ucnv.h>
#include <unicode/ucsdet.h>
//#include <unicode/urename.h>
#include <unicode/utypes.h>

bool what_charset(const char *data, int len, char **detected)
{
UCharsetDetector* csd;
const UCharsetMatch **csm;
int match, matchCount = 0;
UErrorCode status = U_ZERO_ERROR;
csd = ucsdet_open(&status);
if(status != U_ZERO_ERROR)
return false;
ucsdet_setText(csd, data, len, &status);
if(status != U_ZERO_ERROR)
return false;

csm = ucsdet_detectAll(csd, &matchCount, &status);
if(status != U_ZERO_ERROR)
return false;
#if 1 //打印出探测的可能的编码
for(match = 0; match < matchCount; match += 1)
{
const char *name = ucsdet_getName(csm[match], &status);
const char *lang = ucsdet_getLanguage(csm[match], &status);
int confidence = ucsdet_getConfidence(csm[match], &status);
if (lang == NULL || strlen(lang) == 0)
lang = "**";
printf("%s (%s) %d\n", name, lang, confidence);
}
#endif
if(matchCount > 0)
{
*detected = strdup(ucsdet_getName(csm[0], &status)); //分配了内
if(status != U_ZERO_ERROR)
return false;
}

printf("charset = %s n", *detected);
ucsdet_close(csd);

return true;
}

int main(int argc, char* argv[])
{

std::string data = "This is a 测试用数据";
char buf[128];
char *str[1];
str[0] = buf;
int convert_flag = 0;
int subject_length = data.length();

what_charset((const char *)data.c_str(), data.length(), str);
std::string encoding = str[0];
if(encoding != "UTF-8")
{
int clen = 0;
convert_flag = 1;
UErrorCode error = U_ZERO_ERROR;
char *u8_data = (char *)malloc(subject_length * 2);
if(u8_data == NULL)
{
return -1;
}
clen = ucnv_convert("UTF-8", (char *)encoding.c_str(), (char *)u8_data, subject_length*2, (char *)data.c_str(), subject_length, &error);
if(error != U_ZERO_ERROR)
{
free((void *)u8_data);
return -1;
}
}

return 0;
}

posted on   北京开发  阅读(65)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 零经验选手,Compose 一天开发一款小游戏!
· 通过 API 将Deepseek响应流式内容输出到前端
· AI Agent开发,如何调用三方的API Function,是通过提示词来发起调用的吗
点击右上角即可分享
微信分享提示