周5的下午,没什么心情除BUG。想到最近可能会用到字符串拼音首字母的快速查找。
在网上G了几下,站在前人的肩膀上实现了下面的这个类。 可以处理 中英文数字字母标点混合的情况,
会自动过滤中文汉字和英文字母以外的内容。如果取不到某个字符的拼音首字母,就自动跳过。
测试了下,基本上可以用。
不过还不是很完美:对某些汉字没办法取到首字母,比如现在很火的 “囧”;多音字也没办法处理。
PS: 后台插入代码的功能真难用。我插入后代码折叠显示到文章的最底部。中间空一大段。都没法调整。郁闷
//根据汉字字符串返回首字母字符串
#include <Windows.h>
// Converts a GB2312/GBK-encoded, NUL-terminated string into the string of
// upper-case pinyin initials of its characters.
//
//  * ASCII letters are copied through, upper-cased.
//  * GB2312 level-1 Chinese characters (0xB0A1..0xD7F9, which are ordered by
//    pinyin) are mapped to the first letter of their pinyin.
//  * Everything else (digits, punctuation, characters outside the table) is
//    silently skipped.
//
// The object owns the result buffer: the pointer returned by GetAlpha() stays
// valid only until the next GetAlpha() call or until the object is destroyed.
//
// NOTE: no copy constructor / assignment operator is defined, so copying an
// instance would make two objects delete[] the same buffer. Pass instances by
// reference or pointer.
class GB2Alpha
{
public:
    GB2Alpha()
        : pAlpha(NULL)
    {
    }

    ~GB2Alpha()
    {
        delete[] pAlpha; // delete[] on NULL is a no-op
        pAlpha = NULL;
    }

    // Returns the pinyin-initial string for pStr, or NULL when pStr is NULL.
    // The result is at most strlen(pStr) characters long and is owned by
    // this object (do not free it).
    const char* GetAlpha(const char* pStr)
    {
        // The buffer was allocated with new[], so it must be released with
        // delete[] (scalar delete here is undefined behaviour).
        delete[] pAlpha;
        pAlpha = NULL;

        if (!pStr)
        {
            return NULL;
        }

        const size_t nLen = strlen(pStr);
        // One output char per input byte is an upper bound; +1 for the NUL.
        pAlpha = new char[nLen + 1];
        memset(pAlpha, 0x00, nLen + 1);

        size_t nOut = 0; // number of initials written so far
        size_t i = 0;
        while (i < nLen)
        {
            // Work on unsigned bytes so the ASCII / double-byte split does not
            // depend on whether plain char is signed on this platform.
            const unsigned char lead = (unsigned char)pStr[i];

            if (lead < 0x80) // single-byte (ASCII) character
            {
                if (lead >= 'a' && lead <= 'z')
                {
                    pAlpha[nOut++] = (char)(lead - 'a' + 'A');
                }
                else if (lead >= 'A' && lead <= 'Z')
                {
                    pAlpha[nOut++] = (char)lead;
                }
                ++i;
                continue;
            }

            // Double-byte character: a lead byte with nothing after it is a
            // truncated sequence, so stop instead of pairing it with the NUL.
            if (i + 1 >= nLen)
            {
                break;
            }

            const unsigned char trail = (unsigned char)pStr[i + 1];
            i += 2;

            const char initial = convert(((unsigned int)lead << 8) | trail);
            if (initial != '\0')
            {
                pAlpha[nOut++] = initial;
            }
        }

        return pAlpha;
    }

private:
    // Maps one double-byte code (lead byte in the high 8 bits, trail byte in
    // the low 8 bits) to its pinyin initial, or '\0' if it is not covered.
    char convert(unsigned int code) const
    {
        // GB2312 trail bytes are 0xA1..0xFE. Other trail bytes belong to the
        // GBK extension area, which is not ordered by pinyin, so the range
        // table below would give a meaningless answer for them.
        const unsigned int trail = code & 0xFFu;
        if (trail < 0xA1u || trail > 0xFEu)
        {
            return '\0';
        }

        struct Range
        {
            unsigned int first;
            unsigned int last;
            char letter;
        };

        // GB2312 level-1 characters are sorted by pinyin, so each initial
        // covers one contiguous range. There is no I, U or V: no pinyin
        // syllable starts with those letters.
        static const Range kRanges[] =
        {
            { 0xB0A1, 0xB0C4, 'A' },
            { 0xB0C5, 0xB2C0, 'B' },
            { 0xB2C1, 0xB4ED, 'C' },
            { 0xB4EE, 0xB6E9, 'D' },
            { 0xB6EA, 0xB7A1, 'E' },
            { 0xB7A2, 0xB8C0, 'F' },
            { 0xB8C1, 0xB9FD, 'G' },
            { 0xB9FE, 0xBBF6, 'H' },
            { 0xBBF7, 0xBFA5, 'J' },
            { 0xBFA6, 0xC0AB, 'K' },
            { 0xC0AC, 0xC2E7, 'L' },
            { 0xC2E8, 0xC4C2, 'M' },
            { 0xC4C3, 0xC5B5, 'N' },
            { 0xC5B6, 0xC5BD, 'O' },
            { 0xC5BE, 0xC6D9, 'P' },
            { 0xC6DA, 0xC8BA, 'Q' },
            { 0xC8BB, 0xC8F5, 'R' },
            { 0xC8F6, 0xCBF9, 'S' }, // was 0xCBF0: left a hole before 'T'
            { 0xCBFA, 0xCDD9, 'T' },
            { 0xCDDA, 0xCEF3, 'W' },
            { 0xCEF4, 0xD1B8, 'X' }, // was 0xD188: left a hole before 'Y'
            { 0xD1B9, 0xD4D0, 'Y' },
            { 0xD4D1, 0xD7F9, 'Z' }
        };

        const size_t count = sizeof(kRanges) / sizeof(kRanges[0]);
        for (size_t k = 0; k < count; ++k)
        {
            if (In(kRanges[k].first, kRanges[k].last, code))
            {
                return kRanges[k].letter;
            }
        }
        return '\0';
    }

    // True when code lies in the closed interval [start, end].
    static bool In(unsigned int start, unsigned int end, unsigned int code)
    {
        return code >= start && code <= end;
    }

protected:
    char* pAlpha; // result buffer owned by this object; NULL when empty
};