C++实现获取汉字拼音首字母

中英文数字字母标点混合的情况,会自动过滤中文汉字和英文字母外的内容,如果取不到某个字符的拼音首字母就自动跳过。测试了下,基本上可以用。不过不是很完美,对某些汉字没办法取到首字母,比如现在很火的 “囧”,多音字也没办法处理。
  // Returns the string of pinyin initial letters for a string of Chinese characters.
  #include <Windows.h>
  #include <string.h>
  /**
   * Converts a double-byte (GB2312/GBK style) encoded string into the string
   * of upper-case pinyin initial letters of its characters.
   *
   * ASCII letters are kept (lower case is folded to upper case); every other
   * single-byte character is dropped. Each two-byte character is looked up in
   * a table of code ranges and replaced by the matching letter; characters
   * outside the table are dropped. The table covers codes 0xB0A1..0xD7F9 only
   * (presumably the GB2312 level-1 Hanzi area, which is ordered by
   * pronunciation), so rarer characters and polyphonic readings are not
   * handled.
   *
   * The object owns the result buffer. Copying an instance is not supported:
   * no copy constructor or assignment operator is defined, so two copies
   * would both delete the same buffer.
   */
  class GB2Alpha
  {
  public:
      /** Creates a converter that holds no result buffer yet. */
      GB2Alpha()
      {
          pAlpha = NULL;
      }

      /** Releases the buffer of the last conversion, if any. */
      ~GB2Alpha()
      {
          delete[] pAlpha; // delete[] on a null pointer is a no-op
          pAlpha = NULL;
      }

      /**
       * Builds the initial-letter string for pStr.
       *
       * @param pStr NUL-terminated input string; may be NULL.
       * @return NULL when pStr is NULL; otherwise a NUL-terminated string of
       *         upper-case letters (possibly empty). The buffer belongs to
       *         this object and stays valid only until the next call to
       *         GetAlpha() or until the object is destroyed.
       */
      const char* GetAlpha(const char* pStr)
      {
          // The buffer was allocated with new[], so it must be released
          // with delete[].
          delete[] pAlpha;
          pAlpha = NULL;

          if (!pStr)
          {
              return NULL;
          }

          const size_t nlen = strlen(pStr);

          // At most one output letter per input byte, plus the terminator.
          pAlpha = new char[nlen + 1];
          memset(pAlpha, 0x00, nlen + 1);

          size_t nIndex = 0; // number of letters written so far

          for (size_t i = 0; i < nlen; )
          {
              // Work on unsigned bytes so the ASCII test does not depend on
              // whether plain char is signed on the target platform.
              const unsigned char lead = (unsigned char)pStr[i++];

              if (lead < 0x80) // single-byte (ASCII) character
              {
                  if (lead >= 'a' && lead <= 'z')
                  {
                      pAlpha[nIndex++] = (char)(lead - 'a' + 'A');
                  }
                  else if (lead >= 'A' && lead <= 'Z')
                  {
                      pAlpha[nIndex++] = (char)lead;
                  }
                  continue;
              }

              // Lead byte of a two-byte character.
              if (i >= nlen)
              {
                  break; // truncated character at the end of the input
              }
              const unsigned char trail = (unsigned char)pStr[i++];

              // Big-endian combination: lead byte is the high byte.
              const wchar_t code = (wchar_t)((lead << 8) | trail);

              const char letter = convert(code);
              if (letter != '\0')
              {
                  pAlpha[nIndex++] = letter;
              }
          }
          return pAlpha;
      }

  private:
      /**
       * Maps a two-byte character code to its pinyin initial letter.
       *
       * @param n character code, lead byte in the high 8 bits.
       * @return 'A'..'Z', or '\0' when the code is in none of the ranges.
       */
      char convert(wchar_t n)
      {
          struct Range
          {
              wchar_t first;
              wchar_t last;
              char    letter;
          };

          // Adjacent ranges are contiguous: each one ends right before the
          // next one starts. There are no entries for I, U and V.
          static const Range table[] =
          {
              { 0xB0A1, 0xB0C4, 'A' },
              { 0xB0C5, 0xB2C0, 'B' },
              { 0xB2C1, 0xB4ED, 'C' },
              { 0xB4EE, 0xB6E9, 'D' },
              { 0xB6EA, 0xB7A1, 'E' },
              { 0xB7A2, 0xB8C0, 'F' },
              { 0xB8C1, 0xB9FD, 'G' },
              { 0xB9FE, 0xBBF6, 'H' },
              { 0xBBF7, 0xBFA5, 'J' },
              { 0xBFA6, 0xC0AB, 'K' },
              { 0xC0AC, 0xC2E7, 'L' },
              { 0xC2E8, 0xC4C2, 'M' },
              { 0xC4C3, 0xC5B5, 'N' },
              { 0xC5B6, 0xC5BD, 'O' },
              { 0xC5BE, 0xC6D9, 'P' },
              { 0xC6DA, 0xC8BA, 'Q' },
              { 0xC8BB, 0xC8F5, 'R' },
              { 0xC8F6, 0xCBF9, 'S' }, // was 0xCBF0, leaving a gap before 'T'
              { 0xCBFA, 0xCDD9, 'T' },
              { 0xCDDA, 0xCEF3, 'W' },
              { 0xCEF4, 0xD1B8, 'X' }, // was 0xD188, leaving a gap before 'Y'
              { 0xD1B9, 0xD4D0, 'Y' },
              { 0xD4D1, 0xD7F9, 'Z' }
          };

          const size_t count = sizeof(table) / sizeof(table[0]);
          for (size_t k = 0; k < count; ++k)
          {
              if (In(table[k].first, table[k].last, n))
              {
                  return table[k].letter;
              }
          }
          return '\0';
      }

      /** Returns true when code lies in the closed interval [start, end]. */
      bool In(wchar_t start, wchar_t end, wchar_t code)
      {
          return code >= start && code <= end;
      }

  protected:
      char* pAlpha; // result of the last GetAlpha() call; owned by this object
  };
posted on 2010-04-26 17:53  carekee  阅读(3160)  评论(0)  编辑  收藏  举报