一个比较全的汉字转拼音工具类

前两天无聊,把 J2ME 的代码(GB2312 转拼音)移植到了 C#,其实特别简单,就是换换东西。

此外,跟大家介绍一个很强大的工具:Java Language Conversion Assistant。使用它可以轻松的将java类库转换为c#工程。特别是算法型的代码,转换效果非常好。

我曾经把一个qrcode,dm混合编码的算法用它进行转换,喝杯茶的功夫,全部转换完成。当然了,它会提示你一些需要手动修改的地方,然后我用了10分钟,就修改完代码,可以正常执行了。有兴趣的朋友自己找找,就在vs.net中有集成的。

 1   class GB2PY

  2     {
  3         private static int[] FIRST_TABLE = { 4521745253457614631846826,
  4                 470104729747614476144811949062493244989650371,
  5                 506145062250906513875144652218522185221852698,
  6                 52980536895448155289 };
  7 
  8         private static string[] ALL_VALUE = { "zuo""zun""zui""zuan""zu",
  9                 "zou""zong""zi""zhuo""zhun""zhui""zhuang""zhuan",
 10                 "zhuai""zhua""zhu""zhou""zhong""zhi""zheng",
 11                 "zhen""zhe""zhao""zhang""zhan""zhai""zha""zeng",
 12                 "zen""zei""ze""zao""zang""zan""zai""za""yun",
 13                 "yue""yuan""yu""you""yong""yo""ying""yin""yi",
 14                 "ye""yao""yang""yan""ya""xun""xue""xuan""xu",
 15                 "xiu""xiong""xing""xin""xie""xiao""xiang""xian",
 16                 "xia""xi""wu""wo""weng""wen""wei""wang""wan",
 17                 "wai""wa""tuo""tun""tui""tuan""tu""tou""tong",
 18                 "ting""tie""tiao""tian""ti""teng""te""tao",
 19                 "tang""tan""tai""ta""suo""sun""sui""suan""su",
 20                 "sou""song""si""shuo""shun""shui""shuang""shuan",
 21                 "shuai""shua""shu""shou""shi""sheng""shen""she",
 22                 "shao""shang""shan""shai""sha""seng""sen""se",
 23                 "sao""sang""san""sai""sa""ruo""run""rui""ruan",
 24                 "ru""rou""rong""ri""reng""ren""re""rao""rang",
 25                 "ran""qun""que""quan""qu""qiu""qiong""qing",
 26                 "qin""qie""qiao""qiang""qian""qia""qi""pu""po",
 27                 "ping""pin""pie""piao""pian""pi""peng""pen",
 28                 "pei""pao""pang""pan""pai""pa""ou""o""nuo",
 29                 "nue""nuan""nv""nu""nong""niu""ning""nin""nie",
 30                 "niao""niang""nian""ni""neng""nen""nei""ne",
 31                 "nao""nang""nan""nai""na""mu""mou""mo""miu",
 32                 "ming""min""mie""miao""mian""mi""meng""men",
 33                 "mei""me""mao""mang""man""mai""ma""luo""lun",
 34                 "lue""luan""lv""lu""lou""long""liu""ling""lin",
 35                 "lie""liao""liang""lian""lia""li""leng""lei",
 36                 "le""lao""lang""lan""lai""la""kuo""kun""kui",
 37                 "kuang""kuan""kuai""kua""ku""kou""kong""keng",
 38                 "ken""ke""kao""kang""kan""kai""ka""jun""jue",
 39                 "juan""ju""jiu""jiong""jing""jin""jie""jiao",
 40                 "jiang""jian""jia""ji""huo""hun""hui""huang",
 41                 "huan""huai""hua""hu""hou""hong""heng""hen",
 42                 "hei""he""hao""hang""han""hai""ha""guo""gun",
 43                 "gui""guang""guan""guai""gua""gu""gou""gong",
 44                 "geng""gen""gei""ge""gao""gang""gan""gai""ga",
 45                 "fu""fou""fo""feng""fen""fei""fang""fan""fa",
 46                 "er""en""e""duo""dun""dui""duan""du""dou",
 47                 "dong""diu""ding""die""diao""dian""di""deng",
 48                 "de""dao""dang""dan""dai""da""cuo""cun""cui",
 49                 "cuan""cu""cou""cong""ci""chuo""chun""chui",
 50                 "chuang""chuan""chuai""chu""chou""chong""chi",
 51                 "cheng""chen""che""chao""chang""chan""chai""cha",
 52                 "ceng""ce""cao""cang""can""cai""ca""bu""bo",
 53                 "bing""bin""bie""biao""bian""bi""beng""ben",
 54                 "bei""bao""bang""ban""bai""ba""ao""ang""an",
 55                 "ai""a" };
 56 
 57         private static int[] ALL_CODE = { -10254-10256-10260-10262,
 58                 -10270-10274-10281-10296-10307-10309-10315-10322,
 59                 -10328-10329-10331-10519-10533-10544-10587-10764,
 60                 -10780-10790-10800-10815-10832-10838-11014-11018,
 61                 -11019-11020-11024-11038-11041-11045-11052-11055,
 62                 -11067-11077-11097-11303-11324-11339-11340-11358,
 63                 -11536-11589-11604-11781-11798-11831-11847-11861,
 64                 -11867-12039-12058-12067-12074-12089-12099-12120,
 65                 -12300-12320-12346-12359-12556-12585-12594-12597,
 66                 -12607-12802-12812-12829-12831-12838-12849-12852,
 67                 -12858-12860-12871-12875-12888-13060-13063-13068,
 68                 -13076-13091-13095-13096-13107-13120-13138-13147,
 69                 -13318-13326-13329-13340-13343-13356-13359-13367,
 70                 -13383-13387-13391-13395-13398-13400-13404-13406,
 71                 -13601-13611-13658-13831-13847-13859-13870-13878,
 72                 -13894-13896-13905-13906-13907-13910-13914-13917,
 73                 -14083-14087-14090-14092-14094-14097-14099-14109,
 74                 -14112-14122-14123-14125-14135-14137-14140-14145,
 75                 -14149-14151-14159-14170-14345-14353-14355-14368,
 76                 -14379-14384-14399-14407-14429-14594-14630-14645,
 77                 -14654-14663-14668-14670-14674-14678-14857-14871,
 78                 -14873-14882-14889-14894-14902-14908-14914-14921,
 79                 -14922-14926-14928-14929-14930-14933-14937-14941,
 80                 -15109-15110-15117-15119-15121-15128-15139-15140,
 81                 -15141-15143-15144-15149-15150-15153-15158-15165,
 82                 -15180-15183-15362-15363-15369-15375-15377-15385,
 83                 -15394-15408-15416-15419-15435-15436-15448-15454,
 84                 -15625-15631-15640-15652-15659-15661-15667-15681,
 85                 -15701-15707-15878-15889-15903-15915-15920-15933,
 86                 -15944-15958-15959-16155-16158-16169-16171-16180,
 87                 -16187-16202-16205-16212-16216-16220-16393-16401,
 88                 -16403-16407-16412-16419-16423-16427-16429-16433,
 89                 -16448-16452-16459-16465-16470-16474-16647-16657,
 90                 -16664-16689-16706-16708-16733-16915-16942-16970,
 91                 -16983-17185-17202-17417-17427-17433-17454-17468,
 92                 -17482-17487-17496-17676-17683-17692-17697-17701,
 93                 -17703-17721-17730-17733-17752-17759-17922-17928,
 94                 -17931-17947-17950-17961-17964-17970-17988-17997,
 95                 -18012-18181-18183-18184-18201-18211-18220-18231,
 96                 -18237-18239-18446-18447-18448-18463-18478-18490,
 97                 -18501-18518-18526-18696-18697-18710-18722-18731,
 98                 -18735-18741-18756-18763-18773-18774-18783-18952,
 99                 -18961-18977-18996-19003-19006-19018-19023-19038,
100                 -19212-19218-19224-19227-19235-19238-19242-19243,
101                 -19249-19261-19263-19270-19275-19281-19288-19289,
102                 -19467-19479-19484-19500-19515-19525-19531-19540,
103                 -19715-19725-19728-19739-19741-19746-19751-19756,
104                 -19763-19774-19775-19784-19805-19976-19982-19986,
105                 -19990-20002-20026-20032-20036-20051-20230-20242,
106                 -20257-20265-20283-20292-20295-20304-20317-20319 };
107 
108         public static string getAllPY(string gb2312)
109         {
110             if (null == gb2312 || "".Equals(gb2312.Trim()))
111             {
112                 return gb2312;
113             }
114             char[] chars = gb2312.ToCharArray();
115             StringBuilder retuBuf = new StringBuilder();
116             for (int i = 0, Len = chars.Length; i < Len; i++)
117             {
118                 retuBuf.Append(getAllPY(chars[i]));
119             } // end of for
120             return retuBuf.ToString();
121         }
122 
123         public static string getAllPY(char gb2312)
124         {
125             int ascii = getCnAscii(gb2312);
126             if (ascii == 0)
127             { // 取ascii时出错
128                 return new string(gb2312, 1);
129             }
130             else
131             {
132                 string spell = getSpellByAscii(ascii);
133                 if (spell == null)
134                 {
135                     return new string(gb2312, 1);
136                 }
137                 else
138                 {
139                     return spell;
140                 } // end of if spell == null
141             }
142         }
143 
144         public static char getFirstPY(char ch)
145         {
146             if (ch >= 0 && ch <= 0x7F)
147             {
148                 return ch;
149             }
150             int gb = 0;
151 
152             byte[] bytes = Encoding.GetEncoding("gb2312").GetBytes(new string(ch, 1));
153             if (bytes.Length < 2)
154             {
155                 gb = byte2Int(bytes[0]);
156             }
157             gb = (bytes[0<< 8 & 0xff00+ (bytes[1& 0xff);
158             if (gb < FIRST_TABLE[0])
159                 return ch;
160             int i;
161             for (i = 0; i < 26++i)
162             {
163                 if (match(i, gb))
164                     break;
165             }
166             if (i >= 26)
167                 return ch;
168             else
169                 return (char)(65 + i);
170         }
171 
172         public static string getFirstPY(string src)
173         {
174             StringBuilder sb = new StringBuilder();
175             int len = src.Length;
176             int i;
177             for (i = 0; i < len; i++)
178             {
179                 sb.Append(getFirstPY(src[i]));
180             }
181             return sb.ToString();
182         }
183 
184         private static int getCnAscii(char cn)
185         {
186             byte[] bytes = null;
187             bytes = Encoding.GetEncoding("gb2312").GetBytes(new string(cn, 1));
188             if (bytes == null || bytes.Length > 2 || bytes.Length <= 0)
189             {
190                 return 0;
191             }
192             if (bytes.Length == 1)
193             {
194                 return bytes[0];
195             }
196             else
197             {
198                 int hightByte = bytes[0];
199                 int lowByte = bytes[1];
200                 int ascii = (256 * hightByte + lowByte) - 256 * 256;
201                 return ascii;
202             }
203         }
204 
205         private static string getSpellByAscii(int ascii)
206         {
207             if (ascii > 0 && ascii < 160)
208             { // 单字符
209                 return new string((char)ascii, 1);
210             }
211             if (ascii < -20319 || ascii > -10247)
212             { // 不知道的字符
213                 return null;
214             }
215             int first = 0;
216             int sLast = ALL_CODE.Length - 1;
217             int last = ALL_CODE.Length - 1;
218             int mid;
219             int temp;
220             while (true)
221             {
222                 mid = (first + last) >> 1;
223                 if (ascii == ALL_CODE[mid])
224                 {
225                     return ALL_VALUE[mid];
226                 }
227                 else if (ascii > ALL_CODE[mid])
228                 {
229                     temp = mid - 1;
230                     if (temp >= 0)
231                     {
232                         if (ascii < ALL_CODE[temp])
233                         {
234                             return ALL_VALUE[mid];
235                         }
236                         else
237                         {
238                             last = mid;
239                         }
240                     }
241                     else
242                     {
243                         return ALL_VALUE[0];
244                     }
245                 }
246                 else
247                 {
248                     if (mid + 1 <= sLast)
249                     {
250                         first = mid + 1;
251                     }
252                     else
253                     {
254                         return ALL_VALUE[sLast];
255                     }
256                 }
257             }
258         }
259 
260         private static bool match(int i, int gb)
261         {
262             if (gb < FIRST_TABLE[i])
263             {
264                 return false;
265             }
266             int j = i + 1;
267             // 字母Z使用了两个标签
268             while (j < 26 && (FIRST_TABLE[j] == FIRST_TABLE[i]))
269             {
270                 ++j;
271             }
272             if (j == 26)
273                 return gb <= FIRST_TABLE[j];
274             else
275                 return gb < FIRST_TABLE[j];
276         }
277 
278         private static int byte2Int(byte b)
279         {
280             if (b < 0)
281             {
282                 return 256 + b;
283             }
284             else
285             {
286                 return b;
287             }
288         }
289 
290         public static bool isSpliter(char c)
291         {
292             char[] spliter = { ','''';''' };
293             foreach (char cc in spliter)
294             {
295                 if (c == cc)
296                 {
297                     return true;
298                 }
299             }
300             return false;
301         }
302 
303         public static string[] split(string src)
304         {
305             string text = src.Trim();
306             StringBuilder sb = new StringBuilder();
307             ArrayList al = new ArrayList();
308             int i = 0;
309             //跳过之前的分隔符
310             for (i = 0; i < text.Length; i++)
311             {
312                 if (!isSpliter(text[i]))
313                 {
314                     break;
315                 }
316             }
317             for (; i < text.Length; i++)
318             {
319                 if (isSpliter(text[i]))
320                 {
321                     if (sb.Length > 0)
322                     {
323                         al.Add(sb.ToString());
324                     }
325                     sb = new StringBuilder();
326                 }
327                 else
328                 {
329                     sb.Append(text[i]);
330                 }
331             }
332             if (sb.Length > 0)
333             {
334                 al.Add(sb.ToString());
335             }
336             if (al.Count > 0)
337             {
338                 string[] ret = new string[al.Count];
339                 for (i = 0; i < al.Count; i++)
340                 {
341                     ret[i] = (string)al[i];
342                 }
343                 return ret;
344             }
345             else
346             {
347                 return null;
348             }
349         }
350     }
posted @ 2008-10-23 09:55  NON-Fish  阅读(650)  评论(1)  编辑  收藏  举报