如何实现通过汉字的拼音或首拼快速检索(含部分源码)

 前段时间难得有一周左右的空闲时间,在朋友的带动下玩了开心网。里面有一个小功能很有意思:选择人名的时候,可以根据拼音或拼音首字母快速输入。这个功能如果用在我们的应用系统里,对软件的易用性会非常有帮助,于是研究了一下,并把全部代码扩充到了E8.Net工作流架构里面。

      如图:

     

     输入拼音首字母或全拼均可以快速检索,这个功能对于很多软件系统中人名检索,知识库关键字等快速检索很有帮助。由于E8.Net工作流架构里本来就已经实现了类似google的快速检索的控件(有博客介绍),因此实现通过拼音检索就变得相对容易了。

    实现思路如下:

         对于需要检索的内容,在数据库里同时保存其汉字对应的拼音首字母和全拼。 (E8.Net中已经包含了将汉字转换为拼音及首拼的全部源码)

          汉字转换拼音部分代码如下:

            

public class GBToPY
    {
        
/// <summary>
/// Boundaries of the two-byte GB2312 code ranges used by <c>match</c> to map a
/// character to its pinyin initial. Entries 0..25 are the lower bounds for the
/// letters A..Z; entry 26 is the inclusive upper bound of the last range.
/// Consecutive equal entries are skipped by <c>match</c> when it looks for the
/// upper bound of a range.
/// </summary>
// The published listing lost the separators between the values; they are
// restored here as 27 five-digit codes.
private static readonly int[] FIRST_TABLE = {
    45217, 45253, 45761, 46318, 46826,
    47010, 47297, 47614, 47614, 48119, 49062, 49324, 49896, 50371,
    50614, 50622, 50906, 51387, 51446, 52218, 52218, 52218, 52698,
    52980, 53689, 54481, 55289 };

        
/// <summary>
/// Pinyin spellings returned by <c>getSpellByAscii</c>; entry <c>k</c> is the
/// spelling for the code stored at index <c>k</c> of <c>ALL_CODE</c>.
/// </summary>
// The published listing lost the commas between the literals; they are
// restored here.
// NOTE(review): the listing is truncated after "zhu" (the article only shows
// part of the source). The remaining spellings must be supplied so that this
// array has exactly as many entries as ALL_CODE before the lookup can work.
private static readonly string[] ALL_VALUE = {
    "zuo", "zun", "zui", "zuan", "zu",
    "zou", "zong", "zi", "zhuo", "zhun", "zhui", "zhuang", "zhuan",
    "zhuai", "zhua", "zhu"
    /* ... remaining entries omitted in the published listing ... */ };

        
/// <summary>
/// Code values searched by <c>getSpellByAscii</c>, listed in descending order.
/// Each value is on the scale produced by <c>getCnAscii</c> (two-byte code
/// minus 65536); index <c>k</c> pairs with <c>ALL_VALUE[k]</c>.
/// </summary>
// The published listing lost the commas between the values, which turned every
// row into a single subtraction expression; the separators are restored here.
// NOTE(review): the listing is truncated (it ends at -13658 and skips rows in
// between, e.g. from -11358 to -12607), and it starts at -10254 although
// getSpellByAscii accepts codes up to -10247. Complete the table and make sure
// it lines up with ALL_VALUE before relying on the lookup.
private static readonly int[] ALL_CODE = {
    -10254, -10256, -10260, -10262,
    -10270, -10274, -10281, -10296, -10307, -10309, -10315, -10322,
    -10328, -10329, -10331, -10519, -10533, -10544, -10587, -10764,
    -10780, -10790, -10800, -10815, -10832, -10838, -11014, -11018,
    -11019, -11020, -11024, -11038, -11041, -11045, -11052, -11055,
    -11067, -11077, -11097, -11303, -11324, -11339, -11340, -11358,
    -12607, -12802, -12812, -12829, -12831, -12838, -12849, -12852,
    -12858, -12860, -12871, -12875, -12888, -13060, -13063, -13068,
    -13076, -13091, -13095, -13096, -13107, -13120, -13138, -13147,
    -13318, -13326, -13329, -13340, -13343, -13356, -13359, -13367,
    -13383, -13387, -13391, -13395, -13398, -13400, -13404, -13406,
    -13601, -13611, -13658
    /* ... remaining entries omitted in the published listing ... */ };

        
        
/// <summary>
/// Returns the full pinyin spelling of a single character.
/// </summary>
/// <param name="gb2312">The character to convert.</param>
/// <returns>
/// The spelling found by <c>getSpellByAscii</c>, or the character itself as a
/// one-character string when no code could be computed for it or the lookup
/// yields nothing.
/// </returns>
public static string getAllPY(char gb2312)
{
    int code = getCnAscii(gb2312);

    // getCnAscii reports 0 when it could not derive a code for the character.
    if (code == 0)
    {
        return new string(gb2312, 1);
    }

    // Fall back to the original character when the table has no spelling.
    return getSpellByAscii(code) ?? new string(gb2312, 1);
}

        
/// <summary>
/// Returns the upper-case Latin letter that starts the pinyin of a character.
/// </summary>
/// <param name="ch">The character to convert.</param>
/// <returns>
/// 'A'..'Z' when the character's two-byte GB2312 code falls into one of the
/// ranges described by <c>FIRST_TABLE</c>; otherwise <paramref name="ch"/>
/// unchanged (ASCII input, input that does not encode to two bytes, or a code
/// outside every range).
/// </returns>
public static char getFirstPY(char ch)
{
    // ASCII characters are returned as they are.
    if (ch <= 0x7F)
    {
        return ch;
    }

    byte[] bytes = Encoding.GetEncoding("gb2312").GetBytes(new string(ch, 1));

    // Fix: the previous code fell through to bytes[1] even when the encoder
    // produced fewer than two bytes, which raised IndexOutOfRangeException.
    // Such a character has no two-byte code to look up, so hand it back as is.
    if (bytes.Length < 2)
    {
        return ch;
    }

    // Combine lead and trail byte into one 16-bit code.
    int gb = ((bytes[0] << 8) & 0xff00) + (bytes[1] & 0xff);
    if (gb < FIRST_TABLE[0])
    {
        return ch;
    }

    for (int i = 0; i < 26; ++i)
    {
        if (match(i, gb))
        {
            return (char)('A' + i);
        }
    }

    // No range matched.
    return ch;
}

        
/// <summary>
/// Converts every character of a string with <c>getFirstPY(char)</c> and
/// concatenates the results.
/// </summary>
/// <param name="src">The text to convert.</param>
/// <returns>A string of the same length holding the converted characters.</returns>
public static string getFirstPY(string src)
{
    StringBuilder initials = new StringBuilder();
    foreach (char c in src)
    {
        initials.Append(getFirstPY(c));
    }
    return initials.ToString();
}

        
/// <summary>
/// Computes the lookup code of a character from its GB2312 encoding.
/// </summary>
/// <param name="cn">The character to encode.</param>
/// <returns>
/// The single byte value for a one-byte encoding; for a two-byte encoding,
/// <c>256 * first + second - 65536</c>; 0 when the encoder returns no bytes or
/// more than two.
/// </returns>
private static int getCnAscii(char cn)
{
    byte[] encoded = Encoding.GetEncoding("gb2312").GetBytes(new string(cn, 1));
    int count = (encoded == null) ? 0 : encoded.Length;

    switch (count)
    {
        case 1:
            return encoded[0];
        case 2:
            // Shift the 16-bit code down by 65536, which is the scale the
            // lookup in getSpellByAscii works on.
            return (encoded[0] << 8) + encoded[1] - 0x10000;
        default:
            return 0;
    }
}

        
/// <summary>
/// Looks up the pinyin spelling for a code produced by <c>getCnAscii</c>.
/// </summary>
/// <param name="ascii">The code to look up.</param>
/// <returns>
/// A one-character string for codes between 1 and 159; <c>null</c> for codes
/// outside -20319..-10247; otherwise the <c>ALL_VALUE</c> entry selected by a
/// binary search over <c>ALL_CODE</c>.
/// </returns>
private static string getSpellByAscii(int ascii)
{
    // Single-byte character: return it as is.
    if (ascii > 0 && ascii < 160)
    {
        return new string((char)ascii, 1);
    }

    // Unknown character: outside the range handled by the table.
    if (ascii < -20319 || ascii > -10247)
    {
        return null;
    }

    int lastIndex = ALL_CODE.Length - 1;
    int low = 0;
    int high = lastIndex;

    for (;;)
    {
        int mid = (low + high) >> 1;
        int code = ALL_CODE[mid];

        if (ascii == code)
        {
            return ALL_VALUE[mid];
        }

        if (ascii > code)
        {
            int prev = mid - 1;
            if (prev < 0)
            {
                // Larger than the very first entry.
                return ALL_VALUE[0];
            }
            if (ascii < ALL_CODE[prev])
            {
                // Strictly between entries prev and mid: entry mid applies.
                return ALL_VALUE[mid];
            }
            high = mid;
            continue;
        }

        // ascii is smaller than the probed code: continue past mid.
        if (mid + 1 > lastIndex)
        {
            return ALL_VALUE[lastIndex];
        }
        low = mid + 1;
    }
}

        
/// <summary>
/// Tests whether a two-byte code lies in the range that starts at
/// <c>FIRST_TABLE[i]</c>.
/// </summary>
/// <param name="i">Index of the range's lower bound (0..25).</param>
/// <param name="gb">The code to test.</param>
/// <returns><c>true</c> when <paramref name="gb"/> is inside the range.</returns>
private static bool match(int i, int gb)
{
    int lower = FIRST_TABLE[i];
    if (gb < lower)
    {
        return false;
    }

    // Skip entries that repeat the lower bound to find the real upper bound.
    // (Original note: the letter Z uses two labels.)
    int next = i + 1;
    while (next < 26 && FIRST_TABLE[next] == lower)
    {
        next++;
    }

    // The last table entry is an inclusive bound; all others are exclusive.
    return next == 26
        ? gb <= FIRST_TABLE[next]
        : gb < FIRST_TABLE[next];
}

        
/// <summary>
/// Widens a byte to an int.
/// </summary>
/// <param name="b">The byte value.</param>
/// <returns>The same value as an int (0..255).</returns>
private static int byte2Int(byte b)
{
    // A C# byte is unsigned, so the former "b < 0 ? 256 + b" branch could
    // never run; the plain widening conversion is all that is needed.
    return b;
}

        
/// <summary>
/// Tells whether a character is one of the delimiters recognised by <c>split</c>.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns><c>true</c> for a delimiter, otherwise <c>false</c>.</returns>
public static bool isSpliter(char c)
{
    // The published listing garbled the delimiter array into
    // { ','''';''' }: the separators are gone and two literals are empty.
    // The comma and semicolon are still legible.
    // NOTE(review): the two empty literals are assumed to have been the
    // full-width comma (U+FF0C) and full-width semicolon (U+FF1B) - confirm
    // against the original source.
    switch (c)
    {
        case ',':
        case '\uFF0C':
        case ';':
        case '\uFF1B':
            return true;
        default:
            return false;
    }
}

        
/// <summary>
/// Splits a string at every character accepted by <c>isSpliter</c>, after
/// trimming leading and trailing white space from the whole input.
/// </summary>
/// <param name="src">The text to split.</param>
/// <returns>
/// The non-empty pieces in order of appearance, or <c>null</c> when there are none.
/// </returns>
public static string[] split(string src)
{
    string text = src.Trim();
    ArrayList pieces = new ArrayList();
    StringBuilder current = new StringBuilder();

    foreach (char c in text)
    {
        if (!isSpliter(c))
        {
            current.Append(c);
            continue;
        }

        // A delimiter closes the running piece; empty pieces (leading or
        // repeated delimiters) are dropped.
        if (current.Length > 0)
        {
            pieces.Add(current.ToString());
            current.Length = 0;
        }
    }

    // Flush the piece that runs up to the end of the text.
    if (current.Length > 0)
    {
        pieces.Add(current.ToString());
    }

    if (pieces.Count == 0)
    {
        return null;
    }
    return (string[])pieces.ToArray(typeof(string));
}
    }

 

               

         通过异步方式即时获取检索内容(JAVASCRIPT脚本中实现缓存机制,保证性能)

             具体代码可以参考另一篇博客: http://www.cnblogs.com/cancanwyq/archive/2008/04/17/1158178.html

 

      这样基本上就很容易实现上面图例所示的功能了。

posted @ 2008-11-05 13:44  赵思伟  阅读(1132)  评论(2)  编辑  收藏  举报