汉字转换拼音部分代码

来源:http://www.cnblogs.com/cancanwyq/archive/2008/10/17/1313171.html

public class GBToPY
    {
        // Boundary table for first-letter lookup (27 entries: indices 0..25 for
        // 'A'..'Z' plus a final upper bound at index 26).
        // getFirstPY builds a 16-bit value from the two encoded bytes
        // (high byte << 8 | low byte) and match() tests it against
        // FIRST_TABLE[i] and the next differing entry.
        // Adjacent equal entries (indices 7/8 and 19/20/21) are skipped over by
        // match(), and because getFirstPY stops at the first matching index the
        // later duplicate indices are never returned.
        private static int[] FIRST_TABLE = { 45217, 45253, 45761, 46318, 46826,
                47010, 47297, 47614, 47614, 48119, 49062, 49324, 49896, 50371,
                50614, 50622, 50906, 51387, 51446, 52218, 52218, 52218, 52698,
                52980, 53689, 54481, 55289 };

        // Pinyin syllables returned by getSpellByAscii. The array is indexed in
        // parallel with ALL_CODE: the index found in ALL_CODE is used directly
        // as the index into this array.
        // NOTE(review): this listing cuts the table off with ".." — it does not
        // compile as shown; the complete table must be restored and must have
        // the same length as ALL_CODE.
        private static string[] ALL_VALUE = { "zuo", "zun", "zui", "zuan", "zu",
                "zou", "zong", "zi", "zhuo", "zhun", "zhui", "zhuang", "zhuan",
                "zhuai", "zhua", "zhu", ..};

        // Code thresholds searched by getSpellByAscii, in the same form that
        // getCnAscii produces (256 * high byte + low byte - 65536, hence the
        // negative values). The visible entries are in descending order, which
        // the search in getSpellByAscii relies on.
        // NOTE(review): this listing is truncated (it ends with "-13658.") and
        // does not compile as shown; getSpellByAscii accepts codes down to
        // -20319, so the full table presumably continues to that value — confirm
        // against the original data.
        private static int[] ALL_CODE = { -10254, -10256, -10260, -10262,
                -10270, -10274, -10281, -10296, -10307, -10309, -10315, -10322,
                -10328, -10329, -10331, -10519, -10533, -10544, -10587, -10764,
                -10780, -10790, -10800, -10815, -10832, -10838, -11014, -11018,
                -11019, -11020, -11024, -11038, -11041, -11045, -11052, -11055,
                -11067, -11077, -11097, -11303, -11324, -11339, -11340, -11358,
                -12607, -12802, -12812, -12829, -12831, -12838, -12849, -12852,
                -12858, -12860, -12871, -12875, -12888, -13060, -13063, -13068,
                -13076, -13091, -13095, -13096, -13107, -13120, -13138, -13147,
                -13318, -13326, -13329, -13340, -13343, -13356, -13359, -13367,
                -13383, -13387, -13391, -13395, -13398, -13400, -13404, -13406,
                -13601, -13611, -13658.};

        
        /// <summary>
        /// Returns the full pinyin spelling for a single character.
        /// </summary>
        /// <param name="gb2312">The character to convert.</param>
        /// <returns>
        /// The spelling found by <c>getSpellByAscii</c>; when <c>getCnAscii</c>
        /// reports 0 or no spelling is found, a one-character string holding
        /// the input character itself.
        /// </returns>
        public static string getAllPY(char gb2312)
        {
            int code = getCnAscii(gb2312);

            // A code of 0 means the code lookup failed, so skip the spelling search.
            string spell = (code == 0) ? null : getSpellByAscii(code);

            // No spelling available: hand the character back unchanged.
            return spell ?? new string(gb2312, 1);
        }

        /// <summary>
        /// Returns the upper-case pinyin initial ('A'..'Z') of a character.
        /// </summary>
        /// <param name="ch">The character to convert.</param>
        /// <returns>
        /// The initial letter when the character's two-byte "gb2312" code falls
        /// inside one of the ranges of <c>FIRST_TABLE</c>; otherwise the input
        /// character unchanged (ASCII input, characters without a two-byte
        /// encoding, and codes outside the table all pass through).
        /// </returns>
        public static char getFirstPY(char ch)
        {
            // ASCII passes through untouched (char is unsigned, so no lower bound is needed).
            if (ch <= 0x7F)
            {
                return ch;
            }

            byte[] bytes = Encoding.GetEncoding("gb2312").GetBytes(new string(ch, 1));

            // Without two bytes there is no double-byte code to look up, e.g. when
            // the encoder emits a single substitute byte for a character it cannot
            // represent. Previously this case fell through and indexed bytes[1],
            // throwing IndexOutOfRangeException.
            if (bytes.Length < 2)
            {
                return ch;
            }

            // Combine high and low byte into one 16-bit code.
            int gb = ((bytes[0] << 8) & 0xff00) + (bytes[1] & 0xff);
            if (gb < FIRST_TABLE[0])
            {
                return ch;
            }

            for (int i = 0; i < 26; ++i)
            {
                if (match(i, gb))
                {
                    // 65 is 'A'; index i selects the i-th letter.
                    return (char)(65 + i);
                }
            }

            // Code lies above every range in the table.
            return ch;
        }

        /// <summary>
        /// Converts every character of a string with the single-character
        /// <c>getFirstPY</c> overload and concatenates the results.
        /// </summary>
        /// <param name="src">The text to convert; must not be null.</param>
        /// <returns>A string with one output character per input character.</returns>
        public static string getFirstPY(string src)
        {
            StringBuilder initials = new StringBuilder();
            foreach (char current in src)
            {
                initials.Append(getFirstPY(current));
            }
            return initials.ToString();
        }

        /// <summary>
        /// Computes the numeric code of a character from its "gb2312" encoding.
        /// </summary>
        /// <param name="cn">The character to encode.</param>
        /// <returns>
        /// For an ASCII character, its single byte value; for a two-byte
        /// encoding, <c>256 * high + low - 65536</c> (a negative number);
        /// 0 when no usable code could be produced.
        /// </returns>
        private static int getCnAscii(char cn)
        {
            byte[] bytes = Encoding.GetEncoding("gb2312").GetBytes(new string(cn, 1));
            if (bytes == null || bytes.Length == 0 || bytes.Length > 2)
            {
                return 0;
            }

            if (bytes.Length == 1)
            {
                // Only ASCII characters legitimately encode to one byte here.
                // For any other character a single byte is a substitute produced
                // by the encoder (normally '?'); returning it would make callers
                // treat the substitute as the character's real code, so report
                // failure instead.
                return cn <= 0x7F ? bytes[0] : 0;
            }

            int highByte = bytes[0];
            int lowByte = bytes[1];
            // Shift the 16-bit code into the negative range used by the lookup table.
            return (256 * highByte + lowByte) - 256 * 256;
        }

        /// <summary>
        /// Looks up the spelling for a code as produced by <c>getCnAscii</c>.
        /// </summary>
        /// <param name="ascii">The character code to look up.</param>
        /// <returns>
        /// For codes strictly between 0 and 160, a one-character string of that
        /// code; for codes in [-20319, -10247], the entry of <c>ALL_VALUE</c>
        /// selected by searching <c>ALL_CODE</c>; otherwise null.
        /// </returns>
        private static string getSpellByAscii(int ascii)
        {
            // Single-byte character: it is its own spelling.
            if (ascii > 0 && ascii < 160)
            {
                return new string((char)ascii, 1);
            }

            // Outside the range covered by the code table.
            if (ascii < -20319 || ascii > -10247)
            {
                return null;
            }

            int tail = ALL_CODE.Length - 1;
            int low = 0;
            int high = tail;

            // Halving search over ALL_CODE: a larger code moves the window toward
            // lower indices, a smaller code toward higher indices.
            for (;;)
            {
                int probe = (low + high) >> 1;
                int code = ALL_CODE[probe];

                if (ascii == code)
                {
                    return ALL_VALUE[probe];
                }

                if (ascii < code)
                {
                    // Look further toward the end, or settle on the last entry.
                    if (probe + 1 > tail)
                    {
                        return ALL_VALUE[tail];
                    }
                    low = probe + 1;
                    continue;
                }

                // From here on ascii > code.
                if (probe == 0)
                {
                    return ALL_VALUE[0];
                }
                if (ascii < ALL_CODE[probe - 1])
                {
                    // ascii lies between this entry and the previous one.
                    return ALL_VALUE[probe];
                }
                high = probe;
            }
        }

        /// <summary>
        /// Tests whether a two-byte code falls in the range that starts at
        /// <c>FIRST_TABLE[i]</c>.
        /// </summary>
        /// <param name="i">Index of the range start in <c>FIRST_TABLE</c>.</param>
        /// <param name="gb">The 16-bit code to test.</param>
        /// <returns>
        /// True when <paramref name="gb"/> is at least <c>FIRST_TABLE[i]</c> and
        /// below the next differing entry (the bound is inclusive when that
        /// entry is index 26, the last one).
        /// </returns>
        private static bool match(int i, int gb)
        {
            int lower = FIRST_TABLE[i];
            if (gb < lower)
            {
                return false;
            }

            // Skip entries that repeat the same start value so the upper bound
            // is the next distinct boundary.
            int next = i + 1;
            while (next < 26 && FIRST_TABLE[next] == lower)
            {
                next++;
            }

            int upper = FIRST_TABLE[next];
            return (next == 26) ? gb <= upper : gb < upper;
        }

        /// <summary>
        /// Widens a byte to an int in the range 0..255.
        /// </summary>
        /// <param name="b">The byte value.</param>
        /// <returns>The same value as an int.</returns>
        private static int byte2Int(byte b)
        {
            // C# byte is unsigned (0..255), so the former "b < 0" correction
            // branch could never run; the implicit widening is already correct.
            return b;
        }

        /// <summary>
        /// Tells whether a character is a list separator.
        /// </summary>
        /// <param name="c">The character to test.</param>
        /// <returns>
        /// True for a comma or semicolon, in either the ASCII or the full-width
        /// (U+FF0C, U+FF1B) form; false otherwise.
        /// </returns>
        public static bool isSpliter(char c)
        {
            // The separator list used to contain ',' and ';' twice each.
            // NOTE(review): the duplicates were presumably the full-width forms
            // lost in transcription; they are restored here via escapes so the
            // source encoding cannot fold them back into ASCII — confirm.
            switch (c)
            {
                case ',':
                case ';':
                case '\uFF0C': // full-width comma
                case '\uFF1B': // full-width semicolon
                    return true;
                default:
                    return false;
            }
        }

        /// <summary>
        /// Splits text into the non-empty pieces found between separator
        /// characters (as decided by <c>isSpliter</c>).
        /// </summary>
        /// <param name="src">The text to split; may be null.</param>
        /// <returns>
        /// The pieces in order of appearance, or null when there are none
        /// (including when <paramref name="src"/> is null). The text is trimmed
        /// as a whole first; whitespace next to an inner separator is kept.
        /// </returns>
        public static string[] split(string src)
        {
            // Null input used to throw NullReferenceException; treat it like
            // any other input that yields no pieces.
            if (src == null)
            {
                return null;
            }

            string text = src.Trim();
            System.Collections.Generic.List<string> pieces =
                new System.Collections.Generic.List<string>();
            StringBuilder current = new StringBuilder();

            foreach (char c in text)
            {
                if (!isSpliter(c))
                {
                    current.Append(c);
                }
                else if (current.Length > 0)
                {
                    // A separator closes the current piece; runs of separators
                    // (leading ones included) produce nothing.
                    pieces.Add(current.ToString());
                    current.Length = 0;
                }
            }

            // Flush the piece that runs to the end of the text.
            if (current.Length > 0)
            {
                pieces.Add(current.ToString());
            }

            // Callers rely on null (not an empty array) for "no pieces".
            return pieces.Count > 0 ? pieces.ToArray() : null;
        }
    }
posted @ 2009-12-14 11:26  yangjing  阅读(343)  评论(0)  编辑  收藏  举报