Wind-Eagle

No pain, no gain!
  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理
using System;
using System.Collections.Generic;
using System.Globalization;

namespace Encoding
{
    
internal class Program
    {
        
/// <summary>
        /// Console demo: round-trips sample text through several encodings, exercises the
        /// hanzi helper methods, prints the GB2312 level-1 hanzi table and then the hanzi
        /// in the Unicode range accepted by <c>ChineseLetterFromCode</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            // Encode and decode the same text with UTF-16 LE ("Unicode"), UTF-8 and GB2312.
            var gb1 = System.Text.Encoding.GetEncoding("Unicode");
            Console.WriteLine(gb1.GetString(gb1.GetBytes("测试")));

            var gb0 = System.Text.Encoding.GetEncoding("UTF-8");
            Console.WriteLine(gb0.GetString(gb0.GetBytes("测试")));

            // NOTE(review): on .NET Core / .NET 5+ GB2312 is presumably only available after
            // registering the code-pages encoding provider - confirm the target framework.
            var gb = System.Text.Encoding.GetEncoding("GB2312");
            Console.WriteLine(gb.GetString(gb.GetBytes("测试")));

            #region Test code

            Console.WriteLine(IsChineseLetter("测试", 0));
            Console.WriteLine(ChineseLetterCode("测试", 0));
            Console.WriteLine(ChineseLetterFromCode(ChineseLetterCode("测试", 0)));

            // Convert UTF-8 bytes to UTF-16 bytes by hand, then decode them as UTF-16.
            Console.WriteLine(gb1.GetString(
                Utf8_2_Unicode(gb0.GetBytes("测试test"))));

            //http://qkzz.net/article/3d697483-a5ae-4b50-9ae9-45dc6dd26141.htm
            //http://topic.csdn.net/u/20090617/18/1907627e-ce38-4ae5-9755-1cc349a4ed1a.html
            // There are 3755 level-1 hanzi in 40 * 94 = 3760 slots; the five slots
            // d7fa, d7fb, d7fc, d7fd and d7fe are empty, so the last row stops at d7f9.
            for (byte i = 0xb0; i < 0xd8; i++)
            {
                for (byte j = 0xa1; j < (i != 0xd7 ? 0xff : 0xfa); j++)
                {
                    Console.Write(gb.GetString(new[] {i, j}));

                    // Break each row of 94 into three console lines (and end the row).
                    if (j == 0xc7 || j == 0xee || j == (i != 0xd7 ? 0xfe : 0xf9))
                    {
                        Console.WriteLine();
                    }
                }
            }

            // First and last level-1 hanzi by index.
            Console.WriteLine(GetChineseLetterFromGb2312(0));
            Console.WriteLine(GetChineseLetterFromGb2312(3754));

            // Unicode code range of hanzi (0x4e00..0x9fff); ChineseLetterFromCode yields
            // an empty string for codes above its own upper bound.
            for (var i = 19968; i <= 40959; i++)
            {
                Console.Write(ChineseLetterFromCode(i));
            }

            #endregion

            // Keep the console window open until a key is read.
            Console.Read();
        }

        
/// <summary>
        /// Returns the GB2312 level-1 hanzi at the given zero-based position.
        /// </summary>
        /// <param name="rNum">Zero-based position, from 0 to 3754 inclusive.</param>
        /// <returns>A string decoded from the two GB2312 bytes computed for <paramref name="rNum"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="rNum"/> is negative or greater than 3754.
        /// </exception>
        public static string GetChineseLetterFromGb2312(int rNum)
        {
            if (rNum < 0 || rNum > 3754)
            {
                throw new ArgumentOutOfRangeException("rNum", "超出一级汉字的范围!");
            }

            var gb = System.Text.Encoding.GetEncoding("GB2312");

            // 94 characters per row: the row selects the lead byte (from 0xb0),
            // the column selects the trail byte (from 0xa1).
            var lead = (byte) (0xb0 + rNum / 94);
            var trail = (byte) (0xa1 + rNum % 94);
            return gb.GetString(new[] {lead, trail});
        }

        
/// <summary>
        /// Converts a UTF-8 byte stream into a UTF-16 little-endian ("Unicode") byte stream.
        /// </summary>
        /// <param name="input">
        /// UTF-8 bytes. Only one-, two- and three-byte sequences are supported. Bytes below
        /// 192 are copied through unchanged as the low byte of a character; continuation
        /// bytes are not validated.
        /// </param>
        /// <returns>Two bytes per decoded character, low byte first.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// A multi-byte sequence is cut off by the end of <paramref name="input"/>.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// A four-byte sequence is found; it cannot be represented in two bytes.
        /// </exception>
        /// <see href="http://hi.baidu.com/hyqsoft/blog/item/263795a164d1728346106464.html"/>
        public static byte[] Utf8_2_Unicode(byte[] input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // Every input byte produces at most two output bytes.
            var ret = new List<byte>(input.Length * 2);

            for (var i = 0; i < input.Length; i++)
            {
                var lead = input[i];

                if (lead >= 240) // 11110xxx
                {
                    throw new NotSupportedException("四字节的 UTF-8 字符不能转换成两字节的 Unicode 字符!");
                }

                if (lead >= 224) // 1110xxxx 10yyyyyy 10zzzzzz
                {
                    if (i + 2 >= input.Length)
                    {
                        throw new ArgumentException("Truncated three-byte UTF-8 sequence at the end of the input.", "input");
                    }

                    // Low byte: yy zzzzzz. High byte: xxxx yyyy.
                    ret.Add((byte) ((input[i + 2] & 63) | ((input[i + 1] & 3) << 6)));
                    // Mask the lead byte first so the shifted value always fits in a byte.
                    ret.Add((byte) (((lead & 15) << 4) | ((input[i + 1] & 60) >> 2)));
                    i += 2;
                }
                else if (lead >= 192) // 110xxxxx 10yyyyyy
                {
                    if (i + 1 >= input.Length)
                    {
                        throw new ArgumentException("Truncated two-byte UTF-8 sequence at the end of the input.", "input");
                    }

                    // Low byte: xx yyyyyy. High byte: 00000 xxx.
                    ret.Add((byte) ((input[i + 1] & 63) | ((lead & 3) << 6)));
                    ret.Add((byte) ((lead & 28) >> 2));
                    i += 1;
                }
                else
                {
                    // Single byte: becomes the low byte, with a zero high byte.
                    ret.Add(lead);
                    ret.Add(0);
                }
            }

            return ret.ToArray();
        }

        
#region 汉字与Unicode编码

        
/// <summary>
        /// Tells whether the UTF-16 code unit at <paramref name="index"/> lies in the
        /// hanzi range 0x4e00..0x9fa5 (inclusive).
        /// </summary>
        /// <param name="input">Text to inspect; null or empty yields <c>false</c>.</param>
        /// <param name="index">Zero-based position of the character to test.</param>
        /// <returns><c>true</c> when the character is inside the range; otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="input"/> is non-empty and <paramref name="index"/> is outside it.
        /// </exception>
        public static bool IsChineseLetter(string input, int index)
        {
            const int chfrom = 0x4e00;
            const int chend = 0x9fa5;

            if (string.IsNullOrEmpty(input))
            {
                return false;
            }

            if (index < 0 || index >= input.Length)
            {
                throw new ArgumentOutOfRangeException("index");
            }

            // The char value is the same number the UTF-16 LE bytes (low + high * 0x100) encode.
            int code = input[index];
            return code >= chfrom && code <= chend;
        }

        
/// <summary>
        /// Returns the Unicode code point of the character at <paramref name="index"/> when it
        /// lies in the hanzi range 0x4e00..0x9fa5 (inclusive).
        /// </summary>
        /// <param name="input">Text to inspect; null or empty yields 0.</param>
        /// <param name="index">Zero-based position of the character to read.</param>
        /// <returns>The code point, or 0 when the character is outside the range.</returns>
        /// <remarks>
        /// For non-empty input, an invalid <paramref name="index"/> is reported by
        /// <see cref="Char.ConvertToUtf32(string, int)"/>.
        /// </remarks>
        public static int ChineseLetterCode(string input, int index)
        {
            const int chfrom = 0x4e00;
            const int chend = 0x9fa5;

            if (string.IsNullOrEmpty(input))
            {
                return 0;
            }

            var code = Char.ConvertToUtf32(input, index);
            return code >= chfrom && code <= chend ? code : 0;
        }

        
/// <summary>
        /// Formats the hanzi code of the character at <paramref name="index"/> as upper-case
        /// hexadecimal with at least four digits.
        /// </summary>
        /// <param name="input">Text to inspect.</param>
        /// <param name="index">Zero-based position of the character to read.</param>
        /// <returns>
        /// The hexadecimal code, or an empty string when <c>ChineseLetterCode</c> returns 0.
        /// </returns>
        public static string ChineseLetterHexCode(string input, int index)
        {
            var hex = string.Empty;
            var codePoint = ChineseLetterCode(input, index);

            // 0 is the "not a hanzi" marker from ChineseLetterCode.
            if (codePoint != 0)
            {
                hex = codePoint.ToString("X4");
            }

            return hex;
        }

        
/// <summary>
        /// Converts a code in the hanzi range 0x4e00..0x9fa5 (inclusive) to its character.
        /// </summary>
        /// <param name="code">Unicode code of the character.</param>
        /// <returns>
        /// A one-character string, or an empty string when <paramref name="code"/> is outside
        /// the range.
        /// </returns>
        public static string ChineseLetterFromCode(int code)
        {
            const int chfrom = 0x4e00;
            const int chend = 0x9fa5;

            if (code < chfrom || code > chend)
            {
                return string.Empty;
            }

            // Every value in the accepted range fits in a single UTF-16 code unit.
            return ((char) code).ToString();
        }

        
/// <summary>
        /// Parses a hexadecimal code and converts it to a hanzi via <c>ChineseLetterFromCode</c>.
        /// </summary>
        /// <param name="hexCode">Hexadecimal digits without a prefix, e.g. "6D4B".</param>
        /// <returns>Whatever <c>ChineseLetterFromCode</c> returns for the parsed value.</returns>
        public static string ChineseLetterFromHexCode(string hexCode)
        {
            return ChineseLetterFromCode(int.Parse(hexCode, NumberStyles.HexNumber));
        }

        
#endregion
    }
}


本文包含了处理汉字与 Unicode 编码转换的多种方法,以及从 UTF-8 字节流得到 Unicode 字节流的方法(从而可以用 Unicode 编码的处理方法来处理 UTF-8 编码)。通过研究 GB2312 编码规范,得到了根据一个整数获取一个一级汉字的最简单方法;利用这个方法可以写出最简单的一级汉字验证码生成程序。