C# tesseract2验证码识别

可识别一些简单验证码

//google的tesseract识别,导入tessnet2_32.dll,识别速度不如ocr
/// <summary>
/// Runs tessnet2 OCR on the image currently shown in <c>pictureBox3</c> and appends the
/// recognized text, followed by a line break, to <c>textBox3</c>.
/// </summary>
/// <remarks>
/// If the first pass yields nothing (or only "~"), the image is redrawn onto a canvas of at
/// least 100x30 pixels and recognized a second time. Any exception raised by either pass is
/// reported in a message box; a line is still appended to <c>textBox3</c> in that case.
/// </remarks>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button27_Click(object sender, EventArgs e)
{
    if (pictureBox3.Image == null)
    {
        return;
    }

    Bitmap map = (Bitmap)pictureBox3.Image;
    string txt = "";

    try
    {
        // NOTE(review): if tessnet2.Tesseract implements IDisposable, wrap this in a using block.
        tessnet2.Tesseract ocr = new tessnet2.Tesseract();

        // Restrict recognition to digits and upper-case Latin letters.
        ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        // Load the English language data shipped next to the executable.
        ocr.Init(Application.StartupPath + @"\tessdata\eng", "eng", false);

        txt = JoinRecognizedWords(ocr.DoOCR(map, Rectangle.Empty));

        // "~" is presumably what the engine reports when it recognizes nothing -- TODO confirm.
        if (string.IsNullOrEmpty(txt) || txt == "~")
        {
            // The image may be too small to recognize; retry on an enlarged copy.
            // The retry stays inside the try block so a failed Init/DoOCR above can never
            // reach a second, unguarded DoOCR call.
            txt = "";
            using (Bitmap enlarged = EnlargeForOcr(map))
            {
                txt = JoinRecognizedWords(ocr.DoOCR(enlarged, Rectangle.Empty));
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }

    // AppendText avoids re-copying the whole text on every click and leaves the caret at the end.
    textBox3.AppendText(txt + "\r\n");
    textBox3.ScrollToCaret();
}

/// <summary>
/// Concatenates the text of every recognized word, in the order the engine returned them.
/// </summary>
/// <param name="words">Words returned by <c>DoOCR</c>; may be null or empty.</param>
/// <returns>The concatenated text, or an empty string when there are no words.</returns>
private static string JoinRecognizedWords(List<tessnet2.Word> words)
{
    string joined = "";
    if (words == null)
    {
        return joined;
    }

    foreach (tessnet2.Word word in words)
    {
        joined += word.Text;
    }
    return joined;
}

/// <summary>
/// Draws <paramref name="source"/> stretched onto a new bitmap that is at least 100 pixels wide
/// and 30 pixels high. Dimensions already at or above those minimums are kept unchanged.
/// </summary>
/// <param name="source">The bitmap to copy.</param>
/// <returns>A new bitmap that the caller must dispose.</returns>
private static Bitmap EnlargeForOcr(Bitmap source)
{
    int width = Math.Max(source.Width, 100);
    int height = Math.Max(source.Height, 30);

    Bitmap enlarged = new Bitmap(width, height);
    try
    {
        using (Graphics g = Graphics.FromImage(enlarged))
        {
            g.DrawImage(
                source,
                new Rectangle(0, 0, width, height),
                new Rectangle(0, 0, source.Width, source.Height),
                GraphicsUnit.Pixel);
        }
    }
    catch
    {
        // Do not leak the half-initialized bitmap if drawing fails.
        enlarged.Dispose();
        throw;
    }
    return enlarged;
}

 

 

posted @ 2011-07-29 20:23  事理  阅读(5347)  评论(1)  编辑  收藏  举报