csharp:Optical Character Recognition
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Drawing;
using System.IO;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using MODI; // Microsoft Office Document Imaging

// Install the MODI component from the Office installation media first; a default
// Office installation does not include it, so the feature has to be added.
// Installation notes: http://support.microsoft.com/kb/982760
// COM reference: Microsoft Office Document Imaging 12.0 Type Library (Office 2007)
//             or Microsoft Office Document Imaging 11.0 Type Library (Office 2003)
// Simplified Chinese OCR engine:
// http://www.microsoft.com/downloads/thankyou.aspx?familyId=dd172063-9517-41d8-82af-29c38f7437b6&displayLang=zh-hk
namespace ToText
{
    /// <summary>
    /// Optical Character Recognition helpers built on Microsoft Office Document Imaging (MODI).
    /// 2014-05-07 Geovin Du
    /// </summary>
    public static class OCRGetstring
    {
        /// <summary>
        /// Builds the list of selectable OCR languages.
        /// </summary>
        /// <returns>
        /// A table with the columns <c>ID</c> (int), <c>LanguageName</c> (string) and
        /// <c>LanguageLCID</c> (string). Each <c>LanguageLCID</c> value is one of the keys
        /// understood by the <c>language</c> parameter of <see cref="ExtractText"/> and
        /// <see cref="getFileToOCR"/>.
        /// </returns>
        public static DataTable getLanguageList()
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("ID", typeof(int));
            dt.Columns.Add("LanguageName", typeof(string));
            dt.Columns.Add("LanguageLCID", typeof(string));

            dt.Rows.Add(1, "简体中文", "2052");
            dt.Rows.Add(2, "繁体中文", "1028");
            dt.Rows.Add(3, "英语", "9");
            dt.Rows.Add(4, "捷克语", "5");
            dt.Rows.Add(5, "丹麦语", "6");
            dt.Rows.Add(6, "德语", "7");
            dt.Rows.Add(7, "希腊语", "8");
            dt.Rows.Add(8, "西班牙语", "10");
            dt.Rows.Add(9, "芬兰语", "11");
            dt.Rows.Add(10, "法语", "12");
            dt.Rows.Add(11, "匈牙利语", "14");
            dt.Rows.Add(12, "意大利语", "16");
            dt.Rows.Add(13, "日语", "17");
            dt.Rows.Add(14, "韩语", "18");
            dt.Rows.Add(15, "荷兰语", "19");
            dt.Rows.Add(16, "挪威语", "20");
            dt.Rows.Add(17, "波兰语", "21");
            dt.Rows.Add(18, "葡萄牙语", "22");
            dt.Rows.Add(19, "俄语", "25");
            dt.Rows.Add(20, "瑞典语", "29");
            dt.Rows.Add(21, "土耳其语", "31");
            return dt;
        }

        /// <summary>
        /// Maps a language key (the <c>LanguageLCID</c> strings produced by
        /// <see cref="getLanguageList"/>) to the matching MODI language constant.
        /// </summary>
        /// <param name="sValue">Language key such as "2052" or "9".</param>
        /// <returns>The MODI language; English for any unrecognised (or null) key.</returns>
        private static MODI.MiLANGUAGES GetLanguageType(string sValue)
        {
            switch (sValue)
            {
                case "2052": return MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED;
                case "5": return MODI.MiLANGUAGES.miLANG_CZECH;
                case "6": return MODI.MiLANGUAGES.miLANG_DANISH;
                case "7": return MODI.MiLANGUAGES.miLANG_GERMAN;
                case "8": return MODI.MiLANGUAGES.miLANG_GREEK;
                case "9": return MODI.MiLANGUAGES.miLANG_ENGLISH;
                case "10": return MODI.MiLANGUAGES.miLANG_SPANISH;
                case "11": return MODI.MiLANGUAGES.miLANG_FINNISH;
                case "12": return MODI.MiLANGUAGES.miLANG_FRENCH;
                case "14": return MODI.MiLANGUAGES.miLANG_HUNGARIAN;
                case "16": return MODI.MiLANGUAGES.miLANG_ITALIAN;
                case "17": return MODI.MiLANGUAGES.miLANG_JAPANESE;
                case "18": return MODI.MiLANGUAGES.miLANG_KOREAN;
                case "19": return MODI.MiLANGUAGES.miLANG_DUTCH;
                case "20": return MODI.MiLANGUAGES.miLANG_NORWEGIAN;
                case "21": return MODI.MiLANGUAGES.miLANG_POLISH;
                case "22": return MODI.MiLANGUAGES.miLANG_PORTUGUESE;
                case "25": return MODI.MiLANGUAGES.miLANG_RUSSIAN;
                case "29": return MODI.MiLANGUAGES.miLANG_SWEDISH;
                case "31": return MODI.MiLANGUAGES.miLANG_TURKISH;
                case "1028": return MODI.MiLANGUAGES.miLANG_CHINESE_TRADITIONAL;
                default: return MODI.MiLANGUAGES.miLANG_ENGLISH;
            }
        }

        /// <summary>
        /// Best-effort cleanup of a MODI document: closes it without saving and releases
        /// the COM wrapper so the underlying file is no longer held by this process.
        /// </summary>
        /// <param name="doc">The document to close; may be null.</param>
        private static void CloseDocument(MODI.Document doc)
        {
            if (doc == null)
            {
                return;
            }

            try
            {
                doc.Close(false);
            }
            catch (COMException)
            {
                // Cleanup only: a failing Close (e.g. after Create itself failed) must not
                // hide the exception that is already propagating from the OCR call.
            }
            finally
            {
                Marshal.ReleaseComObject(doc);
            }
        }

        /// <summary>
        /// Converts an in-memory image to text.
        /// </summary>
        /// <remarks>
        /// The image is drawn at the top-left corner of a canvas that is at least
        /// 1024x768 pixels, saved as a temporary BMP file and handed to MODI. Both
        /// temporary files are deleted before the method returns.
        /// NOTE(review): the minimum canvas size is presumably there because MODI handles
        /// small images poorly; the original code does not state the reason.
        /// </remarks>
        /// <param name="image">The image to recognise.</param>
        /// <param name="language">Language key, see <see cref="getLanguageList"/>.</param>
        /// <returns>The text of the first page, or an empty string if MODI reports none.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
        public static string ExtractText(this System.Drawing.Image image, string language)
        {
            if (image == null)
            {
                // Checked before the temp file is created so nothing is left behind.
                throw new ArgumentNullException("image");
            }

            string tmpFile = Path.GetTempFileName();
            string bmpFile = tmpFile + ".bmp";
            try
            {
                using (Bitmap canvas = new Bitmap(Math.Max(image.Width, 1024), Math.Max(image.Height, 768)))
                {
                    using (Graphics gfx = Graphics.FromImage(canvas))
                    {
                        gfx.DrawImage(image, new Rectangle(0, 0, image.Width, image.Height));
                    }

                    canvas.Save(bmpFile, ImageFormat.Bmp);
                }

                MODI.Document doc = new MODI.Document();
                try
                {
                    doc.Create(bmpFile);
                    doc.OCR(GetLanguageType(language), true, true);

                    MODI.Image page = (MODI.Image)doc.Images[0];
                    return page.Layout.Text ?? string.Empty;
                }
                finally
                {
                    // The document must be closed before the BMP below can be deleted.
                    CloseDocument(doc);
                }
            }
            finally
            {
                File.Delete(tmpFile);
                File.Delete(bmpFile);
            }
        }

        /// <summary>
        /// Converts an image file on disk to text.
        /// </summary>
        /// <param name="fileToOCR">Path of the image file.</param>
        /// <param name="language">Language key, see <see cref="getLanguageList"/>.</param>
        /// <returns>
        /// The concatenated text of every page in the file, or an empty string when the
        /// file does not exist.
        /// </returns>
        public static string getFileToOCR(string fileToOCR, string language)
        {
            if (!File.Exists(fileToOCR))
            {
                return string.Empty;
            }

            StringBuilder sb = new StringBuilder();
            MODI.Document md = new MODI.Document();
            try
            {
                md.Create(fileToOCR);
                md.OCR(GetLanguageType(language), true, true);

                for (int i = 0; i < md.Images.Count; i++)
                {
                    MODI.Image page = (MODI.Image)md.Images[i];
                    sb.Append(page.Layout.Text);
                }
            }
            finally
            {
                // Close even when Create/OCR throws, so the source file is not left locked.
                CloseDocument(md);
            }

            return sb.ToString();
        }
    }
}
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;

namespace ToText
{
    /// <summary>
    /// Form that lets the user pick an image file and shows the text that the native
    /// AspriseOCR library returns for it.
    /// geovindu
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        #region DllImport
        // Entry points exported by the native AspriseOCR.dll. Each returns a pointer that
        // this form reads as an ANSI string via Marshal.PtrToStringAnsi.
        [DllImport("AspriseOCR.dll", EntryPoint = "OCR", CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr OCR(string file, int type);

        [DllImport("AspriseOCR.dll", EntryPoint = "OCRpart", CallingConvention = CallingConvention.Cdecl)]
        static extern IntPtr OCRpart(string file, int type, int startX, int startY, int width, int height);

        [DllImport("AspriseOCR.dll", EntryPoint = "OCRBarCodes", CallingConvention = CallingConvention.Cdecl)]
        static extern IntPtr OCRBarCodes(string file, int type);

        [DllImport("AspriseOCR.dll", EntryPoint = "OCRpartBarCodes", CallingConvention = CallingConvention.Cdecl)]
        static extern IntPtr OCRpartBarCodes(string file, int type, int startX, int startY, int width, int height);
        #endregion

        #region Convert button event
        /// <summary>
        /// Convert button handler: runs OCR over the selected image and shows the result.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void button2_Click(object sender, EventArgs e)
        {
            int startX = 0;
            int startY = 0;
            int width = -1;
            int height = -1;

            string img_path = txt_imgpath.Text; // image path
            if (String.IsNullOrEmpty(img_path)) // an image must be selected first
            {
                MessageBox.Show("请先选择图片!");
                return;
            }

            try
            {
                // Dispose the image right after measuring it; an undisposed Image keeps
                // the file open.
                using (Image img = Image.FromFile(img_path))
                {
                    width = img.Width;
                    height = img.Height;
                }
            }
            catch (Exception ex)
            {
                // Show why loading failed; the stack trace alone does not say.
                MessageBox.Show(ex.Message);
                // NOTE(review): OCR is still attempted below with width/height of -1,
                // as before. Presumably AspriseOCR treats -1 as "whole image" - confirm
                // against the library documentation.
            }

            txt_result.Text = Marshal.PtrToStringAnsi(OCRpart(img_path, -1, startX, startY, width, height));
        }
        #endregion

        #region Browse events
        /// <summary>
        /// Shows the file dialog and copies the chosen path into the path textbox.
        /// The textbox is left unchanged when the dialog is cancelled.
        /// </summary>
        private void BrowseForImage()
        {
            if (openFileDialog1.ShowDialog() == DialogResult.OK)
            {
                txt_imgpath.Text = openFileDialog1.FileName;
            }
        }

        /// <summary>
        /// Browse button handler.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void btn_imgpath_Click(object sender, EventArgs e)
        {
            BrowseForImage();
        }

        /// <summary>
        /// Clicking the path textbox also opens the image browser.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void txt_imgpath_Click(object sender, EventArgs e)
        {
            BrowseForImage();
        }
        #endregion

        /// <summary>
        /// Form load handler; intentionally empty.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void Form1_Load(object sender, EventArgs e)
        {
        }
    }
}
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)