[转]C#写一个后台运行的文字识别工具
最近做笔记需要一个截图后获取图中文字的轻型程序,最好直接按快捷键就能识别并将文字自动复制。网上的应该都是要钱的,或者东西太杂了看着乱得慌,于是决定自己写。我C#只稍微学了一点,讲得不好、代码不规范的地方还请见谅。
我使用的是百度的文字识别api,请先自己去申请一下资源,免费的,一个月免费识别1000次好像是。然后截图直接用的Snipaste这个软件,贼好用。
api主页:https://ai.baidu.com/tech/ocr/general
然后领取免费资源,创建应用使用。
一、创建项目
创建一个窗口应用项目
然后右键引用 - 管理NuGet程序包,安装Newtonsoft.Json和System.Net.Http。另外代码里用到了System.Web命名空间下的HttpUtility,如果编译时提示找不到,需要在"引用 - 添加引用"里勾选System.Web程序集。
二、编写基本识别代码
首先去百度api的控制台获取一下api key和secret key,网址:百度智能云-登录
然后进入文档,里面请求的基本代码已经给我们写好了,直接复制就ok了,文字识别我们用高精度版的。
获取AccessToken文档:https://ai.baidu.com/ai-doc/REFERENCE/Ck3dwjhhu
文字识别文档:https://ai.baidu.com/ai-doc/OCR/1k3h7y3db
MainForm.cs
- using System;
- using System.Collections.Generic;
- using System.Net.Http;
- using System.Text;
- using System.Windows.Forms;
- using Newtonsoft.Json;
- using System.Text.RegularExpressions;
- using System.IO;
- using System.Net;
- using System.Web;
-
- namespace TextRecognition
- {
- /// <summary>
- /// Main form of the tool: fetches a Baidu OCR access token when it loads and
- /// recognizes the text of an image held on the clipboard.
- /// </summary>
- public partial class MainForm : Form
- {
-     public MainForm()
-     {
-         InitializeComponent();
-     }
-
-     // Access token appended to every OCR request; filled in by getAccessToken().
-     string ACCESS_TOKEN = "";
-     // API key of the application (copy it from the Baidu console).
-     string API_KEY = "控制台查询";
-     // Secret key of the application (copy it from the Baidu console).
-     string API_SECRET = "控制台查询";
-     // Text of the latest recognition, or a description of the latest failure.
-     string resultText = "";
-
-     private void MainForm_Load(object sender, EventArgs e)
-     {
-         getAccessToken();
-     }
-
-     /// <summary>
-     /// Requests an access token with the client-credentials grant and stores it
-     /// in ACCESS_TOKEN. Blocks the calling thread until the HTTP call completes;
-     /// network and JSON errors propagate to the caller.
-     /// </summary>
-     private void getAccessToken()
-     {
-         String authHost = "https://aip.baidubce.com/oauth/2.0/token";
-         List<KeyValuePair<String, String>> paraList = new List<KeyValuePair<string, string>>
-         {
-             new KeyValuePair<string, string>("grant_type", "client_credentials"),
-             new KeyValuePair<string, string>("client_id", API_KEY),
-             new KeyValuePair<string, string>("client_secret", API_SECRET)
-         };
-
-         String result;
-         // The client is only needed for this single call, so release its sockets right away.
-         using (HttpClient client = new HttpClient())
-         using (FormUrlEncodedContent form = new FormUrlEncodedContent(paraList))
-         using (HttpResponseMessage response = client.PostAsync(authHost, form).Result)
-         {
-             result = response.Content.ReadAsStringAsync().Result;
-         }
-
-         // Parse the JSON reply; an empty body deserializes to null.
-         Dictionary<string, string> res = JsonConvert.DeserializeObject<Dictionary<string, string>>(result);
-         string token;
-         if (res != null && res.TryGetValue("access_token", out token) && !string.IsNullOrEmpty(token))
-         {
-             ACCESS_TOKEN = token;
-         }
-     }
-
-     /// <summary>
-     /// Runs when the user triggers recognition: if the clipboard holds a bitmap,
-     /// sends it to the OCR service and copies the recognized text back to the
-     /// clipboard. Any failure is recorded in resultText instead of being thrown.
-     /// </summary>
-     private void doRecognize()
-     {
-         // GetDataObject returns null when the clipboard is empty.
-         IDataObject iData = Clipboard.GetDataObject();
-         // Only handle the case where the copied content is an image.
-         if (iData == null || !iData.GetDataPresent(DataFormats.Bitmap))
-         {
-             return;
-         }
-
-         try
-         {
-             string base64;
-             using (var image = Clipboard.GetImage())
-             {
-                 // The clipboard may have changed since the check above.
-                 if (image == null)
-                 {
-                     return;
-                 }
-                 using (MemoryStream ms = new MemoryStream())
-                 {
-                     image.Save(ms, System.Drawing.Imaging.ImageFormat.Png);
-                     base64 = Convert.ToBase64String(ms.ToArray());
-                 }
-             }
-
-             recognize(base64);
-             // Clipboard.SetText rejects null/empty text, so leave the clipboard
-             // untouched when nothing was recognized.
-             if (!string.IsNullOrEmpty(resultText))
-             {
-                 Clipboard.SetText(resultText);
-             }
-         }
-         catch (Exception ex)
-         {
-             resultText = "异常:" + ex.ToString();
-         }
-     }
-
-     /// <summary>
-     /// Posts a base64-encoded image to the accurate_basic OCR endpoint and stores
-     /// every recognized "words" string, one per line, in resultText.
-     /// Network and JSON parsing errors propagate to the caller.
-     /// </summary>
-     /// <param name="base64">Base64 encoding of the image bytes.</param>
-     private void recognize(string base64)
-     {
-         string host = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + ACCESS_TOKEN;
-         HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
-         request.Method = "POST";
-         request.KeepAlive = true;
-         request.ContentType = "application/x-www-form-urlencoded";
-
-         // Form body: the URL-encoded base64 image. URL-encoded text is plain ASCII.
-         String str = "image=" + HttpUtility.UrlEncode(base64);
-         byte[] buffer = Encoding.ASCII.GetBytes(str);
-         request.ContentLength = buffer.Length;
-         using (Stream body = request.GetRequestStream())
-         {
-             body.Write(buffer, 0, buffer.Length);
-         }
-
-         string result;
-         using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
-         using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
-         {
-             result = reader.ReadToEnd();
-         }
-
-         // Collect every string-valued "words" property with a real JSON parser, so
-         // escaped quotes and \u sequences inside the recognized text are decoded
-         // instead of cutting the match short.
-         StringBuilder sb = new StringBuilder();
-         Newtonsoft.Json.Linq.JToken root = Newtonsoft.Json.Linq.JToken.Parse(result);
-         foreach (Newtonsoft.Json.Linq.JToken words in root.SelectTokens("$..words"))
-         {
-             if (words.Type == Newtonsoft.Json.Linq.JTokenType.String)
-             {
-                 sb.AppendLine((string)words);
-             }
-         }
-         resultText = sb.ToString();
-         Console.WriteLine(resultText);
-     }
-
- }
- }
此时可以在窗口里添加个按钮测试一下doRecognize识别功能,用截图工具截个图,再点按钮,可以看到他已经识别完成并且给我们把结果复制到剪切板了
三、监听快捷键按下
我想实现当按下alt+3的时候执行doRecognize函数,需要全局hook键盘。
首先在项目里创建类文件Hook.cs,然后编辑内容
Hook.cs
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Linq;
- using System.Runtime.InteropServices;
- using System.Text;
- using System.Threading.Tasks;
- using System.Windows.Forms;
-
- namespace TextRecognition
- {
- /// <summary>
- /// Installs a global low-level keyboard hook (WH_KEYBOARD_LL) and reacts to Alt+3.
- /// </summary>
- class Hook
- {
-     // NOTE(review): the Win32 headers declare the hook handle, wParam, lParam and the
-     // return value as pointer-sized. The int-based signatures below are kept unchanged
-     // for compatibility with existing callers; consider IntPtr if this is built for x64.
-     [DllImport("user32.dll", SetLastError = true)]
-     public static extern int SetWindowsHookEx(int idHook, HookProc lpfn, IntPtr hInstance, int threadId);
-
-     [DllImport("user32.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
-     public static extern bool UnhookWindowsHookEx(int idHook);
-
-     [DllImport("user32.dll")]
-     public static extern int CallNextHookEx(int idHook, int nCode, int wParam, IntPtr lParam);
-
-     [DllImport("kernel32.dll")]
-     public static extern int GetCurrentThreadId();
-
-     [DllImport("kernel32.dll")]
-     public static extern IntPtr GetModuleHandle(string name);
-
-     [DllImport("User32.dll")]
-     public static extern void keybd_event(Byte bVk, Byte bScan, Int32 dwFlags, Int32 dwExtraInfo);
-
-     // Hook type: low-level keyboard hook.
-     public const int WH_KEYBOARD_LL = 13;
-     public delegate int HookProc(int nCode, int wParam, IntPtr lParam);
-
-     // Callback delegate; kept in a field so it stays referenced while the hook is installed.
-     HookProc KeyBoardHookProcedure;
-
-     // Handle returned by SetWindowsHookEx; 0 while no hook is installed.
-     public int hHook;
-
-     // Layout of the data that lParam points to in the hook callback.
-     [StructLayout(LayoutKind.Sequential)]
-     public class KeyBoardHookStruct
-     {
-         public int vkCode;
-         public int scanCode;
-         public int flags;
-         public int time;
-         public int dwExtraInfo;
-     }
-
-     /// <summary>
-     /// Installs the keyboard hook. Does nothing if a hook is already installed.
-     /// </summary>
-     /// <exception cref="System.ComponentModel.Win32Exception">
-     /// SetWindowsHookEx failed to install the hook.
-     /// </exception>
-     public void Hook_Start()
-     {
-         if (hHook != 0)
-         {
-             // Installing again would overwrite (and leak) the existing handle.
-             return;
-         }
-
-         KeyBoardHookProcedure = new HookProc(KeyBoardHookProc);
-         int error = 0;
-         using (Process process = Process.GetCurrentProcess())
-         using (ProcessModule module = process.MainModule)
-         {
-             hHook = SetWindowsHookEx(WH_KEYBOARD_LL, KeyBoardHookProcedure, GetModuleHandle(module.ModuleName), 0);
-             if (hHook == 0)
-             {
-                 // Read the error code before any other call can overwrite it.
-                 error = Marshal.GetLastWin32Error();
-             }
-         }
-
-         if (hHook == 0)
-         {
-             throw new System.ComponentModel.Win32Exception(error);
-         }
-     }
-
-     /// <summary>
-     /// Removes the keyboard hook if one is installed.
-     /// </summary>
-     public void Hook_Clear()
-     {
-         if (hHook != 0)
-         {
-             UnhookWindowsHookEx(hHook);
-             hHook = 0;
-         }
-     }
-
-     /// <summary>
-     /// Hook callback. Swallows Alt+3 and reports it once per key press; every
-     /// other keyboard event is passed on to the next hook.
-     /// </summary>
-     private int KeyBoardHookProc(int nCode, int wParam, IntPtr lParam)
-     {
-         // wParam identifies the keyboard message; with Alt held, key presses
-         // arrive as WM_SYSKEYDOWN.
-         const int WM_KEYDOWN = 0x0100;
-         const int WM_SYSKEYDOWN = 0x0104;
-
-         if (nCode >= 0)
-         {
-             KeyBoardHookStruct kbh = (KeyBoardHookStruct)Marshal.PtrToStructure(lParam, typeof(KeyBoardHookStruct));
-             // Alt + 3
-             if (kbh.vkCode == (int)Keys.D3 && Control.ModifierKeys == Keys.Alt)
-             {
-                 // The callback also runs for the key-up event; react to the press
-                 // only so the hotkey is not reported twice.
-                 if (wParam == WM_KEYDOWN || wParam == WM_SYSKEYDOWN)
-                 {
-                     Console.WriteLine("按了alt+3");
-                 }
-                 // Do not pass the keyboard message any further.
-                 return 1;
-             }
-         }
-         return CallNextHookEx(hHook, nCode, wParam, lParam);
-     }
- }
- }
再来看一下控制台输出,发现可以获取到按下的事件了,如果想换成别的组合键可以上网搜一下该怎么写。
四、按键触发doRecognize函数
我们按下alt+3被hook到之后,要调用MainForm.cs里定义的doRecognize函数。当时学的时候没学太多,只记得个委托什么什么的可以实现这样的功能,我就用这个委托实现了。当然也有别的方式,比如直接抽离成工具类什么的。
我讲不大清楚,大家既然都用c#了应该比我懂得多,我就直接放代码了。
Hook.cs
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Linq;
- using System.Runtime.InteropServices;
- using System.Text;
- using System.Threading.Tasks;
- using System.Windows.Forms;
-
- namespace TextRecognition
- {
-     // Namespace and class name match the earlier Hook.cs listing and the
-     // Hook.recHandler reference in MainForm.cs.
-     class Hook
-     {
-         // Only the members that are new or changed are shown below.
-
-         // Signature of the callback raised when Alt+3 is pressed.
-         public delegate void recHandler(object sender, EventArgs e);
-
-         // Raised from the hook callback when Alt+3 is pressed.
-         public event recHandler handler;
-
-         /// <summary>
-         /// Hook callback. When Alt+3 is hit and a subscriber exists, swallows the
-         /// key and raises <c>handler</c> once per key press; everything else is
-         /// passed on to the next hook.
-         /// </summary>
-         private int KeyBoardHookProc(int nCode, int wParam, IntPtr lParam)
-         {
-             // wParam identifies the keyboard message; with Alt held, key presses
-             // arrive as WM_SYSKEYDOWN.
-             const int WM_KEYDOWN = 0x0100;
-             const int WM_SYSKEYDOWN = 0x0104;
-
-             if (nCode >= 0)
-             {
-                 KeyBoardHookStruct kbh = (KeyBoardHookStruct)Marshal.PtrToStructure(lParam, typeof(KeyBoardHookStruct));
-                 recHandler callback = this.handler;
-                 if (kbh.vkCode == (int)Keys.D3
-                     && Control.ModifierKeys == Keys.Alt
-                     && callback != null)
-                 {
-                     // The callback also runs for the key-up event; raising the
-                     // handler there as well would trigger recognition twice.
-                     if (wParam == WM_KEYDOWN || wParam == WM_SYSKEYDOWN)
-                     {
-                         Console.WriteLine("按了alt+3");
-                         callback(this, new EventArgs());
-                     }
-                     return 1;
-                 }
-             }
-             return CallNextHookEx(hHook, nCode, wParam, lParam);
-         }
-     }
- }
MainForm.cs
- using System;
- using System.Collections.Generic;
- using System.Net.Http;
- using System.Text;
- using System.Windows.Forms;
- using Newtonsoft.Json;
- using System.Text.RegularExpressions;
- using System.IO;
- using System.Net;
- using System.Web;
-
- namespace TextRecognition
- {
- public partial class MainForm : Form
- {
-     // Only the members that are new or changed are shown below.
-
-     // Global keyboard hook whose handler event reports the Alt+3 hotkey.
-     private readonly Hook hook = new Hook();
-
-     private void MainForm_Load(object sender, EventArgs e)
-     {
-         getAccessToken();
-         // Subscribe first so the hook has a listener from the moment it is installed.
-         hook.handler += new Hook.recHandler(handleRec);
-         hook.Hook_Start();
-     }
-
-     /// <summary>
-     /// Hotkey callback, invoked from inside the keyboard hook procedure.
-     /// </summary>
-     public void handleRec(object sender, EventArgs e)
-     {
-         // A low-level hook procedure has to return quickly: Windows applies a
-         // timeout to it and can silently drop a hook that is too slow. The
-         // recognition does a blocking HTTP request, so queue it on the form's
-         // message loop instead of running it inside the hook callback.
-         BeginInvoke(new MethodInvoker(doRecognize));
-     }
-
-     /// <summary>
-     /// Removes the keyboard hook when the form has closed.
-     /// </summary>
-     protected override void OnFormClosed(FormClosedEventArgs e)
-     {
-         hook.Hook_Clear();
-         base.OnFormClosed(e);
-     }
- }
- }
五、优化
我想让他一直在后台,别给我一启动蹦出一个窗口,我还要避免这个程序重复运行。
1.设置后台运行
首先我们把NotifyIcon这个组件拖入到窗口里,再拖个ContextMenuStrip来配置右键展示的菜单
设置一下NotifyIcon的icon,随便找个xxx.ico文件吧,然后修改Text属性,再把ContextMenuStrip属性修改为刚添加的那个context menu strip,然后这个就能在右下角看到了(这个b水印真烦啊)
再编辑一下ContextMenuStrip,我创建了复制上一次识别结果和退出两个选项,双击他们给他们添加点击事件
- // Menu handler: copies the last recognition result to the clipboard.
- private void copyMenuItem_Click(object sender, EventArgs e)
- {
-     // Clipboard.SetText throws on null/empty text, which is the state of
-     // resultText until something has been recognized.
-     if (!string.IsNullOrEmpty(resultText))
-     {
-         Clipboard.SetText(resultText);
-     }
- }
-
- // Menu handler: closes the form.
- private void exitMenuItem_Click(object sender, EventArgs e)
- {
-     Close();
- }
再让窗体隐藏,将主窗体的ShowInTaskbar属性设为 False,将其 WindowState属性设为 Minimized,程序即可变成后台运行程序。
2.防止重复开启
很简单,在Program.cs里写代码
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Threading.Tasks;
- using System.Windows.Forms;
-
- namespace TextRecognition
- {
- static class Program
- {
-     /// <summary>
-     /// Main entry point of the application. Uses a named mutex so that only one
-     /// instance of the program runs at a time.
-     /// </summary>
-     [STAThread]
-     static void Main()
-     {
-         bool unique;
-         Console.WriteLine(Application.ProductName);
-         // Hold the mutex in a using block: a local that is never referenced again
-         // may be garbage-collected while Application.Run is still executing, which
-         // would release the mutex and let a second instance start.
-         using (System.Threading.Mutex mutex = new System.Threading.Mutex(true, Application.ProductName, out unique))
-         {
-             if (!unique)
-             {
-                 MessageBox.Show("请勿重复运行此程序!", "错误", MessageBoxButtons.OK, MessageBoxIcon.Asterisk);
-                 return;
-             }
-
-             try
-             {
-                 Application.EnableVisualStyles();
-                 Application.SetCompatibleTextRenderingDefault(false);
-                 Application.Run(new MainForm());
-             }
-             finally
-             {
-                 // This thread owns the mutex (unique == true); release it before disposal.
-                 mutex.ReleaseMutex();
-             }
-         }
-     }
- }
- }
ok了,运行一下玩玩吧