Python 实现公式图像识别转 Latex(Mathpix)
本文是文本 OCR 的后续。因为用到了公式识别,所以阅读了 Mathpix API 文档,编写了一份比较适合自己使用的公式识别 Python 程序,具体代码如下。注意:使用之前应当去 Mathpix 官网申请开发者 ID 和 KEY,它们对应代码中 `APP_ID` 和 `APP_KEY` 后的 `XXX`。我在代码中加入了使用次数的限制,但是需要手动新建一个 `./count.txt` 文件用于初始化使用次数;当然,在个人信息页的 Usage 下也可以看到 API 的调用情况,包括时间和次数。下面是代码实现,可以直接拷贝使用:
import os
import sys
import json
import time
import base64
import signal
import win32con
import winsound
import requests
from PIL import ImageGrab
import win32clipboard as wc
def set_clipboard(txt):
    """Replace the Windows clipboard contents with the Unicode text *txt*.

    Args:
        txt: The string to place on the clipboard as ``CF_UNICODETEXT``.

    The clipboard is closed again even when emptying or writing fails, so a
    failed call does not leave it held open by this process.
    """
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, txt)
    finally:
        wc.CloseClipboard()
# Credentials are taken from the environment; 'XXX' is the placeholder used
# when APP_ID / APP_KEY are not set.
env = os.environ
default_headers = {
    "app_id": env.get("APP_ID", "XXX"),
    "app_key": env.get("APP_KEY", "XXX"),
    "Content-type": "application/json",
}

# Endpoint that latex() posts to.
service = "https://api.mathpix.com/v3/latex"

# Result formats requested from the service.
format_set = [
    "text",
    "latex_simplified",
    "latex_styled",
    "mathml",
    "asciimath",
    "latex_list",
]

# Formats tried, in this order, when choosing what to copy to the clipboard.
format_set_output = ["latex_styled", "latex_simplified", "text"]

# Number of recognition requests made so far (persisted in ./count.txt).
count = 0
def changeCount(number):
    """Persist *number* as the usage counter in ``./count.txt``.

    Args:
        number: The counter value; it is written as ``str(number)``,
            replacing any previous file contents.
    """
    # The context manager closes the file even if the write raises.
    with open("./count.txt", "w") as filehandle:
        filehandle.write(str(number))
def getCount():
    """Return the usage counter stored in ``./count.txt``.

    Returns:
        The integer read from the file, or 0 when the file does not exist
        or contains only whitespace (e.g. a freshly created empty file).

    Raises:
        ValueError: If the file holds text that is not an integer.
    """
    try:
        with open("./count.txt", "r") as filehandle:
            content = filehandle.read().strip()
    except FileNotFoundError:
        return 0
    # An empty file means "no uses recorded yet" rather than an error.
    return int(content) if content else 0
def image_uri(filename):
    """Return the contents of *filename* as a base64 ``data:`` URI.

    Args:
        filename: Path of the image file to encode.

    Returns:
        A string of the form ``data:image/jpg;base64,<payload>``. The
        ``image/jpg`` prefix is fixed regardless of the file's actual format.
    """
    # Read inside a context manager so the handle is released immediately.
    with open(filename, "rb") as image_file:
        image_data = image_file.read()
    return "data:image/jpg;base64," + base64.b64encode(image_data).decode()
def latex(args, headers=default_headers, timeout=30):
    """POST *args* to the Mathpix endpoint and return the decoded JSON reply.

    Args:
        args: Request parameters; serialised to JSON for the request body.
        headers: HTTP headers to send; defaults to ``default_headers``.
        timeout: Timeout passed to ``requests.post``.

    Returns:
        The response body parsed with ``json.loads``.
    """
    payload = json.dumps(args)
    response = requests.post(
        service,
        data=payload,
        headers=headers,
        timeout=timeout,
    )
    return json.loads(response.text)
def sig_handler(signum, frame):
    """Signal callback that requests interpreter exit with status 0.

    Both arguments are part of the ``signal`` handler signature and are
    not used.
    """
    raise SystemExit(0)
""" 截图后,调用Mathpix 公式识别"""
def LatexOcrScreenshots(path="./", ifauto=False, max_count=1000):
    """Send the image on the clipboard to Mathpix and copy the result back.

    The clipboard image is saved as a temporary PNG under *path*, uploaded
    through ``latex()``, and the first format listed in ``format_set_output``
    that the reply contains is placed on the clipboard between ``$$`` lines.
    If the reply contains none of them the clipboard is set to an empty
    string. A system sound is played once the request has been handled.

    Args:
        path: Directory for the temporary PNG; created when missing.
        ifauto: True when called from the polling loop. The waiting prompt
            is then redrawn in place with a carriage return.
        max_count: Usage limit. Once the global ``count`` has reached it,
            no request is sent.

    Returns:
        The decoded Mathpix reply when an image was processed, ``""`` when
        the clipboard held no image and *ifauto* is false, otherwise
        ``None`` (limit reached, or no image while polling).
    """
    global count
    if count >= max_count:
        print("\nThe maximum number of uses has been reached!")
        changeCount(count)
        return
    if not os.path.exists(path):
        os.makedirs(path)

    image = ImageGrab.grabclipboard()
    # grabclipboard() gives None when the clipboard holds no image and a list
    # of file names when files were copied; only an image object can be saved.
    if image is None or not hasattr(image, "save"):
        if not ifauto:
            print("Count : ",count," Please get the screenshots by Shift+Win+S!",end="")
            return ""
        print("\rCount : ",count," Please get the screenshots by Shift+Win+S!",end="")
        return None

    # Record the use before the request so the counter survives a failure.
    count += 1
    changeCount(count)
    print("\rThe image has been obtained. Please wait a moment! ",end=" ")

    # os.path.join keeps the file inside *path* even without a trailing slash.
    image_path = os.path.join(path, str(time.time_ns()) + ".png")
    try:
        image.save(image_path)
        txt = latex({
            'src': image_uri(image_path),
            "ocr": ["math", "text"],
            "skip_recrop": True,
            "formats": format_set,
        })
    finally:
        # Remove the temporary file even when saving or the request failed.
        if os.path.exists(image_path):
            os.remove(image_path)

    for format_text in format_set_output:
        if format_text in txt:
            set_clipboard("$$\n" + txt[format_text] + "\n$$")
            break
    else:
        # No usable format in the reply: replace the image with empty text so
        # the clipboard no longer holds the screenshot.
        set_clipboard("")
    winsound.PlaySound('SystemAsterisk', winsound.SND_ASYNC)
    return txt
def AutoOcrScreenshotsLatex():
    """Poll the clipboard for screenshots until the process is asked to stop.

    Loads the persisted usage counter into the global ``count``, installs
    ``sig_handler`` for SIGINT and SIGTERM, then calls
    ``LatexOcrScreenshots(ifauto=True)`` every 0.1 seconds. When a
    ``SystemExit`` is raised, the final counter is printed and written back
    with ``changeCount`` before returning.
    """
    global count
    count = getCount()
    for stop_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(stop_signal, sig_handler)
    print("Count : ",count," Please get the screenshots by Shift+Win+S !",end="")
    try:
        while True:
            LatexOcrScreenshots(ifauto=True)
            time.sleep(0.1)
    except SystemExit:
        print("\nLast Count : ",count)
        changeCount(count)
if __name__ == '__main__':
    # The polling entry point defined in this file is AutoOcrScreenshotsLatex;
    # calling any other name here fails with NameError at start-up.
    AutoOcrScreenshotsLatex()
可以看出,它与百度 API 的不同之处在于:直接向网站发送 POST 请求便可以获取 OCR 内容。具体获取哪些内容由 `format_set` 决定,而输出内容的优先级由 `format_set_output` 决定。
任世事无常,勿忘初心