Python 之 pytesseract 模块读取知乎验证码案例
import time

import pytesseract
import requests
from PIL import Image

# Seconds to wait for the captcha endpoint before giving up; without a
# timeout, requests.get() can block forever on an unresponsive server.
REQUEST_TIMEOUT = 10


def get_data_request(img_name="./captcha.png", timeout=REQUEST_TIMEOUT):
    """Download a Zhihu captcha image and save it to a local file.

    Args:
        img_name: Path the downloaded image is written to.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        The path of the saved image, or None if the download or the
        file write failed (the error is printed in that case).
    """
    headers = {
        # The original literal was missing the closing parenthesis.
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
    }
    # The "r" query parameter is set to the current time in milliseconds.
    captcha_url = "https://www.zhihu.com/captcha.gif?r=%d&type=login" % int(
        time.time() * 1000
    )

    try:
        response = requests.get(captcha_url, headers=headers, timeout=timeout)
        # Reject HTTP error responses so an error page is not saved as an image.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # requests raises its own exception hierarchy; the builtin
        # ConnectionError is not a base class of requests' ConnectionError.
        print(e)
        return None

    try:
        with open(img_name, "wb") as f:
            f.write(response.content)
    except IOError as e:
        print(e)
        return None
    return img_name


def read_captcha(img_url):
    """Run OCR on an image file and return the recognized text.

    Args:
        img_url: Path of the image file to read.

    Returns:
        The string produced by pytesseract.image_to_string for the image.
    """
    # Use a context manager so the underlying file handle is released.
    with Image.open(img_url) as image:
        return pytesseract.image_to_string(image)


def main():
    """Fetch a captcha image, OCR it, and print the recognized text."""
    img = get_data_request()
    if img is None:
        # The download or save failed and the error was already printed.
        return
    read_data = read_captcha(img)
    print(read_data)


if __name__ == '__main__':
    main()
结果如图: