爬虫23-验证码识别

1.tesseract

import pytesseract
from PIL import Image
# Minimal OCR demo: read the text contained in a local image with Tesseract.
# pytesseract needs the location of the Tesseract executable; this path is
# specific to the author's Windows machine and must be adapted locally.
pytesseract.pytesseract.tesseract_cmd = r"H:\Python\Tesseract_dev20170510\Tesseract-OCR\tesseract.exe"

# Open the image in a context manager so the underlying file handle is
# closed deterministically instead of waiting for garbage collection.
with Image.open('a.png') as image:
    text = pytesseract.image_to_string(image)

print(text)

  

2.识别拉勾网图形验证码

#encoding: utf-8

import pytesseract
from urllib import request
from PIL import Image
import time

def main():
    """Download the Lagou captcha image in a loop and print the OCR result.

    Every iteration saves the image served by the captcha URL to
    ``captcha.png`` in the current working directory, runs Tesseract on it,
    prints the recognised text and then waits two seconds.  The loop never
    terminates on its own; stop it with Ctrl+C.
    """
    # Path to the Tesseract executable on the author's machine; adapt locally.
    pytesseract.pytesseract.tesseract_cmd = r"H:\Python\Tesseract_dev20170510\Tesseract-OCR\tesseract.exe"
    url = "https://passport.lagou.com/vcode/create?from=register&refresh=1513082291955"
    while True:
        # The same file name is reused, so each download overwrites the
        # previous captcha image.
        request.urlretrieve(url, 'captcha.png')
        # Close the image explicitly (via the context manager) so no handle
        # to captcha.png is still open when the next download overwrites it.
        with Image.open('captcha.png') as image:
            text = pytesseract.image_to_string(image)
        print(text)
        # Pause between requests.
        time.sleep(2)


if __name__ == '__main__':
    main()

  

posted @ 2020-03-15 21:09  胡辣汤王子  阅读(141)  评论(0)  编辑  收藏  举报