手工验证码识别

from builtins import input,open,print

import requests
from bs4 import BeautifulSoup


# Request headers sent with every HTTP call in this script; the User-Agent
# string is that of a desktop Chrome 71 browser on Windows.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
}


def download_code(s, timeout=10):
    """Fetch the login page, save and display its captcha, and return the form tokens.

    The login page is requested through ``s`` so that any cookies set by the
    server are kept on the same session that later submits the form. The
    captcha image is written to ``code_img.png`` in the current directory and
    shown in a matplotlib window (``plt.show()`` is called, so the function
    returns once that call returns).

    Args:
        s: Session-like object exposing ``get(url, headers=..., timeout=...)``,
            e.g. a ``requests.Session``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``(__VIEWSTATE, __VIEWSTATEGENERATOR)`` tuple holding the ``value``
        attributes of the two hidden inputs found on the login page.

    Raises:
        ValueError: If the captcha image or either hidden input is missing
            from the page.
        requests.HTTPError: If the page or the image request returns an
            error status (raised by ``raise_for_status``).
    """
    from urllib.parse import urljoin

    url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'

    r = s.get(url=url, headers=headers, timeout=timeout)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')

    # Look up every required element first so a changed page layout fails
    # with a clear message instead of "'NoneType' object is not subscriptable".
    required = {
        'imgCode': ('img', 'src'),
        '__VIEWSTATE': ('input', 'value'),
        '__VIEWSTATEGENERATOR': ('input', 'value'),
    }
    found = {}
    for element_id, (tag_name, attr) in required.items():
        tag = soup.find(tag_name, id=element_id)
        if tag is None or tag.get(attr) is None:
            raise ValueError(
                'login page has no <%s id="%s"> with a "%s" attribute'
                % (tag_name, element_id, attr)
            )
        found[element_id] = tag[attr]

    # Resolve the image address against the page that served it; plain string
    # concatenation breaks for relative ("x.ashx") and absolute ("https://...") src values.
    image_src = urljoin(r.url, found['imgCode'])
    code_img = s.get(image_src, headers=headers, timeout=timeout)
    # Do not save an error page body as if it were the captcha image.
    code_img.raise_for_status()
    with open('code_img.png', 'wb') as fp:
        fp.write(code_img.content)

    # Imported lazily so the module can be imported without the GUI stack.
    from PIL import Image
    import matplotlib.pyplot as plt
    img = Image.open('code_img.png')
    plt.figure('pic')
    plt.imshow(img)
    plt.axis('off')  # hide the axes
    plt.show()

    return found['__VIEWSTATE'], found['__VIEWSTATEGENERATOR']


def login(view, viewg, session, email='111', pwd='222', timeout=10):
    """Prompt for the captcha text, submit the login form, and save the response.

    The response body is written to ``gushi.html`` (UTF-8) in the current
    directory regardless of the HTTP status returned.

    Args:
        view: Value for the ``__VIEWSTATE`` form field.
        viewg: Value for the ``__VIEWSTATEGENERATOR`` form field.
        session: Session-like object exposing ``post(url, headers=..., data=...,
            timeout=...)``; it should be the session that fetched the captcha.
        email: Account name sent in the ``email`` field. Defaults to the
            placeholder value ``'111'``.
        pwd: Password sent in the ``pwd`` field. Defaults to the placeholder
            value ``'222'``.
        timeout: Seconds to wait for the HTTP request before giving up.
    """
    post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
    # Ask the user to type the captcha; strip stray whitespace around the input
    # so an accidental space does not invalidate the code.
    code = input('请输入验证码:').strip()
    # Echo the tokens and the session object being used.
    print(view)
    print(viewg)
    print(session)
    formdata = {
        '__VIEWSTATE': view,
        '__VIEWSTATEGENERATOR': viewg,
        'from': 'http://so.gushiwen.org/user/collect.aspx',
        'email': email,
        'pwd': pwd,
        'code': code,
        'denglu': '登录',
    }
    r = session.post(url=post_url, headers=headers, data=formdata, timeout=timeout)
    with open('gushi.html', 'w', encoding='utf8') as fp:
        fp.write(r.text)


def main():
    """Run the manual-captcha login flow on a single shared session.

    One session is used for both steps so that cookies received while
    fetching the captcha are sent again with the login request. The session
    is used as a context manager so it is closed when the flow finishes or
    fails.
    """
    with requests.Session() as s:
        # Download the captcha locally and collect the hidden form tokens.
        view, viewg = download_code(s)
        login(view, viewg, s)


# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":
    main()
posted @ 2021-08-29 18:57  该显示昵称已被使用了  阅读(47)  评论(0)  编辑  收藏  举报