验证码识别
验证码识别
- 基于线上的打码平台识别验证码
- 打码平台 :
- 超级鹰(常用)先注册(用户中心的身份)-->登录(用户中心) 官网:http://www.chaojiying.com/about.html
- 云打码
- 打码兔
超级鹰的使用流程
- 注册登录
- 查询余额,请充值
- 创建一个软件ID
- 下载一个示例代码
- 需要将验证码下载到本地,然后才能识别
注册然后登录:
充值1元钱有1000积分,1000积分大概可以使用100多次。
生成软件ID
开发文档中下载示例代码:
下载后修改代码
#!/usr/bin/env python
# coding:utf-8
"""Sample client for the Chaojiying captcha-recognition HTTP API."""

import requests
from hashlib import md5


class Chaojiying_Client(object):
    """Thin wrapper around the Chaojiying upload and error-report endpoints."""

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the credentials that are sent with every request.

        username: account name.
        password: plain-text password; only its MD5 hex digest is kept and
            sent (as the ``pass2`` form field).
        soft_id: software ID created in the user center.
        timeout: seconds to wait for each HTTP request before giving up;
            ``None`` waits indefinitely (the behaviour before this
            parameter existed).
        """
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        # A timeout keeps a stalled connection from blocking the caller forever.
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php',
                          data=params, files=files, headers=self.headers,
                          timeout=self.timeout)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the JSON response.

        im_id: picture ID of the captcha being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php',
                          data=params, headers=self.headers,
                          timeout=self.timeout)
        return r.json()


def tranformImgCode(imgPath, imgType):
    """Recognise the captcha stored at ``imgPath`` and return the JSON response.

    imgPath: path of the local image file (the sample used a.jpg; on Windows
        the path separators may need doubling).
    imgType: captcha type code, e.g. 1902; see the price list on the
        official website.
    """
    # Replace the placeholders with your account name, password and the
    # software ID generated under User Center >> Software ID.
    chaojiying = Chaojiying_Client('username', 'password', 'ID')
    # Read through a context manager so the file handle is always closed.
    with open(imgPath, 'rb') as image_file:
        im = image_file.read()
    return chaojiying.PostPic(im, imgType)


if __name__ == '__main__':
    # Run the demo only when executed as a script, not on import.
    # On Python 3.4+ print must be called with parentheses.
    print(tranformImgCode('./a.jpg', 1902))
实例
模拟登录:古诗文网 地址:https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx
模拟登录流程:
对点击登录按钮对应的请求进行发送(post请求),需要处理请求参数:用户名、密码、验证码、其他防伪参数
login.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: 青城子
# datetime: 2021/7/17 15:39
# ide: PyCharm
"""Simulated login to so.gushiwen.cn, using Chaojiying to read the captcha."""

import requests
from lxml import etree

import chaojiying

headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
}

# Seconds to wait for each HTTP request before giving up.
REQUEST_TIMEOUT = 30


def _hidden_field(tree, name, default):
    """Return the value of the form input called ``name``, or ``default`` if the page has none."""
    values = tree.xpath('//input[@name="%s"]/@value' % name)
    return values[0] if values else default


def main():
    """Fetch the login page, solve its captcha, submit the form and save the response."""
    # One session is used for every request so cookies set by the login page
    # are sent with the captcha download and the login POST.
    # NOTE(review): presumably the captcha is tied to that session cookie - confirm.
    session = requests.Session()

    url = "https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx"
    page_text = session.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT).text

    # Locate the captcha image address in the login page.
    tree = etree.HTML(page_text)
    img_paths = tree.xpath('//*[@id="imgCode"]/@src')
    if not img_paths:
        raise RuntimeError("captcha image (id=imgCode) not found on the login page")
    img_src = "https://so.gushiwen.cn" + img_paths[0]

    # Chaojiying works on a local file, so save the captcha image first.
    img_data = session.get(url=img_src, headers=headers, timeout=REQUEST_TIMEOUT).content
    with open("./code.jpg", "wb") as fp:
        fp.write(img_data)

    # Recognise the captcha.
    code_text = chaojiying.tranformImgCode("./code.jpg", 1902)
    print(code_text)

    login_url = "https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx"
    data = {
        # Anti-forgery fields: prefer the values embedded in the page just
        # fetched; fall back to the previously captured ones (with the stray
        # spaces that had crept into them removed) if the inputs are absent.
        # NOTE(review): assumes the page exposes them as <input name="..."> - confirm.
        "__VIEWSTATE": _hidden_field(
            tree,
            "__VIEWSTATE",
            "bRb/aAzb7+I+v8qJnjb/achUMrSTjgyK6TLoWB6wq4yEHtY/V1UnC1ckOq3rA/wkZRbfstT0lJHqG4RYQkykXTFV7eln2H+XkcN807hkVoueVgvuWernzqA8UeA=",
        ),
        "__VIEWSTATEGENERATOR": _hidden_field(tree, "__VIEWSTATEGENERATOR", "C93BE1AE"),
        # Same target as the ``from`` query parameter of login_url.
        "from": "http://so.gushiwen.cn/user/collect.aspx",
        "email": "xxxxxxx@qq.com",
        "pwd": "xxxx",
        "code": code_text.get("pic_str"),  # the captcha changes on every request
        "denglu": "登录",
    }

    # Send the request the login button would send; the response is the page
    # returned after the login attempt.
    page_text_login = session.post(url=login_url, headers=headers, data=data,
                                   timeout=REQUEST_TIMEOUT).text
    with open("./gushiwend.html", "w", encoding="utf-8") as fp:
        fp.write(page_text_login)


if __name__ == "__main__":
    main()
chaojiying.py
超级鹰验证码
#!/usr/bin/env python
# coding:utf-8
"""Client for the Chaojiying captcha-recognition HTTP API."""

import requests
from hashlib import md5


class Chaojiying_Client(object):
    """Thin wrapper around the Chaojiying upload and error-report endpoints."""

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the credentials that are sent with every request.

        username: account name.
        password: plain-text password; only its MD5 hex digest is kept and
            sent (as the ``pass2`` form field).
        soft_id: software ID created in the user center.
        timeout: seconds to wait for each HTTP request before giving up;
            ``None`` waits indefinitely (the behaviour before this
            parameter existed).
        """
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        # A timeout keeps a stalled connection from blocking the caller forever.
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php',
                          data=params, files=files, headers=self.headers,
                          timeout=self.timeout)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the JSON response.

        im_id: picture ID of the captcha being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php',
                          data=params, headers=self.headers,
                          timeout=self.timeout)
        return r.json()


def tranformImgCode(imgPath, imgType):
    """Recognise the captcha stored at ``imgPath`` and return the JSON response.

    imgPath: path of the local image file (on Windows the path separators
        may need doubling).
    imgType: captcha type code, e.g. 1902; see the price list on the
        official website.
    """
    # Replace the placeholders with your account name, password and the
    # software ID generated under User Center >> Software ID.
    chaojiying = Chaojiying_Client('username', 'password', 'ID')
    # Read through a context manager so the file handle is always closed.
    with open(imgPath, 'rb') as image_file:
        im = image_file.read()
    return chaojiying.PostPic(im, imgType)