模拟登录之图片验证码处理
登录图片验证码处理
案例:古诗文网登录 https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx
from hashlib import md5

import requests
from fake_useragent import UserAgent
# etree lives in the top-level lxml package; importing it via
# lxml.html.clean only worked by accident and fails on lxml >= 5.2,
# where that module was moved to the separate lxml_html_clean project.
from lxml import etree

UA = UserAgent()
headers = {'User-Agent': UA.random}
# Send every request through one Session so cookies carry over between
# the login page, the captcha image and the login POST.
session = requests.Session()


# Captcha handling by machine recognition; see the official usage notes at
# http://www.chaojiying.com/api-14.html
class Chaojiying_Client(object):
    """Small client for the Chaojiying captcha-recognition HTTP endpoints."""

    def __init__(self, username, password, soft_id):
        """Store the account parameters that are sent with every request.

        username: Chaojiying account name.
        password: plain-text password; only its MD5 hex digest is stored
            and transmitted.
        soft_id: software ID of the calling application.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload an image for recognition and return the decoded JSON reply.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognized image and return the decoded JSON reply.

        im_id: ID of the image whose recognition result was wrong.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
        return r.json()


def getCodeImgText(imgPath, imgType):
    """Recognize the captcha stored at imgPath and return its text.

    imgPath: path of a local image file.
    imgType: captcha type code (listed under the price table on the
        Chaojiying site).

    Returns the 'pic_str' field of the recognition reply; a KeyError is
    raised if the reply does not contain it.
    """
    # NOTE(review): account name, password and software ID are hard-coded;
    # replace them with your own (the software ID is generated in the
    # Chaojiying user center).
    chaojiying = Chaojiying_Client('bobo328410948', 'bobo328410948', '899370')
    # Read through a context manager so the file handle is always closed.
    with open(imgPath, 'rb') as img_file:
        im = img_file.read()
    return chaojiying.PostPic(im, imgType)['pic_str']


# Captcha recognition, step 1: fetch the login page and parse it, so the
# captcha image URL and the hidden form fields can be read from the tree.
url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
response = session.get(url, headers=headers)
tree = etree.HTML(response.text)
from urllib.parse import urljoin


def _first(nodes, what):
    """Return the first XPath result, or raise a descriptive error if empty."""
    if not nodes:
        raise RuntimeError('%s not found in the login page' % what)
    return nodes[0]


# Build the captcha URL with urljoin so a root-absolute src ("/x.ashx")
# does not produce a double slash after the host name.
img_src = urljoin(
    'https://so.gushiwen.org/',
    _first(tree.xpath('//img[@id="imgCode"]/@src'), 'captcha image'),
)
print(img_src)

# Download the captcha through the same session as the login page, save it
# locally and send it to the recognition service.
response_img = session.get(url=img_src, headers=headers)
with open('Code.jpg', 'wb') as f:
    f.write(response_img.content)
code = getCodeImgText('Code.jpg', 1004)
print(code)

# Extra request parameters are usually hidden in the page itself.
viewstate = _first(
    tree.xpath('//input[@id="__VIEWSTATE"]/@value'), '__VIEWSTATE')
viewstate_generator = _first(
    tree.xpath('//input[@id="__VIEWSTATEGENERATOR"]/@value'),
    '__VIEWSTATEGENERATOR')
print(viewstate)
print(viewstate_generator)

# Login form parameters.
data = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstate_generator,
    'from': 'http://so.gushiwen.org/user/collect.aspx',
    'email': '934321247@qq.com',
    'pwd': 'ABC123456',
    'code': code,
    'denglu': '登录',
}

# Submit the login form and keep a copy of the returned page.
url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
response = session.post(url=url, data=data, headers=headers)
with open('index.html', 'w', encoding='utf-8') as f:
    f.write(response.text)

# Print the text of the second <span> inside each "mainreg2" block of the
# returned page.
for node in etree.HTML(response.text).xpath('//div[@class="mainreg2"]'):
    print(node.xpath('./span[2]/text()'))