07 验证码处理
引入:
- 相关的门户网站在进行登录的时候,如果用户连续登录的次数超过3次或者5次的时候,就会在登录页中动态生成验证码。通过验证码达到分流和反爬的效果。
今日概要:
- 使用云打码平台识别验证码
云打码官网地址: http://www.yundama.com/
云打码使用流程:
注册:普通用户和开发者用户(两个都要注册)
登录:
— 普通用户:查询余额
— 开发者用户:
- 创建一个软件:我的软件 --> 添加一个软件
- 下载示例代码:点击开发文档 -> 调用示例及最新的DLL -> 点击PythonHttp示例下载,即可下载
开发者用户下载PythonHttp示例使用:
1. 解压下载好的PythonHttp调用示例,其中包含三个文件:验证码示例图片、YDMHTTPDemo2.x.py(Python 2 版本)和 YDMHTTPDemo3.x.py(Python 3 版本)
2. 使用时,将验证码图片和 YDMHTTPDemo3.x.py 中的内容导入到项目即可使用
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
import http.client
import json
import mimetypes
import time
import urllib
from contextlib import ExitStack

import requests

######################################################################


class YDMHttp:
    """Small HTTP client for the Yundama captcha-recognition API.

    Every call POSTs a form to ``apiurl`` and decodes the JSON reply.
    Methods return the requested value on success, the negative ``ret``
    code reported by the service on failure, or ``-9001`` when the reply
    is empty.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the account and software credentials sent with each call."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _fields(self, method, **extra):
        """Build the form fields shared by every API call, plus *extra*."""
        fields = {
            'method': method,
            'username': self.username,
            'password': self.password,
            'appid': self.appid,
            'appkey': self.appkey,
        }
        fields.update(extra)
        return fields

    @staticmethod
    def _value_or_error(response, key):
        """Return ``response[key]``, the negative ``ret`` code, or -9001."""
        if not response:
            return -9001
        ret = response['ret']
        if ret and ret < 0:
            return ret
        return response[key]

    def request(self, fields, files=None):
        """POST *fields* (and optional *files*) and return the decoded JSON.

        ``files`` maps a form field name to a file path; it defaults to
        ``None`` instead of a shared mutable list.
        """
        response = self.post_url(self.apiurl, fields, files)
        return json.loads(response)

    def balance(self):
        """Return the account balance, or a negative error code."""
        response = self.request(self._fields('balance'))
        return self._value_or_error(response, 'balance')

    def login(self):
        """Log in and return the user id, or a negative error code."""
        response = self.request(self._fields('login'))
        return self._value_or_error(response, 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload the image at *filename*; return its captcha id or an error code."""
        data = self._fields(
            'upload', codetype=str(codetype), timeout=str(timeout))
        response = self.request(data, {'file': filename})
        return self._value_or_error(response, 'cid')

    def result(self, cid):
        """Return the recognised text for *cid*, or '' if none is available."""
        response = self.request(self._fields('result', cid=str(cid)))
        if not response:
            return ''
        return response['text'] or ''

    def decode(self, filename, codetype, timeout):
        """Upload a captcha and poll once per second for its text.

        Returns ``(cid, text)`` on success, ``(error_code, '')`` when the
        upload fails, and ``(-3003, '')`` when *timeout* polls all come
        back empty.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''
        for _ in range(timeout):
            result = self.result(cid)
            if result != '':
                return cid, result
            time.sleep(1)
        return -3003, ''

    def report(self, cid):
        """Send a report for *cid* with flag '0'; return the service's ``ret``."""
        response = self.request(self._fields('report', cid=str(cid), flag='0'))
        if not response:
            return -9001
        return response['ret']

    def post_url(self, url, fields, files=None):
        """POST *fields* and the files named in *files*; return the body text.

        The files are opened only for the duration of the request and are
        always closed afterwards. The caller's mapping is left untouched
        (the previous version replaced its paths with open handles and
        never closed them).
        """
        with ExitStack() as stack:
            handles = {
                key: stack.enter_context(open(path, 'rb'))
                for key, path in (files or {}).items()
            }
            res = requests.post(url, files=handles, data=fields)
        return res.text


######################################################################

# User name (the ordinary-user account, not the developer account)
username = 'username'

# Password
password = 'password'

# Software ID, required for developer revenue sharing; found under
# "My software" in the developer console.
appid = 1

# Software key, required for developer revenue sharing; found under
# "My software" in the developer console.
appkey = '22cc5376925e9387a23cf797cb9ba745'

# Image file
filename = 'getimage.jpg'

# Captcha type, e.g. 1004 = 4 alphanumeric characters. Types are priced
# differently and a wrong type hurts accuracy.
# All types: http://www.yundama.com/price.html
codetype = 1004

# Timeout in seconds
timeout = 60

# Refuse to run with the placeholder credentials
if username == 'username':
    print('请设置好相关参数再测试')
else:
    # Initialise the client
    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama
    uid = yundama.login()
    print('uid: %s' % uid)

    # Query the balance
    balance = yundama.balance()
    print('balance: %s' % balance)

    # Recognise: image path, captcha type id, timeout (s) -> (cid, text)
    cid, result = yundama.decode(filename, codetype, timeout)
    print('cid: %s, result: %s' % (cid, result))

######################################################################
云打码平台处理验证码的实现流程:
代码展示:
爬取人人网登录后的页面,需要处理验证码
import http.client
import json
import mimetypes
import time
import urllib
from contextlib import ExitStack
from urllib import request

import requests
from lxml import etree

######################################################################


class YDMHttp:
    """Small HTTP client for the Yundama captcha-recognition API.

    Every call POSTs a form to ``apiurl`` and decodes the JSON reply.
    Methods return the requested value on success, the negative ``ret``
    code reported by the service on failure, or ``-9001`` when the reply
    is empty.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the account and software credentials sent with each call."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _fields(self, method, **extra):
        """Build the form fields shared by every API call, plus *extra*."""
        fields = {
            'method': method,
            'username': self.username,
            'password': self.password,
            'appid': self.appid,
            'appkey': self.appkey,
        }
        fields.update(extra)
        return fields

    @staticmethod
    def _value_or_error(response, key):
        """Return ``response[key]``, the negative ``ret`` code, or -9001."""
        if not response:
            return -9001
        ret = response['ret']
        if ret and ret < 0:
            return ret
        return response[key]

    def request(self, fields, files=None):
        """POST *fields* (and optional *files*) and return the decoded JSON.

        ``files`` maps a form field name to a file path; it defaults to
        ``None`` instead of a shared mutable list.
        """
        response = self.post_url(self.apiurl, fields, files)
        return json.loads(response)

    def balance(self):
        """Return the account balance, or a negative error code."""
        response = self.request(self._fields('balance'))
        return self._value_or_error(response, 'balance')

    def login(self):
        """Log in and return the user id, or a negative error code."""
        response = self.request(self._fields('login'))
        return self._value_or_error(response, 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload the image at *filename*; return its captcha id or an error code."""
        data = self._fields(
            'upload', codetype=str(codetype), timeout=str(timeout))
        response = self.request(data, {'file': filename})
        return self._value_or_error(response, 'cid')

    def result(self, cid):
        """Return the recognised text for *cid*, or '' if none is available."""
        response = self.request(self._fields('result', cid=str(cid)))
        if not response:
            return ''
        return response['text'] or ''

    def decode(self, filename, codetype, timeout):
        """Upload a captcha and poll once per second for its text.

        Returns ``(cid, text)`` on success, ``(error_code, '')`` when the
        upload fails, and ``(-3003, '')`` when *timeout* polls all come
        back empty.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''
        for _ in range(timeout):
            result = self.result(cid)
            if result != '':
                return cid, result
            time.sleep(1)
        return -3003, ''

    def report(self, cid):
        """Send a report for *cid* with flag '0'; return the service's ``ret``."""
        response = self.request(self._fields('report', cid=str(cid), flag='0'))
        if not response:
            return -9001
        return response['ret']

    def post_url(self, url, fields, files=None):
        """POST *fields* and the files named in *files*; return the body text.

        The files are opened only for the duration of the request and are
        always closed afterwards. The caller's mapping is left untouched
        (the previous version replaced its paths with open handles and
        never closed them).
        """
        with ExitStack() as stack:
            handles = {
                key: stack.enter_context(open(path, 'rb'))
                for key, path in (files or {}).items()
            }
            res = requests.post(url, files=handles, data=fields)
        return res.text


def get_code_text(codeType, imgPath):
    """Recognise the captcha image at *imgPath* through Yundama.

    Args:
        codeType: Yundama captcha type id (e.g. 2004); types are priced
            differently, see http://www.yundama.com/price.html.
        imgPath: path of the captcha image on disk.

    Returns:
        The recognised text, or '' when recognition fails, times out, or
        the credentials are still the placeholder values.
    """
    # NOTE(review): account credentials are hard-coded in source; move them
    # to configuration or environment variables before sharing this file.
    # User name / password of the ordinary-user account
    username = 'mwhshare'
    password = 'mwh@4598105'

    # Software ID and key, required for developer revenue sharing; both are
    # found under "My software" in the developer console.
    appid = 6596
    appkey = '515bcabfb89e3a824619a1d1c8b25f36'

    # Timeout in seconds
    timeout = 20

    # Refuse to run with the placeholder credentials. Returning '' keeps the
    # function from reaching the final return with `result` unbound.
    if username == 'username':
        print('请设置好相关参数再测试')
        return ''

    # Initialise the client
    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama
    uid = yundama.login()
    print('uid: %s' % uid)

    # Query the balance
    balance = yundama.balance()
    print('balance: %s' % balance)

    # Recognise: image path, captcha type id, timeout (s) -> (cid, text)
    cid, result = yundama.decode(imgPath, codeType, timeout)
    print('cid: %s, result: %s' % (cid, result))
    return result


# A session sends requests exactly like the top-level `requests` functions,
# but cookies set by any response are stored on it and sent with later
# requests.
session = requests.session()

# 1. Fetch the Renren login page and its captcha image.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
url = 'http://www.renren.com/'
# The page and the captcha are fetched through the session so that the
# cookies tied to this captcha are the ones sent with the login request.
page_text = session.get(url=url, headers=headers).text

# Parse out the captcha image URL and save the image next to this script.
tree = etree.HTML(page_text)
code_img_srcs = tree.xpath('//*[@id="verifyPic_login"]/@src')

code = ''
if code_img_srcs:
    code_img = session.get(url=code_img_srcs[0], headers=headers).content
    with open('./code.jpg', 'wb') as fp:
        fp.write(code_img)
    code = get_code_text(2004, './code.jpg')
# else: no captcha image on the page (the site only shows one after repeated
# login attempts), so the captcha field is sent empty.
# NOTE(review): confirm the login endpoint accepts an empty "icod".

# 2. Simulate the login.
login_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2019031446506'
data = {
    "email": "1696755793@qq.com",
    "icod": code,
    "origURL": "http://www.renren.com/home",
    "domain": "renren.com",
    "key_id": "1",
    "captcha_type": "web_login",
    "password": "4f1d552d7dc5ba646e93e653da9b06e5a24dceda905323a830e19f6352ae8bc0",
    "rkey": "9e75e8dc3457b14c55a74627fa64fb43",
    "f": "",
}
# A successful login sets cookies, which the session stores.
response = session.post(url=login_url, headers=headers, data=data)

# 3. Request the logged-in user's profile page; the session sends the login
# cookies with it.
detail_url = 'http://www.renren.com/969393866/profile'
page_text = session.get(url=detail_url, headers=headers).text

with open("./renren.html", "w", encoding="utf-8") as fp:
    fp.write(page_text)

print("下载完毕")
分类:
爬虫之从入门到放弃
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 为什么说在企业级应用开发中,后端往往是效率杀手?
· 用 C# 插值字符串处理器写一个 sscanf
· Java 中堆内存和栈内存上的数据分布和特点
· 开发中对象命名的一点思考
· .NET Core内存结构体系(Windows环境)底层原理浅谈
· 为什么说在企业级应用开发中,后端往往是效率杀手?
· DeepSeek 解答了困扰我五年的技术问题。时代确实变了!
· 本地部署DeepSeek后,没有好看的交互界面怎么行!
· 趁着过年的时候手搓了一个低代码框架
· 推荐一个DeepSeek 大模型的免费 API 项目!兼容OpenAI接口!