实战破解百度旋转验证码
1、效果演示
2、如何破解
2.1准备数据集
首先需要使用爬虫,对验证码图片进行采集,尽量每一种类型都要采集到。
2.2图像矫正
接下来对采集的数据进行人工校正
2.3数据清洗
(1)对数据进行旋转,达到增加数据量的目的。
(2)对数据进行灰度化处理,将三维图片降为二维。
(3)对图片大小进行resize,可以提高训练速度。
2.4划分训练集与测试集
一般训练集占总数据量的80%,测试集占总数据量的20%,当然也可以根据自己的情况调整比例。
2.5训练模型
这里可以使用CNN神经网络模型进行训练,效果非常不错
2.6实战测试
下面直接上代码。其中的滑动系数可能需要自行调整,这个变动不会太频繁,可能几个月某度变一次。
"""Demo: solve the Baidu rotate captcha with Selenium and a remote recognizer.

Flow: open the captcha page, download the rotated image, send it to the
recognition service to get the clockwise rotation angle, then drag the slider
by ``angle * SLIDE_FACTOR`` pixels.
"""
__author__ = "detayun"

import os
import sys
import time
import base64
import requests
from PIL import Image
from io import BytesIO

sys.path.append(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))))

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains

# Captcha page opened by the demo.
CAPTCHA_URL = 'https://wappass.baidu.com/static/captcha/tuxing.html?&ak=c27bbc89afca0463650ac9bde68ebe06&backurl=https%3A%2F%2Fwww.baidu.com%2Fs%3Fcl%3D3%26tn%3Dbaidutop10%26fr%3Dtop1000%26wd%3D%25E6%25B6%2588%25E9%2598%25B2%25E6%2588%2598%25E5%25A3%25AB%25E8%25BF%259E%25E5%25A4%259C%25E7%25AD%2591%25E5%259D%259D%25E5%25BA%2594%25E5%25AF%25B9%25E6%25B4%25AA%25E5%25B3%25B0%25E8%25BF%2587%25E5%25A2%2583%26rsv_idx%3D2%26rsv_dl%3Dfyb_n_homepage%26hisfilter%3D1&logid=8309940529500911554&signature=4bce59041938b160b7c24423bde0b518&timestamp=1624535702'

# Locators of the slider button and the rotated captcha image.
SLIDE_BUTTON_XPATH = '//div[@class="passMod_slide-btn "]'
CAPTCHA_IMG_XPATH = '//img[@class="passMod_spin-background"]'

# slide distance (px) = rotation angle (degrees) * SLIDE_FACTOR.
# The factor may need manual re-tuning when the captcha page changes.
SLIDE_FACTOR = 0.66


def PIL_base64(img, coding='utf-8'):
    """Encode a PIL image as a ``data:image/...;base64,...`` URI string.

    PNG and GIF sources keep their format, everything else is re-encoded as
    JPEG. Palette ("P") images are converted to RGB first, and RGBA images
    are always written as PNG.

    Args:
        img: The PIL image to encode.
        coding: Text encoding used to decode the base64 bytes into ``str``.

    Returns:
        The data-URI string. Its MIME subtype names the format the bytes were
        actually saved in.
    """
    img_format = img.format
    if img_format is None:
        img_format = 'JPEG'

    source_format = img_format.lower()
    format_str = 'JPEG'
    if source_format == 'png':
        format_str = 'PNG'
    elif source_format == 'gif':
        format_str = 'gif'

    if img.mode == "P":
        img = img.convert('RGB')
    if img.mode == "RGBA":
        format_str = 'PNG'

    output_buffer = BytesIO()
    img.save(output_buffer, quality=100, format=format_str)
    byte_data = output_buffer.getvalue()
    # Label the payload with the format that was written, not the source
    # format: e.g. a BMP source is re-encoded as JPEG and must say "jpeg".
    return 'data:image/' + format_str.lower() + ';base64,' + base64.b64encode(byte_data).decode(coding)


def get_img(url, timeout=30):
    """Download the rotated captcha image and return it as a base64 data URI.

    Args:
        url: Image URL taken from the captcha ``<img>`` element.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The data-URI string, or ``None`` when the response status is not 200.
    """
    # NOTE(review): the Cookie below is a hard-coded session value from the
    # original post; replace it with your own before running.
    header = {
        "Host": "passport.baidu.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
        "Accept": "image/avif,image/webp,*/*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate, br",
        "Referer": "https://wappass.baidu.com/",
        "Connection": "keep-alive",
        "Cookie": 'Hm_lvt_3eecc7feff77952670b7c24e952e8773=1666849322,1666919008,1666961940,1667175865; Hm_lpvt_3eecc7feff77952670b7c24e952e8773=1667186488; token="MTY2NzE4NzczNS4yMTEzMjg1OmQwNDNhNmZiZTA4MjlmOGY1YjE0MjA0NmViN2M1NTdkM2MyYWY3NzE="; sessionid=aa6zibdmfbs5cwzh6x62niw7fbqe5pon',
        "Sec-Fetch-Dest": "image",
        "Sec-Fetch-Mode": "no-cors",
        "Sec-Fetch-Site": "same-site",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    response = requests.get(url=url, headers=header, timeout=timeout)
    if response.status_code != 200:
        return None
    img = Image.open(BytesIO(response.content))
    # Convert the image to a base64 string and return it.
    return PIL_base64(img)


def shibie(base64_img, timeout=30):
    """Ask the recognition service for the captcha's clockwise rotation angle.

    Args:
        base64_img: Data-URI string of the captcha image (see ``get_img``).
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The angle in degrees as ``int``, or ``None`` when the service answers
        with code 401 (the account cookie has to be refreshed).
    """
    url = "http://www.detayun.cn/tool/verify_code_identify/"
    # No "Content-Length" entry here: requests computes it from the encoded
    # body, so a hard-coded value would only be misleading.
    # NOTE(review): the Cookie below is a hard-coded session value from the
    # original post; replace it with your own before running.
    header = {
        "Host": "www.detayun.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Referer": "http://www.detayun.cn/tool/verifyCodeIdentifyPage/?verify_idf_id=9",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "Origin": "http://www.detayun.cn",
        "Connection": "keep-alive",
        "Cookie": 'Hm_lvt_3eecc7feff77952670b7c24e952e8773=1688628385,1688694584,1688743439,1688889515; _ga_CD35DZJ728=GS1.1.1677340409.5.1.1677340416.0.0.0; _ga=GA1.1.1572230966.1677226494; token="MTY4ODg5MzEwOC42MTI2NDk3OjY0ZTk0YWI1NTg3MWFmMDhkOTg3ZmIxZGQxMGIwYzIwZjBlNTRhODE="; sessionid=m8k9lxbpc3pzx2nbuylrj9llft7yqir6; Hm_lpvt_3eecc7feff77952670b7c24e952e8773=1688889515',
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    data = {
        'verify_idf_id': '16',
        'img_base64': base64_img,
        'words': '',
    }
    response = requests.post(url=url, headers=header, data=data, timeout=timeout)
    result = response.json()
    if result['code'] == 401:
        print('请登录识别账号,更新代码中的Cookie。登录地址:http://www.detayun.cn/account/loginPage/')
        return None
    # The service answers with text of the form "顺时针旋转<N>度"; keep the number.
    return int(str(result['data']['res_str']).replace('顺时针旋转', '').replace('度', ''))


def _solve_once(driver, slide_factor=SLIDE_FACTOR, wait_seconds=10):
    """Run one recognize-and-slide attempt; return True if a slide was made."""
    wait = WebDriverWait(driver, wait_seconds)

    # Wait for the slider button to appear.
    yzm_button = wait.until(lambda d: d.find_element(By.XPATH, SLIDE_BUTTON_XPATH))
    time.sleep(1)

    # Wait for the captcha image to appear and read its URL.
    captcha_img = wait.until(lambda d: d.find_element(By.XPATH, CAPTCHA_IMG_XPATH))
    img_src = captcha_img.get_attribute('src')

    # Download the image and convert it to base64.
    img_base64 = get_img(img_src)
    if img_base64 is None:
        print('Failed to download the captcha image.')
        return False

    # Recognize the rotation angle of the image.
    angle = shibie(img_base64)
    if angle is None:
        return False

    # rotation angle * slide factor = slide distance
    move_x = int(round(angle * slide_factor))

    # Build the whole drag as one chain and perform it once; calling
    # perform() repeatedly on the same chain can replay actions queued earlier.
    ActionChains(driver).click_and_hold(yzm_button).move_by_offset(move_x, 0).release().perform()
    return True


def main():
    """Open the captcha page and perform two consecutive slide attempts."""
    options = webdriver.ChromeOptions()
    # Raw string: same value as before, without invalid "\w" / "\c" escapes.
    # NOTE(review): `executable_path` is the Selenium 3 style argument; newer
    # Selenium releases expect a Service object instead - adjust to your version.
    driver = webdriver.Chrome(executable_path=r'.\webdriver\chromedriver.exe', options=options)
    try:
        # Open the Baidu rotate-captcha page.
        driver.get(CAPTCHA_URL)
        # The original demo slides twice in a row.
        for _ in range(2):
            if not _solve_once(driver):
                break
            time.sleep(2)
    finally:
        driver.quit()


if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术