破解滑动验证码最新版本(97%左右通过率)
一、简述:
最近无聊想搞一下极验的滑块验证码破解这块,发现破解js代码耗时又耗力,而且一旦出现版本更新,以前的所有努力可能都要推翻重做,通用性不够,最后还是选用 selenium + PIL 来实现滑块验证码的破解。
期间也翻阅过很多文章,大多都已经失效,并且缺口位置查找和模拟滑动轨迹成功率很低,很难应用到实际开发项目中,本次是针对最新版本的极验滑块验证码进行破解。
二、项目环境
大致需要用到以下模块,各位看官请提前准备好:
python3.6、selenium、numpy、PIL、chromedriver
三、分析步骤以及代码编写
1.首先分析目标网站(本次主要以geetest官网滑块demo为参考)
网站大致长这个样子。首先按 F12 打开开发者工具,选择 Elements 查看节点,发现最新版本的滑块图片是使用画布(canvas)来呈现的。期间查阅了大量文档,使用如下代码即可获得画布中的图片数据,获取到的图片是经过 base64 编码的:
document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png")
2.通过分析发现这两个画布放的是所需要的背景图和缺口图(其实一眼就看出来的)
3. 接下来就是代码的编写了
3.1 首先是获得背景图和缺口图的数据
def get_images(self):
    """
    Fetch both captcha canvases as base64-encoded PNG data URLs.

    Sleeps one second, waits (up to 10 polls) until an element whose class
    contains ``geetest_canvas_slice`` is present, then reads each canvas
    through a JavaScript ``toDataURL("image/png")`` call.

    :return: tuple of two data-URL strings: first the content of the
        ``geetest_canvas_fullbg`` canvas, then the content of the
        ``geetest_canvas_bg`` canvas (presumably the complete picture and
        the picture with the gap, respectively)
    """
    time.sleep(1)
    self.browser.web_driver_wait_ruishu(10, "class", 'geetest_canvas_slice')

    # The canvases are read in the same order as before: "bg" first, "fullbg" second.
    bg_js = ('document.getElementsByClassName("geetest_canvas_bg geetest_'
             'absolute")[0].toDataURL("image/png")')
    bg_canvas_data = self.browser.execute_js(bg_js)["value"]

    fullbg_js = ('document.getElementsByClassName("geetest_canvas_fullbg geetest_fade'
                 ' geetest_absolute")[0].toDataURL("image/png")')
    fullbg_canvas_data = self.browser.execute_js(fullbg_js)["value"]

    return fullbg_canvas_data, bg_canvas_data
3.2 对数据进行解码操作并保存图片
def get_decode_image(self, filename, location_list):
    """
    Turn a base64 data URL into a PIL image.

    :param filename: not used by this function (kept for the callers)
    :param location_list: data-URL string; it must contain exactly one comma,
        with the base64 payload after it
    :return: the image opened from the decoded bytes
    """
    # Unpacking into two names raises ValueError unless there is exactly one comma.
    _header, payload = location_list.split(",")
    raw_bytes = base64.decodebytes(payload.encode())
    buffer = BytesIO(raw_bytes)
    new_im: image.Image = image.open(buffer)
    return new_im
3.3 接下来就是计算缺口位置了(这里使用的PIL中计算两张图片的差值获得缺口位置)
def compute_gap(self, img1, img2):
    """Find the x offset of the gap by diffing two captcha images.

    Both images are converted to RGB, their per-pixel absolute difference is
    taken, reduced to greyscale and thresholded through ``self.table``.
    Columns are then scanned left to right, starting at x = 43.

    :param img1: first PIL image
    :param img2: second PIL image (the difference is symmetric, so the
        order of the two images does not matter)
    :return: x coordinate of the first scanned column that holds more than
        5 marked pixels, or ``None`` when no such column exists
    """
    img1 = img1.convert("RGB")
    img2 = img2.convert("RGB")

    # Absolute difference -> greyscale -> binary image via the lookup table.
    diff = ImageChops.difference(img1, img2)
    diff = diff.convert("L")
    diff = diff.point(self.table, '1')

    # NOTE(review): 43 presumably skips the area occupied by the slider piece
    # at the left edge -- confirm against the captcha layout.
    left = 43
    width, height = diff.size

    # Fetch the pixel-access object once; it is loop-invariant.
    pixels = diff.load()

    for w in range(left, width):
        # A column only counts when more than 5 of its pixels differ, so that
        # a bump on the gap outline does not trigger a premature match.
        marked = sum(1 for h in range(height) if pixels[w, h] == 1)
        if marked > 5:
            return w
    return None
3.4 当滑块的缺口位置找到以后,就需要生成滑动轨迹(其中加20是为了保证滑动时先超过缺口位置,然后再慢慢回退到正确位置)
def ease_out_quart(self, x):
    """Quartic ease-out curve.

    :param x: progress value
    :return: ``1 - (1 - x) ** 4``
    """
    return 1 - pow(1 - x, 4)


def get_tracks_2(self, distance, seconds, ease_func):
    """
    Build a list of per-step x offsets from an easing function.

    Reference: https://www.jianshu.com/p/3f968958af5a

    The path deliberately overshoots the target by 20 px and then walks
    back with a fixed tail of small negative steps whose sum is -20, so the
    net displacement of the returned track equals ``distance``.

    :param distance: distance to the gap
    :param seconds: duration used to sample the easing curve (0.1 steps)
    :param ease_func: callable mapping progress to eased progress
    :return: list of integer offsets; the first element is always 0
    """
    distance += 20
    tracks = [0]
    travelled = 0
    for t in np.arange(0.0, seconds, 0.1):
        # int() keeps the offsets plain Python ints whatever numeric type
        # the easing function returns.
        offset = int(round(ease_func(t / seconds) * distance))
        tracks.append(offset - travelled)
        travelled = offset

    # The sampling above stops at seconds - 0.1, so ease_func(1.0) is never
    # evaluated; add the remainder so the overshoot point is really reached.
    if travelled != distance:
        tracks.append(distance - travelled)

    # Walk back the 20 px overshoot.
    tracks.extend([-3, -2, -3, -2, -2, -2, -2, -1, 0, -1, -1, -1])
    return tracks
3.5 最后也就是滑动滑块到缺口位置
def move_to_gap(self, track):
    """Drag the slider button along the given track.

    Waits for the element with class ``geetest_slider_button``, presses and
    holds it, performs one relative horizontal move per track entry with a
    0.02 s pause after each, then releases the button.

    :param track: iterable of x offsets; it is only read, never modified
    """
    slider = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    ActionChains(self.browser).click_and_hold(slider).perform()

    # Iterate instead of pop(0): the caller's list stays intact and each
    # step no longer shifts the whole list.
    for x in track:
        ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.02)

    ActionChains(self.browser).release().perform()
贴出完整代码(注意selenium有些方法会被极验检测到所以使用js命令直接运行的方式来达到效果)
crack.py
# -*-coding:utf-8 -*-
import base64
import time
import functools
import numpy as np

from tools.selenium_spider import SeleniumSpider

from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import PIL.Image as image
from PIL import ImageChops, PngImagePlugin
from io import BytesIO


class Crack(object):
    """
    Drives the slider captcha demo on the geetest site through a browser.
    """

    def __init__(self):
        self.url = 'https://www.geetest.com'
        self.browser = SeleniumSpider(path="/personalwork/personal_tools_project/adbtools/chromedriver", max_window=True)
        self.wait = WebDriverWait(self.browser, 100)
        # Subtracted from the computed gap position before building the track.
        self.BORDER = 8
        # Lookup table used to binarise the difference image:
        # grey levels below 50 map to 0, everything else to 1.
        self.table = [0 if i < 50 else 1 for i in range(256)]

    def open(self):
        """
        Load the demo page, select the slider demo and trigger the captcha.
        """
        self.browser.get(self.url)
        self.browser.get(self.url + "/Sensebot/")
        self.browser.web_driver_wait_ruishu(10, "class", 'experience--area')
        time.sleep(1)
        self.browser.execute_js('document.getElementsByClassName("experience--area")[0].getElementsByTagName("div")'
                                '[2].getElementsByTagName("ul")[0].getElementsByTagName("li")[1].click()')

        time.sleep(1)
        self.browser.web_driver_wait_ruishu(10, "class", 'geetest_radar_tip')

        self.browser.execute_js('document.getElementsByClassName("geetest_radar_tip")[0].click()')

    def check_status(self):
        """
        Tell whether the slider captcha still has to be solved.

        :return: False when the success tip reads "验证成功", True otherwise
            (including when the tip text cannot be read)
        """
        self.browser.web_driver_wait_ruishu(10, "class", 'geetest_success_radar_tip_content')
        try:
            time.sleep(0.5)
            message = self.browser.find_element_by_class_name("geetest_success_radar_tip_content").text
        except Exception:
            # Best effort: an unreadable tip is treated as "not verified yet".
            return True
        return message != "验证成功"

    def get_images(self):
        """
        Fetch both captcha canvases as base64-encoded PNG data URLs.

        :return: tuple of two data-URL strings: first the content of the
            ``geetest_canvas_fullbg`` canvas, then the content of the
            ``geetest_canvas_bg`` canvas
        """
        time.sleep(1)
        self.browser.web_driver_wait_ruishu(10, "class", 'geetest_canvas_slice')
        bg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_bg geetest_'
                                     'absolute")[0].toDataURL("image/png")')["value"]

        fullbg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_fullbg geetest_fade'
                                         ' geetest_absolute")[0].toDataURL("image/png")')["value"]
        # Return order is kept as it always was: fullbg first, bg second.
        return fullbg, bg

    def get_decode_image(self, filename, location_list):
        """
        Turn a base64 data URL into a PIL image.

        :param filename: not used by this method (kept for the callers)
        :param location_list: data-URL string with the base64 payload after
            its single comma
        :return: the image opened from the decoded bytes
        """
        _, img = location_list.split(",")
        img = base64.decodebytes(img.encode())
        new_im: PngImagePlugin.PngImageFile = image.open(BytesIO(img))
        return new_im

    def compute_gap(self, img1, img2):
        """Find the x offset of the gap by diffing two captcha images.

        :param img1: first PIL image
        :param img2: second PIL image (the difference is symmetric)
        :return: x coordinate of the first column, scanning from x = 43,
            that holds more than 5 marked pixels, or ``None`` if there is none
        """
        img1 = img1.convert("RGB")
        img2 = img2.convert("RGB")

        # Absolute difference -> greyscale -> binary image via self.table.
        diff = ImageChops.difference(img1, img2)
        diff = diff.convert("L")
        diff = diff.point(self.table, '1')

        # NOTE(review): 43 presumably skips the slider piece at the left
        # edge -- confirm against the captcha layout.
        left = 43
        width, height = diff.size

        # Fetch the pixel-access object once; it is loop-invariant.
        pixels = diff.load()

        for w in range(left, width):
            marked = sum(1 for h in range(height) if pixels[w, h] == 1)
            if marked > 5:
                return w
        return None

    def ease_out_quad(self, x):
        """Quadratic ease-out curve: ``1 - (1 - x) ** 2``."""
        return 1 - (1 - x) * (1 - x)

    def ease_out_quart(self, x):
        """Quartic ease-out curve: ``1 - (1 - x) ** 4``."""
        return 1 - pow(1 - x, 4)

    def ease_out_expo(self, x):
        """Exponential ease-out curve; returns exactly 1 at ``x == 1``."""
        if x == 1:
            return 1
        return 1 - pow(2, -10 * x)

    def get_tracks_2(self, distance, seconds, ease_func):
        """
        Build a list of per-step x offsets from an easing function.

        Reference: https://www.jianshu.com/p/3f968958af5a

        The path overshoots the target by 20 px and then walks back with a
        fixed tail of negative steps summing to -20, so the net displacement
        of the returned track equals ``distance``.

        :param distance: distance to the gap
        :param seconds: duration used to sample the easing curve (0.1 steps)
        :param ease_func: callable mapping progress to eased progress
        :return: list of integer offsets; the first element is always 0
        """
        distance += 20
        tracks = [0]
        travelled = 0
        for t in np.arange(0.0, seconds, 0.1):
            offset = int(round(ease_func(t / seconds) * distance))
            tracks.append(offset - travelled)
            travelled = offset

        # Sampling stops at seconds - 0.1, so ease_func(1.0) is never
        # evaluated; add the remainder so the overshoot point is reached.
        if travelled != distance:
            tracks.append(distance - travelled)

        tracks.extend([-3, -2, -3, -2, -2, -2, -2, -1, 0, -1, -1, -1])
        return tracks

    def get_track(self, distance):
        """
        Build a track from a constant-acceleration model.

        The original author notes that this variant does not get past the
        captcha (success rate close to zero); ``crack`` does not call it.

        :param distance: offset to cover
        :return: list of per-step offsets
        """
        distance += 20
        track = []
        # Distance covered so far.
        current = 0
        # Past this point the acceleration becomes negative.
        mid = distance * 3 / 5
        # Time slice per step.
        t = 0.5
        # Current velocity.
        v = 0

        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2 * a * t^2
            move = v0 * t + 0.5 * a * (t ** 2)
            current += move
            track.append(round(move))
        track.extend([-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1])
        return track

    def move_to_gap(self, track):
        """Drag the slider button along the given track.

        :param track: iterable of x offsets; it is only read, never modified
        """
        slider = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
        ActionChains(self.browser).click_and_hold(slider).perform()

        # Iterate instead of pop(0) so the caller's list stays intact.
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
            time.sleep(0.02)

        ActionChains(self.browser).release().perform()

    def crack(self, n):
        """
        Run one complete attempt.

        :param n: attempt index, used in the file names of the images that
            are saved when the attempt fails
        :return: True when the page reports success, False otherwise
        """
        self.open()

        if not self.check_status():
            print("验证成功")
            return True

        bg_filename = 'bg.png'
        fullbg_filename = 'fullbg.png'

        # get_images() returns the fullbg canvas first and the bg canvas
        # second; unpack in that order so the debug files saved below carry
        # the right names.
        fullbg_base64, bg_base64 = self.get_images()

        bg_img = self.get_decode_image(bg_filename, bg_base64)
        fullbg_img = self.get_decode_image(fullbg_filename, fullbg_base64)

        gap = self.compute_gap(fullbg_img, bg_img)
        print('缺口位置', gap)

        # compute_gap returns None when no gap column is found; treat that
        # as a failed attempt instead of failing on the subtraction.
        if gap is not None:
            track = self.get_tracks_2(gap - self.BORDER, 1, self.ease_out_quart)
            print("滑动轨迹", track)
            print("滑动距离", sum(track))
            self.move_to_gap(track)

            time.sleep(1)
            if not self.check_status():
                print('验证成功')
                return True

        print('验证失败')
        # Keep the images of the failed attempt for debugging.
        bg_img.save(f"bg_img{n}.png")
        fullbg_img.save(f"fullbg{n}.png")
        return False


if __name__ == '__main__':
    print('开始验证')
    crack = Crack()
    attempts = 200
    count = 0
    try:
        for i in range(attempts):
            if crack.crack(i):
                count += 1
    finally:
        # Shut the browser down even when an attempt raises.
        crack.browser.quit()
    print(f"成功率:{count / attempts * 100}%")
selenium_spider.py
#!/usr/local/bin/python
# coding:utf-8

"""
@author: Liubing
@software: PyCharm
@file: selenium_spider.py
@time: 2019-03-11 13:46
@describe: A thin wrapper around selenium. Chrome only; other browsers need their own wrapper.
"""
import json
import time as time_

from lxml import etree
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.chrome.options import Options


class SeleniumSpider(WebDriver):
    """Chrome WebDriver with a few convenience helpers."""

    def __init__(self, path, params=None, max_window=False, *args, **kwargs):
        """
        Configure Chrome options and start the driver.

        :param path: str, path of the chromedriver executable
        :param params: list of extra command-line arguments for Chrome
        :param max_window: maximise the window after start-up when True
        :param args: forwarded to the selenium WebDriver constructor
        :param kwargs: forwarded to the selenium WebDriver constructor
        """
        self.__path = path
        self.__params = params
        self.__options = Options()
        self.__options.add_argument('--dns-prefetch-disable')
        self.__options.add_argument('--disable-gpu')
        self.__options.add_argument('disable-infobars')
        # self.__options.add_argument('--headless')
        self.is_maximize_window = max_window

        # See https://juejin.im/post/5c62b6d5f265da2dab17ae3c
        self.__options.add_experimental_option('excludeSwitches', ['enable-automation'])

        if params:
            for i in params:
                self.__options.add_argument(i)
        super(SeleniumSpider, self).__init__(executable_path=self.__path, options=self.__options, *args, **kwargs)
        if self.is_maximize_window:
            self.maximize_window()

        # Locator strategy names accepted by web_driver_wait().
        self.ID = "id"
        self.XPATH = "xpath"
        self.LINK_TEXT = "link text"
        self.PARTIAL_LINK_TEXT = "partial link text"
        self.NAME = "name"
        self.TAG_NAME = "tag name"
        self.CLASS_NAME = "class name"
        self.CSS_SELECTOR = "css selector"

    def cookies_dict_to_selenium_cookies(self, cookies: dict, domain):
        """
        Convert a plain ``{name: value}`` mapping to selenium cookie dicts.

        :param cookies: mapping of cookie names to values
        :param domain: domain stored in every generated cookie
        :return: list of ``{"name", "value", "domain"}`` dicts
        """
        return [{"name": key, "value": value, "domain": domain} for key, value in cookies.items()]

    def get(self, url: str, cookies=None, domain=None):
        """
        Load a page, optionally add cookies and then refresh.

        :param url: URL to load
        :param cookies: either a list of ``{"name", "value", "domain"}``
            dicts or a ``{name: value}`` dict (the latter needs ``domain``)
        :param domain: cookie domain, required for the dict form
        :raises TypeError: a list entry lacks one of the required keys
        :raises ValueError: dict form given without ``domain``
        """
        super().get(url)
        if not cookies:
            return

        if isinstance(cookies, list):
            for cookie in cookies:
                if {"name", "value", "domain"} <= cookie.keys():
                    self.add_cookie(cookie)
                else:
                    raise TypeError('cookies错误请传入正确格式[{"name": key, "value": value, "domain": domain},...'
                                    '] 或者{key: vale,...}')
        elif isinstance(cookies, dict):
            if not domain:
                raise ValueError("{key:vale}格式必须传入doamin参数")
            for i in self.cookies_dict_to_selenium_cookies(cookies, domain):
                self.add_cookie(i)
        # Reload the page after the cookie handling.
        self.refresh()

    def web_driver_wait(self, time: int, rule: str, num: str):
        """
        Wait for an element with selenium's own WebDriverWait.

        The original author notes this must not be used on sites protected
        by the "ruishu" product (it ends in HTTP 400 there).

        :param time: timeout in seconds
        :param rule: locator strategy [id, xpath, link text, partial link
            text, name, tag name, class name, css selector]
        :param num: locator value
        """
        WebDriverWait(self, time, 0.5).until(
            EC.presence_of_element_located((rule, num)))

    def web_driver_wait_ruishu(self, time: int, rule: str, num: str):
        """
        Poll the page source once per second until an element shows up.

        The page HTML is fetched through execute_js() and searched for any
        element whose ``rule`` attribute contains ``num``.

        :param time: maximum number of polls (one second apart)
        :param rule: attribute name to match, e.g. ``id`` or ``class``
        :param num: substring the attribute has to contain
        :raises Exception: nothing matched within ``time`` polls
        """
        remaining = time
        while remaining > 0:
            response = self.execute_js("document.documentElement.outerHTML")
            try:
                html = etree.HTML(text=response["value"])
                if html.xpath("//*[contains(@%s, '%s')]" % (rule, num)):
                    return
            except Exception:
                # A snapshot that cannot be parsed counts as "not found yet".
                # Fall through to the sleep and decrement below so the loop
                # still ends.
                pass
            time_.sleep(1)
            remaining -= 1
        raise Exception("未找到 %s" % num)

    def execute_chrome_protocol_js(self, protocol, params: dict):
        """
        Send a Chrome DevTools Protocol command through chromedriver.

        Protocol reference: https://chromedevtools.github.io/devtools-protocol/

        :param protocol: str, command name such as ``Runtime.evaluate``
        :param params: dict of command parameters
        :return: the whole driver response when it carries a non-zero
            ``status``, otherwise the response's ``value`` entry
        """
        resource = "/session/%s/chromium/send_command_and_get_result" % self.session_id
        command_executor = self.command_executor
        url = command_executor._url + resource
        body = json.dumps({'cmd': protocol, 'params': params})
        response = command_executor._request('POST', url, body)
        # .get(): NOTE(review) some driver responses appear to omit
        # "status" altogether -- confirm for the selenium/chromedriver pair
        # in use.
        if response.get('status'):
            return response
        return response["value"]

    def execute_js(self, js):
        """
        Evaluate JavaScript in the page via the DevTools ``Runtime.evaluate`` command.

        :param js: str, expression to evaluate
        :return: the ``result`` object of the evaluation, i.e.
            ``{"type": "xxx", "value": "xxx"}``; the whole driver response
            when it carries a non-zero ``status``
        """
        # Runtime.evaluate takes its source text in the "expression" field.
        response = self.execute_chrome_protocol_js('Runtime.evaluate', {'expression': js})
        # execute_chrome_protocol_js() has already unwrapped "value" on
        # success; only an error response still carries a truthy "status".
        if response.get('status'):
            return response
        return response["result"]
问题:对于windows下滑动卡顿导致不成功问题解决办法
修改源码文件 site-packages\selenium\webdriver\common\actions\pointer_input.py
将其中 PointerInput 类的 DEFAULT_MOVE_DURATION 由默认的 250 改为 30 或者其他值就可以(不能太快)
最后以极验官网demo为例进行了200次测试,准确率高达99%。