Python爬虫学习笔记之极验滑动验证码的识别
代码:
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

EMAIL = ''  # account e-mail; register your own account and fill in both values
PASSWORD = ''
BORDER = 6  # subtracted from the detected gap offset before dragging
INIT_LEFT = 60  # first x column scanned when searching for the gap


class CrackGeetest():
    """Drive Chrome through the Geetest slider captcha on the login page.

    The flow (see ``crack``) is: open the login page, bring up the captcha,
    screenshot it before and after the gap is shown, diff the two images to
    locate the gap, then drag the slider along a generated track.
    """

    def __init__(self):
        self.url = 'https://account.geetest.com/login'
        self.browser = webdriver.Chrome()
        # Shared explicit wait: every element lookup below gives up after 20 s.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD

    def __del__(self):
        # ``browser`` is missing if webdriver.Chrome() raised in __init__.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() would only close
            # the current window and leave the driver running.
            browser.quit()

    def get_geetest_button(self):
        """
        Wait for the initial "click to verify" button.
        :return: the clickable button element
        """
        button = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))
        return button

    def get_position(self):
        """
        Locate the captcha image on the page.
        :return: tuple (top, bottom, left, right) taken from the element's
                 location and size
        """
        img = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
        # Fixed pause before reading the geometry (presumably to let the
        # captcha finish rendering/animating -- TODO confirm).
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the current page.
        :return: the screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        return screenshot

    def get_slider(self):
        """
        Wait for the slider handle.
        :return: the clickable slider element
        """
        slider = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))
        return slider

    def get_geetest_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it to disk.
        :param name: file name the cropped image is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # PIL crop boxes are (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page and type in the e-mail and password.
        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap by diffing the two captcha images.
        :param image1: image without the gap
        :param image2: image with the gap
        :return: x of the first column (scanning from INIT_LEFT) containing a
                 differing pixel, or INIT_LEFT if no difference is found
        """
        left = INIT_LEFT
        # Only scan the area both images cover so a size mismatch cannot
        # index out of range.
        width = min(image1.size[0], image2.size[0])
        height = min(image1.size[1], image2.size[1])
        for i in range(left, width):
            for j in range(height):
                if not self.is_pixel_equal(image1, image2, i, j):
                    return i
        return left

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Compare one pixel of two images with a per-channel tolerance.
        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True if the first three channels each differ by less than
                 the threshold, else False
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        threshold = 60
        return all(abs(pixel1[c] - pixel2[c]) < threshold for c in range(3))

    def get_track(self, distance):
        """
        Build a list of per-step x offsets that covers ``distance``.

        The motion accelerates (a = 2) for the first 4/5 of the distance and
        decelerates (a = -3) afterwards, sampled every 0.2 time units.
        :param distance: total offset to travel
        :return: list of integer step offsets whose sum is round(distance);
                 empty when distance <= 0
        """
        track = []
        # Exact (float) position reached so far.
        current = 0.0
        # Sum of the integer steps already appended to ``track``.
        emitted = 0
        # Position at which deceleration starts.
        mid = distance * 4 / 5
        # Time step.
        t = 0.2
        # Current velocity.
        v = 0.0

        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2*a*t^2, clamped so the final step cannot overshoot.
            current = min(current + v0 * t + 0.5 * a * t * t, distance)
            # Emit the difference between the rounded cumulative position and
            # what was already emitted; rounding each step on its own would
            # let the error accumulate and miss the gap.
            step = round(current) - emitted
            emitted += step
            track.append(step)
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.
        :param slider: slider element
        :param track: list of x offsets, applied one at a time
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        try:
            for x in track:
                ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
            time.sleep(0.5)
        finally:
            # Always let go of the mouse button, even if a move step failed.
            ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button.
        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        # NOTE(review): printed unconditionally; the login result is not checked.
        print('登录成功')

    def _solve_once(self):
        """Run one full captcha attempt; return True if verification succeeded."""
        # Load the page and enter the credentials.
        self.open()
        # Click the verification button to bring up the captcha.
        button = self.get_geetest_button()
        button.click()
        # Capture the captcha image before the gap is shown.
        image1 = self.get_geetest_image('captcha1.png')
        # Click the slider so the gap appears.
        slider = self.get_slider()
        slider.click()
        # Capture the captcha image with the gap.
        image2 = self.get_geetest_image('captcha2.png')
        # Locate the gap.
        gap = self.get_gap(image1, image2)
        print('缺口位置', gap)
        # Compensate for the border offset.
        gap -= BORDER
        # Build the movement track.
        track = self.get_track(gap)
        print('滑动轨迹', track)
        # Drag the slider.
        self.move_to_gap(slider, track)

        try:
            success = self.wait.until(
                EC.text_to_be_present_in_element(
                    (By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))
        except TimeoutException:
            # until() raises on timeout instead of returning a falsy value,
            # so a failed attempt has to be detected here.
            success = False
        print(success)
        return bool(success)

    def crack(self, max_retries=None):
        """
        Solve the captcha, retrying on failure, then log in.
        :param max_retries: maximum number of retries after the first attempt;
                            None (the default) retries until it succeeds
        :return: None
        :raises RuntimeError: if max_retries is set and every attempt failed
        """
        retries = 0
        while not self._solve_once():
            if max_retries is not None and retries >= max_retries:
                raise RuntimeError('captcha verification failed after %d retries' % retries)
            retries += 1
        self.login()


if __name__ == '__main__':
    cracker = CrackGeetest()
    cracker.crack()