一、介绍

   一些网站会在正常的账号密码认证之外加一些验证码,以此来明确地区分人/机行为,从一定程度上达到反爬的效果,对于简单的校验码Tesserocr就可以搞定,如下

    但一些网站加入了滑动验证码,最典型的要属于极验滑动认证了,极验官网:http://www.geetest.com/,下图是极验的登录界面

 现在极验验证码已经更新到了 3.0 版本,截至 2017 年 7 月全球已有十六万家企业正在使用极验,每天服务响应超过四亿次,广泛应用于直播视频、金融服务、电子商务、游戏娱乐、政府企业等各大类型网站

对于这类验证,如果我们直接模拟表单请求,繁琐的认证参数与认证流程会让你蛋碎一地,我们可以用selenium驱动浏览器来解决这个问题,大致分为以下几个步骤

#1、输入账号、密码,然后点击登录
#2、点击按钮,弹出没有缺口的图
#3、针对没有缺口的图片进行截图
#4、点击滑动按钮,弹出有缺口的图
#5、针对有缺口的图片进行截图
#6、对比两张图片,找出缺口,即滑动的位移
#7、按照人的行为习惯,把总位移切成一段段小的位移
#8、按照位移移动
#9、完成登录

二、实现

安装:selenium+chrome/phantomjs

#安装:Pillow
Pillow:基于PIL,处理python 3.x的图形图像库.因为PIL只能处理到python 2.x,而这个模块能处理Python3.x,目前用它做图形的很多.
http://www.cnblogs.com/apexchu/p/4231041.html

C:\Users\Administrator>pip3 install pillow
C:\Users\Administrator>python3
Python 3.6.1 (v3.6.1:69c0db5, Mar 21 2017, 18:41:36) [MSC v.1900 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> from PIL import Image
>>>
  1 from selenium import webdriver
  2 from selenium.webdriver import ActionChains
  3 from selenium.webdriver.common.by import By
  4 from selenium.webdriver.common.keys import Keys
  5 from selenium.webdriver.support import expected_conditions as EC
  6 from selenium.webdriver.support.wait import WebDriverWait
  7 from PIL import Image
  8 import time
  9 
 10 def get_snap():
 11     '''
 12     对整个网页截图,保存成图片,然后用PIL.Image拿到图片对象
 13     :return: 图片对象
 14     '''
 15     driver.save_screenshot('snap.png')
 16     page_snap_obj=Image.open('snap.png')
 17     return page_snap_obj
 18 
 19 def get_image():
 20     '''
 21     从网页的网站截图中,截取验证码图片
 22     :return: 验证码图片
 23     '''
 24     img=wait.until(EC.presence_of_element_located((By.CLASS_NAME,'geetest_canvas_img')))
 25     time.sleep(2) #保证图片刷新出来
 26     localtion=img.location
 27     size=img.size
 28 
 29     top=localtion['y']
 30     bottom=localtion['y']+size['height']
 31     left=localtion['x']
 32     right=localtion['x']+size['width']
 33 
 34     page_snap_obj=get_snap()
 35     crop_imag_obj=page_snap_obj.crop((left,top,right,bottom))
 36     return crop_imag_obj
 37 
 38 
 39 def get_distance(image1,image2):
 40     '''
 41     拿到滑动验证码需要移动的距离
 42     :param image1:没有缺口的图片对象
 43     :param image2:带缺口的图片对象
 44     :return:需要移动的距离
 45     '''
 46     threshold=60
 47     left=57
 48     for i in range(left,image1.size[0]):
 49         for j in range(image1.size[1]):
 50             rgb1=image1.load()[i,j]
 51             rgb2=image2.load()[i,j]
 52             res1=abs(rgb1[0]-rgb2[0])
 53             res2=abs(rgb1[1]-rgb2[1])
 54             res3=abs(rgb1[2]-rgb2[2])
 55             if not (res1 < threshold and res2 < threshold and res3 < threshold):
 56                 return i-7 #经过测试,误差为大概为7
 57     return i-7 #经过测试,误差为大概为7
 58 
 59 
 60 def get_tracks(distance):
 61     '''
 62     拿到移动轨迹,模仿人的滑动行为,先匀加速后匀减速
 63     匀变速运动基本公式:
 64     ①v=v0+at
 65     ②s=v0t+½at²
 66     ③v²-v0²=2as
 67 
 68     :param distance: 需要移动的距离
 69     :return: 存放每0.3秒移动的距离
 70     '''
 71     #初速度
 72     v=0
 73     #单位时间为0.2s来统计轨迹,轨迹即0.2内的位移
 74     t=0.3
 75     #位移/轨迹列表,列表内的一个元素代表0.2s的位移
 76     tracks=[]
 77     #当前的位移
 78     current=0
 79     #到达mid值开始减速
 80     mid=distance*4/5
 81 
 82     while current < distance:
 83         if current < mid:
 84             # 加速度越小,单位时间的位移越小,模拟的轨迹就越多越详细
 85             a= 2
 86         else:
 87             a=-3
 88 
 89         #初速度
 90         v0=v
 91         #0.2秒时间内的位移
 92         s=v0*t+0.5*a*(t**2)
 93         #当前的位置
 94         current+=s
 95         #添加到轨迹列表
 96         tracks.append(round(s))
 97 
 98         #速度已经达到v,该速度作为下次的初速度
 99         v=v0+a*t
100     return tracks
101 
102 
# Start the browser before entering ``try``: if Chrome cannot be started
# there is nothing to clean up, and the real error is not hidden by a
# NameError raised from the ``finally`` clause.
driver = webdriver.Chrome()
try:
    driver.get('https://account.geetest.com/login')
    wait = WebDriverWait(driver, 10)

    # Step 1: click the button so that the picture without the gap pops up
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_radar_tip')))
    button.click()

    # Step 2: capture the picture without the gap
    image1 = get_image()

    # Step 3: click the slider button so that the picture with the gap shows
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    button.click()

    # Step 4: capture the picture with the gap
    image2 = get_image()

    # Step 5: compare the RGB pixels of both pictures; the x of the first
    # differing pixel is the distance to move
    distance = get_distance(image1, image2)

    # Step 6: imitate a human (accelerate, then decelerate) by cutting the
    # total distance into small steps
    tracks = get_tracks(distance)
    print(tracks)
    print(image1.size)
    print(distance, sum(tracks))

    # Step 7: drag along the track to finish the verification
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    ActionChains(driver).click_and_hold(button).perform()
    for track in tracks:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    # Go slightly too far, then come back, like a person would.
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()

    time.sleep(0.5)  # release the mouse after 0.5 s
    ActionChains(driver).release().perform()

    # Step 8: log in
    # find_element(By.ID, ...) is used because the find_element_by_*
    # helpers no longer exist in Selenium 4.
    input_email = driver.find_element(By.ID, 'email')
    input_password = driver.find_element(By.ID, 'password')
    button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))

    # NOTE(review): credentials are hard-coded; load them from the
    # environment or a config file before using this outside a demo.
    input_email.send_keys('18611453110@163.com')
    input_password.send_keys('linhaifeng123')
    button.click()

    time.sleep(200)  # keep the browser open to inspect the result
finally:
    # quit() ends the whole session and the driver process; close() would
    # only close the current window.
    driver.quit()
View Code

案例:

  1 from selenium import webdriver
  2 from selenium.webdriver import ActionChains
  3 from selenium.webdriver.common.by import By
  4 from selenium.webdriver.common.keys import Keys
  5 from selenium.webdriver.support import expected_conditions as EC
  6 from selenium.webdriver.support.wait import WebDriverWait
  7 from PIL import Image
  8 import time
  9 
 10 def get_snap():
 11     driver.save_screenshot('full_snap.png')
 12     page_snap_obj=Image.open('full_snap.png')
 13     return page_snap_obj
 14 
 15 def get_image():
 16     img=driver.find_element_by_class_name('geetest_canvas_img')
 17     time.sleep(2)
 18     location=img.location
 19     size=img.size
 20 
 21     left=location['x']
 22     top=location['y']
 23     right=left+size['width']
 24     bottom=top+size['height']
 25 
 26     page_snap_obj=get_snap()
 27     image_obj=page_snap_obj.crop((left,top,right,bottom))
 28     # image_obj.show()
 29     return image_obj
 30 
 31 def get_distance(image1,image2):
 32     start=57
 33     threhold=60
 34 
 35     for i in range(start,image1.size[0]):
 36         for j in range(image1.size[1]):
 37             rgb1=image1.load()[i,j]
 38             rgb2=image2.load()[i,j]
 39             res1=abs(rgb1[0]-rgb2[0])
 40             res2=abs(rgb1[1]-rgb2[1])
 41             res3=abs(rgb1[2]-rgb2[2])
 42             # print(res1,res2,res3)
 43             if not (res1 < threhold and res2 < threhold and res3 < threhold):
 44                 return i-7
 45     return i-7
 46 
 47 def get_tracks(distance):
 48     distance+=20 #先滑过一点,最后再反着滑动回来
 49     v=0
 50     t=0.2
 51     forward_tracks=[]
 52 
 53     current=0
 54     mid=distance*3/5
 55     while current < distance:
 56         if current < mid:
 57             a=2
 58         else:
 59             a=-3
 60 
 61         s=v*t+0.5*a*(t**2)
 62         v=v+a*t
 63         current+=s
 64         forward_tracks.append(round(s))
 65 
 66     #反着滑动到准确位置
 67     back_tracks=[-3,-3,-2,-2,-2,-2,-2,-1,-1,-1] #总共等于-20
 68 
 69     return {'forward_tracks':forward_tracks,'back_tracks':back_tracks}
 70 
 71 try:
 72     # 1、输入账号密码回车
 73     driver = webdriver.Chrome()
 74     driver.implicitly_wait(3)
 75     driver.get('https://passport.cnblogs.com/user/signin')
 76 
 77     username = driver.find_element_by_id('input1')
 78     pwd = driver.find_element_by_id('input2')
 79     signin = driver.find_element_by_id('signin')
 80 
 81     username.send_keys('linhaifeng')
 82     pwd.send_keys('xxxxx')
 83     signin.click()
 84 
 85     # 2、点击按钮,得到没有缺口的图片
 86     button = driver.find_element_by_class_name('geetest_radar_tip')
 87     button.click()
 88 
 89     # 3、获取没有缺口的图片
 90     image1 = get_image()
 91 
 92     # 4、点击滑动按钮,得到有缺口的图片
 93     button = driver.find_element_by_class_name('geetest_slider_button')
 94     button.click()
 95 
 96     # 5、获取有缺口的图片
 97     image2 = get_image()
 98 
 99     # 6、对比两种图片的像素点,找出位移
100     distance = get_distance(image1, image2)
101 
102     # 7、模拟人的行为习惯,根据总位移得到行为轨迹
103     tracks = get_tracks(distance)
104     print(tracks)
105 
106     # 8、按照行动轨迹先正向滑动,后反滑动
107     button = driver.find_element_by_class_name('geetest_slider_button')
108     ActionChains(driver).click_and_hold(button).perform()
109 
110     # 正常人类总是自信满满地开始正向滑动,自信地表现是疯狂加速
111     for track in tracks['forward_tracks']:
112         ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
113 
114     # 结果傻逼了,正常的人类停顿了一下,回过神来发现,卧槽,滑过了,然后开始反向滑动
115     time.sleep(0.5)
116     for back_track in tracks['back_tracks']:
117         ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()
118 
119     # 小范围震荡一下,进一步迷惑极验后台,这一步可以极大地提高成功率
120     ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
121     ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
122 
123     # 成功后,骚包人类总喜欢默默地欣赏一下自己拼图的成果,然后恋恋不舍地松开那只脏手
124     time.sleep(0.5)
125     ActionChains(driver).release().perform()
126 
127     time.sleep(10)  # 睡时间长一点,确定登录成功
128 finally:
129     driver.close()
破解博客园后台登录
  1 from selenium import webdriver
  2 from selenium.webdriver import ActionChains
  3 from selenium.webdriver.common.by import By
  4 from selenium.webdriver.common.keys import Keys
  5 from selenium.webdriver.support import expected_conditions as EC
  6 from selenium.webdriver.support.wait import WebDriverWait
  7 from PIL import Image
  8 import time
  9 
 10 def get_snap(driver):
 11     driver.save_screenshot('full_snap.png')
 12     page_snap_obj=Image.open('full_snap.png')
 13     return page_snap_obj
 14 
 15 def get_image(driver):
 16     img=driver.find_element_by_class_name('geetest_canvas_img')
 17     time.sleep(2)
 18     location=img.location
 19     size=img.size
 20 
 21     left=location['x']
 22     top=location['y']
 23     right=left+size['width']
 24     bottom=top+size['height']
 25 
 26     page_snap_obj=get_snap(driver)
 27     image_obj=page_snap_obj.crop((left,top,right,bottom))
 28     # image_obj.show()
 29     return image_obj
 30 
 31 def get_distance(image1,image2):
 32     start=57
 33     threhold=60
 34 
 35     for i in range(start,image1.size[0]):
 36         for j in range(image1.size[1]):
 37             rgb1=image1.load()[i,j]
 38             rgb2=image2.load()[i,j]
 39             res1=abs(rgb1[0]-rgb2[0])
 40             res2=abs(rgb1[1]-rgb2[1])
 41             res3=abs(rgb1[2]-rgb2[2])
 42             # print(res1,res2,res3)
 43             if not (res1 < threhold and res2 < threhold and res3 < threhold):
 44                 return i-7
 45     return i-7
 46 
 47 def get_tracks(distance):
 48     distance+=20 #先滑过一点,最后再反着滑动回来
 49     v=0
 50     t=0.2
 51     forward_tracks=[]
 52 
 53     current=0
 54     mid=distance*3/5
 55     while current < distance:
 56         if current < mid:
 57             a=2
 58         else:
 59             a=-3
 60 
 61         s=v*t+0.5*a*(t**2)
 62         v=v+a*t
 63         current+=s
 64         forward_tracks.append(round(s))
 65 
 66     #反着滑动到准确位置
 67     back_tracks=[-3,-3,-2,-2,-2,-2,-2,-1,-1,-1] #总共等于-20
 68 
 69     return {'forward_tracks':forward_tracks,'back_tracks':back_tracks}
 70 
 71 def crack(driver): #破解滑动认证
 72     # 1、点击按钮,得到没有缺口的图片
 73     button = driver.find_element_by_class_name('geetest_radar_tip')
 74     button.click()
 75 
 76     # 2、获取没有缺口的图片
 77     image1 = get_image(driver)
 78 
 79     # 3、点击滑动按钮,得到有缺口的图片
 80     button = driver.find_element_by_class_name('geetest_slider_button')
 81     button.click()
 82 
 83     # 4、获取有缺口的图片
 84     image2 = get_image(driver)
 85 
 86     # 5、对比两种图片的像素点,找出位移
 87     distance = get_distance(image1, image2)
 88 
 89     # 6、模拟人的行为习惯,根据总位移得到行为轨迹
 90     tracks = get_tracks(distance)
 91     print(tracks)
 92 
 93     # 7、按照行动轨迹先正向滑动,后反滑动
 94     button = driver.find_element_by_class_name('geetest_slider_button')
 95     ActionChains(driver).click_and_hold(button).perform()
 96 
 97     # 正常人类总是自信满满地开始正向滑动,自信地表现是疯狂加速
 98     for track in tracks['forward_tracks']:
 99         ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
100 
101     # 结果傻逼了,正常的人类停顿了一下,回过神来发现,卧槽,滑过了,然后开始反向滑动
102     time.sleep(0.5)
103     for back_track in tracks['back_tracks']:
104         ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()
105 
106     # 小范围震荡一下,进一步迷惑极验后台,这一步可以极大地提高成功率
107     ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
108     ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
109 
110     # 成功后,骚包人类总喜欢默默地欣赏一下自己拼图的成果,然后恋恋不舍地松开那只脏手
111     time.sleep(0.5)
112     ActionChains(driver).release().perform()
113 
def login_cnblogs(username, password):
    """Log in to cnblogs, solving the slide CAPTCHA on the way.

    Opens a Chrome session, submits the sign-in form, calls ``crack`` to
    pass the slide verification and always shuts the browser down afterwards.

    :param username: cnblogs account name
    :param password: cnblogs password
    :return: None
    """
    driver = webdriver.Chrome()
    try:
        # 1. Enter user name and password, then submit
        driver.implicitly_wait(3)
        driver.get('https://passport.cnblogs.com/user/signin')

        # find_element(By.ID, ...) replaces find_element_by_id, which no
        # longer exists in Selenium 4.
        input_username = driver.find_element(By.ID, 'input1')
        input_pwd = driver.find_element(By.ID, 'input2')
        signin = driver.find_element(By.ID, 'signin')

        input_username.send_keys(username)
        input_pwd.send_keys(password)
        signin.click()

        # 2. Solve the slide verification
        crack(driver)

        time.sleep(10)  # wait a while to make sure the login went through
    finally:
        # quit() ends the whole session and the driver process; close()
        # would only close the current window.
        driver.quit()


if __name__ == '__main__':
    login_cnblogs(username='linhaifeng', password='xxxx')
138 
139 修订版
修订版

用类封装的版本

  1 import time
  2 import random
  3 
  4 from selenium.webdriver import ActionChains
  5 from selenium.webdriver.common.by import By
  6 from PIL import Image
  7 
  8 
  9 # def simulate_reaction(func):
 10 #     """模拟人类的反应时间"""
 11 #     from functools import wraps
 12 #
 13 #     @wraps
 14 #     def inner(self, *args, **kwargs):
 15 #         time.sleep(random.uniform(0.2, 1))
 16 #         ret = func(self, *args, **kwargs)
 17 #         return ret
 18 #     return inner
 19 
 20 
 21 class SVCR:
 22     """识别滑动验证码   极验验证"""
 23 
 24     def __init__(self, driver):
 25         self.driver = driver
 26         self.get_full_img = True
 27 
 28     # @simulate_reaction
 29     def run(self):
 30         """执行识别流程"""
 31         # 1. 点击按钮开始验证
 32         self.click_start_btn()
 33 
 34         # 2. 根据验证类型验证
 35         return self.judge_and_auth()
 36 
 37     def judge_and_auth(self):
 38         """判断验证类型并执行相应的验证方法"""
 39         if True:
 40             return self.auth_slide()
 41         else:
 42             pass
 43 
 44     def auth_slide(self):
 45 
 46         def get_distance(img1, img2):
 47             """计算滑动距离"""
 48             threshold = 60
 49             # 忽略可动滑块部分
 50             start_x = 57
 51 
 52             for i in range(start_x, img1.size[0]):
 53                 for j in range(img1.size[1]):
 54                     rgb1 = img1.load()[i, j]
 55                     rgb2 = img2.load()[i, j]
 56                     res1 = abs(rgb1[0] - rgb2[0])
 57                     res2 = abs(rgb1[1] - rgb2[1])
 58                     res3 = abs(rgb1[2] - rgb2[2])
 59                     if not (res1 < threshold and res2 < threshold and res3 < threshold):
 60                         return i - 7  # 经过测试,误差为大概为7
 61 
 62         def get_tracks(distance):
 63             """
 64             制造滑动轨迹
 65 
 66             策略:匀加速再匀减速,超过一些,再回调,左右小幅度震荡
 67             """
 68 
 69             v = 0
 70             current = 0
 71             t = 0.2
 72             tracks = []
 73 
 74             # 正向滑动
 75             while current < distance+10:
 76                 if current < distance*2/3:
 77                     a = 2
 78                 else:
 79                     a = -3
 80                 s = v*t + 0.5*a*(t**2)
 81                 current += s
 82                 tracks.append(round(s))
 83                 v = v + a*t
 84 
 85             # 往回滑动
 86             current = 0
 87             while current < 13:
 88                 if current < distance*2/3:
 89                     a = 2
 90                 else:
 91                     a = -3
 92                 s = v*t + 0.5*a*(t**2)
 93                 current += s
 94                 tracks.append(-round(s))
 95                 v = v + a*t
 96 
 97             # 最后修正
 98             tracks.extend([2, 2, -3, 2])
 99 
100             return tracks
101 
102         # 1. 截取完整图片
103         if self.get_full_img:
104             time.sleep(2)            # 等待图片加载完毕
105             img_before = self.get_img()
106         else:
107             img_before = self._img_before
108 
109         # 2. 点击出现缺口图片
110         slider_btn = self.driver.find_element_by_class_name("geetest_slider_button")
111         slider_btn.click()
112 
113         # 3. 截取缺口图片
114         time.sleep(2)            # 等待图片加载完毕
115         img_after = self.get_img()
116 
117         # 4. 生成移动轨迹
118         tracks = get_tracks(get_distance(img_before, img_after))
119 
120         # 5. 模拟滑动
121         slider_btn = self.driver.find_element_by_class_name("geetest_slider_button")
122         ActionChains(self.driver).click_and_hold(slider_btn).perform()
123         for track in tracks:
124             ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()
125 
126         # 6. 释放鼠标
127         time.sleep(0.5)  # 0.5秒后释放鼠标
128         ActionChains(self.driver).release().perform()
129 
130         # 7. 验证是否成功
131 
132         time.sleep(2)
133         div_tag = self.driver.find_element_by_class_name("geetest_fullpage_click")
134         if "display: block" in div_tag.get_attribute("style"):
135             '''判断模块对话框是否存在,如果存在就说明没有验证成功,"display: block",重新去验证'''
136             self.get_full_img = False
137             setattr(self, "_img_before", img_before)
138             return self.auth_slide()
139         else:
140             #如果验证成功"display: none"
141             time.sleep(1000)
142             return True
143 
144     # @simulate_reaction
145     def click_start_btn(self, search_style="CLASS_NAME", search_content="geetest_radar_tip"):
146         """找到开始按钮并点击"""
147         btn = getattr(self.driver, "find_element")(getattr(By, search_style), search_content)
148         btn.click()
149 
150     def get_img(self):
151         """截取图片"""
152         div_tag = self.driver.find_element_by_class_name("geetest_slicebg")
153 
154         # 计算截取图片大小
155         img_pt = div_tag.location       # {'x': 296, 'y': 15}
156         img_size = div_tag.size         # {'height': 159, 'width': 258}
157         img_box = (img_pt["x"], img_pt["y"], img_pt["x"] + img_size["width"], img_pt["y"] + img_size["height"])
158 
159         # 保存当前浏览页面
160         self.driver.save_screenshot("snap.png")
161 
162         # 截取目标图片
163         img = Image.open("snap.png")
164         return img.crop(img_box)
svcr
 1 from selenium import webdriver
 2 
 3 from svcr import SVCR
 4 
 5 
 6 def auth():
 7     driver = webdriver.Chrome()
 8     # browser.get(url)
 9     driver.get("https://passport.cnblogs.com/user/signin")  #请求页面
10     driver.implicitly_wait(3)
11     # 第一步:输入账号、密码,然后点击登陆
12     input_name = driver.find_element_by_id('input1')  #找到输入用户名的框
13     input_pwd = driver.find_element_by_id('input2')  #找到输入密码的框
14     input_button = driver.find_element_by_id('signin')  #找到按钮
15     input_name.send_keys("name")#博客园的账号
16     input_pwd.send_keys("pwd")#博客园的密码
17     input_button.click()  #进行点击
18     return  driver
19 
def main():
    """Log in and solve the slide CAPTCHA, then shut the browser down."""
    driver = auth()  # submit the login form
    try:
        _auth = SVCR(driver)
        _auth.run()
    finally:
        # Always end the browser session, even if the verification raised.
        driver.quit()


if __name__ == '__main__':
    main()
使用类

 

posted on 2018-01-21 12:11  海燕。  阅读(1530)  评论(0)  编辑  收藏  举报