请求库-selenium 模块
# -*- coding: utf-8 -*-
# Setup (Chrome):
#   pip3 install selenium
#   Download chromedriver.exe and put it into the "Scripts" directory of the
#   Python installation (the original note mentions version 3.5 as the latest).
#   Mirror in China: http://npm.taobao.org/mirrors/chromedriver/3.5
#   Official downloads: https://sites.google.com/a/chromium.org/chromedriver/downloads
#
# Verify the installation:
from selenium import webdriver
# driver = webdriver.Chrome()            # opens a browser window
# driver.get("https://www.baidu.com")    # the browser navigates to this URL
# print(driver.page_source)              # print the page source to the terminal
#
# Setup (PhantomJS, headless):
#   pip3 install selenium
#   Download PhantomJS, unpack it and add the "bin" directory that contains
#   phantomjs.exe to the PATH environment variable.
#   Download link: http://phantomjs.org/download.html
# drivers = webdriver.PhantomJS(executable_path=r"E:\python\phantomjs-2.1.1-windows\bin\phantomjs.exe")  # headless browser
# With PATH configured the argument should not be needed, but (per the author)
# PyCharm did not pick it up for an unknown reason, so the path is given
# explicitly.
# drivers.get('https://www.baidu.com')
# print(drivers.page_source)

# Basic usage: search on Baidu and dump the result page.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CLASS_NAME, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys, e.g. Keys.ENTER
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

browser = webdriver.Chrome()
try:
    browser.get("https://www.baidu.com")  # open the page
    # find_element(By.ID, ...) replaces find_element_by_id, which newer
    # Selenium releases no longer provide.
    input_tag = browser.find_element(By.ID, "kw")  # the search box
    input_tag.send_keys("极致诱惑")  # type the search term
    input_tag.send_keys(Keys.ENTER)  # press Enter like a user would

    # Wait up to 10 seconds for the result container to be present.  The
    # original waited for the id "content_left--", which never matches, so the
    # wait always timed out and the prints below never ran.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, "content_left")))

    print("browser.page_source", browser.page_source)
    print("browser.current_url", browser.current_url)
    # Call get_cookies(); the original printed the bound method object of
    # get_cookie instead of any cookie data.
    print("browser.get_cookie()", browser.get_cookies())
finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and leave the driver process running.
    browser.quit()
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Demonstrates the element locator strategies by walking through the Baidu
# login dialog.
driver = webdriver.Chrome()
try:
    # Navigation happens inside the try block so the browser is shut down even
    # when the page fails to load.
    driver.get("https://www.baidu.com")
    wait = WebDriverWait(driver, 5)

    # =============== available locator strategies ===============
    # 1. By.ID
    # 2. By.LINK_TEXT
    # 3. By.PARTIAL_LINK_TEXT
    # 4. By.TAG_NAME
    # 5. By.CLASS_NAME
    # 6. By.NAME
    # 7. By.CSS_SELECTOR
    # 8. By.XPATH
    # (find_element(By.X, value) replaces the legacy find_element_by_x helpers)
    # ============================================================

    # 1. By.ID
    # print(driver.find_element(By.ID, "kw"))

    # 2. By.LINK_TEXT
    # login = driver.find_elements(By.LINK_TEXT, "登录")[0]
    # login.click()

    # 3. By.PARTIAL_LINK_TEXT: first link whose text contains the fragment.
    login = driver.find_elements(By.PARTIAL_LINK_TEXT, "录")[0]
    login.click()

    # 4. By.TAG_NAME
    # print(driver.find_element(By.TAG_NAME, "a"))

    # 5. By.CLASS_NAME: wait until the element with this class is clickable.
    button = wait.until(
        EC.element_to_be_clickable((By.CLASS_NAME, 'tang-pass-footerBarULogin'))
    )
    button.click()
    print("============")

    # 6. By.NAME
    input_user = wait.until(EC.presence_of_element_located((By.NAME, "userName")))
    input_pwd = wait.until(EC.presence_of_element_located((By.NAME, "password")))
    commit = wait.until(
        EC.element_to_be_clickable((By.ID, "TANGRAM__PSP_10__submit"))
    )

    input_user.send_keys("xxxxxxxxx")  # type the user name
    input_pwd.send_keys("xxxxxxxxx")  # type the password
    commit.click()
    time.sleep(4)  # keep the window open briefly so the result can be seen
finally:
    # quit() ends the session and the driver process; close() only closes the
    # current window.
    driver.quit()
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# XPath lookups against the Scrapy selector sample page.
driver = webdriver.Chrome()
try:
    driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')
    # Implicit wait: element lookups keep retrying for up to 3 seconds before
    # failing, which helps with slowly loading pages.
    driver.implicitly_wait(3)

    # "//a": search all descendants of the document root; raises if nothing
    # matches.
    driver.find_element(By.XPATH, "//a")
    # "//body/a" would only match <a> elements that are direct children of
    # <body>, and would raise when there are none.
    # driver.find_element(By.XPATH, "//body/a")
    # "//body//a": search all descendants of <body>.
    driver.find_element(By.XPATH, "//body//a")

    # XPath positions are 1-based, so a[3] is the third <a> among its siblings.
    res1 = driver.find_element(By.XPATH, "//body//a[3]")
    print(res1.text)

    r2 = driver.find_element(By.XPATH, "//a[3]")
    print(r2.text)

    r3 = driver.find_element(By.XPATH, '//*[@id="images"]/a[3]')
    print(r3.text)
    # Per the original note, res1, r2 and r3 resolve to the same element on
    # this page.

    # Python list indexing is 0-based, so [3] picks the FOURTH matching link.
    res4 = driver.find_elements(By.XPATH, "/html/body/div/a")[3]
    print(res4.text)

    # Select the <a> whose child <img> has the given src attribute.
    res5 = driver.find_element(By.XPATH, '//a[img/@src="image3_thumb.jpg"]')
    print(res5.text)
finally:
    # quit() ends the session and the driver process; close() only closes the
    # current window.
    driver.quit()
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# Reads attributes and geometry of an element on the Amazon China home page.
browser = webdriver.Chrome()
try:
    browser.get("https://www.amazon.cn")

    wait = WebDriverWait(browser, 10)
    wait.until(
        EC.presence_of_all_elements_located((By.ID, "cc-lm-tcgShowImgContainer"))
    )

    tag = browser.find_element(By.CSS_SELECTOR, "#cc-lm-tcgShowImgContainer img")
    print(tag.get_attribute("src"))  # value of the src attribute
    print(tag.id)  # Selenium's internal element id (not the HTML id attribute)
    print(tag.location)  # position of the element on the page
    print(tag.tag_name)  # tag name
    print(tag.size)  # rendered width/height
finally:
    # Without try/finally a wait timeout left the browser and the driver
    # process running; quit() shuts both down.
    browser.quit()
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# Drags the jQuery UI "draggable" box onto the "droppable" target using an
# action chain.
driver = webdriver.Chrome()
try:
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    # wait = WebDriverWait(driver, 3)
    driver.implicitly_wait(3)

    # The demo is rendered inside an iframe; switch into it before searching.
    driver.switch_to.frame("iframeResult")
    drop = driver.find_element(By.ID, "droppable")  # target, stays in place
    drag = driver.find_element(By.ID, "draggable")  # the box that gets moved

    # Approach 1: let Selenium do the whole drag in one call.
    # drag_and_drop takes (source, target); the original commented-out code
    # passed them in the opposite order.
    # ActionChains(driver).drag_and_drop(drag, drop).perform()
    # time.sleep(4)

    # Approach 2: press, move in small steps, release.
    ActionChains(driver).click_and_hold(drag).perform()
    # Horizontal distance between the two elements.
    distance = drop.location["x"] - drag.location["x"]
    track = 0
    while track < distance:
        ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
        track += 2
    ActionChains(driver).release().perform()

    time.sleep(10)  # keep the window open so the result can be inspected
finally:
    # quit() ends the session and the driver process; close() only closes the
    # current window.
    driver.quit()
等待元素加载
# 1、selenium只是模拟浏览器的行为,而浏览器解析页面是需要时间的(执行css,js),一些元素可能需要过一段时间才能加载出来,为了保证能查找到元素,必须等待
# 2、等待的方式分两种:
#    隐式等待:在browser.get('xxx')前就设置,针对所有元素有效
#    显式等待:在browser.get('xxx')之后设置,只针对某个元素有效
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# Implicit wait demo: search on Baidu and look up the result container.
browser = webdriver.Chrome()
try:
    # Implicit wait: every element lookup keeps retrying for up to 10 seconds
    # when the element has not been loaded yet.
    browser.implicitly_wait(10)

    browser.get('https://www.baidu.com')
    input_tag = browser.find_element(By.ID, 'kw')
    input_tag.send_keys('美女')
    input_tag.send_keys(Keys.ENTER)

    # No explicit wait step here: the lookup relies on the implicit wait and
    # raises if the element still cannot be found once it expires.
    contents = browser.find_element(By.ID, 'content_left')
    print(contents)
finally:
    # Always shut the browser and the driver process down, even when a lookup
    # fails.
    browser.quit()
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# Explicit wait demo: search on Baidu and wait for the result container.
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    input_tag = browser.find_element(By.ID, 'kw')
    input_tag.send_keys('美女')
    input_tag.send_keys(Keys.ENTER)

    # Explicit wait: block for up to 10 seconds until this one element is
    # present in the DOM.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))

    contents = browser.find_element(By.CSS_SELECTOR, '#content_left')
    print(contents)
finally:
    # Always shut the browser and the driver process down, even when the wait
    # times out.
    browser.quit()
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# Element interaction demo: type into the Taobao search box, click the search
# button, then clear the box and search again.
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com/")
    wait = WebDriverWait(browser, 10)

    # Variant without explicit waits:
    # input_tag = browser.find_element(By.ID, "q")
    # input_tag.send_keys("情趣用品")
    # button = browser.find_element(By.CLASS_NAME, "btn-search")
    # button.click()

    input_tag = wait.until(EC.presence_of_element_located((By.ID, "q")))
    input_tag.send_keys("情趣用品")
    # Wait until the button can actually be clicked; mere presence in the DOM
    # does not guarantee that a click will succeed.
    button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "btn-search")))
    button.click()

    time.sleep(3)  # give the result page time to load

    # Look the elements up again because the page has changed.
    input_tag = browser.find_element(By.ID, "q")
    input_tag.clear()  # remove the previous search term
    input_tag.send_keys("iphone9")
    button = browser.find_element(By.CLASS_NAME, "btn-search")
    button.click()

    time.sleep(3)  # keep the second result page visible before shutting down
finally:
    # The original never closed the browser, leaving the window and the driver
    # process behind after the script finished.
    browser.quit()
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# Runs a JavaScript snippet inside the page.
# The driver is created before the try block: if Chrome fails to start there
# is nothing to clean up, and the finally clause no longer fails with a
# NameError that hides the real error.
browser = webdriver.Chrome()
try:
    browser.get("https://www.baidu.com")
    browser.execute_script("alert(6666)")  # pops up a JavaScript alert
    time.sleep(3)  # leave the alert visible for a moment
    # Dismiss the alert explicitly so no dialog is pending at shutdown.
    browser.switch_to.alert.accept()
finally:
    # quit() ends the session and the driver process; close() only closes the
    # current window.
    browser.quit()
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# A frame behaves like a separate page: elements inside a child frame cannot
# be found from the parent frame.  Switch into the frame first, then search.
#
# The driver is created before the try block so that a failed start does not
# trigger a NameError in the finally clause.
browser = webdriver.Chrome()
try:
    browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # Search inside the iframe first ...
    browser.switch_to.frame("iframeResult")
    tag1 = browser.find_element(By.ID, "droppable")
    print(tag1)

    # ... then switch back to the parent frame and search there.
    browser.switch_to.parent_frame()
    tag2 = browser.find_element(By.ID, "textareaCode")
    print(tag2)
finally:
    # quit() ends the session and the driver process; close() only closes the
    # current window.
    browser.quit()
import time

from selenium import webdriver

# Browser history demo: visit Baidu, Taobao and Sina in turn, go back to
# Taobao, move forward to Sina again after 12 seconds, and shut down 12
# seconds later.
browser = webdriver.Chrome()
try:
    browser.get("https://www.baidu.com")
    browser.get("https://www.taobao.com")
    browser.get("https://www.sina.com.cn/")

    browser.back()  # go back one entry in the history
    time.sleep(12)
    browser.forward()  # go forward again
    time.sleep(12)
finally:
    # Shut the browser and the driver process down even when a page fails to
    # load; close() would only close the current window.
    browser.quit()
# cookies
# from selenium import webdriver
#
# browser = webdriver.Chrome()
# browser.get('https://www.zhihu.com/explore')
# print(browser.get_cookies())
# # add_cookie 的字典必须包含 'name' 和 'value' 两个键
# browser.add_cookie({'name': 'k1', 'value': 'xxx'})
# browser.add_cookie({'name': 'k2', 'value': 'yyy'})
# print(browser.get_cookies())
#
# # browser.delete_all_cookies()
import time

from selenium import webdriver

# Tab handling demo: open Baidu, open a second tab and load Taobao in it,
# wait 10 seconds, switch back to the first tab, load Sina, then shut down.
browser = webdriver.Chrome()
try:
    browser.get("https://www.baidu.com")
    browser.execute_script("window.open()")  # open a new, empty tab
    print(browser.window_handles)  # one handle per open tab/window

    browser.switch_to.window(browser.window_handles[1])  # the new tab
    browser.get("https://www.taobao.com")
    time.sleep(10)

    browser.switch_to.window(browser.window_handles[0])  # back to the first tab
    browser.get("https://www.sina.com.cn")
finally:
    # close() would only close the current tab and leave the other tab and the
    # driver process running; quit() closes every window and ends the session.
    browser.quit()
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException, NoSuchFrameException

# Exception handling demo: switching to a frame that does not exist raises
# NoSuchFrameException, which is caught and printed.
#
# The driver is created before the try block so that a failed start does not
# trigger a NameError in the finally clause.
browser = webdriver.Chrome()
try:
    browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    # The frame name is misspelled on purpose to provoke the exception.
    browser.switch_to.frame('iframssseResult')
except TimeoutException as e:
    print(e)
except NoSuchFrameException as e:
    print(e)
finally:
    # quit() ends the session and the driver process; close() only closes the
    # current window.
    browser.quit()
# -*- coding: utf-8 -*-
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load


def get_good(driver, max_pages=100):
    """Print the goods listed on the current result page and the pages after it.

    For every element with class ``gl-item`` the detail link, price, name and
    comment count are printed.  The function then clicks the link whose text
    contains "下一页" (next page) and repeats.

    The original implementation called itself once per page and hid every
    error behind ``except Exception: pass``.  This version iterates, stops
    when there is no next-page link, and reports any other WebDriver error
    before stopping.

    :param driver: WebDriver instance currently showing a search result page.
    :param max_pages: upper bound on the number of pages visited; guards
        against looping forever if the next-page link never disappears.
    :return: None
    """
    try:
        for _ in range(max_pages):
            for good in driver.find_elements(By.CLASS_NAME, "gl-item"):
                try:
                    detail_url = good.find_element(By.TAG_NAME, "a").get_attribute("href")  # detail link
                    detail_price = good.find_element(By.CSS_SELECTOR, ".p-price i").text  # price
                    detail_name = good.find_element(By.CSS_SELECTOR, ".p-name em").text  # product name
                    detail_com = good.find_element(By.CSS_SELECTOR, ".p-commit a").text  # comment count
                except NoSuchElementException:
                    # Skip entries that lack one of the fields instead of
                    # aborting the whole crawl.
                    continue
                msg = """
                商品名:%s
                详情链接:%s
                商品价格:%s
                评论量:%s
                """ % (detail_name, detail_url, detail_price, detail_com)
                print(msg)  # this is the place to write the record to a file instead

            try:
                button = driver.find_element(By.PARTIAL_LINK_TEXT, '下一页')  # next-page link
            except NoSuchElementException:
                return  # no further page
            button.click()
            # Pause before the next page; crawling too fast is easy for the
            # server to detect.
            time.sleep(2)
    except WebDriverException as exc:
        # Stay best-effort like the original, but say why the crawl stopped.
        print("crawl stopped:", repr(exc))


def spilder(url, keyword):
    """Open *url*, search for *keyword* and print every listed product.

    :param url: address of the shop's start page.
    :param keyword: search term typed into the element with id ``key``.
    :return: None
    """
    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(7)  # element lookups retry for up to 7 seconds
        driver.get(url)
        input_tag = driver.find_element(By.ID, "key")
        input_tag.send_keys(keyword)
        input_tag.send_keys(Keys.ENTER)
        get_good(driver)
    finally:
        # quit() ends the session and the driver process; close() only closes
        # the current window.
        driver.quit()


if __name__ == '__main__':
    # The original URL "https:www.jd.com" was missing the "//" after the scheme.
    spilder("https://www.jd.com", "情趣")
# -*- coding: utf-8 -*-
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load
from PIL import Image  # pip3 install pillow


def get_snap(driver):
    """Save a screenshot to ``snap.png`` and return it as a PIL image.

    :param driver: WebDriver instance.
    :return: the screenshot opened with ``Image.open``.
    """
    driver.save_screenshot("snap.png")
    snap_obj = Image.open("snap.png")
    # snap_obj.show()
    return snap_obj


def get_image(driver):
    """Return the captcha picture cropped out of a fresh screenshot.

    :param driver: WebDriver instance showing the captcha.
    :return: PIL image covering the element with class ``geetest_canvas_img``.
    """
    img = driver.find_element(By.CLASS_NAME, "geetest_canvas_img")
    time.sleep(2)  # presumably lets the captcha finish rendering -- TODO confirm
    size = img.size  # width/height of the element
    location = img.location  # position of the element on the page
    left = location["x"]
    top = location["y"]
    right = left + size["width"]
    bottom = top + size["height"]
    # print(left, top, right, bottom)
    snap_obj = get_snap(driver)
    image_obj = snap_obj.crop((left, top, right, bottom))  # cut the element out of the screenshot
    # image_obj.show()
    return image_obj


def get_distance(image1, image2, start_x=58, threshold=60, offset=7):
    """Return the x position of the first clearly differing pixel, minus *offset*.

    The images are scanned column by column from *start_x*.  A pixel differs
    when any of its first three channels deviates by *threshold* or more.

    :param image1: image of the complete picture.
    :param image2: image of the picture with the gap; must be at least as
        large as *image1*.
    :param start_x: first column to inspect.
    :param threshold: per-channel difference at which pixels count as different.
    :param offset: value subtracted from the column that was found.
    :return: ``x - offset`` for the first differing column, or ``None`` when
        the images do not differ in the scanned area.
    """
    # Load the pixel access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size
    for x in range(start_x, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            if any(abs(rgb1[c] - rgb2[c]) >= threshold for c in range(3)):
                return x - offset
    return None


def get_tracks(distance):
    """Split a drag of *distance* pixels into small, human-looking steps.

    The forward part covers ``distance + 20`` pixels: it accelerates over the
    first three fifths and decelerates afterwards.  The backward part takes
    the extra 20 pixels back in small negative steps.

    Compared with the original, the deceleration phase always advances by at
    least one pixel and the last step is trimmed.  The loop therefore always
    terminates and the forward steps add up to exactly ``distance + 20``.

    :param distance: number of pixels the slider finally has to travel.
    :return: dict with the lists ``"forward_tracks"`` and ``"back_tracks"``.
    """
    distance += 20  # overshoot by 20 pixels, taken back by back_tracks below
    v = 0  # current velocity
    s = 0  # distance covered so far
    t = 0.2  # duration of one step
    mid = distance * 3 / 5  # switch from accelerating to decelerating here
    forward_tracks = []
    while s < distance:
        a = 2 if s < mid else -3
        track = round(v * t + 0.5 * a * (t ** 2))
        v += a * t
        if a < 0 and track < 1:
            # Rounding can stall or reverse the movement while braking; keep
            # creeping forward so the target is always reached.
            track = 1
        track = min(track, distance - s)  # do not run past the target
        s += track
        forward_tracks.append(track)
    back_tracks = [-1, -1, -1, -2, -2, -2, -3, -3, -2, -2, -1]  # sums to -20
    return {"forward_tracks": forward_tracks, "back_tracks": back_tracks}


# NOTE: the name shadows the builtin ``slice``; it is kept so existing callers
# keep working.
def slice(url, username, password):
    """Log in at *url* and solve the slider captcha by dragging the slider.

    Errors are reported on stdout instead of being raised (the original
    swallowed them silently).

    :param url: address of the sign-in page.
    :param username: account name typed into the element with id ``input1``.
    :param password: password typed into the element with id ``input2``.
    :return: None
    """
    # Created before the try block so that a failed start does not trigger a
    # NameError in the finally clause.
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.implicitly_wait(3)

        # 1. Enter account and password, click the sign-in button.
        input_user = driver.find_element(By.ID, "input1")
        input_pwd = driver.find_element(By.ID, "input2")
        login_user = driver.find_element(By.ID, "signin")
        input_user.send_keys(username)
        input_pwd.send_keys(password)
        login_user.click()

        # 2. Click the button that brings up the captcha (complete picture).
        geetest_radar_tip = driver.find_element(By.CLASS_NAME, "geetest_radar_tip")
        geetest_radar_tip.click()

        # 3. Capture the picture without the gap.
        image1 = get_image(driver)

        # 4. Click the slider button so the picture with the gap appears.
        slider_button = driver.find_element(By.CLASS_NAME, "geetest_slider_button")
        slider_button.click()

        # 5. Capture the picture with the gap.
        image2 = get_image(driver)

        # 6. Compare both pictures; the gap position is the drag distance.
        distance = get_distance(image1, image2)
        if distance is None:
            print("slider login failed: no gap found between the two captcha images")
            return

        # 7. Split the total distance into small steps, like a person would move.
        track_dic = get_tracks(distance)

        # 8. Drag the slider along the computed steps.
        slider_button = driver.find_element(By.CLASS_NAME, "geetest_slider_button")
        ActionChains(driver).click_and_hold(slider_button).perform()  # press and hold
        forward_tracks = track_dic["forward_tracks"]  # forward steps
        back_tracks = track_dic["back_tracks"]  # backward steps (undo the +20 overshoot)
        print(forward_tracks, back_tracks)
        for forward_track in forward_tracks:
            ActionChains(driver).move_by_offset(xoffset=forward_track, yoffset=0).perform()
        time.sleep(0.3)  # short pause to imitate a human delay
        # print("==================================>")
        for back_track in back_tracks:
            ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()
        # Final small wiggle back and forth.
        ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
        ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
        time.sleep(0.3)
        ActionChains(driver).release().perform()  # let go of the slider
        time.sleep(2)
    except Exception as exc:
        # Top-level boundary of the script: report the failure instead of
        # hiding it.
        print("slider login failed:", repr(exc))
    finally:
        # quit() ends the session and the driver process; close() only closes
        # the current window.
        driver.quit()


if __name__ == '__main__':
    url = "https://passport.cnblogs.com/user/signin"
    username = "username"
    password = "password"
    slice(url, username, password)
本文来自博客园,作者:一石数字欠我15w!!!,转载请注明原文链接:https://www.cnblogs.com/52-qq/p/8303336.html