PYTHON 爬虫笔记七:Selenium库基础用法
知识点一:Selenium库详解及其基本使用
-
什么是Selenium
selenium 是一套完整的web应用程序测试系统,包含了测试的录制(selenium IDE),编写及运行(Selenium Remote Control)和测试的并行处理(Selenium Grid)。
selenium的核心Selenium Core基于JsUnit,完全由JavaScript编写,因此可以用于任何支持JavaScript的浏览器上。
selenium是一个可以模拟真实浏览器的自动化测试工具,支持多种浏览器,在爬虫中主要用来解决JavaScript渲染问题。
-
基本使用
用python写爬虫的时候,主要用的是selenium的Webdriver,我们可以通过下面的方式先看看Selenium.Webdriver支持哪些浏览器
-
基本用法:
#打开google浏览器,再打开百度,输入Python然后按回车 from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) try: browser.get('https://www.baidu.com') input = browser.find_element_by_id('kw') # 找到id为kw的元素 input.send_keys('Python') # 敲入Python input.send_keys(Keys.ENTER) # 敲入回车 wait = WebDriverWait(browser, 10) wait.until(EC.presence_of_element_located((By.ID, 'content_left'))) # 等待某个元素加载出来 print(browser.current_url) print(browser.get_cookies()) print(browser.page_source) # 网页源代码 finally: browser.close()
https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=aa95af1b0000f45e&rsv_t=53e2qpjWA%2FivTq6GgdT4FAhWM%2FWfkPZhHYkLoaS7MVPQUZCHF%2FkLQV2%2Brnc&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=111&rsv_sug4=112 [{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '1464_21121_26922_22159'}, {'domain': '.baidu.com', 'expiry': 3681286522.89887, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4:FG=1'}, {'domain': '.baidu.com', 'expiry': 3681286522.899111, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4'}, {'domain': '.baidu.com', 'expiry': 3681286522.899241, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1533802877'}, {'domain': '.baidu.com', 'httpOnly': False, 'name': 'PSINO', 'path': '/', 'secure': False, 'value': '7'}, {'domain': 'www.baidu.com', 'expiry': 2479882880.172246, 'httpOnly': False, 'name': 'delPer', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'expiry': 1534666877, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_CK_SAM', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.baidu.com', 'expiry': 1533805472, 'httpOnly': False, 'name': 'H_PS_645EC', 'path': '/', 'secure': False, 'value': 'c9083HSTzGdEsBVBx%2FDjhaEep8Lu5MHd8KusVOaaun2nj5W%2Bjur8%2BSHut%2BM'}] <!DOCTYPE html><!--STATUS OK--><html xmlns="http://www.w3.org/1999/xhtml"><head><script charset="utf-8" async="" src="https://ss0.bdstatic.com/-0U0bnSm1A5BphGlnYG/tam-ogel/5d4e9b24-dcc5-483a-b6da-be1e9e621891.js"></script>
-
声明浏览器对象(上面我们知道了selenium支持很多的浏览器,但是如果想要声明并调用浏览器则需要:)
from selenium import webdriver browser = webdriver.Chrome() browser = webdriver.Firefox() browser = webdriver.Edge() browser = webdriver.PhantomJS() browser = webdriver.Safari()
-
访问页面
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.taobao.com') print(browser.page_source) browser.close()
<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN" class="ks-webkit537 ks-webkit ks-chrome68 ks-chrome"><head><script>/*! 2018-08-07 14:58:00 v8.5.7 */ !function(e){function i(n){if(o[n])return o[n].exports;var r=o[n]={exports:{},id:n,loaded:!1};return e[n].call(r.exports,r,r.exports,i),r.loaded=!0,r.exports}var o={};return i.m=e,i.c=o,i.p="",i(0)}([function(e,i){"use strict";var o=window,n=document;!function(){var e=2,r="ali_analytics";if(o[r]&&o[r].ua&&e<=o[r].ua.version)return void(i.info=o[r].ua);var t,a,d,s,c,u,h,l,m,b,f,v,p,w,g,x,z,O=o.navigator,k=O.appVersion,T=O&&O.userAgent||"",y=function(e){var i=0;return parseFloat(e.replace(/\./g,function(){return 0===i++?".":""}))},_=function(e,i){var o,n;i[o="trident"]=.1,(n=e.match(/Trident\/([\d.]*)/))&&n[1]&&(i[o]=y(n[1])),i.core=o},N=function(e){var i,o;return(i=e.match(/MSIE ([^;]*)|Trident.*; rv(?:\s|:)?([0-9.]+)/))&&(o=i[1]||i[2])?y(o):0},P=function(e){return e||"other"},M=function(e){function i(){for(var i=[["Windows NT 5.1","winXP"],["Windows NT 6.1","win7"],["Windows NT 6.0","winVista"],["Windows NT 6.2","win8"],["Windows NT 10.0","win10"],["iPad","ios"],["iPhone;","ios"],["iPod","ios"],["Macintosh","mac"],["Android","android"],["Ubuntu","ubuntu"],["Linux","linux"],["Windows NT 5.2","win2003"],["Windows NT 5.0","win2000"],["Windows","winOther"],["rhino","rhino"]],o=0,n=i.length;o<n;++o)if(e.indexOf(i[o][0])!==-1)return i[o][1];return"other"}function r(e,i,n,r){var t,a=o.navigator.mimeTypes;try{for(t in a)if(a.hasOwnProperty(t)&&a[t][e]==i){if(void 0!==n&&r.test(a[t][n]))return!0;if(void 0===n)return!0}return!1}catch(e){return!1}}var t,a,d,s,c,u,h,l="",m=l,b=l,f=[6,9],v="{{version}}",p="<!--[if IE "+v+"]><s></s><![endif]-->",w=n&&n.createElement("div"),g=[],x={webkit:void 0,edge:void 0,trident:void 0,gecko:void 0,presto:void 0,chrome:void 0,safari:void 0,firefox:void 0,ie:void 0,ieMode:void 0,opera:void 0,mobile:void 0,core:void 0,shell:void 0,phantomjs:void 0,os:void 0,ipad:void 
0,iphone:void 0,ipod:void 0,ios:void 0,android:void 0,nodejs:void 0,extraName:void 0,extraVersion:void 0};if(w&&w.getElementsByTagName&&(w.innerHTML=p.replace(v,""),g=w.getElementsByTagName("s")),g.length>0){for(_(e,x),s=f[0],c=f[1];s<=c;s++)if(w.innerHTML=p.replace(v,s),g.length>0){x[b="ie"]=s;break}!x.ie&&(d=N(e))&&(x[b="ie"]=d)}else((a=e.match(/AppleWebKit\/*\s*([\d.]*)/i))||(a=e.match(/Safari\/([\d.]*)/)))&&a[1]?(x[m="webkit"]=y(a[1]),(a=e.match(/OPR\/(\d+\.\d+)/))&&a[1]?x[b="opera"]=y(a[1]):(a=e.match(/Chrome\/([\d.]*)/))&&a[1]?x[b="chrome"]=y(a[1]):(a=e.match(/\/([\d.]*) Safari/))&&a[1]?x[b="safari"]=y(a[1]):x.safari=x.webkit,(a=e.match(/Edge\/([\d.]*)/))&&a[1]&&(m=b="edge",x[m]=y(a[1])),/ Mobile\//.test(e)&&e.match(/iPad|iPod|iPhone/)?(x.mobile="apple",a=e.match(/OS ([^\s]*)/),a&&a[1]&&(x.ios=y(a[1].replace("_","."))),t="ios"。。。。。。。。。。。
-
查找元素
-
单个元素
#element from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.taobao.com') input_first = browser.find_element_by_id('q') # 寻找 id='q'的元素 input_second = browser.find_element_by_css_selector('#q') # 通过css选择器选择 input_third = browser.find_element_by_xpath('//*[@id="q"]') # 同上 print(input_first, input_second, input_third) browser.close()
#这里我们通过三种不同的方式去获取相应的元素,第一种是通过id的方式,第二种是CSS选择器,第三种是xpath选择器,结果都是相同的。
<selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")> <selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")> <selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")>
另一种方式:
from selenium import webdriver from selenium.webdriver.common.by import By chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.taobao.com') input_first = browser.find_element(By.ID, 'q') print(input_first) browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="2706eb775a80e5eb5af01293caaf84e1", element="0.30861434960698597-1")>
查找单个元素的方法
* find_element_by_name
* find_element_by_xpath
* find_element_by_link_text
* find_element_by_partial_link_text
* find_element_by_tag_name
* find_element_by_class_name
* find_element_by_css_selector
-
查找多个元素
#elements from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.taobao.com') lis = browser.find_elements_by_css_selector('.service-bd li') print(lis) browser.close()
[<selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-1")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-2")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-3")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-4")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-5")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-6")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-7")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-8")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-9")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-10")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-11")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-12")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-13")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-14")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-16")>]
另一种方式:
from selenium import webdriver from selenium.webdriver.common.by import By chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.taobao.com') lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li') print(lis) browser.close()
[<selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-1")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-2")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-3")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-4")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-5")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-6")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-7")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-8")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-9")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-10")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-11")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-12")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-13")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-14")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-16")>]
一些其他的方式:
find_elements_by_name
find_elements_by_xpath
find_elements_by_link_text
find_elements_by_partial_link_text
find_elements_by_tag_name
find_elements_by_class_name
find_elements_by_css_selector
-
-
元素交互操作
先获取元素。
对获取的元素调用交互方法:
from selenium import webdriver import time chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.taobao.com') input = browser.find_element_by_id('q') # 找到搜索框 input.send_keys('iPhone') # 输入 iPhone time.sleep(1) # 等待一秒 input.clear() # 清空文本框 input.send_keys('iPad') # 输入ipad button = browser.find_element(By.CLASS_NAME,'btn-search') # 找到搜索按钮 button.click() # 点击 browser.close()
更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement
-
交互动作(将动作附加到动作链中串行执行)
from selenium import webdriver from selenium.webdriver import ActionChains chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) # 声明浏览器对象 url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable' browser.get(url) browser.switch_to.frame('iframeResult') #切换到‘iframeResult’框架 source = browser.find_element_by_css_selector('#draggable') # 选择元素 target = browser.find_element_by_css_selector('#droppable') # 选择元素 actions = ActionChains(browser) # 声明一个动作链对象 actions.drag_and_drop(source, target) # 拖拽方法 actions.perform() # 执行动作
更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains
-
执行JavaScript(执行一些交互动作时,可能一些动作没有提供API。)
#把网页拉到最下面,并提示 from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) # 声明浏览器对象 browser.get('https://www.zhihu.com/explore') browser.execute_script('window.scrollTo(0, document.body.scrollHeight)') browser.execute_script('alert("To Bottom")')
-
获取元素信息
-
获取属性:*.get_attribute(“class”) *
from selenium import webdriver from selenium.webdriver import ActionChains chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) # 声明浏览器对象 url = 'https://www.zhihu.com/explore' browser.get(url) logo = browser.find_element_by_id('zh-top-link-logo') print(logo) print(logo.get_attribute('class')) browser.close()
<selenium.webdriver.remote.webelement.WebElement (session="d5564ba9ec58a9a53015648f61a56bd7", element="0.46068206240103504-1")> zu-top-link-logo
-
获取文本值:*text*
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) # 声明浏览器对象 url = 'https://www.zhihu.com/explore' browser.get(url) input = browser.find_element_by_class_name('zu-top-add-question') print(input.text) #提问
-
获取ID、位置、标签名、大小
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) # 声明浏览器对象 url = 'https://www.zhihu.com/explore' browser.get(url) input = browser.find_element_by_class_name('zu-top-add-question') print(input.id) print(input.location) print(input.tag_name) print(input.size)
0.0866191825002236-1 {'x': 759, 'y': 7} button {'height': 32, 'width': 66}
-
Frame
很多网页中都有Frame标签,所以我们爬取数据的时候就涉及到切入frame以及从frame中切出来的问题,下面通过例子演示。这里常用的是switch_to.frame()和switch_to.parent_frame()。在父级frame中要查找子级frame的元素,必须先切换到子frame,否则不能查找;同理,在子frame中也不能查找父frame的元素。
import time from selenium import webdriver from selenium.common.exceptions import NoSuchElementException chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable' # 声明网址 browser.get(url) # get传入网址 browser.switch_to.frame('iframeResult') # 切换到子frame source = browser.find_element_by_css_selector('#draggable') # 查找子frame中的元素 print(source) try: logo = browser.find_element_by_class_name('logo') except NoSuchElementException: print('NO LOGO') browser.switch_to.parent_frame() # 切换到父frame logo = browser.find_element_by_class_name('logo') print(logo) print(logo.text)
<selenium.webdriver.remote.webelement.WebElement (session="7d3651ff77a4266e974a48387a562fe1", element="0.7541880746473664-1")> NO LOGO <selenium.webdriver.remote.webelement.WebElement (session="7d3651ff77a4266e974a48387a562fe1", element="0.04296955550550363-2")> RUNOOB.COM
-
-
等待
当使用了隐式等待执行测试的时候,如果WebDriver没有在DOM中找到元素,将继续等待,超出设定时间后则抛出找不到元素的异常。换句话说,当要查找的元素没有立即出现的时候,隐式等待将等待一段时间再查找DOM,默认的时间是0。
-
隐式等待
查找元素时如果元素还没有加载出来,就继续等待,最长等待我们指定的时间;如果超过了指定的时间元素仍未加载,就会抛出异常;如果元素无需等待就已经加载完毕,则会立即执行。
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.implicitly_wait(10) # 传入隐式等待的时间 browser.get('https://www.zhihu.com/explore') input = browser.find_element_by_class_name('zu-top-add-question') print(input)
<selenium.webdriver.remote.webelement.WebElement (session="7c232ca91150077a6d9ded5a653ebf6d", element="0.019205089543862464-1")>
-
显式等待:* .WebDriverWait(对象,最长等待时间)*
指定一个等待条件和一个最长等待时间,在这个时间内不断判断等待条件是否满足:如果满足就立即正常返回;如果不满足就继续等待,直到超过指定的最长等待时间,此时仍不满足就会抛出异常。
from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.taobao.com/') wait = WebDriverWait(browser, 10) # 声明一个等待对象 # 判断元素是否加载 input = wait.until(EC.presence_of_element_located((By.ID, 'q'))) # 传入等待条件 # # 判断是否可点击的,一般用来判断是否为按钮 button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))) print(input, button)
<selenium.webdriver.remote.webelement.WebElement (session="ac4d3c5db3d4f3aae7b828ad648e87df", element="0.49320541507860316-1")> <selenium.webdriver.remote.webelement.WebElement (session="ac4d3c5db3d4f3aae7b828ad648e87df", element="0.49320541507860316-2")>
上述的例子中的条件:
EC.presence_of_element_located()
是确认元素是否已经出现了
EC.element_to_be_clickable()
是确认元素是否是可点击的
常用的判断条件:
title_is
标题是某内容
title_contains
标题包含某内容
presence_of_element_located
元素加载出,传入定位元组,如(By.ID, 'p')
visibility_of_element_located
元素可见,传入定位元组
visibility_of
可见,传入元素对象
presence_of_all_elements_located
所有元素加载出
text_to_be_present_in_element
某个元素文本包含某文字
text_to_be_present_in_element_value
某个元素值包含某文字
frame_to_be_available_and_switch_to_it
frame加载并切换
invisibility_of_element_located
元素不可见
element_to_be_clickable
元素可点击
staleness_of
判断一个元素是否仍在DOM中,可判断页面是否已经刷新
element_to_be_selected
元素可选择,传元素对象
element_located_to_be_selected
元素可选择,传入定位元组
element_selection_state_to_be
传入元素对象以及状态,相等返回True,否则返回False
element_located_selection_state_to_be
传入定位元组以及状态,相等返回True,否则返回False
alert_is_present
是否出现Alert
更多操作参考:http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.support.expected_conditions
-
-
前进后退:*back();forward()*
#打开谷歌浏览器,然后打开百度首页,再打开淘宝、知乎首页,返回淘宝页面等待一秒再回到知乎界面 import time from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.baidu.com/') browser.get('https://www.taobao.com/') browser.get('https://www.zhihu.com/') browser.back() time.sleep(1) browser.forward() browser.close()
-
Cookies:*get_cookies();delete_all_cookies();add_cookie()*
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.zhihu.com/explore') print(browser.get_cookies()) browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}) print(browser.get_cookies()) browser.delete_all_cookies() print(browser.get_cookies())
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1533802053.671843, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '156dfd931a77f9586c0da07030f2df36'}, {'domain': '.zhihu.com', 'expiry': 1533802958, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1533801158'}, {'domain': '.zhihu.com', 'expiry': 1628409153.672149, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '5fdfbadd6fc347398cc287ebb19a383c|1533801155000|1533801155000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'e58e3a1c1f41931b44a67c2b426c2f1e'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672288, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjI4MWJhNjU3YTRiNGFkMmE0OTkxNjk5MDMxZjc4NTU=|1533801155|61d044a51d2c7f0c8a99b9efcdec2f929a626003"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672375, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"OTQ3Mjc1NmI0ZWEyNDFiOTkwYjAxYmY5NmU5Y2YyZGY=|1533801155|e2f66995a8136044730315e95ab3230932d3e85a"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672453, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"NzRiMmNiMGQ2ZjYwNGIwM2I2Nzg0OTExNGUxZTJmZTA=|1533801155|9ad2a89322a8cfaf04770121c359a1de82636336"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1628409157.390474, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"ALDleMF-Bw6PTmu-31M1eULrY7jjkGvu9tA=|1533801158"'}, {'domain': '.zhihu.com', 'expiry': 1596873157, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '084e48e3-598e-4d8a-85b4-4def087fc321'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': 
'__utma', 'path': '/', 'secure': False, 'value': '51854390.543124770.1533801158.1533801158.1533801158.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1549569158, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1533801158.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180809=1'}] [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1533802053.671843, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '156dfd931a77f9586c0da07030f2df36'}, {'domain': '.zhihu.com', 'expiry': 1533802958, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1533801158'}, {'domain': '.zhihu.com', 'expiry': 1628409153.672149, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '5fdfbadd6fc347398cc287ebb19a383c|1533801155000|1533801155000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'e58e3a1c1f41931b44a67c2b426c2f1e'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672288, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjI4MWJhNjU3YTRiNGFkMmE0OTkxNjk5MDMxZjc4NTU=|1533801155|61d044a51d2c7f0c8a99b9efcdec2f929a626003"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672375, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"OTQ3Mjc1NmI0ZWEyNDFiOTkwYjAxYmY5NmU5Y2YyZGY=|1533801155|e2f66995a8136044730315e95ab3230932d3e85a"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672453, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': 
'"NzRiMmNiMGQ2ZjYwNGIwM2I2Nzg0OTExNGUxZTJmZTA=|1533801155|9ad2a89322a8cfaf04770121c359a1de82636336"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1628409157.390474, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"ALDleMF-Bw6PTmu-31M1eULrY7jjkGvu9tA=|1533801158"'}, {'domain': '.zhihu.com', 'expiry': 1596873157, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '084e48e3-598e-4d8a-85b4-4def087fc321'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utma', 'path': '/', 'secure': False, 'value': '51854390.543124770.1533801158.1533801158.1533801158.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1549569158, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1533801158.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180809=1'}, {'domain': 'www.zhihu.com', 'expiry': 2164521158, 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}] []
-
选项卡管理
通过执行js命令window.open()可以新开选项卡;不同的选项卡保存在列表browser.window_handles里,通过browser.window_handles[0]就可以操作第一个选项卡。
import time from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.baidu.com') browser.execute_script('window.open()') # 通过执行一个js语句来新建选项卡 print(browser.window_handles) # 返回所有的选项卡的引用 browser.switch_to_window(browser.window_handles[1]) browser.get('https://www.taobao.com') time.sleep(1) browser.switch_to_window(browser.window_handles[0]) browser.get('https://python.org')
['CDwindow-B1E2119962846BA7AED735D128600D3E', 'CDwindow-B231BF356905D63DE59F86F644F77A8F']
-
异常处理
这里的异常比较复杂,官网的参考地址:
http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions
这里只进行简单的演示,查找一个不存在的元素
- 实例,会报错
from selenium import webdriver chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) browser.get('https://www.baidu.com') browser.find_element_by_id('hello')
NoSuchElementException Traceback (most recent call last) <ipython-input-15-b8bcc6bfca0b> in <module>() 4 browser = webdriver.Chrome(chromedriver) 5 browser.get('https://www.baidu.com') ----> 6 browser.find_element_by_id('hello') D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element_by_id(self, id_) 357 element = driver.find_element_by_id('foo') 358 """ --> 359 return self.find_element(by=By.ID, value=id_) 360 361 def find_elements_by_id(self, id_): D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element(self, by, value) 964 return self.execute(Command.FIND_ELEMENT, { 965 'using': by, --> 966 'value': value})['value'] 967 968 def find_elements(self, by=By.ID, value=None): D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params) 318 response = self.command_executor.execute(driver_command, params) 319 if response: --> 320 self.error_handler.check_response(response) 321 response['value'] = self._unwrap_value( 322 response.get('value', None)) D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response) 240 alert_text = value['alert'].get('text') 241 raise exception_class(message, screen, stacktrace, alert_text) --> 242 raise exception_class(message, screen, stacktrace) 243 244 def _value_or_default(self, obj, key, default): NoSuchElementException: Message: no such element: Unable to locate element: {"method":"id","selector":"hello"} (Session info: chrome=68.0.3440.75) (Driver info: chromedriver=2.41.578737 (49da6702b16031c40d63e5618de03a32ff6c197e),platform=Windows NT 10.0.17134 x86_64)
- 捕获异常
from selenium import webdriver from selenium.common.exceptions import TimeoutException, NoSuchElementException chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) try: browser.get('https://www.baidu.com') except TimeoutException: print('Time Out') try: browser.find_element_by_id('hello') except NoSuchElementException: print('No Element') finally: browser.close()
No Element
-