PYTHON 爬虫笔记七:Selenium库基础用法

知识点一:Selenium库详解及其基本使用

  • 什么是Selenium

    selenium 是一套完整的web应用程序测试系统,包含了测试的录制(selenium IDE),编写及运行(Selenium Remote Control)和测试的并行处理(Selenium Grid)。

      selenium的核心Selenium Core基于JsUnit,完全由JavaScript编写,因此可以用于任何支持JavaScript的浏览器上。

      selenium是一个自动化测试工具,可以模拟真实浏览器的操作,支持多种浏览器,在爬虫中主要用来解决JavaScript渲染问题。

  • 基本使用

 用python写爬虫的时候,主要用的是selenium的Webdriver,我们可以通过下面的方式先看看Selenium.Webdriver支持哪些浏览器

  1. 基本用法:

    # Open Chrome, load Baidu, type "Python" into the search box and press Enter.
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)
    try:
        browser.get('https://www.baidu.com')
        # Named search_box (not `input`) so the builtin input() is not shadowed;
        # find_element(By.ID, ...) replaces the deprecated find_element_by_id helper.
        search_box = browser.find_element(By.ID, 'kw')  # element with id="kw"
        search_box.send_keys('Python')  # type "Python"
        search_box.send_keys(Keys.ENTER)  # press Enter
        wait = WebDriverWait(browser, 10)
        # Block until the element with id="content_left" is present in the DOM.
        wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
        print(browser.current_url)
        print(browser.get_cookies())
        print(browser.page_source)  # HTML source of the current page
    finally:
        # quit() ends the whole driver session; close() only closes the current window.
        browser.quit()
    https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=aa95af1b0000f45e&rsv_t=53e2qpjWA%2FivTq6GgdT4FAhWM%2FWfkPZhHYkLoaS7MVPQUZCHF%2FkLQV2%2Brnc&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=111&rsv_sug4=112
    [{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '1464_21121_26922_22159'}, {'domain': '.baidu.com', 'expiry': 3681286522.89887, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4:FG=1'}, {'domain': '.baidu.com', 'expiry': 3681286522.899111, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4'}, {'domain': '.baidu.com', 'expiry': 3681286522.899241, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1533802877'}, {'domain': '.baidu.com', 'httpOnly': False, 'name': 'PSINO', 'path': '/', 'secure': False, 'value': '7'}, {'domain': 'www.baidu.com', 'expiry': 2479882880.172246, 'httpOnly': False, 'name': 'delPer', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'expiry': 1534666877, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_CK_SAM', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.baidu.com', 'expiry': 1533805472, 'httpOnly': False, 'name': 'H_PS_645EC', 'path': '/', 'secure': False, 'value': 'c9083HSTzGdEsBVBx%2FDjhaEep8Lu5MHd8KusVOaaun2nj5W%2Bjur8%2BSHut%2BM'}]
    <!DOCTYPE html><!--STATUS OK--><html xmlns="http://www.w3.org/1999/xhtml"><head><script charset="utf-8" async="" src="https://ss0.bdstatic.com/-0U0bnSm1A5BphGlnYG/tam-ogel/5d4e9b24-dcc5-483a-b6da-be1e9e621891.js"></script>
    获得的结果
  2. 声明浏览器对象(上面我们知道了selenium支持很多的浏览器,但是如果想要声明并调用浏览器则需要:)

    from selenium import webdriver
    
    # Each line constructs a driver for a different browser and rebinds `browser`;
    # the lines are alternatives, so a real script keeps only the one it needs.
    browser = webdriver.Chrome()
    browser = webdriver.Firefox()
    browser = webdriver.Edge()
    # NOTE(review): PhantomJS support was deprecated in later Selenium releases - confirm it exists in the installed version.
    browser = webdriver.PhantomJS()
    browser = webdriver.Safari()
  3. 访问页面

    from selenium import webdriver
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)
    # Load the page, dump its HTML source, then close the window.
    home_url = 'https://www.taobao.com'
    browser.get(home_url)
    html = browser.page_source
    print(html)
    browser.close()
    <!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN" class="ks-webkit537 ks-webkit ks-chrome68 ks-chrome"><head><script>/*! 2018-08-07 14:58:00 v8.5.7 */
    !function(e){function i(n){if(o[n])return o[n].exports;var r=o[n]={exports:{},id:n,loaded:!1};return e[n].call(r.exports,r,r.exports,i),r.loaded=!0,r.exports}var o={};return i.m=e,i.c=o,i.p="",i(0)}([function(e,i){"use strict";var o=window,n=document;!function(){var e=2,r="ali_analytics";if(o[r]&amp;&amp;o[r].ua&amp;&amp;e&lt;=o[r].ua.version)return void(i.info=o[r].ua);var t,a,d,s,c,u,h,l,m,b,f,v,p,w,g,x,z,O=o.navigator,k=O.appVersion,T=O&amp;&amp;O.userAgent||"",y=function(e){var i=0;return parseFloat(e.replace(/\./g,function(){return 0===i++?".":""}))},_=function(e,i){var o,n;i[o="trident"]=.1,(n=e.match(/Trident\/([\d.]*)/))&amp;&amp;n[1]&amp;&amp;(i[o]=y(n[1])),i.core=o},N=function(e){var i,o;return(i=e.match(/MSIE ([^;]*)|Trident.*; rv(?:\s|:)?([0-9.]+)/))&amp;&amp;(o=i[1]||i[2])?y(o):0},P=function(e){return e||"other"},M=function(e){function i(){for(var i=[["Windows NT 5.1","winXP"],["Windows NT 6.1","win7"],["Windows NT 6.0","winVista"],["Windows NT 6.2","win8"],["Windows NT 10.0","win10"],["iPad","ios"],["iPhone;","ios"],["iPod","ios"],["Macintosh","mac"],["Android","android"],["Ubuntu","ubuntu"],["Linux","linux"],["Windows NT 5.2","win2003"],["Windows NT 5.0","win2000"],["Windows","winOther"],["rhino","rhino"]],o=0,n=i.length;o&lt;n;++o)if(e.indexOf(i[o][0])!==-1)return i[o][1];return"other"}function r(e,i,n,r){var t,a=o.navigator.mimeTypes;try{for(t in a)if(a.hasOwnProperty(t)&amp;&amp;a[t][e]==i){if(void 0!==n&amp;&amp;r.test(a[t][n]))return!0;if(void 0===n)return!0}return!1}catch(e){return!1}}var t,a,d,s,c,u,h,l="",m=l,b=l,f=[6,9],v="{{version}}",p="&lt;!--[if IE "+v+"]&gt;&lt;s&gt;&lt;/s&gt;&lt;![endif]--&gt;",w=n&amp;&amp;n.createElement("div"),g=[],x={webkit:void 0,edge:void 0,trident:void 0,gecko:void 0,presto:void 0,chrome:void 0,safari:void 0,firefox:void 0,ie:void 0,ieMode:void 0,opera:void 0,mobile:void 0,core:void 0,shell:void 0,phantomjs:void 0,os:void 0,ipad:void 0,iphone:void 0,ipod:void 0,ios:void 0,android:void 0,nodejs:void 
0,extraName:void 0,extraVersion:void 0};if(w&amp;&amp;w.getElementsByTagName&amp;&amp;(w.innerHTML=p.replace(v,""),g=w.getElementsByTagName("s")),g.length&gt;0){for(_(e,x),s=f[0],c=f[1];s&lt;=c;s++)if(w.innerHTML=p.replace(v,s),g.length&gt;0){x[b="ie"]=s;break}!x.ie&amp;&amp;(d=N(e))&amp;&amp;(x[b="ie"]=d)}else((a=e.match(/AppleWebKit\/*\s*([\d.]*)/i))||(a=e.match(/Safari\/([\d.]*)/)))&amp;&amp;a[1]?(x[m="webkit"]=y(a[1]),(a=e.match(/OPR\/(\d+\.\d+)/))&amp;&amp;a[1]?x[b="opera"]=y(a[1]):(a=e.match(/Chrome\/([\d.]*)/))&amp;&amp;a[1]?x[b="chrome"]=y(a[1]):(a=e.match(/\/([\d.]*) Safari/))&amp;&amp;a[1]?x[b="safari"]=y(a[1]):x.safari=x.webkit,(a=e.match(/Edge\/([\d.]*)/))&amp;&amp;a[1]&amp;&amp;(m=b="edge",x[m]=y(a[1])),/ Mobile\//.test(e)&amp;&amp;e.match(/iPad|iPod|iPhone/)?(x.mobile="apple",a=e.match(/OS ([^\s]*)/),a&amp;&amp;a[1]&amp;&amp;(x.ios=y(a[1].replace("_","."))),t="ios"。。。。。。。。。。。
    获得的结果
  4. 查找元素

    1. 单个元素

      # Locate a single element in three different ways.
      from selenium import webdriver
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      browser.get('https://www.taobao.com')
      by_id = browser.find_element_by_id('q')  # look up by id="q"
      by_css = browser.find_element_by_css_selector('#q')  # look up by CSS selector
      by_xpath = browser.find_element_by_xpath('//*[@id="q"]')  # look up by XPath
      print(by_id, by_css, by_xpath)
      browser.close()
      # The three strategies (id, CSS selector, XPath) all target the node with
      # id="q", so the three printed elements are the same one.
      <selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")> <selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")> <selenium.webdriver.remote.webelement.WebElement (session="49795084aea9c702e52e3464a2e602bf", element="0.06918141330676386-1")>
      获得的结果
      另一种方式:
      from selenium import webdriver
      from selenium.webdriver.common.by import By
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      browser.get('https://www.taobao.com')
      # Generic form: pass the locator strategy and its value explicitly.
      locator = (By.ID, 'q')
      input_first = browser.find_element(*locator)
      print(input_first)
      browser.close()
      另一种方式
      <selenium.webdriver.remote.webelement.WebElement (session="2706eb775a80e5eb5af01293caaf84e1", element="0.30861434960698597-1")>
      --->获得的结果

      查找单个元素的方法
      * find_element_by_id
      * find_element_by_name
      * find_element_by_xpath
      * find_element_by_link_text
      * find_element_by_partial_link_text
      * find_element_by_tag_name
      * find_element_by_class_name
      * find_element_by_css_selector

    2. 查找多个元素

      # Locate every element matching a selector.
      from selenium import webdriver
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      browser.get('https://www.taobao.com')
      selector = '.service-bd li'
      lis = browser.find_elements_by_css_selector(selector)  # list of matching elements
      print(lis)
      browser.close()
      [<selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-1")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-2")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-3")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-4")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-5")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-6")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-7")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-8")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-9")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-10")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-11")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-12")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-13")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-14")>, <selenium.webdriver.remote.webelement.WebElement (session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="1fd716bf2cf93a91caa11907d2ad67c7", element="0.7320173413952247-16")>]
      获得的结果
      另一种方式:
      from selenium import webdriver
      from selenium.webdriver.common.by import By
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      browser.get('https://www.taobao.com')
      # Generic plural form: strategy plus value, returning all matches.
      locator = (By.CSS_SELECTOR, '.service-bd li')
      lis = browser.find_elements(*locator)
      print(lis)
      browser.close()
      另一种方式
      [<selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-1")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-2")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-3")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-4")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-5")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-6")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-7")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-8")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-9")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-10")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-11")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-12")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-13")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-14")>, <selenium.webdriver.remote.webelement.WebElement (session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="f8d8a0f755dde2eecb7de8970c44d9f5", element="0.5842041920354057-16")>]
      --->获得的结果

      一些其他的方式:

      find_elements_by_name
      find_elements_by_xpath
      find_elements_by_link_text
      find_elements_by_partial_link_text
      find_elements_by_tag_name
      find_elements_by_class_name
      find_elements_by_css_selector

  5.  元素交互操作

    先获取元素。
    对获取的元素调用交互方法

    import time

    from selenium import webdriver
    from selenium.webdriver.common.by import By  # needed for the By.CLASS_NAME lookup below
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)
    browser.get('https://www.taobao.com')
    # Named search_box (not `input`) so the builtin input() is not shadowed.
    search_box = browser.find_element_by_id('q')  # locate the search box
    search_box.send_keys('iPhone')  # type "iPhone"
    time.sleep(1)  # pause one second so the typed text is visible
    search_box.clear()  # empty the text box
    search_box.send_keys('iPad')  # type "iPad"
    button = browser.find_element(By.CLASS_NAME, 'btn-search')  # locate the search button
    button.click()  # click it
    browser.close()

    更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement

  6. 交互动作(将动作附加到动作链中串行执行)

    from selenium import webdriver
    from selenium.webdriver import ActionChains
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)  # create the browser object
    browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    browser.switch_to.frame('iframeResult')  # switch into the 'iframeResult' frame
    source = browser.find_element_by_css_selector('#draggable')  # element to drag
    target = browser.find_element_by_css_selector('#droppable')  # element to drop onto
    # Queue the drag-and-drop on a new action chain and execute it in one expression.
    ActionChains(browser).drag_and_drop(source, target).perform()

    更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains

  7.  执行JavaScript(执行一些交互动作时,可能一些动作没有提供API。)

    # Scroll the page to the bottom, then show an alert.
    from selenium import webdriver
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)  # create the browser object
    
    browser.get('https://www.zhihu.com/explore')
    scroll_to_bottom = 'window.scrollTo(0, document.body.scrollHeight)'
    show_alert = 'alert("To Bottom")'
    browser.execute_script(scroll_to_bottom)
    browser.execute_script(show_alert)
  8.  获取元素信息

    1. 获取属性:*.get_attribute("class")*

      # Read an attribute of an element; the unused ActionChains import was dropped.
      from selenium import webdriver
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)  # create the browser object
      url = 'https://www.zhihu.com/explore'
      browser.get(url)
      logo = browser.find_element_by_id('zh-top-link-logo')
      print(logo)
      print(logo.get_attribute('class'))  # value of the element's class attribute
      browser.close()
      <selenium.webdriver.remote.webelement.WebElement (session="d5564ba9ec58a9a53015648f61a56bd7", element="0.46068206240103504-1")>
      zu-top-link-logo
      获得的结果
    2. 获取文本值:*text*

      from selenium import webdriver
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)  # create the browser object
      browser.get('https://www.zhihu.com/explore')
      element = browser.find_element_by_class_name('zu-top-add-question')
      label = element.text  # text content of the element
      print(label)
      # Output: 提问
    3. 获取ID、位置、标签名、大小

      from selenium import webdriver
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)  # create the browser object
      browser.get('https://www.zhihu.com/explore')
      element = browser.find_element_by_class_name('zu-top-add-question')
      # Print the element's id, location, tag name and size, in that order.
      for attribute in ('id', 'location', 'tag_name', 'size'):
          print(getattr(element, attribute))
      0.0866191825002236-1
      {'x': 759, 'y': 7}
      button
      {'height': 32, 'width': 66}
      获得的结果
    4. Frame

      在很多网页中都是有Frame标签,所以我们爬取数据的时候就涉及到切入到frame中以及切出来的问题,通过下面的例子演示
      这里常用的是switch_to.frame()和switch_to.parent_frame()
      位于父级frame时,要查找子级frame中的元素,必须先切换到子frame,否则查找不到。
      同理,位于子frame时也不能查找父frame中的元素。
      # Demonstrates that element lookups only see the frame currently switched to.
      import time
      from selenium import webdriver
      from selenium.common.exceptions import NoSuchElementException
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'  # page to load
      browser.get(url)  # load the page
      browser.switch_to.frame('iframeResult')  # switch into the child frame
      source = browser.find_element_by_css_selector('#draggable')  # look up an element inside the child frame
      print(source)
      # While inside the child frame, the 'logo' element is looked up; the sample
      # output below shows this lookup failing with NoSuchElementException.
      try:
          logo = browser.find_element_by_class_name('logo')
      except NoSuchElementException:
          print('NO LOGO')
      browser.switch_to.parent_frame()  # switch back to the parent frame
      # After switching back, the same lookup is repeated and its result printed.
      logo = browser.find_element_by_class_name('logo')
      print(logo)
      print(logo.text)
      <selenium.webdriver.remote.webelement.WebElement (session="7d3651ff77a4266e974a48387a562fe1", element="0.7541880746473664-1")>
      NO LOGO
      <selenium.webdriver.remote.webelement.WebElement (session="7d3651ff77a4266e974a48387a562fe1", element="0.04296955550550363-2")>
      RUNOOB.COM
      获得的结果
  9. 等待

    当使用了隐式等待执行测试的时候,如果WebDriver没有在 DOM中找到元素,将继续等待,超出设定时间后则抛出找不到元素的异常, 换句话说,当查找元素或元素并没有立即出现的时候,
    隐式等待将等待一段时间再查找DOM,默认的时间是0
    1. 隐式等待  

      如果查找时元素还没有加载出来,则在我们指定的时间内继续等待;超过指定的时间仍未加载出来就会抛出异常;如果元素一开始就已经加载完毕,则不需要等待,立即继续执行
      from selenium import webdriver
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      implicit_timeout = 10
      browser.implicitly_wait(implicit_timeout)  # set the implicit wait time
      browser.get('https://www.zhihu.com/explore')
      element = browser.find_element_by_class_name('zu-top-add-question')
      print(element)
      <selenium.webdriver.remote.webelement.WebElement (session="7c232ca91150077a6d9ded5a653ebf6d", element="0.019205089543862464-1")>
      获得的结果
    2. 显式等待:* .WebDriverWait(对象,最长等待时间)*

      指定一个等待条件,并且指定一个最长等待时间,会在这个时间内进行判断是否满足等待条件,如果成立就会立即返回,如果不成立,就会一直等待,直到等待你指定的最长等待时间,
      如果还是不满足,就会抛出异常,如果满足了就会正常返回
      from selenium import webdriver
      from selenium.webdriver.common.by import By
      from selenium.webdriver.support.ui import WebDriverWait
      from selenium.webdriver.support import expected_conditions as EC
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      browser.get('https://www.taobao.com/')
      wait = WebDriverWait(browser, 10)  # wait object with a maximum wait of 10
      # Condition 1: the element with id="q" has been loaded.
      box_is_present = EC.presence_of_element_located((By.ID, 'q'))
      search_box = wait.until(box_is_present)
      # Condition 2: the element is clickable (typically used for buttons).
      button_is_clickable = EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))
      button = wait.until(button_is_clickable)
      print(search_box, button)
      <selenium.webdriver.remote.webelement.WebElement (session="ac4d3c5db3d4f3aae7b828ad648e87df", element="0.49320541507860316-1")> <selenium.webdriver.remote.webelement.WebElement (session="ac4d3c5db3d4f3aae7b828ad648e87df", element="0.49320541507860316-2")>
      获得的结果

      上述的例子中的条件:
      EC.presence_of_element_located()是确认元素是否已经出现了
      EC.element_to_be_clickable()是确认元素是否是可点击的

      常用的判断条件:
      title_is 标题是某内容
      title_contains 标题包含某内容
      presence_of_element_located 元素加载出,传入定位元组,如(By.ID, 'p')
      visibility_of_element_located 元素可见,传入定位元组
      visibility_of 可见,传入元素对象
      presence_of_all_elements_located 所有元素加载出
      text_to_be_present_in_element 某个元素文本包含某文字
      text_to_be_present_in_element_value 某个元素值包含某文字
      frame_to_be_available_and_switch_to_it frame加载并切换
      invisibility_of_element_located 元素不可见
      element_to_be_clickable 元素可点击
      staleness_of 判断一个元素是否仍在DOM,可判断页面是否已经刷新
      element_to_be_selected 元素已被选中,传入元素对象
      element_located_to_be_selected 元素已被选中,传入定位元组
      element_selection_state_to_be 传入元素对象以及状态,相等返回True,否则返回False
      element_located_selection_state_to_be 传入定位元组以及状态,相等返回True,否则返回False
      alert_is_present 是否出现Alert

      更多操作参考:http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.support.expected_conditions

  10. 前进后退:*back();forward()*

    # Open Chrome, visit the Baidu, Taobao and Zhihu home pages in turn, go back
    # to the Taobao page, wait one second, then go forward to the Zhihu page again.
    import time
    from selenium import webdriver
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)
    pages = ('https://www.baidu.com/', 'https://www.taobao.com/', 'https://www.zhihu.com/')
    for page in pages:
        browser.get(page)
    browser.back()  # one step back in the browsing history
    time.sleep(1)
    browser.forward()  # one step forward again
    browser.close()
  11. Cookies:*get_cookies();delete_all_cookies();add_cookie()*

    from selenium import webdriver
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)
    browser.get('https://www.zhihu.com/explore')
    print(browser.get_cookies())  # cookies present after loading the page
    extra_cookie = {'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}
    browser.add_cookie(extra_cookie)
    print(browser.get_cookies())  # cookies after adding one
    browser.delete_all_cookies()
    print(browser.get_cookies())  # cookies after deleting all of them
    [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1533802053.671843, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '156dfd931a77f9586c0da07030f2df36'}, {'domain': '.zhihu.com', 'expiry': 1533802958, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1533801158'}, {'domain': '.zhihu.com', 'expiry': 1628409153.672149, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '5fdfbadd6fc347398cc287ebb19a383c|1533801155000|1533801155000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'e58e3a1c1f41931b44a67c2b426c2f1e'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672288, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjI4MWJhNjU3YTRiNGFkMmE0OTkxNjk5MDMxZjc4NTU=|1533801155|61d044a51d2c7f0c8a99b9efcdec2f929a626003"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672375, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"OTQ3Mjc1NmI0ZWEyNDFiOTkwYjAxYmY5NmU5Y2YyZGY=|1533801155|e2f66995a8136044730315e95ab3230932d3e85a"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672453, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"NzRiMmNiMGQ2ZjYwNGIwM2I2Nzg0OTExNGUxZTJmZTA=|1533801155|9ad2a89322a8cfaf04770121c359a1de82636336"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1628409157.390474, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"ALDleMF-Bw6PTmu-31M1eULrY7jjkGvu9tA=|1533801158"'}, {'domain': '.zhihu.com', 'expiry': 1596873157, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '084e48e3-598e-4d8a-85b4-4def087fc321'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': 
'__utma', 'path': '/', 'secure': False, 'value': '51854390.543124770.1533801158.1533801158.1533801158.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1549569158, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1533801158.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180809=1'}]
    [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1533802053.671843, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '156dfd931a77f9586c0da07030f2df36'}, {'domain': '.zhihu.com', 'expiry': 1533802958, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1533801158'}, {'domain': '.zhihu.com', 'expiry': 1628409153.672149, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': '5fdfbadd6fc347398cc287ebb19a383c|1533801155000|1533801155000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'e58e3a1c1f41931b44a67c2b426c2f1e'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672288, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"MjI4MWJhNjU3YTRiNGFkMmE0OTkxNjk5MDMxZjc4NTU=|1533801155|61d044a51d2c7f0c8a99b9efcdec2f929a626003"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672375, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"OTQ3Mjc1NmI0ZWEyNDFiOTkwYjAxYmY5NmU5Y2YyZGY=|1533801155|e2f66995a8136044730315e95ab3230932d3e85a"'}, {'domain': '.zhihu.com', 'expiry': 1536393153.672453, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"NzRiMmNiMGQ2ZjYwNGIwM2I2Nzg0OTExNGUxZTJmZTA=|1533801155|9ad2a89322a8cfaf04770121c359a1de82636336"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1628409157.390474, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"ALDleMF-Bw6PTmu-31M1eULrY7jjkGvu9tA=|1533801158"'}, {'domain': '.zhihu.com', 'expiry': 1596873157, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': '084e48e3-598e-4d8a-85b4-4def087fc321'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': 
'__utma', 'path': '/', 'secure': False, 'value': '51854390.543124770.1533801158.1533801158.1533801158.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1549569158, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1533801158.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1596873158, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20180809=1'}, {'domain': 'www.zhihu.com', 'expiry': 2164521158, 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}]
    []
    获得的结果
  12. 选项卡管理

    通过执行js命令实现新开选项卡window.open()
    不同的选项卡是存在列表里browser.window_handles
    通过browser.window_handles[0]就可以操作第一个选项卡
    # Open a second tab and switch between tabs via their window handles.
    import time
    from selenium import webdriver
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)
    browser.get('https://www.baidu.com')
    browser.execute_script('window.open()')  # open a new tab by running JavaScript
    print(browser.window_handles)  # handles of all open tabs
    # switch_to.window() replaces the deprecated switch_to_window() helper.
    browser.switch_to.window(browser.window_handles[1])
    browser.get('https://www.taobao.com')
    time.sleep(1)
    browser.switch_to.window(browser.window_handles[0])
    browser.get('https://python.org')
    ['CDwindow-B1E2119962846BA7AED735D128600D3E', 'CDwindow-B231BF356905D63DE59F86F644F77A8F']
    获得的结果
  13.  异常处理

    这里的异常比较复杂,官网的参考地址:
    http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions
    这里只进行简单的演示,查找一个不存在的元素

    1. 实例,会报错

      # Deliberately unguarded lookup of an id that does not exist on the page.
      from selenium import webdriver
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      browser.get('https://www.baidu.com')
      # No try/except here: this call raises NoSuchElementException (see the traceback below).
      browser.find_element_by_id('hello')
      NoSuchElementException                    Traceback (most recent call last)
      <ipython-input-15-b8bcc6bfca0b> in <module>()
            4 browser = webdriver.Chrome(chromedriver)
            5 browser.get('https://www.baidu.com')
      ----> 6 browser.find_element_by_id('hello')
      
      D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element_by_id(self, id_)
          357             element = driver.find_element_by_id('foo')
          358         """
      --> 359         return self.find_element(by=By.ID, value=id_)
          360 
          361     def find_elements_by_id(self, id_):
      
      D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element(self, by, value)
          964         return self.execute(Command.FIND_ELEMENT, {
          965             'using': by,
      --> 966             'value': value})['value']
          967 
          968     def find_elements(self, by=By.ID, value=None):
      
      D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
          318         response = self.command_executor.execute(driver_command, params)
          319         if response:
      --> 320             self.error_handler.check_response(response)
          321             response['value'] = self._unwrap_value(
          322                 response.get('value', None))
      
      D:\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
          240                 alert_text = value['alert'].get('text')
          241             raise exception_class(message, screen, stacktrace, alert_text)
      --> 242         raise exception_class(message, screen, stacktrace)
          243 
          244     def _value_or_default(self, obj, key, default):
      
      NoSuchElementException: Message: no such element: Unable to locate element: {"method":"id","selector":"hello"}
        (Session info: chrome=68.0.3440.75)
        (Driver info: chromedriver=2.41.578737 (49da6702b16031c40d63e5618de03a32ff6c197e),platform=Windows NT 10.0.17134 x86_64)
      获得的结果
    2. 捕获异常
      # Same failing lookup as the previous example, with the exceptions caught.
      from selenium import webdriver
      from selenium.common.exceptions import TimeoutException, NoSuchElementException
      
      chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
      browser = webdriver.Chrome(chromedriver)
      # Report a TimeoutException from the page load instead of crashing.
      try:
          browser.get('https://www.baidu.com')
      except TimeoutException:
          print('Time Out')
      # Report the missing element instead of crashing; the window is closed either way.
      try:
          browser.find_element_by_id('hello')
      except NoSuchElementException:
          print('No Element')
      finally:
          browser.close()
      No Element
      获得的结果   
posted @ 2018-08-09 17:38  达尔文在思考  阅读(1802)  评论(0编辑  收藏  举报