安装
pip install selenium
开始
# coding=utf-8
"""Minimal Selenium example: open Baidu, submit a search for "python", quit."""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start a Chrome session. This raises WebDriverException when the
# chromedriver executable cannot be located.
driver = webdriver.Chrome()
try:
    # Load the page.
    driver.get("http://www.baidu.com")

    # Locate elements by id: type the query into element "kw" and click
    # element "su" (presumably Baidu's search box and submit button).
    # find_element(By.ID, ...) is used because find_element_by_id() was
    # deprecated in Selenium 4.0 and removed in 4.3; the By form also works
    # on older Selenium releases.
    driver.find_element(By.ID, "kw").send_keys("python")
    driver.find_element(By.ID, "su").click()

    # Pause so the result page stays visible for a moment.
    time.sleep(3)
finally:
    # Always shut the browser down, even if one of the steps above raised.
    driver.quit()
报错
Traceback (most recent call last):
  File "D:/python_work/18-20爬虫代码V3.1/爬虫代码V3.1/day06/code/01_try_selenium.py", line 6, in <module>
    driver = webdriver.Chrome()
  File "D:\Programs\Python\Python35\lib\site-packages\selenium\webdriver\chrome\webdriver.py", line 73, in __init__
    self.service.start()
  File "D:\Programs\Python\Python35\lib\site-packages\selenium\webdriver\common\service.py", line 83, in start
    os.path.basename(self.path), self.start_error_message)
selenium.common.exceptions.WebDriverException: Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home
原因:没有安装chromedriver
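chromedriver下载地址:https://chromedriver.storage.googleapis.com/index.html (请下载与本机 Chrome 浏览器版本相对应的 chromedriver;Chrome 115 及之后版本的 chromedriver 改在 https://googlechromelabs.github.io/chrome-for-testing/ 发布)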
解压缩
unzip chromedriver_linux64.zip
把解压得到的 chromedriver 可执行文件移动到 /usr/bin 目录下
sudo mv chromedriver /usr/bin/
测试是否成功
继续运行代码,成功
selenium用法
# coding=utf-8
"""Tour of common WebDriver calls.

Opens Baidu, sizes the window, saves a screenshot, submits a search, then
prints the session cookies, the page source and the current URL.
"""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start a Chrome session.
driver = webdriver.Chrome()
try:
    # Load the page.
    driver.get("http://www.baidu.com")

    # To maximize the window instead of sizing it explicitly:
    # driver.maximize_window()
    # Explicit window size as (width, height); integers rather than the
    # strings used previously, matching the documented call form.
    driver.set_window_size(1920, 1080)

    # Save a screenshot of the current window to the working directory.
    driver.save_screenshot("./baidu.png")

    # Locate elements by id, type the query and click the button.
    # find_element(By.ID, ...) replaces find_element_by_id(), which was
    # deprecated in Selenium 4.0 and removed in 4.3.
    driver.find_element(By.ID, "kw").send_keys("python")
    driver.find_element(By.ID, "su").click()

    # get_cookies() returns a list of dicts; flatten it to {name: value}.
    cookies = {cookie["name"]: cookie["value"] for cookie in driver.get_cookies()}
    print(cookies)

    # HTML of the page as currently held by the browser.
    html = driver.page_source
    print(html)

    # URL the browser is currently on.
    current_url = driver.current_url
    print(current_url)

    # Close the current window.
    driver.close()
finally:
    # End the whole browser session, even if a step above raised.
    driver.quit()
find_element
# coding=utf-8
"""Demonstrates the element locator strategies on a Baidu results page.

Covers XPath, link text, partial link text, tag name, class name and CSS
selector lookups.
"""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get("https://www.baidu.com/s?ie=UTF-8&wd=python")

    # find_element vs. find_elements:
    #   find_element  returns a single element and raises if nothing matches;
    #   find_elements returns a list of elements, empty if nothing matches.
    # The find_element(s)(By.X, ...) form is used throughout because the
    # find_element(s)_by_* helpers were deprecated in Selenium 4.0 and
    # removed in 4.3.

    # XPath: every div whose class contains "c-container" under the element
    # with id "content_left".
    ret = driver.find_elements(
        By.XPATH,
        "//div[@id='content_left']//div[contains(@class,'c-container')]",
    )
    # Attributes of a located element are read with get_attribute().
    for block in ret:
        # print(block.get_attribute('srcid'))
        pass
    # Text of a located element is read with the .text property
    # (not the XPath text() function).

    # Link text: href of the first link whose text is exactly "下一页>"
    # ("next page >"). Indexing [0] raises IndexError if there is no such link.
    print(driver.find_elements(By.LINK_TEXT, "下一页>")[0].get_attribute("href"))
    # Partial link text: href of the first link whose text contains "下一页".
    print(driver.find_element(By.PARTIAL_LINK_TEXT, "下一页").get_attribute("href"))

    # Tag name: all <h3> elements.
    ret1 = driver.find_elements(By.TAG_NAME, "h3")
    for heading in ret1:
        # print(heading.text)
        pass

    # Class name: all elements carrying the class "c-container".
    ret2 = driver.find_elements(By.CLASS_NAME, "c-container")
    print(ret2)

    # CSS selector: first element matching "table" or "img". This was
    # previously looked up by class name, but "table,img" is a selector
    # list, not a class name, so the class-name strategy was the wrong one.
    print(driver.find_element(By.CSS_SELECTOR, "table,img"))
finally:
    # End the browser session even if one of the lookups above raised.
    driver.quit()
frame操作
# coding=utf-8
"""Frame handling: switch into an iframe before locating elements inside it."""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get("https://mail.qq.com")

    # Switch the driver's context into the iframe "login_frame". Without
    # this switch the lookup below cannot find the element and raises.
    driver.switch_to.frame("login_frame")

    # Type into the element with id "u" inside the frame.
    # find_element(By.ID, ...) replaces find_element_by_id(), which was
    # deprecated in Selenium 4.0 and removed in 4.3.
    driver.find_element(By.ID, "u").send_keys("123123123")

    # Pause so the filled-in field stays visible for a moment.
    time.sleep(3)
finally:
    # End the browser session even if a step above raised.
    driver.quit()