爬虫20-浏览器自动运行简单方法
from selenium import webdriver
from selenium.webdriver.common.by import By

# Place the downloaded browser driver in the Firefox install directory and add
# that directory to the PATH environment variable so Selenium can find it.
driver = webdriver.Firefox()
driver.get("https://www.baidu.com/")

# Locate the search box.  find_element(...) returns a single element, whereas
# find_elements(...) returns a list -- and a list has no send_keys(), so the
# singular form must be used before typing into the box.
inputTag = driver.find_element(By.ID, 'kw')
# Alternative locators for the same input:
# inputTag = driver.find_element(By.CLASS_NAME, 's_ipt')
# inputTag = driver.find_element(By.NAME, 'wd')
# inputTag = driver.find_element(By.CSS_SELECTOR, ".quickdelete-wrap > input")
# inputTag = driver.find_element(By.XPATH, "//input[@id='kw']")
inputTag.send_keys('python')

# Click the search button to submit the query.
submintT = driver.find_element(By.ID, 'su')
submintT.click()

# 1. If you only want to parse data out of the page, hand the page source to
#    lxml instead: lxml is implemented in C underneath, so parsing is faster.
# 2. If you want to act on an element (type into a text box, click a button),
#    you have to use the element-lookup methods that Selenium provides.
2.操作表单
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# NOTE: each section below starts its own Chrome session and rebinds `driver`;
# nothing calls driver.quit(), so the earlier browser windows stay open.

# 1. Working with a text input
driver = webdriver.Chrome()
driver.get('https://www.baidu.com/')
inputTag = driver.find_element(By.ID, 'kw')
inputTag.send_keys('python')
time.sleep(3)  # pause so the typed text can be seen
# inputTag.clear()

# 2. Working with a checkbox
driver = webdriver.Chrome()
driver.get('https://www.douban.com/')
rememberBtn = driver.find_element(By.NAME, 'remember')
rememberBtn.click()

# 3. Working with a <select> element: wrap it in Select to choose options
driver = webdriver.Chrome()
driver.get('http://www.dobai.cn/')
selectBtn = Select(driver.find_element(By.NAME, 'jumpMenu'))
# Other ways to pick an option:
# selectBtn.select_by_index(1)
# selectBtn.select_by_value("http://m.95xiu.com/")
selectBtn.select_by_visible_text("95秀客户端")
3.行为链
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

driver = webdriver.Firefox()
driver.get("https://www.baidu.com")

# Look up the search box and the submit button up front.
inputTag = driver.find_element(By.ID, 'kw')
submitBtn = driver.find_element(By.ID, 'su')

# Build the chain step by step: the calls below only queue actions, and
# nothing is sent to the browser until perform() is called.
actions = ActionChains(driver)
actions.move_to_element(inputTag)
actions.send_keys_to_element(inputTag, 'python')
actions.move_to_element(submitBtn)
actions.click(submitBtn)
actions.perform()