1. 网页的打开
# Start Chrome through a local chromedriver binary and load the Baidu home page.
import time

from selenium import webdriver

# Location of the chromedriver executable on this machine.
CHROMEDRIVER_PATH = r"C:\Users\qq302\Desktop\chromedriver.exe"
START_URL = 'https://www.baidu.com/'

# Step 1: open the browser, then navigate to the start page.
driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(START_URL)
2. 关闭当前网页与整个浏览器
# Open Baidu, keep it on screen briefly, then close the page and the browser.
import time

from selenium import webdriver

CHROMEDRIVER_PATH = r"C:\Users\qq302\Desktop\chromedriver.exe"
START_URL = 'https://www.baidu.com/'
VISIBLE_SECONDS = 5

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(START_URL)

# Leave the page visible for a few seconds before shutting down.
time.sleep(VISIBLE_SECONDS)

# close() closes only the current page ...
driver.close()
# ... while quit() shuts down the whole browser.
driver.quit()
3. 页面元素的定位与内容解析
# Shows several ways to locate the Baidu search box, types into it, and then
# parses the rendered page source with lxml.
import time

from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome(executable_path=r"C:\Users\qq302\Desktop\chromedriver.exe")
driver.get('https://www.baidu.com/')

# NOTE(review): the find_element_by_* helpers are deprecated in newer Selenium
# releases; find_element(By.<KIND>, value) is the forward-compatible spelling.
# Confirm against the installed Selenium version.

# Locate by id.
inputTag = driver.find_element_by_id('kw')
# Locate by class name.
inputTag = driver.find_element_by_class_name('s_ipt')
# Locate by XPath.
inputTag = driver.find_element_by_xpath("//input[@id='kw']")
# Locate by CSS selector.
inputTag = driver.find_element_by_css_selector('.s_ipt')
# The generic find_element() with a By locator does the same job.
inputTag = driver.find_element(By.ID, 'kw')
inputTag.send_keys('python')

# For text analysis, hand the page source to lxml and query it with XPath.
html = etree.HTML(driver.page_source)
# An empty XPath expression is invalid and makes lxml raise XPathEvalError,
# so run a real query: the class attribute of the search box located above.
print(html.xpath("//input[@id='kw']/@class"))
4. 操作表单元素
# Demonstrates three ways of interacting with form elements: typing text,
# clicking, and driving a drop-down through the Select helper.
import time

from selenium import webdriver
from selenium.webdriver.support.ui import Select

# --- 1. send_keys(): type text into an element ---
driver = webdriver.Chrome(executable_path=r"C:\Users\qq302\Desktop\chromedriver.exe")
driver.get('https://www.baidu.com/')
inputTag = driver.find_element_by_id('kw')
inputTag.send_keys('python')  # type "python" into the element with id "kw"
time.sleep(5)
inputTag.clear()  # clear what was typed
# `driver` is rebound below; quit first so this browser and its chromedriver
# process are not leaked with no handle left to close them.
driver.quit()

# --- 2. click(): click an element ---
driver = webdriver.Chrome(executable_path=r"C:\Users\qq302\Desktop\chromedriver.exe")
driver.get('https://www.baidu.com/')
inputTag = driver.find_element_by_id('kw')
inputTag.send_keys('python')  # type "python" into the element with id "kw"
time.sleep(5)
inputTag.click()  # click the element (this does not clear the input)
driver.quit()  # same reason as above: release before rebinding `driver`

# --- 3. Select: operate a drop-down list ---
driver = webdriver.Chrome(executable_path=r"C:\Users\qq302\Desktop\chromedriver.exe")
driver.get('https://www.baidu.com/')
# NOTE(review): Select() only accepts a <select> element; confirm that the
# element with class "pf" on this page really is one.
SelectBn = Select(driver.find_element_by_class_name('pf'))
SelectBn.select_by_index(1)
# deselect_all() is only valid for multi-select lists and raises
# NotImplementedError otherwise, so guard the call.
if SelectBn.is_multiple:
    SelectBn.deselect_all()
5. 使用行为链(ActionChains)模拟鼠标的移动、点击、双击操作
# Uses an ActionChains sequence to move the mouse to the search box, type a
# query, and click the search button on the Baidu home page.
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select

driver = webdriver.Chrome(executable_path=r"C:\Users\qq302\Desktop\chromedriver.exe")
driver.get('https://www.baidu.com/')

inputTag = driver.find_element_by_id('kw')
submitBn = driver.find_element_by_id('su')

actions = ActionChains(driver)
actions.move_to_element(inputTag)  # queue: move the mouse onto the search box
# Queue the typing inside the chain. Calling inputTag.send_keys() directly
# would run immediately, i.e. before the queued mouse move is performed.
actions.send_keys_to_element(inputTag, 'python')
actions.click(submitBn)  # queue: click the search button
actions.perform()  # run the queued actions in order

# Other actions that can be queued the same way:
# click_and_hold(element)  # press the left mouse button and keep it held down
# context_click(element)   # right-click
# double_click(element)    # double-click
6. 对cookie进行操作,包括获取当前cookie,删除cookie
# Lists the cookies of the Baidu home page, deletes one by name, looks it up
# again, and finally removes every cookie.
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select

CHROMEDRIVER_PATH = r"C:\Users\qq302\Desktop\chromedriver.exe"
COOKIE_NAME = 'BD_HOME'

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get('https://www.baidu.com/')

# Fetch every cookie of the current session and print them one by one.
all_cookies = driver.get_cookies()
for entry in all_cookies:
    print(entry)

# Delete a single cookie by name ...
driver.delete_cookie(COOKIE_NAME)
# ... then look the same name up again and print whatever comes back.
lookup_result = driver.get_cookie(COOKIE_NAME)
print(lookup_result)

# Remove all remaining cookies.
driver.delete_all_cookies()
7. 隐式等待和显式等待
# Two waiting strategies: an implicit wait applied to element lookups, and an
# explicit wait on a single condition.
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

CHROMEDRIVER_PATH = r"C:\Users\qq302\Desktop\chromedriver.exe"
BAIDU_URL = 'https://www.baidu.com/'

# --- Implicit wait ---
driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(BAIDU_URL)

driver.implicitly_wait(10)  # wait at most 10 seconds

search_box = driver.find_element_by_id('kw')
search_box.send_keys('python')
search_button = driver.find_element_by_id('su')
search_button.click()

# --- Explicit wait ---
# An exception is raised if the element has not appeared within 10 seconds.
driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(BAIDU_URL)

try:
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'kw')))
finally:
    # Printed whether or not the wait succeeded.
    print('end')
8. 进行窗口的切换:driver.switch_to.window(driver.window_handles[1])
# Opens a second window from JavaScript, scrolls the first one, and then
# switches the driver over to the second window.
from selenium import webdriver

# JavaScript snippets executed in the page.
OPEN_DOUBAN_JS = "window.open('https://www.douban.com/')"
SCROLL_DOWN_JS = 'window.scrollTo(0, 1000000)'

driver_path = r'C:\Users\qq302\Desktop\chromedriver.exe'
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')

# Open the Douban site in a new window.
driver.execute_script(OPEN_DOUBAN_JS)
# Scroll the window to a very large vertical offset.
driver.execute_script(SCROLL_DOWN_JS)

# Print the window handles known to the driver.
print(driver.window_handles)

# Point the driver at the window stored at index 1, then show its URL.
target_handle = driver.window_handles[1]
driver.switch_to.window(target_handle)
print(driver.current_url)
9. 使用免费的代理 IP 打开网页
# 9. Configure a proxy IP and open a page through it.
from selenium import webdriver

CHROMEDRIVER_PATH = r'C:\Users\qq302\Desktop\chromedriver.exe'
# 61.189.242.243 is the proxy IP and 55484 is its port.
PROXY_ARGUMENT = '--proxy-server=http://61.189.242.243:55484'
IP_ECHO_URL = r'http://httpbin.org/ip'

options = webdriver.ChromeOptions()
options.add_argument(PROXY_ARGUMENT)

driver = webdriver.Chrome(
    executable_path=CHROMEDRIVER_PATH,
    options=options,
)
# Open the page with the proxy settings applied.
driver.get(IP_ECHO_URL)
10. WebElement 补充操作
# 10. Extra WebElement operations: read an attribute and save a screenshot.
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement

SCREENSHOT_FILE = 'baidu.png'

driver_path = r'C:\Users\qq302\Desktop\chromedriver.exe'
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')

submitBn = driver.find_element_by_id('su')

# Read the element's "value" attribute and print it.
button_value = submitBn.get_attribute('value')
print(button_value)

# Save a screenshot to disk.
driver.save_screenshot(SCREENSHOT_FILE)