介绍
selenium最初是一个自动化测试工具,而爬虫中使用它主要是为了解决requests无法直接执行JavaScript代码的问题
selenium本质是通过驱动浏览器,完全模拟浏览器的操作,比如跳转、输入、点击、下拉等,来拿到网页渲染之后的结果,可支持多种浏览器
from selenium import webdriver
# Each line below starts a different browser through its own driver class;
# they are alternatives, so keep only the line for the browser you want.
browser=webdriver.Chrome()
browser=webdriver.Firefox()
browser=webdriver.PhantomJS()
browser=webdriver.Safari()
browser=webdriver.Edge()
安装
有界面的浏览器
pip3 install selenium
下载chromedriver.exe放到python安装路径的Scripts目录中即可
国内镜像网站地址:http://npm.taobao.org/mirrors/chromedriver
最新的版本去官网找:https://sites.google.com/a/chromium.org/chromedriver/downloads
from selenium import webdriver
# Start Chrome using the chromedriver binary located in the current directory.
bro = webdriver.Chrome(executable_path='./chromedriver')
# Navigate to the Baidu home page.
bro.get('https://www.baidu.com')
# Print the page source as currently held by the browser.
print(bro.page_source)
# Close the browser window.
bro.close()
selenium3默认支持的webdriver是Firefox,而Firefox需要安装geckodriver
下载链接:https://github.com/mozilla/geckodriver/releases
selenium+chromedriver
无界面浏览器
from selenium.webdriver.chrome.options import Options
# Build the Chrome launch options for a headless (no visible window) session.
chrome_options = Options()
# Requested window size.
# NOTE(review): Chrome's documented form is WIDTH,HEIGHT — confirm that the
# 'x' separator used here is honoured by your Chrome version.
chrome_options.add_argument('window-size=1920x3000')
# Disable GPU acceleration.
chrome_options.add_argument('--disable-gpu')
# Hide scrollbars.
chrome_options.add_argument('--hide-scrollbars')
# Blink setting that turns image loading off.
chrome_options.add_argument('blink-settings=imagesEnabled=false')
# Run Chrome without a visible UI.
chrome_options.add_argument('--headless')
# Start Chrome with the options above, load Baidu and print its cookies.
bro=webdriver.Chrome(executable_path='./chromedriver',options=chrome_options)
bro.get("https://www.baidu.com")
print(bro.get_cookies())
bro.close()
开发者模式
# Exclude the 'enable-automation' switch from the switches passed to Chrome.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# NOTE(review): chromedriver_path is not defined in this snippet; presumably
# it holds the path to the chromedriver binary — define it before running.
bro = webdriver.Chrome(executable_path=chromedriver_path, options=options)
def login():
    """Fill in Taobao's login form for the first account that works.

    Iterates over ``setting.user``; each entry is indexed as
    ``(username, password)``. For every entry a Chrome session is started with
    the ``enable-automation`` switch excluded, the Taobao login form is opened
    and filled in, and the function then blocks on ``input()`` so a person can
    finish the remaining steps by hand.

    Returns:
        The live driver for the first account whose form was filled in, or
        ``None`` when every account failed or ``setting.user`` is empty.
    """
    for res in setting.user:
        # Bound before the try so the handler can tell whether a browser
        # was actually started for this attempt.
        bro = None
        try:
            username = res[0]
            password = res[1]
            options = webdriver.ChromeOptions()
            options.add_experimental_option('excludeSwitches', ['enable-automation'])
            bro = webdriver.Chrome(executable_path=chromedriver_path, options=options)
            # Let element lookups wait up to 10 seconds before failing.
            bro.implicitly_wait(10)
            bro.get('https://www.taobao.com/')
            # Open the login form from the site navigation bar.
            bro.find_element_by_css_selector('#J_SiteNavLogin > div.site-nav-menu-hd > div.site-nav-sign > a.h').click()
            input_username = bro.find_element_by_css_selector('#fm-login-id')
            input_username.send_keys(username)
            input_password = bro.find_element_by_css_selector('#fm-login-password')
            input_password.send_keys(password)
            # Pause until the operator presses Enter in the console.
            input("人工操作")
            return bro
        except Exception as e:
            # Report the failure instead of hiding it, and shut down the
            # browser started for this attempt so it is not left running.
            print(e)
            if bro is not None:
                bro.quit()
            continue
    return None
window.navigator.webdriver为true的情况
window.navigator.webdriver为true
def selenium(js):
    """Open Toutiao with ``navigator.webdriver`` masked, then run ``js``.

    Starts Chrome with the automation extension disabled and the
    ``enable-automation`` switch excluded, registers a script that runs on
    every new document to make ``navigator.webdriver`` read as ``undefined``,
    loads the page, prints its source and executes ``js`` in it. The function
    then blocks on ``input()`` so the page can be inspected; the browser is
    shut down once Enter is pressed or if any step fails.

    Args:
        js: JavaScript source passed to ``execute_script``.
    """
    # Local import so the function does not depend on a module-level import
    # that appears later in the file.
    import time

    option = webdriver.ChromeOptions()
    option.add_experimental_option('useAutomationExtension', False)
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    bro = webdriver.Chrome(executable_path='./chromedriver', options=option)
    try:
        # Injected before any page script runs, on every new document.
        bro.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": (
                "\n"
                "Object.defineProperty(navigator, 'webdriver', {\n"
                "get: () => undefined\n"
                "})\n"
            )
        })
        bro.implicitly_wait(10)
        bro.get('https://www.toutiao.com/')
        # Fixed pause before the page source is read.
        time.sleep(5)
        print(bro.page_source)
        bro.execute_script(js)
        # Keep the browser open until the operator presses Enter.
        input()
    finally:
        # Always end the session so no browser/driver process is left behind.
        bro.quit()
selenium高级用法
.send_keys('')
.click()
.clear()
selenium选择器选择
常用用法
# Search Baidu for a keyword: type it into the search box, then click the
# search button.
import time  # time.sleep is used below; imported here so the snippet runs on its own

bro=webdriver.Chrome(executable_path='./chromedriver')
bro.get("https://www.baidu.com")
# Let element lookups wait up to 10 seconds before failing.
bro.implicitly_wait(10)
# Two ways to locate the same search box (XPath, then CSS selector);
# the second assignment is the one that is used.
input_search=bro.find_element_by_xpath('//*[@id="kw"]')
input_search=bro.find_element_by_css_selector('#kw')
input_search.send_keys("美女")
# Locate the search button, pause, then click it.
enter=bro.find_element_by_id('su')
time.sleep(3)
enter.click()
# Pause before closing so the result page stays visible for a moment.
time.sleep(5)
bro.close()
模拟百度登录
import time
# Walk through Baidu's login dialog: open it, fill in the credentials, submit.
bro=webdriver.Chrome(executable_path='./chromedriver')
bro.get("https://www.baidu.com")
# Let element lookups wait up to 10 seconds before failing.
bro.implicitly_wait(10)
# Click the link whose visible text is "登录" to open the login dialog.
submit_button=bro.find_element_by_link_text('登录')
submit_button.click()
# Presumably the control that switches the dialog to username/password
# login — verify against the live page.
user_button=bro.find_element_by_id('TANGRAM__PSP_10__footerULoginBtn')
user_button.click()
# Type the user name and password into their input fields.
user_input=bro.find_element_by_id('TANGRAM__PSP_10__userName')
user_input.send_keys("jeff@qq.com")
pwd_input=bro.find_element_by_id('TANGRAM__PSP_10__password')
pwd_input.send_keys("123456")
# Submit the form, pause for 5 seconds, then close the window.
submit_input=bro.find_element_by_id('TANGRAM__PSP_10__submit')
submit_input.click()
time.sleep(5)
bro.close()
获取cookie
import time
# Load Baidu and print the cookies the browser holds for the page.
bro=webdriver.Chrome(executable_path='./chromedriver')
bro.get("https://www.baidu.com")
print(bro.get_cookies())
bro.close()
获取标签属性、获取文本、标签ID、位置、大小
# `tag` is not defined in this snippet; presumably it is an element returned
# by one of the find_element_* calls.
# Attribute values read from the element.
print(tag.get_attribute('src'))
print(tag.get_attribute('href'))
# Text of the element.
print(tag.text)
# NOTE(review): believed to be Selenium's internal element id rather than
# the HTML id attribute — confirm against the WebElement documentation.
print(tag.id)
# Location of the element.
print(tag.location)
# Tag name of the element.
print(tag.tag_name)
# Size of the element.
print(tag.size)
显式等待、隐式等待
bro.implicitly_wait(10)
执行JS代码
简单使用
from selenium import webdriver
import time
# Run a JavaScript statement inside the loaded page.
bro=webdriver.Chrome(executable_path='./chromedriver')
bro.get("https://www.baidu.com")
# Executes alert(1) in the page.
bro.execute_script('alert(1)')
# Pause for 5 seconds before closing.
time.sleep(5)
bro.close()
js屏幕上下滚动
window.scrollTo(0,100)
window.scrollTo(0,500)
window.scrollTo(0,document.body.scrollHeight)
window.scrollTo(0,document.body.scrollHeight-500)
bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
from selenium import webdriver
# Load cnblogs and scroll the window down to the full height of the document body.
bro=webdriver.Chrome(executable_path='./chromedriver')
bro.get("https://www.cnblogs.com")
bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
模拟浏览器前进后退
from selenium import webdriver
import time
# Visit three pages in sequence, then step through the browser history.
bro=webdriver.Chrome(executable_path='./chromedriver')
bro.get("https://www.cnblogs.com")
time.sleep(1)
bro.get("https://www.baidu.com")
time.sleep(1)
bro.get("https://www.jd.com")
time.sleep(1)
# Go one step back in history (from jd.com to baidu.com).
bro.back()
time.sleep(1)
# Go one step forward again (back to jd.com).
bro.forward()
选项卡管理(新窗口跳转)
原理:都是js在操作,执行Js代码
from selenium import webdriver
import time
# Open a second tab with JavaScript and switch between the two tabs.
browser=webdriver.Chrome(executable_path='./chromedriver')
browser.get('https://www.baidu.com')
# window.open() creates a new tab; its handle is added to window_handles.
browser.execute_script('window.open()')
print(browser.window_handles)
# switch_to.window replaces the deprecated switch_to_window method.
# Switch to the new tab (index 1) and load Taobao there.
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(2)
# Switch back to the first tab (index 0) and load Sina there.
browser.switch_to.window(browser.window_handles[0])
browser.get('https://www.sina.com.cn')
异常处理
from selenium import webdriver
# Bound before the try so the finally clause can tell whether a browser was
# actually started; otherwise a failing webdriver.Chrome() call would cause a
# NameError in the cleanup.
browser = None
try:
    browser=webdriver.Chrome(executable_path='./chromedriver')
    browser.get('http://www.baidu.com')
    # Presumably no element has the id "xxx", so this lookup raises and
    # demonstrates the except branch.
    browser.find_element_by_id("xxx")
except Exception as e:
    print(e)
finally:
    # Close the window only if the driver was created.
    if browser is not None:
        browser.close()
模拟键盘操作
from selenium.webdriver.common.keys import Keys
input_search.send_keys(Keys.ENTER)
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Search Baidu by typing a keyword and pressing ENTER instead of clicking the button.
bro=webdriver.Chrome(executable_path='./chromedriver')
bro.get("https://www.baidu.com")
# Let element lookups wait up to 10 seconds before failing.
bro.implicitly_wait(10)
input_search=bro.find_element_by_css_selector('#kw')
input_search.send_keys("美女")
# Send the ENTER key to the search box.
input_search.send_keys(Keys.ENTER)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?