基本使用
复制 | from selenium import webdriver |
| from selenium.webdriver.common.by import By |
| from selenium.webdriver.common.keys import Keys |
| from selenium.webdriver.support import expected_conditions |
| from selenium.webdriver.support.wait import WebDriverWait |
| |
| |
| browser = webdriver.Chrome() |
| try: |
| browser.get('https://www.baidu.com') |
| input = browser.find_element_by_id('kw') |
| input.send_keys('Python') |
| input.send_keys(Keys.ENTER) |
| wait = WebDriverWait(browser, 10) |
| wait.until( |
| expected_conditions.presence_of_element_located( |
| |
| |
| (By.ID, 'content_left') |
| ) |
| ) |
| print( |
| browser.current_url, |
| browser.get_cookies(), |
| browser.page_source, |
| sep='\n' |
| ) |
| |
| finally: |
| browser.close() |
声明浏览器对象
复制 | from selenium import webdriver |
| |
| |
| browser1 = webdriver.Chrome() |
| browser2 = webdriver.Firefox() |
| browser3 = webdriver.Edge() |
| browser4 = webdriver.PhantomJS() |
| browser5 = webdriver.Safari() |
| |
| |
访问页面
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.taobao.com') |
| print(browser.page_source) |
| browser.close() |
查找节点
拿淘宝网为例
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.taobao.com') |
| |
| input_first = browser.find_element_by_id('q') |
| input_second = browser.find_element_by_css_selector('#q') |
| input_third = browser.find_element_by_xpath('//*[@id="q"]') |
| print( |
| 'ID方式查找节点:\t', input_first, |
| 'CSS选择器方式查找节点:\t', input_second, |
| 'XPath方式查找节点:\t', input_third, |
| sep='\n' |
| ) |
| browser.close() |
复制 | |
| ID方式查找节点: |
| <selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")> |
| CSS选择器方式查找节点: |
| <selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")> |
| XPath方式查找节点: |
| <selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")> |
| |
获取节点的方法
复制 | |
| find_element_by_id |
| find_element_by_name |
| find_element_by_xpath |
| find_element_by_link_text |
| find_element_by_partial_link_text |
| find_element_by_tag_name |
| find_element_by_class_name |
| find_element_by_css_selector |
find_element()方法查找单个节点
复制 | from selenium import webdriver |
| from selenium.webdriver.common.by import By |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.taobao.com') |
| input_first = browser.find_element(By.ID, 'q') |
| print(input_first) |
| browser.close() |
| |
| |
| |
| |
find_elements()方法查找多个节点
复制 | from selenium import webdriver |
| from selenium.webdriver.common.by import By |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.taobao.com') |
| |
| list1 = browser.find_elements_by_css_selector('.service-bd li') |
| list2 = browser.find_elements(By.CSS_SELECTOR, '.service-bd li') |
| |
| print( |
| 'find_elements_by_css_selector()方法:', list1, |
| 'find_elements()方法:', list2, |
| sep='\n' |
| ) |
| browser.close() |
复制 | |
| find_elements_by_css_selector()方法: |
| [<selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-1")>, |
| ...... |
| <selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-16")>] |
| find_elements()方法: |
| [<selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-1")>, |
| ...... |
| <selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-16")>] |
复制 | find_elements_by_css_selector() |
| 和 |
| find_elements(By.CSS_SELECTOR, ...)两种写法的输出结果一样 |
节点交互
复制 | from selenium import webdriver |
| import time |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.taobao.com') |
| |
| input_ = browser.find_element_by_id('q') |
| input_.send_keys('iPhone') |
| time.sleep(1) |
| input_.clear() |
| input_.send_keys('iPad') |
| button = browser.find_element_by_class_name('tb-bg') |
| button.click() |
Selenium驱动浏览器来执行一些操作
动作链
复制 | from selenium import webdriver |
| from selenium.webdriver import ActionChains |
| |
| browser = webdriver.Chrome() |
| browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable') |
| browser.switch_to.frame('iframeResult') |
| |
| source = browser.find_element_by_css_selector('#draggable') |
| target = browser.find_element_by_css_selector('#droppable') |
| |
| actions = ActionChains(browser) |
| actions.drag_and_drop(source, target) |
| actions.perform() |
-
拖拽前
-
拖拽后
执行JavaScript
利用 execute_script()方法可以直接执行 JavaScript,例如下面的代码将进度条下拉到最底部;如需随后弹出 alert 提示框,可再执行 browser.execute_script('alert("To Bottom")')。
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('http://www.zhihu.com/explore') |
| browser.execute_script('window.scrollTo(0, document.body.scrollHeight)') |
获取节点信息
获取属性
get_attribute()方法可以获取属性
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('http://www.zhihu.com/explore') |
| |
| logo = browser.find_element_by_id('zh-top-inner') |
| print( |
| logo, |
| logo.get_attribute('class'), |
| sep='\n' |
| ) |
| browser.close() |
复制 | |
| <selenium.webdriver.remote.webelement.WebElement (session="7f325513a2f34aaa95612698d78817e6", element="0.5056570582847388-1")> |
| zg-wrap modal-shifting clearfix |
获取文本值
text属性可以获取文本值
复制 | 相当于 Beautiful Soup 的 get_text()方法、 pyquery 的 text()方法 |
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('http://www.zhihu.com/explore') |
| |
| zhihu = browser.find_element_by_class_name('zu-top-link-logo') |
| print(zhihu.text) |
| browser.close() |
复制
获取id、位置、标签名和大小
可以通过 id、location、tag_name 和 size 属性,分别获取节点的 id、位置、标签名和大小
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('http://www.zhihu.com/explore') |
| |
| input_ = browser.find_element_by_class_name('zu-top-add-question') |
| print( |
| input_.id, |
| input_.location, |
| input_.tag_name, |
| input_.size, |
| sep='\n' |
| ) |
| browser.close() |
复制 | |
| 0.9009302916784063-1 |
| {'x': 849, 'y': 7} |
| button |
| {'height': 32, 'width': 66} |
切换Frame
复制 | from selenium import webdriver |
| from selenium.common.exceptions import NoSuchElementException |
| browser = webdriver.Chrome() |
| url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable' |
| browser.get(url) |
| |
| browser.switch_to.frame('iframeResult') |
| try: |
| logo = browser.find_element_by_class_name('logo') |
| except NoSuchElementException: |
| print('NO LOGO') |
| browser.switch_to.parent_frame() |
| logo_ = browser.find_element_by_class_name('logo') |
| print(logo_) |
| print(logo_.text) |
复制 | |
| NO LOGO |
| <selenium.webdriver.remote.webelement.WebElement (session="d24c9d62b8c5882adec32f3ed55b5d7b", element="0.9706135395535092-2")> |
| RUNOOB.COM |
延时等待
隐式等待
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.implicitly_wait(10) |
| browser.get('https://www.zhihu.com/explore') |
| input = browser.find_element_by_class_name('zu-top-add-question') |
| print(input) |
显式等待
复制 | from selenium import webdriver |
| from selenium.webdriver.common.by import By |
| from selenium.webdriver.support.ui import WebDriverWait |
| from selenium.webdriver.support import expected_conditions as EC |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.taobao.com/') |
| wait = WebDriverWait(browser, 10) |
| input_ = wait.until(EC.presence_of_element_located((By.ID, 'q'))) |
| button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))) |
| |
| |
| print(input_, button, sep='\n') |
复制 | |
| <selenium.webdriver.remote.webelement.WebElement (session="497bc36663dd6ed41d7c59bd6a51982f", element="0.8346683456577526-1")> |
| <selenium.webdriver.remote.webelement.WebElement (session="497bc36663dd6ed41d7c59bd6a51982f", element="0.8346683456577526-2")> |
显式等待的等待条件有很多,如下
复制 | class title_is(object): |
| """An expectation for checking the title of a page. |
| title is the expected title, which must be an exact match |
| returns True if the title matches, false otherwise.""" |
| |
| |
| class title_contains(object): |
| """ An expectation for checking that the title contains a case-sensitive |
| substring. title is the fragment of title expected |
| returns True when the title matches, False otherwise |
| """ |
| |
| |
| class presence_of_element_located(object): |
| """ An expectation for checking that an element is present on the DOM |
| of a page. This does not necessarily mean that the element is visible. |
| locator - used to find the element |
| returns the WebElement once it is located |
| """ |
| |
| |
| class url_contains(object): |
| """ An expectation for checking that the current url contains a |
| case-sensitive substring. |
| url is the fragment of url expected, |
| returns True when the url matches, False otherwise |
| """ |
| |
| |
| class url_matches(object): |
| """An expectation for checking the current url. |
| pattern is the expected pattern, which must be an exact match |
| returns True if the url matches, false otherwise.""" |
| |
| |
| class url_to_be(object): |
| """An expectation for checking the current url. |
| url is the expected url, which must be an exact match |
| returns True if the url matches, false otherwise.""" |
| |
| |
| class url_changes(object): |
| """An expectation for checking the current url. |
| url is the expected url, which must not be an exact match |
| returns True if the url is different, false otherwise.""" |
| |
| |
| class visibility_of_element_located(object): |
| """ An expectation for checking that an element is present on the DOM of a |
| page and visible. Visibility means that the element is not only displayed |
| but also has a height and width that is greater than 0. |
| locator - used to find the element |
| returns the WebElement once it is located and visible |
| """ |
| |
| |
| class visibility_of(object): |
| """ An expectation for checking that an element, known to be present on the |
| DOM of a page, is visible. Visibility means that the element is not only |
| displayed but also has a height and width that is greater than 0. |
| element is the WebElement |
| returns the (same) WebElement once it is visible |
| """ |
| |
| |
| class presence_of_all_elements_located(object): |
| """ An expectation for checking that there is at least one element present |
| on a web page. |
| locator is used to find the element |
| returns the list of WebElements once they are located |
| """ |
| |
| |
| class visibility_of_any_elements_located(object): |
| """ An expectation for checking that there is at least one element visible |
| on a web page. |
| locator is used to find the element |
| returns the list of WebElements once they are located |
| """ |
| |
| |
| class visibility_of_all_elements_located(object): |
| """ An expectation for checking that all elements are present on the DOM of a |
| page and visible. Visibility means that the elements are not only displayed |
| but also has a height and width that is greater than 0. |
| locator - used to find the elements |
| returns the list of WebElements once they are located and visible |
| """ |
| |
| |
| class text_to_be_present_in_element(object): |
| """ An expectation for checking if the given text is present in the |
| specified element. |
| locator, text |
| """ |
| |
| |
| class text_to_be_present_in_element_value(object): |
| """ |
| An expectation for checking if the given text is present in the element's |
| locator, text |
| """ |
| |
| |
| class frame_to_be_available_and_switch_to_it(object): |
| """ An expectation for checking whether the given frame is available to |
| switch to. If the frame is available it switches the given driver to the |
| specified frame. |
| """ |
| |
| |
| class invisibility_of_element_located(object): |
| """ An Expectation for checking that an element is either invisible or not |
| present on the DOM. |
| |
| locator used to find the element |
| """ |
| |
| |
| class invisibility_of_element(invisibility_of_element_located): |
| """ An Expectation for checking that an element is either invisible or not |
| present on the DOM. |
| |
| element is either a locator (text) or an WebElement |
| """ |
| |
| |
| class element_to_be_clickable(object): |
| """ An Expectation for checking an element is visible and enabled such that |
| you can click it.""" |
| |
| |
| class staleness_of(object): |
| """ Wait until an element is no longer attached to the DOM. |
| element is the element to wait for. |
| returns False if the element is still attached to the DOM, true otherwise. |
| """ |
| |
| |
| class element_to_be_selected(object): |
| """ An expectation for checking the selection is selected. |
| element is WebElement object |
| """ |
| |
| |
| class element_located_to_be_selected(object): |
| """An expectation for the element to be located is selected. |
| locator is a tuple of (by, path)""" |
| |
| |
| class element_selection_state_to_be(object): |
| """ An expectation for checking if the given element is selected. |
| element is WebElement object |
| is_selected is a Boolean." |
| """ |
| |
| |
| class element_located_selection_state_to_be(object): |
| """ An expectation to locate an element and check if the selection state |
| specified is in that state. |
| locator is a tuple of (by, path) |
| is_selected is a boolean |
| """ |
| |
| |
| class number_of_windows_to_be(object): |
| """ An expectation for the number of windows to be a certain value.""" |
| |
| |
| class new_window_is_opened(object): |
| """ An expectation that a new window will be opened and have the number of |
| windows handles increase""" |
| |
| |
| class alert_is_present(object): |
| """ Expect an alert to be present.""" |
| |
| |
| def _find_element(driver, by): |
| """Looks up an element. Logs and re-raises ``WebDriverException`` |
| if thrown.""" |
前进和返回
back()方法,后退到上一个页面。forward()方法,前进到下一个页面
复制 | import time |
| from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.baidu.com/') |
| time.sleep(1) |
| browser.get('https://www.taobao.com/') |
| time.sleep(1) |
| browser.get('https://www.zhihu.com/') |
| time.sleep(1) |
| browser.back() |
| time.sleep(1) |
| browser.forward() |
| time.sleep(1) |
| browser.close() |
Cookies
复制 | from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.zhihu.com/explore') |
| print(browser.get_cookies()) |
| browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}) |
| print(browser.get_cookies()) |
| browser.delete_all_cookies() |
| print(browser.get_cookies()) |
| browser.close() |
复制 | |
| [{'domain': '.zhihu.com', 'expiry': 1579115127, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1563347127.1.1.utm......] |
| [{'domain': 'www.zhihu.com', 'expiry': 2194067127, 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}, {'domain': '.zhihu.com......] |
| [] |
选项卡管理
复制 | import time |
| from selenium import webdriver |
| |
| browser = webdriver.Chrome() |
| browser.get('https://www.baidu.com') |
| browser.execute_script('window.open()') |
| |
| |
| print(browser.window_handles) |
| |
| |
| browser.switch_to.window(browser.window_handles[1]) |
| |
| |
| browser.get('https://www.taobao.com') |
| time.sleep(1) |
| browser.switch_to.window(browser.window_handles[0]) |
| browser.get('https://zhihu.com') |
| browser.close() |
复制 | |
| ['CDwindow-BBF992DA636EC22831C022F29A7F976A', 'CDwindow-37A0508493A023D6BC1393D11D5F4D9F'] |
异常处理
复制 | from selenium import webdriver |
| from selenium.common.exceptions import TimeoutException, NoSuchElementException |
| |
| browser = webdriver.Chrome() |
| try: |
| browser.get('https://www.baidu.com') |
| except TimeoutException: |
| print('Time Out') |
| try: |
| browser.find_element_by_id('hello') |
| except NoSuchElementException: |
| print('No Element') |
| finally: |
| browser.close() |
复制
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾(3.3-3.9)