Selenium（2）

一、安装Selenium

pip install selenium

二、八大元素定位

要想操作 Web 界面上的元素，首先要定位到该元素，Selenium 提供了定位元素的 API,这些方法都被定义在 WebDriver 类中，这些方法都以 find 开头。一共有如下八种：

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
# 在selenium.webdriver.common.by中各定位方法对应的名称如下
# ID = "id"
# XPATH = "xpath"
# LINK_TEXT = "link text"
# PARTIAL_LINK_TEXT = "partial link text"
# NAME = "name"
# TAG_NAME = "tag name"
# CLASS_NAME = "class name"
# CSS_SELECTOR = "css selector"

class TestCase():
    """Demonstrate Selenium's basic locator strategies on the Baidu home page.

    Creating an instance opens Chrome and loads the page. Every ``test_*``
    method closes the browser when it finishes, so one instance can run
    exactly one ``test_*`` method.
    """

    def __init__(self):
        # Open the Chrome browser.
        self.driver = webdriver.Chrome()
        try:
            # Implicit wait of 10 seconds for element lookups.
            self.driver.implicitly_wait(10)
            # Visit the URL.
            self.driver.get("https://www.baidu.com/")
        except Exception:
            # Do not leave an orphaned browser behind if setup fails.
            self.driver.quit()
            raise

    def _search_with(self, by, value, text):
        """Type *text* into the element found by (*by*, *value*) and search.

        The browser is closed afterwards even if a lookup or interaction
        raises, so a broken locator does not leave Chrome running.
        """
        try:
            self.driver.find_element(by, value).send_keys(text)
            # Click the "百度一下" (search) button.
            self.driver.find_element(By.ID, "su").click()
            # Pause so the result can be observed.
            sleep(5)
        finally:
            self.driver.quit()

    def _follow_link(self, by, value):
        """Click the element found by (*by*, *value*), then close the browser.

        The browser is closed even if the lookup or the click raises.
        """
        try:
            self.driver.find_element(by, value).click()
            # Pause so the result can be observed.
            sleep(5)
        finally:
            self.driver.quit()

    def test_id(self):
        """Locate the search box by its ``id`` (an id is unique)."""
        self._search_with(By.ID, "kw", "id")

    def test_name(self):
        """Locate the search box by its ``name`` attribute.

        A name is not unique: ``find_element`` returns the first match,
        while ``find_elements`` returns a collection of all matches.
        """
        self._search_with(By.NAME, "wd", "name")

    def test_class_name(self):
        """Locate the search box by its ``class`` attribute."""
        self._search_with(By.CLASS_NAME, "s_ipt", "class_name")

    def test_link_text(self):
        """Click a hyperlink located by its full link text."""
        self._follow_link(By.LINK_TEXT, "新闻")

    def test_partial_link_text(self):
        """Click a hyperlink located by part of its link text."""
        self._follow_link(By.PARTIAL_LINK_TEXT, "新")

if __name__ == "__main__":
    # Creating the object opens Chrome and loads the page.
    case = TestCase()

1.id

通过元素的id属性来进行元素的获取，一般id是不会重复的。类似于人的身份证号码。

case.test_id()

2.name

通过元素的name属性来进行元素的获取，一般有可能会重名。类似于人的姓名。

case.test_name()

3.class name

通过元素的class属性来进行元素的获取，不特别推荐用此方法定位，因为class值可能会特别长，所以在读代码时不会特别友好。

case.test_class_name()

4.link text

通过超链接的文本来进行定位，只能用于a标签的元素进行定位。

case.test_link_text()

5.partial link text

与link text一样通过超链接文本来定位，只不过是通过模糊匹配的方式来实现元素的定位，类似于sql中的 like '%s%'，同样也只能用于a标签。

case.test_partial_link_text()

6.tag name

通过元素的标签名称来进行元素的获取，一定会重名，一般在自动化中几乎不用，在爬虫领域用的比较多。

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
 
class TestCase():
    """Demonstrate the TAG_NAME locator on the Taobao home page.

    Creating an instance opens Chrome and loads the page; ``test_tag_name``
    closes the browser when it finishes.
    """

    def __init__(self):
        # Open the Chrome browser.
        self.driver = webdriver.Chrome()
        try:
            # Implicit wait of 10 seconds for element lookups.
            self.driver.implicitly_wait(10)
            # Visit the URL.
            self.driver.get("https://www.taobao.com/")
        except Exception:
            # Do not leave an orphaned browser behind if setup fails.
            self.driver.quit()
            raise

    def test_tag_name(self):
        """Type into the first ``input`` element and click the search button.

        The browser is closed afterwards even if a lookup or interaction
        raises, so a broken locator does not leave Chrome running.
        """
        try:
            # The page contains several "input" tags; find_element returns the
            # first one, while find_elements would return a collection.
            self.driver.find_element(By.TAG_NAME, "input").send_keys("tag_name")
            # Click the "搜索" (search) button.
            self.driver.find_element(By.XPATH, '//*[@id="J_TSearchForm"]/div[1]/button').click()
            # Pause so the result can be observed.
            sleep(5)
        finally:
            self.driver.quit()
 
if __name__ == "__main__":
    # Open the page, then run the tag-name demo.
    TestCase().test_tag_name()

7.css selector

定位界的万金油，可以基于元素的标签名、id、class、其他属性以及层级关系来进行定位（其中 class 最为常用）。

from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
 
# Open the Chrome browser.
driver = webdriver.Chrome()
# Implicit wait of 10 seconds for element lookups.
driver.implicitly_wait(10)
# Visit the URL.
driver.get("https://www.baidu.com/")

css通过绝对路径定位

绝对路径：从根标签(html)一级一级找到目标标签

上下级之间符号：>或空格

# The same absolute path from the root <html> tag, written once with ">"
# between levels and once with spaces between levels.
absolute_paths = (
    "html>body>div>div>div>div>div>form>span>input",
    "html body div div div div div form span input",
)
for selector in absolute_paths:
    driver.find_element(By.CSS_SELECTOR, selector).send_keys("css通过绝对路径定位")
sleep(5)

css通过id定位

id选择器符号：#

# "#" is the CSS id selector.
element = driver.find_element(By.CSS_SELECTOR, "#kw")
element.send_keys("css通过id定位")
sleep(5)

css通过class定位

class选择器符号：.

# "." is the CSS class selector.
element = driver.find_element(By.CSS_SELECTOR, ".s_ipt")
element.send_keys("css通过class定位")
sleep(5)

css通过标签定位

直接输入标签的值即可，一般情况会跟id/class/属性一起组合来进行定位

# A tag name combined with an id selector: an <input> whose id is "kw".
element = driver.find_element(By.CSS_SELECTOR, "input#kw")
element.send_keys("css通过标签定位")
sleep(5)

css通过一个(多个)属性定位

[属性名='属性值']

# Attribute selectors: first a single attribute, then two attributes combined.
attribute_selectors = (
    "[maxlength='255']",
    "[name='wd'][maxlength='255']",
)
for selector in attribute_selectors:
    driver.find_element(By.CSS_SELECTOR, selector).send_keys("css通过一个(多个)属性值定位")
sleep(5)

css通过部分属性值定位

[属性名*='属性值']、[属性名^='属性值']、[属性名$='属性值']

匹配符号：*= 包含某个字符串；^= 以某个字符串开头；$= 以某个字符串结尾

# Partial attribute matching: "*=" contains, "$=" ends with, "^=" starts with.
partial_selectors = (
    "[autocomplete*='f']",
    "[autocomplete$='ff']",
    "[autocomplete^='o']",
)
for selector in partial_selectors:
    driver.find_element(By.CSS_SELECTOR, selector).send_keys("css通过部分属性值定位")
sleep(5)

css通过层级定位

一般情况会跟id/class/属性一起组合来进行定位

上下级符号：>或空格

# Parent>child paths: plain, then combined with an id, then with a class.
hierarchy_selectors = (
    "form>span>input",
    "form>span>input#kw",
    "form>span>input.s_ipt",
)
for selector in hierarchy_selectors:
    driver.find_element(By.CSS_SELECTOR, selector).send_keys("css通过层级定位")

css通过兄弟节点定位

兄弟节点：同一父级标签，下面有多个相同标签的元素，那么这些元素就是兄弟节点

定位第n个节点：nth-child(n)

定位倒数第n个节点：nth-last-child(n)

# Sibling-position selectors under div#s-top-left: an <a> that is the first
# child, then an <a> that is the second-to-last child.
sibling_selectors = (
    "div#s-top-left>a:nth-child(1)",
    "div#s-top-left>a:nth-last-child(2)",
)
for selector in sibling_selectors:
    driver.find_element(By.CSS_SELECTOR, selector).click()
sleep(5)

8.xpath

定位界万金油,比css更常用，定位核心是基于树状结构来进行定位。

from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
 
# Open the browser.
driver = webdriver.Chrome()
# Implicit wait of 10 seconds for element lookups.
driver.implicitly_wait(10)
# Load the URL.
driver.get("https://www.baidu.com/")

绝对路径：使用/

/html/body/div/div/div...... #实际工作中不会使用

相对路径：使用//

相对路径：//form/span/input #定位所有 form 标签下的 span 标签下的 input 元素

# Relative path: "//" starts the search anywhere in the document.
element = driver.find_element(By.XPATH, "//form/span/input")
element.send_keys("相对路径")
sleep(5)

相对路径+索引定位：//form/span[1]/input #定位form标签下第1个span标签下的input元素（XPath的索引从1开始）

# Relative path plus an index: the <input> under the first <span> of a <form>.
element = driver.find_element(By.XPATH, "//form/span[1]/input")
element.send_keys("相对路径+索引定位")
sleep(5)

相对路径+属性定位：//input[@autocomplete='off'] #定位带有autocomplete='off'属性的input元素

# Relative path plus an attribute test: an <input> with autocomplete='off'.
element = driver.find_element(By.XPATH, "//input[@autocomplete='off']")
element.send_keys("相对路径+属性定位")
sleep(5)

相对路径+属性组合：

and：//input[@name='wd' and @autocomplete='off']

or：//input[@name='wd' or @autocomplete='off']

# Two attribute tests joined with "and", then with "or".
combined_paths = (
    "//input[@name='wd' and @autocomplete='off']",
    "//input[@name='wd' or @autocomplete='off']",
)
for path in combined_paths:
    driver.find_element(By.XPATH, path).send_keys("相对路径+属性组合")
sleep(5)

相对路径+通配符*：//*[@*='wd' or @*='off'] #标签名、属性名可以用通配符代替

# "*" stands in for any tag name and "@*" for any attribute name.
element = driver.find_element(By.XPATH, "//*[@*='wd' or @*='off']")
element.send_keys("相对路径+通配符*")
sleep(5)

以上所有的方式都可以组合使用：//form[@*='form' and @name='f']/span[1]/input

# Wildcard, attribute test, index and path combined in one expression.
element = driver.find_element(By.XPATH, "//form[@*='form' and @name='f']/span[1]/input")
element.send_keys("相对路径+组合使用")
sleep(5)

动态元素定位
相对路径+部分属性值定位：

属性值以of开头：//input[starts-with(@autocomplete,'of')] #匹配以of开头

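属性值以ff结尾：//input[substring(@autocomplete,2)='ff'] #匹配第2位到最后一位（仅当属性值恰好为3个字符时才等价于“以ff结尾”；通用写法：//input[substring(@autocomplete, string-length(@autocomplete) - string-length('ff') + 1)='ff']）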

属性值中间包含f：//input[contains(@autocomplete,'f')] #匹配带有'f'的

# Partial attribute-value matching with XPath 1.0 string functions.
# starts-with(): the value begins with "of".
driver.find_element(By.XPATH, "//input[starts-with(@autocomplete,'of')]").send_keys("相对路径+部分属性值定位")
# XPath 1.0 has no ends-with(), so compare the trailing substring whose length
# equals the suffix length. This works for a value of any length, whereas
# substring(@autocomplete,2)='ff' only matches three-character values.
driver.find_element(
    By.XPATH,
    "//input[substring(@autocomplete, string-length(@autocomplete) - string-length('ff') + 1)='ff']",
).send_keys("相对路径+部分属性值定位")
# contains(): the value has "f" somewhere inside it.
driver.find_element(By.XPATH, "//input[contains(@autocomplete,'f')]").send_keys("相对路径+部分属性值定位")

问题：登录网易邮箱，iframe标签下的id每次网页刷新都会变，如何定位这个动态变化的元素呢？

代码：

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
 
url = "https://www.126.com/"

# Open the browser.
driver = webdriver.Chrome()
try:
    # Implicit wait of 10 seconds for element lookups.
    driver.implicitly_wait(10)
    # Load the page.
    driver.get(url)
    # An <iframe> tag marks a sub-frame; an element inside it can only be
    # located after switching into that frame. starts-with() matches the id
    # attribute by its "x-URS-iframe" prefix.
    login_frame = driver.find_element(By.XPATH, "//iframe[starts-with(@id,'x-URS-iframe')]")
    driver.switch_to.frame(login_frame)
    # Type the mailbox name.
    driver.find_element(By.NAME, "email").send_keys("crror")
    time.sleep(5)
finally:
    # Always close the browser, even if a lookup above failed.
    driver.quit()

文本定位

相对路径+文本定位：

#link_text，partial_link_text只能用于超链接

xpath文本定位：//h2[text()='帐号登录']

xpath文本部分定位：//h2[contains(text(),'号')]

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
 
url = "https://www.126.com/"

# Open the browser.
driver = webdriver.Chrome()
try:
    # Implicit wait of 10 seconds for element lookups.
    driver.implicitly_wait(10)
    # Load the URL.
    driver.get(url)
    # Partial-text match: an <h2> whose text contains "号"; read its class
    # attribute. The exact-text form would be "//h2[text()='帐号登录']".
    heading = driver.find_element(By.XPATH, "//h2[contains(text(),'号')]")
    value = heading.get_attribute("class")
    time.sleep(5)
finally:
    # Always close the browser, even if the lookup above failed.
    driver.quit()
# Print the value of the <h2> element's class attribute.
print(value)

三、Selenium WebDriver 属性和方法
driver.name 浏览器名称

driver.current_url 当前url

driver.title 当前页面标题

driver.page_source 当前页面源码

driver.current_window_handle 窗口句柄

driver.window_handles 当前会话中所有窗口/页签的句柄列表（句柄即浏览器窗口或页签的标识码）

driver.back() 浏览器后退

driver.forward() 浏览器前进

driver.refresh() 浏览器刷新

driver.close() 关闭当前窗口

driver.quit() 退出浏览器

driver.switch_to.frame() 切换到frame

driver.switch_to.alert 切换到alert

driver.switch_to.active_element 切换到活动元素

四、Selenium WebElement 属性和方法

使用 WebDriver的 find 方法定位到元素后，会返回一个 WebElement 对象

WebElement常用属性如下：

id 标识（Selenium 内部为该元素分配的标识，不是 HTML 的 id 属性）

size 宽高

rect 宽高和坐标

tag_name 标签名称

text 文本内容

WebElement常用方法如下：

send_keys() 输入内容

clear() 清空内容

click() 单击

get_attribute() 获得属性值

is_selected() 是否被选中

is_enabled() 是否可用

is_displayed() 是否显示

value_of_css_property() css属性值

posted @ 2024-01-01 22:25 vorn 阅读(33) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· Selenium（3）

· Selenium（1）

· Selenium八种元素定位方法

· selenium元素定位方式及常用操作

· Selenium - 元素定位(1) - 八种元素定位

阅读排行：
· TypeScript + Deepseek 打造卜卦网站：技术与玄学的结合
· Manus的开源复刻OpenManus初探
· 三行代码完成国际化适配，妙~啊~
· .NET Core 中如何实现缓存的预热？
· 如何调用 DeepSeek 的自然语言处理 API 接口并集成到在线客服系统

vorn