Python爬虫——selenium语法
1.获取元素
通过a标签的文本筛选
# Locate the link (<a> element) by its visible text, then click it.
link = driver.find_element(By.LINK_TEXT, 'xx')
link.click()
通过css筛选
# Type into the <input> whose type attribute is "email".
driver.find_element(By.CSS_SELECTOR, "input[type='email']").send_keys("xxx")
# Click the <button> whose type attribute is "button".
driver.find_element(By.CSS_SELECTOR, "button[type='button']").click()
# Click the <a> element that carries the CSS class "btn_download".
driver.find_element(By.CSS_SELECTOR, "a.btn_download").click()
通过element name或者css name筛选
# Select by the element's name attribute and type into it.
driver.find_element(By.NAME, "xx").send_keys("xxx")
# Select by CSS class name and click.
driver.find_element(By.CLASS_NAME, "xx").click()
通过xpath筛选(这里只用到了contains、starts-with等语法)
# starts-with(): match an <a> whose title attribute begins with "xx".
driver.find_element(By.XPATH, '//a[starts-with(@title, "xx")]').click()
# contains(): match an <iframe> whose title attribute contains "xx".
driver.find_element(By.XPATH, '//iframe[contains(@title, "xx")]').click()
2.时间筛选器选择时间
可以使用driver.execute_script(js)将元素的readonly属性去掉,再click后clear掉日期,最后send_keys输入新的日期时间
# Locate the date input by its class attribute.
element = driver.find_element(By.CSS_SELECTOR, "input[class='xxx']")

# Remove the readonly attribute from that very element. Passing the
# WebElement as a script argument (available as arguments[0] in the JS)
# guarantees the script touches the same node we located, instead of
# whatever happens to be the first element with that class name.
js = 'arguments[0].removeAttribute("readonly");'
driver.execute_script(js, element)

# With readonly gone the input accepts keyboard input: focus it, wipe the
# current value, then type the new date.
element.click()
element.clear()
element.send_keys("2022-01-01")
参考:selenium控制日历控件,readonly为true
3.指定chrome下载路径
在使用selenium下载文件的时候,如果不指定chrome的下载路径,会弹出下载框让你确定文件名和下载路径
需要通过ChromeOptions的add_experimental_option方法,以prefs选项为selenium的driver添加如下配置
prefs = {
    # Set to 0 to disable the pop-up window.
    'profile.default_content_settings.popups': 0,
    # Directory that downloads are saved to.
    'download.default_directory': '/Users/seluser/Downloads',
}
代码如下
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

profile = webdriver.ChromeOptions()
prefs = {
    # Set to 0 to disable the pop-up window.
    'profile.default_content_settings.popups': 0,
    # Directory that downloads are saved to.
    'download.default_directory': 'd:\\',
}
profile.add_experimental_option('prefs', prefs)

# Path to the chromedriver executable on your own machine. If chromedriver
# is already reachable through the PATH environment variable, the
# executable_path argument can be omitted.
chromedriver_path = "D:\\path\\chromedriver.exe"

# Selenium 4 takes the driver path through a Service object and the options
# through `options=`; the old `executable_path=` / `chrome_options=` keyword
# arguments of webdriver.Chrome are no longer accepted.
driver = webdriver.Chrome(
    service=Service(executable_path=chromedriver_path),
    options=profile,
)
参考:selenium+python自动化80-文件下载(不弹询问框)
如果使用的是undetected_chromedriver,则不支持chrome_options参数,也不能直接通过experimental option传入prefs(会报错),需要使用如下方式把prefs写入用户数据目录下的Preferences文件
import json
import os
import tempfile
from functools import reduce

import undetected_chromedriver as webdriver


class ChromeWithPrefs(webdriver.Chrome):
    """undetected_chromedriver Chrome that accepts the ``prefs`` option.

    Any ``prefs`` experimental option found on ``options`` is written to a
    ``Default/Preferences`` JSON file inside a freshly created temporary
    user data directory, and is then removed from ``options`` before the
    base class is initialised.
    """

    def __init__(self, *args, options=None, **kwargs):
        if options:
            self._handle_prefs(options)

        super().__init__(*args, options=options, **kwargs)

        # remove the user_data_dir when quitting
        self.keep_user_data_dir = False

    @staticmethod
    def _handle_prefs(options):
        """Move the ``prefs`` experimental option into a Preferences file.

        Does nothing when ``options`` carries no (or empty) ``prefs``.
        """
        prefs = options.experimental_options.get("prefs")
        if not prefs:
            return

        def undot_key(key, value):
            """Turn a (dotted key, value) pair into a proper nested dict."""
            if "." in key:
                key, rest = key.split(".", 1)
                value = undot_key(rest, value)
            return {key: value}

        def merge_nested(base, extra):
            """Recursively merge ``extra`` into ``base`` and return ``base``.

            A shallow ``{**d1, **d2}`` would let "profile.b" wipe out an
            earlier "profile.a", because both live under the same
            top-level "profile" key.
            """
            for key, value in extra.items():
                if isinstance(value, dict) and isinstance(base.get(key), dict):
                    merge_nested(base[key], value)
                else:
                    base[key] = value
            return base

        # undot prefs dict keys and deep-merge them into one nested dict
        undot_prefs = reduce(
            merge_nested,
            (undot_key(key, value) for key, value in prefs.items()),
            {},
        )

        # create an user_data_dir and add its path to the options
        user_data_dir = os.path.normpath(tempfile.mkdtemp())
        options.add_argument(f"--user-data-dir={user_data_dir}")

        # create the preferences json file in its default directory
        default_dir = os.path.join(user_data_dir, "Default")
        os.mkdir(default_dir)

        prefs_file = os.path.join(default_dir, "Preferences")
        with open(prefs_file, encoding="latin1", mode="w") as f:
            json.dump(undot_prefs, f)

        # pylint: disable=protected-access
        # remove the experimental_options to avoid an error
        del options._experimental_options["prefs"]


if __name__ == "__main__":
    prefs = {
        "profile.default_content_setting_values.images": 2,
        # "download.default_directory": "d:/temp",
        # "plugins.always_open_pdf_externally": True,
    }
    options = webdriver.ChromeOptions()
    options.add_experimental_option("prefs", prefs)

    # use the derived Chrome class that handles prefs
    driver = ChromeWithPrefs(options=options)
参考:https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/524
本文只发表于博客园和tonglin0325的博客,作者:tonglin0325,转载请注明原文链接:https://www.cnblogs.com/tonglin0325/p/4717180.html