Python爬虫——selenium语法

1.获取元素

通过a标签的文本筛选

driver.find_element(By.LINK_TEXT, 'xx').click()

通过css筛选

driver.find_element(By.CSS_SELECTOR, "input[type='email']").send_keys("xxx")
driver.find_element(By.CSS_SELECTOR, "button[type='button']").click()
driver.find_element(By.CSS_SELECTOR, "a.btn_download").click()

通过元素的name属性或者class name筛选

driver.find_element(By.NAME, "xx").send_keys("xxx")
driver.find_element(By.CLASS_NAME, "xx").click()

通过xpath筛选,可以使用contains、starts-with等函数

driver.find_element(By.XPATH, '//a[starts-with(@title, "xx")]').click()
driver.find_element(By.XPATH, '//iframe[contains(@title, "xx")]').click()

2.时间筛选器选择时间

可以使用driver.execute_script(js)将元素的readonly属性去掉,再click后clear掉日期,最后send_keys输入新的日期时间

# Locate the read-only date input(s) by their exact class attribute.
element = driver.find_elements(By.CSS_SELECTOR, "input[class='xxx']")
# Remove `readonly` from the very element located above (passed to the script
# as arguments[0]) rather than re-querying the DOM by class name, which could
# resolve to a different node than the one the following calls act on.
js = 'arguments[0].removeAttribute("readonly");'
driver.execute_script(js, element[0])
# With the input now editable: focus it, wipe the old date, type the new one.
element[0].click()
element[0].clear()
element[0].send_keys("2022-01-01")

参考:selenium控制日历控件,readonly为true

3.指定chrome下载路径

在使用selenium下载文件的时候,如果不指定chrome的下载路径,会弹出下载框让你确定文件名和下载路径

需要通过ChromeOptions的add_experimental_option('prefs', prefs)为driver添加如下配置

prefs = {'profile.default_content_settings.popups': 0,  # 设置为 0 禁止弹出窗口
         'download.default_directory': '/Users/seluser/Downloads'}          # 指定下载路径

代码如下

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Chrome options object that carries the download preferences.
profile = webdriver.ChromeOptions()

prefs = {'profile.default_content_settings.popups': 0,  # 0 suppresses the pop-up dialog
         'download.default_directory': 'd:\\'}          # directory downloads are saved to

profile.add_experimental_option('prefs', prefs)

# Path to the chromedriver binary on the local machine; when chromedriver is
# already reachable through the PATH environment variable, the Service can be
# created without it.
chromedriver_path = "D:\\path\\chromedriver.exe"
# Selenium 4 dropped the `executable_path` and `chrome_options` keyword
# arguments of webdriver.Chrome: the driver path now goes through a Service
# object and the options are passed as `options`.
driver = webdriver.Chrome(service=Service(executable_path=chromedriver_path), options=profile)

参考:selenium+python自动化80-文件下载(不弹询问框)

如果使用的是undetected_chromedriver,则不支持chrome_options参数,需要使用如下方式进行添加prefs

import json
import os
import tempfile
from functools import reduce

import undetected_chromedriver as webdriver

class ChromeWithPrefs(webdriver.Chrome):
    """Chrome driver that applies the ``prefs`` experimental option via a profile.

    Before the base driver is started, any ``prefs`` found in the options'
    experimental options are written to the ``Default/Preferences`` JSON file
    of a freshly created temporary user-data directory, that directory is
    passed to Chrome with ``--user-data-dir``, and the ``prefs`` entry is
    removed from the options.
    """

    def __init__(self, *args, options=None, **kwargs):
        """Start the driver, materialising ``options``' prefs on disk first.

        Args:
            *args: Positional arguments forwarded to the base ``Chrome``.
            options: Options object; when truthy, its ``prefs`` experimental
                option (if any) is converted by ``_handle_prefs``.
            **kwargs: Keyword arguments forwarded to the base ``Chrome``.
        """
        if options:
            self._handle_prefs(options)

        super().__init__(*args, options=options, **kwargs)

        # remove the user_data_dir when quitting
        self.keep_user_data_dir = False

    @staticmethod
    def _undot_key(key, value):
        """Turn a (dotted key, value) pair into a nested single-path dict.

        ``("a.b.c", 1)`` becomes ``{"a": {"b": {"c": 1}}}``; a key without a
        dot becomes ``{key: value}``.
        """
        if "." in key:
            key, rest = key.split(".", 1)
            value = ChromeWithPrefs._undot_key(rest, value)
        return {key: value}

    @staticmethod
    def _merge_nested(base, extra):
        """Return a new dict combining ``base`` and ``extra`` recursively.

        Where both sides hold a dict under the same key the two are merged
        key by key; otherwise the value from ``extra`` wins. Neither input is
        mutated.
        """
        merged = dict(base)
        for key, value in extra.items():
            current = merged.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                merged[key] = ChromeWithPrefs._merge_nested(current, value)
            else:
                merged[key] = value
        return merged

    @staticmethod
    def _handle_prefs(options):
        """Move ``prefs`` from ``options`` into an on-disk Preferences file.

        Does nothing when ``options`` carries no (or empty) ``prefs``.
        Otherwise ``options`` is modified in place: a ``--user-data-dir``
        argument is added and the ``prefs`` experimental option is deleted.

        Args:
            options: Options object exposing ``experimental_options``,
                ``add_argument`` and ``_experimental_options``.
        """
        prefs = options.experimental_options.get("prefs")
        if not prefs:
            return

        # Undot every key and deep-merge the results. A shallow merge would
        # let e.g. "profile.a" and "profile.b" overwrite each other's
        # "profile" sub-dict, silently dropping all but the last one.
        undot_prefs = reduce(
            ChromeWithPrefs._merge_nested,
            (ChromeWithPrefs._undot_key(key, value) for key, value in prefs.items()),
            {},
        )

        # create an user_data_dir and add its path to the options
        user_data_dir = os.path.normpath(tempfile.mkdtemp())
        options.add_argument(f"--user-data-dir={user_data_dir}")

        # create the preferences json file in its default directory
        default_dir = os.path.join(user_data_dir, "Default")
        os.mkdir(default_dir)

        prefs_file = os.path.join(default_dir, "Preferences")
        # json.dump escapes non-ASCII characters by default, so the bytes
        # written are plain ASCII whichever encoding is named here.
        with open(prefs_file, encoding="latin1", mode="w") as f:
            json.dump(undot_prefs, f)

        # pylint: disable=protected-access
        # remove the experimental_options to avoid an error
        del options._experimental_options["prefs"]


if __name__ == "__main__":
    # Demo: start Chrome through ChromeWithPrefs with a single preference set.
    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs",
        {
            "profile.default_content_setting_values.images": 2,
            # Further preferences that can be enabled the same way:
            # "download.default_directory": "d:/temp",
            # "plugins.always_open_pdf_externally": True,
        },
    )

    # The derived class, not the stock Chrome, is what handles the prefs.
    driver = ChromeWithPrefs(options=options)

参考:https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/524

posted @ 2015-08-10 10:02  tonglin0325  阅读(340)  评论(0)  编辑  收藏  举报