selenium 之Options和ChromeOptions
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Build the Chrome launch configuration.
chrome_options = Options()
# Emulator settings
chrome_options.add_argument('--headless')  # do not show a visible browser window
chrome_options.add_argument('--disable-gpu')  # the Google docs mention adding this flag to work around a bug
# Start in developer mode; in this mode the webdriver property keeps its normal value.
# Sites with good anti-scraping measures usually detect automation through it.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Load the configuration into the webdriver. `options=` is the current keyword;
# the older `chrome_options=` keyword is deprecated and rejected by recent Selenium 4 releases.
driver = webdriver.Chrome(options=chrome_options)
driver.get('xxxxxx')
# Set the default encoding to utf-8, i.e. Chinese
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument('lang=zh_CN.UTF-8')
# `options=` is the current keyword; the older `chrome_options=` keyword is
# deprecated and rejected by recent Selenium 4 releases.
driver = webdriver.Chrome(options=options)
两种方法都可以。指向的源代码都一样。
chromeOptions
-
chromeOptions 是一个配置 chrome 启动时属性的类。通过这个类,我们可以为chrome配置如下参数(这个部分可以通过selenium源码看到):
-
设置 chrome 二进制文件位置 (binary_location)
-
添加启动参数 (add_argument)
-
添加扩展应用 (add_extension, add_encoded_extension)
-
添加实验性质的设置参数 (add_experimental_option)
-
设置调试器地址 (debugger_address)
-
源代码:
# .\Lib\site-packages\selenium\webdriver\chrome\options.py class Options(object): def __init__(self): # 设置 chrome 二进制文件位置 self._binary_location = '' # 添加启动参数 self._arguments = [] # 添加扩展应用 self._extension_files = [] self._extensions = [] # 添加实验性质的设置参数 self._experimental_options = {} # 设置调试器地址 self._debugger_address = None
example:
chromeOptions
chromeOptions 是一个配置 chrome 启动时属性的类。通过这个类,我们可以为chrome配置如下参数(这个部分可以通过selenium源码看到):
设置 chrome 二进制文件位置 (binary_location)
添加启动参数 (add_argument)
添加扩展应用 (add_extension, add_encoded_extension)
添加实验性质的设置参数 (add_experimental_option)
设置调试器地址 (debugger_address)
# Disable image loading
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait  # was used below without being imported

chrome_options = webdriver.ChromeOptions()
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)

# Start the browser and set up the wait.
# NOTE(review): `configure` is a project-specific settings module that is not
# shown in this snippet; it must be imported before this code can run.
browser = webdriver.Chrome(options=chrome_options)
# set_window_size expects (width, height). Choose values according to the desktop
# resolution; this is mainly so that the captcha can be captured in a screenshot.
browser.set_window_size(configure.windowWidth, configure.windowHeight)
wait = WebDriverWait(browser, timeout=configure.timeoutMain)
# Add the XPath Helper extension
from selenium import webdriver

chrome_options = webdriver.ChromeOptions()

# Register the packed extension (.crx) to be installed at launch
extension_path = 'D:/extension/XPath-Helper_v2.0.2.crx'
chrome_options.add_extension(extension_path)

# Start the browser. `options=` is the current keyword; the older
# `chrome_options=` keyword is deprecated and rejected by recent Selenium 4 releases.
browser = webdriver.Chrome(options=chrome_options)
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

options = webdriver.ChromeOptions()
# Setting these two preferences prevents the save-password prompt from popping up.
# The keys must be written with plain ASCII quotes; typographic quotes are a SyntaxError.
prefs = {
    "credentials_enable_service": False,
    "profile.password_manager_enabled": False,
}
options.add_experimental_option("prefs", prefs)
# `options=` is the current keyword; the older `chrome_options=` keyword is
# deprecated and rejected by recent Selenium 4 releases.
browser = webdriver.Chrome(options=options)
browser.get('https://www.baidu.com/')
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = webdriver.ChromeOptions()

# Screen size
options.add_argument("--window-size=1920,1080")

# Evade automation detection
options.add_experimental_option('excludeSwitches', ['enable-automation'])

# Do not display images, which speeds things up
prefs = {}
prefs["profile.managed_default_content_settings.images"] = 2
options.add_experimental_option("prefs", prefs)

browser = webdriver.Chrome(options=options)
Chrome_Options
# Selenium receives its launch configuration as a ChromeOptions instance
from selenium import webdriver

# One object, one name: the original mixed `option`, `options` and `chrome_options`,
# which raises NameError as soon as the second name is used.
options = webdriver.ChromeOptions()

# Set the user agent
options.add_argument('user-agent="MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"')
# Set the browser resolution (Chrome documents this switch as width,height)
options.add_argument('window-size=1920,3000')
# The Google docs mention adding this flag to work around a bug
options.add_argument('--disable-gpu')
# Hide scrollbars, to cope with some special pages
options.add_argument('--hide-scrollbars')
# Do not load images, to improve speed
options.add_argument('blink-settings=imagesEnabled=false')
# Do not show a visible browser window. On Linux, if the system has no display,
# startup fails without this flag.
options.add_argument('--headless')
# Run with the highest privileges
options.add_argument('--no-sandbox')
# Manually specify the location of the browser binary
options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
# Add a crx extension (raw string so the backslashes are not read as escape sequences)
options.add_extension(r'd:\crx\AdBlock_v2.17.crx')
# Disable JavaScript
options.add_argument("--disable-javascript")
# Start in developer mode; in this mode the webdriver property keeps its normal value
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Disable browser notification pop-ups
prefs = {
    'profile.default_content_setting_values': {
        'notifications': 2
    }
}
options.add_experimental_option('prefs', prefs)
# `options=` is the current keyword; the older `chrome_options=` keyword is
# deprecated and rejected by recent Selenium 4 releases.
driver = webdriver.Chrome(options=options)
其他配置项目参数
--user-data-dir="[PATH]" # 指定用户文件夹User Data路径,可以把书签这样的用户数据保存在系统分区以外的分区 --disk-cache-dir="[PATH]" # 指定缓存Cache路径 --disk-cache-size= # 指定Cache大小,单位Byte --first-run # 重置到初始状态,第一次运行 --incognito # 隐身模式启动 --disable-javascript # 禁用Javascript --omnibox-popup-count="num" # 将地址栏弹出的提示菜单数量改为num个 --user-agent="xxxxxxxx" # 修改HTTP请求头部的Agent字符串,可以通过about:version页面查看修改效果 --disable-plugins # 禁止加载所有插件,可以增加速度。可以通过about:plugins页面查看效果 --disable-javascript # 禁用JavaScript,如果觉得速度慢再加上这个 --disable-java # 禁用java --start-maximized # 启动就最大化 --no-sandbox # 取消沙盒模式 --single-process # 单进程运行 --process-per-tab # 每个标签使用单独进程 --process-per-site # 每个站点使用单独进程 --in-process-plugins # 插件不启用单独进程 --disable-popup-blocking # 禁用弹出拦截 --disable-plugins # 禁用插件 --disable-images # 禁用图像 --incognito # 启动进入隐身模式 --enable-udd-profiles # 启用账户切换菜单 --proxy-pac-url # 使用pac代理 [via 1/2] --lang=zh-CN # 设置语言为简体中文 --disk-cache-dir # 自定义缓存目录 --disk-cache-size # 自定义缓存最大值(单位byte) --media-cache-size # 自定义多媒体缓存最大值(单位byte) --bookmark-menu # 在工具栏增加一个书签按钮 --enable-sync # 启用书签同步
全部:(下面的没有进行翻译,仔细看下应该都看得懂. ~ ~) --disable-hang-monitor Suppresses hang monitor dialogs in renderer processes. --disable-metrics Completely disables UMA metrics system. --disable-metrics-reporting Disables only the sending of metrics reports. In contrast to kDisableMetrics, this executes all the code that a normal client would use for reporting, except the report is dropped rather than sent to the server. This is useful for finding issues in the metrics code during UI and performance tests. --assert-test Causes the browser process to throw an assertion on startup. --renderer-assert-test Causes the renderer process to throw an assertion on launch. --crash-test Performs a crash test when the browser is started. --renderer-crash-test Causes the renderer process to crash on launch. --renderer-startup-dialog Use this argument when you want to see the child processes as soon as Chrome starts. --plugin-startup-dialog Causes the plugin process to display a dialog on launch. --testshell-startup-dialog Causes the test shell process to display a dialog on launch. --plugin-launcher Specifies a command that should be used to launch the plugin process. Useful for running the plugin process through purify or quantify. Ex: --plugin-launcher="path\to\purify /Run=yes". --channel The value of this switch tells the child process which IPC channel the browser expects to use to communicate with it. --testing-channel The value of this switch tells the app to listen for and broadcast testing-related messages on IPC channel with the given ID. --homepage The value of this switch specifies which page will be displayed in newly-opened tabs. We need this for testing purposes so that the UI tests don’t depend on what comes up for http://google.com. --start-renderers-manually When this switch is present, the browser will throw up a dialog box asking the user to start a renderer process independently rather than launching the renderer itself. (This is useful for debugging.)
--renderer Causes the process to run as renderer instead of as browser. --renderer-path Path to the executable to run for the renderer subprocess. --plugin Causes the process to run as plugin host. --single-process Runs the renderer and plugins in the same process as the browser. --process-per-tab Runs each set of script-connected tabs (i.e., a BrowsingInstance) in its own renderer process. We default to using a renderer process for each site instance (i.e., group of pages from the same registered domain with script connections to each other). --process-per-site Runs a single process for each site (i.e., group of pages from the same registered domain) the user visits. We default to using a renderer process for each site instance (i.e., group of pages from the same registered domain with script connections to each other). --in-process-plugins Runs plugins inside the renderer process. --no-sandbox Runs the renderer outside the sandbox. --safe-plugins Runs the plugin processes inside the sandbox. --trusted-plugins Excludes these plugins from the plugin sandbox. This is a comma separated list of plugin dlls name and activex clsid. --test-sandbox Runs the security test for the sandbox. --user-data-dir Specifies the user data directory, which is where the browser will look for all of its state. --app Specifies that the associated value should be launched in “application” mode. --upload-file Specifies the file that should be uploaded to the provided application. This switch is expected to be used with --app option. --dom-automation Specifies if the dom_automation_controller_ needs to be bound in the renderer. This binding happens on per-frame basis and hence can potentially be a performance bottleneck. One should only enable it when automating dom based tests. --plugin-path Tells the plugin process the path of the plugin to load. --js-flags Specifies the flags passed to JS engine. --geoid The GeoID we should use.
This is normally obtained from the operating system during first run and cached in the preferences afterwards. This is a numeric value; see http://msdn.microsoft.com/en-us/library/ms776390.aspx . --lang The language file that we want to try to open. Of the form language[-country] where language is the 2 letter code from ISO-639. --debug-children Will add kDebugOnStart to every child processes. If a value is passed, it will be used as a filter to determine if the child process should have the kDebugOnStart flag passed on or not. --debug-on-start Causes the process to start the JIT debugger on itself (mainly used by --debug-children). --wait-for-debugger-children Will add kWaitForDebugger to every child processes. If a value is passed, it will be used as a filter to determine if the child process should have the kWaitForDebugger flag passed on or not. --wait-for-debugger Waits for a debugger for 60 seconds. --log-filter-prefix Will filter log messages to show only the messages that are prefixed with the specified value. --enable-logging Force logging to be enabled. Logging is disabled by default in release builds. --dump-histograms-on-exit Dump any accumulated histograms to the log when browser terminates (requires logging to be enabled to really do anything). Used by developers and test scripts. --disable-logging Force logging to be disabled. Logging is enabled by default in debug builds. --log-level Sets the minimum log level. Valid values are from 0 to 3: INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3. --remote-shell-port Enable remote debug / automation shell on the specified port. --uninstall Runs un-installation steps that were done by chrome first-run. --omnibox-popup-count Number of entries to show in the omnibox popup. --uninstallomnibox-popup-count Removes the previous set suggestion count. --automation-channel The value of this switch tells the app to listen for and broadcast automation-related messages on IPC channel with the given ID.
--restore-last-session Indicates the last session should be restored on startup. This overrides the preferences value and is primarily intended for testing. --record-mode --playback-mode Chrome supports a playback and record mode. Record mode saves *everything* to the cache. Playback mode reads data exclusively from the cache. This allows us to record a session into the cache and then replay it at will. --no-events Don’t record/playback events when using record & playback. --hide-icons --show-icons Make Windows happy by allowing it to show “Enable access to this program” checkbox in Add/Remove Programs->Set Program Access and Defaults. This only shows an error box because the only way to hide Chrome is by uninstalling it. --make-default-browser Make Chrome default browser. --proxy-server Use a specified proxy server, overrides system settings. This switch only affects HTTP and HTTPS requests. --dns-log-details --dns-prefetch-disable Chrome will support prefetching of DNS information. Until this becomes the default, we’ll provide a command line switch. --debug-print Enables support to debug printing subsystem. --allow-all-activex Allow initialization of all activex controls. This is only to help website developers test their controls to see if they are compatible in Chrome. Note there’s a duplicate value in activex_shared.cc (to avoid dependency on chrome module). Please change both locations at the same time. --disable-dev-tools Browser flag to disable the web inspector for all renderers. --always-enable-dev-tools Enable web inspector for all windows, even if they’re part of the browser. Allows us to use our dev tools to debug browser windows itself. --memory-model Configure Chrome’s memory model. Does chrome really need multiple memory models? No. But we get a lot of concerns from individuals about how the changes work on *their* system, and we need to be able to experiment with a few choices.
--tab-count-to-load-on-session-restore Used to set the value of SessionRestore::num_tabs_to_load_. See session_restore.h for details. const wchar_t kTabCountToLoadOnSessionRestore[] . --memory-profile Enable dynamic loading of the Memory Profiler DLL, which will trace all memory allocations during the run. --enable-file-cookies By default, cookies are not allowed on file://. They are needed in for testing, for example page cycler and layout tests. See bug 1157243. --start-maximized Start the browser maximized, regardless of any previous settings. TODO(pjohnson): Remove this once bug 1137420 is fixed. We are using this as a workaround for not being able to use moveTo and resizeTo on a top-level window. --enable-watchdog Spawn threads to watch for excessive delays in specified message loops. User should set breakpoints on Alarm() to examine problematic thread. Usage: -enable-watchdog=[ui][io] Order of the listed sub-arguments does not matter. --first-run Display the First Run experience when the browser is started, regardless of whether or not it’s actually the first run. --message-loop-strategy --message-loop-histogrammer Enable histograming of tasks served by MessageLoop. See about:histograms/Loop for results, which show frequency of messages on each thread, including APC count, object signalling count, etc. --import Perform importing from another browser. The value associated with this setting encodes the target browser and what items to import. --silent-dump-on-dcheck Change the DCHECKS to dump memory and continue instead of crashing. This is valid only in Release mode when --enable-dcheck is specified. --disable-prompt-on-repost Normally when the user attempts to navigate to a page that was the result of a post we prompt to make sure they want to. This switch may be used to disable that check. This switch is used during automated testing. --disable-popup-blocking Disable pop-up blocking.
--disable-javascript Don’t execute JavaScript (browser JS like the new tab page still runs). --disable-java Prevent Java from running. --disable-plugins Prevent plugins from running. --disable-images Prevent images from loading. --use-lf-heap Use the low fragmentation heap for the CRT. --gears-plugin-path Debug only switch to specify which gears plugin dll to load. --gears-in-renderer Switch to load Gears in the renderer process. --enable-p13n --javascript-debugger-path Allow loading of the javascript debugger UI from the filesystem. --new-http Enable new HTTP stack.