| from selenium import webdriver |
| driver = webdriver.Chrome() |
| driver.get('https://www.baidu.com') |
| from selenium import webdriver |
| `` |
| |
| ``` |
| from .firefox.webdriver import WebDriver as Firefox |
| from .firefox.firefox_profile import FirefoxProfile |
| from .firefox.options import Options as FirefoxOptions |
| |
| from .chrome.webdriver import WebDriver as Chrome |
| from .chrome.options import Options as ChromeOptions |
| from .ie.webdriver import WebDriver as Ie |
| from .ie.options import Options as IeOptions |
| from .edge.webdriver import WebDriver as Edge |
| from .opera.webdriver import WebDriver as Opera |
| from .safari.webdriver import WebDriver as Safari |
| from .blackberry.webdriver import WebDriver as BlackBerry |
| from .phantomjs.webdriver import WebDriver as PhantomJS |
| from .android.webdriver import WebDriver as Android |
| from .webkitgtk.webdriver import WebDriver as WebKitGTK |
| from .webkitgtk.options import Options as WebKitGTKOptions |
| from .remote.webdriver import WebDriver as Remote |
| from .common.desired_capabilities import DesiredCapabilities |
| from .common.action_chains import ActionChains |
| from .common.touch_actions import TouchActions |
| from .common.proxy import Proxy |
| ``` |
| |
| |
| ``` |
| |
| import warnings |
| from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver |
| from .remote_connection import ChromeRemoteConnection |
| from .service import Service |
| from .options import Options |
| class WebDriver(RemoteWebDriver): |
| |
| def __init__(self, executable_path="chromedriver", port=0, |
| options=None, service_args=None, |
| desired_capabilities=None, service_log_path=None, |
| chrome_options=None): |
| """ |
| 参数: |
| - executable_path - chromedriver的执行路径 默认在环境变量中查找 |
| - port -http连接的端口号 |
| - desired_capabilities: 一般浏览器的字典对象 |
| - options: ChromeOptions的实例 |
| """ |
| |
| |
| |
| self.service = Service( |
| executable_path, |
| port=port, |
| service_args=service_args, |
| log_path=service_log_path) |
| |
| self.service.start() |
| |
| ``` |
| |
| |
| |
| |
| |
| ``` |
| from selenium.webdriver.common import service |
| |
| class Service(service.Service): |
| """ |
| 实例化Service对象 管理ChromeDriver的启动和停止 |
| """ |
| def __init__(self, executable_path, port=0, service_args=None, |
| log_path=None, env=None): |
| """ |
| 参数: |
| - service_args : chromedriver 的参数 列表形式 |
| - log_path : chromedriver的日志路径 |
| """ |
| |
| self.service_args = service_args or [] |
| if log_path: |
| self.service_args.append('--log-path=%s' % log_path) |
| |
| service.Service.__init__(self, executable_path, port=port, env=env, |
| start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home") |
| |
| |
| def command_line_args(self): |
| return ["--port=%d" % self.port] + self.service_args |
| ``` |
| |
| ``` |
| |
| import errno |
| import os |
| import platform |
| import subprocess |
| from subprocess import PIPE |
| import time |
| from selenium.common.exceptions import WebDriverException |
| from selenium.webdriver.common import utils |
| |
| try: |
| from subprocess import DEVNULL |
| _HAS_NATIVE_DEVNULL = True |
| except ImportError: |
| DEVNULL = -3 |
| _HAS_NATIVE_DEVNULL = False |
| |
| |
| class Service(object): |
| |
| def __init__(self, executable, port=0, log_file=DEVNULL, env=None, start_error_message=""): |
| self.path = executable |
| |
| self.port = port |
| |
| if self.port == 0: |
| self.port = utils.free_port() |
| |
| if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL: |
| log_file = open(os.devnull, 'wb') |
| |
| self.start_error_message = start_error_message |
| self.log_file = log_file |
| |
| self.env = env or os.environ |
| |
| @property |
| def service_url(self): |
| """ |
| Gets the url of the Service |
| """ |
| return "http://%s" % utils.join_host_port('localhost', self.port) |
| |
| def command_line_args(self): |
| raise NotImplemented("This method needs to be implemented in a sub class") |
| |
| def start(self): |
| """ |
| Starts the Service. |
| |
| :Exceptions: |
| - WebDriverException : Raised either when it can't start the service |
| or when it can't connect to the service |
| """ |
| try: |
| |
| cmd = [self.path] |
| cmd.extend(self.command_line_args()) |
| self.process = subprocess.Popen(cmd, env=self.env, |
| close_fds=platform.system() != 'Windows', |
| stdout=self.log_file, |
| stderr=self.log_file, |
| stdin=PIPE) |
| except TypeError: |
| raise |
| except OSError as err: |
| if err.errno == errno.ENOENT: |
| raise WebDriverException( |
| "'%s' executable needs to be in PATH. %s" % ( |
| os.path.basename(self.path), self.start_error_message) |
| ) |
| elif err.errno == errno.EACCES: |
| raise WebDriverException( |
| "'%s' executable may have wrong permissions. %s" % ( |
| os.path.basename(self.path), self.start_error_message) |
| ) |
| else: |
| raise |
| except Exception as e: |
| raise WebDriverException( |
| "The executable %s needs to be available in the path. %s\n%s" % |
| (os.path.basename(self.path), self.start_error_message, str(e))) |
| count = 0 |
| |
| |
| while True: |
| self.assert_process_still_running() |
| if self.is_connectable(): |
| break |
| count += 1 |
| time.sleep(1) |
| if count == 30: |
| raise WebDriverException("Can not connect to the Service %s" % self.path) |
| |
| def assert_process_still_running(self): |
| return_code = self.process.poll() |
| if return_code is not None: |
| raise WebDriverException( |
| 'Service %s unexpectedly exited. Status code was: %s' |
| % (self.path, return_code) |
| ) |
| |
| def is_connectable(self): |
| return utils.is_connectable(self.port) |
| ``` |
| |
| |
| |
| |
| ``` |
| |
| import warnings |
| from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver |
| from .remote_connection import ChromeRemoteConnection |
| from .service import Service |
| from .options import Options |
| class WebDriver(RemoteWebDriver): |
| """ |
| Controls the ChromeDriver and allows you to drive the browser. |
| |
| You will need to download the ChromeDriver executable from |
| http://chromedriver.storage.googleapis.com/index.html |
| """ |
| def __init__(self, executable_path="chromedriver", port=0, |
| options=None, service_args=None, |
| desired_capabilities=None, service_log_path=None, |
| chrome_options=None): |
| |
| |
| if options is None: |
| |
| if desired_capabilities is None: |
| |
| desired_capabilities = self.create_options().to_capabilities() |
| else: |
| if desired_capabilities is None: |
| desired_capabilities = options.to_capabilities() |
| else: |
| desired_capabilities.update(options.to_capabilities()) |
| |
| |
| |
| |
| try: |
| RemoteWebDriver.__init__( |
| self, |
| command_executor=ChromeRemoteConnection( |
| remote_server_addr=self.service.service_url), |
| desired_capabilities=desired_capabilities) |
| except Exception: |
| self.quit() |
| raise |
| self._is_remote = False |
| |
| def create_options(self): |
| return Options() |
| ``` |
| |
| |
| |
| ``` |
| chrome浏览器返回的caps字典对象为: |
| { |
| 'browserName': 'chrome', |
| 'version': '', |
| 'platform': 'ANY', |
| 'goog:chromeOptions': {'extensions': [], 'args': []} |
| } |
| ``` |
| |
| |
| ``` |
| RemoteWebDriver.__init__( |
| self, |
| command_executor=ChromeRemoteConnection( |
| remote_server_addr=self.service.service_url), |
| desired_capabilities=desired_capabilities) |
| ``` |
| |
| |
| ``` |
| |
| from selenium.webdriver.remote.remote_connection import RemoteConnection |
| |
| |
| class ChromeRemoteConnection(RemoteConnection): |
| |
| def __init__(self, remote_server_addr, keep_alive=True): |
| RemoteConnection.__init__(self, remote_server_addr, keep_alive) |
| self._commands["launchApp"] = ('POST', '/session/$sessionId/chromium/launch_app') |
| self._commands["setNetworkConditions"] = ('POST', '/session/$sessionId/chromium/network_conditions') |
| self._commands["getNetworkConditions"] = ('GET', '/session/$sessionId/chromium/network_conditions') |
| ``` |
| |
| ``` |
| class RemoteConnection(object): |
| """A connection with the Remote WebDriver server. |
| |
| Communicates with the server using the WebDriver wire protocol: |
| https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol""" |
| |
| |
| def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True): |
| |
| self.keep_alive = keep_alive |
| parsed_url = parse.urlparse(remote_server_addr) |
| addr = parsed_url.hostname |
| if parsed_url.hostname and resolve_ip: |
| port = parsed_url.port or None |
| if parsed_url.scheme == "https": |
| ip = parsed_url.hostname |
| elif port and not common_utils.is_connectable(port, parsed_url.hostname): |
| ip = None |
| LOGGER.info('Could not connect to port {} on host ' |
| '{}'.format(port, parsed_url.hostname)) |
| else: |
| ip = common_utils.find_connectable_ip(parsed_url.hostname, |
| port=port) |
| if ip: |
| netloc = ip |
| addr = netloc |
| if parsed_url.port: |
| netloc = common_utils.join_host_port(netloc, |
| parsed_url.port) |
| if parsed_url.username: |
| auth = parsed_url.username |
| if parsed_url.password: |
| auth += ':%s' % parsed_url.password |
| netloc = '%s@%s' % (auth, netloc) |
| remote_server_addr = parse.urlunparse( |
| (parsed_url.scheme, netloc, parsed_url.path, |
| parsed_url.params, parsed_url.query, parsed_url.fragment)) |
| else: |
| LOGGER.info('Could not get IP address for host: %s' % |
| parsed_url.hostname) |
| |
| self._url = remote_server_addr |
| if keep_alive: |
| self._conn = httplib.HTTPConnection( |
| str(addr), str(parsed_url.port), timeout=self._timeout) |
| |
| self._commands = { |
| Command.STATUS: ('GET', '/status'), |
| Command.NEW_SESSION: ('POST', '/session'), |
| Command.GET_ALL_SESSIONS: ('GET', '/sessions'), |
| Command.QUIT: ('DELETE', '/session/$sessionId'), |
| Command.GET_CURRENT_WINDOW_HANDLE: |
| ('GET', '/session/$sessionId/window_handle'), |
| Command.W3C_GET_CURRENT_WINDOW_HANDLE: |
| ('GET', '/session/$sessionId/window'), |
| Command.GET_WINDOW_HANDLES: |
| ('GET', '/session/$sessionId/window_handles'), |
| |
| |
| } |
| |
| |
| def execute(self, command, params): |
| command_info = self._commands[command] |
| assert command_info is not None, 'Unrecognised command %s' % command |
| path = string.Template(command_info[1]).substitute(params) |
| if hasattr(self, 'w3c') and self.w3c and isinstance(params, dict) and 'sessionId' in params: |
| del params['sessionId'] |
| data = utils.dump_json(params) |
| url = '%s%s' % (self._url, path) |
| return self._request(command_info[0], url, body=data) |
| |
| |
| |
| def _request(self, method, url, body=None): |
| """ |
| Send an HTTP request to the remote server. |
| |
| :Args: |
| - method - A string for the HTTP method to send the request with. |
| - url - A string for the URL to send the request to. |
| - body - A string for request body. Ignored unless method is POST or PUT. |
| |
| :Returns: |
| A dictionary with the server's parsed JSON response. |
| """ |
| LOGGER.debug('%s %s %s' % (method, url, body)) |
| |
| parsed_url = parse.urlparse(url) |
| headers = self.get_remote_connection_headers(parsed_url, self.keep_alive) |
| resp = None |
| if body and method != 'POST' and method != 'PUT': |
| body = None |
| |
| if self.keep_alive: |
| resp = self._conn.request(method, url, body=body, headers=headers) |
| |
| statuscode = resp.status |
| else: |
| http = urllib3.PoolManager(timeout=self._timeout) |
| resp = http.request(method, url, body=body, headers=headers) |
| |
| statuscode = resp.status |
| if not hasattr(resp, 'getheader'): |
| if hasattr(resp.headers, 'getheader'): |
| resp.getheader = lambda x: resp.headers.getheader(x) |
| elif hasattr(resp.headers, 'get'): |
| resp.getheader = lambda x: resp.headers.get(x) |
| |
| data = resp.data.decode('UTF-8') |
| try: |
| if 300 <= statuscode < 304: |
| return self._request('GET', resp.getheader('location')) |
| if 399 < statuscode <= 500: |
| return {'status': statuscode, 'value': data} |
| content_type = [] |
| if resp.getheader('Content-Type') is not None: |
| content_type = resp.getheader('Content-Type').split(';') |
| if not any([x.startswith('image/png') for x in content_type]): |
| |
| try: |
| data = utils.load_json(data.strip()) |
| except ValueError: |
| if 199 < statuscode < 300: |
| status = ErrorCode.SUCCESS |
| else: |
| status = ErrorCode.UNKNOWN_ERROR |
| return {'status': status, 'value': data.strip()} |
| |
| |
| |
| if 'value' not in data: |
| data['value'] = None |
| return data |
| else: |
| data = {'status': 0, 'value': data} |
| return data |
| finally: |
| LOGGER.debug("Finished Request") |
| resp.close() |
| ``` |
| 构造方法的主要工作是把localhost这样的域名替换成对应的IP(如127.0.0.1):先通过urllib.parse.urlparse把要处理的url解析成6个部分。 |
| |
| urlparse返回的是一个命名元组(named tuple)对象,包含scheme、netloc、path、params、query、fragment六个字段,其中netloc包括hostname和port。 |
| |
| 调用 common_utils.find_connectable_ip()方法获取hostname对应的ip地址,最后urllib.parse.urlunparse()重新组成url并赋值给self._url |
| |
| 构造方法还初始化了self._commands字典:key为命令名,value为该命令对应的(HTTP方法, URL路径)元组。 |
| |
| RemoteConnection类的实例方法execute调用 _request方法最终实现发送命令到远程服务器。 |
| |
| 客户端和服务器之间通过wire protocol(WebDriver Wire协议,即JSON Wire Protocol)通信,这种协议以点对点的方式进行通信。客户端先把要执行的操作(例如一次点击)转换成JSON格式的字符串,然后通过wire protocol传递给服务器。 |
| |
| RemoteWebDriver类的构造方法 更新capabilities字典 主要调用start_session传入capabilities字典 |
| |
| start_session方法 根据capabilities字典创建一个新的会话并获取session_id。 |
| |
| 另外还实例化了错误处理器ErrorHandler、文件检测器file_detector(默认使用LocalFileDetector),以及一个用于页面切换的SwitchTo对象。 |
| |
| |
| ``` |
| class WebDriver(object): |
| |
| _web_element_cls = WebElement |
| |
| def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub', |
| desired_capabilities=None, browser_profile=None, proxy=None, |
| keep_alive=False, file_detector=None, options=None): |
| """ |
| 创建一个driver使用 wire协议发送命令 |
| 参数: |
| - command_executor - 远程服务器的url 'http://127.0.0.1:端口号' |
| - desired_capabilities - A dictionary of capabilities to request when |
| starting the browser session. 必选参数 |
| - proxy - 一个selenium.webdriver.common.proxy.Proxy 对象. 可选的 |
| - file_detector - 自定义文件检测器对象. 默认使用LocalFileDetector() |
| - options - options.Options类的实例 |
| """ |
| capabilities = {} |
| if options is not None: |
| capabilities = options.to_capabilities() |
| if desired_capabilities is not None: |
| if not isinstance(desired_capabilities, dict): |
| raise WebDriverException("Desired Capabilities must be a dictionary") |
| else: |
| |
| capabilities.update(desired_capabilities) |
| if proxy is not None: |
| warnings.warn("Please use FirefoxOptions to set proxy", |
| DeprecationWarning) |
| proxy.add_to_capabilities(capabilities) |
| self.command_executor = command_executor |
| if type(self.command_executor) is bytes or isinstance(self.command_executor, str): |
| self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive) |
| self._is_remote = True |
| |
| self.session_id = None |
| self.capabilities = {} |
| |
| self.error_handler = ErrorHandler() |
| self.start_client() |
| if browser_profile is not None: |
| warnings.warn("Please use FirefoxOptions to set browser profile", |
| DeprecationWarning) |
| |
| self.start_session(capabilities, browser_profile) |
| |
| self._switch_to = SwitchTo(self) |
| |
| self._mobile = Mobile(self) |
| |
| self.file_detector = file_detector or LocalFileDetector |
| |
| def start_session(self, capabilities, browser_profile=None): |
| """ |
| 根据capabilities字典创建一个新的会话 |
| browser_profile FirefoxProfile的一个对象 只有火狐浏览器 |
| """ |
| if not isinstance(capabilities, dict): |
| raise InvalidArgumentException("Capabilities must be a dictionary") |
| if browser_profile: |
| if "moz:firefoxOptions" in capabilities: |
| capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded |
| else: |
| capabilities.update({'firefox_profile': browser_profile.encoded}) |
| """ |
| _make_w3c_caps return dict |
| { |
| "firstMatch": [{}], |
| "alwaysMatch": { |
| 'browserName': 'chrome', |
| 'version': '', |
| 'platformName': 'any', |
| 'goog:chromeOptions': {'extensions': [], 'args': []} |
| } |
| } |
| """ |
| w3c_caps = _make_w3c_caps(capabilities) |
| parameters = {"capabilities": w3c_caps, |
| "desiredCapabilities": capabilities} |
| |
| response = self.execute(Command.NEW_SESSION, parameters) |
| if 'sessionId' not in response: |
| response = response['value'] |
| |
| self.session_id = response['sessionId'] |
| |
| self.capabilities = response.get('value') |
| |
| |
| |
| if self.capabilities is None: |
| self.capabilities = response.get('capabilities') |
| |
| |
| self.w3c = response.get('status') is None |
| self.command_executor.w3c = self.w3c |
| |
| |
| def _make_w3c_caps(caps): |
| """Makes a W3C alwaysMatch capabilities object. |
| |
| Filters out capability names that are not in the W3C spec. Spec-compliant |
| drivers will reject requests containing unknown capability names. |
| |
| Moves the Firefox profile, if present, from the old location to the new Firefox |
| options object. |
| |
| :Args: |
| - caps - A dictionary of capabilities requested by the caller. |
| """ |
| |
| caps = copy.deepcopy(caps) |
| |
| profile = caps.get('firefox_profile') |
| always_match = {} |
| |
| if caps.get('proxy') and caps['proxy'].get('proxyType'): |
| caps['proxy']['proxyType'] = caps['proxy']['proxyType'].lower() |
| |
| for k, v in caps.items(): |
| |
| if v and k in _OSS_W3C_CONVERSION: |
| |
| always_match[_OSS_W3C_CONVERSION[k]] = v.lower() if k == 'platform' else v |
| if k in _W3C_CAPABILITY_NAMES or ':' in k: |
| always_match[k] = v |
| if profile: |
| moz_opts = always_match.get('moz:firefoxOptions', {}) |
| |
| if 'profile' not in moz_opts: |
| |
| new_opts = copy.deepcopy(moz_opts) |
| new_opts['profile'] = profile |
| always_match['moz:firefoxOptions'] = new_opts |
| return {"firstMatch": [{}], "alwaysMatch": always_match} |
| |
| |
| _OSS_W3C_CONVERSION = { |
| 'acceptSslCerts': 'acceptInsecureCerts', |
| 'version': 'browserVersion', |
| 'platform': 'platformName' |
| } |
| |
| |
| def execute(self, driver_command, params=None): |
| """ |
| 通过command.CommandExecutor执行driver_command命令 |
| 返回一个字典对象 里面装着JSON response |
| """ |
| if self.session_id is not None: |
| if not params: |
| params = {'sessionId': self.session_id} |
| elif 'sessionId' not in params: |
| params['sessionId'] = self.session_id |
| |
| |
| params = self._wrap_value(params) |
| |
| response = self.command_executor.execute(driver_command, params) |
| if response: |
| self.error_handler.check_response(response) |
| |
| response['value'] = self._unwrap_value( |
| response.get('value', None)) |
| return response |
| |
| |
| return {'success': 0, 'value': None, 'sessionId': self.session_id} |
| ``` |
| driver.get('https://www.baidu.com')调用的是webdriver/remote/webdriver.py下的get方法。 |
| get方法调用了同一文件中WebDriver类的execute方法,而该execute方法里的self.command_executor.execute实际调用的是remote_connection.py中RemoteConnection类的execute方法。 |
| 这最终是向监听端口上的Web Service发送一个HTTP request:在HTTP request的body中,以WebDriver Wire协议规定的JSON格式字符串告诉chromedriver,我们希望浏览器打开'https://www.baidu.com'页面。 |
| |
| |
| |
| ``` |
| def get(self, url): |
| """ |
| Loads a web page in the current browser session. |
| """ |
| |
| self.execute(Command.GET, {'url': url}) |
| ``` |
| 总结一下: |
| 首先是webdriver实例化Service 类调用start()方法用subprocess启动chromedriver(带--port参数)驱动。chromedriver启动之后都会在绑定的端口启动Web Service。 |
| |
| |
| 接着实例化RemoteConnection获得 command_executor实例化对象 传入给RemoteWebDriver构造方法。 |
| |
| |
| RemoteWebDriver构造方法调用start_session()方法启动session并获得唯一的session_id(通过这个session_id来确定找到对方,且在多线程并行的时候彼此之间不会有冲突和干扰)。 |
| |
| |
| 接下来调用WebDriver的任何API,比如get(),都需要借助一个CommandExecutor(RemoteConnection类的实例对象)调用execute()发送一个命令(这些命令保存在CommandExecutor实例化时生成的command字典中)。 |
| |
| |
| ``` |
| self._commands = { |
| Command.STATUS: ('GET', '/status'), |
| Command.NEW_SESSION: ('POST', '/session'), |
| Command.GET_ALL_SESSIONS: ('GET', '/sessions'), |
| Command.QUIT: ('DELETE', '/session/$sessionId'), |
| Command.GET_CURRENT_WINDOW_HANDLE: |
| ('GET', '/session/$sessionId/window_handle'), |
| Command.W3C_GET_CURRENT_WINDOW_HANDLE: |
| ('GET', '/session/$sessionId/window'), |
| Command.GET_WINDOW_HANDLES: |
| ('GET', '/session/$sessionId/window_handles'), |
| |
| |
| } |
| ``` |
| CommandExecutor中的execute()方法最后返回_request()方法的调用结果,实际上是向监听端口上的Web Service发送一个HTTP request。 |
| |
| 在HTTP request的body中,Wire JSON格式字典来告诉chromedriver接下来做什么事。(通过之前绑定的端口) |
| 实际的执行者是chromedriver驱动,而selenium就相当于一个代理。所以selenium并不是直接操控浏览器而是运行webdriver, 通过webdriver间接操控浏览器。 |
| |
| 在现实生活中这类似打出租车,我们告诉司机目的地是哪?走哪条路到达?webdriver就相当于出租车司机。 |
| |
| 转载于:https://www.cnblogs.com/jiang-cheng/p/9914803.html |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?