selenium 工作原理
selenium通信原理
- 对于每一条Selenium脚本,一个http请求会被创建并且发送给浏览器的驱动
- 浏览器驱动中包含了一个HTTP Server,用来接收这些http请求
- HTTP Server接收到请求后根据请求来具体操控对应的浏览器
- 浏览器执行具体的测试步骤
- 浏览器将步骤执行结果返回给HTTP Server
- HTTP Server又将结果返回给Selenium的脚本,如果返回的是表示错误的HTTP状态码,我们就会在控制台看到对应的报错信息。
那为什么同一个浏览器驱动既可以处理Java语言的脚本,也可以处理Python语言的脚本呢?
这就要提到WebDriver基于的协议:JSON Wire protocol。
1、JSON Wire protocol是在http协议基础上,对http请求及响应的body部分的数据的进一步规范
2、在WebDriver中为了给用户以更明确的反馈信息,提供了更细化的http响应状态码
3、body部分主要传送具体的数据,在WebDriver中这些数据都是以JSON的形式存在并进行传送的
源码分析 WebDriver的结构就是典型的C/S结构,WebDriver API相当于客户端,而小小的浏览器驱动才是服务器端。
1、执行webdriver.Chrome() -> 就会去启动一个子进程执行浏览器驱动并监听端口号,这个驱动内部实现了一个socket的server端
2、父类RemoteWebDriver中实现了供我们使用的接口 -> 调用这些接口最终都会执行execute方法
3、execute方法解析好具体的操作,根据http协议向driver内部的server发起请求
4、浏览器驱动再去操控对应的浏览器执行具体操作
源码分析
1、实例化WebDriver(),它继承了RemoteWebDriver
2、在WebDriver中先去初始化Service,Service在初始化时候会分配一个端口号
3、就是调用Service中的start方法启动一个子进程去启动浏览器驱动服务(chromedriver.exe)
4、初始化RemoteWebDriver,传入command_executor=ChromeRemoteConnection(remote_server_addr=self.service.service_url, keep_alive=keep_alive)
这个ChromeRemoteConnection主要是对service_url进行解析,然后使用urllib3.PoolManager()去创建http连接池,最后还提供了_commands字典,它的value
是浏览器驱动提供的接口(接口是满足RESTful风格的)
5、RemoteWebDriver初始化时会去启动一个浏览器窗口(start_session),由于command_executor这个参数是ChromeRemoteConnection实例对象,所以可以
调用实例对象的execute方法,RemoteWebDriver类本身提供了我们常用的方法,比如 find_element_by_id 等等,这些方法最终都会去调用execute(),execute
最终会到ChromeRemoteConnection类中去调用_request方法发送http请求给浏览器驱动去请求对应操作的接口,_commands这个字典里面就可以查到某个操作
所对应的接口
# demo.py
from selenium import webdriver

driver = webdriver.Chrome()  # First, instantiate the WebDriver class
1 # 文件路径 \selenium\webdriver\chrome\webdriver.py 2 class WebDriver(RemoteWebDriver): 3 4 def __init__(self, executable_path="chromedriver", port=0, 5 options=None, service_args=None, 6 desired_capabilities=None, service_log_path=None, 7 chrome_options=None, keep_alive=True): 8 # executable_path: 浏览器驱动文件路径 9 ...省略部分代码... 10 11 # Service初始化 12 self.service = Service( 13 executable_path, 14 port=port, 15 service_args=service_args, 16 log_path=service_log_path) 17 # 启动一个子进程去执行chromedriver.exe驱动文件 18 # 这个驱动文件就是一个服务,接收http请求的 19 self.service.start() 20 21 try: 22 # 初始化RemoteWebDriver 23 RemoteWebDriver.__init__( 24 self, 25 # ChromeRemoteConnection这个初始化时解析传过来的url 26 # 并建立一个keep-alive连接或者普通连接 27 # 提供了command字典,映射对应的接口(chromedriver.exe提供的接口,restful接口风格的) 28 command_executor=ChromeRemoteConnection( 29 remote_server_addr=self.service.service_url, 30 keep_alive=keep_alive), 31 desired_capabilities=desired_capabilities) 32 except Exception: 33 self.quit() 34 raise 35 self._is_remote = False
# File path: \selenium\webdriver\chrome\service.py
class Service(service.Service):
    """Chrome-specific driver service; extends the parent ``service.Service``."""

    def __init__(self, executable_path, port=0, service_args=None,
                 log_path=None, env=None):
        """Record the extra command-line arguments, then initialise the parent.

        When ``log_path`` is truthy a ``--log-path=...`` argument is appended
        to the argument list (the very list object passed in, if one was given).
        """
        extra_args = service_args or []
        self.service_args = extra_args
        if log_path:
            extra_args.append('--log-path=%s' % log_path)

        service.Service.__init__(
            self,
            executable_path,
            port=port,
            env=env,
            start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home")

    def command_line_args(self):
        """Return the ``--port`` flag followed by the stored service arguments.

        This overrides the parent's method; ``self.port`` is assigned by the
        parent class.
        """
        port_flag = "--port=%d" % self.port
        return [port_flag] + self.service_args
1 # 文件路径:\selenium\webdriver\common\service.py 2 # 主要就是启动浏览器驱动 server来接收后续的http请求 3 class Service(object): 4 5 def __init__(self, executable, port=0, log_file=DEVNULL, env=None, start_error_message=""): 6 self.path = executable 7 8 self.port = port 9 if self.port == 0: 10 self.port = utils.free_port() 11 12 if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL: 13 log_file = open(os.devnull, 'wb') 14 15 self.start_error_message = start_error_message 16 self.log_file = log_file 17 self.env = env or os.environ 18 19 @property 20 def service_url(self): 21 return "http://%s" % utils.join_host_port('localhost', self.port) 22 23 def start(self): 24 try: # 启 动了子进程去执行chromedriver.exe文件实现服务端 25 # 监听随机分配的端口号,或者你指定的端口号 26 cmd = [self.path] 27 cmd.extend(self.command_line_args()) 28 self.process = subprocess.Popen(cmd, env=self.env, 29 close_fds=platform.system() != 'Windows', 30 stdout=self.log_file, 31 stderr=self.log_file, 32 stdin=PIPE) 33 except TypeError: 34 raise 35 ...省略部分代码... 36
# File path: \selenium\webdriver\remote\remote_connection.py

class RemoteConnection(object):
    """HTTP client side of the WebDriver protocol.

    Maps commands to endpoints on the driver server and sends them as HTTP
    requests.

    NOTE(review): ``self._timeout`` and ``self.get_remote_connection_headers``
    are used below but not defined in this excerpt; presumably they are
    class-level members omitted from the listing - confirm against the full
    file.
    """

    def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
        """Normalise the server address and set up the connection state.

        :Args:
         - remote_server_addr - URL of the driver server.
         - keep_alive - when true, a ``urllib3.PoolManager`` is created once
           and reused for every request.
         - resolve_ip - when true (and the URL has a hostname), try to replace
           the hostname in the URL with a connectable IP address.
        """
        # Attempt to resolve the hostname and get an IP address.
        self.keep_alive = keep_alive
        parsed_url = parse.urlparse(remote_server_addr)
        if parsed_url.hostname and resolve_ip:
            port = parsed_url.port or None
            if parsed_url.scheme == "https":
                # For https the hostname is kept as-is.
                ip = parsed_url.hostname
            elif port and not common_utils.is_connectable(port, parsed_url.hostname):
                ip = None
                LOGGER.info('Could not connect to port {} on host '
                            '{}'.format(port, parsed_url.hostname))
            else:
                ip = common_utils.find_connectable_ip(parsed_url.hostname,
                                                      port=port)
            if ip:
                # Rebuild the netloc as [user[:password]@]ip[:port].
                netloc = ip
                if parsed_url.port:
                    netloc = common_utils.join_host_port(netloc,
                                                         parsed_url.port)
                if parsed_url.username:
                    auth = parsed_url.username
                    if parsed_url.password:
                        auth += ':%s' % parsed_url.password
                    netloc = '%s@%s' % (auth, netloc)
                remote_server_addr = parse.urlunparse(
                    (parsed_url.scheme, netloc, parsed_url.path,
                     parsed_url.params, parsed_url.query, parsed_url.fragment))
            else:
                LOGGER.info('Could not get IP address for host: %s' %
                            parsed_url.hostname)

        self._url = remote_server_addr
        if keep_alive:
            self._conn = urllib3.PoolManager(timeout=self._timeout)
        # Dictionary mapping each command to the driver server endpoint that
        # serves it, as (HTTP method, URL path template). It must be named
        # ``_commands`` because that is the attribute ``execute`` looks up.
        # NOTE(review): only three commands appear in this listing.
        self._commands = {
            Command.GO_FORWARD: ('POST', '/session/$sessionId/forward'),
            Command.GO_BACK: ('POST', '/session/$sessionId/back'),
            Command.MOVE_TO: ('POST', '/session/$sessionId/moveto'),
        }

    def execute(self, command, params):
        """
        Send a command to the remote server.

        :Args:
         - command - key into ``self._commands``; an unknown key raises
           ``KeyError`` from the lookup itself.
         - params - mapping substituted into the endpoint's path template and
           then serialised as the JSON request body.

        :Returns:
          Whatever ``_request`` returns for the resulting HTTP call.
        """
        command_info = self._commands[command]
        assert command_info is not None, 'Unrecognised command %s' % command
        # Fill placeholders such as $sessionId in the path template.
        path = string.Template(command_info[1]).substitute(params)
        # In w3c mode the session id travels in the URL only, not in the body.
        if hasattr(self, 'w3c') and self.w3c and isinstance(params, dict) and 'sessionId' in params:
            del params['sessionId']
        data = utils.dump_json(params)
        url = '%s%s' % (self._url, path)
        return self._request(command_info[0], url, body=data)

    def _request(self, method, url, body=None):
        """
        Send an HTTP request to the browser driver service.

        :Args:
         - method - HTTP method name; the body is dropped unless it is
           'POST' or 'PUT'.
         - url - full URL to request.
         - body - request body, or None.

        :Returns:
          A dictionary with the server's response parsed from JSON.
        """
        parsed_url = parse.urlparse(url)
        headers = self.get_remote_connection_headers(parsed_url, self.keep_alive)
        resp = None
        if body and method != 'POST' and method != 'PUT':
            body = None

        if self.keep_alive:
            resp = self._conn.request(method, url, body=body, headers=headers)
        else:
            http = urllib3.PoolManager(timeout=self._timeout)
            resp = http.request(method, url, body=body, headers=headers)

            # Give the response a getheader() accessor when it lacks one.
            if not hasattr(resp, 'getheader'):
                if hasattr(resp.headers, 'getheader'):
                    resp.getheader = lambda x: resp.headers.getheader(x)
                elif hasattr(resp.headers, 'get'):
                    resp.getheader = lambda x: resp.headers.get(x)
        statuscode = resp.status

        data = resp.data.decode('UTF-8')
        try:
            if 300 <= statuscode < 304:
                # Follow the redirect with a GET to the Location header.
                return self._request('GET', resp.getheader('location'))
            if 399 < statuscode <= 500:
                return {'status': statuscode, 'value': data}
            content_type = []
            if resp.getheader('Content-Type') is not None:
                content_type = resp.getheader('Content-Type').split(';')
            if not any(x.startswith('image/png') for x in content_type):

                try:
                    data = utils.load_json(data.strip())
                except ValueError:
                    # Body is not JSON: report it raw, with a status derived
                    # from the HTTP status code.
                    if 199 < statuscode < 300:
                        status = ErrorCode.SUCCESS
                    else:
                        status = ErrorCode.UNKNOWN_ERROR
                    return {'status': status, 'value': data.strip()}

                if 'value' not in data:
                    data['value'] = None
                return data
            else:
                # image/png payloads are returned undecoded as the value.
                data = {'status': 0, 'value': data}
                return data
        finally:
            LOGGER.debug("Finished Request")
            resp.close()
# File path: \selenium\webdriver\remote\webdriver.py
class WebDriver(object):
    """Drives a browser by sending commands through a command executor."""

    _web_element_cls = WebElement

    def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
                 desired_capabilities=None, browser_profile=None, proxy=None,
                 keep_alive=False, file_detector=None, options=None):
        """Assemble the capabilities, prepare the executor and start a session.

        :Args:
         - command_executor - either a server URL (str or bytes), which is
           wrapped in a ``RemoteConnection``, or a ready-made executor object.
         - desired_capabilities - optional dict merged over the capabilities
           derived from ``options``.
         - browser_profile - passed on to ``start_session``; a deprecation
           warning is issued when it is given.
         - proxy - optional; adds itself to the capabilities (deprecated).
         - keep_alive - forwarded to ``RemoteConnection`` when one is built.
         - file_detector - defaults to a ``LocalFileDetector`` when falsy.
         - options - optional; supplies the base capabilities.

        :Raises:
          WebDriverException if ``desired_capabilities`` is given but is not
          a dict.
        """
        requested_caps = options.to_capabilities() if options is not None else {}
        if desired_capabilities is not None:
            if not isinstance(desired_capabilities, dict):
                raise WebDriverException("Desired Capabilities must be a dictionary")
            requested_caps.update(desired_capabilities)
        if proxy is not None:
            warnings.warn("Please use FirefoxOptions to set proxy",
                          DeprecationWarning, stacklevel=2)
            proxy.add_to_capabilities(requested_caps)

        self.command_executor = command_executor
        given_as_address = (type(self.command_executor) is bytes
                            or isinstance(self.command_executor, str))
        if given_as_address:
            # An address was supplied: wrap it in a connection object.
            self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)

        self._is_remote = True
        self.session_id = None
        self.capabilities = {}
        self.error_handler = ErrorHandler()
        self.start_client()

        if browser_profile is not None:
            warnings.warn("Please use FirefoxOptions to set browser profile",
                          DeprecationWarning, stacklevel=2)
        # Start the session - this is the step that launches the browser.
        self.start_session(requested_caps, browser_profile)

        self._switch_to = SwitchTo(self)
        self._mobile = Mobile(self)
        self.file_detector = file_detector or LocalFileDetector()