# Alias for client.Agent (Twisted's Agent class, per the note on ScrapyAgent._Agent).
Agent = client.Agent
# ScrapyAgent (excerpt): holds three agent classes as class attributes
# (_Agent, _ProxyAgent, _TunnelingAgent) and chooses between them per request.
#   * __init__ only stores its arguments on the instance and sets
#     _txresponse to None.
#   * _get_agent reads 'bindaddress' and 'proxy' from request.meta. With a
#     proxy, an https URL whose proxy parameters do not contain b'noconnect'
#     gets a _TunnelingAgent; any other proxied request gets a _ProxyAgent
#     over a TCP4ClientEndpoint. Without a proxy it returns a plain _Agent.
#   * download_request takes the timeout from request.meta['download_timeout']
#     (falling back to self._connectTimeout), builds the method, headers and
#     body producer (dropping Proxy-Authorization when the agent is a
#     _TunnelingAgent), calls agent.request, chains the _cb_* callbacks and
#     schedules d.cancel through reactor.callLater(timeout, ...).
# NOTE(review): this excerpt is collapsed onto two physical lines and
# download_request is cut off mid-call at the very end; restore it from the
# upstream source before treating it as runnable code.
class ScrapyAgent(object): _Agent = Agent# Twisted's client.Agent class _ProxyAgent = ProxyAgent _TunnelingAgent = TunnelingAgent def __init__(self, contextFactory=None, connectTimeout=10, bindAddress=None, pool=None, maxsize=0, warnsize=0, fail_on_dataloss=True): self._contextFactory = contextFactory self._connectTimeout = connectTimeout self._bindAddress = bindAddress self._pool = pool self._maxsize = maxsize self._warnsize = warnsize self._fail_on_dataloss = fail_on_dataloss self._txresponse = None def _get_agent(self, request, timeout):# obtain the agent bindaddress = request.meta.get('bindaddress') or self._bindAddress proxy = request.meta.get('proxy') if proxy: _, _, proxyHost, proxyPort, proxyParams = _parse(proxy) scheme = _parse(request.url)[0] proxyHost = to_unicode(proxyHost) omitConnectTunnel = b'noconnect' in proxyParams if scheme == b'https' and not omitConnectTunnel: proxyConf = (proxyHost, proxyPort, request.headers.get(b'Proxy-Authorization', None)) return self._TunnelingAgent(reactor, proxyConf, contextFactory=self._contextFactory, connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool) else: endpoint = TCP4ClientEndpoint(reactor, proxyHost, proxyPort, timeout=timeout, bindAddress=bindaddress) return self._ProxyAgent(endpoint) return self._Agent(reactor, contextFactory=self._contextFactory, connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool) def download_request(self, request): timeout = request.meta.get('download_timeout') or self._connectTimeout agent = self._get_agent(request, timeout) # request details url = urldefrag(request.url)[0] method = to_bytes(request.method) headers = TxHeaders(request.headers) if isinstance(agent, self._TunnelingAgent): headers.removeHeader(b'Proxy-Authorization') if request.body: bodyproducer = _RequestBodyProducer(request.body) elif method == b'POST': # Setting Content-Length: 0 even for POST requests is not a # MUST per HTTP RFCs, but it's common behavior, and some # servers require this, otherwise returning 
HTTP 411 Length required # # RFC 7230#section-3.3.2: # "a Content-Length header field is normally sent in a POST # request even when the value is 0 (indicating an empty payload body)." # # Twisted < 17 will not add "Content-Length: 0" by itself; # Twisted >= 17 fixes this; # Using a producer with an empty-string sends `0` as Content-Length # for all versions of Twisted. bodyproducer = _RequestBodyProducer(b'') else: bodyproducer = None start_time = time() d = agent.request(# invoke the agent's request method, to_bytes(url, encoding='ascii'), headers, bodyproducer) # set download latency d.addCallback(self._cb_latency, request, start_time) # response body is ready to be consumed d.addCallback(self._cb_bodyready, request) d.addCallback(self._cb_bodydone, request, url) # check download timeout self._timeout_cl = reactor.callLater(timeout, d.cancel) d.addBoth(self._cb_timeout, req
class Agent(_AgentBase):
    """
    Agent that issues I{HTTP} and I{HTTPS} requests, asking an endpoint
    factory for the endpoint to use for each request URI.
    """

    def __init__(self, reactor,
                 contextFactory=BrowserLikePolicyForHTTPS(),
                 connectTimeout=None, bindAddress=None,
                 pool=None):
        """
        Build an agent backed by a C{_StandardEndpointFactory}.

        @param reactor: forwarded to the endpoint factory and to
            C{_AgentBase.__init__}.
        @param contextFactory: the HTTPS policy.  It should provide
            C{IPolicyForHTTPS}; anything else triggers a
            C{DeprecationWarning} and is wrapped in
            C{_DeprecatedToCurrentPolicyForHTTPS}.  The default instance is
            created once, when this C{def} is evaluated, and is therefore
            shared by every call that omits the argument.
        @param connectTimeout: forwarded to the endpoint factory.
        @param bindAddress: forwarded to the endpoint factory.
        @param pool: forwarded to C{_AgentBase.__init__}.
        """
        if not IPolicyForHTTPS.providedBy(contextFactory):
            # stacklevel=2 attributes the warning to the caller of Agent().
            warnings.warn(
                repr(contextFactory) +
                " was passed as the HTTPS policy for an Agent, but it does "
                "not provide IPolicyForHTTPS. Since Twisted 14.0, you must "
                "pass a provider of IPolicyForHTTPS.",
                stacklevel=2, category=DeprecationWarning
            )
            contextFactory = _DeprecatedToCurrentPolicyForHTTPS(contextFactory)
        endpointFactory = _StandardEndpointFactory(
            reactor, contextFactory, connectTimeout, bindAddress)
        self._init(reactor, endpointFactory, pool)

    @classmethod
    def usingEndpointFactory(cls, reactor, endpointFactory, pool=None):
        """
        Create a new L{Agent} that will use the endpoint factory to figure
        out how to connect to the server.

        The instance is allocated with C{cls.__new__}, so C{__init__} (and
        its HTTPS policy handling) is bypassed; only C{_init} runs.

        @param reactor: forwarded to C{_init}.
        @param endpointFactory: the factory later queried through
            C{endpointForURI}.
        @param pool: forwarded to C{_init}.

        @return: the newly initialised agent.
        """
        agent = cls.__new__(cls)
        agent._init(reactor, endpointFactory, pool)
        return agent

    def _init(self, reactor, endpointFactory, pool):
        """
        Shared initialisation: run C{_AgentBase.__init__} with C{reactor}
        and C{pool}, then remember C{endpointFactory}.
        """
        _AgentBase.__init__(self, reactor, pool)
        self._endpointFactory = endpointFactory

    def _getEndpoint(self, uri):
        """
        Return whatever the endpoint factory's C{endpointForURI} gives for
        C{uri}.
        """
        return self._endpointFactory.endpointForURI(uri)

    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a request to the server indicated by the given C{uri}.

        An existing connection from the connection pool may be used or a new
        one may be created.

        I{HTTP} and I{HTTPS} schemes are supported in C{uri}.

        @see: L{twisted.web.iweb.IAgent.request}
        """
        parsedURI = URI.fromBytes(uri)
        try:
            endpoint = self._getEndpoint(parsedURI)
        except SchemeNotSupported:
            # Failure() is built inside the except block with no arguments,
            # so it is expected to capture the SchemeNotSupported being
            # handled; the error is reported through the returned value
            # instead of being raised.
            return defer.fail(Failure())
        # Compute the key: (scheme, host, port) of the parsed URI.
        # NOTE(review): presumably this identifies the pooled connection to
        # reuse -- confirm against _requestWithEndpoint.
        key = (parsedURI.scheme, parsedURI.host, parsedURI.port)
        return self._requestWithEndpoint(key, endpoint, method, parsedURI,
                                         headers, bodyProducer,
                                         parsedURI.originForm)
# Obtain a connection from class HTTPConnectionPool(object)
def getConnection(self, key, endpoint):
    """
    Supply a connection for C{key}, reusing a cached one when possible.

    Cached connections stored under C{key} are taken from the front of the
    list one at a time.  Each one has its entry in C{self._timeouts}
    cancelled and removed.  The first one whose state is C{"QUIESCENT"}
    (idle) is handed back; the others are simply dropped from the cache.

    @param key: the key under which cached connections are stored in
        C{self._connections}.
    @param endpoint: passed to C{self._newConnection} whenever a new
        connection has to be made.

    @return: C{defer.succeed(connection)} for a reused connection (wrapped
        in C{_RetryingHTTP11ClientProtocol} when C{self.retryAutomatically}
        is true), otherwise the result of
        C{self._newConnection(key, endpoint)}.
    """
    # Try to get a cached version first.
    connections = self._connections.get(key)
    while connections:
        connection = connections.pop(0)
        # The connection has left the cache: cancel and forget its timeout.
        self._timeouts[connection].cancel()
        del self._timeouts[connection]
        if connection.state != "QUIESCENT":
            # Not idle, so it cannot be reused; try the next cached one.
            continue
        if self.retryAutomatically:
            def newConnection():
                # Zero-argument factory given to the retrying wrapper so it
                # can ask for a fresh connection.
                return self._newConnection(key, endpoint)

            connection = _RetryingHTTP11ClientProtocol(
                connection, newConnection)
        return defer.succeed(connection)

    # Nothing reusable was cached for this key.
    return self._newConnection(key, endpoint)