Scrapy源码 Response对象

Scrapy源码 Response对象

"""
This module implements the Response class which is used to represent HTTP
responses in Scrapy.

See documentation in docs/topics/request-response.rst
"""
from six.moves.urllib.parse import urljoin

from scrapy.http.request import Request
from scrapy.http.headers import Headers
from scrapy.link import Link
from scrapy.utils.trackref import object_ref
from scrapy.http.common import obsolete_setter
from scrapy.exceptions import NotSupported


class Response(object_ref):

def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None):
self.headers = Headers(headers or {})
self.status = int(status)
self._set_body(body)
self._set_url(url)
self.request = request
self.flags = [] if flags is None else list(flags)

@property
def meta(self):
try:
return self.request.meta
except AttributeError:
raise AttributeError(
"Response.meta not available, this response "
"is not tied to any request"
)

def _get_url(self):
return self._url

def _set_url(self, url):
if isinstance(url, str):
self._url = url
else:
raise TypeError('%s url must be str, got %s:' % (type(self).__name__,
type(url).__name__))

url = property(_get_url, obsolete_setter(_set_url, 'url'))

def _get_body(self):
return self._body

def _set_body(self, body):
if body is None:
self._body = b''
elif not isinstance(body, bytes):
raise TypeError(
"Response body must be bytes. "
"If you want to pass unicode body use TextResponse "
"or HtmlResponse.")
else:
self._body = body

body = property(_get_body, obsolete_setter(_set_body, 'body'))

def __str__(self):
return "<%d %s>" % (self.status, self.url)

__repr__ = __str__

def copy(self):
"""Return a copy of this Response"""
return self.replace()

def replace(self, *args, **kwargs):
"""Create a new Response with the same attributes except for those
given new values.
"""
for x in ['url', 'status', 'headers', 'body', 'request', 'flags']:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop('cls', self.__class__)
return cls(*args, **kwargs)

def urljoin(self, url):
"""Join this Response's url with a possible relative url to form an
absolute interpretation of the latter."""
return urljoin(self.url, url)

@property
def text(self):
"""For subclasses of TextResponse, this will return the body
as text (unicode object in Python 2 and str in Python 3)
"""
raise AttributeError("Response content isn't text")

def css(self, *a, **kw):
"""Shortcut method implemented only by responses whose content
is text (subclasses of TextResponse).
"""
raise NotSupported("Response content isn't text")

def xpath(self, *a, **kw):
"""Shortcut method implemented only by responses whose content
is text (subclasses of TextResponse).
"""
raise NotSupported("Response content isn't text")

def follow(self, url, callback=None, method='GET', headers=None, body=None,
cookies=None, meta=None, encoding='utf-8', priority=0,
dont_filter=False, errback=None, cb_kwargs=None):
# type: (...) -> Request
"""
Return a :class:`~.Request` instance to follow a link ``url``.
It accepts the same arguments as ``Request.__init__`` method,
but ``url`` can be a relative URL or a ``scrapy.link.Link`` object,
not only an absolute URL.

:class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
method which supports selectors in addition to absolute/relative URLs
and Link objects.
"""
if isinstance(url, Link):
url = url.url
elif url is None:
raise ValueError("url can't be None")
url = self.urljoin(url)
return Request(url, callback,
method=method,
headers=headers,
body=body,
cookies=cookies,
meta=meta,
encoding=encoding,
priority=priority,
dont_filter=dont_filter,
errback=errback,
cb_kwargs=cb_kwargs)
posted @ 2020-01-07 22:35  石斛  阅读(658)  评论(0编辑  收藏  举报