When does the Deferred returned by Scraper.enqueue_scrape fire its callback?
Posted on 2018-10-12 21:02 by 王将军之武库

The secret hides in a few helpers from scrapy/utils/defer.py:
```python
from scrapy.exceptions import IgnoreRequest
from twisted.internet import defer, reactor
from twisted.python import failure


def defer_succeed(result):
    """Same as twisted.internet.defer.succeed but delay calling callback
    until next reactor loop

    It delays by 100ms so reactor has a chance to go through readers and
    writers before attending pending delayed calls, so do not set delay
    to zero.
    """
    d = defer.Deferred()
    reactor.callLater(0.1, d.callback, result)
    return d


# defer_fail (defined in the same module) is the Failure counterpart of
# defer_succeed: it fires the errback on the next reactor loop.
def mustbe_deferred(f, *args, **kw):
    """Same as twisted.internet.defer.maybeDeferred, but delay calling
    callback/errback to next reactor loop
    """
    try:
        result = f(*args, **kw)
    # FIXME: Hack to avoid introspecting tracebacks. This is to speed up
    # processing of IgnoreRequest errors which are, by far, the most common
    # exception in Scrapy - see #125
    except IgnoreRequest as e:
        return defer_fail(failure.Failure(e))
    except:
        return defer_fail(failure.Failure())
    else:
        return defer_result(result)


def defer_result(result):
    if isinstance(result, defer.Deferred):
        return result
    elif isinstance(result, failure.Failure):
        return defer_fail(result)
    else:
        return defer_succeed(result)
```
mustbe_deferred calls defer_result, which falls through to defer_succeed. defer_succeed creates a Deferred and schedules reactor.callLater(0.1, d.callback, result), so the Deferred fires by itself 0.1 seconds later, on a subsequent reactor iteration. Every callback attached to that Deferred in the meantime therefore gets called.
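Here is a minimal stand-alone demo of that behavior (a toy script, not Scrapy code; the helper is copied from above). When the callbacks are attached, the Deferred has no result yet; reactor.callLater fires it later and the whole chain runs:

```python
from twisted.internet import defer, reactor


def defer_succeed(result):
    # same helper as above: fire the deferred ~100ms later
    d = defer.Deferred()
    reactor.callLater(0.1, d.callback, result)
    return d


d = defer_succeed('page body')
# d has not fired yet, so these callbacks just queue up
d.addCallback(lambda r: r.upper())
d.addCallback(lambda r: print('callback got:', r))  # callback got: PAGE BODY
d.addCallback(lambda _: reactor.stop())
reactor.run()
```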
On the Scraper side, _scrape2 hands the response to the spider middleware manager, which wraps the call with mustbe_deferred() and thereby produces a Deferred that fires on its own:

```python
def _scrape2(self, request_result, request, spider):
    """Handle the different cases of request's result being a Response
    or a Failure"""
    if not isinstance(request_result, Failure):
        # the spider middleware manager calls mustbe_deferred() first,
        # producing a Deferred that fires by itself
        return self.spidermw.scrape_response(
            self.call_spider, request_result, request, spider)
    else:
        # FIXME: don't ignore errors in spider middleware
        dfd = self.call_spider(request_result, request, spider)
        return dfd.addErrback(
            self._log_download_errors, request_result, request, spider)
```
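For reference, this is roughly what that wrapping looks like; a simplified sketch of SpiderMiddlewareManager.scrape_response with the output/exception hooks trimmed away, not the verbatim source:

```python
# Simplified sketch (not verbatim Scrapy source). The key point: the spider
# input chain is wrapped in mustbe_deferred, so the Deferred returned here
# always fires on a later reactor iteration.
def scrape_response(self, scrape_func, response, request, spider):
    def process_spider_input(response):
        for method in self.methods['process_spider_input']:
            method(response=response, spider=spider)
        # scrape_func is Scraper.call_spider here
        return scrape_func(response, request, spider)

    dfd = mustbe_deferred(process_spider_input, response)
    # ... process_spider_exception / process_spider_output hooks elided ...
    return dfd
```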
enqueue_scrape itself only returns a bare Deferred taken from the slot's queue; _scrape_next is what later wires the real work onto it:

```python
def enqueue_scrape(self, response, request, spider):
    # put the (response, request) pair into the slot's queue
    slot = self.slot
    dfd = slot.add_response_request(response, request)

    def finish_scraping(_):
        slot.finish_response(response, request)
        self._check_if_closing(spider, slot)
        self._scrape_next(spider, slot)
        return _

    dfd.addBoth(finish_scraping)
    dfd.addErrback(
        lambda f: logger.error('Scraper bug processing %(request)s',
                               {'request': request},
                               exc_info=failure_to_exc_info(f),
                               extra={'spider': spider}))
    self._scrape_next(spider, slot)
    return dfd

def _scrape_next(self, spider, slot):
    while slot.queue:
        response, request, deferred = slot.next_response_request_deferred()
        # this chains the freshly built _scrape() deferred onto the one
        # that enqueue_scrape returned
        self._scrape(response, request, spider).chainDeferred(deferred)
```

This is how the Deferred returned by enqueue_scrape eventually fires: d1.chainDeferred(d2) links the two, so when d1 fires, d2 fires with the same result.
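A tiny stand-alone illustration of chainDeferred (toy code; d2 plays the role of the Deferred handed out by enqueue_scrape, d1 the one built by _scrape):

```python
from twisted.internet import defer

d1 = defer.Deferred()  # the _scrape() deferred
d2 = defer.Deferred()  # the deferred enqueue_scrape returned

d2.addCallback(lambda r: print('d2 fired with:', r))

# chainDeferred is equivalent to d1.addCallbacks(d2.callback, d2.errback)
d1.chainDeferred(d2)

d1.callback('scrape result')  # firing d1 also fires d2
```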
For completeness, the full chain in scrapy/core/scraper.py:

```python
def _scrape(self, response, request, spider):
    """Handle the downloaded response or failure through the spider
    callback/errback"""
    assert isinstance(response, (Response, Failure))

    dfd = self._scrape2(response, request, spider)  # returns spider's processed output
    dfd.addErrback(self.handle_spider_error, request, response, spider)
    dfd.addCallback(self.handle_spider_output, request, response, spider)
    return dfd

def _scrape2(self, request_result, request, spider):
    """Handle the different cases of request's result being a Response
    or a Failure"""
    if not isinstance(request_result, Failure):
        return self.spidermw.scrape_response(
            self.call_spider, request_result, request, spider)
    else:
        # FIXME: don't ignore errors in spider middleware
        dfd = self.call_spider(request_result, request, spider)
        return dfd.addErrback(
            self._log_download_errors, request_result, request, spider)

def call_spider(self, result, request, spider):
    result.request = request
    # defer_result -> defer_succeed: the Deferred fires by itself 0.1s later
    dfd = defer_result(result)
    dfd.addCallbacks(request.callback or spider.parse, request.errback)
    return dfd.addCallback(iterate_spider_output)
```
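So, to answer the title question: call_spider builds its Deferred with defer_result, which for a plain Response falls through to defer_succeed; reactor.callLater(0.1, d.callback, result) then fires it, and chainDeferred propagates the result to the Deferred that enqueue_scrape handed out. A toy model of the whole mechanism (hypothetical names, not Scrapy's API):

```python
from twisted.internet import defer, reactor

# Toy model (hypothetical names): enqueue() hands out a bare Deferred, and
# scrape_next() builds the work Deferred with defer_succeed (which auto-fires
# via callLater) and chains it onto the queued one.
queue = []


def defer_succeed(result):
    d = defer.Deferred()
    reactor.callLater(0.1, d.callback, result)
    return d


def enqueue(response):
    d = defer.Deferred()       # like slot.add_response_request()
    queue.append((response, d))
    scrape_next()
    return d


def scrape_next():
    while queue:
        response, d = queue.pop(0)
        work = defer_succeed(response.upper())  # like Scraper._scrape()
        work.chainDeferred(d)  # firing work also fires the queued deferred


dfd = enqueue('response body')
dfd.addCallback(lambda r: print('enqueue deferred fired:', r))
dfd.addCallback(lambda _: reactor.stop())
reactor.run()
```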