Python Scrapy 重复执行

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
import time
import logging
from scrapy.utils.project import get_project_settings


# Print log output to the console.
configure_logging()
# Read the project's settings.py values and give them to the CrawlerRunner.
project_settings = get_project_settings()
runner = CrawlerRunner(project_settings)

@defer.inlineCallbacks
def crawl(spider_name="xxxxx", interval=1):
    """Run a spider over and over, pausing between runs.

    Each cycle logs a message, starts the spider through the module-level
    ``runner``, waits for the Deferred it returns, and then waits
    ``interval`` seconds before starting the next cycle.

    The pause is scheduled with ``reactor.callLater`` rather than
    ``time.sleep``: this generator runs on the reactor thread, so a blocking
    sleep would stall everything else the reactor has to service.

    Args:
        spider_name: Spider handed to ``runner.crawl``.
        interval: Seconds to wait between the end of one crawl and the
            start of the next.

    Returns:
        A Deferred (produced by ``inlineCallbacks``). The loop has no normal
        exit, so it only fires if a cycle raises.
    """
    try:
        while True:
            logging.info("new cycle starting")
            yield runner.crawl(spider_name)
            # Non-blocking pause: the reactor fires this Deferred after
            # `interval` seconds and stays free to handle other events.
            pause = defer.Deferred()
            reactor.callLater(interval, pause.callback, None)
            yield pause
    finally:
        # Reached only when the loop is broken by an error. Stop the reactor
        # so the script exits instead of idling forever; the guard avoids
        # ReactorNotRunning if the failure happens before reactor.run().
        if reactor.running:
            reactor.stop()

# Kick off the repeating crawl loop; the Deferred it returns is not kept.
crawl()
# Start the Twisted reactor; this call blocks until the reactor is stopped.
reactor.run()

  

posted @ 2019-12-27 12:35  winstonsias  阅读(819)  评论(0编辑  收藏  举报