分布式爬虫
在 settings.py 中加入以下配置即可:
# Scrapy settings that plug scrapy-redis components into the project.

# Scheduler class provided by scrapy-redis, used in place of Scrapy's default.
SCHEDULER = "scrapy_redis.scheduler.Scheduler"

# Duplicate-request filter class provided by scrapy-redis.
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"

# Redis connection URL (user, password, host and port).
# NOTE(review): these look like placeholder credentials — replace with the
# real values for the deployment.
REDIS_URL = "redis://user:pass@hostname:9001"

# Item pipelines mapped to their order values: the project's MongoPipeline
# (300) and the scrapy-redis RedisPipeline (301).
ITEM_PIPELINES = {
    "zhihuuser.pipelines.MongoPipeline": 300,
    "scrapy_redis.pipelines.RedisPipeline": 301,
}