需求:将爬取到的数据分别存储到本地磁盘、Redis 数据库和 MySQL 数据库。
- 需要在管道文件中编写对应平台的管道类
- 在配置文件中启用自定义的管道类,使其生效
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import json


class QiubaiproPipeline(object):
    """Store each scraped item in a Redis list named ``data``."""

    # Redis client; stays None until open_spider() has run.
    conn = None

    def open_spider(self, spider):
        """Create the Redis client used by process_item().

        Args:
            spider: The spider being opened (unused).
        """
        # Imported here rather than at module level so that loading this
        # module does not require the redis package when only the other
        # pipelines are used.
        import redis

        print('开始爬虫')
        self.conn = redis.Redis(host='127.0.0.1', port=6379)

    def process_item(self, item, spider):
        """Push the item's author and content onto the Redis list ``data``.

        Args:
            item: Mapping-like object providing 'author' and 'content'.
            spider: The spider that produced the item (unused).

        Returns:
            The unchanged item, so that later pipelines can process it too.

        Raises:
            KeyError: If the item lacks 'author' or 'content'.
        """
        record = {
            'author': item['author'],
            'content': item['content'],
        }
        # redis-py 3.0+ only accepts bytes, str, int or float values and
        # raises DataError for a dict, so serialize the record to JSON first.
        # ensure_ascii=False keeps non-ASCII text readable in Redis.
        self.conn.lpush('data', json.dumps(record, ensure_ascii=False))
        return item


class QiubaiByFiles(object):
    """Placeholder for the local-disk pipeline.

    It only prints a confirmation message; nothing is written to disk.
    """

    def process_item(self, item, spider):
        """Print the confirmation message and return the item unchanged."""
        print("数据已经写入指定的磁盘文件中")
        return item


class QiubaiByMysql(object):
    """Placeholder for the MySQL pipeline.

    It only prints a confirmation message; no database is accessed.
    """

    def process_item(self, item, spider):
        """Print the confirmation message and return the item unchanged."""
        print('数据已经写入到mysql数据库中')
        return item
配置文件中管道配置
# Enable the custom pipeline classes. Per the Scrapy documentation, each item
# passes through the enabled pipelines in ascending order of these integer
# values, so the listing order below does not determine the execution order.
ITEM_PIPELINES = {
    'qiubaiPro.pipelines.QiubaiproPipeline': 300,
    'qiubaiPro.pipelines.QiubaiByFiles': 500,
    'qiubaiPro.pipelines.QiubaiByMysql': 400,
}