returnes

导航

Scrapy的pipelines.py存储文件和存储mongodb

一、将数据保存到文件

1.pipelines.py文件

 1 import json
 2 
 3 class TencentPipeline(object):
 4 
 5     def open_spider(self,spider):
 6         if spider.name=='hr_tencent':
 7             self.file=open('data.json','w')
 8 
 9     def process_item(self, item, spider):
10         if spider.name=='hr_tencent':
11             data=dict(item)
12             # data=json.dumps(data,ensure_ascii=False)
13             data=json.dumps(data)
14             self.file.write(data+',\n')
15         return item
16 
17     def close_spider(self,spider):
18         if spider.name=='hr_tencent':
19             self.file.close()

2.settings.py文件

# Register the file-storage pipeline; the number is its priority
# (0-1000, lower runs earlier).
ITEM_PIPELINES = {
    'tencent.pipelines.TencentPipeline': 300,
}

 

 

二、将数据保存到mongodb

1.pipelines.py文件

 1 from pymongo import MongoClient
 2 
 3 
 4 class Tencent1Pipeline(object):
 5     def open_spider(self,spider):
 6         if spider.name == 'hr_tencent1':
 7             self.client=MongoClient('127.0.0.1',27017)
 8             self.tencent=self.client['tencent']['tencent']
 9     def process_item(self,item,spider):
10         if spider.name == 'hr_tencent1':
11             print(item)
12             self.tencent.insert(dict(item))
13             return item
14     def close_spider(self,spider):
15         if spider.name == 'hr_tencent1':
16             self.client.close()

2.settings.py文件

# Register the MongoDB pipeline; priority 299 makes it run just before
# a pipeline registered at 300.
ITEM_PIPELINES = {
    'tencent.pipelines.Tencent1Pipeline': 299,
}

 

posted on 2019-03-01 20:01  returnes  阅读(181)  评论(0编辑  收藏  举报