解决Scrapy抓取中文结果保存为文件时的编码问题
import json
import codecs


# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html


class PandaPipeline(object):
    """Append every scraped item to ``pandaow.json`` as one JSON line.

    The file is opened through ``codecs`` with UTF-8 encoding and items are
    serialized with ``ensure_ascii=False``, so non-ASCII text (e.g. Chinese)
    is stored as readable characters instead of ``\\uXXXX`` escapes.
    """

    def __init__(self):
        # Opened in 'w' mode: an existing pandaow.json is truncated.
        self.file = codecs.open('pandaow.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Serialize *item* to one JSON line, write it, and return *item*.

        Args:
            item: The scraped item; anything accepted by ``dict()``.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item*, unchanged, so later pipelines can process it.
        """
        # ensure_ascii=False keeps non-ASCII characters as-is in the output.
        line = json.dumps(dict(item), ensure_ascii=False) + '\n'
        self.file.write(line)
        return item

    def close_spider(self, spider):
        """Close the output file.

        ``close_spider`` is the hook Scrapy invokes on item pipelines when
        the spider finishes; closing here guarantees buffered lines are
        flushed to disk.
        """
        self.file.close()

    def spider_closed(self, spider):
        """Backward-compatible name for :meth:`close_spider`.

        Kept for callers that invoke it directly or connect it to the
        ``spider_closed`` signal; Scrapy does not call it on pipelines
        automatically.
        """
        self.close_spider(spider)
将以上代码保存到项目的 pipelines.py 中,同时在 settings.py 中加入:
# Register PandaPipeline with order value 300.
ITEM_PIPELINES = {
    'panda.pipelines.PandaPipeline': 300,
}
以启用该 pipeline,这样 Scrapy 才会把每个抓取到的 item 交给 PandaPipeline 处理。