Scrapy学习-6-JSON数据处理
使用json模块处理JSON数据
class JsonwithEncodingPipeline(object):
    """Item pipeline that writes every item to ``article.json`` as JSON lines.

    Each processed item is converted to a ``dict``, serialized with the
    standard ``json`` module and written as one line of UTF-8 text.
    """

    def __init__(self):
        # codecs.open yields a text stream that encodes to UTF-8 on write,
        # so non-ASCII characters are stored as-is instead of \uXXXX escapes.
        self.file = codecs.open('article.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Serialize ``item`` as one JSON line and pass it on unchanged.

        Args:
            item: The scraped item; anything accepted by ``dict(item)``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, so later pipelines can keep using it.
        """
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        lines = json.dumps(dict(item), ensure_ascii=False) + '\n'
        self.file.write(lines)
        return item

    def close_spider(self, spider):
        """Close the output file.

        ``close_spider`` is the hook name Scrapy calls on item pipelines when
        a spider finishes; a method called ``spider_closed`` only runs when it
        is connected to the signal by hand, which left the file unclosed.
        """
        self.file.close()

    def spider_closed(self, spider):
        """Backward-compatible alias for :meth:`close_spider`."""
        self.close_spider(spider)
内置JSON处理对象JsonItemExporter的使用
class JsonExporterPipeline(object):
    """Item pipeline that exports items to ``articleexport.json``.

    Serialization is delegated to Scrapy's ``JsonItemExporter``, which is
    started in ``__init__`` and must be finished before the file is closed.
    """

    def __init__(self):
        # The exporter is handed a binary file object, hence mode 'wb'.
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file.

        ``close_spider`` is the hook name Scrapy calls on item pipelines when
        a spider finishes; a method called ``spider_closed`` only runs when it
        is connected to the signal by hand, so the export was never finished.
        """
        # Guard against a second call (e.g. both the hook and a manually
        # connected signal firing): finishing twice would touch a closed file.
        if self.file.closed:
            return
        self.exporter.finish_exporting()
        self.file.close()

    def spider_closed(self, spider):
        """Backward-compatible alias for :meth:`close_spider`."""
        self.close_spider(spider)

    def process_item(self, item, spider):
        """Hand ``item`` to the exporter and pass it on unchanged.

        Args:
            item: The scraped item to export.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, so later pipelines can keep using it.
        """
        self.exporter.export_item(item)
        return item