你好呀~

Scrapy 的 pipeline 的不同操作

针对json文件

import json
import os


class SpyOppoPipeline:
    """Scrapy item pipeline that streams every item into one JSON array file.

    The output looks like::

        [
        <TAB>{...},
        <TAB>{...}
        ]

    Items are written as they arrive. The separator between two items is
    emitted *before* the second one, so no trailing comma ever has to be
    removed afterwards and a run that yields no items still produces a valid
    (empty) JSON array.
    """

    def __init__(self):
        # Binary mode is kept from the original implementation; every chunk
        # written below is explicitly UTF-8 encoded.
        self.file = open('曹姐.json', 'wb+')
        # True once at least one item has been written to the array.
        self._has_items = False

    def open_spider(self, spider):
        """Write the opening bracket of the JSON array.

        Args:
            spider: The spider being opened (unused).
        """
        self._has_items = False
        self.file.write(b"[\n")

    def close_spider(self, spider):
        """Write the closing bracket and close the output file.

        Args:
            spider: The spider being closed (unused).
        """
        try:
            # With items the last line has no newline yet, so add one before
            # the bracket; without items the file already ends in "[\n".
            self.file.write(b"\n]" if self._has_items else b"]")
        finally:
            # Release the handle even if the final write fails.
            self.file.close()

    def process_item(self, item, spider):
        """Append one item to the JSON array and pass it on unchanged.

        Args:
            item: The scraped item; it is converted with ``dict(item)``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.
        """
        content = json.dumps(dict(item), ensure_ascii=False)
        # Every item after the first is preceded by the separator, which
        # keeps the file free of a dangling comma at all times.
        prefix = b",\n\t" if self._has_items else b"\t"
        self.file.write(prefix + content.encode("utf-8"))
        self._has_items = True
        return item

 

posted @ 2022-02-11 15:05  测神  阅读(40)  评论(0)  编辑  收藏  举报