scrapy爬取图片


class FctpItem(scrapy.Item):
    """Item describing one image to download.

    Both fields are read by ``FctpPipeline`` in this file.
    """

    # Image URL to download; FctpPipeline.get_media_requests builds the
    # download Request from this value.
    image_urls = scrapy.Field()
    # Base file name (without extension) for the saved image;
    # FctpPipeline.file_path appends '.jpg' to it.
    images = scrapy.Field()

from scrapy.pipelines.images import ImagesPipeline
from scrapy.http import Request
from pathlib import Path

class FctpPipeline(ImagesPipeline):
    """Image pipeline that stores each download as ``<item['images']>.jpg``.

    The target name comes from the item's ``images`` field. When a file with
    that name already exists in the store directory, underscores are appended
    to the base name until an unused name is found, so earlier downloads are
    not overwritten.
    """

    # Directory probed for existing files when the storage backend does not
    # expose a local base directory (this was the original hard-coded value).
    _FALLBACK_STORE_DIR = './data/'

    def get_media_requests(self, item, info):
        """Yield one download request per image URL of *item*.

        ``item['image_urls']`` may be a single URL string or an iterable of
        URL strings; empty values are skipped. The item is attached to each
        request's ``meta`` so ``file_path`` can still find it when it is not
        passed as a keyword argument.
        """
        urls = item['image_urls']
        if isinstance(urls, str):
            # A bare string is one URL, not an iterable of characters.
            urls = [urls]
        for url in urls or ():
            if url:
                yield Request(url, meta={'item': item})

    def _store_dir(self):
        """Return the directory in which name collisions are checked."""
        # NOTE(review): relies on the pipeline's store object exposing
        # ``basedir`` (Scrapy's filesystem store does) -- confirm for the
        # Scrapy version in use. Falls back to the original './data/'.
        store = getattr(self, 'store', None)
        return getattr(store, 'basedir', None) or self._FALLBACK_STORE_DIR

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the store-relative path under which the image is saved.

        The returned value decides where the image ends up. The item is taken
        from the ``item`` keyword argument when given, otherwise from
        ``request.meta['item']``.

        Raises:
            KeyError: if no item is passed and the request carries none in
                its ``meta``, or the item has no ``images`` field.
        """
        if item is None:
            item = request.meta['item']
        stem = item['images']
        store_dir = Path(self._store_dir())
        # On a duplicate name, extend the base name only; a blanket
        # str.replace('.jpg', ...) would also rewrite '.jpg' occurring
        # inside the name itself.
        while (store_dir / (stem + '.jpg')).exists():
            stem += '_'
        return stem + '.jpg'

    def item_completed(self, results, item, info):
        """Return *item* unchanged; download results are not stored on it."""
        return item
posted @ 2023-02-12 15:59  meizhengchao  阅读(14)  评论(0)  编辑  收藏  举报