scrapy爬取图片
class FctpItem(scrapy.Item):
    """Item describing one image to download.

    Both fields are read by ``FctpPipeline`` in this file.
    """

    # Passed directly to ``Request`` by the pipeline, i.e. used as a single URL.
    image_urls = scrapy.Field()
    # Concatenated with '.jpg' by the pipeline to build the saved file name.
    images = scrapy.Field()
from scrapy.pipelines.images import ImagesPipeline
from scrapy.http import Request
from pathlib import Path
class FctpPipeline(ImagesPipeline):
    """Image pipeline that names each download after the item's ``images`` field.

    ``item['image_urls']`` is passed straight to ``Request`` (so it is used as a
    single URL) and ``item['images']`` supplies the file name without extension.
    """

    def get_media_requests(self, item, info):
        """Yield one download request for the item's image URL.

        The item is attached to ``request.meta`` so that ``file_path`` can
        still reach it when no ``item`` argument is supplied.
        """
        yield Request(item['image_urls'], meta={'item': item})

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the path (relative to the image store) for this download.

        The return value decides where the image is saved. The name is
        ``item['images'] + '.jpg'``; while a file with that name already exists
        under ``./data/``, an underscore is appended to the stem
        (``name.jpg`` -> ``name_.jpg`` -> ``name__.jpg`` ...).
        """
        # Use the item passed in by the caller when there is one; otherwise
        # fall back to the copy stored on the request by get_media_requests.
        if item is None:
            item = request.meta['item']

        stem = item['images']
        filepath = stem + '.jpg'
        # './data/' is the directory the images are saved into.
        # NOTE(review): presumably this must match the IMAGES_STORE setting - confirm.
        while Path('./data/' + filepath).exists():
            # Grow only the stem so that a '.jpg' occurring elsewhere in the
            # name is left untouched (str.replace would rewrite every match).
            stem += '_'
            filepath = stem + '.jpg'
        return filepath

    def item_completed(self, results, item, info):
        """Return the item unchanged; ``results`` is not inspected."""
        return item
出处: https://www.cnblogs.com/meizhengchao/p/17113924.html
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。如有问题,可邮件(meizhengchao@qq.com)咨询。