Scrapy——將爬取圖片下載到本地

1. Spider程序:

  

 1 import scrapy, json
 2 from UnsplashImageSpider.items import ImageItem
 3 
 4 class UnsplashImageSpider(scrapy.Spider):
 5     # 定义Spider的名称
 6     name = 'unsplash_image'
 7     allowed_domains = ['unsplash.com']
 8     # 定义起始页面
 9     start_urls = ['https://unsplash.com/napi/photos?page=1&per_page=12']
10     def __init__ (self):
11         self.page_index = 1
12    
13     def parse(self, response):
14         # 解析服务器响应的JSON字符串
15         photo_list = json.loads(response.text) #
16         # 遍历每张图片
17         for photo in photo_list:
18             item = ImageItem()
19             item['image_id'] = photo['id']
20             item['download'] = photo['links']['download']
21             yield item
22 
23         self.page_index += 1
24         # 获取下一页的链接
25         next_link = 'https://unsplash.com/napi/photos?page='\
26             + str(self.page_index) + '&per_page=12'
27         # 继续获取下一页的图片
28         yield scrapy.Request(next_link, callback=self.parse)

2. 在Pipeline中使用urllib.request包直接下載圖片:

 1 from urllib.request import *
 2 
 3 class UnsplashimagespiderPipeline(object):
 4     def process_item(self, item, spider):
 5         # 每个item代表一个要下载的图片
 6         print('----------' + item['image_id'])
 7         real_url = item['download'] + "?force=true"
 8         try:
 9             pass
10             # 打开URL对应的资源
11             with urlopen(real_url) as result:
12                 # 读取图片数据
13                 data = result.read()
14                 # 打开图片文件
15                 with open("images/" + item['image_id'] + '.jpg', 'wb+') as f:
16                     # 写入读取的数据
17                     f.write(data)
18         except:
19             print('下载图片出现错误' % item['image_id'])

 

 
posted @ 2019-08-12 09:05  RiocasTure  阅读(171)  评论(0)  编辑  收藏  举报