scrapy简单爬取图片

#这里只爬取第一页

items.py
import scrapy
#定义爬取数据
class InsistItem(scrapy.Item):
    """Item carrying the image URLs collected by the spider."""

    # List of image URLs; the images pipeline is configured to read this field.
    image_urls = scrapy.Field()

tengxun.py
import scrapy
from insist.items import InsistItem
import json

class TengxunSpider(scrapy.Spider):
    """Spider that collects cover-image URLs from the getVerticalRoom JSON API.

    Only the first page of results is requested (``offset`` is left empty in
    the start URL).
    """

    name = 'tengxun'
    allowed_domains = ['douyucdn.cn']
    start_urls = ['http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset=']

    def parse(self, response):
        """Yield one ``InsistItem`` per entry of the response's ``data`` list.

        Args:
            response: Response for the start URL. Its body is parsed as JSON
                and is expected to be an object whose ``data`` key holds a
                list of dicts, each with a ``vertical_src`` image URL.

        Yields:
            InsistItem: An item whose ``image_urls`` is a one-element list
            containing the entry's ``vertical_src`` URL.
        """
        payload = json.loads(response.body)
        rooms = payload.get('data') if isinstance(payload, dict) else None
        if not isinstance(rooms, list):
            # NOTE(review): presumably an error reply carries a non-list
            # ``data``; bail out instead of crashing while indexing into it.
            self.logger.warning('Unexpected payload from %s; nothing parsed', response.url)
            return

        for room in rooms:
            image_url = room.get('vertical_src') if isinstance(room, dict) else None
            if not image_url:
                # Skip entries without an image instead of raising KeyError.
                continue
            # Build a fresh item per entry: re-yielding one shared instance
            # would let later iterations overwrite the URLs of items that
            # were already handed to the pipelines.
            item = InsistItem()
            # Important: ImagesPipeline consumes a *list* of URLs, so even a
            # single link must be wrapped in a list.
            item['image_urls'] = [image_url]
            yield item

settings.py
# Item field that holds the list of image URLs to download.
IMAGES_URLS_FIELD = 'image_urls'

# Directory where the downloaded images are saved.
IMAGES_STORE = 'C:\\Users\\lenovo\\Desktop\\data'

# Only the built-in images pipeline is enabled; the project's own
# InsistPipeline entry is intentionally left out.
ITEM_PIPELINES = {'scrapy.pipelines.images.ImagesPipeline': 1}

pipelines.py
import scrapy
from scrapy.pipelines.images import ImagesPipeline#导包
class SDPipeline(ImagesPipeline):
    """Images pipeline that requests every URL listed in ``item['image_urls']``."""

    def get_media_requests(self, item, info):
        """Yield one download request per image URL stored on the item.

        Args:
            item: The scraped item; its ``image_urls`` field holds a list of
                image URLs (a single URL string is tolerated as well).
            info: Media pipeline bookkeeping object passed in by the caller
                (unused here).

        Yields:
            scrapy.Request: A request for each non-empty image URL.
        """
        image_links = item.get('image_urls') or []
        if isinstance(image_links, str):
            # Accept a bare URL string for convenience.
            image_links = [image_links]
        # ``image_urls`` is a list, and scrapy.Request needs a single URL
        # string, so issue one request per link instead of passing the list.
        for image_link in image_links:
            if image_link:
                yield scrapy.Request(image_link)

最后在项目根目录下运行命令:scrapy crawl tengxun
然后打开 IMAGES_STORE 所指定的图片保存目录,其中会生成一个名为 full 的文件夹,下载的图片就保存在里面

 

posted @ 2019-09-20 23:05  晨曦yd  阅读(194)  评论(0)  编辑  收藏  举报