使用 Scrapy 简单爬取图片
# NOTE: this example only crawls the first page of results.

# ---------------------------------------------------------------------------
# items.py
# ---------------------------------------------------------------------------
import scrapy


class InsistItem(scrapy.Item):
    """Item describing the data scraped for one room entry."""

    # List of image URLs to download; this is the field named by
    # IMAGES_URLS_FIELD in settings.py.
    image_urls = scrapy.Field()


# ---------------------------------------------------------------------------
# tengxun.py
# ---------------------------------------------------------------------------
import json

import scrapy
from insist.items import InsistItem


class TengxunSpider(scrapy.Spider):
    """Spider that reads one JSON page of rooms and emits their cover images."""

    name = 'tengxun'
    allowed_domains = ['douyucdn.cn']
    # The offset is left empty on purpose: only the first page is requested.
    start_urls = ['http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset=']

    def parse(self, response):
        """Yield one ``InsistItem`` per entry of the response's ``data`` list.

        Args:
            response: The Scrapy response whose body is a JSON document with
                a ``data`` list; each entry carries a ``vertical_src`` URL.

        Yields:
            InsistItem: An item whose ``image_urls`` holds a one-element list
            with the entry's ``vertical_src`` URL.
        """
        payload = json.loads(response.body)
        rooms = payload.get('data') or []
        if not isinstance(rooms, list):
            # Anything other than a list cannot be iterated as room entries.
            self.logger.warning('Unexpected "data" payload: %r', rooms)
            return
        self.logger.debug('Received %d room entries', len(rooms))

        for room in rooms:
            cover_url = room.get('vertical_src')
            if not cover_url:
                # Nothing to download for this entry.
                continue
            # Build a fresh item per entry: re-yielding one shared instance
            # would let later iterations overwrite items that are still
            # travelling through the item pipelines.
            item = InsistItem()
            # Important: the images pipeline expects a *list* of image URLs,
            # so even a single link must be wrapped in a list.
            item['image_urls'] = [cover_url]
            yield item


# ---------------------------------------------------------------------------
# settings.py
# ---------------------------------------------------------------------------
ITEM_PIPELINES = {
    # 'insist.pipelines.InsistPipeline': 300,
    'scrapy.pipelines.images.ImagesPipeline': 1,
}
IMAGES_STORE = 'C:\\Users\\lenovo\\Desktop\\data'  # directory where images are saved
IMAGES_URLS_FIELD = 'image_urls'  # item field that holds the image links


# ---------------------------------------------------------------------------
# pipelines.py
# ---------------------------------------------------------------------------
import scrapy
from scrapy.pipelines.images import ImagesPipeline


class SDPipeline(ImagesPipeline):
    """Images pipeline that requests every URL listed in ``image_urls``.

    NOTE(review): settings.py above enables the stock ``ImagesPipeline``, not
    this subclass; it only takes effect if it is registered in
    ``ITEM_PIPELINES``.
    """

    def get_media_requests(self, item, info):
        """Yield one download request per image URL stored on the item.

        Args:
            item: The scraped item; its ``image_urls`` field is a list of
                URL strings.
            info: Pipeline bookkeeping object supplied by Scrapy (unused).

        Yields:
            scrapy.Request: One request for each image URL.
        """
        # ``image_urls`` is a list, so it must be iterated: passing the whole
        # list to ``scrapy.Request`` fails because a Request URL must be a
        # string.
        for image_link in item.get('image_urls', []):
            yield scrapy.Request(image_link)


# Finally, run `scrapy crawl tengxun`, then open the `full` folder inside the
# configured image directory (IMAGES_STORE) to view the downloaded images.