Python利用requests模块爬取皮肤
import requests import os import json from lxml import etree from fake_useragent import UserAgent import logging # 日志输出的基本配置 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s') class glory_of_king(object): def __init__(self): if not os.path.exists("./王者荣耀皮肤"): os.mkdir("王者荣耀皮肤") # 利用fake_useragent产生随机UserAgent 防止被反爬 ua = UserAgent(verify_ssl=False, path='fake_useragent.json') for i in range(1, 50): self.headers = { 'User-Agent': ua.random } def scrape_skin(self): # 发送请求 获取响应 response = requests.get('https://pvp.qq.com/web201605/js/herolist.json', headers=self.headers) # str转为json data = json.loads(response.text) # for循环遍历data获取需要的字段 创建对应英雄名称的文件夹 for i in data: hero_number = i['ename'] # 获取英雄名字编号 hero_name = i['cname'] # 获取英雄名字 os.mkdir("./王者荣耀皮肤/{}".format(hero_name)) # 创建英雄名称对应的文件夹 response_src = requests.get("https://pvp.qq.com/web201605/herodetail/{}.shtml".format(hero_number), headers=self.headers) hero_content = response_src.content.decode('gbk') # 返回相应的html页面 解码为gbk # xpath解析对象 提取每个英雄的皮肤名字 hero_data = etree.HTML(hero_content) hero_img = hero_data.xpath('//div[@class="pic-pf"]/ul/@data-imgname') # 去掉每个皮肤名字中间的分隔符 hero_src = hero_img[0].split('|') logging.info(hero_src) # 遍历英雄src处理图片名称。 for j in range(len(hero_src)): # 去掉皮肤名字的&符号 index_ = hero_src[j].find("&") skin_name = hero_src[j][:index_] # 请求下载图片 response_skin = requests.get( "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{}/{}-bigskin-{}.jpg".format( hero_number, hero_number, j + 1)) # 获取图片二进制数据 skin_img = response_skin.content # 把皮肤图片保存到对应名字的文件里 with open("./王者荣耀皮肤/{}/{}.jpg".format(hero_name, skin_name), "wb")as f: f.write(skin_img) logging.info(f"{skin_name}.jpg 下载成功!!") def run(self): self.scrape_skin() if __name__ == '__main__': spider = glory_of_king() spider.run()