Python利用requests模块爬取皮肤

 

import requests
import os
import json
from lxml import etree
from fake_useragent import UserAgent
import logging

# 日志输出的基本配置
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')


class glory_of_king(object):
    def __init__(self):
        if not os.path.exists("./王者荣耀皮肤"):
            os.mkdir("王者荣耀皮肤")
        # 利用fake_useragent产生随机UserAgent  防止被反爬
        ua = UserAgent(verify_ssl=False, path='fake_useragent.json')
        for i in range(1, 50):
            self.headers = {
                'User-Agent': ua.random
            }

    def scrape_skin(self):
        # 发送请求   获取响应
        response = requests.get('https://pvp.qq.com/web201605/js/herolist.json', headers=self.headers)
        # str转为json
        data = json.loads(response.text)
        # for循环遍历data获取需要的字段  创建对应英雄名称的文件夹
        for i in data:
            hero_number = i['ename']    # 获取英雄名字编号
            hero_name = i['cname']      # 获取英雄名字
            os.mkdir("./王者荣耀皮肤/{}".format(hero_name))  # 创建英雄名称对应的文件夹
            response_src = requests.get("https://pvp.qq.com/web201605/herodetail/{}.shtml".format(hero_number),
                                        headers=self.headers)
            hero_content = response_src.content.decode('gbk')  # 返回相应的html页面 解码为gbk
            # xpath解析对象  提取每个英雄的皮肤名字
            hero_data = etree.HTML(hero_content)
            hero_img = hero_data.xpath('//div[@class="pic-pf"]/ul/@data-imgname')
            # 去掉每个皮肤名字中间的分隔符
            hero_src = hero_img[0].split('|')
            logging.info(hero_src)
            # 遍历英雄src处理图片名称。
            for j in range(len(hero_src)):
                # 去掉皮肤名字的&符号
                index_ = hero_src[j].find("&")
                skin_name = hero_src[j][:index_]
                # 请求下载图片
                response_skin = requests.get(
                    "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{}/{}-bigskin-{}.jpg".format(
                        hero_number, hero_number, j + 1))
                # 获取图片二进制数据
                skin_img = response_skin.content
                # 把皮肤图片保存到对应名字的文件里
                with open("./王者荣耀皮肤/{}/{}.jpg".format(hero_name, skin_name), "wb")as f:
                    f.write(skin_img)
                    logging.info(f"{skin_name}.jpg 下载成功!!")

    def run(self):
        self.scrape_skin()

if __name__ == '__main__':
    spider = glory_of_king()
    spider.run()

 

posted @ 2020-10-19 17:54  Security  阅读(179)  评论(0编辑  收藏  举报