# Python爬取王者荣耀皮肤

import requests
import re
import os
from lxml import etree

def get_index(timeout=10):
    """Download the hero list page and return its text.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as GBK, or None when the server
        answers with a status code other than 200.

    Raises:
        requests.exceptions.RequestException: On connection errors or
            when the timeout expires.
    """
    url = 'https://pvp.qq.com/web201605/herolist.shtml'
    # requests applies no timeout by default, so a stalled connection
    # would otherwise block the script forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.content.decode('gbk')

def parse_index(html):
    """Extract the absolute hero detail page URLs from the list page.

    Args:
        html: Text of the hero list page, or None/empty when the page
            could not be downloaded.

    Returns:
        A list of URLs, one per ``<a href>`` found under the
        ``herolist clearfix`` element. Empty when ``html`` is None or
        empty, or when nothing matches.
    """
    # get_index() returns None on a non-200 response; treat that (and an
    # empty page) as "no heroes" instead of crashing inside the parser.
    if not html:
        return []
    selector = etree.HTML(html)
    # NOTE(review): some lxml versions appear to return None for input
    # with no parseable content - guard defensively.
    if selector is None:
        return []
    base_url = 'https://pvp.qq.com/web201605/'
    hrefs = selector.xpath('//*[@class="herolist clearfix"]/li/a/@href')
    return [base_url + href for href in hrefs]

def parse_deatil(url):
    """Download every skin image of the hero described by a detail page.

    The hero name, numeric hero id and the '|'-separated skin name list
    are scraped from the page with regular expressions. Each skin image
    is saved as ``王者荣耀皮肤/<hero name>/<skin name>.jpg``.

    Pages that cannot be fetched or parsed, and images that cannot be
    downloaded, are reported and skipped so that one bad hero does not
    abort the whole crawl.

    Args:
        url: Absolute URL of a hero detail page.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: On connection errors or
            timeouts.
    """
    timeout = 10  # seconds; requests would otherwise wait indefinitely
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print('页面获取失败,跳过:{}'.format(url))
        return
    html = response.content.decode('gbk')

    hero_names = re.findall(r'<h2 class="cover-name">(.*?)</h2>', html, re.S)
    hero_ids = re.findall(r'<span class="hidden">(\d+)</span>', html, re.S)
    skin_name_strs = re.findall(r'<ul class="pic-pf-list pic-pf-list3" data-imgname="(.*?)">', html, re.S)
    # Any of the patterns may fail to match if the page layout differs;
    # indexing [0] blindly would raise IndexError.
    if not (hero_names and hero_ids and skin_name_strs):
        print('页面解析失败,跳过:{}'.format(url))
        return

    hero_name = hero_names[0]
    hero_id = hero_ids[0]
    # Each entry is cut at the first '&'; only the part before it is
    # used as the skin name.
    skin_name_list = [entry.split('&')[0] for entry in skin_name_strs[0].split('|')]

    # Output directory: created once, parents included.
    hero_dir = os.path.join('王者荣耀皮肤', hero_name)
    os.makedirs(hero_dir, exist_ok=True)

    # Image URLs are numbered from 1 in the order the skins are listed.
    for index, skin_name in enumerate(skin_name_list, start=1):
        skin_url = 'http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{}/{}-bigskin-{}.jpg'.format(hero_id, hero_id, index)
        res = requests.get(skin_url, timeout=timeout)
        if res.status_code != 200:
            # Do not store an error page under a .jpg name.
            print('图片下载失败,跳过:{}'.format(skin_url))
            continue
        file_name = '{}.jpg'.format(skin_name)
        with open(os.path.join(hero_dir, file_name), 'wb') as f:
            f.write(res.content)
    print('正在爬取:{}{}'.format(hero_name, skin_name_list))


if __name__ == '__main__':
    # Script entry point: fetch the hero list, then crawl each hero page.
    html = get_index()
    if html is None:
        # get_index() returns None when the list page did not come back
        # with status 200; there is nothing to crawl in that case.
        raise SystemExit('获取英雄列表页面失败')
    for link in parse_index(html):
        parse_deatil(link)

 

# posted @ 2020-09-02 16:51  不会飞的鲨鱼  阅读(359)  评论(0)  编辑  收藏  举报