# 用 Python 3 的 requests 库爬取英雄联盟皮肤(单线程爬取)

"""
Hero_LOL 和王者荣耀类似
"""
import requests
import re
import json
import os
import threading


def hero(hero_name, hero_num):
    """Download the skin images of every hero into the ``LOL/`` directory.

    For each hero id, skin numbers 0-14 are requested in order; the first
    response whose status is not 200 ends the scan for that hero. Every
    successful response body is written to ``LOL/<name><skin number>.jpg``.

    :param hero_name: hero names, indexed in the same order as ``hero_num``.
    :param hero_num: hero ids as strings (they are concatenated into the URL).
    :return: None
    """
    # Common prefix shared by every skin image URL.
    base_url = "https://ossweb-img.qq.com/images/lol/web201310/skin/big"
    print(len(hero_num))
    # open() does not create missing directories, so make sure the output
    # directory exists before the first image is written.
    os.makedirs("LOL", exist_ok=True)
    for index, hero_id in enumerate(hero_num):
        # Assume a hero has at most 15 skins, numbered from 0.
        for sk_num in range(0, 15):
            # The skin number is zero-padded to three digits. The previous
            # '"00" + str(sk_num)' produced four digits for skins 10-14 and
            # therefore addressed a different image.
            # NOTE(review): three-digit padding follows the public champion
            # data format (hero id followed by a 3-digit skin number) - confirm.
            skin_url = base_url + hero_id + str(sk_num).zfill(3) + ".jpg"
            reply = requests.get(skin_url)
            if reply.status_code != 200:
                # No such skin: move on to the next hero.
                break
            filename = "LOL/" + str(hero_name[index]) + str(sk_num) + ".jpg"
            print("此时正在下载:" + filename+" 这是第"+str(index+1)+"个英雄")
            with open(filename, "wb") as f:
                f.write(reply.content)


def main():
    """Fetch the champion list and download the skins of every hero.

    Downloads ``champion.js``, extracts the ``"keys"`` object (hero id ->
    hero name) with a regular expression, and hands the ids and names to
    ``hero``.

    :return: None
    :raises ValueError: if no ``"keys"`` section is found in the response.
    """
    # JS file that carries the champion data.
    hero_url = "https://lol.qq.com/biz/hero/champion.js"
    # User-Agent header that makes the request look like a browser.
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
    response = requests.get(hero_url, headers=header)
    # Decode the body as GBK so it can be searched with the regex below.
    text = response.content.decode("GBK")
    # Raw string: "\s" and "\S" are regex classes, not string escapes.
    # The capture group is everything between 'keys":' and ',"data'.
    regex = re.compile(r'keys":([\s\S]*?),"data', re.IGNORECASE)
    match = regex.search(text)
    if match is None:
        raise ValueError('no "keys" section found in ' + hero_url)
    # The captured text comes from the network, so it is parsed as data with
    # json.loads instead of being executed with eval().
    res = json.loads(match.group(1))
    hero_num = list(res.keys())     # hero ids
    hero_name = list(res.values())  # hero names

    hero(hero_name, hero_num)


if __name__ == '__main__':
    main()

 

# posted @ 2019-06-06 11:30  王学长  阅读(276)  评论(0)  编辑  收藏  举报