用 Python 3 的 requests 库爬取王者荣耀皮肤:单线程爬取与多线程爬取
先获取英雄编号(herolist.json 中的 ename 字段),英雄列表地址:https://pvp.qq.com/web201605/js/herolist.json
皮肤URL规律:https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/英雄编号/英雄编号-bigskin-皮肤编号.jpg
其他规律去别的博客看
单线程爬取
import json
import os
import time

import requests


def hero(hero_name, hero_num, h_l, max_skins=10, timeout=10):
    """Download the skin images of the given heroes, one request at a time.

    Args:
        hero_name: Display names of the heroes; used to build output file names.
        hero_num: Hero identifiers, aligned index-for-index with ``hero_name``;
            used to build the image URLs.
        h_l: Base URL of the skin images (must end with ``/``).
        max_skins: Upper bound on the number of skins probed per hero.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If an image request fails at the
            transport level (connection error, timeout, ...).
    """
    # Create the output folder up front so open() below cannot fail just
    # because the folder was never created by hand.
    os.makedirs("Hero", exist_ok=True)

    for name, number in zip(hero_name, hero_num):
        # Skin numbering starts at 1; there is no skin 0.
        for sk_num in range(1, max_skins + 1):
            hsl = h_l + str(number) + "/" + str(number) + "-bigskin-" + str(sk_num) + ".jpg"
            hl = requests.get(hsl, timeout=timeout)
            if hl.status_code != 200:
                # Any non-200 status is treated as "no more skins for this hero".
                break
            # Progress output so the user can see which file is being saved.
            print("此时正在下载:" + str(name) + str(sk_num) + "\n")
            with open("Hero/" + str(name) + str(sk_num) + ".jpg", "wb") as f:
                f.write(hl.content)


def main():
    """Fetch the hero list and download every hero's skins sequentially."""
    url = "https://pvp.qq.com/web201605/js/herolist.json"
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
    response = requests.get(url, headers=header, timeout=10)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()
    hero_list = response.json()
    h_l = "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"

    # Extract the display name ("cname") and identifier ("ename") of each hero.
    hero_name = [entry["cname"] for entry in hero_list]
    hero_num = [entry["ename"] for entry in hero_list]
    print("HeroNumber:" + str(len(hero_name)))  # number of heroes found
    hero(hero_name, hero_num, h_l)


if __name__ == '__main__':
    main()
多线程爬取,效率超高,在爬数据的时候简直是利器!
import json
import os
import threading
import time

import requests


def hero_1(hero_name, hero_num, h_l, max_skins=14, timeout=10):
    """Download the skin images of the given heroes (worker for one thread).

    Args:
        hero_name: Display names of the heroes; used to build output file names.
        hero_num: Hero identifiers, aligned index-for-index with ``hero_name``;
            used to build the image URLs.
        h_l: Base URL of the skin images (must end with ``/``).
        max_skins: Upper bound on the number of skins probed per hero.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If an image request fails at the
            transport level (connection error, timeout, ...).
    """
    # exist_ok=True keeps this safe when several threads reach it together.
    os.makedirs("Hero", exist_ok=True)

    for name, number in zip(hero_name, hero_num):
        # Skin numbering starts at 1; there is no skin 0.
        for sk_num in range(1, max_skins + 1):
            hsl = h_l + str(number) + "/" + str(number) + "-bigskin-" + str(sk_num) + ".jpg"
            hl = requests.get(hsl, timeout=timeout)
            if hl.status_code != 200:
                # Any non-200 status is treated as "no more skins for this hero".
                break
            print("此时正在下载:" + str(name) + str(sk_num) + "\n")
            with open("Hero/" + str(name) + str(sk_num) + ".jpg", "wb") as f:
                f.write(hl.content)


def main():
    """Fetch the hero list and download all skins using three worker threads."""
    url = "https://pvp.qq.com/web201605/js/herolist.json"
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
    response = requests.get(url, headers=header, timeout=10)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()
    hero_list = response.json()
    h_l = "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"

    # Extract the display name ("cname") and identifier ("ename") of each hero.
    hero_name = [entry["cname"] for entry in hero_list]
    hero_num = [entry["ename"] for entry in hero_list]
    print("HeroNumber:" + str(len(hero_name)))

    # Split the heroes into contiguous chunks sized from the actual list
    # length, so a shorter list cannot raise IndexError and a longer one is
    # not silently truncated.
    thread_count = 3
    chunk = -(-len(hero_name) // thread_count)  # ceiling division
    workers = []
    for k in range(thread_count):
        start, stop = k * chunk, (k + 1) * chunk
        workers.append(
            threading.Thread(
                target=hero_1,
                args=(hero_name[start:stop], hero_num[start:stop], h_l),
            )
        )

    for worker in workers:
        worker.start()
    # Wait for every worker so main() returns only after all downloads finish.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()