# @Author: 昊阳
# -*- coding: utf-8 -*-
# @Time:2022/10/22 0022 13:19
# @File : video_all
# @Project : MyCase
import os
import pprint
import re

import requests
# Folder (relative to the working directory) that receives the downloads.
_VIDEO_DIR = 'video'
# Seconds to wait for a connection / for data before a request is abandoned.
_REQUEST_TIMEOUT = 30
# Number of bytes pulled per chunk while streaming a video to disk.
_CHUNK_SIZE = 1 << 20
# Captures the digits of a "vid" value that is directly followed by "liveId".
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
_VID_PATTERN = re.compile(r'"vid":(\d+),"liveId"')
# Path separators, characters Windows rejects in file names, and control chars.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
# Browser-like user agent sent with every request.
_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
}


def _safe_filename(title, fallback):
    """Return *title* with unsafe file-name characters replaced by ``_``.

    Leading/trailing spaces and dots are stripped; if nothing is left,
    *fallback* is returned instead.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', str(title)).strip(' .')
    return cleaned or fallback


def _download(video_url, destination):
    """Stream *video_url* to the file *destination*, creating its folder."""
    os.makedirs(os.path.dirname(destination) or '.', exist_ok=True)
    # Write to a temporary name first so an interrupted transfer never leaves
    # a truncated file behind under the final name.
    partial = destination + '.part'
    try:
        with requests.get(url=video_url, headers=_HEADERS, stream=True,
                          timeout=_REQUEST_TIMEOUT) as response:
            response.raise_for_status()
            with open(partial, mode='wb') as out:
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    out.write(chunk)
        os.replace(partial, destination)
    finally:
        # Only still present when the transfer or the rename failed.
        if os.path.exists(partial):
            os.remove(partial)


def create(num):
    """Download every video found on listing pages 1..num of v.huya.com.

    For each page of ``https://v.huya.com/g/all?set_id=51&order=hot`` the
    video ids are extracted from the page source, the metadata of each id is
    fetched from the ``getMomentContent`` endpoint, and the URL of the first
    entry in ``definitions`` is saved as ``video/<title>.mp4``.

    Args:
        num: Number of listing pages to process; values below 1 do nothing.

    Returns:
        None.

    Raises:
        requests.RequestException: A request failed, timed out, or returned
            an HTTP error status.
        OSError: The output folder or file could not be written.
    """
    for page in range(1, num + 1):
        print(f'--------------正在获取第{page}页的视频---------------')
        url = f'https://v.huya.com/g/all?set_id=51&order=hot&page={page}'
        listing = requests.get(url=url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
        listing.raise_for_status()
        for vid in _VID_PATTERN.findall(listing.text):
            detail_url = f'https://liveapi.huya.com/moment/getMomentContent?videoId={vid}&_=1666417414509'
            detail = requests.get(url=detail_url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
            detail.raise_for_status()
            try:
                moment = detail.json()['data']['moment']
                title = moment['title']
                video_url = moment['videoInfo']['definitions'][0]['url']
            except (KeyError, IndexError, TypeError):
                # One entry with incomplete metadata should not abort the
                # remaining downloads; report it and move on.
                print(f'跳过视频{vid}: 响应中缺少标题或视频地址')
                continue
            print(title, video_url)
            # Titles come from the remote API, so they are cleaned before
            # being used as a file name; the id is used if nothing is left.
            target = os.path.join(_VIDEO_DIR, _safe_filename(title, vid) + '.mp4')
            _download(video_url, target)
if __name__ == '__main__':
    # Script entry point: read the number of listing pages from the console,
    # convert it to an integer and hand it to the crawler.
    page_count = int(input("请输入想要爬取几页的视频"))
    create(page_count)