import re import json import requests from moviepy.audio.io.AudioFileClip import AudioFileClip from moviepy.video.io.VideoFileClip import VideoFileClip
def _parse_titles(page_text):
    """Extract the per-part titles from the raw text of a playlist page.

    The text is scanned between the first ``volume`` marker and the next
    ``subtitle`` marker. Inside that window every ``part`` marker starts one
    entry; the title is the text between the first ``":"`` separator and the
    ``.mp4`` suffix, with all spaces removed.

    Args:
        page_text: Response body as a ``str``.

    Returns:
        A list of titles, one per ``part`` marker, in page order.

    Raises:
        IndexError: If the ``volume`` marker is missing, or an entry has no
            ``":"`` separator before its ``.mp4`` suffix.
    """
    # Drop characters GBK cannot represent so the titles can later be
    # printed / used as file names on a GBK console without encode errors.
    cleaned = page_text.encode("gbk", "ignore").decode("gbk", "ignore")

    window = cleaned.split('volume')[1].split('subtitle')[0]

    # The chunk before the first 'part' marker is not a title entry. Skip it
    # *before* parsing: parsing it first (and deleting the result afterwards)
    # raised IndexError whenever that leading chunk lacked a '":"' separator.
    entries = window.split('part')[1:]

    return [
        entry.split('.mp4')[0].split('":"')[1].replace(' ', '')
        for entry in entries
    ]


def getAllTitles(url):
    """Fetch ``url`` and return the titles of all video parts listed on it.

    Args:
        url: Address of the page that lists the video parts.

    Returns:
        A list of title strings (spaces removed, non-GBK characters dropped),
        one per part, in page order.

    Raises:
        IndexError: If the response does not contain the expected markers
            (see ``_parse_titles``).
    """
    # NOTE(review): headers='' is kept exactly as in the original call;
    # requests documents ``headers`` as a dict -- confirm whether real
    # request headers (e.g. a user-agent) were intended here.
    resp = requests.get(url=url, headers='').text
    return _parse_titles(resp)
def downVideos(titles,Primary_url): headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/\ 537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/\ 96.0.1054.62', "referer": "https://message.bilibili.com/"} page=1 for i in titles: url=Primary_url+'?p='+str(page) print('下载第' + str(page) + '个视频,地址为:' + url) # print(tt) html = requests.get(url, headers=headers) # 请求原始网页 # 视频下载链接在script标签内 k = r'<script>window.__playinfo__=(.*?)</script>' # print(k) script = re.findall(k, html.text)[0] # print(script) # 转成字典 script_dic = json.loads(script) url_video = script_dic['data']['dash']['video'][0]['baseUrl'] print('获取到第'+str(page)+'个视频地址') url_audio = script_dic['data']['dash']['audio'][0]['baseUrl'] print('获取到第'+str(page)+'个音频地址')
video_name = i + '.mp4' audio_name = i + '.mp3' output_name = i + '_merge.mp4' # 请求资源 html_video = requests.get(url_video, headers=headers) html_audio = requests.get(url_audio, headers=headers)
# 写入文件 with open('video/' + video_name, 'wb') as f: f.write(html_video.content) with open('video/' + audio_name, 'wb') as f: f.write(html_audio.content)