Python爬取b站视频
import json
import os
import re
import subprocess
import time

import requests


class BLBL(object):
    """Download the audio and video streams of one Bilibili video page.

    The page embeds a ``window.__playinfo__`` JSON blob that lists separate
    audio and video stream URLs.  Both streams are saved next to the script
    and then muxed into a single file with ffmpeg, which must be installed
    and reachable through ``PATH``.
    """

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30
    # Size of the pieces written to disk while streaming a download.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, url, cookie, referer):
        """Store the page URL and the header values shared by all requests.

        Args:
            url: Address of the video page to scrape.
            cookie: Value sent in the ``Cookie`` header of the page request.
            referer: Value sent in the ``Referer`` header.
        """
        self.base_url = url
        # Cookie header value
        self.cookie = cookie
        # Referer header value
        self.referer = referer
        # Remaining request header values
        self.accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'
        self.accept_Encoding = 'gzip, deflate, br'
        self.accept_Language = 'zh-CN,zh;q=0.9,en;q=0.8'
        # Header *value* only: the original string started with a literal
        # "User-Agent:" prefix, which was sent as part of the value.
        self.user_agent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko)"

    def html(self):
        """Fetch the video page and return its HTML source as text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # Per the original author, the page source comes back incomplete
        # (anti-scraping) unless these headers are sent.
        base_headers = {
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language,
            'Cache-Control': 'no-cache',
            'Cookie': self.cookie,
            'Referer': self.referer,
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent,
        }
        base_response = requests.get(
            self.base_url, headers=base_headers, timeout=self.REQUEST_TIMEOUT
        )
        # Fail here with the HTTP status instead of later inside the parser.
        base_response.raise_for_status()
        return base_response.text

    def xin_xi(self, html):
        """Extract the title and the stream URLs from the page source.

        Args:
            html: HTML source of the video page.

        Returns:
            A ``(title, audio_url, video_url)`` tuple.  The title has
            whitespace and characters that are unsafe in file names removed.

        Raises:
            ValueError: If the play info, the title, or the stream URLs
                cannot be found in ``html``.
        """
        playinfo_match = re.search(
            r'<script>window\.__playinfo__=(.*?)</script>', html, re.S
        )
        if playinfo_match is None:
            raise ValueError('window.__playinfo__ not found in page source')

        title_match = re.search(r'<span class="tit">(.*?)</span>', html)
        if title_match is None:
            raise ValueError('video title not found in page source')
        # The title becomes a file name, so drop path separators, whitespace
        # and the other characters that file systems commonly reject.
        title = re.sub(r'[\\/:*?"<>|\s]+', '', title_match.group(1))
        if not title:
            raise ValueError('video title is empty after sanitising')

        html_data = json.loads(playinfo_match.group(1))
        try:
            dash = html_data['data']['dash']
            # Audio stream URL
            audio_url = dash['audio'][0]['backupUrl'][0]
            # Video stream URL
            video_url = dash['video'][0]['backupUrl'][0]
        except (KeyError, IndexError, TypeError) as err:
            raise ValueError('stream URLs missing from play info') from err
        return title, audio_url, video_url

    def _download(self, url, path, headers):
        """Stream ``url`` to the file ``path`` without buffering it in memory."""
        with requests.get(
            url, headers=headers, stream=True, timeout=self.REQUEST_TIMEOUT
        ) as response:
            response.raise_for_status()
            with open(path, mode='wb') as f:
                for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                    f.write(chunk)

    def video(self, html):
        """Download both streams described in ``html`` and merge them.

        The audio is written to ``<title>.mp3`` and the video to
        ``<title>.mp4`` in the current directory before merging.
        """
        # Title, audio URL and video URL
        title, audio_url, video_url = self.xin_xi(html)
        # Headers sent with the stream download requests
        download_headers = {
            'User-Agent': self.user_agent,
            'Referer': self.referer,
            'Origin': 'https://www.bilibili.com',
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language,
        }
        self._download(audio_url, title + '.mp3', download_headers)
        self._download(video_url, title + '.mp4', download_headers)
        print('正在保存:', title)
        self.video_audio_merge_single(title)

    def run(self):
        """Fetch the page, download its streams and merge them."""
        html = self.html()
        self.video(html)
        print('爬取成功')

    # The download yields two files, one audio and one video; they have to be
    # merged with ffmpeg to get a complete video.
    def video_audio_merge_single(self, video_name):
        """Mux ``<video_name>.mp4`` and ``<video_name>.mp3`` into one file.

        The result is written to ``<video_name>(合).mp4`` and the two input
        files are deleted once ffmpeg has finished successfully.

        Raises:
            FileNotFoundError: If the ffmpeg executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with an error; the
                input files are left in place in that case.
        """
        print("视频合成开始:", video_name)
        video_path = f'{video_name}.mp4'
        audio_path = f'{video_name}.mp3'
        # An argument list (no shell) keeps parentheses and other special
        # characters in the file names from being interpreted by a shell.
        command = [
            'ffmpeg',
            '-i', video_path,
            '-i', audio_path,
            '-vcodec', 'copy',
            '-acodec', 'copy',
            video_name + '(合)' + '.mp4',
        ]
        # Block until ffmpeg is done; a fixed sleep could delete the inputs
        # while ffmpeg was still reading them.
        subprocess.run(command, check=True)
        print("视频合成结束:", video_name)
        os.remove(audio_path)
        os.remove(video_path)


if __name__ == '__main__':
    # Address of the video page to download.
    url = 'https://www.bilibili.com/video/BV1yy4y1i766'
    referer = 'https://space.bilibili.com/'
    # Cookie string of a logged-in session, taken from the BILIBILI_COOKIE
    # environment variable so it does not have to be stored in the script.
    cookie = os.environ.get('BILIBILI_COOKIE', '')
    blbl = BLBL(url, cookie, referer)
    blbl.run()
目前正在学习 Python,如果有什么不对的地方,希望广大朋友指出错误,深表感谢。