Python爬取哔哩哔哩视频
Python爬取哔哩哔哩视频
一、环境准备
我这里使用的环境如下,仅供参考:
开发工具:pycharm
python环境:python-3.8.0
依赖的包:shutil、os、re、json、random(使用其中的 choice)、requests、lxml
二、页面分析
我在这里就拿前段时间非常火的马老师的视频来举例子吧。
视频链接: https://www.bilibili.com/video/BV1Ef4y1i78b?from=search&seid=12072538764197074893
解析视频链接时,我们只需要其中的 BV1Ef4y1i78b,也就是 video/ 后面、? 号前面的部分。
第二部分是抓包。哔哩哔哩的视频被分成了多个小段,通过查看网页源码,我们可以解析出这些分段的信息。
分析返回json中的具体内容
返回给我们的内容如下,真正对我们有用的信息在 data 中
在 data 下面我们就可以清晰地看到我们想要的内容了,如视频的画质以及视频的地址等。注意:如果你拿到地址后直接进行访问,是访问不到的。哔哩哔哩会校验请求头中的 Referer,而直接在浏览器中访问时没有 Referer,所以会找不到页面。我们需要解析的内容如下:
视频的时长、视频的质量、视频的 URL、音频的 URL,最后将音频和视频合并!
三、代码实操
3.1 准备工作
依赖的包
import json import os import re import shutil import ssl import time import requests from concurrent.futures import ThreadPoolExecutor from random import choice from lxml import etree
添加请求头和随机用户代理
设置请求头等参数,防止被反爬
# Default request headers. A browser-like User-Agent makes the requests look
# like ordinary browser traffic, which helps avoid anti-crawler rejection.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
}

# Pool of User-Agent strings used by get_user_agent(). Built once at import
# time instead of on every call. Duplicate entries are kept on purpose so the
# selection probabilities stay the same as in the original list.
USER_AGENTS = (
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1",
    "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20",
    "Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
    "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
)


def get_user_agent():
    """Return a User-Agent string chosen at random from ``USER_AGENTS``.

    Returns:
        str: One entry of the pool, to be sent as the ``User-Agent`` header
        so the request imitates a browser.
    """
    return choice(USER_AGENTS)
3.2 编写下载代码
def single_download(aid, acc_quality):
    """Download a single Bilibili video identified by its id.

    Fetches the video page, reads the play information embedded in the HTML,
    prints the selected quality and duration, and hands the stream URLs to
    ``download_video_single``.

    Args:
        aid: Video id as it appears in the page URL, e.g. ``'BV1Ef4y1i78b'``.
        acc_quality: Index into the quality lists of the play information
            (``0`` selects the first entry).

    Raises:
        KeyError, IndexError: If the play information lacks the expected
            fields or ``acc_quality`` is out of range for the video list.
    """
    # Request the video page; the play information is embedded in its HTML.
    origin_video_url = 'https://www.bilibili.com/video/' + aid
    res = requests.get(origin_video_url, headers=headers)
    html = etree.HTML(res.text)
    titles = html.xpath('//*[@id="viewbox_report"]/h1/span/text()')
    # Fall back to the id when the title node is missing, so an unexpected
    # page layout does not abort the download with an IndexError.
    title = titles[0] if titles else aid
    print('您当前正在下载:', title)

    # The lazy group needs a terminator: a bare '(.*?)' at the end of the
    # pattern always captures the empty string, which is not valid JSON.
    video_info_temp = re_video_info(res.text, '__playinfo__=(.*?)</script><script>')
    data = video_info_temp['data']
    dash = data['dash']

    # Video quality label.
    quality = data['accept_description'][acc_quality]
    # Video and audio stream URLs. The audio list is independent of the video
    # list and may be shorter, so the index is clamped to its last entry.
    video_url = dash['video'][acc_quality]['baseUrl']
    audio_tracks = dash['audio']
    audio_url = audio_tracks[min(acc_quality, len(audio_tracks) - 1)]['baseUrl']

    # Duration in whole seconds, split into minutes and seconds for display.
    video_minute, video_second = divmod(int(dash.get('duration', 0)), 60)
    print('当前视频清晰度为{},时长{}分{}秒'.format(quality, video_minute, video_second))

    # Download and save the video.
    download_video_single(origin_video_url, video_url, audio_url, title)
3.3 编写保存视频和音频的代码
def download_video_single(referer_url, video_url, audio_url, video_name):
    """Download the video and audio streams of one video, then merge them.

    The streams are written to ``<video_name>_video.mp4`` and
    ``<video_name>_audio.mp4``; ``video_audio_merge_single`` is called
    afterwards.

    Args:
        referer_url: URL of the video page, sent as the ``Referer`` header
            with the stream requests.
        video_url: URL of the video stream.
        audio_url: URL of the audio stream.
        video_name: Base name (without suffix) of the files to write.

    Raises:
        requests.HTTPError: If a stream request returns an error status.
    """
    # Work on a copy so Referer/Range do not leak into the module-level
    # ``headers`` that is reused for page requests.
    request_headers = dict(headers)
    request_headers['Referer'] = referer_url
    request_headers['Range'] = 'bytes=0-'

    print("视频下载开始:%s" % video_name)
    streams = (
        ('视频', video_url, '%s_video.mp4' % video_name),
        ('音频', audio_url, '%s_audio.mp4' % video_name),
    )
    for label, stream_url, target in streams:
        # One streamed request per file: the size comes from the response
        # headers, and the body is written chunk by chunk instead of being
        # fetched twice and held in memory.
        with requests.get(stream_url, headers=request_headers, stream=True) as response:
            response.raise_for_status()
            size_mb = round(int(response.headers.get('content-length', 0)) / 1024 / 1024, 2)
            print('%s\t%s大小:' % (video_name, label), size_mb, '\tMB')
            # 'wb' rather than 'ab': a re-run must replace a stale or partial
            # file, not append a second copy to it.
            with open(target, 'wb') as output:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    output.write(chunk)
    print("视频下载结束:%s" % video_name)
    video_audio_merge_single(video_name)
3.4 将下载好的音频和视频合并
def video_audio_merge_single(video_name):
    """Merge the downloaded video and audio files into one MP4 with ffmpeg.

    Reads ``<video_name>_video.mp4`` and ``<video_name>_audio.mp4`` and writes
    ``<video_name>.mp4`` (overwriting an existing file). ``ffmpeg`` must be
    available on the PATH.

    Args:
        video_name: Base name (without suffix) shared by the input and
            output files.
    """
    import subprocess

    print("视频合成开始:%s" % video_name)
    # Pass an argument list instead of a shell string: the name comes from a
    # web page title, so it may contain spaces or shell metacharacters.
    command = [
        'ffmpeg',
        '-y',
        '-loglevel', 'quiet',
        '-i', '%s_video.mp4' % video_name,
        '-i', '%s_audio.mp4' % video_name,
        '-c', 'copy',
        '%s.mp4' % video_name,
    ]
    try:
        # run() waits for ffmpeg, so the "finished" message is only printed
        # once the merge is really done.
        result = subprocess.run(command)
    except OSError as exc:
        # e.g. ffmpeg is not installed; report it instead of crashing.
        print("视频合成失败:%s (%s)" % (video_name, exc))
        return
    if result.returncode != 0:
        print("视频合成失败:%s (ffmpeg 返回码 %s)" % (video_name, result.returncode))
        return
    print("视频合成结束:%s" % video_name)
3.5 运行测试
#!/usr/bin/env python
# coding:utf-8
# @Time:2021/7/25 12:47
# @File: 爬取哔哩哔哩视频.py
# @Software: PyCharm
3.1 准备工作
依赖的包
import json
import os
import re
import shutil
import ssl
import subprocess
import sys
import time
import warnings
from concurrent.futures import ThreadPoolExecutor
from random import choice

import requests
from lxml import etree

warnings.filterwarnings("ignore")  # silence all warnings
需要改变的具体数据项:
进入视频播放页面,按F12键进入,按F5键刷新,再暂停。
通过类似 BV1z4411N7iv?from=search&seid=9453069623894011451 页面
1、按 CTRL+F 查找 title 或自定义 title
title = 'Pandas数据分析_Python进阶'
2、点击 Response 及下面的 { } ,找到 window.__INITIAL_STATE__ = {
"bvid":"BV1z4411N7iv"
"aid":55166475,
"p":30, # 视频数量
3、点击 Header 获得:
a、 'referer': 'https://www.bilibili.com/video/BV1z4411N7iv?from=search&seid=9453069623894011451',
b、url = 'https://api.bilibili.com/x/player/pagelist?bvid=BV1XW411Q7VQ&jsonp=jsonp'
c、由 Remote Address 获得 proxy 地址
以下两行为需要更新的内容
# Display name of the series; also used as the name of the download folder.
title = 'PyQt5开发与实战'
# Address of the video page; the BV id is taken from its path.
main_address = 'https://www.bilibili.com/video/BV154411n79k?from=search&seid=9894826006664265144'

# Last path segment before the query string, e.g. 'BV154411n79k'.
bvid = main_address.split('?')[0].split('/')[-1]
filepath = os.path.join(r'D:\software', title)
# `~os.path.exists(...)` is a bitwise NOT on a bool and is always truthy, so
# the original check never skipped anything. makedirs(exist_ok=True) creates
# the folder (and missing parents) and accepts an existing one.
try:
    os.makedirs(filepath, exist_ok=True)
except OSError as exc:
    # Stay best-effort as before, but report the real reason.
    print('创建目录失败:', exc)

# Proxy used for every request.
proxy = {'https': 'http://10.22.96.29:8080', 'http': 'http://10.22.96.29:8080'}

# Request headers imitating a desktop browser.
# NOTE(review): 'br' in accept-encoding can only be decoded by requests when a
# brotli package is installed — confirm, or drop 'br'.
headers = {
    'Accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'referer': main_address,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'
}

# Pool of User-Agent strings used by get_user_agent(). Built once at import
# time instead of on every call. Duplicate entries are kept on purpose so the
# selection probabilities stay the same as in the original list.
USER_AGENTS = (
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1",
    "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20",
    "Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
    "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
)


def get_user_agent():
    """Return a User-Agent string chosen at random from ``USER_AGENTS``.

    Returns:
        str: One entry of the pool, to be sent as the ``User-Agent`` header
        so the request imitates a browser.
    """
    return choice(USER_AGENTS)
3.2 编写下载代码
3.2.1 编写正则表达式
def re_video_info(text, pattern):
    """Extract the JSON captured by ``pattern`` from ``text`` and decode it.

    Args:
        text: Page HTML to search.
        pattern: Regular expression whose first group captures a JSON
            document.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If ``pattern`` does not match ``text`` or the captured
            text is not valid JSON (``json.JSONDecodeError`` is a subclass).
    """
    match = re.search(pattern, text)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('未匹配到视频信息: %s' % pattern)
    return json.loads(match.group(1))


def single_download(url, acc_quality, page, filename):
    """Download one part (page) of a multi-part video.

    Args:
        url: Address of the video page without a query string.
        acc_quality: Index into the stream lists of the play information
            (``0`` selects the first entry).
        page: 1-based part number, sent as the ``p`` query parameter.
        filename: Base file name for this part; it is placed inside the
            module-level ``filepath`` folder.

    Raises:
        ValueError: If the play information cannot be found in the page.
        KeyError, IndexError: If the play information lacks the expected
            fields or ``acc_quality`` is out of range for the video list.
    """
    # Request the page of this part; the play information is in its HTML.
    origin_video_url = url + '?p={0}'.format(str(page).strip())
    # NOTE: verify=False turns off TLS certificate verification for this
    # request; it is kept because the requests go through ``proxy``.
    res = requests.get(origin_video_url, headers=headers, timeout=6, proxies=proxy, verify=False)
    filename = os.path.join(filepath, filename)
    print('您当前正在下载:', title+': ' + filename)

    video_info_temp = re_video_info(res.text, '__playinfo__=(.*?)</script><script>')
    dash = video_info_temp['data']['dash']
    # Video and audio stream URLs. The audio list is independent of the video
    # list and may be shorter, so the index is clamped to its last entry.
    video_url = dash['video'][acc_quality]['baseUrl']
    audio_tracks = dash['audio']
    audio_url = audio_tracks[min(acc_quality, len(audio_tracks) - 1)]['baseUrl']

    # Download and save the video.
    download_video_single(origin_video_url, video_url, audio_url, filename)


# 3.3 Download the video and audio streams

def download_video_single(referer_url, video_url, audio_url, video_name):
    """Download the video and audio streams of one part, then merge them.

    The streams are written to ``<video_name>_video.mp4`` and
    ``<video_name>_audio.mp3``; ``video_audio_merge_single`` is called
    afterwards.

    Args:
        referer_url: URL of the video page, sent as the ``Referer`` header
            with the stream requests.
        video_url: URL of the video stream.
        audio_url: URL of the audio stream.
        video_name: Path without suffix of the files to write.

    Raises:
        requests.HTTPError: If a stream request returns an error status.
    """
    # Build per-download headers on a copy: the module-level ``headers`` is
    # reused for page requests and must not pick up Range/Referer. The
    # existing lower-case 'referer' entry is dropped so only one is sent.
    request_headers = {k: v for k, v in headers.items() if k.lower() != 'referer'}
    request_headers['Referer'] = referer_url
    request_headers['Range'] = 'bytes=0-'

    targets = (
        (video_url, '%s_video.mp4' % video_name),
        (audio_url, '%s_audio.mp3' % video_name),
    )
    for stream_url, target in targets:
        with requests.get(stream_url, headers=request_headers, timeout=20,
                          proxies=proxy, verify=False, stream=True) as response:
            response.raise_for_status()
            # 'wb' rather than 'ab': a re-run must replace a stale or partial
            # file, not append a second copy to it.
            with open(target, 'wb') as output:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    output.write(chunk)
    video_audio_merge_single(video_name)


# 3.4 Merge the downloaded audio and video

# Location of the ffmpeg executable used for merging.
FFMPEG_PATH = r'D:\software\ffmpeg.exe'


def video_audio_merge_single(video_name):
    """Merge the downloaded audio and video of one part with ffmpeg.

    Reads ``<video_name>_audio.mp3`` and ``<video_name>_video.mp4`` and writes
    ``<video_name>.mp4`` (overwriting an existing file). The two input files
    are deleted only after ffmpeg has finished successfully.

    Args:
        video_name: Path without suffix shared by the input and output files.
    """
    print("视频合成开始:%s" % video_name)
    audio_file = video_name + '_audio.mp3'
    video_file = video_name + '_video.mp4'
    # Argument list instead of a shell string, so paths containing spaces or
    # shell metacharacters are passed through unchanged.
    command = [
        FFMPEG_PATH,
        '-y',  # overwrite the output instead of waiting for a prompt
        '-i', audio_file,
        '-i', video_file,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        video_name + '.mp4',
    ]
    try:
        # run() waits for ffmpeg to exit; the former fixed 20 s sleep could
        # delete the inputs while a long merge was still reading them.
        result = subprocess.run(command)
    except OSError as exc:
        # e.g. the ffmpeg executable is missing; keep the downloaded files.
        print("视频合成失败:%s (%s)" % (video_name, exc))
        return
    if result.returncode != 0:
        # Keep the inputs so the merge can be retried.
        print("视频合成失败:%s (ffmpeg 返回码 %s)" % (video_name, result.returncode))
        return
    for leftover in (audio_file, video_file):
        if os.path.exists(leftover):
            os.remove(leftover)
    print("视频合成结束:%s" % video_name)


# Characters dropped from part titles when building file names. The raw
# strings r'\n' and r'\t' of the original list are two characters long and
# could never equal a single character; real control characters are used
# here. The characters Windows forbids in file names are dropped as well.
_REMOVED_CHARS = frozenset(' /\n\r\t[]【】{}()、,:' + '\\*?"<>|')

# First part (1-based) that main() downloads by default; earlier parts are
# skipped, matching the original hard-coded `i > 40` check.
START_PAGE = 41


def sanitize_filename(part):
    """Turn a part title into a string usable as a file name.

    Characters in ``_REMOVED_CHARS`` are removed and every ``'.'`` becomes
    ``'_'``; all other characters are kept.

    Args:
        part: Title of one part as returned by the page list API.

    Returns:
        str: The cleaned title (may be empty).
    """
    return ''.join('_' if ch == '.' else ch for ch in part if ch not in _REMOVED_CHARS)


def main(start_page=START_PAGE):
    """Download the parts of the configured video and report the run time.

    Queries the page list of ``bvid``, builds one file name per part and
    downloads every part whose 1-based number is at least ``start_page``.

    Args:
        start_page: 1-based number of the first part to download. Pass ``1``
            to download everything; the default resumes at part 41.
    """
    start_time = time.time()
    url = r'https://api.bilibili.com/x/player/pagelist?bvid=' + bvid + r'&jsonp=jsonp'
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(url, proxies=proxy, timeout=20)
    res.encoding = 'utf-8'
    result = json.loads(res.text)['data']

    filenames = [sanitize_filename(it['part']) for it in result]
    print(filenames)

    url = r'https://www.bilibili.com/video/' + bvid
    for page, name in enumerate(filenames, start=1):
        if page < start_page:
            continue
        # Zero-padded part number keeps the files sorted in playback order.
        zf = str(page).zfill(3) + '_' + name
        single_download(url, 0, page, zf)
    # Current time minus start time = run time of the program.
    print("程序运行耗时:{}".format(time.time() - start_time))


if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了