有modal_id后下载抖音视频
此程序是通过包含modal_id的链接下载视频、音频、图文
配合前面a_b参数可以实现批量下载
单线程 没有添加代理池,批量爬取或使用框架 自行添加
点击查看代码
import json
import os
import re
import sys
import requests
from urllib.parse import unquote
from tqdm import tqdm
import time
class DouyinDownloader:
def __init__(self, share_link, download_folder='doyinVideo'):
"""
初始化 DouyinDownloader 类
:param share_link: 抖音视频分享链接
:param download_folder: 下载保存的文件夹,默认是 'doyinVideo'
"""
self.share_link = share_link
os.makedirs('video', exist_ok=True)
self.download_folder = os.path.join('video',download_folder)
self.headers = {
'Referer': 'https://www.douyin.com/',
'cookie': 'douyin.com; device_web_cpu_core=10; device_web_memory_size=8; __ac_nonce=06760391f00b9b51264ae; __ac_signature=_02B4Z6wo00f019a5ceAAAIDAhEZR-X3jjWfWmXVAAJLXd4; ttwid=1%7C7MTKBSMsP4eOv9h5NAh8p0E-NYIud09ftNmB0mjLpWc%7C1734359327%7C8794abeabbd47447e1f56e5abc726be089f2a0344d6343b5f75f23e7b0f0028f; UIFID_TEMP=0de8750d2b188f4235dbfd208e44abbb976428f0720eb983255afefa45d39c0c6532e1d4768dd8587bf919f866ff1396912bcb2af71efee56a14a2a9f37b74010d0a0413795262f6d4afe02a032ac7ab; s_v_web_id=verify_m4r4ribr_c7krmY1z_WoeI_43po_ATpO_I4o8U1bex2D7; hevc_supported=true; home_can_add_dy_2_desktop=%220%22; dy_swidth=2560; dy_sheight=1440; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A2560%2C%5C%22screen_height%5C%22%3A1440%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A10%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A50%7D%22; strategyABtestKey=%221734359328.577%22; csrf_session_id=2f53aed9aa6974e83aa9a1014180c3a4; fpk1=U2FsdGVkX1/IpBh0qdmlKAVhGyYHgur4/VtL9AReZoeSxadXn4juKvsakahRGqjxOPytHWspYoBogyhS/V6QSw==; fpk2=0845b309c7b9b957afd9ecf775a4c21f; passport_csrf_token=d80e0c5b2fa2328219856be5ba7e671e; passport_csrf_token_default=d80e0c5b2fa2328219856be5ba7e671e; odin_tt=3c891091d2eb0f4718c1d5645bc4a0017032d4d5aa989decb729e9da2ad570918cbe5e9133dc6b145fa8c758de98efe32ff1f81aa0d611e838cc73ab08ef7d3f6adf66ab4d10e8372ddd628f94f16b8e; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A0.5%7D; bd_ticket_guard_client_web_domain=2; FORCE_LOGIN=%7B%22videoConsumedRemainSeconds%22%3A180%7D; UIFID=0de8750d2b188f4235dbfd208e44abbb976428f0720eb983255afefa45d39c0c6532e1d4768dd8587bf919f866ff139655a3c2b735923234f371c699560c657923fd3d6c5b63ab7bb9b83423b6cb4787e2ce66a7fbc4ecb24c8570f520fe6de068bbb95115023c0c6c1b6ee31b49fb7e3996fb8349f43a3fd8b7a61cd9e18e8fe65eb6a7c13de4c0960d84e344b644725db3eb2fa6b7caf821de1b50527979f2; is_dash_user=1; biz_trace_id=b57a241f; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTEo2R0lDalVoWW1XcHpGOFdrN0Vrc0dXcCtaUzNKY1g4NGNGY2k0TTl1TEowNjdUb21mbFU5aDdvWVBGamhNRWNRQWtKdnN1MnM3RmpTWnlJQXpHMjA9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D; download_guide=%221%2F20241216%2F0%22; sdk_source_info=7e276470716a68645a606960273f276364697660272927676c715a6d6069756077273f276364697660272927666d776a68605a607d71606b766c6a6b5a7666776c7571273f275e58272927666a6b766a69605a696c6061273f27636469766027292762696a6764695a7364776c6467696076273f275e5827292771273f273d33323131333c3036313632342778; bit_env=RiOY4jzzpxZoVCl6zdVSVhVRjdwHRTxqcqWdqMBZLPGjMdB4Tax1kAELHNTVAAh72KuhumewE4Lq6f0-VJ2UpJrkrhSxoPw9LUb3zQrq1OSwbeSPHkRlRgRQvO89sItdGUyq1oFr0XyRCnMYG87KSeWyc4x0czGR0o50hTDoDLG5rJVoRcdQOLvjiAegsqyytKF59sPX_QM9qffK2SqYsg0hCggURc_AI6kguDDE5DvG0bnyz1utw4z1eEnIoLrkGDqzqBZj4dOAr0BVU6ofbsS-pOQ2u2PM1dLP9FlBVBlVaqYVgHJeSLsR5k76BRTddUjTb4zEilVIEwAMJWGN4I1BxVt6fC9B5tBQpuT0lj3n3eKXCKXZsd8FrEs5_pbfDsxV-e_WMiXI2ff4qxiTC0U73sfo9OpicKICtZjdq8qsHxJuu6wVR36zvXeL2Wch5C6MzprNvkivv0l8nbh2mSgy1nabZr3dmU6NcR-Bg3Q3xTWUlR9aAUmpopC-cNuXjgLpT-Lw1AYGilSUnCvosth1Gfypq-b0MpgmdSDgTrQ%3D; gulu_source_res=eyJwX2luIjoiMDhjOGQ3ZTJiODQyNjZkZWI5Y2VkMGJiODNlNmY1ZWY0ZjMyNTE2ZmYyZjAzNDMzZjI0OWU1Y2Q1NTczNTk5NyJ9; passport_auth_mix_state=hp9bc3dgb1tm5wd8p82zawus27g0e3ue; IsDouyinActive=false',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}
# 确保下载文件夹存在
os.makedirs(self.download_folder, exist_ok=True)
def sanitize_filename(self, title):
"""
视频和音频文件名
替换非法字符(例如 Windows 系统中的非法字符)
"""
sanitized_title = re.sub(r'[<>:"/\\|?*]', '_', title) # 将非法字符替换为 '_'
sanitized_title = sanitized_title.strip() # 去除两端的空格
sanitized_title = sanitized_title[:50]
return sanitized_title
def download_video(self, url, title):
"""下载视频"""
response = requests.get(url, headers=self.headers, stream=True)
print(f"HTTP Status Code: {response.status_code}")
if response.status_code == 200:
# 获取文件的总大小
total_size = int(response.headers.get('Content-Length', 0))
video_path = os.path.join(self.download_folder, f"{self.sanitize_filename(title)}.mp4")
# 保存视频文件
with open(video_path, 'wb') as f:
with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading") as pbar:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(len(chunk)) # 更新进度条
print(f"Video {video_path} downloaded successfully.")
else:
print("Failed to retrieve the video.")
def download_image(self, url,image_path):
"""下载图片"""
response = requests.get(url, headers=self.headers, stream=True)
print(f"HTTP Status Code: {response.status_code}")
if response.status_code == 200:
# 获取文件的总大小
total_size = int(response.headers.get('Content-Length', 0))
# 保存文件
with open(image_path, 'wb') as f:
with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading") as pbar:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(len(chunk)) # 更新进度条
print(f"Video {image_path} downloaded successfully.")
time.sleep(0.5)
else:
print("Failed to retrieve the video.")
def download_audio(self, url, audio_path):
"""下载音频"""
response = requests.get(url, headers=self.headers, stream=True)
print(f"HTTP Status Code: {response.status_code}")
if response.status_code == 200:
# 获取文件的总大小
total_size = int(response.headers.get('Content-Length', 0))
# 保存音频文件
with open(audio_path, 'wb') as f:
with tqdm(total=total_size, unit='B', unit_scale=True, desc="Downloading") as pbar:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(len(chunk)) # 更新进度条
print(f"Video {audio_path} downloaded successfully.")
time.sleep(0.5)
def get_video_url(self, url):
"""获取视频播放地址"""
response = requests.get(url, headers=self.headers)
content = re.findall('</div><script id="RENDER_DATA" type="application/json">(.*?)</script>', response.text)
content = unquote(content[0])
content = json.loads(content)
video_list = content["app"]["videoDetail"]["video"]["bitRateList"]
# content['app']['videoDetail']['mediaType'] 2 是图文类型
media_type = content['app']['videoDetail']['mediaType']
if media_type == 2:
image_List = content['app']['videoDetail']['images']
title = self.sanitize_filename(content["app"]["videoDetail"]["desc"])
image_folder = os.path.join(self.download_folder,f"图文_{title}")
os.makedirs(image_folder, exist_ok=True)
# 下载图文
for index,image_detail in enumerate(image_List):
image_path = os.path.join(image_folder, f"{(title)}_{index+1}.jpg")
image_url = image_detail['urlList'][0]
self.download_image( image_url,image_path)
# 下载mp3
mp3_url = content['app']['videoDetail']['music']['playUrl']['uri']
audio_path = os.path.join(image_folder, f"{(title)}.mp3")
self.download_audio(mp3_url,audio_path)
print('图文已下载完成 请在视频文件夹中查看')
sys.exit(2)
else:
part_url = content["app"]["videoDetail"]["video"]["bitRateList"][0]['playAddr'][0]['src'] # 视频类型
title = content["app"]["videoDetail"]["desc"]
print(f"Title: {title}")
print("Video URL: https:" + part_url)
return "https:" + part_url, title
# 通过 搜索、视频分享、discover等获取modal_id
def get_modalid_from_share_link(self):
"""从分享链接中提取 modal_id"""
# 匹配分享视频链接 (例如 https://www.douyin.com/video/{modal_id})
video_pattern = r'https://www\.douyin\.com/video/(\d+)'
# 匹配带有 modal_id 参数的用户链接
user_pattern = r'https://www\.douyin\.com/user/.+?modal_id=(\d+)'
# 匹配带有 modal_id 参数的 discover 链接
discover_pattern = r'https://www\.douyin\.com/discover\?modal_id=(\d+)'
# 匹配带有 modal_id 参数的搜索链接(新增)
search_pattern = r'https://www\.douyin\.com/search/.+?modal_id=(\d+)'
# 尝试匹配分享视频链接中的 modal_id
match = re.search(video_pattern, self.share_link)
if match:
modal_id = match.group(1)
print(f"Extracted modal_id from video link: {modal_id}")
return modal_id
# 尝试匹配用户链接中的 modal_id
match = re.search(user_pattern, self.share_link)
if match:
modal_id = match.group(1)
print(f"Extracted modal_id from user link: {modal_id}")
return modal_id
# 尝试匹配 discover 链接中的 modal_id
match = re.search(discover_pattern, self.share_link)
if match:
modal_id = match.group(1)
print(f"Extracted modal_id from discover link: {modal_id}")
return modal_id
# 尝试匹配搜索链接中的 modal_id
match = re.search(search_pattern, self.share_link)
if match:
modal_id = match.group(1)
print(f"Extracted modal_id from search link: {modal_id}")
return modal_id
# 如果没有找到,继续处理分享链接
pattern = r'https://v\.douyin\.com/[a-zA-Z0-9]+/?'
try:
# 提取分享链接中的 URL 部分
url = re.findall(pattern, self.share_link)[0]
except Exception as e:
print('Invalid URL')
return None
print(f"Extracted URL: {url}")
# 重试机制,最多重试5次
max_retries = 5
retries = 0
while retries < max_retries:
try:
# 使用线程来进行请求
response = self.make_request(url)
# 检查是否成功获取最终重定向 URL
if response.url:
print(f"Final Redirect URL: {response.url}")
print(response.url)
# 提取 video modal_id
pattern = r'https://www\.douyin\.com/video/(\d+)'
match = re.search(pattern, response.url)
if match:
modal_id = match.group(1)
print(f"Extracted modal_id: {modal_id}")
return modal_id
else:
print("No modal_id found in final URL.")
retries += 1
time.sleep(2) # 等待 2 秒再尝试
print(f"Retrying... ({retries}/{max_retries})")
else:
print("Invalid response URL. Retrying...")
retries += 1
time.sleep(2) # 等待 2 秒再尝试
print(f"Retrying... ({retries}/{max_retries})")
except requests.exceptions.RequestException as e:
retries += 1
print(f"Request error: {e}. Retrying... ({retries}/{max_retries})")
time.sleep(2) # 等待 2 秒再尝试
print("Max retries reached. Could not retrieve the modal_id.")
return None
def make_request(self, url):
"""处理请求的函数,包含超时设置"""
try:
response = requests.get(url, headers=self.headers, allow_redirects=True, timeout=10)
return response
except requests.exceptions.RequestException as e:
print(f"Request error: {e}")
raise
def start_download(self):
"""主函数,开始下载视频"""
modal_id = self.get_modalid_from_share_link()
print()
if not modal_id:
print("Invalid share link.")
else:
url = f'https://www.douyin.com/discover?modal_id={modal_id}'
play_url, title = self.get_video_url(url)
self.download_video(play_url, title)
# 示例调用方法
if __name__ == '__main__':
# 通过命令行传入分享链接
print('开始运行')
import argparse
parser = argparse.ArgumentParser(description="Douyin Video Downloader")
parser.add_argument('share_link', type=str, help='Douyin video share link')
args = parser.parse_args()
downloader = DouyinDownloader(f'''{args.share_link}''')
downloader.start_download()
# 示例 python douyin_main.py "8.79 l@P.xF 09/09 sEU:/ 2分钟看全美国总统就职典礼流程,再花几分钟和大家聊聊特朗普这次就职有何看点?关注我,典礼结束继续分析 # 零基础看懂全球 # 重返白宫 # 硬核深度计划 https://v.douyin.com/ifMw5hWX/ 复制此链接,打开Dou音搜索,直接观看视频!"
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通