from urllib.parse import urljoin
import requests
import os
from concurrent.futures import ThreadPoolExecutor, wait
import re
# Browser-like User-Agent passed as ``headers=`` to the requests calls below.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/100.0.4896.75 Safari/537.36"
    ),
}
def down_video(url, i, max_retries=10, timeout=30):
    """Download one .ts segment and save it as ``<path>/<i>.ts``.

    The target directory is not a parameter: it is read from the
    module-level ``path`` variable assigned in the ``__main__`` section.

    Args:
        url: URL of the segment to fetch.
        i: Sequence number used as the local file name.
        max_retries: Maximum number of attempts before giving up.
        timeout: Per-request timeout in seconds.

    Returns:
        True if the segment was saved, False if every attempt failed.
    """
    print(f"{i}.ts开始下载")
    target = os.path.join(path, str(i) + '.ts')
    for _ in range(max_retries):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            # Do not store an HTTP error page as if it were video data.
            resp.raise_for_status()
            with open(target, mode="wb") as f3:
                f3.write(resp.content)
        except (requests.RequestException, OSError):
            # Only network / file errors are retried; programming errors
            # (e.g. an undefined ``path``) propagate instead of looping.
            print("下载失败,重新下载")
        else:
            print(f"{i}.ts下载完成")
            return True
    print(f"{i}.ts下载失败,已放弃")
    return False
def download_all_videos(path, host):
    """Download every segment listed in ``index.m3u8`` with a thread pool.

    ``index.m3u8`` is read from the current working directory. Each line
    that does not start with ``#`` is prefixed with ``host`` and handed to
    ``down_video`` together with its running number (0, 1, 2, ...).

    NOTE: ``path`` is only used here to create the directory; ``down_video``
    takes no directory argument and reads the module-level ``path`` instead.

    Args:
        path: Directory that will hold the segments; created if missing.
        host: Prefix prepended to every segment line of the playlist.
    """
    os.makedirs(path, exist_ok=True)
    tasks = []
    # The context manager shuts the pool down (joining its threads) even if
    # reading the playlist fails part-way.
    with ThreadPoolExecutor(max_workers=50) as pool:
        with open("index.m3u8", mode="r", encoding="utf-8") as f:
            # Every non-tag line is numbered, exactly as do_m3u8_url does
            # when it rewrites the playlist, so the file names line up.
            entries = (raw for raw in f if not raw.startswith("#"))
            for i, raw in enumerate(entries):
                line = host + raw
                print(line, i)
                tasks.append(pool.submit(down_video, line.strip(), i))
        # Block until every submitted download has finished.
        wait(tasks)
def do_m3u8_url(url, path, m3u8_filename="index.m3u8"):
    """Write a copy of the playlist that points at the local segment files.

    Reads ``m3u8_filename`` from the current working directory and writes
    the rewritten copy to ``<path>/<m3u8_filename>``:

    * every line that does not start with ``#`` becomes ``<n>.ts``, numbered
      from 0 in order of appearance;
    * a tag line containing ``URI="..."`` (e.g. ``#EXT-X-KEY``) has that
      attribute replaced by ``URI="key.m3u8"`` while its other attributes
      are kept, and the referenced key is fetched with ``download_m3u8``
      into ``<path>/key.m3u8``;
    * all other tag lines are copied unchanged.

    Args:
        url: URL the playlist was downloaded from; the key URI is resolved
            against it.
        path: Output directory; created if missing.
        m3u8_filename: Name of the playlist, both as input and as output.
    """
    os.makedirs(path, exist_ok=True)
    with open(m3u8_filename, mode="r", encoding="utf-8") as f:
        data = f.readlines()
    i = 0
    # ``with`` guarantees the rewritten playlist is flushed and closed.
    with open(os.path.join(path, m3u8_filename), 'w', encoding='UTF-8') as fw:
        for line in data:
            if not line.startswith("#"):
                fw.write(f'{i}.ts\n')
                i += 1
                continue
            # e.g. #EXT-X-KEY:METHOD=AES-128,URI="/2022/x/hls/key.key"
            match = re.search(r'URI="([^"]*)"', line)
            if match:
                # Swap only the URI attribute so that anything following it
                # on the line (such as IV=...) is preserved.
                line = (line[:match.start()] + 'URI="key.m3u8"'
                        + line[match.end():])
                print("line", line)
                host = url.rsplit('/', 1)[0]
                print("host", host)
                # urljoin handles absolute, root-relative and relative URIs.
                key_url = urljoin(url, match.group(1))
                download_m3u8(key_url, os.path.join(path, 'key.m3u8'))
            fw.write(line)
def download_m3u8(url, m3u8_filename="index.m3u8", state=0, timeout=30):
    """Download ``url`` and save the response body to ``m3u8_filename``.

    ``do_m3u8_url`` also uses this function to fetch the decryption key, so
    the body is written as raw bytes: decoding it to text and re-encoding it
    would corrupt binary key data.

    Args:
        url: Address to fetch.
        m3u8_filename: Path of the file to write.
        state: Unused; kept so existing callers keep working.
        timeout: Request timeout in seconds.

    Raises:
        requests.RequestException: On connection failure, timeout or an
            HTTP error status.
    """
    print('正在下载index.m3u8文件')
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    with open(m3u8_filename, mode="wb") as f:
        f.write(resp.content)
def merge(path, filename='output'):
    """Merge the segments referenced by ``<path>/index.m3u8`` into an mp4.

    Runs ``ffmpeg -i index.m3u8 -c copy <filename>.mp4`` with ``path`` as
    the working directory, so the output file is created inside ``path``.
    (The original comment recommends merging through the playlist because
    it avoids audio/video drift.)

    Args:
        path: Directory containing ``index.m3u8`` and the segment files.
        filename: Output file name without the ``.mp4`` extension.
    """
    # Local imports: the module's import block does not provide these.
    import shutil
    import subprocess

    if shutil.which('ffmpeg') is None:
        print('ffmpeg not found on PATH; skipping merge')
        return
    # An argument list (no shell) keeps file names with spaces or shell
    # metacharacters intact, and ``cwd`` avoids changing the working
    # directory of the whole process.
    cmd = ['ffmpeg', '-i', 'index.m3u8', '-c', 'copy', f'{filename}.mp4']
    subprocess.run(cmd, cwd=path, check=False)
def _first_playlist_uri(text):
    """Return the first non-blank line of ``text`` not starting with '#'."""
    for raw in text.splitlines():
        candidate = raw.strip()
        if candidate and not candidate.startswith('#'):
            return candidate
    return None


def get_m3u8data(first_m3u8url):
    """Follow the first playlist to the second one and save the latter.

    The first playlist is expected to contain a line pointing at the
    playlist that lists all .ts segments. That line is resolved against
    ``first_m3u8url`` and the resulting playlist is written to
    ``index.m3u8`` in the current working directory.

    Args:
        first_m3u8url: URL of the first (top-level) playlist.

    Returns:
        The absolute URL of the second playlist.

    Raises:
        ValueError: If the first playlist contains no URI line.
        requests.RequestException: On connection failure, timeout or an
            HTTP error status.
    """
    with requests.Session() as session:
        resp = session.get(first_m3u8url, headers=headers, timeout=30)
        resp.raise_for_status()
        resp.encoding = 'UTF-8'
        uri = _first_playlist_uri(resp.text)
        if uri is None:
            raise ValueError(f'no playlist URI found in {first_m3u8url}')
        # urljoin resolves absolute, root-relative and relative references,
        # independent of how deep the path is.
        second_m3u8_url = urljoin(first_m3u8url, uri)
        resp = session.get(second_m3u8_url, headers=headers, timeout=30)
        resp.raise_for_status()
    with open('index.m3u8', 'wb') as f:
        f.write(resp.content)
    return second_m3u8_url
if __name__ == '__main__':
    # Directory where the .ts segments are stored. down_video() reads this
    # module-level name, so it must be assigned before any download starts.
    path = 'ts'
    # URL of the first (top-level) index.m3u8 of the encrypted stream.
    url = 'https://s7.fsvod1.com/20220622/5LnZiDXn/index.m3u8'
    meu8_url = get_m3u8data(url)
    print(meu8_url)
    # Remaining pipeline steps, currently disabled:
    # # download the segments listed in index.m3u8
    # host = 'https://s7.fsvod1.com'  # prefix for segment and key paths
    # download_all_videos(path, host)
    # do_m3u8_url(meu8_url, path)
    # # merge the segments
    # merge(path, '奇异博士')
    print('over')
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Format conversion
import requests
import re
from urllib.parse import urljoin
import os
from concurrent.futures import ThreadPoolExecutor, wait
# Directory that receives the downloaded segments before conversion.
file_path = "ts_before"
# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(file_path, exist_ok=True)

# Browser-like User-Agent passed as ``headers=`` to the requests calls below.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36"
}
def get_frame_src(url):
    """Return the index.m3u8 URL embedded in the page at ``url``.

    The page source is searched for ``"url":"...index.m3u8"`` and the
    captured value is returned with all backslashes removed (presumably the
    value is JSON-escaped, e.g. ``\\/`` for ``/`` - confirm against a real
    page).

    Args:
        url: Address of the player page.

    Returns:
        The m3u8 URL found in the page.

    Raises:
        ValueError: If the page contains no matching ``"url"`` entry.
    """
    with requests.Session() as session:
        resp = session.get(url=url, headers=headers, timeout=30)
    page_source = resp.text
    match = re.search(r'"url":"(?P<url>.*?index\.m3u8)"', page_source)
    if match is None:
        raise ValueError(f'no index.m3u8 url found in page {url}')
    return match.group("url").replace("\\", "")
def get_firstm3u8(src_url):
    """Download ``first.m3u8`` and the playlist it points to.

    The body fetched from ``src_url`` is saved as ``first.m3u8``. Every
    non-blank line of it that is not a ``#`` tag is resolved against
    ``src_url`` (the paths in the file are incomplete), fetched, and saved
    as ``second.m3u8``; if there are several such lines, the last one wins.

    Args:
        src_url: URL of the first playlist.
    """
    with requests.Session() as session:
        resp = session.get(url=src_url, headers=headers, timeout=30)
        with open("first.m3u8", "w", encoding="utf-8") as f1:
            f1.write(resp.text)
        print("first.m3u8 文件下载完成")
        for line in resp.text.splitlines():
            line = line.strip()
            # A blank line would resolve to src_url itself and overwrite
            # second.m3u8 with the first playlist, so skip it.
            if not line or line.startswith("#"):
                continue
            print(line)
            src = urljoin(src_url, line)
            response = session.get(url=src, headers=headers, timeout=30)
            print(response.text)
            with open("second.m3u8", "w", encoding="utf-8", newline="") as f3:
                f3.write(response.text)
            print("second.m3u8 文件下载完成")
def download_one_video(url, i, timeout=30):
    """Download one segment and save it as ``<file_path>/<i>.ts``.

    The target directory is the module-level ``file_path``.

    Args:
        url: URL of the segment.
        i: Sequence number used as the local file name.
        timeout: Request timeout in seconds.

    Raises:
        requests.RequestException: On connection failure, timeout or an
            HTTP error status; nothing is written in that case.
    """
    print(f"{i}.ts开始下载")
    # A plain request releases its connection when done; the previous
    # per-call session was never closed.
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    with open(os.path.join(file_path, f"{i}.ts"), "wb") as f:
        f.write(resp.content)
    print(f"{i}.ts下载完成")
def download_all_video():
    """Download every segment listed in ``second.m3u8`` with a thread pool.

    Each non-blank line that is not a ``#`` tag is treated as a segment URL
    and passed to ``download_one_video`` with its running number (0, 1, ...).
    Downloads that raised are reported once all workers have finished.
    """
    with open("second.m3u8", "r", encoding="utf-8") as f:
        urls = [
            line.strip()
            for line in f
            if line.strip() and not line.startswith("#")
        ]
    # The context manager shuts the pool down and joins its threads.
    with ThreadPoolExecutor(50) as pool:
        tasks = [
            pool.submit(download_one_video, segment_url, i)
            for i, segment_url in enumerate(urls)
        ]
        wait(tasks)
    # Exceptions raised in a worker are stored on its future; surface them
    # so that missing segments do not go unnoticed.
    for i, task in enumerate(tasks):
        error = task.exception()
        if error is not None:
            print(f"{i}.ts下载失败: {error}")
def resolve_ts(src_path, dst_path):
    """Copy the segments from ``src_path`` to ``dst_path``, patching headers.

    Each output file is the input file with its first four bytes replaced
    by ``ff ff ff ff`` (the segments are .ts data disguised as PNG). Inputs
    shorter than four bytes produce a file of exactly those four bytes.

    Only entries whose name starts with an integer followed by ``.`` (for
    example ``12.ts``) are processed, in numeric order; anything else in
    ``src_path`` is ignored.

    Args:
        src_path: Directory containing the downloaded segments.
        dst_path: Directory for the patched copies; created if missing.
    """
    os.makedirs(dst_path, exist_ok=True)

    def segment_index(name):
        """Return the integer before the first '.', or None if not numeric."""
        try:
            return int(name.split('.')[0])
        except ValueError:
            return None

    names = [n for n in os.listdir(src_path) if segment_index(n) is not None]
    for name in sorted(names, key=segment_index):
        origin_ts = os.path.join(src_path, name)
        resolved_ts = os.path.join(dst_path, name)
        try:
            with open(origin_ts, "rb") as infile:
                data = infile.read()
            with open(resolved_ts, "wb") as outfile:
                # Same result as writing the data and then overwriting the
                # first four bytes in place.
                outfile.write(b'\xff\xff\xff\xff' + data[4:])
        except OSError as exc:
            # Keep going with the remaining segments, but say what failed.
            print('resolve ' + origin_ts + ' failed: ' + str(exc))
        else:
            print('resolve ' + origin_ts + ' success')
def merge(filePath, filename='output'):
    """Concatenate the segments in ``filePath`` into ``<filename>.mp4``.

    Writes ``./file_list.txt`` with one ``file '<filePath>/<name>'`` line
    per segment, in numeric order, then runs ffmpeg's concat demuxer on it
    from the current working directory. Only entries whose name starts
    with an integer followed by ``.`` are listed.

    Args:
        filePath: Directory containing the segments to merge.
        filename: Output file name without the ``.mp4`` extension.
    """
    # Local imports: the module's import block does not provide these.
    import shutil
    import subprocess

    def segment_index(name):
        """Return the integer before the first '.', or None if not numeric."""
        try:
            return int(name.split('.')[0])
        except ValueError:
            return None

    file_list = sorted(
        (n for n in os.listdir(filePath) if segment_index(n) is not None),
        key=segment_index,
    )
    print(file_list)
    # Write the list as UTF-8 so non-ASCII directory names are not garbled
    # by the platform default encoding.
    with open("./file_list.txt", "w", encoding="utf-8") as f:
        f.writelines(
            "file '{}/{}'\n".format(filePath, file) for file in file_list
        )
    if shutil.which('ffmpeg') is None:
        print('ffmpeg not found on PATH; skipping merge')
        return
    # An argument list (no shell) keeps output names with spaces or shell
    # metacharacters intact.
    cmd = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', 'file_list.txt',
           '-c', 'copy', f'{filename}.mp4']
    subprocess.run(cmd, check=False)
if __name__ == '__main__':
    # Directory that receives the converted segments.
    dst_path = "ts_after"
    # Episode page; only the disabled download steps below use it.
    url = "https://www.9meiju.cc/mohuankehuan/shandianxiadibaji/1-6.html"
    # Download steps, currently disabled:
    # src_url = get_frame_src(url)
    # get_firstm3u8(src_url)
    # download_all_video()
    # Convert the segments already present in file_path, then merge them.
    resolve_ts(src_path=file_path, dst_path=dst_path)
    merge(filePath=dst_path, filename="闪电侠6集")