import os
import re
import subprocess
from concurrent.futures import ThreadPoolExecutor, wait
from urllib.parse import urljoin

import requests
| |
# Browser-like request headers sent with every HTTP request in this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
}
| |
| |
def down_video(url, i, max_retries=5, timeout=30):
    """Download one TS segment and save it as ``<i>.ts``.

    The file is written into the directory named by the module-level
    ``path`` variable (set in the ``__main__`` section of this script).

    Args:
        url: Absolute URL of the segment.
        i: Segment index, used as the output file name.
        max_retries: Maximum number of download attempts.
        timeout: Per-request timeout in seconds.

    Raises:
        RuntimeError: If every attempt failed. The last network or file
            error is attached as the cause.
    """
    print(f"{i}.ts开始下载")
    target = os.path.join(path, str(i) + '.ts')
    last_error = None
    for _ in range(max_retries):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            # Treat HTTP errors as failures so an error page is never
            # stored as if it were video data.
            resp.raise_for_status()
            with open(target, mode="wb") as f3:
                f3.write(resp.content)
        except (requests.RequestException, OSError) as e:
            # Only network and file errors are retried; programming errors
            # (e.g. an undefined global) propagate instead of looping forever.
            last_error = e
            print("下载失败,重新下载")
        else:
            print(f"{i}.ts下载完成")
            return
    raise RuntimeError(
        f"{i}.ts could not be downloaded after {max_retries} attempts: {url}"
    ) from last_error
| |
def download_all_videos(path, host):
    """Download every segment listed in the local ``index.m3u8`` concurrently.

    Each non-comment line of ``index.m3u8`` (read from the current working
    directory) is turned into a URL and handed to ``down_video`` on a thread
    pool. Segments are numbered 0, 1, 2, ... in playlist order.

    Args:
        path: Directory for the downloaded segments; created if missing.
        host: Prefix prepended to every segment line that is not already an
            absolute http(s) URL.
    """
    os.makedirs(path, exist_ok=True)

    jobs = []
    # The context manager shuts the pool down once all work is finished.
    with ThreadPoolExecutor(max_workers=50) as pool:
        with open("index.m3u8", mode="r", encoding="utf-8") as f:
            i = 0
            for line in f:
                line = line.strip()
                # Skip tags/comments and blank lines; a blank line would
                # otherwise be downloaded as the bare host URL.
                if not line or line.startswith("#"):
                    continue
                if line.startswith(("http://", "https://")):
                    segment_url = line
                else:
                    segment_url = host + line
                print(segment_url, i)
                jobs.append((i, pool.submit(down_video, segment_url, i)))
                i += 1
        # Wait for all downloads together.
        wait([job for _, job in jobs])

    # Exceptions raised inside worker threads are stored on the future;
    # report them so that missing segments do not go unnoticed.
    for index, job in jobs:
        error = job.exception()
        if error is not None:
            print(f"{index}.ts download failed: {error!r}")
| |
| |
# Rewrite the URLs inside the m3u8 file so that it references local files.
def do_m3u8_url(url, path, m3u8_filename="index.m3u8"):
    """Write a local copy of the playlist that points at downloaded files.

    Reads ``m3u8_filename`` from the current directory and writes
    ``<path>/<m3u8_filename>`` in which every segment line is replaced by
    ``0.ts``, ``1.ts``, ... in order. For an ``#EXT-X-KEY`` tag carrying a
    ``URI="..."`` attribute, the key is downloaded to ``<path>/key.m3u8`` via
    ``download_m3u8`` and the attribute is rewritten to ``URI="key.m3u8"``;
    other attributes on that line are left untouched.

    Args:
        url: URL the playlist was downloaded from; the key URI is resolved
            against it.
        path: Output directory; created if missing.
        m3u8_filename: Name of both the input playlist (in the current
            directory) and the output playlist (inside ``path``).
    """
    os.makedirs(path, exist_ok=True)

    with open(m3u8_filename, mode="r", encoding="utf-8") as f:
        data = f.readlines()

    i = 0
    # "with" guarantees the rewritten playlist is flushed and closed.
    with open(os.path.join(path, m3u8_filename), 'w', encoding='UTF-8') as fw:
        for line in data:
            # Blank lines carry no meaning in a playlist and must not be
            # counted as segments.
            if not line.strip():
                continue
            if not line.startswith("#"):
                fw.write(f'{i}.ts\n')
                i += 1
                continue

            # Tag line: only an encryption-key tag needs rewriting, e.g.
            # #EXT-X-KEY:METHOD=AES-128,URI="/20220622/5LnZiDXn/1500kb/hls/key.key"
            key_match = None
            if line.startswith("#EXT-X-KEY"):
                key_match = re.search(r'URI="([^"]*)"', line)
            if key_match is not None:
                # Resolve the key location the same way a player would.
                key_url = urljoin(url, key_match.group(1))
                line = (
                    line[:key_match.start()]
                    + 'URI="key.m3u8"'
                    + line[key_match.end():]
                )
                print("line", line)
                print("key_url", key_url)
                # Fetch the key next to the rewritten playlist.
                download_m3u8(key_url, os.path.join(path, 'key.m3u8'))
            fw.write(line)
| |
| |
def download_m3u8(url, m3u8_filename="index.m3u8", state=0):
    """Download ``url`` and store the response body in ``m3u8_filename``.

    The body is written byte-for-byte. This function is also used to fetch
    the AES key file, which is binary and would be corrupted by a
    decode/re-encode round trip.

    Args:
        url: Address of the resource to download.
        m3u8_filename: Destination file path.
        state: Unused; kept so existing callers keep working.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    print(f'正在下载{m3u8_filename}文件')
    resp = requests.get(url=url, headers=headers, timeout=30)
    resp.raise_for_status()
    with open(m3u8_filename, mode="wb") as f:
        f.write(resp.content)
| |
| |
def merge(path, filename='output'):
    """Merge the downloaded segments into ``<filename>.mp4`` with ffmpeg.

    ffmpeg reads the local ``index.m3u8`` inside ``path`` and copies the
    streams without re-encoding; going through the playlist keeps audio and
    video in sync.

    Args:
        path: Directory containing ``index.m3u8`` and the segments. The
            process working directory is changed to it, as before.
        filename: Output file name without the ``.mp4`` extension.
    """
    os.chdir(path)
    # An argument list (no shell) keeps names with spaces or shell
    # metacharacters intact.
    cmd = ['ffmpeg', '-i', 'index.m3u8', '-c', 'copy', f'{filename}.mp4']
    try:
        subprocess.run(cmd)
    except FileNotFoundError:
        # Without a shell a missing executable raises instead of returning a
        # status code; report it rather than crash, as os.system did.
        print('ffmpeg executable not found; nothing was merged')
| |
| |
def get_m3u8data(first_m3u8url):
    """Follow the first playlist to the final one and save it as ``index.m3u8``.

    The first playlist is fetched and its first URI line (the first line that
    is neither blank nor a ``#`` tag) is resolved against ``first_m3u8url``.
    That second playlist, which lists the TS segments, is written to
    ``index.m3u8`` in the current directory.

    Args:
        first_m3u8url: URL of the first playlist.

    Returns:
        The absolute URL of the second playlist.

    Raises:
        ValueError: If the first playlist contains no URI line.
        requests.HTTPError: If either request returns an error status.
    """
    with requests.Session() as session:
        # First request: the playlist that points at the real one.
        resp = session.get(first_m3u8url, headers=headers, timeout=30)
        resp.raise_for_status()
        resp.encoding = 'UTF-8'

        variant_uri = None
        for raw_line in resp.text.splitlines():
            candidate = raw_line.strip()
            if candidate and not candidate.startswith('#'):
                variant_uri = candidate
                break
        if variant_uri is None:
            raise ValueError(f"no playlist URI found in {first_m3u8url}")

        # urljoin handles relative, root-relative and absolute URIs alike.
        second_m3u8_url = urljoin(first_m3u8url, variant_uri)

        # Second request: the playlist that lists every TS segment.
        resp = session.get(second_m3u8_url, headers=headers, timeout=30)
        resp.raise_for_status()

    with open('index.m3u8', 'wb') as f:
        f.write(resp.content)
    return second_m3u8_url
| |
| |
if __name__ == '__main__':
    # Directory where the TS segments are stored. down_video reads this
    # module-level name when it builds the output file path.
    path = 'ts'
    # URL of the first index.m3u8 (the stream behind it is encrypted).
    url = 'https://s7.fsvod1.com/20220622/5LnZiDXn/index.m3u8'
    meu8_url = get_m3u8data(url)
    print(meu8_url)
    # Remaining pipeline stages, left disabled as in the original script:
    # download the segments, rewrite the playlist for local use, then merge.
    # host = 'https://s7.fsvod1.com'  # prefix for segment and key paths
    # download_all_videos(path, host)
    # do_m3u8_url(meu8_url, path)
    # merge(path, '奇异博士')
    print('over')
| ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| 转换格式 |
| |
| import requests |
| import re |
| from urllib.parse import urljoin |
| import os |
| from concurrent.futures import ThreadPoolExecutor, wait |
| |
# Directory that receives the raw (PNG-disguised) segments.
file_path = "ts_before"
# exist_ok avoids the check-then-create race of a separate existence test.
os.makedirs(file_path, exist_ok=True)

# Browser-like request headers sent with every HTTP request in this script.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36",
}
| |
| |
# Extract the player's m3u8 address from the web page.
def get_frame_src(url):
    """Return the ``index.m3u8`` URL embedded in the page at ``url``.

    The page source is searched for a ``"url":"...index.m3u8"`` entry and the
    backslashes used to escape it are removed.

    Args:
        url: Address of the page that embeds the player.

    Returns:
        The m3u8 URL as a plain string.

    Raises:
        ValueError: If the page contains no such entry.
        requests.HTTPError: If the page request returns an error status.
    """
    with requests.Session() as session:
        resp = session.get(url=url, headers=headers, timeout=30)
    resp.raise_for_status()
    page_source = resp.text

    # Raw string with an escaped dot so only a literal "index.m3u8" matches.
    found = re.search(r'"url":"(?P<url>.*?index\.m3u8)"', page_source)
    if found is None:
        raise ValueError(f"no index.m3u8 address found in page {url}")
    return found.group("url").replace("\\", "")
| |
# Download first.m3u8 from the given URL, then the playlist it points to.
def get_firstm3u8(src_url):
    """Save the first playlist and the playlist(s) it references.

    The response for ``src_url`` is written to ``first.m3u8``. Every URI line
    in it is resolved against ``src_url``, fetched, and written to
    ``second.m3u8``; if several URIs are listed, the last one fetched is the
    one that remains on disk.

    Args:
        src_url: URL of the first playlist.

    Raises:
        requests.HTTPError: If any request returns an error status.
    """
    with requests.Session() as session:
        resp = session.get(url=src_url, headers=headers, timeout=30)
        resp.raise_for_status()
        with open("first.m3u8", "w", encoding="utf-8") as f1:
            f1.write(resp.text)
        print("first.m3u8 文件下载完成")

        for line in resp.text.splitlines():
            line = line.strip()
            # Skip tags and blank lines; a blank line would resolve back to
            # src_url itself.
            if not line or line.startswith("#"):
                continue
            print(line)
            # The listed path is incomplete and has to be joined with the
            # URL of the first playlist.
            src = urljoin(src_url, line)
            response = session.get(url=src, headers=headers, timeout=30)
            response.raise_for_status()
            print(response.text)
            with open("second.m3u8", "w", encoding="utf-8", newline="") as f3:
                f3.write(response.text)
            print("second.m3u8 文件下载完成")
| |
# Download a single segment (served disguised as a PNG).
def download_one_video(url, i):
    """Download one segment and store it as ``<file_path>/<i>.ts``.

    Args:
        url: Absolute URL of the segment.
        i: Segment index, used as the output file name.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never saved as segment data.
    """
    print(f"{i}.ts开始下载")
    # The session is closed as soon as the download is done instead of being
    # leaked once per segment.
    with requests.Session() as session:
        resp = session.get(url=url, headers=headers, timeout=30)
    resp.raise_for_status()
    with open(os.path.join(file_path, f"{i}.ts"), "wb") as f:
        f.write(resp.content)
    print(f"{i}.ts下载完成")
| |
def download_all_video():
    """Download every segment listed in ``second.m3u8`` on a thread pool.

    Each non-comment line of ``second.m3u8`` is passed unchanged to
    ``download_one_video`` together with its running index (0, 1, 2, ...).
    """
    jobs = []
    # The context manager shuts the pool down once all work is finished.
    with ThreadPoolExecutor(50) as pool:
        with open("second.m3u8", "r", encoding="utf-8") as f:
            i = 0
            for line in f:
                line = line.strip()
                # Skip tags and blank lines; only real segment URLs are queued.
                if not line or line.startswith("#"):
                    continue
                jobs.append((i, pool.submit(download_one_video, line, i)))
                i += 1
        wait([job for _, job in jobs])

    # Exceptions raised inside worker threads are stored on the future;
    # report them so that missing segments do not go unnoticed.
    for index, job in jobs:
        error = job.exception()
        if error is not None:
            print(f"{index}.ts download failed: {error!r}")
| |
# Convert the segments that are disguised as PNG files.
def resolve_ts(src_path, dst_path):
    """Copy every segment to ``dst_path`` with its first four bytes replaced.

    Each file in ``src_path`` whose name starts with a number (``0.ts``,
    ``1.ts``, ...) is copied in numeric order; the first four bytes of the
    copy are overwritten with ``FF FF FF FF``. Files with other names are
    ignored.

    Args:
        src_path: Directory holding the downloaded segments.
        dst_path: Directory for the converted copies; created if missing.
    """
    os.makedirs(dst_path, exist_ok=True)

    # Ignore stray files (e.g. OS metadata) that would break the numeric sort.
    names = [
        name for name in os.listdir(src_path)
        if name.split('.')[0].isdecimal()
    ]
    names.sort(key=lambda name: int(name.split('.')[0]))

    for name in names:
        origin_ts = os.path.join(src_path, name)
        resolved_ts = os.path.join(dst_path, name)
        try:
            with open(origin_ts, "rb") as infile:
                data = infile.read()
            with open(resolved_ts, "wb") as outfile:
                # Same result as writing the data and then overwriting the
                # first four bytes in place.
                outfile.write(b'\xff\xff\xff\xff' + data[4:])
        except OSError as error:
            # Report the failure instead of claiming success.
            print('resolve ' + origin_ts + ' failed: ' + str(error))
            continue
        print('resolve ' + origin_ts + ' success')
| |
# Merge the converted segments.
def merge(filePath, filename='output'):
    """Concatenate the segments in ``filePath`` into ``<filename>.mp4``.

    The numerically named segments are listed in order in ``file_list.txt``
    (in the current directory), which is then fed to ffmpeg's concat demuxer
    with stream copy.

    Args:
        filePath: Directory containing ``0.ts``, ``1.ts``, ...
        filename: Output file name without the ``.mp4`` extension.
    """
    # Function-scope import: this second script has its own import section,
    # which does not bring in subprocess.
    import subprocess

    # Ignore stray files that would break the numeric sort.
    names = [
        name for name in os.listdir(filePath)
        if name.split('.')[0].isdecimal()
    ]
    file_list = sorted(names, key=lambda name: int(name.split('.')[0]))
    print(file_list)

    # One "file '<dir>/<n>.ts'" line per segment, in playback order. Written
    # as UTF-8 so non-ASCII directory names are not garbled.
    with open("./file_list.txt", "w", encoding="utf-8") as f:
        for file in file_list:
            f.write("file '{}/{}'\n".format(filePath, file))

    # An argument list (no shell) keeps output names with spaces or shell
    # metacharacters intact.
    cmd = [
        'ffmpeg', '-f', 'concat', '-safe', '0',
        '-i', 'file_list.txt', '-c', 'copy', f'{filename}.mp4',
    ]
    try:
        subprocess.run(cmd)
    except FileNotFoundError:
        # Without a shell a missing executable raises instead of returning a
        # status code; report it rather than crash, as os.system did.
        print('ffmpeg executable not found; nothing was merged')
| |
if __name__ == '__main__':
    # Page that embeds the player for the episode.
    url = "https://www.9meiju.cc/mohuankehuan/shandianxiadibaji/1-6.html"
    # Download stages, left disabled as in the original script:
    # src_url = get_frame_src(url)
    # get_firstm3u8(src_url)
    # download_all_video()
    # Convert the already-downloaded segments, then merge them.
    dst_path = "ts_after"
    resolve_ts(file_path, dst_path)
    merge(dst_path, "闪电侠6集")
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律