# Python 下载 m3u8 格式视频的方法 (How to download an m3u8-format video with Python)

import requests
import re
import json
import m3u8
import os
from concurrent.futures import ThreadPoolExecutor
from time import sleep
import shutil
import subprocess

# HTTP headers passed to the requests.get() calls in this file (the page fetch
# and every TS segment download).
# NOTE(review): the Cookie value embeds what look like account/session tokens
# (e.g. acPasstoken, ac_username) — presumably copied from a logged-in browser
# session; confirm, and consider loading it from the environment instead of
# committing it to source.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'Cookie':'_did=web_1930955391DE64DF; _did=web_1930955391DE64DF; csrfToken=B3HzIa_gotdrEZWr2wOODE9z; webp_supported=%7B%22lossy%22%3Atrue%2C%22lossless%22%3Atrue%2C%22alpha%22%3Atrue%2C%22animation%22%3Atrue%7D; Hm_lvt_2af69bc2b378fb58ae04ed2a04257ed1=1730789391; HMACCOUNT=EAB764C050B1F83E; lsv_js_player_v2_main=ca85g8; uuid=71dac937b0a505074f4e5e94fa8d1234; auth_key=76212252; ac_userimg=https%3A%2F%2Fimgs.aixifan.com%2Fstyle%2Fimage%2FdefaultAvatar.jpg; stochastic=eWF6MjduaWRvZw%3D%3D; acPasstoken=ChVpbmZyYS5hY2Z1bi5wYXNzdG9rZW4ScGbcuUJMOlbw849_IMXUeM1D9Pnj6LNEtj0f15emmerMDycQxBDcT_CZHQLrhk-XgwxJctLTNpifSja1P7U-Vj2rAquxGMEdpRoqg2dF7Vvz2XvTTOMjZH0JmWGOuvGlGltsL5nV4iEaeATIEtRnGn8aEmdOSJkx2ly3WrEb6jSqdMEq6SIgOlkU3ZpDjDwLFeEP8MTMeUiOIRnLpDfBbTREE09kiSIoBTAB; ac_username=yangzhenyu9406; acPostHint=3eb71c4a51d5a6884aa31105719cdecf872a; safety_id=AAJROucDlbVzhbY2UsGaqvRY; cur_req_id=713498187499F148_self_0528e0128300a06862bd18195cc26e1d; cur_group_id=713498187499F148_self_0528e0128300a06862bd18195cc26e1d_0; Hm_lpvt_2af69bc2b378fb58ae04ed2a04257ed1=1730789919'
}

# Base directory under which start_url() creates the temporary "<title>_ts"
# segment folder and the final "<title>" output folder.
# This is an absolute, machine-specific Windows path; adjust before running elsewhere.
output_dir = r'C:\Users\38461\Desktop\dongman\data'


# Download a single TS segment, with a retry mechanism
def download_segment(ts_url, ts_filename, max_retries):
    """Download one TS segment to disk, retrying on request failures.

    The body is streamed into ``ts_filename + '.part'`` and only moved to
    ``ts_filename`` once the whole response has been written, so an
    interrupted transfer never leaves a truncated segment at the final path.

    Args:
        ts_url: URL of the segment to fetch.
        ts_filename: Destination path of the finished segment file.
        max_retries: Maximum number of download attempts.

    Returns:
        True if the segment was downloaded, False if every attempt failed.
        Failures are reported via ``print``; request errors are not raised.
    """
    part_filename = ts_filename + '.part'
    for attempt in range(1, max_retries + 1):
        try:
            print(f"Attempting to download {ts_url} to {ts_filename}... (Attempt {attempt})")
            # The context manager releases the streamed connection even when
            # the transfer fails half-way.
            with requests.get(ts_url, headers=headers, stream=True, timeout=10) as response:
                response.raise_for_status()  # treat HTTP error statuses as failures
                with open(part_filename, "wb") as ts_file:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        if chunk:
                            ts_file.write(chunk)
            # Publish the segment only after it has been written completely.
            os.replace(part_filename, ts_filename)
            print(f"Downloaded {ts_filename}")
            return True
        except requests.exceptions.RequestException as e:
            print(f"Failed to download {ts_url}: {e}")
            if attempt < max_retries:
                sleep(2)  # back off briefly before the next attempt

    # Every attempt failed: drop any leftover partial data.
    try:
        os.remove(part_filename)
    except FileNotFoundError:
        pass
    print(f"Failed to download {ts_url} after {max_retries} attempts.")
    return False


# Download all TS segments of a playlist using a thread pool
def down_load_ts(m3u8_url, m3u8_full_path, max_workers=10, max_retries=3):
    """Download every segment of an m3u8 playlist into a directory.

    Segment ``i`` of the playlist is saved as ``segment_<i>.ts`` inside
    ``m3u8_full_path``. Downloads run concurrently via ``download_segment``.

    Args:
        m3u8_url: URL of the m3u8 playlist, loaded with ``m3u8.load``.
        m3u8_full_path: Existing directory that receives the segment files.
        max_workers: Number of concurrent download threads.
        max_retries: Attempts per segment, forwarded to ``download_segment``.

    Returns:
        The loaded m3u8 playlist object.

    Raises:
        Any exception raised inside a download worker is re-raised here by
        ``future.result()``.
    """
    # Relative segment URIs are resolved against this fixed CDN prefix.
    base_url = 'https://tx-safety-video.acfun.cn/mediacloud/acfun/acfun_video/hls/'
    m3u8_obj = m3u8.load(m3u8_url)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        jobs = []
        for i, segment in enumerate(m3u8_obj.segments):
            uri = segment.uri
            # A playlist may list absolute URLs; prefixing those would
            # produce an invalid address.
            if uri.startswith(('http://', 'https://')):
                ts_url = uri
            else:
                ts_url = base_url + uri
            ts_filename = os.path.join(m3u8_full_path, f"segment_{i}.ts")
            future = executor.submit(download_segment, ts_url, ts_filename, max_retries)
            jobs.append((ts_filename, future))

        # Wait for all tasks; a segment counts as failed when the worker
        # returned False or no file exists at its path afterwards.
        failed = []
        for ts_filename, future in jobs:
            outcome = future.result()
            if outcome is False or not os.path.exists(ts_filename):
                failed.append(ts_filename)

    if failed:
        print(f"Warning: {len(failed)} of {len(jobs)} TS files could not be downloaded.")
    else:
        print("All TS files downloaded successfully.")
    return m3u8_obj


def merge_ts_files(m3u8_obj, output_dir, full_path):
    """Concatenate downloaded TS segments into one ``output_video.ts`` file.

    Segments are appended in playlist order. A segment whose file is missing
    is skipped with a warning rather than aborting the merge.

    Args:
        m3u8_obj: Playlist object; only the number of entries in
            ``m3u8_obj.segments`` is used.
        output_dir: Directory holding the ``segment_<i>.ts`` files to read
            (despite the name, this is the merge *input* directory).
        full_path: Directory in which ``output_video.ts`` is written.

    Returns:
        Path of the merged file.
    """
    output_file = os.path.join(full_path, 'output_video.ts')
    skipped = 0
    with open(output_file, "wb") as merged:
        for i, _ in enumerate(m3u8_obj.segments):
            ts_filename = os.path.join(output_dir, f"segment_{i}.ts")
            # Missing segments (e.g. failed downloads) are skipped.
            if not os.path.exists(ts_filename):
                print(f"Warning: {ts_filename} does not exist. Skipping this segment.")
                skipped += 1
                continue

            # Stream the segment in chunks instead of loading it whole into memory.
            with open(ts_filename, "rb") as ts_file:
                shutil.copyfileobj(ts_file, merged)
            print(f"Merged {ts_filename}")

    if skipped:
        print(f"Segments merged into {output_file} with {skipped} missing segment(s) skipped.")
    else:
        print(f"All segments merged into {output_file}.")
    return output_file

def ts_to_mp4(ts_file, output_file):
    """Remux a .ts file into an .mp4 container with ffmpeg (stream copy).

    Args:
        ts_file: Path of the input TS file.
        output_file: Path of the MP4 file to write; an existing file is
            overwritten.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
            (its error output is printed before re-raising).
        FileNotFoundError: the ``ffmpeg`` executable is not on PATH.
    """
    command = [
        'ffmpeg',
        '-nostdin',            # never read the terminal
        '-y',                  # overwrite instead of blocking on an invisible prompt
        '-loglevel', 'error',  # keep captured stderr limited to real errors
        '-i', ts_file,
        '-c', 'copy',          # copy streams without re-encoding
        output_file,
    ]
    try:
        subprocess.run(
            command,
            check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE
        )
    except subprocess.CalledProcessError as e:
        # Surface ffmpeg's own diagnostics before propagating the failure.
        details = e.stderr.decode(errors='replace').strip() if e.stderr else ''
        print(f"ffmpeg failed with exit code {e.returncode}: {details}")
        raise



# Resolve the M3U8 URL from the page, then download, merge and convert the video
def start_url(url='https://www.acfun.cn/bangumi/aa5024869'):
    """Download the video of an AcFun bangumi page as an MP4 file.

    Steps: fetch the page, extract the embedded ``window.bangumiData`` JSON,
    pull the playlist URL out of its ``ksPlayJsonHevc`` field, download all
    segments into ``<output_dir>/<title>_ts``, merge them, convert the result
    to ``<output_dir>/<title>/output_video.mp4`` and remove the intermediates.

    Args:
        url: Address of the bangumi page to download.

    Raises:
        requests.exceptions.RequestException: the page request failed or
            returned an HTTP error status.
        RuntimeError: the expected data could not be found in the page.
    """
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    # The page embeds its metadata as "window.bangumiData = {...};". The lazy
    # match stops just before the final "}", which is re-appended below.
    page_match = re.search(r'window.bangumiData = (.*?)};', response.text)
    if page_match is None:
        raise RuntimeError(f"window.bangumiData not found in page {url}")
    data = json.loads(page_match.group(1) + '}')

    info = data['currentVideoInfo']['ksPlayJsonHevc']
    name = data['bangumiTitle']
    # NOTE(review): info is searched as text for the entry with "id":1 —
    # presumably the first listed stream variant; confirm against the payload.
    url_match = re.search(r'{"id":1,"url":"(.*?)",', info)
    if url_match is None:
        raise RuntimeError(f"m3u8 URL not found in ksPlayJsonHevc for {url}")
    m3u8_url = url_match.group(1)

    # Titles may contain characters that are illegal in file names or that
    # would be read as path separators.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
    m3u8_full_path = os.path.join(output_dir, safe_name + '_' + 'ts')
    full_dir = os.path.join(output_dir, safe_name)

    os.makedirs(m3u8_full_path, exist_ok=True)
    m3u8_obj = down_load_ts(m3u8_url, m3u8_full_path)

    os.makedirs(full_dir, exist_ok=True)
    ts_filename = merge_ts_files(m3u8_obj, m3u8_full_path, full_dir)

    mp4_filename = os.path.splitext(ts_filename)[0] + '.mp4'
    ts_to_mp4(ts_filename, mp4_filename)

    # Intermediates are removed only after a successful conversion.
    os.remove(ts_filename)
    shutil.rmtree(m3u8_full_path)



# Script entry point: run the download pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    start_url()

 

# posted @ 2024-11-08 16:00  lvye001  阅读(34)  评论(0)  编辑  收藏  举报