m3u8 download python

# Install either Qt 4.8 or Qt 5.x, and make sure its .pc files are on PKG_CONFIG_PATH.
sudo apt-get update

export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/lib/x86_64-linux-gnu/pkgconfig
export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/bin
pkg-config --modversion Qt5
echo $PKG_CONFIG_PATH
sudo apt install qt5-default
sudo apt-get install qtbase5-dev
# sudo apt install qttools5-dev-tools

# sudo apt-get install apt-file
# sudo apt-file update
# apt-file find Qt5.pc

make xconfig
 
import os
import sys
import requests
from urllib.parse import urlparse
#   git clone https://github.com/huzhenjie/m3u8_downloader
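# NOTE: URL joining convention used below: a path fragment keeps its leading '/'
# and has no trailing '/' (fragments that start with 'http' are left untouched).
# Known issue with host/body handling: relative entries should be resolved against
# the longest matching URL prefix, walking nested playlists recursively (pre-order)
# to collect every valid .ts address.
# Not fixed yet.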
def get_cfg():
    """Read the playlist URL and the save directory from the command line.

    Returns:
        A ``(m3u8_url, save_dir)`` tuple built from ``sys.argv[1]`` and
        ``sys.argv[2]``, or ``None`` (after printing usage help) when fewer
        than two arguments were supplied.
    """
    argv = sys.argv
    # Both positional arguments are required: the return below reads argv[2],
    # so a single argument must also fall into the usage branch.
    if len(argv) <= 2:
        print('Usage: python3', argv[0], '[your_m3u8_url] [save_dir]')
        print('Sample: python3', argv[0], 'https://xxx.com/video.m3u8', '/Users/huzhenjie/Downloads/save_dir')
        return None
    return (argv[1], argv[2])

def get_host(url):
    """Return the ``scheme://authority`` prefix of *url*.

    The parsed URL is printed for debugging. The authority is taken from
    ``netloc`` rather than ``hostname`` so that an explicit port (for example
    ``example.com:8080``) is preserved; dropping it would send every later
    request to the default port.

    Args:
        url: Absolute URL of the playlist.

    Returns:
        The scheme and network location joined as ``scheme://netloc``, with
        no trailing slash and no path.
    """
    urlgroup = urlparse(url)
    print(urlgroup)
    return urlgroup.scheme + '://' + urlgroup.netloc

def get_m3u8_body(url):
    """Download a playlist and return its text.

    A dedicated session with a retrying adapter (up to 10 retries) is mounted
    for both http and https. The session is used as a context manager so its
    pooled connections are released once the body has been read.

    Args:
        url: URL of the m3u8 playlist.

    Returns:
        The response body decoded as text.
    """
    print('read m3u8 file:', url)
    with requests.Session() as session:
        adapter = requests.adapters.HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=10)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        r = session.get(url, timeout=10)
        return r.text

def get_url_list(host, body):
    """Extract the entry URLs from a playlist body.

    Lines that are blank or start with ``#`` (tags and comments) are skipped.
    Lines that already are absolute http(s) URLs are kept unchanged; every
    other line is treated as a path and appended to *host* with exactly one
    ``/`` between them.

    Args:
        host: ``scheme://authority`` prefix without a trailing slash.
        body: Full text of the playlist.

    Returns:
        A list of URLs in playlist order.
    """
    ts_url_list = []
    for raw_line in body.split('\n'):
        # Playlists served with CRLF line endings leave a trailing '\r' on
        # every line; strip it so it never ends up inside a URL.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.lower().startswith(('http://', 'https://')):
            ts_url_list.append(line)
            continue
        if line[0] == '/':
            line = line[1:]
        ts_url_list.append('%s/%s' % (host, line))
    return ts_url_list

def download_ts_file(ts_url_list, download_dir):
    """Download every segment URL into *download_dir*.

    The URLs are processed in reverse list order, and the returned paths
    follow that same reversed order. Each target name is the part of the URL
    from its last ``/`` onwards, appended directly to *download_dir*. Files
    that already exist on disk are not downloaded again.

    Args:
        ts_url_list: Segment URLs to fetch.
        download_dir: Existing directory that receives the files.

    Returns:
        The list of local file paths, one per URL, including skipped ones.
    """
    ts_path_list = []
    total = len(ts_url_list)
    for i, ts_url in enumerate(reversed(ts_url_list), start=1):
        # file_name keeps its leading '/', so plain concatenation yields a path.
        file_name = ts_url[ts_url.rfind('/'):]
        curr_path = '%s%s' % (download_dir, file_name)
        print('\n[process]: %s/%s' % (i, total))
        print('[download]:', ts_url)
        print('[target]:', curr_path)
        ts_path_list.append(curr_path)
        if os.path.isfile(curr_path):
            print('[warn]: file already exist')
            continue
        # Without a timeout a stalled server would block the download forever;
        # 10 seconds matches the playlist request elsewhere in this script.
        r = requests.get(ts_url, timeout=10)
        with open(curr_path, 'wb') as f:
            f.write(r.content)
    return ts_path_list

def check_dir(path):
    """Create the directory *path*, including parents, if nothing exists there.

    When *path* already exists (as a directory or anything else) the function
    returns without touching it.

    Args:
        path: Directory to create.
    """
    if os.path.exists(path):
        return
    # exist_ok covers the window between the check above and the creation:
    # another process creating the directory in between is not an error.
    os.makedirs(path, exist_ok=True)

def get_download_url_list(host, m3u8_url, url_list=None):
    """Collect all non-playlist URLs reachable from a playlist.

    The playlist at *m3u8_url* is downloaded and its entries are visited in
    order. Entries ending in ``.m3u8`` are treated as nested playlists and
    expanded recursively; all other entries are appended to the result.

    Args:
        host: ``scheme://authority`` prefix used to resolve relative entries.
        m3u8_url: URL of the playlist to expand.
        url_list: Optional list to append to. A fresh list is created when it
            is omitted, so separate top-level calls never share results (the
            previous ``[]`` default was one list reused across calls).

    Returns:
        The list holding every collected URL.
    """
    if url_list is None:
        url_list = []
    body = get_m3u8_body(m3u8_url)
    for url in get_url_list(host, body):
        if url.lower().endswith('.m3u8'):
            url_list = get_download_url_list(host, url, url_list)
        else:
            url_list.append(url)
    return url_list


# def combine_ts_file(ts_path_list, target_path):
#     with open(target_path, 'wb+') as f:
#         for ts_path in ts_path_list:
#             print(ts_path)
#             f.write(open(ts_path, 'rb').read())

def download_ts(m3u8_url, save_dir):
    """Fetch every segment referenced by a playlist into *save_dir*.

    Ensures the target directory exists, derives the host prefix from the
    playlist URL, expands the playlist into its segment URLs, prints them
    together with their count, and downloads them.

    Args:
        m3u8_url: URL of the top-level playlist.
        save_dir: Directory that receives the downloaded segments.
    """
    check_dir(save_dir)

    base = get_host(m3u8_url)
    print("host: %s" % base)

    segment_urls = get_download_url_list(base, m3u8_url)
    print(segment_urls)
    print('total file count:', len(segment_urls))

    download_ts_file(segment_urls, save_dir)

def main():
    """Download the hard-coded sample playlist into ``./m3u8_sample_dir``."""
    # Alternative sample playlist:
    # m3u8_url = 'https://v.kuaishouvod.com/m3u8/0/12/index.m3u8'
    download_ts(
        'http://hls.cntv.lxdns.com/asp/hls/main/0303000a/3/default/978a64ddd3a1caa85ae70a23414e6540/main.m3u8',
        './m3u8_sample_dir',
    )


# Script entry point: runs the hard-coded sample download. The commented
# lines below are the command-line driven alternative built on get_cfg().
if __name__ == '__main__':
    main()
    # config = get_cfg()
    # if config:
    #     download_ts(config[0], config[1])

 

# https://blog.csdn.net/s_kangkang_A/article/details/103071822
import datetime
import os
import re
import threading
import time
from queue import Queue
import requests
from Crypto.Cipher import AES
 
 
# 传入链接,完成写文件操作及获取key链接
# def parse(url, headers, base_url):
#     resp = requests.get(url, headers=headers)
#     print(resp.text)
#     # 匹配key链接
#     key_url = re.search('"(.*?.key)"', resp.text).group(1).strip()
#     # print(key_url)
#     m3u8_text = resp.text
#     # print(m3u8_text)
#     # 按行拆分m3u8文档
#     ts_queue = Queue(10000)
#     lines = m3u8_text.split('\n')
#     s = len(lines)
#     # 找到文档中含有ts字段的行
#     concatfile = 'cache/' + "zzz" + '.txt'
#     for i, line in enumerate(lines):
#         # 我找的链接里,m3u8文件里是js链接,需要替换
#         if '.js' in line:
#             line = re.sub('\.js', '.ts', line)
#             if 'http' in line:
#                 # print("ts>>", line)
#                 ts_queue.put(line)
#             filename = re.search('([a-zA-Z0-9-_]+\.ts)', line).group(1).strip()
#             # 一定要先写文件,因为线程的下载是无序的,文件无法按照
#             # 123456。。。去顺序排序,而文件中的命名也无法保证是按顺序的
#             # 这会导致下载的ts文件无序,合并时,就会顺序错误,导致视频有问题。
#             open(concatfile, 'a+').write("file %s\n" % filename)
#             print("\r", '文件写入中', i, "/", s, end="", flush=True)
#         else:
#             if '.ts' in line:
#                 if 'http' in line:
#                     # print("ts>>", line)
#                     ts_queue.put(line)
#                 else:
#                     line = base_url + line
#                     ts_queue.put(line)
#                     # print('ts>>',line)
#                 filename = re.search('([a-zA-Z0-9-_]+\.ts)', line).group(1).strip()
#                 # 一定要先写文件,因为线程的下载是无序的,文件无法按照
#                 # 123456。。。去顺序排序,而文件中的命名也无法保证是按顺序的
#                 # 这会导致下载的ts文件无序,合并时,就会顺序错误,导致视频有问题。
#                 open(concatfile, 'a+').write("file %s\n" % filename)
#                 print("\r", '文件写入中', i, "/", s, end="", flush=True)
#     return ts_queue, concatfile, key_url
 
 
# 传入key链接,对key进行相关操作
def get_key(key_url):
    """Download the AES key and build a CBC cipher object from it.

    The request is sent with the module-level ``headers`` dict, which this
    file defines in its ``__main__`` section; that global must exist before
    the function is called.

    Args:
        key_url: URL of the key file.

    Returns:
        An AES cipher object in CBC mode.
    """
    # A timeout keeps a stalled key server from hanging the whole run; the
    # value matches the other requests in this script.
    k = requests.get(key_url, headers=headers, timeout=10)
    key = k.content
    # NOTE(review): the key bytes are also passed as the IV. Playlists may
    # declare their own IV on the EXT-X-KEY line; confirm this is right for
    # the target site.
    cryptor = AES.new(key, AES.MODE_CBC, key)
    return cryptor
 
 
# 下载操作
def down(ts_queue, headers, cryptor):
    """Worker loop: download, decrypt and store segments taken from a queue.

    Each URL is fetched through a retrying session, its body is decrypted
    with *cryptor*, and the result is appended to ``cache/<name>.ts`` where
    the name is extracted from the URL. A URL whose processing fails is put
    back on the queue and tried again.

    Args:
        ts_queue: Queue of segment URLs, possibly shared with other workers.
        headers: HTTP headers sent with every request.
        cryptor: Object whose ``decrypt(bytes)`` returns the clear data.
    """
    # Local import: the file-level import only brings in Queue.
    from queue import Empty

    with requests.Session() as session:
        adapter = requests.adapters.HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=10)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        while True:
            # A separate empty() check followed by a blocking get() can hang
            # when another worker takes the last item in between, so take the
            # item without blocking and stop once the queue is drained.
            try:
                url = ts_queue.get_nowait()
            except Empty:
                break
            filename = re.search(r'([a-zA-Z0-9-_]+\.ts)', url).group(1).strip()
            try:
                requests.packages.urllib3.disable_warnings()
                # resp = requests.get(url, headers=headers)
                resp = session.get(url, headers=headers, timeout=10)
                with open('cache/' + filename, 'ab+') as f:
                    data = cryptor.decrypt(resp.content)
                    f.write(data)
                print("\r", '任务文件 ', filename, ' 下载成功', end="", flush=True)
            except Exception:
                # NOTE(review): a permanently failing URL is retried without
                # limit; consider capping the attempts.
                print('任务文件 ', filename, ' 下载失败')
                ts_queue.put(url)
 
 
# # 合并操作
# def merge(concatfile, name):
#     try:
#         path = 'cache/' + name + '.mp4'
#         # command = 'ffmpeg -y -f concat -i %s -crf 18 -ar 48000 -vcodec libx264 -c:a aac -r 25 -g 25 -keyint_min 25 -strict -2 %s' % (concatfile, path)
#         command = 'ffmpeg -y -f concat -i %s -bsf:a aac_adtstoasc -c copy %s' % (concatfile, path)
#         os.system(command)
#         print('视频合并完成')
#     except:
#         print('合并失败')
 
 
# # 565 1280
 
# # 删除操作
# def remove(concatfile):
#     dir = 'cache/'
#     for line in open(concatfile):
#         line = re.search('file (.*?\.ts)', line).group(1).strip()
#         os.remove(dir + line)
#     print("ts文件全部删除")
#     try:
#         os.remove(concatfile)
#         print('文件删除成功')
#     except:
#         print('文件删除失败')
def parse(url, header):
    """Download a playlist and extract its segment URLs and key URL.

    The playlist text is printed, along with a notice when it lacks the
    ``#EXTM3U`` marker or contains an ``EXT-X-KEY`` tag. Segment entries are
    the lines following an ``EXTINF`` tag. Segment and key references are
    resolved against *url*, so both relative and absolute references work.

    Args:
        url: URL of the m3u8 playlist.
        header: HTTP headers sent with the request.

    Returns:
        A tuple ``(ts_queue, concatfile, key_url)``: the list of absolute
        segment URLs, the list of their bare file names, and the absolute key
        URL, or ``None`` for the key when the playlist names no key file.
    """
    # Local import: only used here and not imported at file level.
    from urllib.parse import urljoin

    htm = requests.get(url, headers=header, timeout=10)
    content = htm.text
    print(content)
    if "#EXTM3U" not in content:
        print("这不是一个m3u8的视频链接!")
    if "EXT-X-KEY" in content:
        print("加密视频")

    # Playlists without a key used to crash on .group() of a failed search;
    # keep the match object and decide at the end.
    key_match = re.search('"(.*?.key)"', content)

    # The segment pattern expects bare '\n' line endings, so normalise CRLF.
    normalized = content.replace('\r\n', '\n')
    tslist = re.findall('EXTINF:(.*),\n(.*)\n#', normalized)
    newlist = [entry[1] for entry in tslist]

    ts_queue = [urljoin(url, entry) for entry in newlist]
    concatfile = [entry.split('/')[-1] for entry in newlist]
    key_url = urljoin(url, key_match.group(1).strip()) if key_match else None
    return ts_queue, concatfile, key_url
 
# Script entry point for the encrypted-stream downloader: parse the playlist,
# fetch the key, then download and decrypt all segments with a thread pool.
if __name__ == '__main__':
    # name = input('请输入视频名称:')
    # Time-limited links:
    # url = 'https://meiju8.qfxmj.com/20191112/xVEwe432/2000kb/hls/index.m3u8?wsSecret=e16e458821a72c67fb51d57287de91e5&wsTime=1574232518&watch=ae6f4caa08e521511949081bd4dd75f9'
    # url = 'https://m3u8.zztt85.com/watch8/924b4e202f70dc7ba8a6c659d94d546f/924b4e202f70dc7ba8a6c659d94d546f.m3u8'
    url = 'https://p.xgkvwoj.cn/watch9/a21cbddfa31329194937aa51df7f9f9e/a21cbddfa31329194937aa51df7f9f9e.m3u8'
    # Module-level on purpose: get_key() reads this dict as a global.
    headers = {
        'origin': 'https://www.51cg1.com',
        'referer': 'https://www.51cg1.com/archives/4434/',
        # 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
    }
    start = datetime.datetime.now().replace(microsecond=0)
    print("文件开始写入")

    ts_list, concatfile, key_url = parse(url, headers)
    # exist_ok avoids failing when the directory appears between a separate
    # existence check and the creation.
    os.makedirs('./cache', exist_ok=True)
    # print(ts_queue)
    # print("===================")
    # print(concatfile)
    # print("===================")
    # print(key_url)
    print('\n')
    print("文件写入结束")
    # Turn the key URL into a cipher object.
    cryptor = get_key(key_url)
    # Number of segments to download.
    num = len(ts_list)
    ts_queue = Queue()
    for i in ts_list:
        ts_queue.put(i)
    # The original sizing rule (one thread per five segments, capped at 50)
    # is kept below for reference; a fixed pool of 8 workers is used instead.
    print("下载任务开始")
    # if num > 5:
    #     t_num = num // 5
    # else:
    #     t_num = 1
    # if t_num > 50:
    #     t_num = 50
    t_num = 8
    threads = []
    for i in range(t_num):
        # daemon=True replaces the deprecated Thread.setDaemon(True).
        t = threading.Thread(target=down, name='th-' + str(i),
                             kwargs={'ts_queue': ts_queue, 'headers': headers, 'cryptor': cryptor},
                             daemon=True)
        threads.append(t)
    for t in threads:
        # Stagger the worker start-up.
        time.sleep(0.4)
        t.start()
    for t in threads:
        t.join()
    print('\n')
    print("下载任务结束")
    end = datetime.datetime.now().replace(microsecond=0)
    print('写文件及下载耗时:' + str(end - start))
    # merge(concatfile, name)
    # remove(concatfile)
    # over = datetime.datetime.now().replace(microsecond=0)
    # print('合并及删除文件耗时:' + str(over - end))
    # print("所有任务结束")
    # print('任务总时长:', over - start)
# 解密过程:

# 1、下载index.m3u8、key.key、视频流

# 2、首先安装ffmpeg,并且设置好环境变量

# 3、将index.m3u8、key.key放入视频流所在的目录,将key.key改名为key.m3u8(因为key不是ffmpeg内置格式,使用key.key会报错)

# 4、在命令行中,进入视频流所在目录,输入以下命令即可完成解密以及合并的工作,最后会在当前目录下生成完整的输出视频out.mp4

# ffmpeg -i index.m3u8 out.mp4
 

# 当然了,最简单的办法当然是直接从网上获取视频,免去下载视频流的步骤,命令如下

# -i后面指定m3u8文件的URI

# out.mp4是生成文件名,默认是命令行的当前目录,可以通过绝对路径指定具体位置,如G:\abc\xxx.mp4
'''
ffmpeg -i https://abc.net/xx/xx/1000kb/hls/index.m3u8 out.mp4        
'''

 

posted @ 2023-07-20 15:52  神犇(shenben)  阅读(119)  评论(0编辑  收藏  举报