m3u8 download python
# Install either Qt 4.8 or 5.x, and make sure it is on PKG_CONFIG_PATH.
# Refresh the apt package index before installing anything.
sudo apt-get update
# Append candidate directories to pkg-config's search path.
export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/lib/x86_64-linux-gnu/pkgconfig
# NOTE(review): /usr/bin normally holds executables rather than .pc files -- confirm this entry is needed.
export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/bin
# Query the version of the "Qt5" pkg-config module, then print the search path in use.
# NOTE(review): Qt 5 packages usually ship per-module files (e.g. Qt5Core.pc) -- confirm "Qt5" resolves.
pkg-config --modversion Qt5
echo $PKG_CONFIG_PATH
# Install the Qt 5 packages.
sudo apt install qt5-default
sudo apt-get install qtbase5-dev
# Optional extras, left disabled: the Qt tools package, and apt-file to locate which package ships Qt5.pc.
# sudo apt install qttools5-dev-tools
# sudo apt-get install apt-file
# sudo apt-file update
# apt-file find Qt5.pc
# Run the xconfig make target (presumably the Qt-based Linux kernel configurator -- confirm).
make xconfig
import os
import sys
from urllib.parse import urljoin, urlparse

import requests

# Based on: git clone https://github.com/huzhenjie/m3u8_downloader
#
# Original author's notes (translated):
#   * Within one URL, paths start with '/' and have no trailing '/'
#     (entries that start with "http" excepted).
#   * Joining every entry to the bare host was known to be wrong: entries must
#     be resolved against the longest matching URL prefix, and nested playlists
#     walked recursively (pre-order) to collect every valid .ts address.
# Both points are handled below by resolving entries against the URL of the
# playlist that lists them.


def get_cfg():
    """Read ``(m3u8_url, save_dir)`` from the command line.

    Returns:
        A ``(url, save_dir)`` tuple, or ``None`` (after printing usage) when
        fewer than two arguments were supplied.
    """
    argv = sys.argv
    # Both arguments are required; the original indexed argv[2] after checking
    # only for argv[1] and crashed with IndexError on a single argument.
    if len(argv) < 3:
        print('Usage: python3', argv[0], '[your_m3u8_url] [save_dir]')
        print('Sample: python3', argv[0], 'https://xxx.com/video.m3u8', '/Users/huzhenjie/Downloads/save_dir')
        return None
    return (argv[1], argv[2])


def get_host(url):
    """Return the ``scheme://host[:port]`` origin of *url*.

    ``netloc`` is used rather than ``hostname`` so an explicit port survives.
    """
    urlgroup = urlparse(url)
    print(urlgroup)
    return urlgroup.scheme + '://' + urlgroup.netloc


def get_m3u8_body(url):
    """Download the playlist at *url* and return its text.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """
    print('read m3u8 file:', url)
    # The context manager closes the pooled connections when we are done.
    with requests.Session() as session:
        adapter = requests.adapters.HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=10)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        r = session.get(url, timeout=10)
        # Fail loudly instead of parsing an HTML error page as a playlist.
        r.raise_for_status()
        return r.text


def get_url_list(host, body, base_url=None):
    """Extract the absolute URLs listed in a playlist body.

    Args:
        host: ``scheme://host`` origin, used when *base_url* is not given.
        body: Raw playlist text.
        base_url: URL of the playlist itself. When provided, relative entries
            are resolved against it, so both ``/root/relative.ts`` and
            ``sibling.ts`` forms work. When omitted, every relative entry is
            joined directly to *host* (the historical behavior).

    Returns:
        Absolute URLs in playlist order; comment/tag lines and blank lines are
        skipped.
    """
    ts_url_list = []
    for raw_line in body.splitlines():
        # strip() also removes the '\r' left behind by CRLF playlists.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.lower().startswith(('http://', 'https://')):
            ts_url_list.append(line)
        elif base_url is not None:
            ts_url_list.append(urljoin(base_url, line))
        else:
            if line.startswith('/'):
                line = line[1:]
            ts_url_list.append('%s/%s' % (host, line))
    return ts_url_list


def download_ts_file(ts_url_list, download_dir):
    """Download every segment URL into *download_dir*.

    Files that already exist are not downloaded again. A segment whose
    download fails is reported and skipped rather than saved as a bogus file.

    Returns:
        Paths of the segments now present on disk, in processing order.
    """
    ts_path_list = []
    total = len(ts_url_list)
    # NOTE(review): segments are processed last-to-first, so the returned list
    # is in reverse playlist order -- confirm this is intended before feeding
    # it to a concatenation step.
    for i, ts_url in enumerate(reversed(ts_url_list), start=1):
        # Name the file after the URL path only, so a query string (which may
        # itself contain '/') never ends up in the file name.
        file_name = os.path.basename(urlparse(ts_url).path)
        curr_path = os.path.join(download_dir, file_name)
        print('\n[process]: %s/%s' % (i, total))
        print('[download]:', ts_url)
        print('[target]:', curr_path)
        if os.path.isfile(curr_path):
            print('[warn]: file already exist')
            ts_path_list.append(curr_path)
            continue
        try:
            r = requests.get(ts_url, timeout=10)
            r.raise_for_status()
        except requests.RequestException as err:
            print('[error]: download failed:', err)
            continue
        with open(curr_path, 'wb') as f:
            f.write(r.content)
        ts_path_list.append(curr_path)
    return ts_path_list


def check_dir(path):
    """Create directory *path* (and any missing parents) if it does not exist."""
    os.makedirs(path, exist_ok=True)


def get_download_url_list(host, m3u8_url, url_list=None):
    """Collect every media-segment URL reachable from *m3u8_url*.

    Nested playlists (entries whose path ends in ``.m3u8``) are followed
    recursively, depth-first, in the order they are listed.

    Args:
        host: ``scheme://host`` origin, forwarded to :func:`get_url_list`.
        m3u8_url: URL of the playlist to read.
        url_list: Optional list to append to; a new list is created when
            omitted (a fresh one per call, not a shared default).

    Returns:
        The list containing all collected segment URLs.
    """
    if url_list is None:
        url_list = []
    body = get_m3u8_body(m3u8_url)
    # print('body: %s'%body)
    for url in get_url_list(host, body, base_url=m3u8_url):
        # Test the path component so "index.m3u8?token=..." is still recognized.
        if urlparse(url).path.lower().endswith('.m3u8'):
            url_list = get_download_url_list(host, url, url_list)
        else:
            url_list.append(url)
    return url_list


# def combine_ts_file(ts_path_list, target_path):
#     with open(target_path, 'wb+') as f:
#         for ts_path in ts_path_list:
#             print(ts_path)
#             f.write(open(ts_path, 'rb').read())


def download_ts(m3u8_url, save_dir):
    """Download all segments referenced by *m3u8_url* into *save_dir*.

    Returns:
        The list of local segment paths produced by :func:`download_ts_file`.
    """
    check_dir(save_dir)
    host = get_host(m3u8_url)
    print("host: %s"%host)
    ts_url_list = get_download_url_list(host, m3u8_url)
    print(ts_url_list)
    print('total file count:', len(ts_url_list))
    return download_ts_file(ts_url_list, save_dir)


def main():
    """Download the hard-coded sample stream into ``./m3u8_sample_dir``."""
    save_dir = './m3u8_sample_dir'
    # m3u8_url = 'https://v.kuaishouvod.com/m3u8/0/12/index.m3u8'
    m3u8_url = 'http://hls.cntv.lxdns.com/asp/hls/main/0303000a/3/default/978a64ddd3a1caa85ae70a23414e6540/main.m3u8'
    download_ts(m3u8_url, save_dir)


if __name__ == '__main__':
    main()
    # Command-line driven alternative:
    # config = get_cfg()
    # if config:
    #     download_ts(config[0], config[1])
# Source: https://blog.csdn.net/s_kangkang_A/article/details/103071822
import datetime
import os
import re
import threading
import time
from queue import Empty, Queue
from urllib.parse import urljoin

import requests
from Crypto.Cipher import AES

# An earlier, commented-out parse() was removed here. It filled a Queue,
# rewrote ".js" segment names to ".ts", and wrote an ffmpeg concat list
# ("file <name>" per line) *before* downloading, because threaded downloads
# finish out of order and the concat list must keep playlist order. The
# parse() defined below returns the ordered names instead.

# File name of a segment inside its URL (same character set as the original
# pattern, written as a raw string with the hyphen placed last).
_TS_NAME_RE = re.compile(r'([a-zA-Z0-9_-]+\.ts)')
# URI attribute of the first #EXT-X-KEY tag.
_KEY_URI_RE = re.compile(r'#EXT-X-KEY:[^\n]*?URI="([^"]+)"')


def get_key(key_url, headers=None):
    """Fetch the AES key at *key_url* and return an AES-CBC cipher built from it.

    Args:
        key_url: Absolute URL of the key file.
        headers: Optional HTTP headers for the request. (The original read a
            module-level ``headers`` that only exists when the file runs as a
            script, so importing callers hit NameError.)

    Returns:
        An ``AES`` cipher in CBC mode whose IV is the key itself.

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
    """
    k = requests.get(key_url, headers=headers, timeout=10)
    k.raise_for_status()
    key = k.content
    # NOTE(review): the key doubles as the IV, as in the original script. HLS
    # playlists may carry an explicit IV attribute -- confirm for the target site.
    cryptor = AES.new(key, AES.MODE_CBC, key)
    return cryptor


def down(ts_queue, headers, cryptor, max_attempts=3):
    """Worker: drain *ts_queue*, writing each segment into ``cache/``.

    Args:
        ts_queue: ``queue.Queue`` of absolute segment URLs.
        headers: HTTP headers sent with every request.
        cryptor: Cipher whose ``decrypt`` is applied to each payload, or
            ``None`` to store the payload unchanged (unencrypted stream).
        max_attempts: How many times this worker tries one URL before giving
            up; the original re-queued failures without limit and could spin
            forever on a permanently failing segment.

    NOTE(review): one cipher object is shared by every segment and thread. A
    CBC cipher object presumably carries chaining state between ``decrypt``
    calls, which would garble the first block of each segment -- confirm
    against the PyCryptodome documentation and build a cipher per segment if so.
    """
    requests.packages.urllib3.disable_warnings()
    attempts = {}
    with requests.Session() as session:
        adapter = requests.adapters.HTTPAdapter(pool_connections=10, pool_maxsize=10, max_retries=10)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        while True:
            # get_nowait() avoids the race where another worker takes the last
            # item between an empty() check and a blocking get(), which left
            # this thread waiting forever and hung join().
            try:
                url = ts_queue.get_nowait()
            except Empty:
                break
            match = _TS_NAME_RE.search(url)
            if match is None:
                # No usable file name in the URL; retrying cannot help.
                print('任务文件 ', url, ' 下载失败')
                continue
            filename = match.group(1).strip()
            try:
                # resp = requests.get(url, headers=headers)
                resp = session.get(url, headers=headers, timeout=10)
                resp.raise_for_status()
                data = resp.content if cryptor is None else cryptor.decrypt(resp.content)
                # 'wb' rather than 'ab+': re-running must not append a second
                # copy of the segment to an existing file.
                with open('cache/' + filename, 'wb') as f:
                    f.write(data)
            except (requests.RequestException, OSError, ValueError):
                print('任务文件 ', filename, ' 下载失败')
                attempts[url] = attempts.get(url, 0) + 1
                if attempts[url] < max_attempts:
                    ts_queue.put(url)
            else:
                print("\r", '任务文件 ', filename, ' 下载成功', end="", flush=True)


# # Merge step
# def merge(concatfile, name):
#     try:
#         path = 'cache/' + name + '.mp4'
#         # command = 'ffmpeg -y -f concat -i %s -crf 18 -ar 48000 -vcodec libx264 -c:a aac -r 25 -g 25 -keyint_min 25 -strict -2 %s' % (concatfile, path)
#         command = 'ffmpeg -y -f concat -i %s -bsf:a aac_adtstoasc -c copy %s' % (concatfile, path)
#         os.system(command)
#         print('视频合并完成')
#     except:
#         print('合并失败')
#     # 565 1280
#
# # Cleanup step
# def remove(concatfile):
#     dir = 'cache/'
#     for line in open(concatfile):
#         line = re.search('file (.*?\.ts)', line).group(1).strip()
#         os.remove(dir + line)
#     print("ts文件全部删除")
#     try:
#         os.remove(concatfile)
#         print('文件删除成功')
#     except:
#         print('文件删除失败')


def parse(url, header):
    """Fetch the playlist at *url* and extract its segments and key location.

    Args:
        url: Absolute URL of the ``.m3u8`` playlist.
        header: HTTP headers for the request.

    Returns:
        ``(ts_urls, ts_names, key_url)``: absolute segment URLs in playlist
        order, the matching bare file names, and the absolute key URL (or
        ``None`` when the playlist declares no key URI).

    Raises:
        requests.RequestException: on network failure or a non-2xx response.
        ValueError: if the response is not an m3u8 playlist.
    """
    htm = requests.get(url, headers=header, timeout=10)
    htm.raise_for_status()
    content = htm.text
    print(content)
    if "#EXTM3U" not in content:
        raise ValueError("这不是一个m3u8的视频链接!")

    key_url = None
    if "EXT-X-KEY" in content:
        print("加密视频")
        key_match = _KEY_URI_RE.search(content)
        if key_match:
            # urljoin copes with relative, root-relative and absolute key URIs.
            key_url = urljoin(url, key_match.group(1).strip())

    # The URI of a segment is the first non-tag line after its #EXTINF tag.
    # Walking lines (instead of one multi-line regex) also handles CRLF
    # playlists and a final segment that is not followed by another tag.
    newlist = []
    expect_uri = False
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if line.startswith('#EXTINF:'):
            expect_uri = True
        elif expect_uri and line and not line.startswith('#'):
            newlist.append(line)
            expect_uri = False

    ts_queue = [urljoin(url, i) for i in newlist]
    concatfile = [i.split('/')[-1] for i in newlist]
    return ts_queue, concatfile, key_url


if __name__ == '__main__':
    # name = input('请输入视频名称:')
    # Time-limited links:
    # url = 'https://meiju8.qfxmj.com/20191112/xVEwe432/2000kb/hls/index.m3u8?wsSecret=e16e458821a72c67fb51d57287de91e5&wsTime=1574232518&watch=ae6f4caa08e521511949081bd4dd75f9'
    # url = 'https://m3u8.zztt85.com/watch8/924b4e202f70dc7ba8a6c659d94d546f/924b4e202f70dc7ba8a6c659d94d546f.m3u8'
    url = 'https://p.xgkvwoj.cn/watch9/a21cbddfa31329194937aa51df7f9f9e/a21cbddfa31329194937aa51df7f9f9e.m3u8'
    headers = {
        'origin': 'https://www.51cg1.com',
        'referer': 'https://www.51cg1.com/archives/4434/',
        # 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
    }
    start = datetime.datetime.now().replace(microsecond=0)
    print("文件开始写入")
    # Original note (translated): "supply it only if needed; this site does
    # not need it" -- presumably about an extra base URL argument.
    ts_list, concatfile, key_url = parse(url, headers)
    os.makedirs('./cache', exist_ok=True)
    # print(ts_queue)
    # print("===================")
    # print(concatfile)
    # print("===================")
    # print(key_url)
    print('\n')
    print("文件写入结束")
    # Turn the key URL into a cipher; unencrypted streams have no key.
    cryptor = get_key(key_url, headers) if key_url else None
    # Number of segments to fetch.
    num = len(ts_list)
    ts_queue = Queue()
    for i in ts_list:
        ts_queue.put(i)
    # Original sizing rule, left disabled: one thread per five segments,
    # capped at 50.
    print("下载任务开始")
    # if num > 5:
    #     t_num = num // 5
    # else:
    #     t_num = 1
    # if t_num > 50:
    #     t_num = 50
    t_num = 8
    threads = []
    for i in range(t_num):
        t = threading.Thread(target=down, name='th-' + str(i),
                             kwargs={'ts_queue': ts_queue, 'headers': headers, 'cryptor': cryptor},
                             daemon=True)
        threads.append(t)
    for t in threads:
        # Stagger the worker start-up slightly.
        time.sleep(0.4)
        t.start()
    for t in threads:
        t.join()
    print('\n')
    print("下载任务结束")
    end = datetime.datetime.now().replace(microsecond=0)
    print('写文件及下载耗时:' + str(end - start))
    # merge(concatfile, name)
    # remove(concatfile)
    # over = datetime.datetime.now().replace(microsecond=0)
    # print('合并及删除文件耗时:' + str(over - end))
    # print("所有任务结束")
    # print('任务总时长:', over - start)

# Decrypting with ffmpeg instead:
# 1. Download index.m3u8, key.key and the video segments.
# 2. Install ffmpeg and add it to the PATH environment variable.
# 3. Put index.m3u8 and key.key in the directory that holds the segments and
#    rename key.key to key.m3u8 (".key" is not a format ffmpeg recognizes, so
#    using key.key raises an error).
# 4. On the command line, change into that directory and run the command
#    below; it decrypts and merges the segments and writes the complete video
#    out.mp4 to the current directory:
#        ffmpeg -i index.m3u8 out.mp4
# The simplest route is to let ffmpeg read the stream straight from the
# network, skipping the manual segment download. "-i" takes the URI of the
# m3u8 file; out.mp4 is the output file name, created in the current
# directory by default (an absolute path such as G:\abc\xxx.mp4 also works):
#        ffmpeg -i https://abc.net/xx/xx/1000kb/hls/index.m3u8 out.mp4