python - m3u8下载解析
import hashlib
import os
import re
import shutil
import time
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
from urllib.parse import urljoin

import requests
# pip install pycryptodome -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
from Crypto.Cipher import AES

# Lagou validates paid access through cookies, so the course must have been purchased.
COOKIE = ''  # paste a cookie captured from a logged-in session here

# Attributes of an "#EXT-X-KEY" playlist tag.
_KEY_METHOD_RE = re.compile(r'METHOD=([^,\s]+)')
_KEY_URI_RE = re.compile(r'URI="([^"]*)"')


def md5(str1):
    """Return the hexadecimal MD5 digest of ``str1`` encoded as UTF-8."""
    return hashlib.md5(str1.encode('utf-8')).hexdigest()


class m3u8:
    """Download every segment of an m3u8 playlist and merge them into one file.

    Segments are stored as ``1.ts``, ``2.ts``, ... in a temporary directory
    named after the MD5 of the playlist URL, then concatenated by ``merge``.
    """

    def __init__(self, url, cookie=COOKIE, core=16):
        """Store the playlist location and prepare the download pool.

        Args:
            url: URL of the ``.m3u8`` playlist.
            cookie: value sent as the ``Cookie`` header on every request.
            core: number of worker threads used to fetch segments.
        """
        self.url = url
        # Everything up to and including the last '/', used to resolve
        # relative segment and key locations.
        self.base_url = url[:url.rfind('/') + 1]
        self.tmp = md5(url)
        self.cookie = cookie
        self.executor = ThreadPoolExecutor(core)
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
            'Cookie': self.cookie
        }

    def _parse_playlist(self, text):
        """Extract the segment URLs and the key URL from playlist ``text``.

        Returns:
            A ``(player_list, key_url)`` tuple. ``key_url`` is ``''`` when no
            ``#EXT-X-KEY`` tag carries a URI (e.g. ``METHOD=NONE``).
        """
        # splitlines() + strip() so CRLF playlists do not leave '\r' on URLs.
        lines = [raw.strip() for raw in text.splitlines()]
        player_list = []
        key_url = ''
        for index, line in enumerate(lines):
            # Only the first key tag that carries a URI is used.
            if '#EXT-X-KEY' in line and not key_url:
                method = _KEY_METHOD_RE.search(line)
                if method:
                    print("Decode Method:", method.group(1))
                uri = _KEY_URI_RE.search(line)
                if uri and uri.group(1):
                    # Absolute URIs pass through unchanged; relative ones are
                    # resolved against the playlist location.
                    key_url = urljoin(self.base_url, uri.group(1))
            # The segment location is taken from the line after "#EXTINF";
            # the bounds check covers a playlist that ends on the tag.
            if '#EXTINF' in line and index + 1 < len(lines):
                target = lines[index + 1]
                if not target:
                    continue
                if 'http' in target:
                    # Already a full URL: use it as-is.
                    player_list.append(target)
                elif 'ad0.ts' not in target:
                    # Relative entry (ad0.ts entries are skipped).
                    player_list.append(urljoin(self.base_url, target))
        return player_list, key_url

    def _segment_path(self, index):
        """Return the temp-file path of the segment with 0-based ``index``."""
        return os.path.join(self.tmp, str(index + 1) + '.ts')

    def download_ts(self):
        """Fetch the playlist, then download (and decrypt) all of its segments.

        Raises:
            requests.HTTPError: if the playlist, the key or a segment request
                returns an error status.
            Exception: any other error raised inside a segment worker is
                re-raised here instead of being lost in its future.
        """
        playlist = requests.get(self.url, headers=self.headers)
        playlist.raise_for_status()
        player_list, key_url = self._parse_playlist(playlist.text)

        os.makedirs(self.tmp, exist_ok=True)

        key = b''
        if key_url:
            key_response = requests.get(key_url, headers=self.headers)
            key_response.raise_for_status()
            key = key_response.content  # raw decryption key bytes
            print("key:", key)

        if key:
            print('此视频经过加密')
            print(player_list)  # list of segment URLs
            tasks = [self.executor.submit(self._download2_, tsUrl, key, i)
                     for i, tsUrl in enumerate(player_list)]
        else:
            print('此视频未加密')
            print(player_list)  # list of segment URLs
            tasks = [self.executor.submit(self._download_, tsUrl, i)
                     for i, tsUrl in enumerate(player_list)]
            print(tasks)
        wait(tasks, return_when=ALL_COMPLETED)
        # wait() never raises worker errors; result() surfaces them so a
        # failed segment is not silently missing from the merged file.
        for task in tasks:
            task.result()
        print('下载完成')

    def _download_(self, tsUrl, index):
        """Download one unencrypted segment into the temp directory."""
        res = requests.get(tsUrl, headers=self.headers)
        res.raise_for_status()
        with open(self._segment_path(index), 'wb') as file:
            file.write(res.content)
        print('正在写入第{}个文件'.format(index + 1))

    def _download2_(self, tsUrl, key, index):
        """Download one segment and write its AES-CBC-decrypted content."""
        # NOTE(review): the key doubles as the IV here. RFC 8216 takes the IV
        # from the tag's IV attribute or the media sequence number; behaviour
        # is kept as-is - confirm against the target playlists.
        crypto = AES.new(key, AES.MODE_CBC, key)
        res = requests.get(tsUrl, headers=self.headers)
        res.raise_for_status()
        with open(self._segment_path(index), 'wb') as file:
            file.write(crypto.decrypt(res.content))
        print('正在写入第{}个文件'.format(index + 1))

    def merge(self, file_name):
        """Concatenate the downloaded segments into ``<file_name>.mp4``.

        Segments ``1.ts`` .. ``N.ts`` are appended in order, where N is the
        number of entries in the temp directory; the directory is removed
        afterwards.
        """
        segment_count = len(os.listdir(self.tmp))
        with open('%s.mp4' % file_name, 'wb+') as merged:
            for index in range(segment_count):
                # Each segment is closed as soon as it has been copied.
                with open(self._segment_path(index), 'rb') as segment:
                    shutil.copyfileobj(segment, merged)
        print('合并完成')
        # Portable replacement for the shell commands `rd /s/q` / `rm -rf`.
        shutil.rmtree(self.tmp)

    def download(self, file_name):
        """Download all segments and merge them into ``<file_name>.mp4``."""
        self.download_ts()
        self.merge(file_name)


if __name__ == '__main__':
    url = 'http://1252043158.vod2.myqcloud.com/1d93b969vodtranscq1252043158/5118f4575285890800411211515/drm/v.f240.m3u8'
    start = time.time()  # start time
    d = m3u8(url)
    d.download("第01讲:程序运行时,内存到底是如何进行分配的?")
    print('共耗时: %s' % (time.time() - start))
import json
import re
import time

# pip install requests -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
import requests

from m3u8 import m3u8

# One HTTP session shared by every page request.
res = requests.session()
# Lesson title -> video playlist URL, filled in by shi().
maps = {}


def shi(url):
    """Collect the video URL of every lesson listed on a course page.

    The page is expected to embed ``window.courseInfo = {...};``. That JSON
    object is parsed and, for each lesson that has video media, the lesson
    title is mapped to the media's ``fileUrl``.

    Args:
        url: URL of the course info page.

    Returns:
        The module-level ``maps`` dict, updated in place with
        ``{lesson theme: fileUrl}`` entries.

    Raises:
        IndexError: if the page does not contain ``window.courseInfo``.
    """
    print(url)
    ss = res.get(url).text
    found = re.search(r"window\.courseInfo = (.+);", ss)
    if found is None:
        # IndexError is what the previous `findall(...)[0]` raised; the type
        # is kept so existing handlers still work, now with a message.
        raise IndexError("window.courseInfo not found in page: %s" % url)
    r = found.group(1)
    print(r)
    b = json.loads(r)
    print(b["courseSections"])
    for item in b["courseSections"]:
        for lesson in item["courseLessons"]:
            # .get() treats both a missing key and an explicit JSON null as
            # "no video yet".
            media = lesson.get("videoMedia")
            if media:
                maps[lesson["theme"]] = media["fileUrl"]
                print(item["sectionName"], lesson["theme"], media["fileUrl"])
            else:
                print(item["sectionName"], lesson["theme"], "not update")
    print(maps)
    return maps


# NOTE: the `if __name__ == '__main__':` guard is commented out in this script,
# so the statements below also run when the module is imported.
# Alternative course URL:
# url = 'https://kaiwu.lagou.com/course/courseInfo.htm?courseId=69#/detail/pc?id=1898'
start = time.time()  # start time
url = 'https://kaiwu.lagou.com/course/courseInfo.htm?courseId=67#/detail/pc?id=1585'
maps = shi(url)
print(maps)
for k, v in maps.items():
    m3u8(v).download(k)
print('共耗时: %s' % (time.time() - start))