(十一)爬取m3u8加密视频

 

 

 

 

 

import random
import os
import re
import requests
import asyncio
import aiohttp
import time
from lxml import etree
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad,unpad

# Playback page that the scraper starts from.
url = "https://www.9tata.cc/play/17360-2-0.html"

# Pool of desktop-browser User-Agent strings. Each entry is written as two
# adjacent string literals that Python concatenates into a single value.
user_agent_list = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 "
    "(KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",

    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 "
    "(KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",

    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 "
    "(KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",

    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 "
    "(KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",

    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 "
    "(KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",

    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 "
    "(KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",

    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 "
    "(KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",

    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",

    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",

    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",

    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",

    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",

    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",

    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",

    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",

    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",

    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 "
    "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",

    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 "
    "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Request headers shared by every HTTP call in this script. The User-Agent is
# chosen once, when the module is loaded, and reused for all requests.
headers = {"User-Agent": random.choice(user_agent_list)}



async def get_m3u8_url():
    """Fetch the playback page and pull out the playlist URL and the title.

    The page at the module-level ``url`` is downloaded with the shared
    ``headers`` (certificate verification disabled via ``ssl=False``).

    Returns:
        A ``(m3u8_url, video_name)`` tuple: the value captured from the
        ``now="...m3u8";`` assignment found in the page text, and the first
        text node under the second ``<a>`` of the page-header ``<h2>``.

    Raises:
        IndexError: if the XPath query yields no text node.
        AttributeError: if the ``now="...m3u8";`` pattern is not in the page.
    """
    async with aiohttp.ClientSession() as client:
        async with client.get(url, headers=headers, ssl=False) as resp:
            page_source = await resp.text()

    # Title first, playlist second -- the same order as the lookups fail in.
    tree = etree.HTML(page_source)
    title_nodes = tree.xpath('//div[@class="page-header"]/h2/a[2]//text()')
    video_name = title_nodes[0]

    playlist_match = re.search('now="(.*?m3u8)";', page_source)
    return playlist_match.group(1), video_name

# Step 2: follow the first playlist to the playlist it refers to.
async def get_m3u8_detail(m3u8_url):
    """Download the playlist at ``m3u8_url`` and resolve the one it points to.

    The third line of the response body is taken as the path of the next
    playlist. That path is appended to ``m3u8_url`` with its last three path
    components removed.

    Args:
        m3u8_url: URL of the first playlist.

    Returns:
        A ``(playlist_url, directory)`` tuple: the absolute URL of the next
        playlist and the directory part of the path read from the response.

    Raises:
        IndexError: if the response has fewer than three lines.
    """
    async with aiohttp.ClientSession() as client:
        async with client.get(m3u8_url, headers=headers, ssl=False) as resp:
            body = await resp.text()

    nested_path = body.split('\n')[2]

    # Drop the last three path components of the first playlist's URL.
    url_root = m3u8_url
    for _ in range(3):
        url_root = os.path.dirname(url_root)

    return url_root + nested_path, os.path.dirname(nested_path)

async def get_key(m3u8_url):
    """Download the decryption key and keep a copy in ``videos/key.key``.

    Args:
        m3u8_url: Base URL (no trailing slash); ``/key.key`` is appended to it.

    Returns:
        The key as the text decoded from the HTTP response.
    """
    print(m3u8_url)
    async with aiohttp.ClientSession() as session:
        async with session.get(m3u8_url+"/key.key", headers=headers,ssl=False) as response:
            # NOTE(review): the key is decoded as text and later re-encoded as
            # UTF-8 by the caller; a key containing non-text bytes would not
            # survive that round trip -- confirm the server sends ASCII keys.
            text = await response.text()

    # This is the first write into videos/; create the directory so a fresh
    # checkout does not fail with FileNotFoundError.
    os.makedirs("videos", exist_ok=True)
    with open("videos/key.key", "w") as f:
        f.write(text)
    return text


# Step 4: read the segment order from the media playlist.
async def get_ts_list(m3u8_detail,path):
    """Download the media playlist, localise it, and list its segments.

    Every occurrence of ``path + '/'`` in the playlist is replaced by the
    absolute path of the local ``videos`` directory, so the rewritten playlist
    refers to local files. The result is saved as ``videos/old_index.m3u8``.

    Args:
        m3u8_detail: URL of the media playlist.
        path: Remote directory prefix to replace inside the playlist.

    Returns:
        A ``(ts_list, base_path)`` tuple: the ``.ts`` file names in playlist
        order, and the directory part of ``m3u8_detail``.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(m3u8_detail, headers=headers,ssl=False) as response:
            text = await response.text()

    # os.path.join(..., "") appends the platform separator. The original
    # hard-coded '\\', which yields paths that only resolve on Windows.
    local_prefix = os.path.join(os.path.abspath("videos"), "")
    text = text.replace(path+'/', local_prefix)
    with open("videos/old_index.m3u8","w") as f:
        f.write(text)

    # A segment name is whatever follows the last separator of a rewritten
    # entry. On Windows this is the same pattern as before (a backslash).
    ts_list = re.findall(re.escape(os.sep) + r"([A-Za-z0-9]*\.ts)", text)
    print(ts_list)
    base_path = os.path.dirname(m3u8_detail)
    return ts_list, base_path

async def download(base_path, ts, aes):
    """Download one encrypted segment, decrypt it, and save it under videos/.

    Args:
        base_path: URL of the directory that holds the segments.
        ts: Segment file name (or relative path) as listed in the playlist.
        aes: Cipher object whose ``decrypt`` is applied to the segment bytes.

    Raises:
        ValueError: if the payload cannot be decrypted or its padding is
            invalid (for example when the server returned an error page).
    """
    # Step 5: fetch the .ts segment.
    ts_path = base_path+'/'+ts
    ts_name = os.path.basename(ts)
    print(ts_path)
    async with aiohttp.ClientSession() as session:
        async with session.get(ts_path, headers=headers,ssl=False) as response:
            encrypted = await response.content.read()

    # Decrypt first, then strip the PKCS#7 padding. Padding the ciphertext
    # before decrypting (as the code used to do) appends a block of garbage
    # to every segment.
    decrypted = unpad(aes.decrypt(encrypted), AES.block_size)

    # Open the file only once the data is ready, so a failed request does not
    # leave an empty segment file behind.
    with open(f"videos/{ts_name}", "wb") as f:
        f.write(decrypted)
    print(f"{ts_name}下载完成")


def change_m3u8():
    """Write a copy of the local playlist without its ``#EXT-X-KEY`` lines.

    The segments on disk are already decrypted, so the key declaration has to
    go or the player would try to decrypt them a second time.

    Reads ``videos/old_index.m3u8`` and writes ``videos/index.m3u8``. The
    output lives in ``videos/`` because ``merge`` changes into that directory
    before handing ``index.m3u8`` to ffmpeg.
    """
    with open("videos/old_index.m3u8",'r') as f:
        lines = f.readlines()
    with open("videos/index.m3u8", 'w') as file:
        # Was file.readline(line), which raises on a write-only handle and
        # never wrote anything.
        file.writelines(line for line in lines if "#EXT-X-KEY" not in line)


def merge(video_name="flash"):
    """Concatenate the downloaded segments into ``<video_name>.mp4`` with ffmpeg.

    Changes the working directory to the ``videos`` folder next to this
    script, then runs ffmpeg on ``index.m3u8`` with stream copy.

    Args:
        video_name: Output file name without the ``.mp4`` extension.
    """
    # Function-scope import keeps this fix self-contained.
    import subprocess

    os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)),"videos"))

    # The title is scraped from a web page. Passing an argv list instead of a
    # shell string keeps spaces and shell metacharacters in it from splitting
    # the command or being executed.
    command = [
        "ffmpeg",
        "-allowed_extensions", "ALL",
        "-i", "index.m3u8",
        "-c", "copy",
        f"{video_name}.mp4",
    ]
    try:
        subprocess.run(command, check=False)
    except FileNotFoundError:
        # os.system only reported a missing ffmpeg through the shell; keep
        # that non-fatal behaviour instead of raising.
        print("ffmpeg executable not found; segments were not merged")

async def main():
    """Run the pipeline: locate playlists, fetch key, download, merge.

    Raises:
        ValueError: if the media playlist yields no ``.ts`` segments.
    """
    m3u8_url, name = await get_m3u8_url()
    m3u8_detail,path = await get_m3u8_detail(m3u8_url)
    key = await get_key(os.path.dirname(m3u8_detail))
    key_bytes = key.encode('utf-8')
    # NOTE(review): this IV is sixteen ASCII '0' characters, not sixteen zero
    # bytes -- confirm against the IV attribute of the playlist's EXT-X-KEY.
    iv = b"0000000000000000"
    ts_list, base_path = await get_ts_list(m3u8_detail,path)
    if not ts_list:
        # asyncio.wait([]) used to fail here with an unrelated-looking error.
        raise ValueError("no .ts segments found in the playlist")

    # A CBC cipher object carries its chaining state from one decrypt() call
    # to the next, so sharing one object would feed the tail of one segment
    # into the start of another. Each segment gets a fresh cipher.
    tasks = [
        asyncio.create_task(
            download(base_path, ts, AES.new(key_bytes, AES.MODE_CBC, iv))
        )
        for ts in ts_list
    ]

    # Still best-effort, as with asyncio.wait, but failures are now reported
    # instead of being silently dropped.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for ts, result in zip(ts_list, results):
        if isinstance(result, BaseException):
            print(f"{ts} failed: {result!r}")

    change_m3u8()
    merge(name)


# Script entry: run the whole pipeline once and print the elapsed wall-clock
# time in seconds.
start = time.time()
asyncio.run(main())
elapsed = time.time() - start
print(f"耗时:{elapsed}")

 

posted @ 2024-06-11 15:55  Mrterrific  阅读(23)  评论(0)  编辑  收藏  举报