使用python爬取网站电影
获取电影的 .m3u8 文件,使用 Python 协程爬取电影
1.找到需要爬取的电影 url,按 F12 => Network => Fetch/XHR,找到 index.m3u8 文件,例如:
https://vip.ffzy-online2.com/20230416/32868_9cb1983d/index.m3u8
2.使用以下脚本爬取整部电影
"""Download, decrypt and merge an HLS (.m3u8) video using asyncio coroutines.

Pipeline (see ``main``):
  1. fetch the playlist and save it as ``m3u8.txt``;
  2. download every segment concurrently into ``video_1/``;
  3. fetch the AES-128 key named by ``#EXT-X-KEY`` (if any) and write the
     decrypted segments into ``video_3/``;
  4. concatenate the segments, in playlist order, into ``video_3/mm.mp4``.
"""
import asyncio
import os
import re
import shutil
import time
from urllib.parse import urljoin

import aiofiles
import aiohttp
import requests
from Crypto.Cipher import AES  # requires pycryptodome: pip install pycryptodome
from lxml import etree

M3U8_FILE = "m3u8.txt"
RAW_DIR = "video_1"        # segments exactly as downloaded
DECRYPTED_DIR = "video_3"  # segments after decryption, plus the merged output

# Compiled once; matches the key URI of an AES-128 #EXT-X-KEY tag.
_KEY_PATTERN = re.compile(
    r'#EXT-X-KEY:METHOD=AES-128,URI="(?P<key_url>.*?)"', re.S
)


def _read_segment_lines():
    """Return the segment entries of ``m3u8.txt`` in playlist order.

    Tag/comment lines (starting with ``#``) and blank lines are skipped, so
    callers never see an empty string as a segment URL.
    """
    with open(M3U8_FILE, mode="r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line and not line.startswith("#")]


async def download_one(ts_url, session, sem):
    """Download one segment into ``video_1/``, trying at most three times.

    Args:
        ts_url: absolute URL of the segment; its last path component is used
            as the local file name.
        session: shared ``aiohttp.ClientSession``.
        sem: ``asyncio.Semaphore`` bounding the number of parallel downloads.
    """
    file_name = ts_url.split("/")[-1]
    async with sem:
        for _ in range(3):
            try:
                async with session.get(ts_url) as resp:
                    # Without this an HTTP error page would be saved as a
                    # segment and reported as a success.
                    resp.raise_for_status()
                    content = await resp.content.read()
                async with aiofiles.open(f"{RAW_DIR}/{file_name}", mode="wb") as f:
                    await f.write(content)
                print(ts_url, "下载成功")
                break
            except Exception as e:
                # Deliberately broad: the download is best-effort and any
                # failure simply triggers the next attempt.
                print(e)
                print(ts_url, "下载失败")


async def download_all_ts(base_url=""):
    """Download every segment listed in ``m3u8.txt`` concurrently.

    Args:
        base_url: URL of the playlist, used to resolve relative segment
            entries. Absolute entries are left unchanged, and the default
            ``""`` passes every entry through as written.
    """
    os.makedirs(RAW_DIR, exist_ok=True)
    sem = asyncio.Semaphore(200)
    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.create_task(
                download_one(urljoin(base_url, line), session, sem)
            )
            for line in _read_segment_lines()
        ]
        # gather() accepts an empty task list, whereas asyncio.wait() raises
        # ValueError for one.
        await asyncio.gather(*tasks)


def get_key(base_url=""):
    """Fetch the AES-128 key referenced by the playlist.

    Args:
        base_url: URL of the playlist, used to resolve a relative key URI.

    Returns:
        The raw key bytes, or ``None`` when ``m3u8.txt`` contains no
        ``#EXT-X-KEY:METHOD=AES-128`` tag.

    Raises:
        requests.RequestException: if the key cannot be downloaded.
    """
    with open(M3U8_FILE, mode="r", encoding="utf-8") as f:
        for line in f:
            result = _KEY_PATTERN.search(line)
            if result:
                key_url = urljoin(base_url, result.group("key_url"))
                with requests.get(key_url, timeout=30) as resp:
                    resp.raise_for_status()
                    return resp.content
    return None


async def desc_one_ts(file_name, key):
    """Decrypt ``video_1/<file_name>`` into ``video_3/<file_name>``.

    Args:
        file_name: name of the segment file (no directory part).
        key: AES key bytes, or ``None`` for an unencrypted playlist, in which
            case the segment is copied through unchanged.
    """
    async with aiofiles.open(f"./{RAW_DIR}/{file_name}", mode="rb") as f1:
        content = await f1.read()

    if key is None:
        desc_content = content
    else:
        # NOTE(review): the IV is fixed to sixteen ASCII '0' characters. If
        # the playlist's #EXT-X-KEY tag carries an IV attribute this will not
        # match it -- confirm against the playlist being downloaded.
        aes = AES.new(key=key, mode=AES.MODE_CBC, IV=b"0000000000000000")
        desc_content = aes.decrypt(content)

    async with aiofiles.open(f"./{DECRYPTED_DIR}/{file_name}", mode="wb") as f2:
        await f2.write(desc_content)

    if key is None:
        print(file_name, "未加密,直接复制")
    else:
        print(file_name, "解密成功")


async def desc_all_ts(key):
    """Decrypt every segment listed in ``m3u8.txt``.

    A segment that cannot be processed (for example because its download
    failed) is reported and skipped so the remaining segments still finish.

    Args:
        key: AES key bytes from ``get_key``, or ``None`` if unencrypted.
    """
    os.makedirs(DECRYPTED_DIR, exist_ok=True)
    file_names = [line.split("/")[-1] for line in _read_segment_lines()]
    results = await asyncio.gather(
        *(desc_one_ts(name, key) for name in file_names),
        return_exceptions=True,
    )
    for name, result in zip(file_names, results):
        if isinstance(result, Exception):
            print(name, "解密失败", result)


def merge():
    """Concatenate the decrypted segments into ``video_3/mm.mp4``.

    Segments are appended in playlist order. The concatenation is done in
    Python rather than through the Windows-only ``copy /b`` shell command, so
    it works on any platform and file names never reach a shell.
    """
    print("记载m3u8")
    file_list = [line.split("/")[-1] for line in _read_segment_lines()]
    print("记载m3u8,成功")
    if not file_list:
        return

    output_path = os.path.join(DECRYPTED_DIR, "mm.mp4")
    with open(output_path, mode="wb") as out:
        for file_name in file_list:
            segment_path = os.path.join(DECRYPTED_DIR, file_name)
            try:
                with open(segment_path, mode="rb") as segment:
                    shutil.copyfileobj(segment, out)
            except FileNotFoundError:
                # A segment that failed to download or decrypt should not
                # abort the whole merge.
                print(file_name, "缺失,已跳过")


def main(url="https://qq.1080tg.com/20211108/867XNBBq/hls/index.m3u8"):
    """Run the full pipeline: playlist -> segments -> decrypt -> merge.

    Args:
        url: URL of the index.m3u8 file found in step 1; change the default
            or pass your own.

    Raises:
        requests.RequestException: if the playlist or key download fails.
    """
    heads = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0"
    }
    resp = requests.get(url, headers=heads, timeout=30)
    resp.raise_for_status()
    with open(M3U8_FILE, mode="wb") as f:
        f.write(resp.content)
    print("m3u8文件下载成功")

    # Resolve relative playlist entries against the URL actually served
    # (i.e. after any redirects).
    base_url = resp.url
    asyncio.run(download_all_ts(base_url))
    key = get_key(base_url)
    asyncio.run(desc_all_ts(key))
    merge()


if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了