# 爬取咪哩咪哩动漫视频
# -*- coding: utf-8 -*-
# __author__ = "maple"
"""Download one anime episode from milimili.tv and merge it into one file.

Example segment URL:
    http://dalao.wahaha-kuyun.com/20201122/1556_fd900088/1000k/hls/228f9bea95b000033.ts

Comparing the segment links of several different videos shows that each link
is made of three parts: a date, an id and the .ts file name.

1. Find where the .ts file names come from: searching for one of the names
   shows that they are listed in the response of an ``index.m3u8`` address.
2. Find where that ``index.m3u8`` address comes from: searching for it shows
   which address returns content containing the link.
3. Analyse the parameters of the address found in step 2.
"""
import concurrent.futures
import os
import re
import shutil
import subprocess  # noqa: F401 - retained from the original import list
import time  # noqa: F401 - retained from the original import list
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree

# Headers sent with every request.
REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) '
        'AppleWebKit/534.50 (KHTML, like Gecko)'
    ),
}
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30

# Captures the m3u8 address embedded in the player page.
_VID_RE = re.compile(r"src='/yun/yun\.php\?vid=(.*?)'")
# Characters that are whitespace or not allowed in Windows/POSIX file names.
_UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\s]')


def _fetch(url):
    """GET *url* with the shared headers and timeout; raise on an HTTP error."""
    response = requests.get(
        url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def get_m3u8(url, id, nub):
    """
    Get the m3u8 playlist address and the video name.

    :param url: player address whose response embeds the m3u8 address
    :param id: video id, used to build the episode page address
    :param nub: episode number, used to build the episode page address
    :return: tuple ``(m3u8 address, video name)``; the name has whitespace
        and file-name-unsafe characters removed and falls back to
        ``"<id>_<nub>"`` when no title text is found
    :raises ValueError: if the player response contains no m3u8 address
    :raises requests.RequestException: if a request fails
    """
    match = _VID_RE.search(_fetch(url).text)
    if match is None:
        raise ValueError(f'no m3u8 address found in the response of {url}')
    new_m3u8_url = match.group(1).replace(
        'index.m3u8', '1000k/hls/index.m3u8')

    page_url = f'http://www.milimili.tv/anime/{id}/{nub}/'
    page = etree.HTML(_fetch(page_url).text)
    # etree.HTML returns None when the document has no parsable content.
    texts = [] if page is None else page.xpath(
        '/html/body/div[2]/div[1]//text()')
    # The name is later used as a file name, so strip unsafe characters
    # (this also covers the '>' separators and the spaces in the title).
    name = _UNSAFE_NAME_CHARS.sub('', ''.join(texts)).replace('咪哩咪哩', '')
    return new_m3u8_url, name or f'{id}_{nub}'


def get_ts_url(url):
    """
    Get the addresses of the .ts video segments listed in a playlist.

    :param url: address of the ``index.m3u8`` playlist
    :return: dict mapping each segment name (without the ``.ts`` suffix) to
        its absolute address, in playlist order
    :raises requests.RequestException: if the request fails
    """
    segments = {}
    for line in _fetch(url).text.splitlines():
        uri = line.strip()
        # Lines starting with '#' are playlist tags, not segment addresses.
        if not uri or uri.startswith('#'):
            continue
        file_name = os.path.basename(urlsplit(uri).path)
        if not file_name.endswith('.ts'):
            continue
        # urljoin resolves the segment relative to the playlist address.
        segments[file_name[:-len('.ts')]] = urljoin(url, uri)
    return segments


def save_video(url, name):
    """
    Download one segment and save it as ``<name>.ts``.

    :param url: address of the segment
    :param name: file name to write, without the ``.ts`` suffix
    :return: None
    :raises requests.RequestException: if the download fails
    """
    data = _fetch(url).content
    with open(name + '.ts', 'wb') as f:  # change the path to suit your setup
        f.write(data)
    print(name)


def _merge_segments(segment_names, output_path):
    """Concatenate the ``<name>.ts`` files, in the given order, into one file."""
    with open(output_path, 'wb') as merged:
        for segment in segment_names:
            with open(f'{segment}.ts', 'rb') as part:
                shutil.copyfileobj(part, merged)


def main():
    """Ask for a video id and episode, download every segment, merge them."""
    video_id = int(input('请输入视频id:').strip())  # video id
    # Episode number as shown in the address bar,
    # e.g. http://www.milimili.tv/anime/3753/13/
    nub = int(input('请输入视频集数:').strip())
    url = (
        'http://www.milimili.tv/e/action/player_i.php'
        f'?id={video_id}&pid={nub}'
    )
    new_m3u8_url, video_name = get_m3u8(url, video_id, nub)
    ts_dict = get_ts_url(new_m3u8_url)
    if not ts_dict:
        raise SystemExit('未找到任何ts分片,无法合成视频')

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(save_video, ts_url, name)
            for name, ts_url in ts_dict.items()
        ]
        # result() re-raises any download error so that a failed segment
        # stops the run instead of producing an incomplete video.
        for future in futures:
            future.result()

    print('====正在合成视频:{}===='.format(video_name))
    # Merge in playlist order; only the segments of this video are read.
    _merge_segments(ts_dict, f'{video_name}.mp4')
    print('====合成视频成功:{}===='.format(video_name))
    # The segments are removed only after the merge has finished.
    for ts in ts_dict:
        os.remove(f'{ts}.ts')


if __name__ == '__main__':
    main()
# 目前正在学习Python中,如果有什么不对的地方,希望广大朋友指出错误,深表感谢。