Loading

Python 下载网站的 m3u8 视频

GitHub链接
https://github.com/WindSnowLi/My-python-tools/tree/main/DownloadM3u8

原文:https://www.blog.hiyj.cn/article/detail/75

下载部分

# -*- coding: UTF-8 -*-
import urllib.request
import re
from concurrent.futures import ProcessPoolExecutor
import os
import datetime
import socket

# Set the default socket timeout to 30 s.
# Tune this value to the download speed of the target site.
socket.setdefaulttimeout(30)

# 下载单个文件
def down(save, links, order, number, max_retries=10):
    """Download a single media segment to ``<save>/<number>.ts``.

    Args:
        save: Directory the segment file is written into.
        links: URL of the m3u8 playlist; used as the base when resolving
            ``order``.
        order: Segment reference taken from the playlist. May be a relative
            name, a root-relative path, or an absolute URL.
        number: Sequence number used for the output file name and in the
            log messages.
        max_retries: Maximum number of attempts made when the request times
            out. Values below 1 are treated as 1.

    Returns:
        -1 when the segment was downloaded, otherwise ``number`` so the
        caller can report which segment failed.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    try:
        # urljoin resolves relative names against the playlist's directory
        # and leaves absolute segment URLs untouched.
        linking = urljoin(links, order)
        target = save + "/" + str(number) + '.ts'
        attempts = max(1, max_retries)
        for attempt in range(1, attempts + 1):
            try:
                urllib.request.urlretrieve(linking, target)
                break
            except socket.timeout:
                # Retry on timeout, but give up after the last attempt so a
                # dead server cannot block this worker forever.
                if attempt == attempts:
                    raise
        print('进程{0}下载完成'.format(number))
        return -1
    except Exception as identifier:
        print(identifier)
        return number

# 获取m3u8中的视频列表
def getlist(links):
    """Fetch an m3u8 playlist and return the ``.ts`` entries it lists.

    Args:
        links: URL of the m3u8 playlist.

    Returns:
        A list of strings, one per playlist line that contains ``.ts``;
        each entry is the line's text up to and including its last ``.ts``.
    """
    request = urllib.request.Request(url=links)
    # The context manager closes the response even if read/decode raises.
    with urllib.request.urlopen(request) as response:
        playlist_text = response.read().decode()
    return re.findall(r'.*\.ts', playlist_text)

# 读取.m3u8文件中的视频列表
def read_m3u8(file_path):
    """Read a local ``.m3u8`` file and return the ``.ts`` entries it lists.

    Args:
        file_path: Path of the UTF-8 encoded playlist file.

    Returns:
        A list of strings, one per playlist line that contains ``.ts``;
        each entry is the line's text up to and including its last ``.ts``.
    """
    # ``with`` guarantees the handle is closed even if decoding fails.
    with open(file_path, "r", encoding='UTF-8') as m3u8_file:
        m3u8_file_str = m3u8_file.read()
    return re.findall(r'.*\.ts', m3u8_file_str)

# 使用.m3u8文件下载
def use_m3u8_down(savePath, order, number):
    """Download the segment at URL ``order`` to ``<savePath>/<number>.ts``.

    Args:
        savePath: Directory the segment file is written into.
        order: Full URL of the segment to fetch.
        number: Sequence number used for the file name and the log message.

    Returns:
        -1 on success, otherwise ``number`` after printing the error.
    """
    try:
        destination = savePath + '/' + str(number) + '.ts'
        urllib.request.urlretrieve(order, destination)
        print('进程{0}下载完成'.format(number))
    except Exception as error:
        print(error)
        return number
    return -1


if __name__ == "__main__":
    # Playlist URL to download (placeholder; replace with a real address).
    link_list = 'https://**********.m3u8'

    # Use a timestamped output directory; append 'x' until the name is free
    # so an existing directory is never reused.
    save_path = "./" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    while os.path.exists(save_path):
        save_path += 'x'
    os.makedirs(save_path)

    # Pool size: raise it when the site is slow and lower it when the site is
    # fast, so that the local bandwidth is used as fully as possible.
    futures = {}
    # The context manager shuts the pool down once all results are collected.
    with ProcessPoolExecutor(max_workers=30) as processPool:
        # A local playlist can be used instead by iterating over
        # read_m3u8('./play.m3u8').
        for number, i in enumerate(getlist(link_list), start=1):
            try:
                job = processPool.submit(down, save_path, link_list, i, number)
                futures[job] = number
                print('进程{0}进入下载'.format(number))
            except Exception as e:
                print(e)
                print("Error: Unable to start {0} the thread".format(i))

        for job in futures:
            # Named ``failed`` rather than ``re`` so the module-level ``re``
            # import is not overwritten.
            failed = job.result()
            if failed != -1:
                print('{0}下载失败'.format(failed))

合并部分

# -*- coding: UTF-8 -*-
from genericpath import exists
import os

# Directory holding the downloaded .ts segments
path = '20210131182221'
# Directory the merged file is written to
save_path = 'marge'

# Collect the segment files. Only names of the form "<number>.ts" are kept so
# that stray files in the directory cannot break the numeric sort below.
file_list = [
    name for name in os.listdir(path)
    if name.endswith('.ts') and name[:-3].isdecimal()
]
# Sort numerically by the part before ".ts" (1.ts, 2.ts, ..., 10.ts).
file_list.sort(key=lambda x: int(x[:-3]))

# Create the output directory if it does not exist yet.
os.makedirs(save_path, exist_ok=True)

# Append every segment, in order, to the target file in binary mode.
# NOTE(review): append mode means re-running the script against an existing
# target file adds the segments a second time — delete the old file first.
with open('./' + save_path + '/' + path + '.ts', "ab") as target:
    for temp in file_list:
        with open(path + '/' + temp, "rb") as temp_file:
            target.write(temp_file.read())
        print(temp)
posted @ 2021-02-15 09:35  WindSnowLi  阅读(34)  评论(0编辑  收藏  举报