# 爬取ts类型视频文件并且合并成mp4文件
# (Crawl .ts video segment files and merge them into a single mp4 file.)

# !/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2020/12/21 上午8:51
# @Author : SR
# @Email : srcoder@1163.com
# @File : spider.py
# @Software: PyCharm

import os

import requests

from multiprocessing.pool import ThreadPool


class SpiderMovieFromChenYu:
    """Download numbered ``.ts`` video segments and merge them into one mp4.

    Typical use: call :meth:`mkdir_directory`, run :meth:`get_ts` for every
    segment number (possibly from several threads), then call
    :meth:`check_ts` to retry the failures and merge the result.

    Args:
        save_ts_path: Directory where the downloaded ``.ts`` segments are kept.
        save_movie_path: Directory where the merged mp4 is written.
        fail_ts_list: Optional list of segment numbers already known to have
            failed. The list object is stored as-is (not copied); when it is
            omitted every instance gets its own fresh list.
        ts_url_template: ``%``-style template that receives the segment
            number. Defaults to ``DEFAULT_TS_URL``.
        movie_name: File name of the merged video. Defaults to
            ``DEFAULT_MOVIE_NAME``.
    """

    # Defaults keep the source URL and output name this script was written for.
    DEFAULT_TS_URL = 'https://sina.com-h-sina.com/20180815/9998_f9aa34bf/1000k/hls/c0cdc4673f4%03d.ts'
    DEFAULT_MOVIE_NAME = '明日的我与昨日你的约会.mp4'

    def __init__(self, save_ts_path, save_movie_path, fail_ts_list=None,
                 ts_url_template=None, movie_name=None):

        self.save_ts_path = save_ts_path
        self.save_movie_path = save_movie_path
        # A literal ``[]`` default would be shared by every instance.
        self.fail_ts_list = [] if fail_ts_list is None else fail_ts_list
        self.ts_url_template = self.DEFAULT_TS_URL if ts_url_template is None else ts_url_template
        self.movie_name = self.DEFAULT_MOVIE_NAME if movie_name is None else movie_name

        self.headers = {
            'Referer': 'http://www.chenyutv.com/',

            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
        }

    def mkdir_directory(self):
        """Create the segment and movie directories if they do not exist.

        Missing parent directories are created as well, and calling this
        again when the directories already exist is a no-op.
        """
        for directory in (self.save_ts_path, self.save_movie_path):
            os.makedirs(directory, exist_ok=True)

    def get_ts(self, number, flag=False):
        """Download segment ``number`` unless it is already on disk.

        Args:
            number: Segment number substituted into the URL template.
            flag: True when this call is a retry of a previously failed
                segment; on success the number is then removed from
                ``fail_ts_list``.

        A failed download (exception or non-200 response) adds ``number`` to
        ``fail_ts_list`` if it is not already there. Nothing is raised.
        """
        play_url = self.ts_url_template % number
        ts_name = play_url.split('/')[-1]  # file name of this segment
        ts_path = os.path.join(self.save_ts_path, ts_name)

        # An existing file counts as success, so a retry of a segment that is
        # already on disk still clears it from the failure list.
        downloaded = os.path.exists(ts_path) or self._download(play_url, ts_path)

        if downloaded:
            if flag and number in self.fail_ts_list:
                self.fail_ts_list.remove(number)
        elif number not in self.fail_ts_list:
            self.fail_ts_list.append(number)

    def _download(self, play_url, ts_path):
        """Fetch ``play_url`` into ``ts_path``; return True on success."""
        part_path = ts_path + '.part'
        try:
            response = requests.get(play_url, headers=self.headers, timeout=60)
            if response.status_code != 200:
                print("%s:下载失败 (HTTP %s)" % (play_url, response.status_code))
                return False
            # Write to a side file first so an interrupted write never leaves
            # a truncated segment that later looks "already downloaded".
            with open(part_path, 'wb') as f:
                f.write(response.content)
            os.replace(part_path, ts_path)
            return True
        except Exception as e:
            # Worker boundary: one bad segment must not abort the whole pool;
            # the caller records the failure so it can be retried.
            print("%s:下载失败 (%s)" % (play_url, e))
            try:
                os.remove(part_path)
            except OSError:
                pass  # nothing was written, or it is already gone
            return False

    def check_ts(self, max_rounds=5):
        """Retry the failed segments, then merge everything into the mp4.

        Args:
            max_rounds: Maximum number of passes over the failure list.
                ``None`` retries until every segment has been downloaded.

        The merge runs even when some segments are still missing after the
        last round; the missing numbers are printed first.
        """
        print("开始检查:")
        print(self.fail_ts_list)
        rounds = 0
        while self.fail_ts_list and (max_rounds is None or rounds < max_rounds):
            rounds += 1
            # Iterate over a snapshot: get_ts removes entries on success.
            for number in list(self.fail_ts_list):
                self.get_ts(number, True)
                if number not in self.fail_ts_list:
                    print("%s:下载完毕" % number)
                print(self.fail_ts_list)

        if self.fail_ts_list:
            print("重试%s轮后仍有ts文件下载失败, 合并的视频将缺少这些片段: %s"
                  % (rounds, self.fail_ts_list))
        else:
            print("ts 文件下载完成!")
        self.get_video()

    def get_video(self):
        """Concatenate the downloaded ``.ts`` segments into the movie file.

        Only files ending in ``.ts`` are merged. The output file is rewritten
        from scratch on every call, so calling this twice does not duplicate
        the content. Nothing is written when there are no segments.
        """
        ts_names = [name for name in os.listdir(self.save_ts_path) if name.endswith('.ts')]
        if not ts_names:
            return
        # Shorter names first, then alphabetical: for names that differ only
        # in their number this is numeric order even past the zero padding
        # (…999.ts before …1000.ts).
        ts_names.sort(key=lambda name: (len(name), name))

        movie_path = os.path.join(self.save_movie_path, self.movie_name)
        with open(movie_path, 'wb') as movie:
            for name in ts_names:
                with open(os.path.join(self.save_ts_path, name), 'rb') as segment:
                    movie.write(segment.read())
                print("%s:写入完成" % name)


if __name__ == '__main__':
    # Ask for the inclusive segment range and the two output directories.
    min_number = int(input('请输入ts的起始数字>>:').strip())
    max_number = int(input('请输入ts的结尾数字>>:').strip()) + 1  # +1: range() excludes its end
    save_ts_path = input('请输入ts保存文件路径>>:').strip()
    save_movie_path = input('请输入视频保存文件路径>>:').strip()

    spider = SpiderMovieFromChenYu(save_ts_path, save_movie_path)
    spider.mkdir_directory()

    # First pass: download every segment concurrently. map() blocks until all
    # workers are done, so leaving the ``with`` block is safe.
    with ThreadPool(100) as pool:
        pool.map(spider.get_ts, range(min_number, max_number))

    # Second pass: retry whatever failed, then merge the segments into the
    # mp4. (The original called get_ts() with no argument here, which raises
    # TypeError and never reaches the merge.)
    spider.check_ts()
# posted @ 2020-12-22 09:31  SR丶  阅读(796)  评论(0)  编辑  收藏  举报