python爬取电影

现在很多视频网站的电影都是由许多 ts 文件片段组成的。要想下载电影,就需要把这些 ts 文件片段全部下载下来,然后合成一部完整的电影。

这个程序配置好以下三个地方就可以下载这类电影

url:电影 m3u8 播放列表文件的地址,程序从中解析出所有 ts 文件片段的文件名

 

 preurl:ts 文件片段 URL 的前缀,与播放列表中的文件名拼接后得到每个片段的完整下载地址

 

 

 packageName 就是我们保存ts文件片段和最终文件的文件夹名称

 

import requests
import re
import os
# copy /b  *.ts target.ts
def get_ts(url):
    """Fetch an m3u8 playlist and return the .ts segment names it lists.

    Args:
        url: Address of the m3u8 playlist file.

    Returns:
        A list of segment file names (runs of word characters and dots
        ending in ``.ts``) in the order they appear in the playlist text.
        An empty list is returned when the server does not answer with
        HTTP 200, so an error page is never parsed as a playlist.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    ret = requests.get(url, timeout=30)
    if ret.status_code != 200:
        print("download {} failed {}".format(url, ret.status_code))
        return []
    # Raw string: "\w" and "\." are regex escapes, not Python string escapes.
    tsList = re.findall(r"[\w.]+\.ts", ret.text)
    print(tsList)
    return tsList

def download_ts(preUrl,tslist,packageName):
    """Download every .ts segment into a folder next to this script.

    Args:
        preUrl: URL prefix; each segment name is appended to it to form the
            download address.
        tslist: Iterable of segment file names to download.
        packageName: Name of the folder (created beside this file if it is
            missing) that receives the downloaded segments.

    Returns:
        The absolute path of the folder when every segment was saved, or
        ``False`` as soon as one request does not return HTTP 200.
    """
    pckpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), packageName)
    print("check if exist pckpath:", pckpath)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(pckpath, exist_ok=True)
    for u in tslist:
        tsUrl = preUrl + u
        # A timeout keeps one stalled segment from blocking the whole run.
        ret = requests.get(tsUrl, timeout=30)
        if ret.status_code != 200:
            print("download {} failed {}".format(tsUrl, ret.status_code))
            return False
        # os.path.join picks the right separator on every platform and avoids
        # the invalid "\{" escape of a hand-built Windows path.
        with open(os.path.join(pckpath, u), "wb") as f:
            f.write(ret.content)
        print("download {} successfully".format(u))
    print("download all ts passed")
    return pckpath

def ingredient_ts(tsfolder,targetTs):
    """Concatenate every .ts segment in *tsfolder* into ``<targetTs>.ts``.

    The merge is done in Python instead of shelling out to the Windows-only
    ``copy /b`` command, so it works on any platform and is not affected by
    spaces or shell metacharacters in the folder name.

    Args:
        tsfolder: Folder that holds the downloaded segments; the merged file
            is written into the same folder.
        targetTs: Base name (without extension) of the merged file.

    Returns:
        None. The outcome is reported on stdout.

    Note:
        Segments are joined in sorted file-name order, so names must sort in
        playback order (e.g. zero-padded sequence numbers).
    """
    # Guards against a falsy folder (e.g. a failed download step) or a
    # missing directory.
    if not tsfolder or not os.path.isdir(tsfolder):
        print("generate ts file failed")
        return

    target_name = "{}.ts".format(targetTs)
    target_path = os.path.join(tsfolder, target_name)
    # Skip a merged file left over from an earlier run so it is not
    # concatenated into itself.
    segments = sorted(
        name for name in os.listdir(tsfolder)
        if name.lower().endswith(".ts") and name != target_name
    )
    print("generate ts file:{}".format(target_path))
    if not segments:
        print("generate ts file failed")
        return

    try:
        with open(target_path, "wb") as merged:
            for name in segments:
                with open(os.path.join(tsfolder, name), "rb") as part:
                    # Copy in 1 MiB chunks to keep memory use flat.
                    for chunk in iter(lambda: part.read(1024 * 1024), b""):
                        merged.write(chunk)
    except OSError:
        print("generate ts file failed")
    else:
        print("generate ts file passed")

def main(url,package,preurl):
    """Download all segments of a playlist and merge them into one file.

    Args:
        url: Address of the m3u8 playlist.
        package: Folder name for the segments; also the base name of the
            merged output file.
        preurl: URL prefix prepended to each segment name.
    """
    tslist = get_ts(url)
    path = download_ts(preurl, tslist, package)
    # download_ts returns False when a segment fails; merging would then be
    # attempted on a bogus folder path, so stop here instead.
    if not path:
        print("download failed, skip generating ts file")
        return
    ingredient_ts(path, package)

if __name__ == '__main__':
    # Address of the m3u8 playlist for the movie.
    url = "https://m3u8i.vodfile.m1905.com/202301052101/c64bf08f04c027c769af11314f3b9561/movie/2016/08/03/m20160803HSJEQAE1HXNRYVYU/6E338B8E258305701B35BF2AC.m3u8"
    packageName = "叶问2:宗师传奇 Ip Man 2"  # name of the folder that stores the ts files
    preurl = "https://m3u8i.vodfile.m1905.com/202301052101/c64bf08f04c027c769af11314f3b9561/movie/2016/08/03/m20160803HSJEQAE1HXNRYVYU/"  # prefix of each ts file URL
    packageName = packageName.replace(" ","")
    # Drop characters that are reserved in Windows file names (the ":" in the
    # title would otherwise make the folder name invalid there).
    packageName = re.sub(r'[\\/:*?"<>|]', "", packageName)
    main(url,packageName,preurl)

 

 

posted @ 2023-01-04 21:18  腹肌猿  阅读(502)  评论(0)  编辑  收藏  举报