Python | 某大学mooc字幕下载

运行效果


代码如何使用

运行环境

Python 3
requests模块(第三方库,需要先安装:pip install requests);re模块是Python标准库自带的,无需安装

1.复制课程id。打开浏览器,进入到需要下载字幕的课程界面,课程id在链接的tid里,例如:https://www.icourse163.org/course/ZZU-1207201802?tid=1467133722

    # 注意不要加引号
    course_id = 1467133722

2.复制cookie。按F12,找一个post请求的url,把cookie的所有内容复制到这里

    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
        'cookie': 'cookie贴这里哈'
    }

不能在控制台用document.cookie,这个不行的。
遇到卡住的问题看下图。

找个POST请求的url

粘贴cookie时,最好用单引号''把它括起来:cookie的内容里可能含有双引号,如果外层也用双引号,就需要对里面的双引号进行转义,虽然也可以,但会稍微麻烦一些

3.复制"NTESSTUDYSI"。把Cookies的"NTESSTUDYSI"的值(一般在第一个)复制粘贴到这里

    csrfKey = "这里也要贴,看注释"

后面如果需要下载其它课程的字幕,一般更改课程的id就行了。
但有时候也可能不可以,因为cookie会过期,过期就重新复制。

源代码

代码如果看不懂也没有关系,会用就行了O(∩_∩)O~

import requests
import re


# Collect the id of every unit with contentType 1 in a course term.
def get_lesson_Id(term_id: int, timeout: float = 10):
    """Return the unit ids of a course term that have ``contentType`` 1.

    The request uses the module-level ``csrfKey`` and ``header`` values, so
    both must be set before this function is called.

    Args:
        term_id: The ``tid`` value taken from the course URL, as an ``int``
            or as a numeric ``str``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``id`` of every unit whose ``contentType`` is 1
        (presumably the video units -- confirm against the API), in the
        order the server lists them.

    Raises:
        RuntimeError: If the response carries no ``result`` payload.
        requests.RequestException: On network errors, timeouts or an HTTP
            error status.
    """
    if isinstance(term_id, str):
        term_id = int(term_id)
    url = f'https://www.icourse163.org/web/j/courseBean.getLastLearnedMocTermDto.rpc?csrfKey={csrfKey}'
    data = {
        'termId': term_id
    }
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.post(url, headers=header, data=data, timeout=timeout)
    response.raise_for_status()
    result = response.json().get('result')
    if not result:
        # Fail with a readable message instead of "NoneType is not subscriptable".
        raise RuntimeError(
            "No 'result' in the course response; check that the cookie and "
            "csrfKey are still valid."
        )
    chapters = result['mocTermDto']['chapters'] or []
    lessonsId = []
    for chapter in chapters:
        # A null list is treated like an empty one so that a chapter or
        # lesson without children does not abort the whole scan.
        for lesson in chapter["lessons"] or []:
            for unit in lesson['units'] or []:
                if unit['contentType'] == 1:
                    lessonsId.append(unit['id'])
    return lessonsId

# Fetch the name, videoId and signature of a single unit.
def get_name_videoId_signature(lesson_id: int, timeout: float = 10):
    """Return ``(name, videoId, signature)`` for one unit.

    The request uses the module-level ``csrfKey`` and ``header`` values, so
    both must be set before this function is called.

    Args:
        lesson_id: The unit id, sent to the server as ``bizId``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A 3-tuple ``(name, videoId, signature)`` taken from the response's
        ``videoSignDto`` object. ``name`` is cut at the first ``.mp4`` so
        the video file extension is not part of it.

    Raises:
        RuntimeError: If the response has no ``result`` or no
            ``videoSignDto`` payload.
        requests.RequestException: On network errors, timeouts or an HTTP
            error status.
    """
    url = f"https://www.icourse163.org/web/j/resourceRpcBean.getResourceToken.rpc?csrfKey={csrfKey}"
    data = {
        'bizId': lesson_id,
        'bizType': 1,
        'contentType': 1,
    }
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.post(url, headers=header, data=data, timeout=timeout)
    response.raise_for_status()
    result = response.json().get("result")
    video_sign = result.get("videoSignDto") if result else None
    if not video_sign:
        # Fail with a readable message instead of "NoneType is not subscriptable".
        raise RuntimeError(
            f"No video token returned for unit {lesson_id}; check that the "
            "cookie and csrfKey are still valid."
        )
    videoId = video_sign['videoId']
    signature = video_sign['signature']
    name = video_sign["name"].split(".mp4")[0]
    return name, videoId, signature

# Fetch the subtitle download keys of a single video.
def get_nosKey(video_id, signature, timeout: float = 10):
    """Return the ``nosKey`` of every subtitle track of a video.

    Args:
        video_id: The ``videoId`` of the video.
        signature: The ``signature`` that belongs to ``video_id``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``nosKey`` values, one per entry of the response's
        ``srtCaptions`` list. The list is empty when the video has no
        subtitle entries.

    Raises:
        RuntimeError: If the response carries no ``result`` payload.
        requests.RequestException: On network errors, timeouts or an HTTP
            error status.
    """
    url = f'https://vod.study.163.com/eds/api/v1/vod/video?videoId={video_id}&signature={signature}&clientType=1'
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    result = response.json().get('result')
    if not result:
        raise RuntimeError(f"No 'result' in the video response for videoId {video_id}.")
    # A missing or null caption list means "no subtitles": return an empty
    # list so the caller's emptiness check handles it instead of crashing.
    srtCaptions = result.get('srtCaptions') or []
    # There may be more than one nosKey per video.
    return [caption['nosKey'] for caption in srtCaptions]

# Replace characters that are unsafe in a file name.
def _safe_filename(name):
    """Return *name* with path separators and reserved characters replaced by ``_``."""
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', "_", name).strip()
    return cleaned or "subtitle"


# Pull the caption text out of an SRT document.
def _extract_caption_lines(srt_text):
    """Return the caption text lines of *srt_text*, each terminated by ``\\n``."""
    # Normalise line endings first so CRLF, LF and CR input all parse alike.
    normalized = srt_text.replace("\r\n", "\n").replace("\r", "\n")
    captions = []
    # Cues are separated by blank lines; within a cue, everything after the
    # "-->" timing line is caption text (possibly several lines).
    for cue in re.split(r"\n\s*\n", normalized):
        cue_lines = cue.strip("\n").split("\n")
        for index, line in enumerate(cue_lines):
            if "-->" in line:
                captions.extend(
                    f"{text}\n" for text in cue_lines[index + 1:] if text.strip()
                )
                break
    return captions


# Save the subtitles of a single lesson.
def save_subtitle(name, src_keys, timeout: float = 10):
    """Download every subtitle track and append its text to ``<name>.txt``.

    Only the caption text is kept; cue numbers and timing lines are dropped.
    The file is opened in append mode, so all tracks of a lesson end up in
    the same file and an existing file is extended rather than replaced.

    Args:
        name: Lesson name, used as the file name after unsafe characters
            have been replaced.
        src_keys: Iterable of subtitle keys to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On network errors, timeouts or an HTTP
            error status.
    """
    path = f"{_safe_filename(name)}.txt"
    for srcKey in src_keys:
        url = f'http://www.icourse163.org/video/downloadVideoSrt.htm?srcKey={srcKey}'
        # Without a timeout requests waits forever on an unresponsive server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        captions = _extract_caption_lines(response.text)
        # Lines end in "\n" only: text mode converts that to the platform's
        # newline, whereas writing "\r\n" would yield "\r\r\n" on Windows.
        with open(path, 'a', encoding="utf-8") as f:
            f.writelines(captions)

if __name__ == '__main__':
    # Step 1: set the course id. Open the course page in a browser; the id is
    # the `tid` query parameter of the URL, for example
    # https://www.icourse163.org/course/ZZU-1207201802?tid=1467133722
    course_id = 1467133722
    # Step 2: set the cookie. Press F12, pick any POST request and paste the
    # complete value of its cookie header below.
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
        'cookie': 'cookie放这里'
    }
    # Step 3: set the CSRF key. Paste the value of the "NTESSTUDYSI" cookie
    # (usually the first one) below.
    csrfKey = "还有这里要修改,别忘记了"

    # `header` and `csrfKey` are read as globals by the request helpers, so
    # they have to be assigned before the first helper call.
    lesson_ids = get_lesson_Id(course_id)
    for lesson in lesson_ids:
        name, video_id, signature = get_name_videoId_signature(lesson)
        nos_keys = get_nosKey(video_id, signature)
        if nos_keys:
            save_subtitle(name, nos_keys)
            print(f"-->{name} 字幕下载成功...")
        else:
            # No subtitle keys were returned for this lesson.
            print(f"{name}   未查询到有字幕。。。")
posted @ 2022-03-21 01:00  槑孒  阅读(366)  评论(0)  编辑  收藏  举报