爬取梨视频

import re
import os
import requests

# Crawl flow:
# Step 1: request the category listing:
#   https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=4&start=0
# Step 2: build each video page URL: https://www.pearvideo.com/ + 'video_1688698/'
# Step 3: the video page source embeds the mp4 address in a snippet like:
#   ldUrl="",srcUrl="http://video.pearvideo.com/mp4/third/20200731/cont-1689259-12308265-175332-hd.mp4",vdoUrl=srcUrl
# Step 4: download the mp4 itself:
#   http://video.pearvideo.com/mp4/third/20200731/cont-1689259-12308265-175332-hd.mp4

# Seconds to wait on any single request before giving up, so a stalled
# connection cannot hang the script indefinitely.
REQUEST_TIMEOUT = 30
# Bytes per write when saving a video. requests' default for iter_content()
# is 1 byte, which turns a download into millions of tiny writes.
DOWNLOAD_CHUNK_SIZE = 64 * 1024

# Videos are saved into a "video" directory next to this script.
video_dir_path = os.path.join(os.path.dirname(__file__), 'video')
os.makedirs(video_dir_path, exist_ok=True)

# Request the listing page for category id 4 (reqType=5, starting at offset 0).
category_response = requests.post(
    'https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=4&start=0',
    timeout=REQUEST_TIMEOUT,
)
category_response.raise_for_status()
video_name_list = re.findall(r'<a href="(.*?)" class="vervideo-lilink actplay">', category_response.text)
for video_name in video_name_list:
    # Request the individual video page linked from the listing.
    video_response = requests.get(f'https://www.pearvideo.com/{video_name}/', timeout=REQUEST_TIMEOUT)
    video_response.raise_for_status()
    video_mp4_list = re.findall(r'ldUrl="",srcUrl="(.*?)",vdoUrl=srcUrl', video_response.text)
    for video_mp4 in video_mp4_list:
        # Use the last path segment of the mp4 URL as the local file name.
        video_file_name = video_mp4.rsplit('/', 1)[-1]
        video_file_path = os.path.join(video_dir_path, video_file_name)

        # Stream the video so it is written to disk chunk by chunk instead of
        # being loaded into memory in full; the context manager releases the
        # connection even if writing fails.
        with requests.get(video_mp4, stream=True, timeout=REQUEST_TIMEOUT) as video_mp4_response:
            # Fail before creating the file so an HTTP error page is never
            # saved under an .mp4 name.
            video_mp4_response.raise_for_status()
            with open(video_file_path, 'wb') as f:
                for chunk in video_mp4_response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    f.write(chunk)
        print('爬取完毕!')
        break  # Deliberately stop after one video to avoid hammering the site.
    break  # Likewise only process the first video page from the listing.
posted @ 2020-08-01 19:21  给你加马桶唱疏通  阅读(120)  评论(0)  编辑  收藏  举报