# Scraping Pear Video (梨视频) - 輪滑少年

import os
import re

import requests

# Download the videos listed on a Pear Video "popular" page.
#
# Flow: fetch the listing page, pull out each video's detail-page path,
# ask the videoStatus endpoint for the video's source URL, rewrite that
# URL into the playable one, and stream the file into VIDEO_DIR.

LIST_URL = 'https://www.pearvideo.com/popular_9'
BASE_URL = 'https://www.pearvideo.com/'
STATUS_URL = 'https://www.pearvideo.com/videoStatus.jsp?contId='
VIDEO_DIR = 'video'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the script
CHUNK_SIZE = 64 * 1024  # bytes written per iteration while downloading

# Captures the href of every "play" anchor on the listing page. The captured
# value is only the last path segment (e.g. "video_1716868"); it is joined
# with BASE_URL to form the detail-page URL.
VIDEO_LINK_RE = re.compile(r'<a href="(.*?)" class="actplay">')


def extract_video_id(video_path):
    """Return the content id from a detail-page path.

    Args:
        video_path: path segment such as ``"video_1716868"``.

    Returns:
        The part after the first underscore, e.g. ``"1716868"``.

    Raises:
        IndexError: if ``video_path`` contains no underscore.
    """
    return video_path.split('_')[1]


def build_real_video_url(src_url, video_id):
    """Rewrite the ``srcUrl`` reported by the status endpoint into the playable URL.

    The reported URL has a file name of the form ``<token>-<rest>`` and is not
    playable as-is; the playable file is ``cont-<video_id>-<rest>``, e.g.::

        .../20210118/1611024074140-15578857_adpkg-ad_hd.mp4   (not playable)
        .../20210118/cont-1716868-15578857_adpkg-ad_hd.mp4    (playable)

    Only the file name is rewritten, so a directory segment that happens to
    equal the leading token is left untouched.

    Args:
        src_url: the ``srcUrl`` value from the status response.
        video_id: content id of the video, as a string.

    Returns:
        The rewritten URL.
    """
    head, slash, filename = src_url.rpartition('/')
    _token, dash, rest = filename.partition('-')
    return head + slash + 'cont-' + video_id + dash + rest


def fetch_src_url(video_path, video_id):
    """Query the videoStatus endpoint and return the reported ``srcUrl``.

    The request carries the detail page as ``Referer`` to imitate the AJAX
    call made by the site's own page.

    Raises:
        requests.RequestException: on network failure or a non-2xx status.
        KeyError: if the JSON response lacks the expected keys.
        ValueError: if the response body is not valid JSON.
    """
    headers = {'Referer': BASE_URL + video_path}
    resp = requests.get(STATUS_URL + video_id, headers=headers,
                        timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    return resp.json()['videoInfo']['videos']['srcUrl']


def download(url, dest_path):
    """Stream ``url`` to ``dest_path`` without loading the whole file in memory.

    Raises:
        requests.RequestException: on network failure or a non-2xx status.
        OSError: if the destination file cannot be written.
    """
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as resp:
        resp.raise_for_status()
        with open(dest_path, 'wb') as f:
            # iter_content() defaults to 1-byte chunks; pass a real chunk
            # size so the file is not written one byte at a time.
            for chunk in resp.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)


def main():
    """Download every video linked from LIST_URL into VIDEO_DIR."""
    listing = requests.get(LIST_URL, timeout=REQUEST_TIMEOUT)
    listing.raise_for_status()

    video_paths = VIDEO_LINK_RE.findall(listing.text)
    print(video_paths)

    # open() does not create missing directories.
    os.makedirs(VIDEO_DIR, exist_ok=True)

    for video_path in video_paths:
        print(BASE_URL + video_path)  # detail page of this video
        try:
            video_id = extract_video_id(video_path)
            src_url = fetch_src_url(video_path, video_id)
            real_url = build_real_video_url(src_url, video_id)
            print(real_url)
            name = real_url.split('/')[-1]
            download(real_url, os.path.join(VIDEO_DIR, name))
        except (requests.RequestException, LookupError, ValueError) as exc:
            # One broken entry should not abort the remaining downloads.
            print('skipping %s: %s' % (video_path, exc))


if __name__ == '__main__':
    main()
# posted on 2021-01-19 17:10 輪滑少年 阅读(128) 评论(0) 收藏 举报
# 刷新页面 返回顶部