python基础day3-爬取梨视频,2019-6-25

''''''
'''
视频选项:
1.梨视频
'''
# import requests
#
# # 往视频源地址发送请求
# response = requests.get(
#     'https://video.pearvideo.com/mp4/adshort/20190625/cont-1570302-14057031_adpkg-ad_hd.mp4')
#
# # 打印二进制流,比如图片、视频等数据
# print(response.content)
#
# # 保存视频到本地
# with open('视频.mp4', 'wb') as f:
#     f.write(response.content)

'''
1、先往梨视频主页发送请求
https://www.pearvideo.com/

解析获取所有视频的id:
video_1570302

re.findall()


2、获取视频详情页url:
惊险!男子抢上地铁滑倒,就脚进去了
https://www.pearvideo.com/video_1570302
揭秘坎儿井
https://www.pearvideo.com/video_1570107
'''
import requests
import re # 正则,用于解析文本数据
# Crawl the Pear Video home page and save every linked video as "<title>.mp4"
# in the current working directory.

# 1. Request the Pear Video home page.
# A timeout is passed explicitly: without one, requests waits indefinitely
# on a server that stops responding.
response = requests.get('https://www.pearvideo.com/', timeout=10)
# print(response.text)

# Collect every video id from the home page with a regular expression.
#   argument 1: the pattern
#   argument 2: the text to search
#   argument 3: flags (re.S lets "." match newlines as well)
res_list = re.findall('<a href="video_(.*?)"', response.text, re.S)
# print(res_list)

# 2. Visit the detail page of each video and download the video file.
# dict.fromkeys() drops repeated ids while keeping first-seen order, so a
# video that is linked more than once on the page is downloaded only once.
for v_id in dict.fromkeys(res_list):
    detail_url = 'https://www.pearvideo.com/video_' + v_id
    # print(detail_url)

    # Request the detail page; a failure on one video must not abort the
    # whole crawl, so it is reported and the loop moves on.
    try:
        detail_response = requests.get(url=detail_url, timeout=10)
        detail_response.raise_for_status()
    except requests.RequestException as err:
        print(detail_url, 'detail page request failed:', err)
        continue
    # print(detail_response.text)

    # Extract the video source url and the video title from the detail page.
    url_matches = re.findall('srcUrl="(.*?)"', detail_response.text, re.S)
    name_matches = re.findall(
        '<h1 class="video-tt">(.*?)</h1>', detail_response.text, re.S)

    # Indexing [0] on an empty match list would raise IndexError, so pages
    # that do not contain both fields are skipped instead.
    if not url_matches or not name_matches:
        print(detail_url, 'skipped: video url or title not found')
        continue

    video_url = url_matches[0]
    print(video_url)

    video_name = name_matches[0]
    print(video_name)

    # Request the video url to get the binary content of the video.
    # raise_for_status() keeps an HTTP error page from being saved as .mp4.
    try:
        v_response = requests.get(video_url, timeout=10)
        v_response.raise_for_status()
    except requests.RequestException as err:
        print(video_url, 'video download failed:', err)
        continue

    # The title comes from untrusted HTML: replace path separators and other
    # characters that are invalid in file names, and fall back to the video
    # id when nothing usable is left.
    file_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', video_name).strip() or v_id

    with open('%s.mp4' % file_name, 'wb') as f:
        f.write(v_response.content)
    print(video_name, '视频爬取完成')


posted on 2019-06-26 22:24  leyzzz  阅读(183)  评论(0)  编辑  收藏  举报

导航