爬取视频
爬取千锋的JavaScript视频
import requests
from urllib.parse import quote
from lxml import etree

# Download every video linked from one mobiletrain.org course page.
#
# Request used to fetch the course index page:
#   URL:     http://video.mobiletrain.org/course/index/courseId/479
#   Method:  GET
#   Headers: User-Agent: ................

COURSE_URL = 'http://video.mobiletrain.org/course/index/courseId/479'
VIDEO_HOST = 'http://7xtcwd.com1.z0.glb.clouddn.com/'
# (connect, read) timeouts in seconds so a stalled server cannot hang the
# script forever; the read timeout applies per socket read, not per file.
REQUEST_TIMEOUT = (10, 60)
# Number of bytes written to disk per streamed chunk.
CHUNK_SIZE = 512

# Fetch the course index page and fail loudly on an HTTP error instead of
# parsing an error page that contains no video links.
response = requests.get(
    url=COURSE_URL,
    headers={
        'User-Agent': '................'
    },
    timeout=REQUEST_TIMEOUT,
)
response.raise_for_status()
html = response.text

# Collect the data-url attribute of every video entry on the page.
eroot = etree.HTML(html)
hrefs = eroot.xpath("//li[@class='clearfix j-url-list']/a/@data-url")

for href in hrefs:
    print(href)

    # Local file name: everything after the first ':' with the last four
    # characters dropped.
    # NOTE(review): assumes each data-url ends in a 4-character extension
    # such as ".mp4" -- confirm against the live page.
    end_index = -4
    filename = href[href.find(':') + 1:end_index]

    # The remote path is the part of the href that starts at the first "千".
    start_url = href.find("千")
    if start_url == -1:
        # Without the marker the slice below would be empty and the script
        # would request "<host>/.mp4"; skip the entry instead.
        print("跳过(未找到“千”):", href)
        continue
    uri = href[start_url:end_index]

    # Build the real video address; the non-ASCII path must be
    # percent-encoded before it is put into the URL.
    src = VIDEO_HOST + quote(uri) + ".mp4"

    try:
        # Issue the request *before* creating the file so a failed request
        # does not leave an empty or bogus .mp4 behind, and use the response
        # as a context manager so the streamed connection is always released.
        with requests.get(
            url=src,
            stream=True,
            timeout=REQUEST_TIMEOUT,
        ) as video_response:
            # Reject 4xx/5xx responses rather than saving the error body
            # as if it were video data.
            video_response.raise_for_status()
            print("正在下载:", src)
            with open(filename + '.mp4', 'wb') as f:
                # Stream the body to disk chunk by chunk instead of loading
                # the whole video into memory.
                for chunk in video_response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
    except requests.RequestException as exc:
        # One broken video should not abort the remaining downloads.
        # A failure in the middle of a transfer can still leave a partial
        # file on disk.
        print("下载失败:", src, exc)