用 Python 在 B 站爬取糖豆广场舞视频
先附上代码:
import json
import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree
# Scrape one Bilibili search-results page, download the separate audio and
# video streams of every result, and merge each pair into a single MP4.
# NOTE: only a single results page is handled; pagination is not implemented.

SEARCH_URL = 'https://search.bilibili.com/all?vt=96737335&keyword=%E7%B3%96%E8%B1%86%E5%B9%BF%E5%9C%BA%E8%88%9E'

# Headers for the HTML pages (search results and individual video pages).
PAGE_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    'referer': 'https://www.bilibili.com/',
}

# Headers for the media (stream) requests.
MEDIA_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    'referer': 'https://www.bilibili.com',
}

RESULT_LINKS_XPATH = '//*[@id="i_cecream"]/div/div[2]/div[2]/div/div/div/div[2]/div/div[*]/div/div[2]/div/div/a/@href'
TITLE_XPATH = '//*[@id="viewbox_report"]/h1/text()'

AUDIO_STREAM_ID = 30280  # "id" of the audio entry looked up in the page source
VIDEO_STREAM_ID = 32     # "id" of the video entry looked up in the page source

OUTPUT_DIR = 'video'
REQUEST_TIMEOUT = 30     # seconds; without it a stalled connection hangs forever
CHUNK_SIZE = 1 << 20     # stream media to disk 1 MiB at a time


def clean_title(title):
    """Return *title* with every non-word character removed.

    The result is used as a file-name stem, so spaces, punctuation and path
    separators must not survive.
    """
    return re.sub(r'\W', '', title)


def find_stream_url(page_text, stream_id):
    """Return the first ``baseUrl`` listed for *stream_id*, or ``None``.

    *page_text* is the decoded HTML of a video page and *stream_id* is the
    integer ``"id"`` of the wanted stream entry.
    """
    pattern = r'"id":%d,"baseUrl":"(.*?)","base_url"' % stream_id
    found = re.search(pattern, page_text)
    if found is None:
        return None
    raw = found.group(1)
    # The capture is the inside of a JSON string literal, so escapes such as
    # \u0026 or \/ have to be decoded before the value is usable as a URL.
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        return raw


def download(url, path):
    """Stream *url* into the file at *path*.

    Raises ``requests.RequestException`` on a network failure or an HTTP
    error status, so an error page is never saved as if it were media.
    """
    with requests.get(url, headers=MEDIA_HEADERS, stream=True,
                      timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(path, 'wb') as out:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                out.write(chunk)


def merge_streams(video_path, audio_path, output_path):
    """Attach the audio file to the video file and write *output_path*."""
    # Imported lazily so moviepy is only loaded once there is something to
    # merge. NOTE(review): `moviepy.editor` / `set_audio` are the names the
    # original script used; confirm they match the installed moviepy version.
    from moviepy.editor import AudioFileClip, VideoFileClip

    video = VideoFileClip(video_path)
    try:
        audio = AudioFileClip(audio_path)
        try:
            video.set_audio(audio).write_videofile(output_path)
        finally:
            audio.close()
    finally:
        # Close explicitly: clips are not released when they go out of scope
        # inside a long-running loop.
        video.close()


def process_video(link):
    """Download and merge the streams of the video page at *link*.

    Pages without a title, or without both the audio and the video stream,
    are skipped.
    """
    page = requests.get(link, headers=PAGE_HEADERS, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    page_text = page.content.decode()

    titles = etree.HTML(page_text).xpath(TITLE_XPATH)
    if not titles:
        print(f'Skipping {link}: no title found')
        return
    title = clean_title(titles[0])

    audio_url = find_stream_url(page_text, AUDIO_STREAM_ID)
    video_url = find_stream_url(page_text, VIDEO_STREAM_ID)
    if not audio_url or not video_url:
        return

    audio_path = f'{OUTPUT_DIR}/{title}ad.mp4'
    video_path = f'{OUTPUT_DIR}/{title}vd.mp4'
    download(audio_url, audio_path)
    download(video_url, video_path)
    print(f'{title}视频下载完成!')

    merge_streams(video_path, audio_path, f'{OUTPUT_DIR}/{title}.mp4')


def main():
    """Crawl the search page and process every video linked from it."""
    # open() does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    search = requests.get(SEARCH_URL, headers=PAGE_HEADERS,
                          timeout=REQUEST_TIMEOUT)
    search.raise_for_status()
    hrefs = etree.HTML(search.text).xpath(RESULT_LINKS_XPATH)

    for href in hrefs:
        # urljoin handles protocol-relative ("//host/..."), absolute and
        # relative hrefs alike.
        link = urljoin(SEARCH_URL, href)
        try:
            process_video(link)
        except requests.RequestException as exc:
            # One unreachable video should not abort the remaining ones.
            print(f'Skipping {link}: {exc}')


if __name__ == '__main__':
    main()
下载完成后的成果如下: