B站批量下载教学视频集 —— Java 爬虫教程
1. 说明:下面的代码是在 https://blog.csdn.net/xiuminglee/article/details/106014889 一文代码的基础上二次修改而来的。
2. 请先在本地下载 ffmpeg 并解压,并记住解压路径(代码中的 FFMPEG_PATH 需要改成本机 ffmpeg 可执行文件的完整路径)。
3. 本文使用的依赖如下:
<!-- B站视频下载 依赖 -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.3.0</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.12.1</version>
</dependency>
<!-- 阿里JSON解析器 -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.47</version>
</dependency>
4. 代码如下:
package com.srn.cn.hbase; import cn.hutool.http.HttpRequest; import cn.hutool.http.HttpResponse; import cn.hutool.json.JSONArray; import cn.hutool.json.JSONObject; import com.alibaba.fastjson.JSON; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import java.io.*; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @ClassName dowload * @Description TODO * @Author 王志亭 * @Date 2020/12/29 15:05 **/ public class dowload { /** 视频地址 */ // bilibili 通用视频连接 private static String BILIBILI_VIDEO_URL = "https://www.bilibili.com/video/"; // 具体视频连接 private static String CONCRETE_VIDEO_URL = "BV1x54y1Q7mo"; // 获取全部视频信息接口--为了获取全部视频名称 private static String VIDEO_NAMES_URL = "https://api.bilibili.com/x/player/pagelist?bvid="+CONCRETE_VIDEO_URL+"&jsonp=jsonp"; // 具体的视频接口 private static String VIDEO_URL = BILIBILI_VIDEO_URL + CONCRETE_VIDEO_URL; // ffmpeg 本地地址 private static String FFMPEG_PATH = "D:\\Wangzhiting\\FFmpeg\\ffmpeg-4.3.1-2020-11-19-essentials_build\\bin\\ffmpeg.exe"; private static String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"; private static VideoInfo VIDEO_INFO = new VideoInfo(); private static String SAVE_PATH; public static void main(String[] args) { // htmlParser(); htmlParserList(); } /** * @Author 王志亭 * @Description 解析多个网页信息获取视频集合 * @Date 20-12-30 11:25:49 * @return void */ private static void htmlParserList() { // 获取全部视频名称 List<String> videoNames = getVideoNames(); String url = ""; for (int i = 0; i < videoNames.size(); i++) { url = VIDEO_URL + "?p=" + (i+1); System.out.println("url地址 = " + url); HttpResponse res = HttpRequest.get(url).timeout(2000).execute(); String html = res.body(); // 视频名称 System.out.println("视频名称== " + videoNames.get(i)); VIDEO_INFO.videoName = videoNames.get(i); // 截取视频信息 Pattern pattern = 
Pattern.compile("(?<=<script>window.__playinfo__=).*?(?=</script>)"); Matcher matcher = pattern.matcher(html); if (matcher.find()) { VIDEO_INFO.videoInfo = new JSONObject(matcher.group()); } else { System.err.println("未匹配到视频信息,退出程序!"); return; } getVideoInfo(); } } /** 解析HTML获取相关信息 */ private static void htmlParser() { HttpResponse res = HttpRequest.get(VIDEO_URL).timeout(2000).execute(); String html = res.body(); Document document = Jsoup.parse(html); Element title = document.getElementsByTag("title").first(); // 视频名称 VIDEO_INFO.videoName = title.text(); // 截取视频信息 Pattern pattern = Pattern.compile("(?<=<script>window.__playinfo__=).*?(?=</script>)"); Matcher matcher = pattern.matcher(html); if (matcher.find()) { VIDEO_INFO.videoInfo = new JSONObject(matcher.group()); } else { System.err.println("未匹配到视频信息,退出程序!"); return; } getVideoInfo(); } /** * @Author 王志亭 * @Description 获取全部视频名称集合 * @Date 20-12-31 14:40:02 * @return java.util.List<java.lang.String> */ private static List<String> getVideoNames(){ HttpResponse res = HttpRequest.get(VIDEO_NAMES_URL).timeout(2000).execute(); String jsonUrl = res.body(); com.alibaba.fastjson.JSONObject jsonObject = JSON.parseObject(jsonUrl); com.alibaba.fastjson.JSONArray pages = jsonObject.getJSONArray("data"); List<String> list = new ArrayList<>(); com.alibaba.fastjson.JSONObject object = null; for (int i = 0; i < pages.size(); i++) { object = pages.getJSONObject(i); list.add(object.getString("part")); } return list; } /** 解析视频和音频的具体信息 */ private static void getVideoInfo() { // 获取视频的基本信息 JSONObject videoInfo = VIDEO_INFO.videoInfo; JSONArray videoInfoArr = videoInfo.getJSONObject("data").getJSONObject("dash").getJSONArray("video"); VIDEO_INFO.videoBaseUrl = videoInfoArr.getJSONObject(0).getStr("baseUrl"); VIDEO_INFO.videoBaseRange = videoInfoArr.getJSONObject(0).getJSONObject("SegmentBase").getStr("Initialization"); HttpResponse videoRes = HttpRequest.get(VIDEO_INFO.videoBaseUrl) .header("Referer", VIDEO_URL) .header("Range", 
"bytes=" + VIDEO_INFO.videoBaseRange) .header("User-Agent", USER_AGENT) .timeout(2000) .execute(); VIDEO_INFO.videoSize = videoRes.header("Content-Range").split("/")[1]; // 获取音频基本信息 JSONArray audioInfoArr = videoInfo.getJSONObject("data").getJSONObject("dash").getJSONArray("audio"); VIDEO_INFO.audioBaseUrl = audioInfoArr.getJSONObject(0).getStr("baseUrl"); VIDEO_INFO.audioBaseRange = audioInfoArr.getJSONObject(0).getJSONObject("SegmentBase").getStr("Initialization"); HttpResponse audioRes = HttpRequest.get(VIDEO_INFO.audioBaseUrl) .header("Referer", VIDEO_URL) .header("Range", "bytes=" + VIDEO_INFO.audioBaseRange) .header("User-Agent", USER_AGENT) .timeout(2000) .execute(); VIDEO_INFO.audioSize = audioRes.header("Content-Range").split("/")[1]; downloadFile(); } /** 下载音视频 */ private static void downloadFile(){ // 保存音视频的位置 SAVE_PATH = "H://test/" + File.separator; File fileDir = new File(SAVE_PATH); if (!fileDir.exists()){ fileDir.mkdirs(); } // 下载视频 File videoFile = new File(SAVE_PATH + File.separator + VIDEO_INFO.videoName + "_video.mp4"); if (!videoFile.exists()){ System.out.println("--------------开始下载视频文件--------------"); HttpResponse videoRes = HttpRequest.get(VIDEO_INFO.videoBaseUrl) .header("Referer", VIDEO_URL) .header("Range", "bytes=0-" + VIDEO_INFO.videoSize) .header("User-Agent", USER_AGENT) .execute(); videoRes.writeBody(videoFile); System.out.println("--------------视频文件下载完成--------------"); } // 下载音频 File audioFile = new File(SAVE_PATH + File.separator + VIDEO_INFO.videoName + "_audio.mp4"); if (!audioFile.exists()){ System.out.println("--------------开始下载音频文件--------------"); HttpResponse audioRes = HttpRequest.get(VIDEO_INFO.audioBaseUrl) .header("Referer", VIDEO_URL) .header("Range", "bytes=0-" + VIDEO_INFO.audioSize) .header("User-Agent", USER_AGENT) .execute(); audioRes.writeBody(audioFile); System.out.println("--------------音频文件下载完成--------------"); } mergeFiles(videoFile,audioFile); } /** * @Author 王志亭 * @Description 合并音视频 * @Date 20-12-31 
14:46:14 * @param videoFile : * @param audioFile : * @return void */ private static void mergeFiles(File videoFile,File audioFile){ System.out.println("--------------开始合并音视频--------------"); String outFile = SAVE_PATH + File.separator + VIDEO_INFO.videoName + ".mp4"; List<String> commend = new ArrayList<>(); commend.add(FFMPEG_PATH); commend.add("-i"); commend.add(videoFile.getAbsolutePath()); commend.add("-i"); commend.add(audioFile.getAbsolutePath()); commend.add("-vcodec"); commend.add("copy"); commend.add("-acodec"); commend.add("copy"); commend.add(outFile); ProcessBuilder builder = new ProcessBuilder(); builder.command(commend); try { builder.inheritIO().start().waitFor(); System.out.println("--------------音视频合并完成--------------"); videoFile.delete(); audioFile.delete(); } catch (InterruptedException | IOException e) { System.err.println("音视频合并失败!"); e.printStackTrace(); } } } class VideoInfo{ // 真实项目中不推荐直接使用`public`哦😯 public String videoName; public JSONObject videoInfo; public String videoBaseUrl; public String audioBaseUrl; public String videoBaseRange; public String audioBaseRange; public String videoSize; public String audioSize; }