使用 FFmpeg 从视频中抽取音频,再通过语音识别把音频转为文字
1、首先需要下载FFmpeg;
2、Gradle依赖

import java.security.MessageDigest

/**
 * Forces every dependency that belongs to {@code group} to the given version.
 * Intended to be called from a {@code resolutionStrategy.eachDependency} closure.
 */
void forceVersion(details, group, version) {
    if (details.requested.group == group) {
        details.useVersion version
    }
}

/**
 * Forces the single dependency {@code group:name} to the given version.
 */
void forceVersion(details, group, name, version) {
    if (details.requested.group == group && details.requested.name == name) {
        details.useVersion version
    }
}

allprojects { p ->
    group = 'com.my.spider'
    version = '1.0.0'

    apply plugin: 'java'
    apply plugin: 'maven'
    apply plugin: 'maven-publish'

    [compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

    // The manifest is filled in lazily (doFirst) because Timestamp is only
    // assigned further down in this closure.
    jar.doFirst {
        manifest {
            def manifestFile = "${projectDir}/META-INF/MANIFEST.MF"
            if (new File(manifestFile).exists()) {
                from(manifestFile)
            }
            attributes 'Implementation-Title': p.name
            if (p.version.endsWith('-SNAPSHOT')) {
                attributes 'Implementation-Version': p.version + '-' + p.ext.Timestamp
            } else {
                attributes 'Implementation-Version': p.version
            }
            attributes 'Implementation-BuildDateTime': new Date()
        }
    }

    javadoc {
        options {
            encoding 'UTF-8'
            charSet 'UTF-8'
            author false
            version true
            links 'http://docs.oracle.com/javase/8/docs/api/index.html'
            memberLevel = org.gradle.external.javadoc.JavadocMemberLevel.PRIVATE
        }
    }

    // Publish as part of the build only when the uploadArchives environment variable is set.
    if (System.env.uploadArchives) {
        build.dependsOn publish
    }

    buildscript {
        repositories {
            mavenCentral()
        }
        dependencies {
            classpath 'org.springframework.boot:spring-boot-gradle-plugin:1.5.14.RELEASE'
        }
    }

    afterEvaluate { Project project ->
        if (project.pluginManager.hasPlugin('java')) {
            configurations.all {
                resolutionStrategy.eachDependency { DependencyResolveDetails details ->
                    forceVersion details, 'org.springframework.boot', '1.4.1.RELEASE'
                    forceVersion details, 'org.slf4j', '1.7.21'
                    forceVersion details, 'org.springframework', '4.3.3.RELEASE'
                }
                exclude module: 'slf4j-log4j12'
                exclude module: 'log4j'
            }
            dependencies {
                testCompile 'junit:junit:4.12'
            }
        }
    }

    repositories {
        mavenCentral()
    }

    // Timestamp: year, month, day, hour, minute
    p.ext.Timestamp = new Date().format('yyyyMMddHHmm')
    // Build number, taken from the BUILD_NUMBER environment variable; 'x' when it is absent or empty
    p.ext.BuildNumber = System.env.BUILD_NUMBER
    if (p.ext.BuildNumber == null || "" == p.ext.BuildNumber) {
        p.ext.BuildNumber = 'x'
    }
}

task zipSources(type: Zip) {
    description '压缩源代码'
    project.ext.zipSourcesFile = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-sources.zip'
    archiveName = project.ext.zipSourcesFile
    includeEmptyDirs = false
    from project.projectDir
    exclude '**/.*'
    exclude 'build/*'
    // Leave out per-project build output and runtime directories.
    allprojects.each { p ->
        exclude '**/' + p.name + '/bin/*'
        exclude '**/' + p.name + '/build/*'
        exclude '**/' + p.name + '/data/*'
        exclude '**/' + p.name + '/work/*'
        exclude '**/' + p.name + '/logs/*'
    }
}

/**
 * Builds the copy spec that lays out one application inside the setup zip.
 *
 * @param prj     the sub-project to package
 * @param dstname target directory inside the archive; defaults to the project name
 */
CopySpec appCopySpec(Project prj, dstname = null) {
    if (!dstname) {
        dstname = prj.name
    }
    return copySpec {
        // Fat jar
        from(prj.buildDir.toString() + '/libs/' + prj.name + '-' + project.version + '.jar') {
            into dstname
        }
        // Configs
        from(prj.projectDir.toString() + '/config/examples') {
            into dstname + '/config'
        }
        // Windows start script
        from(prj.projectDir.toString() + '/' + prj.name + '.bat') {
            into dstname
        }
        // Unix conf script
        from(prj.projectDir.toString() + '/' + prj.name + '.conf') {
            into dstname
            rename prj.name, prj.name + '-' + project.version
        }
    }
}

task zipSetup(type: Zip, dependsOn: subprojects.build) {
    description '制作安装包'
    project.ext.zipSetupFile = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-setup.zip'
    // Same value as zipSetupFile; reuse it so the md5 task always looks for the right file.
    archiveName = project.ext.zipSetupFile
    with appCopySpec(project(':spider-demo'))
}

/**
 * Computes the MD5 digest of a file.
 *
 * @param file the file to hash
 * @return the digest as a 32-character lower-case hex string
 */
def generateMD5(final file) {
    MessageDigest digest = MessageDigest.getInstance("MD5")
    file.withInputStream { is ->
        byte[] buffer = new byte[8192]
        int read
        // read() signals end of stream with -1; a 0-byte read is not EOF.
        while ((read = is.read(buffer)) != -1) {
            digest.update(buffer, 0, read)
        }
    }
    // BigInteger.toString(16) drops leading zeros, so pad back to the full 32 hex digits.
    return new BigInteger(1, digest.digest()).toString(16).padLeft(32, '0')
}

task md5(dependsOn: [zipSetup, zipSources]) {
    // doLast replaces the deprecated '<<' task-action operator.
    doLast {
        String md5_setup = generateMD5(file("${projectDir}/build/distributions/" + project.ext.zipSetupFile))
        String md5_sources = generateMD5(file("${projectDir}/build/distributions/" + project.ext.zipSourcesFile))
        println project.ext.zipSetupFile + '=' + md5_setup
        println project.ext.zipSourcesFile + '=' + md5_sources

        def newFile = new File("${projectDir}/build/distributions/" + project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-md5.txt')
        // withPrintWriter flushes and closes the writer even if a write fails.
        newFile.withPrintWriter { printWriter ->
            printWriter.println project.ext.zipSetupFile + '=' + md5_setup
            printWriter.println project.ext.zipSourcesFile + '=' + md5_sources
        }
    }
}

build.dependsOn subprojects.build, zipSetup, zipSources, md5
工程组件的 Gradle 依赖:语音识别使用百度 API,需要引入 compile 'com.baidu.aip:java-sdk:3.2.1'
// Build script of the application sub-project.
// Applies the Spring Boot plugin and the standard 'application' plugin.
apply plugin: 'spring-boot'
apply plugin: 'application'
// Add the example configuration files to the main distribution, under "config".
distributions {
main {
contents {
from ("${projectDir}/config/examples") {
into "config"
}
}
}
}
// Turn off the distTar task.
distTar.enabled = false
// Spring Boot plugin settings: the main class to launch; 'executable' presumably
// requests a self-executing jar - confirm against the plugin version in use.
springBoot {
executable = true
mainClass = 'com.my.ai.Application'
}
dependencies {
compile 'org.springframework.boot:spring-boot-starter-web:1.4.0.RELEASE'
compile 'dom4j:dom4j:1.6.1'
compile 'commons-httpclient:commons-httpclient:3.1'
// Needed at compile time only
compileOnly 'com.h2database:h2:1.4.191'
compile 'javax.cache:cache-api:1.0.0'
compile 'org.jboss.resteasy:resteasy-jaxrs:3.0.14.Final'
compile 'org.jboss.resteasy:resteasy-client:3.0.14.Final'
// Axis
compile 'axis:axis:1.4'
compile 'org.jsoup:jsoup:1.10.1'
compile 'com.alibaba:fastjson:1.2.21'
// Baidu AIP SDK, used for speech recognition
compile 'com.baidu.aip:java-sdk:3.2.1'
}
3、视频抽取音频服务:

package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Extracts the audio track of a video file by running an external ffmpeg executable.
 */
@Service
public class ExtractAudioService {

    public static Logger logger = LoggerFactory.getLogger(ExtractAudioService.class);

    public static void main(String[] args) {
        new ExtractAudioService().getAudioFromVideo(
                "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.mp4",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
    }

    /**
     * Extracts the audio of {@code videoPath} into a {@code .wav} file next to the video.
     *
     * @param videoPath  path of the source video
     * @param ffmpegPath path of the ffmpeg executable
     * @return path of the generated wav file, or {@code null} when the video does not exist
     *         or ffmpeg could not be run
     */
    public String getAudioFromVideo(String videoPath, String ffmpegPath) {
        if (videoPath == null) {
            return null;
        }
        File video = new File(videoPath);
        if (!video.isFile()) {
            logger.warn("Video file not found: {}", videoPath);
            return null;
        }
        String format = "wav";
        String outPath = replaceExtension(videoPath, format);
        if (processCmd(videoPath, ffmpegPath, format, outPath) == null) {
            // ffmpeg could not be started or was interrupted, so there is no output file to hand back.
            return null;
        }
        return outPath;
    }

    /**
     * Runs {@code ffmpeg -i <input> -y -vn -f <format> <output>} and waits for it to finish.
     * Example: {@code D:\ffmpeg4.2\bin\ffmpeg.exe -i input.mp4 -f wav -vn -y 3.wav}
     *
     * @param inputPath  media file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param format     output container format passed to {@code -f}
     * @param outPath    file to write; overwritten if it exists ({@code -y})
     * @return everything ffmpeg printed (stdout and stderr merged, line breaks removed),
     *         or {@code null} if the process could not be run
     */
    public String processCmd(String inputPath, String ffmpegPath, String format, String outPath) {
        List<String> command = new ArrayList<String>();
        command.add(ffmpegPath);
        command.add("-i");
        command.add(inputPath);
        command.add("-y");
        command.add("-vn");
        command.add("-f");
        command.add(format);
        command.add(outPath);
        try {
            ProcessBuilder builder = new ProcessBuilder();
            builder.command(command);
            // Merge stderr into stdout so a single reader drains all output and ffmpeg never blocks on a full pipe.
            builder.redirectErrorStream(true);
            Process process = builder.start();
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                    output.append(line);
                }
            }
            // Block until the external conversion process has finished.
            process.waitFor();
            return output.toString();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for ffmpeg: {}", command, e);
            return null;
        } catch (IOException | RuntimeException e) {
            logger.error("Failed to run ffmpeg: {}", command, e);
            return null;
        }
    }

    /**
     * Replaces the file extension of {@code path}, or appends one when the file name has none.
     */
    private static String replaceExtension(String path, String extension) {
        int dot = path.lastIndexOf('.');
        int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        // A dot that sits before the last separator belongs to a directory name, not to the file.
        String base = dot > separator ? path.substring(0, dot) : path;
        return base + "." + extension;
    }
}
4、音频切段:
package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Splits an audio file into fixed-length segments and converts wav segments to raw PCM,
 * in both cases by running an external ffmpeg executable.
 */
@Service
public class CutService {

    public static Logger logger = LoggerFactory.getLogger(CutService.class);

    /**
     * Length of one segment in seconds.
     * NOTE(review): presumably chosen to stay under a per-request limit of the speech API - confirm.
     */
    private static final int SEGMENT_SECONDS = 59;

    /**
     * Matches the "Duration: hh:mm:ss.xx" header printed by ffmpeg. The bitrate is optional and is
     * searched for lazily, so extra fields between the two (such as "start: ...") do not end up
     * in the captured duration.
     */
    private static final Pattern DURATION_PATTERN =
            Pattern.compile("Duration: (\\d+:\\d+:\\d+(?:\\.\\d+)?)(?:.*?bitrate: (\\d+) kb/s)?");

    /**
     * Cuts a media file into consecutive segments of at most 59 seconds, written next to the
     * source as {@code <index>-<original file name>}.
     *
     * @param media_path  file to split
     * @param ffmpeg_path path of the ffmpeg executable
     * @return paths of the segment files in playback order; empty when the duration could not be read
     */
    public List<String> cutFile(String media_path, String ffmpeg_path) {
        List<String> audios = new ArrayList<>();
        int mediaTime = getMediaTime(media_path, ffmpeg_path);
        int num = mediaTime / SEGMENT_SECONDS;
        int lastNum = mediaTime % SEGMENT_SECONDS;
        System.out.println(mediaTime + "|" + num + "|" + lastNum);

        File file = new File(media_path);
        String filename = file.getName();
        int index = 0;
        for (int start = 0; start < mediaTime; start += SEGMENT_SECONDS, index++) {
            // Every segment is full-length except possibly the last one.
            int seconds = Math.min(SEGMENT_SECONDS, mediaTime - start);
            // Resolving against the parent File also works when the path has no parent directory.
            String outputPath = new File(file.getParentFile(), index + "-" + filename).getPath();
            processCmd(media_path, ffmpeg_path, String.valueOf(start), String.valueOf(seconds), outputPath);
            audios.add(outputPath);
        }
        return audios;
    }

    /**
     * Reads the total duration of a media file from the header that {@code ffmpeg -i} prints.
     *
     * @param video_path  media file to inspect
     * @param ffmpeg_path path of the ffmpeg executable
     * @return duration in whole seconds (rounded), or 0 when it could not be determined
     */
    public int getMediaTime(String video_path, String ffmpeg_path) {
        List<String> commands = new ArrayList<String>();
        commands.add(ffmpeg_path);
        commands.add("-i");
        commands.add(video_path);

        String output = runFfmpeg(commands, false);
        if (output == null) {
            return 0;
        }
        System.out.println(output);

        Matcher m = DURATION_PATTERN.matcher(output);
        if (!m.find()) {
            logger.warn("No duration found in ffmpeg output for {}", video_path);
            return 0;
        }
        int time = getTimelen(m.group(1));
        String bitrate = m.group(2) != null ? m.group(2) : "N/A";
        System.out.println(video_path + ",视频时长:" + time + ",比特率:" + bitrate + "kb/s");
        return time;
    }

    /**
     * Converts an ffmpeg time stamp such as {@code "00:00:10.68"} into seconds.
     *
     * @param timelen time in {@code hh:mm:ss[.fraction]} form
     * @return total seconds, with the seconds field rounded to the nearest integer
     * @throws IllegalArgumentException if the text does not have three numeric fields
     */
    public int getTimelen(String timelen) {
        String[] parts = timelen.trim().split(":");
        if (parts.length != 3) {
            throw new IllegalArgumentException("Expected hh:mm:ss but got: " + timelen);
        }
        int hours = Integer.parseInt(parts[0].trim());
        int minutes = Integer.parseInt(parts[1].trim());
        int seconds = Math.round(Float.parseFloat(parts[2].trim()));
        return hours * 60 * 60 + minutes * 60 + seconds;
    }

    /**
     * Cuts one segment out of a media file.
     * Example: {@code D:\ffmpeg4.2\bin\ffmpeg.exe -y -i 123.pcm -ss 0 -t 59 1-123.wav}
     *
     * @param inputPath  file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param startTime  segment start in seconds ({@code -ss})
     * @param length     segment length in seconds ({@code -t})
     * @param outputPath file to write; overwritten if it exists
     * @return everything ffmpeg printed, or {@code null} if the process could not be run
     */
    public String processCmd(String inputPath, String ffmpegPath, String startTime, String length,
            String outputPath) {
        List<String> command = new ArrayList<String>();
        command.add(ffmpegPath);
        // Without -y ffmpeg asks whether to overwrite an existing file and then waits on stdin forever.
        command.add("-y");
        command.add("-i");
        command.add(inputPath);
        command.add("-ss");
        command.add(startTime);
        command.add("-t");
        command.add(length);
        command.add(outputPath);
        return runFfmpeg(command, true);
    }

    /**
     * Converts a wav file to raw 16 kHz, mono, signed 16-bit little-endian PCM.
     * Equivalent to: {@code ffmpeg -y -i 16k.wav -acodec pcm_s16le -f s16le -ac 1 -ar 16000 16k.pcm}
     *
     * @param inputPath  wav file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param outputPath pcm file to write; overwritten if it exists
     * @return {@code outputPath}, or {@code null} if the process could not be run
     */
    public static String processWavToPcm(String inputPath, String ffmpegPath, String outputPath) {
        List<String> command = new ArrayList<String>();
        command.add(ffmpegPath);
        command.add("-y");
        command.add("-i");
        command.add(inputPath);
        command.add("-acodec");
        command.add("pcm_s16le");
        command.add("-f");
        command.add("s16le");
        command.add("-ac");
        command.add("1");
        command.add("-ar");
        command.add("16000");
        command.add(outputPath);
        return runFfmpeg(command, true) == null ? null : outputPath;
    }

    /**
     * Runs a command, drains its merged stdout/stderr and waits for it to exit.
     *
     * @param command executable followed by its arguments
     * @param echo    whether to print each output line to standard out as it arrives
     * @return the captured output with line breaks removed, or {@code null} on failure
     */
    private static String runFfmpeg(List<String> command, boolean echo) {
        try {
            ProcessBuilder builder = new ProcessBuilder(command);
            // One merged stream means one reader is enough to keep the child from blocking on a full pipe.
            builder.redirectErrorStream(true);
            Process process = builder.start();
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (echo) {
                        System.out.println(line);
                    }
                    output.append(line);
                }
            }
            // Block until the external process has finished.
            process.waitFor();
            return output.toString();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for: {}", command, e);
            return null;
        } catch (IOException | RuntimeException e) {
            logger.error("Failed to run: {}", command, e);
            return null;
        }
    }

    public static void main(String[] args) {
        List<String> audios = new CutService().cutFile(
                "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.wav",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
        System.out.println(audios.size());
        for (String wavPath : audios) {
            String out = wavPath.substring(0, wavPath.lastIndexOf(".")) + ".pcm";
            processWavToPcm(wavPath, "D:\\ffmpeg4.2\\bin\\ffmpeg.exe", out);
        }
    }
}
5、音频格式转换(wav 转为 16k 采样率、单声道的 pcm),便于进行语音识别,代码见上面 CutService 中的 processWavToPcm 方法。
6、调用sdk,获取识别结果:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | package com.my.ai.service; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import com.baidu.aip.speech.AipSpeech; @Service public class TokenService { public static Logger logger = LoggerFactory.getLogger(TokenService. class ); //设置APPID/AK/SK public static final String APP_ID = "***" ; public static final String API_KEY = "***" ; public static final String SECRET_KEY = "***" ; static AipSpeech client = null ; static { if (client == null ) { client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY); } } public static void main(String[] args) { getResult( "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\0-氧化还原反应中电子转移的方向和数目的表示方法.pcm" ); } public static String getResult(String file) { // 可选:设置网络连接参数 client.setConnectionTimeoutInMillis( 2000 ); client.setSocketTimeoutInMillis( 60000 ); // 可选:设置代理服务器地址, http和socket二选一,或者均不设置 //client.setHttpProxy("proxy_host", proxy_port); // 设置http代理 //client.setSocketProxy("proxy_host", proxy_port); // 设置socket代理 JSONObject res = client.asr(file, "pcm" , 16000 , null ); //System.out.println(res.toString(2)); System.out.println(res.get( "result" ).toString()); return res.get( "result" ).toString(); } } |
7、结果写入文件:
package com.my.ai.service;

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Small collection of file write and append helpers. All of them encode text with the
 * platform default charset.
 */
@Service
public class FileService {

    public static Logger logger = LoggerFactory.getLogger(FileService.class);

    /**
     * Overwrites {@code file} with {@code content} through a plain FileOutputStream
     * (the slowest variant according to the original author) and prints the elapsed time.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile1(String file, String content) throws IOException {
        long begin;
        try (FileOutputStream out = new FileOutputStream(new File(file))) {
            begin = System.currentTimeMillis();
            out.write(content.getBytes());
        }
        // Measured after the try block so closing the stream is still part of the timing.
        long end = System.currentTimeMillis();
        System.out.println("FileOutputStream执行耗时:" + (end - begin) + " 毫秒");
    }

    /**
     * Overwrites {@code file} with {@code content} through a FileWriter
     * (medium speed according to the original author) and prints the elapsed time.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile2(String file, String content) throws IOException {
        long begin;
        try (FileWriter writer = new FileWriter(file)) {
            begin = System.currentTimeMillis();
            writer.write(content);
        }
        long end = System.currentTimeMillis();
        System.out.println("FileWriter执行耗时:" + (end - begin) + " 毫秒");
    }

    /**
     * Overwrites {@code file} with {@code content} through a BufferedOutputStream
     * (the fastest variant according to the original author) and prints the elapsed time.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeFile3(String file, String content) throws IOException {
        long begin;
        try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(new File(file)))) {
            begin = System.currentTimeMillis();
            out.write(content.getBytes());
            out.flush();
        }
        long end = System.currentTimeMillis();
        System.out.println("BufferedOutputStream执行耗时:" + (end - begin) + " 毫秒");
    }

    public static void main(String[] args) {
        for (int i = 0; i < 7; i++) {
            String result = TokenService.getResult(
                    "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\" + i + "-氧化还原反应中电子转移的方向和数目的表示方法.pcm");
            appendFile2("E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.txt",
                    result + "\r\n");
        }
    }

    /**
     * Appends text to a file using a BufferedWriter. Failures are logged, not thrown.
     *
     * @param file   file to append to; created if missing
     * @param conent text to append
     */
    public static void appendFile1(String file, String conent) {
        // The second FileOutputStream argument (true) opens the file in append mode.
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)))) {
            out.write(conent);
        } catch (Exception e) {
            logger.error("Failed to append to {}", file, e);
        }
    }

    /**
     * Appends text to a file using a FileWriter. Failures are logged, not thrown.
     *
     * @param fileName file to append to; created if missing
     * @param content  text to append
     */
    public static void appendFile2(String fileName, String content) {
        // The second constructor argument (true) opens the file in append mode.
        try (FileWriter writer = new FileWriter(fileName, true)) {
            writer.write(content);
        } catch (IOException e) {
            logger.error("Failed to append to {}", fileName, e);
        }
    }

    /**
     * Appends text to a file using a RandomAccessFile. Failures are logged, not thrown.
     *
     * @param fileName file to append to; created if missing
     * @param content  text to append
     */
    public static void appendFile3(String fileName, String content) {
        try (RandomAccessFile randomFile = new RandomAccessFile(fileName, "rw")) {
            // Move the write position to the current end of the file.
            randomFile.seek(randomFile.length());
            // writeBytes(String) keeps only the low byte of each char and corrupts non-Latin text,
            // so encode the string first and write the resulting bytes.
            randomFile.write(content.getBytes());
        } catch (IOException e) {
            logger.error("Failed to append to {}", fileName, e);
        }
    }
}
8、测试:
package com.my.ai.test;

import java.util.List;

import com.my.ai.service.CutService;
import com.my.ai.service.ExtractAudioService;
import com.my.ai.service.FileService;
import com.my.ai.service.TokenService;

/**
 * End-to-end run of the pipeline: extract the audio of a video, cut it into segments,
 * convert each segment to PCM, recognise it and append the text to a subtitle file.
 */
public class TestService {

    private static final String DEFAULT_VIDEO_PATH =
            "G:\\Youku Files\\transcode\\化学高中必修1__第2章第3节·氧化还原反应_标清.mp4";
    private static final String DEFAULT_FFMPEG_PATH = "D:\\ffmpeg4.2\\bin\\ffmpeg.exe";
    /** Appended to the video path to form the name of the subtitle file. */
    private static final String SUBTITLE_SUFFIX = "-字幕.txt";

    /**
     * @param args optional: {@code args[0]} is the video path and {@code args[1]} the ffmpeg
     *             executable; the built-in sample paths are used when they are omitted
     */
    public static void main(String[] args) {
        String videoPath = args.length > 0 ? args[0] : DEFAULT_VIDEO_PATH;
        String ffmpegPath = args.length > 1 ? args[1] : DEFAULT_FFMPEG_PATH;
        String subtitlePath = videoPath + SUBTITLE_SUFFIX;

        ExtractAudioService audioService = new ExtractAudioService();
        String outPath = audioService.getAudioFromVideo(videoPath, ffmpegPath);
        if (outPath == null) {
            // getAudioFromVideo reports failure with null; carrying on would only fail later with an NPE.
            System.err.println("Could not extract audio from " + videoPath);
            return;
        }

        List<String> audios = new CutService().cutFile(outPath, ffmpegPath);
        for (String wavPath : audios) {
            String out = wavPath.substring(0, wavPath.lastIndexOf(".")) + ".pcm";
            String outPcm = CutService.processWavToPcm(wavPath, ffmpegPath, out);
            if (outPcm == null) {
                // Conversion failed for this segment; skip it and keep transcribing the rest.
                System.err.println("Could not convert " + wavPath + " to pcm, segment skipped");
                continue;
            }
            String result = TokenService.getResult(outPcm);
            FileService.appendFile2(subtitlePath, result + "\r\n");
        }
    }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人