使用FFmpeg进行视频抽取音频,之后进行语音识别转为文字

1、首先需要下载FFmpeg;

2、Gradle依赖

复制代码
// Pins every dependency of the given group to one version.
// details: the dependency-resolve details object passed by resolutionStrategy.eachDependency
// group:   Maven group id to match against the requested dependency
// version: version to force when the group matches
def void forceVersion(details, group, version) {
    def sameGroup = (details.requested.group == group)
    if (!sameGroup) {
        return
    }
    details.useVersion(version)
}

// Pins a single module (group + name) to one version.
// details: the dependency-resolve details object passed by resolutionStrategy.eachDependency
// group:   Maven group id to match
// name:    module name to match
// version: version to force when both group and name match
def void forceVersion(details, group, name, version) {
    def requested = details.requested
    def sameModule = (requested.group == group) && (requested.name == name)
    if (!sameModule) {
        return
    }
    details.useVersion(version)
}

// Shared configuration applied to every project of the build (p is the project being configured).
allprojects { p ->
    group = 'com.my.spider'
    version = '1.0.0'
    
    apply plugin: 'java'
    apply plugin: 'maven'
    apply plugin: 'maven-publish'
    
    // Compile both main and test sources as UTF-8.
    [compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

    // The manifest is filled in right before the jar task runs, so p.ext.Timestamp
    // (assigned at the end of this closure) is already available at that point.
    jar.doFirst {
        manifest {
            // Merge a hand-written manifest when the project ships one.
            def manifestFile = "${projectDir}/META-INF/MANIFEST.MF"
            if (new File(manifestFile).exists())
                from (manifestFile)
            
            attributes 'Implementation-Title':p.name
            // SNAPSHOT builds get the build timestamp appended to the version.
            if (p.version.endsWith('-SNAPSHOT')) {
                attributes 'Implementation-Version': p.version + '-' + p.ext.Timestamp
            } else {
                attributes 'Implementation-Version': p.version
            }
            attributes 'Implementation-BuildDateTime':new Date()
        }
    }
    
    javadoc {
        options {
            encoding 'UTF-8'
            charSet 'UTF-8'
            author false
            version true
            links 'http://docs.oracle.com/javase/8/docs/api/index.html'
            // Document everything down to private members.
            memberLevel = org.gradle.external.javadoc.JavadocMemberLevel.PRIVATE
        }
    }

    // When the uploadArchives environment variable is set, a build also publishes.
    if (System.env.uploadArchives) {
        build.dependsOn publish
    }

    // NOTE(review): the plugin on the build classpath is 1.5.14.RELEASE while the
    // resolution strategy below forces org.springframework.boot artifacts to
    // 1.4.1.RELEASE - confirm this mismatch is intended.
    buildscript {
        repositories {
            mavenCentral()
        }
        dependencies {classpath 'org.springframework.boot:spring-boot-gradle-plugin:1.5.14.RELEASE' }
    }
    
    afterEvaluate {Project  project -> 
        if (project.pluginManager.hasPlugin('java')) {
            configurations.all {
                // Force one version per group across all configurations.
                resolutionStrategy.eachDependency {DependencyResolveDetails details -> 
                    forceVersion details, 'org.springframework.boot', '1.4.1.RELEASE'
                    forceVersion details, 'org.slf4j', '1.7.21'
                    forceVersion details, 'org.springframework', '4.3.3.RELEASE'
                }

                // Keep the log4j 1.x binding and implementation off every classpath.
                exclude module:'slf4j-log4j12'
                exclude module:'log4j'
            }

            dependencies {testCompile 'junit:junit:4.12' }
        }
    }

    repositories {
        mavenCentral()
    }
    
    // Timestamp: year, month, day, hour, minute
    p.ext.Timestamp = new Date().format('yyyyMMddHHmm')
    // Build Number (taken from the BUILD_NUMBER environment variable; 'x' when it is unset or empty)
    p.ext.BuildNumber = System.env.BUILD_NUMBER
    if (p.ext.BuildNumber == null || "" == p.ext.BuildNumber) {
        p.ext.BuildNumber = 'x'
    }    
}

// Packs the source tree of the whole build into
// <name>-<version>-<timestamp>.<buildNumber>-sources.zip, leaving out hidden files
// and the per-project output / runtime directories.
task zipSources(type: Zip) {
    description '压缩源代码'

    // The archive name is also published through ext so other tasks can locate the file.
    def sourcesArchive = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-sources.zip'
    project.ext.zipSourcesFile = sourcesArchive
    archiveName = sourcesArchive
    includeEmptyDirs = false

    from project.projectDir

    exclude '**/.*'
    exclude 'build/*'
    // Same five directories are skipped for every project of the build.
    allprojects.each { p ->
        ['bin', 'build', 'data', 'work', 'logs'].each { dir ->
            exclude '**/' + p.name + '/' + dir + '/*'
        }
    }
}

// Builds the copy spec describing the install layout of one application project.
// prj:     the project whose artifacts are packaged
// dstname: target directory inside the archive; defaults to the project name
// Returns a CopySpec containing the jar, the example configs, the Windows start
// script and the Unix .conf file (renamed to carry the version).
def CopySpec appCopySpec(Project prj, dstname = null) {
    def installDir = dstname ?: prj.name
    def versionedName = prj.name + '-' + project.version
    def projectRoot = prj.projectDir.toString()
    def jarPath = prj.buildDir.toString() + '/libs/' + versionedName + '.jar'

    return copySpec {
        // Fat jar
        from (jarPath) {
            into installDir
        }

        // Configs
        from (projectRoot + '/config/examples') {
            into installDir + '/config'
        }

        // Windows start script
        from (projectRoot + '/' + prj.name + '.bat') {
            into installDir
        }

        // Unix conf script
        from (projectRoot + '/' + prj.name + '.conf') {
            into installDir
            rename prj.name, versionedName
        }
    }
}


// Builds the installation package
// <name>-<version>-<timestamp>.<buildNumber>-setup.zip from the spider-demo project,
// after all subprojects have been built.
task zipSetup(type: Zip, dependsOn: subprojects.build) {
    description '制作安装包'

    // The archive name is also published through ext so other tasks can locate the file.
    def setupArchive = project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-setup.zip'
    project.ext.zipSetupFile = setupArchive
    archiveName = setupArchive

    with appCopySpec(project(':spider-demo'))
}

import java.security.MessageDigest

// Computes the MD5 checksum of a file.
// file: anything offering withInputStream (e.g. java.io.File)
// Returns the digest as a 32-character lowercase hex string.
def generateMD5(final file) {
    MessageDigest digest = MessageDigest.getInstance("MD5")
    file.withInputStream { stream ->
        byte[] buffer = new byte[8192]
        int read
        // read() returns -1 at end of stream
        while ((read = stream.read(buffer)) != -1) {
            digest.update(buffer, 0, read)
        }
    }
    // BigInteger.toString(16) drops leading zeros; pad back to the full 128 bits
    // so the result always matches what md5sum-style tools print.
    return new BigInteger(1, digest.digest()).toString(16).padLeft(32, '0')
}

// Prints the MD5 checksums of the setup and sources archives and writes them to
// build/distributions/<name>-<version>-<timestamp>.<buildNumber>-md5.txt.
task md5(dependsOn: [zipSetup, zipSources]) {
    // doLast replaces the deprecated '<<' task operator and behaves the same way.
    doLast {
        def distDir = "${projectDir}/build/distributions/"
        String md5_setup = generateMD5(file(distDir + project.ext.zipSetupFile))
        String md5_sources = generateMD5(file(distDir + project.ext.zipSourcesFile))

        def lines = [
            project.ext.zipSetupFile + '=' + md5_setup,
            project.ext.zipSourcesFile + '=' + md5_sources
        ]
        lines.each { println it }

        def md5File = new File(distDir
                + project.name + '-' + project.version + '-' + project.ext.Timestamp + '.' + project.ext.BuildNumber + '-md5.txt')
        // withPrintWriter flushes and closes the writer even when a write fails.
        md5File.withPrintWriter { writer ->
            lines.each { writer.println it }
        }
    }
}

// The root build task also builds every subproject, both archives and the checksum file.
build.dependsOn subprojects.build, zipSetup, zipSources, md5
build.gradle
复制代码

工程组件gradle依赖: 语音识别使用 百度api;需引入 compile 'com.baidu.aip:java-sdk:3.2.1'

复制代码
// Build script of the application component.
apply plugin: 'spring-boot'
apply plugin: 'application'

// Ship the example configuration files in the distribution under config/.
distributions {
    main {
        contents {
            from ("${projectDir}/config/examples") {
                into "config"
            }
        }
    }
}

// No tar distribution is produced.
distTar.enabled = false

springBoot {
    executable = true
    mainClass = 'com.my.ai.Application'
}

dependencies {
    // NOTE(review): declared as 1.4.0.RELEASE here, while the root build script forces
    // the org.springframework.boot group to 1.4.1.RELEASE - confirm which is intended.
    compile 'org.springframework.boot:spring-boot-starter-web:1.4.0.RELEASE'
    compile 'dom4j:dom4j:1.6.1'
    compile 'commons-httpclient:commons-httpclient:3.1'
    compileOnly 'com.h2database:h2:1.4.191'
    compile 'javax.cache:cache-api:1.0.0'
    compile 'org.jboss.resteasy:resteasy-jaxrs:3.0.14.Final'
    compile 'org.jboss.resteasy:resteasy-client:3.0.14.Final'
    // Axis
    compile 'axis:axis:1.4'
    
    compile 'org.jsoup:jsoup:1.10.1'
    
    compile 'com.alibaba:fastjson:1.2.21'
    
    // Baidu AI SDK, used for speech recognition
    compile 'com.baidu.aip:java-sdk:3.2.1'
    
}
复制代码

3、视频抽取音频服务:

复制代码
package com.my.ai.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

/**
 * Extracts the audio track of a video file by running an external ffmpeg binary.
 */
@Service
public class ExtractAudioService {

    public static Logger logger  = LoggerFactory.getLogger(ExtractAudioService.class);

    public static void main(String[] args) {
        new ExtractAudioService().getAudioFromVideo("E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.mp4",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
    }

    /**
     * Extracts the audio of a video into a WAV file stored next to the video,
     * using the video's name with the extension replaced by ".wav".
     *
     * @param videoPath  path of the video file
     * @param ffmpegPath path of the ffmpeg executable
     * @return path of the WAV file, or {@code null} when the video does not exist,
     *         is not a regular file, or ffmpeg could not be run
     */
    public String getAudioFromVideo(String videoPath,String ffmpegPath) {
        if (videoPath == null) {
            return null;
        }
        File video = new File(videoPath);
        if (!video.isFile()) {
            return null;
        }
        String format = "wav";
        // Only a dot inside the file name is an extension separator; a dot in a
        // directory name (or no dot at all) must not truncate the path.
        int lastSeparator = Math.max(videoPath.lastIndexOf('/'), videoPath.lastIndexOf('\\'));
        int lastDot = videoPath.lastIndexOf('.');
        String basePath = lastDot > lastSeparator ? videoPath.substring(0, lastDot) : videoPath;
        String outPath = basePath + "." + format;
        if (processCmd(videoPath, ffmpegPath, format, outPath) == null) {
            return null;
        }
        return outPath;
    }

    /**
     * Runs {@code ffmpeg -i <input> -y -vn -f <format> <output>} and waits for it to finish.
     * Example: D:\ffmpeg4.2\bin\ffmpeg.exe -i input.mp4 -f wav -vn -y 3.wav
     *
     * @param inputPath  media file to read
     * @param ffmpegPath path of the ffmpeg executable
     * @param format     output container format passed to {@code -f}
     * @param outPath    file to write (overwritten if it exists)
     * @return everything ffmpeg printed (stdout and stderr merged, line breaks removed),
     *         or {@code null} when the process could not be started or was interrupted
     */
    public String processCmd(String inputPath,String ffmpegPath,String format,String outPath) {
        List<String> command = new java.util.ArrayList<String>();
        command.add(ffmpegPath);
        command.add("-i");
        command.add(inputPath);
        command.add("-y");
        command.add("-vn");
        command.add("-f");
        command.add(format);
        command.add(outPath);

        Process process = null;
        try {
            ProcessBuilder builder = new ProcessBuilder();
            builder.command(command);
            // Merge stderr into stdout so a single reader drains all output;
            // an undrained pipe would eventually block ffmpeg.
            builder.redirectErrorStream(true);
            process = builder.start();

            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    logger.debug(line);
                    output.append(line);
                }
            }
            // Blocks until the external conversion process has finished.
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                logger.warn("ffmpeg exited with code {} while extracting audio from {}", exitCode, inputPath);
            }
            return output.toString();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller's thread.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for ffmpeg: " + inputPath, e);
            return null;
        } catch (Exception e) {
            logger.error("Failed to run ffmpeg on " + inputPath, e);
            return null;
        } finally {
            if (process != null) {
                process.destroy();
            }
        }
    }

}
ExtractAudioService
复制代码

4、音频切段:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
package com.my.ai.service;
 
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
 
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
 
@Service
public class CutService {
 
    public static Logger logger = LoggerFactory.getLogger(CutService.class);
 
    public List<String> cutFile(String media_path, String ffmpeg_path) {
 
        List<String> audios = new ArrayList<>();
        int mediaTime = getMediaTime(media_path, ffmpeg_path);
        int num = mediaTime / 59;
        int lastNum = mediaTime % 59;
        System.out.println(mediaTime +"|" + num + "|"+ lastNum);
        int length = 59;
        File file = new File(media_path);
        String filename = file.getName();
        for (int i = 0; i < num; i++) {
            String outputPath = file.getParent() + File.separator + i + "-"+filename;
            processCmd(media_path, ffmpeg_path, String.valueOf(length * i) ,
                    String.valueOf(length), outputPath);
            audios.add(outputPath);
        }
        if(lastNum > 0) {
            String outputPath = file.getParent() + File.separator + num + "-"+filename;
            processCmd(media_path, ffmpeg_path, String.valueOf(length * num) ,
                    String.valueOf(lastNum), outputPath);
            audios.add(outputPath);
        }
        return audios;
    }
 
    /**
     * 获取视频总时间
     *
     * @param viedo_path  视频路径
     * @param ffmpeg_path ffmpeg路径
     * @return
     */
    public int getMediaTime(String video_path, String ffmpeg_path) {
        List<String> commands = new java.util.ArrayList<String>();
        commands.add(ffmpeg_path);
        commands.add("-i");
        commands.add(video_path);
        try {
            ProcessBuilder builder = new ProcessBuilder();
            builder.command(commands);
            final Process p = builder.start();
 
            // 从输入流中读取视频信息
            BufferedReader br = new BufferedReader(new InputStreamReader(p.getErrorStream()));
            StringBuffer sb = new StringBuffer();
            String line = "";
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
            System.out.println(sb.toString());
            br.close();
 
            // 从视频信息中解析时长
            String regexDuration = "Duration: (.*?), bitrate: (\\d*) kb\\/s";
            Pattern pattern = Pattern.compile(regexDuration);
            Matcher m = pattern.matcher(sb.toString());
            if (m.find()) {
                int time = getTimelen(m.group(1));
                System.out
                        .println(video_path + ",视频时长:" + time + ",比特率:" + m.group(2) + "kb/s");
                return time;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return 0;
    }
 
    // 格式:"00:00:10.68"
    public int getTimelen(String timelen) {
        int min = 0;
        String strs[] = timelen.split(":");
        if (strs[0].compareTo("0") > 0) {
            min += Integer.valueOf(strs[0]) * 60 * 60;// 秒
        }
        if (strs[1].compareTo("0") > 0) {
            min += Integer.valueOf(strs[1]) * 60;
        }
        if (strs[2].compareTo("0") > 0) {
            min += Math.round(Float.valueOf(strs[2]));
        
        return min;
    }
    
    //D:\ffmpeg4.2\bin\ffmpeg.exe -i 123.pcm -ss 0 -t 59 1-123.wav
    public String processCmd(String inputPath,String ffmpegPath,
            String startTime,String length,String outputPath) {
        List<String> commend = new java.util.ArrayList<String>();
        commend.add(ffmpegPath);
        commend.add("-i");
        commend.add(inputPath);
        commend.add("-ss");
        commend.add(startTime);
        commend.add("-t");
        commend.add(length);
        commend.add(outputPath);
        try {
 
            ProcessBuilder builder = new ProcessBuilder();
            builder.command(commend);
            builder.redirectErrorStream(true);
            Process p = builder.start();
 
            // 1. start
            BufferedReader buf = null; // 保存ffmpeg的输出结果流
            String line = null;
            // read the standard output
 
            buf = new BufferedReader(new InputStreamReader(p.getInputStream()));
 
            StringBuffer sb = new StringBuffer();
            while ((line = buf.readLine()) != null) {
                System.out.println(line);
                sb.append(line);
                continue;
            }
            p.waitFor();// 这里线程阻塞,将等待外部转换进程运行成功运行结束后,才往下执行
            // 1. end
            return sb.toString();
        } catch (Exception e) {
            System.out.println(e);   
            return null;
        }
    }
 
    //ffmpeg -y  -i 16k.wav  -acodec pcm_s16le -f s16le -ac 1 -ar 16000 16k.pcm
    public static String processWavToPcm(String inputPath,String ffmpegPath,String outputPath) {
        List<String> commend = new java.util.ArrayList<String>();
        commend.add(ffmpegPath);
        commend.add("-i");
        commend.add(inputPath);
        commend.add("-acodec");
        commend.add("pcm_s16le");
        commend.add("-f");
        commend.add("s16le");
        commend.add("-ac");
        commend.add("1");
        commend.add("-ar");
        commend.add("16000");
        commend.add(outputPath);
        try {
 
            ProcessBuilder builder = new ProcessBuilder();
            builder.command(commend);
            builder.redirectErrorStream(true);
            Process p = builder.start();
 
            // 1. start
            BufferedReader buf = null; // 保存ffmpeg的输出结果流
            String line = null;
            // read the standard output
 
            buf = new BufferedReader(new InputStreamReader(p.getInputStream()));
 
            StringBuffer sb = new StringBuffer();
            while ((line = buf.readLine()) != null) {
                System.out.println(line);
                sb.append(line);
                continue;
            }
            p.waitFor();// 这里线程阻塞,将等待外部转换进程运行成功运行结束后,才往下执行
            // 1. end
            return outputPath;
                    //sb.toString();
        } catch (Exception e) {
            System.out.println(e);   
            return null;
        }
    }
 
     
     
     
    public static void main(String[] args) {
        List<String> audios = new CutService().cutFile(
                "E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\氧化还原反应中电子转移的方向和数目的表示方法.wav",
                "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
        System.out.println(audios.size());
         
        for (String wavPath : audios) {
            String out = wavPath.substring(0,wavPath.lastIndexOf(".")) + ".pcm";
            processWavToPcm(wavPath, "D:\\ffmpeg4.2\\bin\\ffmpeg.exe", out);
        }
         
    }
 
}

 

5、音频格式转换(wav 转 pcm),便于进行语音识别;代码见上面 CutService 中的 processWavToPcm 方法:

6、调用sdk,获取识别结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
package com.my.ai.service;
 
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
 
import com.baidu.aip.speech.AipSpeech;
 
@Service
public class TokenService {
 
    public static Logger logger = LoggerFactory.getLogger(TokenService.class);
     
     //设置APPID/AK/SK
    public static final String APP_ID = "***";
    public static final String API_KEY = "***";
    public static final String SECRET_KEY = "***";
    static AipSpeech client = null;
    static {
        if(client == null) {
             client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
        }
    }
     
    public static void main(String[] args) {
        getResult("E:\\QLDownload\\氧化还原反应中电子转移的方向和数目的表示方法\\0-氧化还原反应中电子转移的方向和数目的表示方法.pcm");
    }
     
    public static String getResult(String file) {
          
          // 可选:设置网络连接参数
          client.setConnectionTimeoutInMillis(2000);
          client.setSocketTimeoutInMillis(60000);
          // 可选:设置代理服务器地址, http和socket二选一,或者均不设置
          //client.setHttpProxy("proxy_host", proxy_port);  // 设置http代理
          //client.setSocketProxy("proxy_host", proxy_port);  // 设置socket代理
          JSONObject res = client.asr(file, "pcm", 16000, null);
          //System.out.println(res.toString(2));
          System.out.println(res.get("result").toString());
          return res.get("result").toString();
    }
     
     
     
}

  

7、结果写入文件(原文未给出 FileService 的代码;下面的测试通过 FileService.appendFile2 追加写入识别结果):

 

8、测试:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
package com.my.ai.test;
 
import java.util.List;
 
import com.my.ai.service.CutService;
import com.my.ai.service.ExtractAudioService;
import com.my.ai.service.FileService;
import com.my.ai.service.TokenService;
 
public class TestService {
 
     
    public static void main(String[] args) {
        ExtractAudioService audioService = new ExtractAudioService();
        String outPath =  audioService.getAudioFromVideo("G:\\Youku Files\\transcode\\化学高中必修1__第2章第3节·氧化还原反应_标清.mp4", "D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
        List<String> audios = new CutService().cutFile(outPath,"D:\\ffmpeg4.2\\bin\\ffmpeg.exe");
        for (String wavPath : audios) {
            String out = wavPath.substring(0,wavPath.lastIndexOf(".")) + ".pcm";
            String outPcm = CutService.processWavToPcm(wavPath, "D:\\ffmpeg4.2\\bin\\ffmpeg.exe", out);
            String result = TokenService.getResult(outPcm);
            FileService.appendFile2("G:\\Youku Files\\transcode\\化学高中必修1__第2章第3节·氧化还原反应_标清.mp4-字幕.txt", result+"\r\n");
        }
    }
     
}

  

 

posted @   凉城  阅读(5955)  评论(0编辑  收藏  举报
编辑推荐:
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
点击右上角即可分享
微信分享提示