If you are not particularly familiar with C/C++ but need the FFmpeg API for some simple audio/video work, org.bytedeco:ffmpeg-platform is an option. This post records how to decode audio with ffmpeg-platform.
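For reference, the dependency can be pulled into a Maven project roughly as follows. The version shown is only an example and should be replaced with a current release; note that the code below uses the AVChannelLayout API, so the bundled FFmpeg needs to be 5.1 or newer:

<dependency>
    <groupId>org.bytedeco</groupId>
    <artifactId>ffmpeg-platform</artifactId>
    <version>6.1.1-1.5.10</version>
</dependency>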
1. Code Implementation
Below is an example that decodes the audio data from an MP4 file and resamples it to S16 format:
import org.bytedeco.ffmpeg.avcodec.AVCodec;
import org.bytedeco.ffmpeg.avcodec.AVCodecContext;
import org.bytedeco.ffmpeg.avcodec.AVPacket;
import org.bytedeco.ffmpeg.avformat.AVFormatContext;
import org.bytedeco.ffmpeg.avutil.AVChannelLayout;
import org.bytedeco.ffmpeg.avutil.AVDictionary;
import org.bytedeco.ffmpeg.avutil.AVFrame;
import org.bytedeco.ffmpeg.swresample.SwrContext;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.IntPointer;
import org.bytedeco.javacpp.PointerPointer;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Objects;

import static org.bytedeco.ffmpeg.global.avcodec.*;
import static org.bytedeco.ffmpeg.global.avformat.*;
import static org.bytedeco.ffmpeg.global.avutil.*;
import static org.bytedeco.ffmpeg.global.swresample.*;

public class DecodeAudio {

    public static void main(String[] args) throws IOException {
        decode_audio("t.mp4", "t.pcm");
    }

    public static void decode_audio(String input, String output) throws IOException {
        AVFormatContext ifmt_ctx = new AVFormatContext(null);
        AVCodecContext ic = null;
        SwrContext swr_ctx = null;
        AVFrame frame = null;
        AVPacket pkt = null;
        PointerPointer<BytePointer> dst_data = new PointerPointer<>(1);
        IntPointer dst_linesize = new IntPointer(1);
        // Output layout: native-order stereo, 44.1 kHz, packed signed 16-bit
        AVChannelLayout dst_ch_layout = new AVChannelLayout();
        dst_ch_layout.nb_channels(2);
        dst_ch_layout.order(AV_CHANNEL_ORDER_NATIVE);
        dst_ch_layout.u_mask(AV_CH_LAYOUT_STEREO);
        int dst_rate = 44100, dst_nb_channels = 0, dst_sample_fmt = AV_SAMPLE_FMT_S16;
        long dst_nb_samples = 0, max_dst_nb_samples = 0;
        try (OutputStream os = new FileOutputStream(output)) {
            // Open the input and read the stream information
            int ret = avformat_open_input(ifmt_ctx, input, null, null);
            if (ret < 0) {
                throw new IOException(ret + ":avformat_open_input error");
            }
            ret = avformat_find_stream_info(ifmt_ctx, (AVDictionary) null);
            if (ret < 0) {
                throw new IOException(ret + ":avformat_find_stream_info error");
            }
            // Find the first audio stream
            int nb_streams = ifmt_ctx.nb_streams();
            int audio_index = -1;
            for (int i = 0; i < nb_streams; i++) {
                if (ifmt_ctx.streams(i).codecpar().codec_type() == AVMEDIA_TYPE_AUDIO) {
                    audio_index = i;
                    break;
                }
            }
            if (audio_index == -1) {
                throw new IOException("audio index = -1");
            }
            // Create and open the decoder for that stream
            AVCodec codec = avcodec_find_decoder(ifmt_ctx.streams(audio_index).codecpar().codec_id());
            if (Objects.isNull(codec)) {
                throw new IOException("avcodec_find_decoder error");
            }
            ic = avcodec_alloc_context3(codec);
            if (Objects.isNull(ic)) {
                throw new IOException("avcodec_alloc_context3 error");
            }
            /* Copy codec parameters from input stream to output codec context */
            ret = avcodec_parameters_to_context(ic, ifmt_ctx.streams(audio_index).codecpar());
            if (ret < 0) {
                throw new IOException(ret + ":avcodec_parameters_to_context error");
            }
            ret = avcodec_open2(ic, codec, (AVDictionary) null);
            if (ret < 0) {
                throw new IOException(ret + ":avcodec_open2 error");
            }
            // Configure the resampler: input side follows the decoder, output side is 44.1 kHz stereo S16
            swr_ctx = swr_alloc();
            if (Objects.isNull(swr_ctx)) {
                throw new IOException("swr_alloc error");
            }
            av_opt_set_chlayout(swr_ctx, "in_chlayout", ic.ch_layout(), 0);
            av_opt_set_int(swr_ctx, "in_sample_rate", ic.sample_rate(), 0);
            av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ic.sample_fmt(), 0);
            av_opt_set_chlayout(swr_ctx, "out_chlayout", dst_ch_layout, 0);
            av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
            av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
            ret = swr_init(swr_ctx);
            if (ret < 0) {
                throw new IOException(ret + ":swr_init error");
            }
            frame = av_frame_alloc();
            if (Objects.isNull(frame)) {
                throw new IOException("av_frame_alloc error");
            }
            pkt = av_packet_alloc();
            if (Objects.isNull(pkt)) {
                throw new IOException("av_packet_alloc error");
            }
            // Allocate the initial output sample buffer; dst_data[0] will hold the packed S16 data
            dst_nb_samples = av_rescale_rnd(ic.frame_size(), dst_rate, ic.sample_rate(), AV_ROUND_UP);
            max_dst_nb_samples = dst_nb_samples;
            dst_nb_channels = dst_ch_layout.nb_channels();
            /* buffer is going to be directly written to a rawaudio file, no alignment */
            ret = av_samples_alloc(dst_data, dst_linesize, dst_nb_channels,
                    (int) dst_nb_samples, dst_sample_fmt, 0);
            if (ret < 0) {
                throw new IOException(ret + ":av_samples_alloc error");
            }
            int dst_bufsize;
            byte[] buffer;
            // Demux packets, decode frames, resample and append the interleaved S16 data to the file
            while (true) {
                ret = av_read_frame(ifmt_ctx, pkt);
                if (ret == AVERROR_EAGAIN() || ret == AVERROR_EOF) {
                    break;
                } else if (ret < 0) {
                    throw new IOException(ret + ":av_read_frame error");
                }
                if (pkt.stream_index() != audio_index) {
                    av_packet_unref(pkt);
                    continue;
                }
                ret = avcodec_send_packet(ic, pkt);
                av_packet_unref(pkt);
                if (ret < 0) {
                    throw new IOException(ret + ":avcodec_send_packet error");
                }
                while (true) {
                    ret = avcodec_receive_frame(ic, frame);
                    if (ret == AVERROR_EAGAIN() || ret == AVERROR_EOF) {
                        break;
                    } else if (ret < 0) {
                        throw new IOException(ret + ":avcodec_receive_frame error");
                    }
                    // Grow the output buffer if the resampler may produce more samples than before
                    dst_nb_samples = av_rescale_rnd(
                            swr_get_delay(swr_ctx, ic.sample_rate()) + frame.nb_samples(),
                            dst_rate, ic.sample_rate(), AV_ROUND_UP);
                    if (dst_nb_samples > max_dst_nb_samples) {
                        av_freep(dst_data);
                        ret = av_samples_alloc(dst_data, dst_linesize, dst_nb_channels,
                                (int) dst_nb_samples, dst_sample_fmt, 1);
                        if (ret < 0) {
                            break;
                        }
                        max_dst_nb_samples = dst_nb_samples;
                    }
                    /* convert to destination format */
                    ret = swr_convert(swr_ctx, dst_data, (int) dst_nb_samples,
                            frame.data(), frame.nb_samples());
                    if (ret < 0) {
                        throw new IOException(ret + ":swr_convert error");
                    }
                    dst_bufsize = av_samples_get_buffer_size(dst_linesize, dst_nb_channels,
                            ret, dst_sample_fmt, 1);
                    if (dst_bufsize < 0) {
                        throw new IOException(dst_bufsize + ":av_samples_get_buffer_size error");
                    }
                    // Copy the converted samples out of native memory and write them to the PCM file
                    buffer = new byte[dst_bufsize];
                    dst_data.get(BytePointer.class, 0).get(buffer);
                    os.write(buffer);
                    System.out.printf("nb_samples = %d, dst_bufsize = %d\n", ret, dst_bufsize);
                }
            }
            String fmt = "s16le";
            //byte[] buf = new byte[64];
            //ret = av_channel_layout_describe(dst_ch_layout, buf, buf.length);
            System.out.printf(
                    "Resampling succeeded. Play the output file with the command:\n"
                            + "ffplay -f %s -channel_layout %s -channels %d -ar %d %s\n",
                    fmt, AV_CH_LAYOUT_STEREO/* new String(buf, 0, ret) */,
                    dst_nb_channels, dst_rate, output);
        } finally {
            dst_data.close();
            dst_linesize.close();
            if (Objects.nonNull(pkt)) {
                av_packet_free(pkt);
            }
            if (Objects.nonNull(frame)) {
                av_frame_free(frame);
            }
            if (Objects.nonNull(ic)) {
                avcodec_free_context(ic);
            }
            if (Objects.nonNull(swr_ctx)) {
                swr_free(swr_ctx);
            }
            avformat_close_input(ifmt_ctx);
        }
    }
}
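Note that the example stops as soon as av_read_frame reports EOF, so frames still buffered in the decoder and samples still buffered in the resampler are dropped. If that matters for your use case, the usual send/receive draining pattern can be added just before the final printf. The following is only a rough sketch that reuses the variables from the code above (it is not part of the original example), with the buffer-resize and file-write steps elided:

// Drain the decoder: a null packet switches it into draining mode
avcodec_send_packet(ic, (AVPacket) null);
while (avcodec_receive_frame(ic, frame) >= 0) {
    int got = swr_convert(swr_ctx, dst_data, (int) max_dst_nb_samples,
            frame.data(), frame.nb_samples());
    // ... grow dst_data if needed and write `got` samples to the file, as in the inner loop above ...
}
// Drain the resampler: a null input flushes whatever swr_ctx still holds internally
int left = swr_convert(swr_ctx, dst_data, (int) max_dst_nb_samples, (PointerPointer) null, 0);
// ... write the remaining `left` samples to the file ...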
2. Result
The converted PCM data can be played back with ffplay using the following command:
ffplay -f s16le -channel_layout 3 -channels 2 -ar 44100 t.pcm
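Alternatively, if ffplay is not at hand, raw S16 little-endian PCM can also be played directly from Java with the standard javax.sound.sampled API. This is only a small verification sketch unrelated to ffmpeg-platform; it assumes the t.pcm produced above and the same 44.1 kHz / 16-bit / stereo parameters:

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.SourceDataLine;
import java.io.FileInputStream;
import java.io.InputStream;

public class PlayPcm {
    public static void main(String[] args) throws Exception {
        // Must match the decode settings above: 44.1 kHz, 16-bit, 2 channels, signed, little-endian
        AudioFormat fmt = new AudioFormat(44100f, 16, 2, true, false);
        SourceDataLine line = AudioSystem.getSourceDataLine(fmt);
        line.open(fmt);
        line.start();
        try (InputStream in = new FileInputStream("t.pcm")) {
            byte[] buf = new byte[4096];
            int n;
            while ((n = in.read(buf)) > 0) {
                line.write(buf, 0, n);
            }
        }
        line.drain();  // wait until everything queued has actually been played
        line.close();
    }
}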