If you are not particularly familiar with C/C++ but need the FFmpeg API for some simple audio/video work, org.bytedeco:ffmpeg-platform is an option. This post records how to decode audio with ffmpeg-platform.
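For reference, the dependency can be pulled into a Maven project roughly as follows. The version shown is only an example and should be replaced with a current release; note that the code below uses the AVChannelLayout API, so the bundled FFmpeg needs to be 5.1 or newer:

<dependency>
    <groupId>org.bytedeco</groupId>
    <artifactId>ffmpeg-platform</artifactId>
    <version>6.1.1-1.5.10</version>
</dependency>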
1. Code Implementation
Below is an example that decodes the audio data from an MP4 file and resamples it to S16 format:
import org.bytedeco.ffmpeg.avcodec.AVCodec;
import org.bytedeco.ffmpeg.avcodec.AVCodecContext;
import org.bytedeco.ffmpeg.avcodec.AVPacket;
import org.bytedeco.ffmpeg.avformat.AVFormatContext;
import org.bytedeco.ffmpeg.avutil.AVChannelLayout;
import org.bytedeco.ffmpeg.avutil.AVDictionary;
import org.bytedeco.ffmpeg.avutil.AVFrame;
import org.bytedeco.ffmpeg.swresample.SwrContext;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.IntPointer;
import org.bytedeco.javacpp.PointerPointer;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Objects;

import static org.bytedeco.ffmpeg.global.avcodec.*;
import static org.bytedeco.ffmpeg.global.avformat.*;
import static org.bytedeco.ffmpeg.global.avutil.*;
import static org.bytedeco.ffmpeg.global.swresample.*;

public class DecodeAudio {

    public static void main(String[] args) throws IOException {
        decode_audio("t.mp4", "t.pcm");
    }

    public static void decode_audio(String input, String output) throws IOException {
        AVFormatContext ifmt_ctx = new AVFormatContext(null);
        AVCodecContext ic = null;
        SwrContext swr_ctx = null;
        AVFrame frame = null;
        AVPacket pkt = null;
        PointerPointer<BytePointer> dst_data = new PointerPointer<>(1);
        IntPointer dst_linesize = new IntPointer(1);
        // Output layout: native-order stereo, 44.1 kHz, packed signed 16-bit
        AVChannelLayout dst_ch_layout = new AVChannelLayout();
        dst_ch_layout.nb_channels(2);
        dst_ch_layout.order(AV_CHANNEL_ORDER_NATIVE);
        dst_ch_layout.u_mask(AV_CH_LAYOUT_STEREO);
        int dst_rate = 44100, dst_nb_channels = 0, dst_sample_fmt = AV_SAMPLE_FMT_S16;
        long dst_nb_samples = 0, max_dst_nb_samples = 0;
        try (OutputStream os = new FileOutputStream(output)) {
            // Open the input and read the stream information
            int ret = avformat_open_input(ifmt_ctx, input, null, null);
            if (ret < 0) {
                throw new IOException(ret + ":avformat_open_input error");
            }
            ret = avformat_find_stream_info(ifmt_ctx, (AVDictionary) null);
            if (ret < 0) {
                throw new IOException(ret + ":avformat_find_stream_info error");
            }
            // Find the first audio stream
            int nb_streams = ifmt_ctx.nb_streams();
            int audio_index = -1;
            for (int i = 0; i < nb_streams; i++) {
                if (ifmt_ctx.streams(i).codecpar().codec_type() == AVMEDIA_TYPE_AUDIO) {
                    audio_index = i;
                    break;
                }
            }
            if (audio_index == -1) {
                throw new IOException("audio index = -1");
            }
            // Create and open the decoder for that stream
            AVCodec codec = avcodec_find_decoder(ifmt_ctx.streams(audio_index).codecpar().codec_id());
            if (Objects.isNull(codec)) {
                throw new IOException("avcodec_find_decoder error");
            }
            ic = avcodec_alloc_context3(codec);
            if (Objects.isNull(ic)) {
                throw new IOException("avcodec_alloc_context3 error");
            }
            /* Copy codec parameters from input stream to output codec context */
            ret = avcodec_parameters_to_context(ic, ifmt_ctx.streams(audio_index).codecpar());
            if (ret < 0) {
                throw new IOException(ret + ":avcodec_parameters_to_context error");
            }
            ret = avcodec_open2(ic, codec, (AVDictionary) null);
            if (ret < 0) {
                throw new IOException(ret + ":avcodec_open2 error");
            }
            // Configure the resampler: input side follows the decoder, output side is 44.1 kHz stereo S16
            swr_ctx = swr_alloc();
            if (Objects.isNull(swr_ctx)) {
                throw new IOException("swr_alloc error");
            }
            av_opt_set_chlayout(swr_ctx, "in_chlayout", ic.ch_layout(), 0);
            av_opt_set_int(swr_ctx, "in_sample_rate", ic.sample_rate(), 0);
            av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ic.sample_fmt(), 0);
            av_opt_set_chlayout(swr_ctx, "out_chlayout", dst_ch_layout, 0);
            av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
            av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
            ret = swr_init(swr_ctx);
            if (ret < 0) {
                throw new IOException(ret + ":swr_init error");
            }
            frame = av_frame_alloc();
            if (Objects.isNull(frame)) {
                throw new IOException("av_frame_alloc error");
            }
            pkt = av_packet_alloc();
            if (Objects.isNull(pkt)) {
                throw new IOException("av_packet_alloc error");
            }
            // Allocate the initial output sample buffer; dst_data[0] will hold the packed S16 data
            dst_nb_samples = av_rescale_rnd(ic.frame_size(), dst_rate, ic.sample_rate(), AV_ROUND_UP);
            max_dst_nb_samples = dst_nb_samples;
            dst_nb_channels = dst_ch_layout.nb_channels();
            /* buffer is going to be directly written to a rawaudio file, no alignment */
            ret = av_samples_alloc(dst_data, dst_linesize, dst_nb_channels,
                    (int) dst_nb_samples, dst_sample_fmt, 0);
            if (ret < 0) {
                throw new IOException(ret + ":av_samples_alloc error");
            }
            int dst_bufsize;
            byte[] buffer;
            // Demux packets, decode frames, resample and append the interleaved S16 data to the file
            while (true) {
                ret = av_read_frame(ifmt_ctx, pkt);
                if (ret == AVERROR_EAGAIN() || ret == AVERROR_EOF) {
                    break;
                } else if (ret < 0) {
                    throw new IOException(ret + ":av_read_frame error");
                }
                if (pkt.stream_index() != audio_index) {
                    av_packet_unref(pkt);
                    continue;
                }
                ret = avcodec_send_packet(ic, pkt);
                av_packet_unref(pkt);
                if (ret < 0) {
                    throw new IOException(ret + ":avcodec_send_packet error");
                }
                while (true) {
                    ret = avcodec_receive_frame(ic, frame);
                    if (ret == AVERROR_EAGAIN() || ret == AVERROR_EOF) {
                        break;
                    } else if (ret < 0) {
                        throw new IOException(ret + ":avcodec_receive_frame error");
                    }
                    // Grow the output buffer if the resampler may produce more samples than before
                    dst_nb_samples = av_rescale_rnd(
                            swr_get_delay(swr_ctx, ic.sample_rate()) + frame.nb_samples(),
                            dst_rate, ic.sample_rate(), AV_ROUND_UP);
                    if (dst_nb_samples > max_dst_nb_samples) {
                        av_freep(dst_data);
                        ret = av_samples_alloc(dst_data, dst_linesize, dst_nb_channels,
                                (int) dst_nb_samples, dst_sample_fmt, 1);
                        if (ret < 0) {
                            break;
                        }
                        max_dst_nb_samples = dst_nb_samples;
                    }
                    /* convert to destination format */
                    ret = swr_convert(swr_ctx, dst_data, (int) dst_nb_samples,
                            frame.data(), frame.nb_samples());
                    if (ret < 0) {
                        throw new IOException(ret + ":swr_convert error");
                    }
                    dst_bufsize = av_samples_get_buffer_size(dst_linesize, dst_nb_channels,
                            ret, dst_sample_fmt, 1);
                    if (dst_bufsize < 0) {
                        throw new IOException(dst_bufsize + ":av_samples_get_buffer_size error");
                    }
                    // Copy the converted samples out of native memory and write them to the PCM file
                    buffer = new byte[dst_bufsize];
                    dst_data.get(BytePointer.class, 0).get(buffer);
                    os.write(buffer);
                    System.out.printf("nb_samples = %d, dst_bufsize = %d\n", ret, dst_bufsize);
                }
            }
            String fmt = "s16le";
            //byte[] buf = new byte[64];
            //ret = av_channel_layout_describe(dst_ch_layout, buf, buf.length);
            System.out.printf(
                    "Resampling succeeded. Play the output file with the command:\n"
                            + "ffplay -f %s -channel_layout %s -channels %d -ar %d %s\n",
                    fmt, AV_CH_LAYOUT_STEREO/* new String(buf, 0, ret) */,
                    dst_nb_channels, dst_rate, output);
        } finally {
            dst_data.close();
            dst_linesize.close();
            if (Objects.nonNull(pkt)) {
                av_packet_free(pkt);
            }
            if (Objects.nonNull(frame)) {
                av_frame_free(frame);
            }
            if (Objects.nonNull(ic)) {
                avcodec_free_context(ic);
            }
            if (Objects.nonNull(swr_ctx)) {
                swr_free(swr_ctx);
            }
            avformat_close_input(ifmt_ctx);
        }
    }
}
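Note that the example stops as soon as av_read_frame reports EOF, so frames still buffered in the decoder and samples still buffered in the resampler are dropped. If that matters for your use case, the usual send/receive draining pattern can be added just before the final printf. The following is only a rough sketch that reuses the variables from the code above (it is not part of the original example), with the buffer-resize and file-write steps elided:

// Drain the decoder: a null packet switches it into draining mode
avcodec_send_packet(ic, (AVPacket) null);
while (avcodec_receive_frame(ic, frame) >= 0) {
    int got = swr_convert(swr_ctx, dst_data, (int) max_dst_nb_samples,
            frame.data(), frame.nb_samples());
    // ... grow dst_data if needed and write `got` samples to the file, as in the inner loop above ...
}
// Drain the resampler: a null input flushes whatever swr_ctx still holds internally
int left = swr_convert(swr_ctx, dst_data, (int) max_dst_nb_samples, (PointerPointer) null, 0);
// ... write the remaining `left` samples to the file ...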
2. Result
The converted PCM data can be played back with ffplay using the following command:
ffplay -f s16le -channel_layout 3 -channels 2 -ar 44100 t.pcm
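Alternatively, if ffplay is not at hand, raw S16 little-endian PCM can also be played directly from Java with the standard javax.sound.sampled API. This is only a small verification sketch unrelated to ffmpeg-platform; it assumes the t.pcm produced above and the same 44.1 kHz / 16-bit / stereo parameters:

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.SourceDataLine;
import java.io.FileInputStream;
import java.io.InputStream;

public class PlayPcm {
    public static void main(String[] args) throws Exception {
        // Must match the decode settings above: 44.1 kHz, 16-bit, 2 channels, signed, little-endian
        AudioFormat fmt = new AudioFormat(44100f, 16, 2, true, false);
        SourceDataLine line = AudioSystem.getSourceDataLine(fmt);
        line.open(fmt);
        line.start();
        try (InputStream in = new FileInputStream("t.pcm")) {
            byte[] buf = new byte[4096];
            int n;
            while ((n = in.read(buf)) > 0) {
                line.write(buf, 0, n);
            }
        }
        line.drain();  // wait until everything queued has actually been played
        line.close();
    }
}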