If you are not particularly familiar with C/C++ but need to use the FFmpeg API for some simple audio/video work, org.bytedeco:ffmpeg-platform is a good option. This post documents how to mux audio and video data (including pixel-format conversion and audio resampling) with ffmpeg-platform.
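To follow along, add the ffmpeg-platform dependency to your build. The version below is an assumption for illustration; pick the current release from Maven Central (the code in this post uses the AVChannelLayout API, so it needs a build based on FFmpeg 5.1 or later):

```xml
<!-- version is an assumption; use the current release from Maven Central -->
<dependency>
    <groupId>org.bytedeco</groupId>
    <artifactId>ffmpeg-platform</artifactId>
    <version>6.0-1.5.9</version>
</dependency>
```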
1. Basic workflow
Muxing audio and video involves the following main steps (a sketch tying them together follows the list):
- Prepare the audio and video streams
- Build the output AVFormatContext
- Add the audio/video AVStreams to the AVFormatContext
- Open the output with avio_open
- Write the header with avformat_write_header
- Write the audio and video frames
- Write the trailer with av_write_trailer
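Put together, the whole pipeline looks roughly like the sketch below. The constants and the `mux` wrapper are assumptions modeled on FFmpeg's muxing.c example, which this post follows; the helper methods are developed section by section in the rest of the post:

```java
// constants as in muxing.c (assumed; the post does not show them)
static final int STREAM_DURATION = 10;                        // seconds
static final int STREAM_FRAME_RATE = 25;                      // fps
static final int STREAM_PIX_FMT = avutil.AV_PIX_FMT_YUV420P;  // default pixel format
static final int SCALE_FLAGS = swscale.SWS_BICUBIC;

// a minimal outline of the whole flow (hypothetical wrapper; helpers follow below)
static void mux(String output) throws IOException {
    AVFormatContext oc = new AVFormatContext(null);                     // section 3
    avformat.avformat_alloc_output_context2(oc, null, null, output);
    AVOutputFormat fmt = oc.oformat();
    OutputStream video_st = new OutputStream(), audio_st = new OutputStream();
    AVCodec video_codec = add_stream(video_st, oc, fmt.video_codec());  // section 4
    AVCodec audio_codec = add_stream(audio_st, oc, fmt.audio_codec());
    open_video(video_codec, video_st);                                  // section 5
    open_audio(audio_codec, audio_st);
    // section 6: avio_open; section 7: header, interleaved frames, trailer
}
```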
2. Output stream structure
Each output stream (audio or video) is wrapped in an inner class with the following structure:
```java
// a wrapper around a single output AVStream
class OutputStream {
    AVStream st;
    AVCodecContext enc;

    /* pts of the next frame that will be generated */
    long next_pts;
    int samples_count;

    AVFrame frame;
    AVFrame tmp_frame;

    AVPacket tmp_pkt;

    /* state of the synthetic signal generator for the audio */
    float t, tincr, tincr2;

    SwsContext sws_ctx;
    SwrContext swr_ctx;

    /* native buffers backing the generated YUV planes and audio samples */
    BytePointer y;
    BytePointer u;
    BytePointer v;
    BytePointer a;
}
```
3. Building the AVFormatContext
Use avformat_alloc_output_context2 to build the output AVFormatContext:
```java
int ret = avformat.avformat_alloc_output_context2(oc, null, null, output);
if (ret < 0) {
    // could not deduce the output format from the file extension: fall back to MPEG
    ret = avformat.avformat_alloc_output_context2(oc, null, "mpeg", output);
}
if (ret < 0) {
    throw new IOException(ret + ":avformat_alloc_output_context2 error");
}
```
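With the bytedeco bindings, `oc` is an output parameter: it is created as an empty wrapper first and filled in by the call above:

```java
// the wrapper starts out empty; avformat_alloc_output_context2 fills it in
AVFormatContext oc = new AVFormatContext(null);
```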
4. Adding output streams
Audio and video share the same function; different parameters are set depending on the codec type:
```java
private static AVCodec add_stream(OutputStream ost, AVFormatContext oc, int codec_id) throws IOException {
    AVCodec codec = avcodec.avcodec_find_encoder(codec_id);
    if (Objects.isNull(codec)) {
        throw new IOException("avcodec_find_encoder error");
    }
    ost.tmp_pkt = avcodec.av_packet_alloc();
    if (Objects.isNull(ost.tmp_pkt)) {
        throw new IOException("av_packet_alloc error");
    }
    ost.st = avformat.avformat_new_stream(oc, null);
    if (Objects.isNull(ost.st)) {
        throw new IOException("avformat_new_stream error");
    }
    ost.st.id(oc.nb_streams() - 1);
    AVCodecContext c = avcodec.avcodec_alloc_context3(codec);
    if (Objects.isNull(c)) {
        throw new IOException("avcodec_alloc_context3 error");
    }
    ost.enc = c;
    switch (codec.type()) {
        case avutil.AVMEDIA_TYPE_AUDIO:
            c.sample_fmt(Objects.nonNull(codec.sample_fmts())
                    ? codec.sample_fmts().get()
                    : avutil.AV_SAMPLE_FMT_FLTP);
            c.bit_rate(64000);
            c.sample_rate(44100);
            if (Objects.nonNull(codec.supported_samplerates())) {
                c.sample_rate(codec.supported_samplerates().get());
                for (int i = 0; codec.supported_samplerates().get(i) != 0; i++) {
                    if (codec.supported_samplerates().get(i) == 44100) {
                        c.sample_rate(44100);
                    }
                }
            }
            // @see libavutil/channel_layout.h
            // #define AV_CHANNEL_LAYOUT_MASK(nb, m) \
            //     { .order = AV_CHANNEL_ORDER_NATIVE, .nb_channels = (nb), .u = { .mask = (m) }}
            // #define AV_CHANNEL_LAYOUT_STEREO AV_CHANNEL_LAYOUT_MASK(2, AV_CH_LAYOUT_STEREO)
            c.ch_layout().nb_channels(2);
            c.ch_layout().order(avutil.AV_CHANNEL_ORDER_NATIVE);
            c.ch_layout().u_mask(avutil.AV_CH_LAYOUT_STEREO);
            AVRational ar = new AVRational();
            ar.num(1);
            ar.den(c.sample_rate());
            ost.st.time_base(ar);
            break;
        case avutil.AVMEDIA_TYPE_VIDEO:
            c.codec_id(codec_id);
            c.bit_rate(400000);
            /* Resolution must be a multiple of two. */
            c.width(352);
            c.height(288);
            /*
             * timebase: This is the fundamental unit of time (in seconds) in terms of which
             * frame timestamps are represented. For fixed-fps content, timebase should be
             * 1/framerate and timestamp increments should be identical to 1.
             */
            AVRational vr = new AVRational();
            vr.num(1);
            vr.den(STREAM_FRAME_RATE);
            ost.st.time_base(vr);
            c.time_base(ost.st.time_base());
            /* emit one intra frame every twelve frames at most */
            c.gop_size(12);
            c.pix_fmt(STREAM_PIX_FMT);
            if (c.codec_id() == avcodec.AV_CODEC_ID_MPEG2VIDEO) {
                /* just for testing, we also add B-frames */
                c.max_b_frames(2);
            }
            if (c.codec_id() == avcodec.AV_CODEC_ID_MPEG1VIDEO) {
                /*
                 * Needed to avoid using macroblocks in which some coeffs overflow. This does
                 * not happen with normal video, it just happens here as the motion of the
                 * chroma plane does not match the luma plane.
                 */
                c.mb_decision(2);
            }
            break;
    }
    /* Some formats want stream headers to be separate. */
    if ((oc.oformat().flags() & avformat.AVFMT_GLOBALHEADER) != 0) {
        c.flags(c.flags() | avcodec.AV_CODEC_FLAG_GLOBAL_HEADER);
    }
    return codec;
}
```
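The post does not show how add_stream gets called. A sketch following muxing.c: streams are only created if the container format defines a default codec, and the resulting flags drive the write loop in section 7 (`fmt` is `oc.oformat()`, `video_st`/`audio_st` as in the outline in section 1):

```java
// create only the streams the container supports (sketch after muxing.c)
boolean encode_video = false, encode_audio = false;
AVCodec video_codec = null, audio_codec = null;
if (fmt.video_codec() != avcodec.AV_CODEC_ID_NONE) {
    video_codec = add_stream(video_st, oc, fmt.video_codec());
    encode_video = true;
}
if (fmt.audio_codec() != avcodec.AV_CODEC_ID_NONE) {
    audio_codec = add_stream(audio_st, oc, fmt.audio_codec());
    encode_audio = true;
}
```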
5. Setting parameters
Set the video stream parameters:
```java
private static void open_video(AVCodec codec, OutputStream ost) throws IOException {
    AVCodecContext c = ost.enc;
    // open the codec
    int ret = avcodec.avcodec_open2(c, codec, (AVDictionary) null);
    if (ret < 0) {
        throw new IOException(ret + ":avcodec_open2 error");
    }
    /* allocate and init a re-usable frame */
    ost.frame = alloc_picture(c.pix_fmt(), c.width(), c.height());
    if (Objects.isNull(ost.frame)) {
        throw new IOException("alloc_picture error");
    }
    /* if the output format is not YUV420P, a temporary YUV420P picture is needed too */
    if (c.pix_fmt() != avutil.AV_PIX_FMT_YUV420P) {
        ost.tmp_frame = alloc_picture(avutil.AV_PIX_FMT_YUV420P, c.width(), c.height());
        if (Objects.isNull(ost.tmp_frame)) {
            throw new IOException("alloc_picture error");
        }
    }
    // copy the stream parameters to the muxer
    ret = avcodec.avcodec_parameters_from_context(ost.st.codecpar(), c);
    if (ret < 0) {
        throw new IOException(ret + ":avcodec_parameters_from_context error");
    }
}
```
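The alloc_picture helper referenced above is not shown in the post; here is a minimal sketch following muxing.c, assuming the data buffers come from av_frame_get_buffer:

```java
// minimal alloc_picture sketch (after muxing.c)
private static AVFrame alloc_picture(int pix_fmt, int width, int height) {
    AVFrame picture = avutil.av_frame_alloc();
    if (Objects.isNull(picture)) {
        return null;
    }
    picture.format(pix_fmt);
    picture.width(width);
    picture.height(height);
    // allocate the buffers for the frame data
    if (avutil.av_frame_get_buffer(picture, 0) < 0) {
        return null;
    }
    return picture;
}
```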
Set the audio stream parameters:
```java
private static void open_audio(AVCodec codec, OutputStream ost) throws IOException {
    AVCodecContext c = ost.enc;
    int ret = avcodec.avcodec_open2(c, codec, (AVDictionary) null);
    if (ret < 0) {
        throw new IOException(ret + ":avcodec_open2 error");
    }
    // init the signal generator
    ost.t = 0;
    ost.tincr = (float) (2 * Math.PI * 100.0 / c.sample_rate());
    ost.tincr2 = (float) (2 * Math.PI * 100.0 / c.sample_rate() / c.sample_rate());
    int nb_samples;
    if ((c.codec().capabilities() & avcodec.AV_CODEC_CAP_VARIABLE_FRAME_SIZE) != 0) {
        nb_samples = 10000;
    } else {
        nb_samples = c.frame_size();
    }
    ost.frame = alloc_audio_frame(c.sample_fmt(), c.ch_layout(), c.sample_rate(), nb_samples);
    ost.tmp_frame = alloc_audio_frame(avutil.AV_SAMPLE_FMT_S16, c.ch_layout(), c.sample_rate(), nb_samples);
    // copy the stream parameters to the muxer
    ret = avcodec.avcodec_parameters_from_context(ost.st.codecpar(), c);
    if (ret < 0) {
        throw new IOException(ret + ":avcodec_parameters_from_context error");
    }
    // create the resampler context
    ost.swr_ctx = swresample.swr_alloc();
    if (Objects.isNull(ost.swr_ctx)) {
        throw new IOException("swr_alloc error");
    }
    // set options: interleaved S16 in, the codec's sample format out
    avutil.av_opt_set_chlayout(ost.swr_ctx, "in_chlayout", c.ch_layout(), 0);
    avutil.av_opt_set_int(ost.swr_ctx, "in_sample_rate", c.sample_rate(), 0);
    avutil.av_opt_set_sample_fmt(ost.swr_ctx, "in_sample_fmt", avutil.AV_SAMPLE_FMT_S16, 0);
    avutil.av_opt_set_chlayout(ost.swr_ctx, "out_chlayout", c.ch_layout(), 0);
    avutil.av_opt_set_int(ost.swr_ctx, "out_sample_rate", c.sample_rate(), 0);
    avutil.av_opt_set_sample_fmt(ost.swr_ctx, "out_sample_fmt", c.sample_fmt(), 0);
    ret = swresample.swr_init(ost.swr_ctx);
    if (ret < 0) {
        throw new IOException(ret + ":swr_init error");
    }
}
```
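alloc_audio_frame is likewise not shown in the post; a sketch following muxing.c:

```java
// minimal alloc_audio_frame sketch (after muxing.c)
private static AVFrame alloc_audio_frame(int sample_fmt, AVChannelLayout ch_layout,
        int sample_rate, int nb_samples) {
    AVFrame frame = avutil.av_frame_alloc();
    if (Objects.isNull(frame)) {
        return null;
    }
    frame.format(sample_fmt);
    avutil.av_channel_layout_copy(frame.ch_layout(), ch_layout);
    frame.sample_rate(sample_rate);
    frame.nb_samples(nb_samples);
    // allocate the sample buffers
    if (nb_samples != 0 && avutil.av_frame_get_buffer(frame, 0) < 0) {
        return null;
    }
    return frame;
}
```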
6. Opening the output
Open the output with the AVIO_FLAG_WRITE flag:
```java
// open the output file, if needed (fmt is oc.oformat())
if ((fmt.flags() & avformat.AVFMT_NOFILE) == 0) {
    AVIOContext pb = new AVIOContext(null);
    ret = avformat.avio_open(pb, output, avformat.AVIO_FLAG_WRITE);
    if (ret < 0) {
        throw new IOException(ret + ":avio_open error");
    }
    oc.pb(pb);
}
```
7. Writing stream data
Write the header:
```java
// Write the stream header, if any
ret = avformat.avformat_write_header(oc, (AVDictionary) null);
if (ret < 0) {
    // av_err2str(ret)
    throw new IOException(ret + ":avformat_write_header error");
}
```
Write the audio and video data. On each iteration the loop compares the two streams' next timestamps with av_compare_ts and encodes whichever is behind, so packets come out roughly interleaved:
```java
while (encode_video || encode_audio) {
    if (encode_video
            && (!encode_audio || avutil.av_compare_ts(video_st.next_pts, video_st.enc.time_base(),
                    audio_st.next_pts, audio_st.enc.time_base()) <= 0)) {
        System.out.println(">>> encode_video >>>");
        encode_video = write_video_frame(oc, video_st) == 0;
    } else {
        System.out.println("<<< encode_audio <<<");
        encode_audio = write_audio_frame(oc, audio_st) == 0;
    }
}
```
The concrete write methods are shown below. Note that once a frame generator returns null (the configured duration has been reached), write_frame sends a null frame, which puts the encoder into draining mode; avcodec_receive_packet then eventually returns AVERROR_EOF, the method returns 1, and the loop above ends:
```java
// encode one video frame and send it to the muxer; return 1 when encoding is finished, 0 otherwise
private static int write_video_frame(AVFormatContext oc, OutputStream ost) throws IOException {
    return write_frame(oc, ost.enc, ost.st, get_video_frame(ost), ost.tmp_pkt);
}

// encode one audio frame and send it to the muxer; return 1 when encoding is finished, 0 otherwise
private static int write_audio_frame(AVFormatContext oc, OutputStream ost) throws IOException {
    AVFrame frame = get_audio_frame(ost);
    AVCodecContext c = ost.enc;
    if (Objects.nonNull(frame)) {
        // convert samples from native format to destination codec format, using the resampler
        // compute destination number of samples
        long dst_nb_samples = avutil.av_rescale_rnd(
                swresample.swr_get_delay(ost.swr_ctx, c.sample_rate()) + frame.nb_samples(),
                c.sample_rate(), c.sample_rate(), avutil.AV_ROUND_UP);
        // when we pass a frame to the encoder, it may keep a reference to it internally;
        // make sure we do not overwrite it here
        int ret = avutil.av_frame_make_writable(ost.frame);
        if (ret < 0) {
            throw new IOException(ret + ":av_frame_make_writable error");
        }
        // convert to destination format
        ret = swresample.swr_convert(ost.swr_ctx, ost.frame.data(), (int) dst_nb_samples,
                frame.data(), frame.nb_samples());
        if (ret < 0) {
            throw new IOException(ret + ":swr_convert error");
        }
        frame = ost.frame;
        AVRational rational = new AVRational();
        rational.num(1);
        rational.den(c.sample_rate());
        frame.pts(avutil.av_rescale_q(ost.samples_count, rational, c.time_base()));
        ost.samples_count += dst_nb_samples;
    }
    return write_frame(oc, c, ost.st, frame, ost.tmp_pkt);
}

private static int write_frame(AVFormatContext fmt_ctx, AVCodecContext c, AVStream st,
        AVFrame frame, AVPacket pkt) throws IOException {
    // send the frame to the encoder; a null frame flushes it
    int ret = avcodec.avcodec_send_frame(c, frame);
    if (ret < 0) {
        throw new IOException(ret + ":avcodec_send_frame error");
    }
    while (true) {
        ret = avcodec.avcodec_receive_packet(c, pkt);
        if (ret == avutil.AVERROR_EAGAIN() || ret == avutil.AVERROR_EOF()) {
            break;
        } else if (ret < 0) {
            throw new IOException(ret + ":avcodec_receive_packet error");
        }
        // rescale output packet timestamp values from codec to stream timebase
        avcodec.av_packet_rescale_ts(pkt, c.time_base(), st.time_base());
        pkt.stream_index(st.index());
        // Write the compressed frame to the media file.
        ret = avformat.av_interleaved_write_frame(fmt_ctx, pkt);
        if (ret < 0) {
            throw new IOException(ret + ":av_interleaved_write_frame error");
        }
    }
    return ret == avutil.AVERROR_EOF() ? 1 : 0;
}
```
The audio and video data written here are generated by the program itself:
```java
private static AVFrame get_video_frame(OutputStream ost) throws IOException {
    // check if we want to generate more frames
    AVRational rational = new AVRational();
    rational.num(1);
    rational.den(1);
    if (avutil.av_compare_ts(ost.next_pts, ost.enc.time_base(), STREAM_DURATION, rational) > 0) {
        return null;
    }
    // the encoder may keep an internal reference to the frame;
    // make sure we do not overwrite it here
    int ret = avutil.av_frame_make_writable(ost.frame);
    if (ret < 0) {
        throw new IOException(ret + ":av_frame_make_writable error");
    }
    AVCodecContext c = ost.enc;
    if (c.pix_fmt() != avutil.AV_PIX_FMT_YUV420P) {
        // as we only generate a YUV420P picture, we must convert it
        // to the codec pixel format if needed
        if (Objects.isNull(ost.sws_ctx)) {
            ost.sws_ctx = swscale.sws_getContext(c.width(), c.height(), avutil.AV_PIX_FMT_YUV420P,
                    c.width(), c.height(), c.pix_fmt(), SCALE_FLAGS, null, null, (DoublePointer) null);
            if (Objects.isNull(ost.sws_ctx)) {
                throw new IOException("sws_getContext error");
            }
        }
        // fill the temporary YUV420P frame, then convert it into ost.frame
        fill_yuv_image(ost, ost.tmp_frame, (int) ost.next_pts, c.width(), c.height());
        swscale.sws_scale(ost.sws_ctx, ost.tmp_frame.data(), ost.tmp_frame.linesize(), 0, c.height(),
                ost.frame.data(), ost.frame.linesize());
    } else {
        fill_yuv_image(ost, ost.frame, (int) ost.next_pts, c.width(), c.height());
    }
    ost.frame.pts(ost.next_pts++);
    return ost.frame;
}

/* Prepare a dummy image. */
private static void fill_yuv_image(OutputStream ost, AVFrame pict, int frame_index, int width, int height) {
    int x, y, i;
    i = frame_index;
    if (Objects.isNull(ost.y)) {
        // allocate the planes once and attach them to the frame; the chroma buffers are
        // generously sized (YUV420P only needs width/2 x height/2 per chroma plane)
        ost.y = new BytePointer(new byte[width * height]);
        ost.u = new BytePointer(new byte[width * height * 2 / 3]);
        ost.v = new BytePointer(new byte[width * height * 2 / 3]);
        pict.data(0, ost.y);
        pict.data(1, ost.u);
        pict.data(2, ost.v);
    }
    // Y
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            pict.data(0).put(y * pict.linesize(0) + x, (byte) (x + y + i * 3));
        }
    }
    // Cb and Cr
    for (y = 0; y < height / 2; y++) {
        for (x = 0; x < width / 2; x++) {
            pict.data(1).put(y * pict.linesize(1) + x, (byte) (128 + y + i * 2));
            pict.data(2).put(y * pict.linesize(2) + x, (byte) (64 + x + i * 5));
        }
    }
}

private static AVFrame get_audio_frame(OutputStream ost) {
    // check if we want to generate more frames
    AVRational rational = new AVRational();
    rational.num(1);
    rational.den(1);
    if (avutil.av_compare_ts(ost.next_pts, ost.enc.time_base(), STREAM_DURATION, rational) > 0) {
        return null;
    }
    AVFrame frame = ost.tmp_frame;
    if (Objects.isNull(ost.a)) {
        // interleaved S16 samples: 2 bytes per sample per channel
        ost.a = new BytePointer(new byte[frame.nb_samples() * 2 * ost.enc.ch_layout().nb_channels()]);
        frame.data(0, ost.a);
    }
    int j, i, v, off = 0;
    for (j = 0; j < frame.nb_samples(); j++) {
        v = (int) (Math.sin(ost.t) * 10000);
        for (i = 0; i < ost.enc.ch_layout().nb_channels(); i++) {
            frame.data(0).put(off++, (byte) (v & 0xff));
            frame.data(0).put(off++, (byte) ((v >> 8) & 0xff));
        }
        ost.t += ost.tincr;
        ost.tincr += ost.tincr2;
    }
    frame.pts(ost.next_pts);
    ost.next_pts += frame.nb_samples();
    return frame;
}
```
Write the trailer:
```java
avformat.av_write_trailer(oc);
```
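muxing.c additionally releases everything it allocated after writing the trailer; the post omits this step. A cleanup sketch using the same field names as above:

```java
// release per-stream resources, then close the output (sketch after muxing.c)
for (OutputStream ost : new OutputStream[] { video_st, audio_st }) {
    avcodec.avcodec_free_context(ost.enc);
    avutil.av_frame_free(ost.frame);
    avutil.av_frame_free(ost.tmp_frame);
    avcodec.av_packet_free(ost.tmp_pkt);
    swscale.sws_freeContext(ost.sws_ctx);  // null-safe
    swresample.swr_free(ost.swr_ctx);      // null-safe
}
if ((fmt.flags() & avformat.AVFMT_NOFILE) == 0) {
    // close the output file
    avformat.avio_close(oc.pb());
}
avformat.avformat_free_context(oc);
```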
8. Results
The final generated output:
Full code: available via the mini-program in the profile avatar, or by private message.