自己关于ffmpeg学习的心得

time_scale和time_base的理解

 在音视频领域，常常会看到time_scale和time_base这样的概念。由于这是非常基础的概念，在许多书本、工程代码中并没有解释这两个概念的实际含义。正是从这个角度出发，我将谈一谈对这两个概念的理解，故形成此文。

首先，从常识角度来看

我们现行的时间单位有：小时(h)，分钟(min)，秒(s)，毫秒(ms)，微秒(us)等。

对于小时，我们知道1(h)=60(min)。也就是将1小时细分为60个刻度，每个刻度就是1分钟。一小时的 时间刻度(time_scale) 有60个，每一个刻度的时间由 时间基(time_base) 表示，一个时间基就代表1/60个小时，也就是1分钟。因此，time_scale=60，time_base = 1/60 (h) = 1 (min)。

对于分钟，时间刻度(time_scale)也是60个，但每一个刻度，也就是时间基(time_base)代表1/60个分钟，即1秒。time_scale=60，time_base = 1/60 (min) = 1(s)。

对于秒，时间刻度(time_scale)是1000个，每一个刻度，也就是时间基(time_base)代表1/1000个秒，即1毫秒。time_scale=1000，time_base = 1/1000 (s) = 1(ms)。

相信看到这里，可以发现，本质上就是不同精度的时间度量单位的表示，这些约定俗成的转换规则就是所谓的时间转换标准。

在音视频领域，我们常看到，

time_scale = 1000，time_base = 1/1000

time_scale = 30000，time_base = 1/30000

time_scale = 90000， time_base = 1/90000

什么意思呢？

通常我们观看的电视节目、视频等，都包含有视频流、音频流、字幕流；3种流要保持同步(通俗的讲就是嘴型、声音、字幕都对的上)，就需要基于时间戳的同步机制(同步机制的原理本文不展开)。而这个时间戳就是所谓的PTS(显示时间戳)。以视频为例，假设视频的帧率为25fps，就是一秒钟要显示25帧图像，那么每帧图像的显示时长为40(ms)。如果采用time_scale = 90000， time_base = 1/90000(s)来度量PTS，意思是将1秒划分为90000个刻度，每一个刻度表示1/90000秒；那么要将25帧图像以time_scale=90000在1秒内均分，每帧图像的显示时长的起始时刻就用pts表示，

第1帧pts=0，

第2帧pts=3600，转换为时间就是 3600*time_base=0.04(s)=40(ms)，即第2帧图像的显示的起始时刻为40ms

第3帧pts=7200，转换为时间就是 7200*time_base=0.08(s)=80(ms)，即第3帧图像的显示的起始时刻为80ms

依此类推。

不同精度的时间度量，在表现形式上不同，但时间度量上是一样。

    25fps，1秒的时间刻度time_scale = 1000， time_base = 1/1000，那么每帧图像占40个time_base = 40(ms)—基于常识时间度量单位
    1秒的时间刻度，将精度提高为time_scale = 90000，time_base=1/90000(s)，那么每帧图像占3600个time_base 也是 40(ms)

问题又来了，为什么要使用不同精度的time_scale呢？简单来说，因为视频流、音频流、字幕流的采样精度不同，为了使不同的流保持同步，需要用足够精度的time_scale来表示pts。

// Extract the Y, U and V planes of the first frame of a raw planar YUV420
// (I420) file and write each plane to its own file.
//
// path:   input file; must hold at least one width x height frame laid out
//         as Y plane, then U plane, then V plane.
// width:  frame width in pixels, must be > 0.
// height: frame height in pixels, must be > 0.
//
// Output goes to "yuv420_y.y", "yuv420_u.y" and "yuv420_v.y" in the current
// working directory. Each chroma plane is taken to be width*height/4 bytes,
// so even dimensions are assumed. On failure a message is printed to stderr;
// the output files are only created once a complete frame has been read.
void extract_yuv(const char* path,int width,int height){

    FILE* fp = NULL;
    FILE* f1 = NULL;
    FILE* f2 = NULL;
    FILE* f3 = NULL;
    unsigned char* p = NULL;
    size_t ySize = 0;
    size_t cSize = 0;
    size_t frameSize = 0;

    if (path == NULL || width <= 0 || height <= 0) {
        fprintf(stderr, "extract_yuv: invalid arguments\n");
        return;
    }

    ySize = (size_t)width * (size_t)height;
    cSize = ySize / 4;
    frameSize = ySize + 2 * cSize;

    // The input is only read, so it does not need to be writable.
    fp = fopen(path, "rb");
    if (fp == NULL) {
        fprintf(stderr, "extract_yuv: cannot open input file\n");
        goto END;
    }

    p = (unsigned char*)malloc(frameSize);
    if (p == NULL) {
        fprintf(stderr, "extract_yuv: out of memory\n");
        goto END;
    }

    if (fread(p, 1, frameSize, fp) != frameSize) {
        fprintf(stderr, "extract_yuv: input is shorter than one frame\n");
        goto END;
    }

    f1 = fopen("yuv420_y.y", "wb");
    f2 = fopen("yuv420_u.y", "wb");
    f3 = fopen("yuv420_v.y", "wb");
    if (f1 == NULL || f2 == NULL || f3 == NULL) {
        fprintf(stderr, "extract_yuv: cannot open output file\n");
        goto END;
    }

    // Plane offsets are pure integer sums: U starts right after Y, and V
    // starts right after U (ySize + cSize). An expression such as
    // ySize * (1 + 1 / 4) would truncate to ySize and re-read the U plane.
    if (fwrite(p, 1, ySize, f1) != ySize ||
        fwrite(p + ySize, 1, cSize, f2) != cSize ||
        fwrite(p + ySize + cSize, 1, cSize, f3) != cSize) {
        fprintf(stderr, "extract_yuv: short write\n");
    }

END:
    free(p);
    if (fp != NULL) {
        fclose(fp);
    }
    if (f1 != NULL) {
        fclose(f1);
    }
    if (f2 != NULL) {
        fclose(f2);
    }
    if (f3 != NULL) {
        fclose(f3);
    }
}

写文件方式提取 11.yuv 里的 y u v

// 使用ffmpeg获取视频的信息
void getInfo(){

    av_register_all();

    AVFormatContext * pFormat = NULL;
    const char* path = "11.mp4";

    avformat_open_input(&pFormat, path, NULL, NULL);

    av_dump_format(pFormat, NULL, path, 0);

    int time = pFormat->duration;
    int mbittime = (time / 1000000) / 60;
    int mmintime = (time / 1000000) % 60;
    printf("该视频时长=%d分:%d秒\n", mbittime, mmintime);

    // 获取网络流的信息
    /*av_register_all();

    AVFormatContext * pFormat = NULL;
    const char* path = "http://ivi.bupt.edu.cn/hls/cctv1hd.m3u8";
    avformat_network_init();
    AVDictionary *opt = NULL;
    av_dict_set(&opt, "rtsp_transport", "tcp", 0);
    av_dict_set(&opt, "max_delay", "550", 0);

    avformat_open_input(&pFormat, path, NULL, &opt);

    av_dump_format(pFormat, NULL, path, 0);*/
}

使用ffmpeg获取视频的信息

void extract_mp4_3(const char* path){

    FILE* f1 = fopen("y.y","wb+");
    FILE* f2 = fopen("u.y", "wb+");
    FILE* f3 = fopen("v.y", "wb+");

    av_register_all();

    AVFormatContext* format = NULL;
    avformat_open_input(&format,path,NULL,NULL);

    avformat_find_stream_info(format,NULL);
    int bestStream = av_find_best_stream(format,AVMEDIA_TYPE_VIDEO,NULL,NULL,NULL,NULL);

    AVCodec* codec = avcodec_find_decoder(format->streams[bestStream]->codec->codec_id);
    
    avcodec_open2(format->streams[bestStream]->codec, codec, NULL);

    // ---------- 解码
    AVPacket* packet = (AVPacket*)av_malloc(sizeof(AVPacket));
    AVFrame* frame = av_frame_alloc();
    int go, frameCount = 0;
    int width = format->streams[bestStream]->codec->width;
    int height = format->streams[bestStream]->codec->height;

    // ---- 用来存储转用sws_scale转换过来的数据，后续用这个数据就不会播放的时候出问题
    AVFrame* frameDst = av_frame_alloc();
    int fmt = format->streams[bestStream]->codec->pix_fmt;
    uint8_t* buff = (uint8_t*)av_malloc(avpicture_get_size((AVPixelFormat)fmt, width,height));
    avpicture_fill((AVPicture*)frameDst, buff,(AVPixelFormat)fmt,width,height);

    SwsContext* swsCtx = sws_getContext(width,height,(AVPixelFormat)fmt,width,height,AV_PIX_FMT_YUV420P,SWS_BICUBIC,NULL,NULL,NULL);

    while (av_read_frame(format, packet) >= 0) // 解码1 将数据读到包里
    {
        if (packet->stream_index == AVMEDIA_TYPE_VIDEO){
            avcodec_decode_video2(format->streams[bestStream]->codec, frame, &go, packet); // 解码2 将包里的数据解码到frame
            if (go){

                sws_scale(swsCtx,frame->data,frame->linesize,0,height,frameDst->data,frameDst->linesize);

                fwrite(frameDst->data[0], 1, width*height, f1);
                fwrite(frameDst->data[1], 1, width*height, f2);
                fwrite(frameDst->data[2], 1, width*height, f3);

                frameCount++;
                printf("写了%d帧了\n", frameCount);
            }    
        }

        av_free_packet(packet);
    }

    fclose(f1);
    fclose(f2);
    fclose(f3);

    av_frame_free(&frame);
    av_frame_free(&frameDst);
    av_free(buff);
    sws_freeContext(swsCtx);
    avformat_close_input(&format);
}

经典解码，提取MP4里的Y U V

时间基、pts、dts


时间基{1，25}       下 time=5      真实的时间为：5 * （1/25） = 0.2     // 时间基里的时间明显把它变大了扩了很多倍=>  0.2 / (1/25)=5      
倒数的话 0.2 * x = 5 ; x = 25   =>   0.2 * 25 = 5   =>   0.2 / (1/25) = 5   => 0.2 *25 = 0.2 / (1/25)
时间基{1，90000} 下 time=?     这里和上面一样，把真实的时间转为时间戳的时候 0.2 / (1/90000) = 18000 // 如果转为真实的时间 18000 * （1/90000）= 0.2
上面的 time 可以理解为 pts 或 dts 也就是时间戳, 是占多少个时间刻度（占多少个格子）,它的单位不是秒，而是时间刻度,只有pts与time_base两者结合在一起，才能表达出具体的时间是多少。好比我只告诉你，某个物体的长度占某一把尺上的20个刻度。但是我不告诉你，每个刻度是多少厘米，你仍然无法知道物体的长度

封装格式不同，timebase（时间基）是不一样的

DTS、PTS 是在编码的时候由编码器生成的。当视频流中没有 B 帧时，通常 DTS 和 PTS 的顺序是一致的。但如果有 B 帧时，解码顺序和播放顺序不一致了。


时间基(time_base)是时间戳(timestamp)的单位, 时间戳值乘以时间基，可以得到实际的时刻值(以秒等为单位)
例如，如果一个视频帧的 dts 是 40，pts 是 160，其 time_base 是 1/1000 秒，那么可以计算出此视频帧的解码时刻是 40 毫秒(40/1000 | 40*(1/1000))，显示时刻是 160 毫秒(160/1000)。FFmpeg 中时间戳(pts/dts)的类型是 int64_t 类型，把一个 time_base 看作一个时钟脉冲，则可把 dts/pts 看作时钟脉冲的计数。


整数的倒数：把整数看做分母是1的分数，再交换分子分母的位置  2的倒数是1/2
分数的倒数：交换分子分母的位置  1/5的倒数是5

pts、dts 是时间戳（占多少个时间刻度，单位不是秒，而是时间刻度。只有pts与time_base两者结合在一起，才能得到实际的时间是多少）


实际时间到时间戳（时间刻度）：0.08 * x = 80 => x = 1000  ( 0.08 * 1000 = 0.08 / (1/1000) ; 1/1000的倒数就是1000, (1/1000)是时间基被定义成{1, 1000}  )
	实际时间 / 时间基(变大) = 时间戳 || 实际时间 * 时间基的倒数 = 时间戳  //   以上 flv 封装格式的时间基为 {1,1000}（这里乘的1000是时间基的倒数，时间基本身表示为{1，1000}）
时间戳（时间刻度）到实际时间：80 * x = 0.08 => x = 0.001 （ 80 * 0.001 = 80 * (1/1000) = 80 / 1000 ）
	时间戳 * 时间基 = 实际时间 	// 0.001 = 1/1000 所以也等于  80 * {1,1000}

总结：
	时间基是很小的单位刻度，时间基有很多种；
	实际时间到时间戳是要变大的，所以要么直接除时间基，要么乘时间基的倒数；
	时间戳到实际时间是要变小的，所以要么直接乘时间基，要么除时间基的倒数
实际时间到时间戳（时间刻度）：0.08 * x = 7200 => x = 7200 / 0.08 => x = 90000 => 得ts封装格式的时间基为 90000（这个是倒数，实际表示为{1，90000}）

SDL 下载地址：https://github.com/libsdl-org/SDL/releases/tag/release-2.28.2 现在最新的就是 2.28.2

SDL2-2.28.2-win32-x64.zip/ SDL2-2.28.2-win32-x86.zip 这种只有 dll 库

SDL2-devel-2.28.2-VC.zip 这种是有头文件和 lib 以及dll (里面包括32位和64位)

视频格式转换并渲染

 之前老版本的渲染：

// Legacy path: convert the decoded frame to YUV420P inside V.dstFrame, then
// upload the result to the texture as one packed buffer.
// The buffer is sized with alignment 1, the same alignment used to lay the
// planes out below, so the size and the layout agree.
V.buff = (uint8_t*)av_malloc(av_image_get_buffer_size(AV_PIX_FMT_YUV420P, V.codecCtx->width, V.codecCtx->height, 1));
// Point V.dstFrame's plane pointers/strides into V.buff, since sws_scale
// writes through V.dstFrame. Same effect as the older
// avpicture_fill((AVPicture*)V.dstFrame, V.buff, AV_PIX_FMT_YUV420P, V.codecCtx->width, V.codecCtx->height);
av_image_fill_arrays(V.dstFrame->data, V.dstFrame->linesize, V.buff, AV_PIX_FMT_YUV420P, V.codecCtx->width, V.codecCtx->height, 1);
sws_scale(V.swsCtx, U.srcFrame->data, U.srcFrame->linesize, 0, U.srcFrame->height, V.dstFrame->data, V.dstFrame->linesize);

// Only data[0] and the luma pitch are passed, which relies on the three
// planes sitting back to back in V.buff (the alignment-1 layout above).
// NOTE(review): presumably V.texture is a planar YUV (IYUV) texture - confirm.
SDL_UpdateTexture(V.texture, NULL, V.dstFrame->data[0], V.codecCtx->width);


现在改进的视频渲染
// Read packets, decode the video ones and present every decoded frame through
// SDL, until the input ends or U.isRun is cleared.
// U.isRun is tested first so that no packet is read and then dropped when the
// loop is asked to stop.
while (U.isRun && av_read_frame(U.formatCtx, U.packet) >= 0) {

	// NOTE(review): this compares a stream index with a media-type enum, so it
	// only selects the video stream when that stream happens to have index
	// AVMEDIA_TYPE_VIDEO; compare against the stored video stream index if
	// one is available.
	if (U.packet->stream_index == AVMEDIA_TYPE_VIDEO) {

		U.ret = avcodec_send_packet(V.codecCtx, U.packet);
		if (U.ret) {
			// The decoder rejected the packet: drop it, but still release it.
			av_packet_unref(U.packet);
			continue;
		}

		while (avcodec_receive_frame(V.codecCtx, U.frame) >= 0)
		{
			if (V.codecCtx->pix_fmt != AV_PIX_FMT_YUV420P) {
				uint8_t* dstData[4];
				int dstLinesize[4];

				if (!V.swsCtx) {
					V.swsCtx = sws_getCachedContext(V.swsCtx, V.codecCtx->width, V.codecCtx->height, V.codecCtx->pix_fmt, V.codecCtx->width, V.codecCtx->height, AV_PIX_FMT_YUV420P, SWS_FAST_BILINEAR, NULL, NULL, NULL);
					if (!V.swsCtx) {
						printf("sws_getCachedContext error.\n");
						av_frame_unref(U.frame);
						av_packet_unref(U.packet);
						goto END;
					}
				}

				if (!V.buff) {
					// Sized with alignment 1 to match av_image_fill_arrays below.
					int buffSize = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, V.codecCtx->width, V.codecCtx->height, 1);
					if (buffSize > 0) {
						V.buff = (uint8_t*)av_malloc(buffSize);
					}
					if (!V.buff) {
						printf("av_malloc error.\n");
						av_frame_unref(U.frame);
						av_packet_unref(U.packet);
						goto END;
					}
				}

				// dstData/dstLinesize are fresh locals for every frame, so they
				// are filled on every frame, not only when V.buff is first
				// allocated.
				av_image_fill_arrays(dstData, dstLinesize, V.buff, AV_PIX_FMT_YUV420P, V.codecCtx->width, V.codecCtx->height, 1);

				if (sws_scale(V.swsCtx, U.frame->data, U.frame->linesize, 0, U.frame->height, dstData, dstLinesize) < 0) {
					printf("sws_scale error.\n");
					av_frame_unref(U.frame);
					av_packet_unref(U.packet);
					goto END;
				}

				SDL_UpdateYUVTexture(V.texture, NULL, dstData[0], dstLinesize[0], dstData[1], dstLinesize[1], dstData[2], dstLinesize[2]);
			}
			else
			{
				// Already YUV420P: upload the decoder's planes directly.
				SDL_UpdateYUVTexture(V.texture, NULL, U.frame->data[0], U.frame->linesize[0], U.frame->data[1], U.frame->linesize[1], U.frame->data[2], U.frame->linesize[2]);
			}

			SDL_RenderClear(V.render);
			SDL_RenderCopy(V.render, V.texture, NULL, &V.vRect);
			SDL_RenderPresent(V.render);

			av_frame_unref(U.frame);
		}
	}

	// Every packet returned by av_read_frame is released here, including the
	// ones that belong to other streams.
	av_packet_unref(U.packet);
}

posted @ 2022-07-31 00:49 封兴旺阅读(32) 评论(0) 编辑收藏举报

刷新页面返回顶部

封兴旺

封兴旺

自己关于ffmpeg学习的心得

公告

联系方式: 18274305123(微信同号)