X264-视频压缩编码VCL
在前面的过程中,我们得到了编码图像,编码器开始H264视频编码VCL。首先初始化有关参数,包括帧类型的获取、创建多参考帧的列表、初始化码流控制、初始化写码流结构和写SPS、PPS头结构信息。
1.初始化相关参数
若为IDR帧,则意味着一个新的编码序列的开始。在H264中为了防止编码错误扩散,规定IDR帧之后的图像不得以IDR帧之前的任何帧为参考,因而编码器遇到IDR帧则需要重置参考帧区域。同时,将SEI、SPS、PPS分别单独写入NAL单元。其中SPS和PPS这两个参数集集合了编解码的核心参数,直接关系解码端能否正常解码;SEI则携带辅助增强信息。
若不是IDR帧,依据帧类型设定当前NAL单元的类型和图像片的类型。接着进行一系列初始化参数的操作,包括建立关于参考帧的list0和list1,初始化码流控制得到量化步长、参考帧等相关信息,初始化写比特流环境变量等。
在函数x264_lookahead_put_frame()中,将fenc放入lookahead->next.list[]队列,再调用函数x264_lookahead_get_frames()判断帧的类型。
x264_lookahead_get_frames():
//通过lookahead分析帧类型 void x264_lookahead_get_frames( x264_t *h ) { if( h->param.i_sync_lookahead ) { /* We have a lookahead thread, so get frames from there */ x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); while( !h->lookahead->ofbuf.i_size && h->lookahead->b_thread_active ) x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_fill, &h->lookahead->ofbuf.mutex ); x264_lookahead_encoder_shift( h ); x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); } else { /* We are not running a lookahead thread, so perform all the slicetype decide on the fly */ //currect[]必须为空,next不能为空? if( h->frames.current[0] || !h->lookahead->next.i_size ) return; //分析lookahead->next->list帧的类型 x264_stack_align( x264_slicetype_decide, h ); //更新lookahead->last_nonb x264_lookahead_update_last_nonb( h, h->lookahead->next.list[0] ); int shift_frames = h->lookahead->next.list[0]->i_bframes + 1; //lookahead->next.list移动到lookahead->ofbuf.list x264_lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames ); /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */ if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) ) x264_stack_align( x264_slicetype_analyse, h, shift_frames ); //lookahead->ofbuf.list帧移动到frames->current x264_lookahead_encoder_shift( h ); } }
函数x264_lookahead_get_frames()里调用了x264_slicetype_decide()函数来判断帧的类型:
/*
 * Decide the slice type (I/IDR/P/B/BREF) of the frames at the head of
 * h->lookahead->next, forming one mini-GOP (a run of B-frames terminated by a
 * non-B frame), then reorder that mini-GOP into coded order.
 *
 * Steps, in order:
 *   1. compute per-frame durations and field counts;
 *   2. obtain frame types from the first-pass stats or from
 *      x264_slicetype_analyse();
 *   3. walk the queue, fixing up incompatible types and enforcing the
 *      keyframe-interval and max-B-frame limits;
 *   4. precompute frame costs (unless CQP) and weighted-prediction analysis;
 *   5. shuffle the mini-GOP into coded order and compute CPB durations.
 *
 * Returns immediately if the next queue is empty.
 *
 * Note: remarks about helper behaviour are translated from the original
 * annotations; the helpers themselves are not visible here.
 */
void x264_slicetype_decide( x264_t *h )
{
    x264_frame_t *frames[X264_BFRAME_MAX+2];
    x264_frame_t *frm;
    int bframes;
    int brefs;

    if( !h->lookahead->next.i_size )
        return;

    int lookahead_size = h->lookahead->next.i_size;

    /* Walk the next queue and fill in durations / field counts. */
    for( int i = 0; i < h->lookahead->next.i_size; i++ )
    {
        if( h->param.b_vfr_input )
        {
            if( lookahead_size-- > 1 )
                h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts);
            else
                h->lookahead->next.list[i]->i_duration = h->i_prev_duration;
        }
        else
            h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct];
        h->i_prev_duration = h->lookahead->next.list[i]->i_duration;
        h->lookahead->next.list[i]->f_duration = (double)h->lookahead->next.list[i]->i_duration
                                               * h->sps->vui.i_num_units_in_tick
                                               / h->sps->vui.i_time_scale;

        if( h->lookahead->next.list[i]->i_frame > h->i_disp_fields_last_frame && lookahead_size > 0 )
        {
            h->lookahead->next.list[i]->i_field_cnt = h->i_disp_fields;
            h->i_disp_fields += h->lookahead->next.list[i]->i_duration;
            h->i_disp_fields_last_frame = h->lookahead->next.list[i]->i_frame;
        }
        else if( lookahead_size == 0 )
        {
            h->lookahead->next.list[i]->i_field_cnt = h->i_disp_fields;
            h->lookahead->next.list[i]->i_duration = h->i_prev_duration;
        }
    }

    if( h->param.rc.b_stat_read )
    {
        /* Per the original note, b_stat_read is non-zero only in the second
         * pass of 2-pass mode. */
        /* Use the frame types from the first pass */
        for( int i = 0; i < h->lookahead->next.i_size; i++ )
            h->lookahead->next.list[i]->i_type =
                x264_ratecontrol_slice_type( h, h->lookahead->next.list[i]->i_frame );
    }
    else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
             || h->param.i_scenecut_threshold
             || h->param.rc.b_mb_tree
             || (h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead) )
        x264_slicetype_analyse( h, 0 ); /* analyse the frame types (I, B, P) */

    /* ---- Walk the queue until the first non-B frame closes the mini-GOP ---- */
    for( bframes = 0, brefs = 0;; bframes++ )
    {
        /* Take the next frame from the queue. */
        frm = h->lookahead->next.list[bframes];
        /* BREF handling */
        if( frm->i_type == X264_TYPE_BREF && h->param.i_bframe_pyramid < X264_B_PYRAMID_NORMAL &&
            brefs == h->param.i_bframe_pyramid )
        {
            /* Demote BREF to plain B. */
            frm->i_type = X264_TYPE_B;
            x264_log( h, X264_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid %s \n",
                      frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid] );
        }
        /* pyramid with multiple B-refs needs a big enough dpb that the preceding P-frame stays available.
           smaller dpb could be supported by smart enough use of mmco, but it's easier just to forbid it. */
        else if( frm->i_type == X264_TYPE_BREF && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL &&
            brefs && h->param.i_frame_reference <= (brefs+3) )
        {
            frm->i_type = X264_TYPE_B;
            x264_log( h, X264_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid %s and %d reference frames\n",
                      frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference );
        }

        /* Keyframe handling: resolve KEYFRAME to I (open GOP) or IDR. */
        if( frm->i_type == X264_TYPE_KEYFRAME )
            frm->i_type = h->param.b_open_gop ? X264_TYPE_I : X264_TYPE_IDR;

        /* Limit GOP size */
        if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
        {
            if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
                frm->i_type = h->param.b_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
            int warn = frm->i_type != X264_TYPE_IDR;
            if( warn && h->param.b_open_gop )
                warn &= frm->i_type != X264_TYPE_I;
            if( warn )
            {
                x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
                frm->i_type = h->param.b_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
            }
        }
        if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min )
        {
            if( h->param.b_open_gop )
            {
                h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
                if( h->param.b_bluray_compat )
                    h->lookahead->i_last_keyframe -= bframes; // Use bluray order
                frm->b_keyframe = 1;
            }
            else
                frm->i_type = X264_TYPE_IDR;
        }
        if( frm->i_type == X264_TYPE_IDR )
        {
            /* Close GOP */
            /* Record this frame as the last keyframe. */
            h->lookahead->i_last_keyframe = frm->i_frame;
            frm->b_keyframe = 1;
            if( bframes > 0 )
            {
                /* Step back one frame and make it a P-frame, so the
                 * mini-GOP ends before the IDR. */
                bframes--;
                h->lookahead->next.list[bframes]->i_type = X264_TYPE_P;
            }
        }

        /* Max B-frame count reached, or no further frame in the queue. */
        if( bframes == h->param.i_bframe ||
            !h->lookahead->next.list[bframes+1] )
        {
            if( IS_X264_TYPE_B( frm->i_type ) )
                x264_log( h, X264_LOG_WARNING, "specified frame type is not compatible with max B-frames\n" );
            if( frm->i_type == X264_TYPE_AUTO
                || IS_X264_TYPE_B( frm->i_type ) )
                frm->i_type = X264_TYPE_P;
        }

        if( frm->i_type == X264_TYPE_BREF )
            brefs++;

        if( frm->i_type == X264_TYPE_AUTO )
            frm->i_type = X264_TYPE_B;

        else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
    }

    if( bframes )
        h->lookahead->next.list[bframes-1]->b_last_minigop_bframe = 1;
    h->lookahead->next.list[bframes]->i_bframes = bframes;

    /* insert a bref into the sequence */
    if( h->param.i_bframe_pyramid && bframes > 1 && !brefs )
    {
        h->lookahead->next.list[bframes/2]->i_type = X264_TYPE_BREF;
        brefs++;
    }

    /* calculate the frame costs ahead of time for x264_rc_analyse_slice while we still have lowres */
    if( h->param.rc.i_rc_method != X264_RC_CQP )
    {
        x264_mb_analysis_t a;
        int p0, p1, b;
        p1 = b = bframes + 1;

        x264_lowres_context_init( h, &a );

        /* frames[0] is the previous non-B frame; frames[1..] is the mini-GOP. */
        frames[0] = h->lookahead->last_nonb;
        memcpy( &frames[1], h->lookahead->next.list, (bframes+1) * sizeof(x264_frame_t*) );
        if( IS_X264_TYPE_I( h->lookahead->next.list[bframes]->i_type ) )
            p0 = bframes + 1;
        else // P
            p0 = 0;

        x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );

        if( (p0 != p1 || bframes) && h->param.rc.i_vbv_buffer_size )
        {
            /* We need the intra costs for row SATDs. */
            x264_slicetype_frame_cost( h, &a, frames, b, b, b, 0 );

            /* We need B-frame costs for row SATDs. */
            p0 = 0;
            for( b = 1; b <= bframes; b++ )
            {
                if( frames[b]->i_type == X264_TYPE_B )
                    for( p1 = b; frames[p1]->i_type == X264_TYPE_B; )
                        p1++;
                else
                    p1 = bframes + 1;
                x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
                if( frames[b]->i_type == X264_TYPE_BREF )
                    p0 = b;
            }
        }
    }

    /* Analyse for weighted P frames */
    if( !h->param.rc.b_stat_read && h->lookahead->next.list[bframes]->i_type == X264_TYPE_P
        && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
    {
        x264_emms();
        x264_weights_analyse( h, h->lookahead->next.list[bframes], h->lookahead->last_nonb, 0 );
    }

    /* shift sequence to coded order.
       use a small temporary list to avoid shifting the entire next buffer around */
    int i_coded = h->lookahead->next.list[0]->i_frame;
    if( bframes )
    {
        /* idx_list[0]: next slot for plain B-frames (placed after the BREFs);
         * idx_list[1]: next slot for BREF frames (placed right after slot 0). */
        int idx_list[] = { brefs+1, 1 };
        for( int i = 0; i < bframes; i++ )
        {
            int idx = idx_list[h->lookahead->next.list[i]->i_type == X264_TYPE_BREF]++;
            frames[idx] = h->lookahead->next.list[i];
            frames[idx]->i_reordered_pts = h->lookahead->next.list[idx]->i_pts;
        }
        /* The terminating non-B frame is coded first. */
        frames[0] = h->lookahead->next.list[bframes];
        frames[0]->i_reordered_pts = h->lookahead->next.list[0]->i_pts;
        memcpy( h->lookahead->next.list, frames, (bframes+1) * sizeof(x264_frame_t*) );
    }

    for( int i = 0; i <= bframes; i++ )
    {
        h->lookahead->next.list[i]->i_coded = i_coded++;
        if( i )
        {
            x264_calculate_durations( h, h->lookahead->next.list[i], h->lookahead->next.list[i-1], &h->i_cpb_delay, &h->i_coded_fields );
            h->lookahead->next.list[0]->f_planned_cpb_duration[i-1] = (double)h->lookahead->next.list[i]->i_cpb_duration *
                                                                      h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
        }
        else
            x264_calculate_durations( h, h->lookahead->next.list[i], NULL, &h->i_cpb_delay, &h->i_coded_fields );
    }
}
在这个函数里又调用了另一个重要函数:x264_slicetype_analyse(),用来分析帧的类型。
/*
 * Analyse the frames queued in h->lookahead->next and assign tentative slice
 * types (I/P/B) to those still marked X264_TYPE_AUTO.
 *
 * h             - encoder handle
 * intra_minigop - 0 for a normal call; non-zero when called for an I-frame
 *                 (see the determinism comment below). Also selects
 *                 keyframe mode via keyframe = !!intra_minigop.
 *
 * Frame types from index reset_start onwards are reset to X264_TYPE_AUTO
 * before returning, so only the leading decisions are kept.
 *
 * Note: remarks about helper behaviour are translated from the original
 * annotations; the helpers themselves are not visible here.
 */
void x264_slicetype_analyse( x264_t *h, int intra_minigop )
{
    x264_mb_analysis_t a;
    x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
    int num_frames, orig_num_frames, keyint_limit, framecnt;
    int i_mb_count = NUM_MBS;
    int cost1p0, cost2p0, cost1b1, cost2p1;
    /* Determine the maximum search length.
     * (In the original author's debugging session, h->lookahead->next.i_size was 4.) */
    int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
    int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
    /* For determinism we should limit the search to the number of frames lookahead has for sure
     * in h->lookahead->next.list buffer, except at the end of stream.
     * For normal calls with (intra_minigop == 0) that is h->lookahead->i_slicetype_length + 1 frames.
     * And for I-frame calls (intra_minigop != 0) we already removed intra_minigop frames from there. */
    if( h->param.b_deterministic )
        i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + 1 - intra_minigop );
    int keyframe = !!intra_minigop;

    assert( h->frames.b_have_lowres );

    if( !h->lookahead->last_nonb )
        return;
    /* frames[0] points to the previous non-B frame. */
    frames[0] = h->lookahead->last_nonb;
    /* frames[1..] point, in order, to the leading AUTO-typed frames of lookahead->next. */
    for( framecnt = 0; framecnt < i_max_search && h->lookahead->next.list[framecnt]->i_type == X264_TYPE_AUTO; framecnt++ )
        frames[framecnt+1] = h->lookahead->next.list[framecnt];

    x264_lowres_context_init( h, &a );

    if( !framecnt )
    {
        if( h->param.rc.b_mb_tree )
            x264_macroblock_tree( h, &a, frames, 0, keyframe );
        return;
    }

    keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_keyframe - 1;
    orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );

    /* This is important psy-wise: if we have a non-scenecut keyframe,
     * there will be significant visual artifacts if the frames just before
     * go down in quality due to being referenced less, despite it being
     * more RD-optimal. */
    if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
        num_frames = framecnt;
    else if( h->param.b_open_gop && num_frames < framecnt )
        num_frames++;
    else if( num_frames == 0 )
    {
        frames[1]->i_type = X264_TYPE_I;
        return;
    }

    int num_bframes = 0;
    int num_analysed_frames = num_frames;
    int reset_start;
    /* Use scenecut() to detect a scene change on the first frame; if so it becomes an I-frame. */
    if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames, i_max_search ) )
    {
        frames[1]->i_type = X264_TYPE_I;
        return;
    }

#if HAVE_OPENCL
    x264_opencl_slicetype_prep( h, frames, num_frames, a.i_lambda );
#endif
    /* B-frames are allowed. */
    if( h->param.i_bframe )
    {
        if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
        {
            if( num_frames > 1 )
            {
                char best_paths[X264_BFRAME_MAX+1][X264_LOOKAHEAD_MAX+1] = {"","P"};
                int best_path_index = num_frames % (X264_BFRAME_MAX+1);

                /* Perform the frametype analysis. */
                for( int j = 2; j <= num_frames; j++ )
                    x264_slicetype_path( h, &a, frames, j, best_paths );

                /* Number of leading 'B' characters in the best path. */
                num_bframes = strspn( best_paths[best_path_index], "B" );
                /* Load the results of the analysis into the frame types. */
                for( int j = 1; j < num_frames; j++ )
                    frames[j]->i_type = best_paths[best_path_index][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
            }
            frames[num_frames]->i_type = X264_TYPE_P;
        }
        else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
        {
            for( int i = 0; i <= num_frames-2; )
            {
                /* Cost of coding frame i+2 as a P-frame referencing frame i
                 * (frame i+2 is always treated as P here). */
                cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 );
                if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 )
                {
                    frames[i+1]->i_type = X264_TYPE_P;
                    frames[i+2]->i_type = X264_TYPE_P;
                    i += 2;
                    continue;
                }

#if HAVE_OPENCL
                if( h->param.b_opencl )
                {
                    int b_work_done = 0;
                    b_work_done |= x264_opencl_precalculate_frame_cost(h, frames, a.i_lambda, i+0, i+2, i+1 );
                    b_work_done |= x264_opencl_precalculate_frame_cost(h, frames, a.i_lambda, i+0, i+1, i+1 );
                    b_work_done |= x264_opencl_precalculate_frame_cost(h, frames, a.i_lambda, i+1, i+2, i+2 );
                    if( b_work_done )
                        x264_opencl_flush( h );
                }
#endif
                /* Cost computation.
                 * Argument convention used below (per the original note):
                 *   x264_slicetype_frame_cost( ..., p0, p1, b, ... )
                 *   with frames ordered p0 .. b .. p1;
                 *   p1 != b means b is costed as a B-frame, otherwise as a P-frame. */

                /* Cost of frame i+1 as a B-frame. */
                cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 );
                /* Cost of frame i+1 as a P-frame. */
                cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 );
                /* Cost of frame i+2 as a P-frame referencing frame i+1. */
                cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 );
                /* If coding (i+1 as P) + (i+2 as P) is cheaper than
                 * coding (i+1 as B) + (i+2 as P) ... */
                if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
                {
                    /* ... then frame i+1 is coded as a P-frame; move on. */
                    frames[i+1]->i_type = X264_TYPE_P;
                    i += 1;
                    continue;
                }

                // arbitrary and untuned
                #define INTER_THRESH 300
                #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
                /* Frame i+1 will be coded as a B-frame. */
                frames[i+1]->i_type = X264_TYPE_B;

                int j;
                for( j = i+2; j <= X264_MIN( i+h->param.i_bframe, num_frames-1 ); j++ )
                {
                    int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10);
                    /* Estimated cost of frame j+1 as a P-frame referencing frame i. */
                    int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 );
                    /* If the cost is too high, stop extending the B run here. */
                    if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 )
                        break;
                    /* Otherwise frame j is a B-frame. */
                    frames[j]->i_type = X264_TYPE_B;
                }
                /* Frame j closes the run as a P-frame. */
                frames[j]->i_type = X264_TYPE_P;
                i = j;
            }
            /* The last frame is a P-frame. */
            frames[num_frames]->i_type = X264_TYPE_P;
            num_bframes = 0;
            /* Count the leading B-frames. */
            while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B )
                num_bframes++;
        }
        else
        {
            /* Fixed pattern: determine the number of B-frames. */
            num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
            /* One P-frame every num_bframes+1 frames; all others are B-frames. */
            for( int j = 1; j < num_frames; j++ )
                frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
            /* The last frame is a P-frame. */
            frames[num_frames]->i_type = X264_TYPE_P;
        }

        /* Check scenecut on the first minigop. */
        /* If a scene change is detected at one of the B-frames, make it a P-frame. */
        for( int j = 1; j < num_bframes+1; j++ )
            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1, 0, orig_num_frames, i_max_search ) )
            {
                frames[j]->i_type = X264_TYPE_P;
                num_analysed_frames = j;
                break;
            }

        reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
    }
    else
    {
        /* h->param.i_bframe is 0: every frame is a P-frame. */
        for( int j = 1; j <= num_frames; j++ )
            frames[j]->i_type = X264_TYPE_P;
        reset_start = !keyframe + 1;
        num_bframes = 0;
    }

    /* Perform the actual macroblock tree analysis.
     * Don't go farther than the maximum keyframe interval; this helps in short GOPs. */
    if( h->param.rc.b_mb_tree )
        x264_macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.i_keyint_max), keyframe );

    /* Enforce keyframe limit. */
    if( !h->param.b_intra_refresh )
        for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
        {
            /* Force an I-frame. */
            frames[i]->i_type = X264_TYPE_I;
            reset_start = X264_MIN( reset_start, i+1 );
            if( h->param.b_open_gop && h->param.b_bluray_compat )
                while( IS_X264_TYPE_B( frames[i-1]->i_type ) )
                    i--;
        }

    if( vbv_lookahead )
        x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );

    /* Restore frametypes for all frames that haven't actually been decided yet. */
    for( int j = reset_start; j <= num_frames; j++ )
        frames[j]->i_type = X264_TYPE_AUTO;

#if HAVE_OPENCL
    x264_opencl_slicetype_end( h );
#endif
}
获取了帧的类型后,调用函数x264_slice_init()创建Slice Header。
//创建Slice Header static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp ) { /* ------------------------ Create slice header ----------------------- */ if( i_nal_type == NAL_SLICE_IDR ) { //I帧 //对x264_slice_header_t进行赋值 x264_slice_header_init( h, &h->sh, h->sps, h->pps, h->i_idr_pic_id, h->i_frame_num, i_global_qp ); /* alternate id */ if( h->param.i_avcintra_class ) { switch( h->i_idr_pic_id ) { case 5: h->i_idr_pic_id = 3; break; case 3: h->i_idr_pic_id = 4; break; case 4: default: h->i_idr_pic_id = 5; break; } } else h->i_idr_pic_id ^= 1; } else { //非IDR帧 x264_slice_header_init( h, &h->sh, h->sps, h->pps, -1, h->i_frame_num, i_global_qp ); //参考帧列表 h->sh.i_num_ref_idx_l0_active = h->i_ref[0] <= 0 ? 1 : h->i_ref[0]; h->sh.i_num_ref_idx_l1_active = h->i_ref[1] <= 0 ? 1 : h->i_ref[1]; if( h->sh.i_num_ref_idx_l0_active != h->pps->i_num_ref_idx_l0_default_active || (h->sh.i_type == SLICE_TYPE_B && h->sh.i_num_ref_idx_l1_active != h->pps->i_num_ref_idx_l1_default_active) ) { h->sh.b_num_ref_idx_override = 1; } } if( h->fenc->i_type == X264_TYPE_BREF && h->param.b_bluray_compat && h->sh.i_mmco_command_count ) { h->b_sh_backup = 1; h->sh_backup = h->sh; } h->fdec->i_frame_num = h->sh.i_frame_num; if( h->sps->i_poc_type == 0 ) { h->sh.i_poc = h->fdec->i_poc; if( PARAM_INTERLACED ) { h->sh.i_delta_poc_bottom = h->param.b_tff ? 1 : -1; h->sh.i_poc += h->sh.i_delta_poc_bottom == -1; } else h->sh.i_delta_poc_bottom = 0; h->fdec->i_delta_poc[0] = h->sh.i_delta_poc_bottom == -1; h->fdec->i_delta_poc[1] = h->sh.i_delta_poc_bottom == 1; } else { /* Nothing to do ? */ } //主要对mb结构体赋初值 x264_macroblock_slice_init( h ); }
做好一切准备后,接下来就是对视频图像的实际编码了。
2.编码
编码的主要函数是x264_slices_write():
/*
 * Encode one picture (or one thread's share of it) as a sequence of slices.
 *
 * Note the plural "slices": this function loops over slices and calls
 * x264_slice_write() (singular) once per slice.
 *
 * Returns (void *)0 on success and (void *)-1 if x264_slice_write() fails.
 */
static void *x264_slices_write( x264_t *h )
{
    int i_slice_num = 0;
    int last_thread_mb = h->sh.i_last_mb;

    /* init stats */
    memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
    h->mb.b_reencode_mb = 0;
    /* Loop over slices (a picture may consist of several slices). */
    while( h->sh.i_first_mb + SLICE_MBAFF*h->mb.i_mb_stride <= last_thread_mb )
    {
        h->sh.i_last_mb = last_thread_mb;
        if( !i_slice_num || !x264_frame_new_slice( h, h->fdec ) )
        {
            if( h->param.i_slice_max_mbs )
            {
                if( SLICE_MBAFF )
                {
                    // convert first to mbaff form, add slice-max-mbs, then convert back to normal form
                    int last_mbaff = 2*(h->sh.i_first_mb % h->mb.i_mb_width)
                        + h->mb.i_mb_width*(h->sh.i_first_mb / h->mb.i_mb_width)
                        + h->param.i_slice_max_mbs - 1;
                    int last_x = (last_mbaff % (2*h->mb.i_mb_width))/2;
                    int last_y = (last_mbaff / (2*h->mb.i_mb_width))*2 + 1;
                    h->sh.i_last_mb = last_x + h->mb.i_mb_stride*last_y;
                }
                else
                {
                    h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1;
                    /* Shorten this slice if the remainder would be smaller than slice-min-mbs. */
                    if( h->sh.i_last_mb < last_thread_mb && last_thread_mb - h->sh.i_last_mb < h->param.i_slice_min_mbs )
                        h->sh.i_last_mb = last_thread_mb - h->param.i_slice_min_mbs;
                }
                i_slice_num++;
            }
            else if( h->param.i_slice_count && !h->param.b_sliced_threads )
            {
                int height = h->mb.i_mb_height >> PARAM_INTERLACED;
                int width = h->mb.i_mb_width << PARAM_INTERLACED;
                i_slice_num++;
                h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1;
            }
        }
        h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb );
        /* Encode one slice. x264_stack_align() presumably handles stack
         * alignment for the call (not shown here); in effect this invokes
         * x264_slice_write( h ). */
        if( x264_stack_align( x264_slice_write, h ) )
            goto fail;
        /* The next slice starts right after the last macroblock of this one. */
        h->sh.i_first_mb = h->sh.i_last_mb + 1;
        // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
        if( SLICE_MBAFF && h->sh.i_first_mb % h->mb.i_mb_width )
            h->sh.i_first_mb -= h->mb.i_mb_stride;
    }

    return (void *)0;

fail:
    /* Tell other threads we're done, so they wouldn't wait for it */
    if( h->param.b_sliced_threads )
        x264_threadslice_cond_broadcast( h, 2 );
    return (void *)-1;
}
其内部调用了函数x264_slice_write(),x264_slices_write()的单位是帧,而x264_slice_write()的单位是Slice,这两个是不一样的,一帧可以有多个Slice,一帧也可以只有一个Slice。
x264_slice_write():
/*
 * Encode one slice: write the NAL unit and slice header, then loop over the
 * slice's macroblocks (analyse, encode, entropy-code, rate control), and
 * finally close the NAL unit.
 *
 * Returns 0 on success and -1 if the bitstream buffer check or
 * x264_nal_end() fails. Also returns 0 early when the slice ends up empty
 * (h->sh.i_last_mb < h->sh.i_first_mb).
 *
 * The loop can re-encode macroblocks: bitstream state is backed up into
 * bs_bak[] at several checkpoints and restored on CAVLC overflow,
 * slice-max-size overrun, or when x264_ratecontrol_mb() returns < 0.
 *
 * Note: remarks about helper behaviour are translated from the original
 * annotations; the helpers themselves are not visible here.
 */
static intptr_t x264_slice_write( x264_t *h )
{
    int i_skip;
    /* Macroblock index and its x/y coordinates. */
    int mb_xy, i_mb_x, i_mb_y;
    /* NALUs other than the first use a 3-byte startcode.
     * Add one extra byte for the rbsp, and one more for the final CABAC putbyte.
     * Then add an extra 5 bytes just in case, to account for random NAL escapes and
     * other inaccuracies. */
    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
    int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH;
    int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc;
    int starting_bits = bs_pos(&h->out.bs);
    int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
    int b_hpel = h->fdec->b_kept_as_ref;
    int orig_last_mb = h->sh.i_last_mb;
    int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1;
    uint8_t *last_emu_check;
    /* Indices into bs_bak[], one per kind of bitstream checkpoint. */
#define BS_BAK_SLICE_MAX_SIZE 0
#define BS_BAK_CAVLC_OVERFLOW 1
#define BS_BAK_SLICE_MIN_MBS  2
#define BS_BAK_ROW_VBV        3
    x264_bs_bak_t bs_bak[4];
    b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
    bs_realign( &h->out.bs );

    /* Slice */
    /* Start a NAL unit; paired with x264_nal_end() below. */
    x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
    h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;

    /* Slice header */
    /* Per the original note, this initialises the macroblock pixel buffers
     * fdec_buf and fenc_buf: the encode buffers p_fenc[0..2] and the
     * reconstruction buffers p_fdec[0..2], where [0] holds Y, [1] U, [2] V. */
    x264_macroblock_thread_init( h );

    /* Set the QP equal to the first QP in the slice for more accurate CABAC initialization. */
    h->mb.i_mb_xy = h->sh.i_first_mb;
    h->sh.i_qp = x264_ratecontrol_mb_qp( h );
    h->sh.i_qp = SPEC_QP( h->sh.i_qp );
    h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp;
    /* Write the slice header. */
    x264_slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
    /* CABAC needs to be initialised when in use. */
    if( h->param.b_cabac )
    {
        /* alignment needed */
        bs_align_1( &h->out.bs );

        /* init cabac */
        x264_cabac_context_init( h, &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
        x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
        last_emu_check = h->cabac.p;
    }
    else
        last_emu_check = h->out.bs.p;
    h->mb.i_last_qp = h->sh.i_qp;
    h->mb.i_last_dqp = 0;
    h->mb.field_decoding_flag = 0;
    /* Initial macroblock row (y). */
    i_mb_y = h->sh.i_first_mb / h->mb.i_mb_width;
    /* Initial macroblock column (x). */
    i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
    i_skip = 0;

    /* Main loop: encode every macroblock of the slice. */
    while( 1 )
    {
        /* Macroblock index, computed from i_mb_x and i_mb_y. */
        mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
        int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
        /* Start of a macroblock row. */
        if( i_mb_x == 0 )
        {
            if( x264_bitstream_check_buffer( h ) )
                return -1;
            /* Checkpoint the bitstream so the row can be re-encoded for VBV. */
            if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )
                x264_bitstream_backup( h, &bs_bak[BS_BAK_ROW_VBV], i_skip, 1 );
            /* Per the original note: deblocking, half-pel interpolation,
             * SSIM/PSNR computation etc., processed one macroblock row at a time. */
            if( !h->mb.b_reencode_mb )
                x264_fdec_filter_row( h, i_mb_y, 0 );
        }

        if( back_up_bitstream )
        {
            if( back_up_bitstream_cavlc )
                x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
            if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
            {
                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
                if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
                    x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
            }
        }

        if( PARAM_INTERLACED )
        {
            if( h->mb.b_adaptive_mbaff )
            {
                if( !(i_mb_y&1) )
                {
                    /* FIXME: VSAD is fast but fairly poor at choosing the best interlace type. */
                    h->mb.b_interlaced = x264_field_vsad( h, i_mb_x, i_mb_y );
                    memcpy( &h->zigzagf, MB_INTERLACED ? &h->zigzagf_interlaced : &h->zigzagf_progressive, sizeof(h->zigzagf) );
                    if( !MB_INTERLACED && (i_mb_y+2) == h->mb.i_mb_height )
                        x264_expand_border_mbpair( h, i_mb_x, i_mb_y );
                }
            }
            h->mb.field[mb_xy] = MB_INTERLACED;
        }

        /* load cache */
        /* Per the original note: loads data of the neighbouring macroblocks
         * (mainly the ones above and to the left) for the one being encoded. */
        if( SLICE_MBAFF )
            x264_macroblock_cache_load_interlaced( h, i_mb_x, i_mb_y );
        else
            x264_macroblock_cache_load_progressive( h, i_mb_x, i_mb_y );
        /* Analysis: intra prediction mode selection, inter motion estimation, etc. */
        x264_macroblock_analyse( h );

        /* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
reencode:
        /* Encode: residual DCT transform and quantisation (per the original note). */
        x264_macroblock_encode( h );
        /* Entropy coding: CABAC path. */
        if( h->param.b_cabac )
        {
            if( mb_xy > h->sh.i_first_mb && !(SLICE_MBAFF && (i_mb_y&1)) )
                x264_cabac_encode_terminal( &h->cabac );

            if( IS_SKIP( h->mb.i_type ) )
                x264_cabac_mb_skip( h, 1 );
            else
            {
                if( h->sh.i_type != SLICE_TYPE_I )
                    x264_cabac_mb_skip( h, 0 );
                /* Write the macroblock. */
                x264_macroblock_write_cabac( h, &h->cabac );
            }
        }
        else
        {
            /* Entropy coding: CAVLC path. */
            if( IS_SKIP( h->mb.i_type ) )
                i_skip++;
            else
            {
                if( h->sh.i_type != SLICE_TYPE_I )
                {
                    bs_write_ue( &h->out.bs, i_skip );  /* skip run */
                    i_skip = 0;
                }
                /* Write the macroblock. */
                x264_macroblock_write_cavlc( h );
                /* If there was a CAVLC level code overflow, try again at a higher QP. */
                if( h->mb.b_overflow )
                {
                    h->mb.i_chroma_qp = h->chroma_qp_table[++h->mb.i_qp];
                    h->mb.i_skip_intra = 0;
                    h->mb.b_skip_mc = 0;
                    h->mb.b_overflow = 0;
                    x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
                    goto reencode;
                }
            }
        }

        int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
        int mb_size = total_bits - mb_spos;

        if( slice_max_size && (!SLICE_MBAFF || (i_mb_y&1)) )
        {
            /* Count the skip run, just in case. */
            if( !h->param.b_cabac )
                total_bits += bs_size_ue_big( i_skip );
            /* Check for escape bytes. */
            uint8_t *end = h->param.b_cabac ? h->cabac.p : h->out.bs.p;
            for( ; last_emu_check < end - 2; last_emu_check++ )
                if( last_emu_check[0] == 0 && last_emu_check[1] == 0 && last_emu_check[2] <= 3 )
                {
                    /* Each 00 00 0x (x <= 3) pattern costs one byte of budget. */
                    slice_max_size -= 8;
                    last_emu_check++;
                }
            /* We'll just re-encode this last macroblock if we go over the max slice size. */
            if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
            {
                if( !x264_frame_new_slice( h, h->fdec ) )
                {
                    /* Handle the most obnoxious slice-min-mbs edge case: we need to end the slice
                     * because it's gone over the maximum size, but doing so would violate slice-min-mbs.
                     * If possible, roll back to the last checkpoint and try again.
                     * We could try raising QP, but that would break in the case where a slice spans multiple
                     * rows, which the re-encoding infrastructure can't currently handle. */
                    if( mb_xy <= thread_last_mb && (thread_last_mb+1-mb_xy) < h->param.i_slice_min_mbs )
                    {
                        if( thread_last_mb-h->param.i_slice_min_mbs < h->sh.i_first_mb+h->param.i_slice_min_mbs )
                        {
                            x264_log( h, X264_LOG_WARNING, "slice-max-size violated (frame %d, cause: slice-min-mbs)\n", h->i_frame );
                            slice_max_size = 0;
                            goto cont;
                        }
                        x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 );
                        h->mb.b_reencode_mb = 1;
                        h->sh.i_last_mb = thread_last_mb-h->param.i_slice_min_mbs;
                        break;
                    }
                    if( mb_xy-SLICE_MBAFF*h->mb.i_mb_stride != h->sh.i_first_mb )
                    {
                        x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
                        h->mb.b_reencode_mb = 1;
                        if( SLICE_MBAFF )
                        {
                            // set to bottom of previous mbpair
                            if( i_mb_x )
                                h->sh.i_last_mb = mb_xy-1+h->mb.i_mb_stride*(!(i_mb_y&1));
                            else
                                h->sh.i_last_mb = (i_mb_y-2+!(i_mb_y&1))*h->mb.i_mb_stride + h->mb.i_mb_width - 1;
                        }
                        else
                            h->sh.i_last_mb = mb_xy-1;
                        break;
                    }
                    else
                        h->sh.i_last_mb = mb_xy;
                }
                else
                    slice_max_size = 0;
            }
        }
cont:
        h->mb.b_reencode_mb = 0;

        /* save cache */
        /* Per the original note: stores the current macroblock's data for use
         * by later macroblocks, including Intra4x4 prediction modes, DCT
         * non-zero coefficient counts, motion vectors and reference indices. */
        x264_macroblock_cache_save( h );
        /* Rate control; a negative result triggers re-encoding from the row checkpoint. */
        if( x264_ratecontrol_mb( h, mb_size ) < 0 )
        {
            x264_bitstream_restore( h, &bs_bak[BS_BAK_ROW_VBV], &i_skip, 1 );
            h->mb.b_reencode_mb = 1;
            i_mb_x = 0;
            i_mb_y = i_mb_y - SLICE_MBAFF;
            h->mb.i_mb_prev_xy = i_mb_y * h->mb.i_mb_stride - 1;
            h->sh.i_last_mb = orig_last_mb;
            continue;
        }

        /* accumulate mb stats */
        /* ---- The long section below only updates counters in h->stat.frame ---- */
        h->stat.frame.i_mb_count[h->mb.i_type]++;

        int b_intra = IS_INTRA( h->mb.i_type );
        int b_skip = IS_SKIP( h->mb.i_type );
        if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
        {
            if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
            {
                if( h->mb.i_partition != D_8x8 )
                    h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
                else
                    for( int i = 0; i < 4; i++ )
                        h->stat.frame.i_mb_partition[h->mb.i_sub_partition[i]] ++;
                if( h->param.i_frame_reference > 1 )
                    for( int i_list = 0; i_list <= (h->sh.i_type == SLICE_TYPE_B); i_list++ )
                        for( int i = 0; i < 4; i++ )
                        {
                            int i_ref = h->mb.cache.ref[i_list][ x264_scan8[4*i] ];
                            if( i_ref >= 0 )
                                h->stat.frame.i_mb_count_ref[i_list][i_ref] ++;
                        }
            }
        }

        if( h->param.i_log_level >= X264_LOG_INFO )
        {
            if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
            {
                if( CHROMA444 )
                {
                    for( int i = 0; i < 4; i++ )
                        if( h->mb.i_cbp_luma & (1 << i) )
                            for( int p = 0; p < 3; p++ )
                            {
                                int s8 = i*4+p*16;
                                int nnz8x8 = M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+0] )
                                           | M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+8] );
                                h->stat.frame.i_mb_cbp[!b_intra + p*2] += !!nnz8x8;
                            }
                }
                else
                {
                    int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
                               + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
                    h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
                    h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
                    h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
                }
            }
            if( h->mb.i_cbp_luma && !b_intra )
            {
                h->stat.frame.i_mb_count_8x8dct[0] ++;
                h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
            }
            if( b_intra && h->mb.i_type != I_PCM )
            {
                if( h->mb.i_type == I_16x16 )
                    h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
                else if( h->mb.i_type == I_8x8 )
                    for( int i = 0; i < 16; i += 4 )
                        h->stat.frame.i_mb_pred_mode[1][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
                else //if( h->mb.i_type == I_4x4 )
                    for( int i = 0; i < 16; i++ )
                        h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
                h->stat.frame.i_mb_pred_mode[3][x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode]]++;
            }
            h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
        }
        /* ---- End of statistics ---- */

        /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
        /* Only the boundary strength (Bs) is computed here; no filtering happens at this point. */
        if( b_deblock )
            x264_macroblock_deblock_strength( h );
        /* Leave the main loop once the last macroblock of the slice is done. */
        if( mb_xy == h->sh.i_last_mb )
            break;

        if( SLICE_MBAFF )
        {
            i_mb_x += i_mb_y & 1;
            i_mb_y ^= i_mb_x < h->mb.i_mb_width;
        }
        else
            i_mb_x++; /* advance to the next macroblock column */
        /* End of a macroblock row reached. */
        if( i_mb_x == h->mb.i_mb_width )
        {
            /* Move on to the next row. */
            i_mb_y++;
            i_mb_x = 0;
        }
    }
    if( h->sh.i_last_mb < h->sh.i_first_mb )
        return 0;

    h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
    /* Finish entropy coding. */
    if( h->param.b_cabac )
    {
        x264_cabac_encode_flush( h, &h->cabac );
        h->out.bs.p = h->cabac.p;
    }
    else
    {
        if( i_skip > 0 )
            bs_write_ue( &h->out.bs, i_skip );  /* last skip run */
        /* rbsp_slice_trailing_bits */
        bs_rbsp_trailing( &h->out.bs );
        bs_flush( &h->out.bs );
    }
    /* Close the NAL unit; paired with x264_nal_start() above. */
    if( x264_nal_end( h ) )
        return -1;
    /* This slice ends at the last macroblock of this thread's region
     * (NOTE(review): the original author marked this as thread-related, with a question mark). */
    if( h->sh.i_last_mb == (h->i_threadslice_end * h->mb.i_mb_width - 1) )
    {
        h->stat.frame.i_misc_bits = bs_pos( &h->out.bs )
                                  + (h->out.i_nal*NALU_OVERHEAD * 8)
                                  - h->stat.frame.i_tex_bits
                                  - h->stat.frame.i_mv_bits;
        x264_fdec_filter_row( h, h->i_threadslice_end, 0 );

        if( h->param.b_sliced_threads )
        {
            /* Tell the main thread we're done. */
            x264_threadslice_cond_broadcast( h, 1 );
            /* Do hpel now */
            for( int mb_y = h->i_threadslice_start; mb_y <= h->i_threadslice_end; mb_y++ )
                x264_fdec_filter_row( h, mb_y, 1 );
            x264_threadslice_cond_broadcast( h, 2 );
            /* Do the first row of hpel, now that the previous slice is done */
            if( h->i_thread_idx > 0 )
            {
                x264_threadslice_cond_wait( h->thread[h->i_thread_idx-1], 2 );
                x264_fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 );
            }
        }

        /* Free mb info after the last thread's done using it */
        if( h->fdec->mb_info_free && (!h->param.b_sliced_threads || h->i_thread_idx == (h->param.i_threads-1)) )
        {
            h->fdec->mb_info_free( h->fdec->mb_info );
            h->fdec->mb_info = NULL;
            h->fdec->mb_info_free = NULL;
        }
    }

    return 0;
}
x264_slice_write()函数的具体流程:
首先,调用x264_nal_start()开始输出一个NALU;
接着,x264_macroblock_thread_init()函数初始化宏块重建像素缓存fdec_buf[]和编码像素缓存fenc_buf[]。
接着调用x264_slice_header_write()输出Slice Header。
进入一个循环,该循环每执行一次编码一个宏块。
(a)在每一行宏块的开始处,调用x264_fdec_filter_row()执行滤波模块。
(b)调用x264_macroblock_cache_load_progressive()(MBAFF模式下为x264_macroblock_cache_load_interlaced())将要编码的宏块的周围宏块的信息读进来。
(c)调用x264_macroblock_analyse()执行分析模块。
(d)调用x264_macroblock_encode()执行宏块编码模块。
(e)调用x264_macroblock_write_cabac()/x264_macroblock_write_cavlc()执行熵编码模块。
(f)调用x264_macroblock_cache_save()保存当前宏块的信息。
(g)调用x264_ratecontrol_mb()执行码率控制。
(h)准备处理下一个宏块。
最后调用x264_nal_end()结束输出一个NALU。