一、背景
最近搞Intel平台的多路复用视频解码,发现某些视频会报解码不兼容参数的报错,分析MFX的源码(如下图所示),解码实例通过对比新旧的SPS部分参数来判断是否是相同的视频,当前应用只判断了分辨率,所以当遇到sps_max_dec_pic_buffering数量不一致的码流时,就会导致解码器报错。 因此应用业务上需要做SPS解析,对比关键参数以对解码器做适当的复位处理。
二、SPS语法解析
SPS(sequence parameter set)图像序列参数集描述了视频序列级别的编码参数,主要作用有如下几点:
1、描述视频序列的宽度、高度、帧率、颜色空间等基本信息,帮助解码器正确解码视频序列;
2、定义帧内预测模式和帧间模式的参数,包括用于帧间参考的帧间跨度、缩放列表和运动向量参考等参数;
3、指示编码器如何将编码数据划分为CTU(Coding Tree Unit,编码树单元),以及如何对CTU进行编码;
下图为profile_tier_level语义:
上图给出了所需参数在SPS中的位置以及各参数的语法。在表的第二列中,每个参数后面都跟着一个描述符(如u(n)、ue(v)),该描述符指定了对应参数的解析方法。描述符的解析过程通过函数read_bits(n)来定义:read_bits(n)从比特流中读取接下来的n个比特,并且将比特流指针向前移动n个比特。当n=0时,read_bits(n)的返回值为0,并且不移动比特流指针。各描述符的具体含义如下:
1、ae(v):上下文自适应算术熵编码的语法元素。v表示所占比特数是可变的,具体由解析算法确定。
2、b(8): 任意形式的8比特字节。该描述符的解析过程通过函数read_bits(8)的返回值来规定。
3、f(n): n位固定模式比特串(由左至右),左位在先,该描述符的解析过程通过函数read_bits(n)的返回值来规定。
4、i(n): 使用n比特的有符号整数。如果n=“v”,所占比特数由其他语法元素来确定。解析过程由函数read_bits(n)的返回值规定,该返回值用最高有效位在前的2的补码表示。
5、se(v): 有符号整数0阶指数哥伦布编码的语法元素,左位在先。
6、st(v): 以null结尾的字符串,编码为ISO/IEC 10646中规定的通用编码字符集(UCS)传输格式-8(UTF-8)字符。解析过程如下所示:st(v)从比特流中的字节对齐位置开始,从比特流读取并返回一系列字节,从当前位置开始,一直到但不包括等于0x00的下一个字节对齐字节,并将比特流指针前进(stringLength+1)*8个比特位置,其中,stringLength等于返回的字节数。
7、u(n): n位无符号整数。在语法表中,如果n=“v”,所占比特数由其他语法元素来确定。解析过程由函数read_bits(n)的返回值规定,该返回值用最高有效位在前的二进制表示。
8、ue(v): 无符号整数0阶指数哥伦布编码的语法元素,左位在先。
从SPS语法表中可以看到,SPS元素只使用了u(n)和ue(v)编码方式。因此,根据表格所示,具体的语法解析代码如下:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Bit-reader state shared by read_bits() and read_bits_ue().
 *   held_bits     - the byte most recently fetched from the stream
 *   held_bits_num - number of low-order bits of held_bits not yet consumed
 * The state is global, so only one bitstream can be parsed at a time;
 * hevc_analysis_sps() resets it before it starts reading.
 */
static int held_bits_num;
static int held_bits;

/*
 * Read the next `bitnum` bits (MSB first) from the stream at *pp.
 *
 * pp     - in/out: pointer to the next unread byte; advanced whenever a new
 *          byte has to be fetched.
 * bitnum - number of bits to consume; 0 returns 0 and consumes nothing.
 *
 * Returns the bits as an integer. Any number of bits may be skipped with a
 * single call, but only the last 32 bits read are kept in the return value
 * (wider reads are used here purely to skip reserved fields).
 *
 * No bounds checking is done: the caller must make sure the buffer holds
 * enough data.
 */
int read_bits(uint8_t **pp, int bitnum)
{
    uint32_t code = 0;

    while (bitnum > 0) {
        if (held_bits_num == 0) {
            /* Current byte exhausted: fetch the next one. */
            held_bits = *(*pp)++;
            held_bits_num = 8;
        }

        /* Take as many bits as possible from the byte currently held. */
        int take = bitnum < held_bits_num ? bitnum : held_bits_num;
        int shift = held_bits_num - take;

        code = (code << take) |
               (((uint32_t)held_bits >> shift) & ((1u << take) - 1u));
        held_bits_num -= take;
        bitnum -= take;
    }

    return (int)code;
}

/*
 * Read one ue(v) element: unsigned 0-th order Exp-Golomb code.
 *
 * The code word is N zero bits, a single 1 bit, then N info bits; the
 * decoded value is (2^N - 1) + info.
 *
 * Returns the decoded value, or -1 when more than 30 leading zero bits are
 * seen (the value would not fit in an int; treated as a corrupt stream).
 */
int read_bits_ue(uint8_t **pp)
{
    int zero_bits = 0;

    while (read_bits(pp, 1) == 0) {
        if (++zero_bits > 30)
            return -1;
    }

    return (int)(((1u << zero_bits) - 1u) + (uint32_t)read_bits(pp, zero_bits));
}

/* True when profile number n is signalled either by profile_idc or by the
 * matching profile_compatibility_flag. */
static int profile_is(int idc, const uint8_t *compat, int n)
{
    return idc == n || compat[n];
}

/*
 * Consume the 88-bit profile part of profile_tier_level(): profile_space,
 * tier_flag, profile_idc, 32 compatibility flags, 4 source/constraint flags,
 * 43 profile-dependent constraint/reserved bits and the final
 * inbld/reserved bit. The layout is identical for the general_* fields and
 * for every sub_layer_* set, so both use this helper.
 *
 * Returns profile_idc.
 */
static int read_profile_fields(uint8_t **p)
{
    uint8_t compat[32];
    int idc;
    int has_ext_flags = 0;

    read_bits(p, 2);                    /* profile_space */
    read_bits(p, 1);                    /* tier_flag */
    idc = read_bits(p, 5);              /* profile_idc */
    for (int j = 0; j < 32; j++)
        compat[j] = (uint8_t)read_bits(p, 1);  /* profile_compatibility_flag[j] */
    read_bits(p, 1);                    /* progressive_source_flag */
    read_bits(p, 1);                    /* interlaced_source_flag */
    read_bits(p, 1);                    /* non_packed_constraint_flag */
    read_bits(p, 1);                    /* frame_only_constraint_flag */

    /* Profiles 4..11 carry explicit constraint flags. */
    for (int n = 4; n <= 11; n++)
        has_ext_flags |= profile_is(idc, compat, n);

    if (has_ext_flags) {
        read_bits(p, 1);                /* max_12bit_constraint_flag */
        read_bits(p, 1);                /* max_10bit_constraint_flag */
        read_bits(p, 1);                /* max_8bit_constraint_flag */
        read_bits(p, 1);                /* max_422chroma_constraint_flag */
        read_bits(p, 1);                /* max_420chroma_constraint_flag */
        read_bits(p, 1);                /* max_monochrome_constraint_flag */
        read_bits(p, 1);                /* intra_constraint_flag */
        read_bits(p, 1);                /* one_picture_only_constraint_flag */
        read_bits(p, 1);                /* lower_bit_rate_constraint_flag */
        if (profile_is(idc, compat, 5) || profile_is(idc, compat, 9) ||
            profile_is(idc, compat, 10) || profile_is(idc, compat, 11)) {
            read_bits(p, 1);            /* max_14bit_constraint_flag */
            read_bits(p, 33);           /* reserved_zero_33bits */
        } else {
            read_bits(p, 34);           /* reserved_zero_34bits */
        }
    } else if (profile_is(idc, compat, 2)) {
        read_bits(p, 7);                /* reserved_zero_7bits */
        read_bits(p, 1);                /* one_picture_only_constraint_flag */
        read_bits(p, 35);               /* reserved_zero_35bits */
    } else {
        read_bits(p, 43);               /* reserved_zero_43bits */
    }

    /*
     * inbld_flag for profiles 1..5, 9 and 11, reserved_zero_bit otherwise.
     * It is a single bit in both cases and its value is not needed here.
     */
    read_bits(p, 1);

    return idc;
}

/*
 * Parse an HEVC sequence parameter set and print its key parameters.
 *
 * pSps - first byte of the SPS RBSP, i.e. the byte after the 2-byte NAL unit
 *        header, with emulation prevention bytes already removed (see
 *        hevc_filter_emulation_byte()).
 *
 * Only the fields up to and including the sub-layer ordering info are
 * parsed. Returns 0 on success, -1 when the data is not a plausible SPS.
 *
 * The buffer length is not known here, so a truncated SPS can make the
 * reader run past the end of the NAL unit; callers should pass only
 * complete units.
 */
int hevc_analysis_sps(uint8_t *pSps)
{
    uint8_t **p = &pSps;
    int32_t sps_max_sub_layers;         /* holds sps_max_sub_layers_minus1 */
    int32_t general_profile_idc;
    uint8_t general_level_idc;
    uint8_t sub_layer_profile_present_flag[8] = {0};
    uint8_t sub_layer_level_present_flag[8] = {0};
    int32_t chroma_format_idc;
    int32_t pic_width_in_luma_samples, pic_height_in_luma_samples;
    int32_t bit_depth_luma_minus8, bit_depth_chroma_minus8;
    uint8_t sps_sub_layer_ordering_info_present_flag;
    int32_t sps_max_dec_pic_buffering_minus1[7] = {0};
    int first_layer;

    /* Start from a clean bit-reader state: bits left over from a previous
     * call must not leak into this one. */
    held_bits_num = 0;
    held_bits = 0;

    read_bits(p, 4);                            /* sps_video_parameter_set_id */
    sps_max_sub_layers = read_bits(p, 3);       /* sps_max_sub_layers_minus1 */
    if (sps_max_sub_layers > 6)                 /* valid range is 0..6 */
        return -1;
    read_bits(p, 1);                            /* sps_temporal_id_nesting_flag */

    /* profile_tier_level(1, sps_max_sub_layers_minus1) start.
     * profilePresentFlag is always 1 inside an SPS. */
    general_profile_idc = read_profile_fields(p);
    general_level_idc = (uint8_t)read_bits(p, 8);

    for (int i = 0; i < sps_max_sub_layers; i++) {
        sub_layer_profile_present_flag[i] = (uint8_t)read_bits(p, 1);
        sub_layer_level_present_flag[i] = (uint8_t)read_bits(p, 1);
    }
    if (sps_max_sub_layers > 0) {
        for (int i = sps_max_sub_layers; i < 8; i++)
            read_bits(p, 2);                    /* reserved_zero_2bits[i] */
    }
    for (int i = 0; i < sps_max_sub_layers; i++) {
        if (sub_layer_profile_present_flag[i])
            read_profile_fields(p);             /* sub_layer_* profile fields */
        if (sub_layer_level_present_flag[i])
            read_bits(p, 8);                    /* sub_layer_level_idc[i] */
    }
    /* profile_tier_level end */

    read_bits_ue(p);                            /* sps_seq_parameter_set_id */
    chroma_format_idc = read_bits_ue(p);
    if (chroma_format_idc == 3)
        read_bits(p, 1);                        /* separate_colour_plane_flag */
    pic_width_in_luma_samples = read_bits_ue(p);
    pic_height_in_luma_samples = read_bits_ue(p);
    if (read_bits(p, 1)) {                      /* conformance_window_flag */
        read_bits_ue(p);                        /* conf_win_left_offset */
        read_bits_ue(p);                        /* conf_win_right_offset */
        read_bits_ue(p);                        /* conf_win_top_offset */
        read_bits_ue(p);                        /* conf_win_bottom_offset */
    }
    bit_depth_luma_minus8 = read_bits_ue(p);
    bit_depth_chroma_minus8 = read_bits_ue(p);
    read_bits_ue(p);                            /* log2_max_pic_order_cnt_lsb_minus4 */
    sps_sub_layer_ordering_info_present_flag = (uint8_t)read_bits(p, 1);

    first_layer = sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers;
    for (int i = first_layer; i <= sps_max_sub_layers; i++) {
        sps_max_dec_pic_buffering_minus1[i] = read_bits_ue(p);
        read_bits_ue(p);                        /* sps_max_num_reorder_pics[i] */
        read_bits_ue(p);                        /* sps_max_latency_increase_plus1[i] */
    }
    /* When only the highest sub-layer is signalled, the lower sub-layers
     * are inferred to use the same value. */
    for (int i = 0; i < first_layer; i++)
        sps_max_dec_pic_buffering_minus1[i] =
            sps_max_dec_pic_buffering_minus1[sps_max_sub_layers];

    /* A negative value means read_bits_ue() rejected a code word. */
    if (chroma_format_idc < 0 ||
        pic_width_in_luma_samples < 0 || pic_height_in_luma_samples < 0 ||
        bit_depth_luma_minus8 < 0 || bit_depth_chroma_minus8 < 0 ||
        sps_max_dec_pic_buffering_minus1[0] < 0)
        return -1;

    printf("general_profile_idc:%d\n", general_profile_idc);
    printf("general_level_idc:%d\n", general_level_idc);
    printf("chroma_format_idc:%d\n", chroma_format_idc);
    printf("pic_width_in_luma_samples:%d\n", pic_width_in_luma_samples);
    printf("pic_height_in_luma_samples:%d\n", pic_height_in_luma_samples);
    printf("bit_depth_luma_minus8:%d\n", bit_depth_luma_minus8);
    printf("bit_depth_chroma_minus8:%d\n", bit_depth_chroma_minus8);
    printf("sps_max_dec_pic_buffering_minus1[0]:%d\n", sps_max_dec_pic_buffering_minus1[0]);

    return 0;
}

/*
 * Remove emulation_prevention_three_byte (the 0x03 in 0x00 0x00 0x03) from
 * a NAL unit payload, in place.
 *
 * p       - payload bytes; rewritten in place (the result is never longer
 *           than the input).
 * nalulen - number of payload bytes.
 *
 * Returns 0 on success, -1 when the payload contains a byte sequence that
 * is illegal inside a NAL unit.
 */
int hevc_filter_emulation_byte(uint8_t *p, int nalulen)
{
    int delimitercnt = 0;   /* consecutive 0x00 bytes seen so far */

    for (int i = 0, j = 0; i < nalulen; i++) {
        /* 00 00 00 / 00 00 01 / 00 00 02 must not occur inside a NAL unit. */
        if (delimitercnt == 2 && p[i] < 0x03)
            return -1;
        if (delimitercnt == 2 && p[i] == 0x03) {
            /* Found an emulation_prevention_three_byte. */
            if (i == nalulen - 1)
                return 0;
            if (p[i + 1] > 0x03)    /* illegal syntax */
                return -1;
            i++;                    /* skip the emulation prevention byte */
            delimitercnt = 0;
        }
        p[j++] = p[i];
        if (p[i] == 0x00)
            delimitercnt++;
        else
            delimitercnt = 0;
    }

    return 0;
}

#define READ_SIZE (1024*1024)

/*
 * Usage: <program> <annex-b-hevc-file>
 *
 * Loads up to READ_SIZE bytes of the file, locates the first SPS NAL unit
 * (nal_unit_type 33), strips its emulation prevention bytes and prints the
 * key SPS parameters. Returns 0 on success, 1 on any failure.
 */
int main(int argc, char *argv[])
{
    size_t len, start = 0, end;
    int found = 0;
    int rc = 1;
    uint8_t *pSps;
    FILE *fp;

    if (argc < 2) {
        fprintf(stderr, "usage: sps_parse <hevc-annexb-file>\n");
        return 1;
    }

    fp = fopen(argv[1], "rb");  /* binary mode: the stream is raw bytes */
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }
    pSps = malloc(READ_SIZE);
    if (pSps == NULL) {
        fclose(fp);
        return 1;
    }
    len = fread(pSps, 1, READ_SIZE, fp);
    fclose(fp);

    /* Look for start code 00 00 01 followed by a NAL header of type 33
     * (SPS). The payload starts after the 3-byte start code and the 2-byte
     * NAL unit header. */
    for (size_t i = 0; i + 5 <= len; i++) {
        if (pSps[i] == 0x00 && pSps[i + 1] == 0x00 && pSps[i + 2] == 0x01 &&
            ((pSps[i + 3] >> 1) & 0x3f) == 33) {
            start = i + 5;
            found = 1;
            break;
        }
    }
    if (!found) {
        fprintf(stderr, "no SPS NAL unit found\n");
        goto out;
    }

    /* The SPS ends at the next start code, or at the end of the data. */
    end = len;
    for (size_t i = start; i + 3 <= len; i++) {
        if (pSps[i] == 0x00 && pSps[i + 1] == 0x00 && pSps[i + 2] == 0x01) {
            end = i;
            break;
        }
    }
    /* Zero bytes in front of the next start code (4-byte start codes,
     * trailing_zero_8bits) are not part of the NAL unit. */
    while (end > start && pSps[end - 1] == 0x00)
        end--;

    if (hevc_filter_emulation_byte(pSps + start, (int)(end - start)) != 0) {
        fprintf(stderr, "malformed SPS NAL unit\n");
        goto out;
    }
    if (hevc_analysis_sps(pSps + start) != 0) {
        fprintf(stderr, "failed to parse SPS\n");
        goto out;
    }
    rc = 0;

out:
    free(pSps);
    return rc;
}
运行结果如下:
工具解析该段码流结果与代码输出结果一致:
profile_tier_level:
在上述代码中主要使用了u(n)和ue(v)解析方法,在后续文章中继续学习说明。
参考:
1、T-REC-H.265-202108-I!!PDF-E.pdf