mp2 encoder--window_subband函数的FPGA实现笔记
MP2 编码的协议文件 “ISO/IEC 11172-3”
TITLE PAGE PROVIDED BY ISO
CD 11172-3
CODING OF MOVING PICTURES AND ASSOCIATED AUDIO
FOR DIGITAL STORAGE MEDIA AT UP TO ABOUT 1.5 MBIT/s
Part 3 AUDIO
CONTENTS
编码流程框图
算法模型
FIGURE 3-C.1 Analysis subband filter flow chart
子带分析滤波器流程图
子带滤波器的参考代码
1 for (i=0;i<3;i++) 2 for (j=0;j<SCALE_BLOCK;j++) { // SSCALE_BLOCK = 12 3 for (k=0;k<stereo;k++) { 4 window_subband(&win_buf[k], &(*win_que)[k][0], k); 5 filter_subband(&(*win_que)[k][0], &(*sb_sample)[k][i][j][0]); 6 } 7 }
- 代码中的 window_subband 给子带加窗,对应 FIGURE 3-C.1 中的 第1,2,3步骤,窗系数存储在 数组C<1x512> (1行512列矩阵)里面,
- 加窗后的数据存在 Z<1x512>。
- filter_subband 子带滤波函数,对应 FIGURE 3-C.1 第4,5,6步骤,输出长度为 32 的滤波结果 S<1x32>。
- 滤波器组 矩阵 M<32x64> 是通过三角函数 cos 计算得到的,可通过定点化存储到 ROM 里面。
子带加窗函数代码:
/************************************************************************
 *
 * window_subband()
 *
 * PURPOSE:  Overlapping window on PCM samples
 *
 * SEMANTICS:
 * 32 16-bit pcm samples are scaled to fractional 2's complement and
 * concatenated to the end of the window buffer #x#. The updated window
 * buffer #x# is then windowed by the analysis window #c# to produce the
 * windowed sample #z#
 *
 * PARAMETERS:
 *   buffer  pointer to the caller's sample pointer; 32 samples are read
 *           through it and the caller's pointer is advanced by 32
 *   z       output array of HAN_SIZE windowed samples
 *   k       row of the per-channel state to use; x and off have two
 *           rows, so k must be 0 or 1
 *
 * NOTES:
 *   The window coefficients #c#, the sample history #x# and the write
 *   offsets #off# are function-level statics that are set up on the
 *   first call only.
 *
 ************************************************************************/
void window_subband(buffer, z, k)
short **buffer;
double z[HAN_SIZE];
int k;
{
    typedef double XX[2][HAN_SIZE];
    static XX *x;
    int i, j;
    static int off[2] = {0,0};
    static char init = 0;
    static double *c;

    /* one-time setup: load the analysis window and clear the history */
    if (!init) {
        c = (double *) mem_alloc(sizeof(double) * HAN_SIZE, "window");
        read_ana_window(c);
        x = (XX *) mem_alloc(sizeof(XX),"x");
        for (i=0;i<2;i++)
            for (j=0;j<HAN_SIZE;j++)
                (*x)[i][j] = 0;
        init = 1;
    }

    /* replace 32 oldest samples with 32 new samples;
       the samples are stored in reverse order (index 31-i) starting at
       off[k], and *buffer is post-incremented after every read */
    for (i=0;i<32;i++)
        (*x)[k][31-i+off[k]] = (double) *(*buffer)++/SCALE;

    /* shift samples into proper window positions:
       the read index starts at off[k] and wraps through the mask */
    for (i=0;i<HAN_SIZE;i++)
        z[i] = (*x)[k][(i+off[k])&(HAN_SIZE-1)] * c[i];

    /* advance the write offset for the next call; the mask on the next
       line wraps it modulo HAN_SIZE (the mask value is HAN_SIZE-1, which
       requires HAN_SIZE to be a power of two) */
    off[k] += 480;
    off[k] &= HAN_SIZE-1;
}
- 代码中的静态变量 init ,用于控制在初次调用该函数时,初始化读取窗系数C 和清零音频存储数组x。
- 注意:if代码块执行后,init=1,之后再进入window_subband函数,if代码块不再执行。
void window_subband(buffer, z, k)函数的FPGA实现方法
- 窗系数c用ROM存,音频采样缓冲buffer 由模块的外部输入,音频存储数组 x<1x512>用RAM<16bit 深度512>。
- 整个函数分成 2 个大状态。
/* replace 32 oldest samples with 32 new samples;
   stored in reverse order (index 31-i) starting at off[k] */
for (i=0;i<32;i++)
    (*x)[k][31-i+off[k]] = (double) *(*buffer)++/SCALE;
/* shift samples into proper window positions:
   the read index starts at off[k] and wraps through the HAN_SIZE-1 mask */
for (i=0;i<HAN_SIZE;i++)
    z[i] = (*x)[k][(i+off[k])&(HAN_SIZE-1)] * c[i];
/* advance the write offset; the reference code then masks it with
   HAN_SIZE-1, i.e. wraps it modulo HAN_SIZE */
off[k] += 480;
- 由于在verilog设计中 将off定义成 reg[8:0],所以不用增加一个状态,来运算 off[k] &= HAN_SIZE-1; 。
状态一,从buffer中取采样数据存放到x,这也要用状态机实现。状态1.1 给出取buffer的地址;状态1.2等待数据buffer取出;状态1.3 给出写入x中的数据 和地址。
状态一 ws_buf_state 中的FSM代码块:
// State 1 sub-FSM: copy 32 samples from the external buffer into the x RAM.
//   S1: present the read index on buffer_addr, or finish after 32 samples
//   S2: wait one cycle for the sample to appear on `buffer`
//   S3: latch the sample and the x RAM write address 31-i+offk0
case (ws_buf_state)
    ws_buf_S0: ws_buf_state <= ws_buf_S0;       // idle
    ws_buf_S1:
        if (i < 32) begin                       // more samples to fetch -> S2
            ws_buf_state <= ws_buf_S2;
            buffer_addr  <= i;                  // read buffer[i]
        end
        else begin                              // i == 32: all samples stored -> idle
            ws_buf_state    <= ws_buf_S0;
            i               <= 0;               // clear i for the next loop
            WINSUB_state    <= WINSUB_S2;       // hand over to the windowing pass
            ws_addwin_state <= ws_addwin_S1;
        end
    ws_buf_S2: ws_buf_state <= ws_buf_S3;       // wait for data
    ws_buf_S3: begin
        arrayx_dina_r  <= buffer;               // data to write into ram_inputx
        arrayx_addra_r <= 31 - i + offk0;       // x[31-i+off]
        i              <= i + 1;
        ws_buf_state   <= ws_buf_S1;            // back to S1
    end
    // ws_buf_S4: a further state may be needed to wait for the write
    //            issued in ws_buf_S3 to complete
    default: ws_buf_state <= ws_buf_S0;
endcase
状态二,分别取出x 和 c,做计算,需用状态机实现。状态2.1 给出读x的地址,c的地址;状态2.2 等待数据读出;状态2.3 计算,写入 Z 中。
状态二 ws_addwin_state 中的FSM代码块:
// State 2 sub-FSM: z[i] = x[(i+offk0)&511] * c[i] for i = 0..511.
//   S1: present the x RAM and window ROM read addresses, or finish
//   S2: wait one cycle for the memories to return data
//   S3: latch the product and the Z RAM write address
case (ws_addwin_state)
    ws_addwin_S0: ws_addwin_state <= ws_addwin_S0;      // idle
    ws_addwin_S1:
        if (i < 512) begin      // 512 = 0x200 needs 10 bits, so i must be 10 bits wide
            ws_addwin_state <= ws_addwin_S2;
            arrayx_addra_r  <= (i + offk0) & (9'h1FF);  // x[(i+offk0)&511]
            arrayc_addra_r  <= i;                       // c[i]
        end
        else begin
            ws_addwin_state      <= ws_addwin_S0;
            offk0                <= offk0 + 480;        // offk0 is 9 bits wide, so the add wraps
                                                        // modulo 512; no explicit offk0 &= 511
            i                    <= 0;                  // clear i
            WINSUB_state         <= WINSUB_S0;
            Flag_CalcArrayZ_Done <= 1;                  // all 512 products issued
        end
    ws_addwin_S2: ws_addwin_state <= ws_addwin_S3;      // wait for RAM/ROM read data
    ws_addwin_S3: begin
        arrayz_dina_r   <= rom_encwin_u0douta * ram_inputx_u0douta;
        arrayz_addra_r  <= (i & 9'h1FF);
        i               <= i + 1;
        ws_addwin_state <= ws_addwin_S1;                // back to S1
    end
    // ws_addwin_S4: a further state may be needed to wait for the write
    //               issued in ws_addwin_S3 to complete
    default: ws_addwin_state <= ws_addwin_S0;
endcase
完整的模块代码,其中 ram_inputx_u0 存储 音频x, rom_encwin_u0 存储窗系数 C, ram_x512_u0 存储计算后的Z:
// window_subband
//
// Hardware version of the reference window_subband() routine for one channel
// (offset register offk0 only).
//
//   Phase 1 (WINSUB_S1): fetch 32 samples through buffer_addr/buffer and
//                        store them in ram_inputx at address 31-i+offk0.
//   Phase 2 (WINSUB_S2): for i = 0..511 multiply x[(i+offk0)&511] by the
//                        window coefficient c[i] from rom_encwin and store
//                        the product in ram_x512 at address i.
//   Afterwards offk0 advances by 480 (wrapping at 9 bits),
//   Flag_CalcArrayZ_Done is raised and the FSM parks in WINSUB_S0.
//
// Both RAMs are written with one-cycle write-enable pulses.  Holding the
// write enables at constant 1 would overwrite ram_inputx at every address
// that is merely being read during phase 2, and overwrite ram_x512 whenever
// its address changes.
//
// NOTE(review): the FSM allows one wait state between presenting an address
// and using the read data, i.e. it assumes a one-cycle memory read latency;
// confirm against the generated memory cores.
module window_subband(
    input                   rstn,           // active-low synchronous reset
    input                   clk,
    input  signed [15:0]    buffer,         // sample returned for buffer_addr
    output reg    [5:0]     buffer_addr,    // index of the requested sample
    output signed [39:0]    array_z_dout    // read data of the Z RAM (ram_x512)
);

    parameter HAN_SIZE     = 512;
    parameter HAN_SIZE_DEC = 9'h1FF;        // 511

    // ---------------------------------------------------------------
    // FSM state encodings
    // ---------------------------------------------------------------
    parameter WINSUB_S0 = 2'b00, WINSUB_S1 = 2'b01,
              WINSUB_S2 = 2'b10, WINSUB_S3 = 2'b11;
    parameter ws_buf_S0 = 2'b00, ws_buf_S1 = 2'b01,
              ws_buf_S2 = 2'b10, ws_buf_S3 = 2'b11;
    parameter ws_addwin_S0 = 2'b00, ws_addwin_S1 = 2'b01,
              ws_addwin_S2 = 2'b10, ws_addwin_S3 = 2'b11;

    reg [1:0] WINSUB_state;
    reg [1:0] ws_buf_state;
    reg [1:0] ws_addwin_state;

    // ---------------------------------------------------------------
    // Memory interface registers
    // ---------------------------------------------------------------
    reg        [8:0]  arrayx_addra_r;       // x RAM address (read and write)
    reg        [8:0]  arrayc_addra_r;       // window ROM address
    reg        [8:0]  arrayz_addra_r;       // Z RAM address
    reg signed [15:0] arrayx_dina_r;        // x RAM write data
    reg signed [39:0] arrayz_dina_r;        // Z RAM write data
    reg               arrayx_wea_r;         // x RAM write-enable pulse
    reg               arrayz_wea_r;         // Z RAM write-enable pulse

    // static int off[2] = {0,0};  -- only off[0] is implemented; like the C
    // static it is initialised once and not cleared by rstn.
    reg [8:0] offk0 = 9'd0;
    reg [9:0] i;                            // loop counter; 10 bits so it can reach 512

    reg Flag_CalcArrayZ_Done = 0;

    // ---------------------------------------------------------------
    // Z RAM: windowed samples
    // ---------------------------------------------------------------
    wire               ram_x512_rsta;
    wire               ram_x512_ena;
    wire               ram_x512_wea;
    wire        [8:0]  ram_x512_addra;
    wire signed [39:0] ram_x512_dina;
    wire signed [39:0] ram_x512_douta;

    assign ram_x512_rsta  = ~rstn;
    assign ram_x512_ena   = 1;
    assign ram_x512_wea   = arrayz_wea_r;
    assign ram_x512_addra = arrayz_addra_r;
    assign ram_x512_dina  = arrayz_dina_r;
    assign array_z_dout   = ram_x512_douta;

    ram_x512 ram_x512_u0 (
        .clka (clk),                // input clka
        .rsta (ram_x512_rsta),      // input rsta
        .ena  (ram_x512_ena),       // input ena
        .wea  (ram_x512_wea),       // input [0 : 0] wea
        .addra(ram_x512_addra),     // input [8 : 0] addra
        .dina (ram_x512_dina),      // input [39 : 0] dina
        .douta(ram_x512_douta)      // output [39 : 0] douta
    );

    // ---------------------------------------------------------------
    // Window coefficient ROM: c[0..511]
    // ---------------------------------------------------------------
    wire               rom_encwin_u0clka;
    wire        [8:0]  rom_encwin_u0addra;
    wire signed [12:0] rom_encwin_u0douta;

    assign rom_encwin_u0clka  = clk;
    assign rom_encwin_u0addra = arrayc_addra_r;

    rom_encwin rom_encwin_u0 (
        .clka (rom_encwin_u0clka),  // input clka
        .addra(rom_encwin_u0addra), // input [8 : 0] addra
        .douta(rom_encwin_u0douta)  // output [12 : 0] douta
    );

    // ---------------------------------------------------------------
    // x RAM: sample history
    // ---------------------------------------------------------------
    wire               ram_inputx_u0clka;
    wire               ram_inputx_u0rsta;
    wire               ram_inputx_u0wea;
    wire        [8:0]  ram_inputx_u0addra;
    wire signed [15:0] ram_inputx_u0dina;
    wire signed [15:0] ram_inputx_u0douta;

    assign ram_inputx_u0clka  = clk;
    assign ram_inputx_u0rsta  = ~rstn;
    assign ram_inputx_u0wea   = arrayx_wea_r;
    assign ram_inputx_u0addra = arrayx_addra_r;
    assign ram_inputx_u0dina  = arrayx_dina_r;

    ram_inputx ram_inputx_u0 (
        .clka (ram_inputx_u0clka),  // input clka
        .rsta (ram_inputx_u0rsta),  // input rsta
        .wea  (ram_inputx_u0wea),   // input [0 : 0] wea
        .addra(ram_inputx_u0addra), // input [8 : 0] addra
        .dina (ram_inputx_u0dina),  // input [15 : 0] dina
        .douta(ram_inputx_u0douta)  // output [15 : 0] douta
    );

    // ---------------------------------------------------------------
    // Control FSM
    // ---------------------------------------------------------------
    always @(posedge clk) begin
        if (~rstn) begin
            Flag_CalcArrayZ_Done <= 0;
            i                    <= 0;
            arrayx_wea_r         <= 1'b0;
            arrayz_wea_r         <= 1'b0;
            WINSUB_state         <= WINSUB_S1;
            ws_buf_state         <= ws_buf_S1;
            ws_addwin_state      <= ws_addwin_S0;
        end
        else begin
            // Write enables default to 0 and are overridden for exactly one
            // cycle by the S3 states below (the last nonblocking assignment
            // in the block wins).
            arrayx_wea_r <= 1'b0;
            arrayz_wea_r <= 1'b0;

            case (WINSUB_state)
                WINSUB_S0: WINSUB_state <= WINSUB_S0;   // idle / done

                // for (i=0;i<32;i++) (*x)[k][31-i+off[k]] = (double) *(*buffer)++/SCALE;
                WINSUB_S1:
                    case (ws_buf_state)
                        ws_buf_S0: ws_buf_state <= ws_buf_S0;
                        ws_buf_S1:
                            if (i < 32) begin                   // request sample i
                                ws_buf_state <= ws_buf_S2;
                                buffer_addr  <= i;
                            end
                            else begin                          // i == 32: phase 1 finished
                                ws_buf_state    <= ws_buf_S0;
                                i               <= 0;
                                WINSUB_state    <= WINSUB_S2;
                                ws_addwin_state <= ws_addwin_S1;
                            end
                        ws_buf_S2: ws_buf_state <= ws_buf_S3;   // wait for the sample
                        ws_buf_S3: begin
                            arrayx_dina_r  <= buffer;
                            arrayx_addra_r <= 31 - i + offk0;
                            arrayx_wea_r   <= 1'b1;             // RAM stores it on the next edge
                            i              <= i + 1;
                            ws_buf_state   <= ws_buf_S1;
                        end
                        default: ws_buf_state <= ws_buf_S0;
                    endcase

                // for (i=0;i<HAN_SIZE;i++) z[i] = (*x)[k][(i+off[k])&(HAN_SIZE-1)] * c[i];
                WINSUB_S2:
                    case (ws_addwin_state)
                        ws_addwin_S0: ws_addwin_state <= ws_addwin_S0;
                        ws_addwin_S1:
                            if (i < 512) begin                  // i is 10 bits so it can hold 512
                                ws_addwin_state <= ws_addwin_S2;
                                arrayx_addra_r  <= (i + offk0) & (9'h1FF);  // x[(i+offk0)&511]
                                arrayc_addra_r  <= i;                       // c[i]
                            end
                            else begin
                                ws_addwin_state      <= ws_addwin_S0;
                                offk0                <= offk0 + 480;        // 9-bit add wraps modulo 512
                                i                    <= 0;
                                WINSUB_state         <= WINSUB_S0;
                                Flag_CalcArrayZ_Done <= 1;
                            end
                        ws_addwin_S2: ws_addwin_state <= ws_addwin_S3;      // wait for RAM/ROM data
                        ws_addwin_S3: begin
                            arrayz_dina_r   <= rom_encwin_u0douta * ram_inputx_u0douta;
                            arrayz_addra_r  <= (i & 9'h1FF);
                            arrayz_wea_r    <= 1'b1;            // RAM stores it on the next edge
                            i               <= i + 1;
                            ws_addwin_state <= ws_addwin_S1;
                        end
                        default: ws_addwin_state <= ws_addwin_S0;
                    endcase

                default: WINSUB_state <= WINSUB_S0;
            endcase
        end
    end

    // NOTE: as in the original listing, the module is not closed here; the
    // optional debug block is meant to be placed inside the module, followed
    // by `endmodule`.
调试时加入的数据打印代码:
`define DEBUG_PRINT_Z
`ifdef DEBUG_PRINT_Z
    // Simulation-only dump of the Z RAM.  This block belongs inside module
    // window_subband: it uses Flag_CalcArrayZ_Done, arrayz_addra_r and
    // ram_x512_douta.
    //
    // Once Flag_CalcArrayZ_Done is set, the 512 words of ram_x512 are read
    // back one at a time into mem_array_z_dout, written to ./infoout/z.txt
    // (one decimal value per line), and the simulation is stopped.
    //
    // NOTE: arrayz_addra_r is also assigned by the main FSM always block.
    // This works in simulation because the main FSM is parked in WINSUB_S0
    // by the time the flag is set, but it is not synthesizable.
    parameter printz_S0 = 2'b00, printz_S1 = 2'b01,
              printz_S2 = 2'b10, printz_S3 = 2'b11;
    reg [1:0] printz_state;
    reg [9:0] printz_i;                     // read-back counter, 10 bits to reach 512
    integer   fid;
    integer   printz_k;                     // index used only by the file dump loop
    reg signed [39:0] mem_array_z_dout [0:511];

    always @(posedge clk) begin
        if (!Flag_CalcArrayZ_Done) begin
            printz_state <= printz_S0;
            printz_i     <= 0;
        end
        else begin
            case (printz_state)
                printz_S0:
                    if (printz_i < 10'd512) begin
                        printz_state   <= printz_S1;
                        arrayz_addra_r <= (printz_i & 9'h1FF);
                    end
                    else begin
                        // $fopen returns 0 when the file cannot be opened
                        fid = $fopen("./infoout/z.txt");
                        if (fid == 0)
                            $display("fopen z err!\n");
                        else begin
                            for (printz_k = 0; printz_k < 512; printz_k = printz_k + 1)
                                $fdisplay(fid, "%d", mem_array_z_dout[printz_k]);
                            $fclose(fid);
                        end
                        $stop;
                    end
                printz_S1: printz_state <= printz_S2;   // wait for the RAM read data
                printz_S2: begin
                    mem_array_z_dout[printz_i] <= ram_x512_douta;
                    printz_i     <= printz_i + 1'b1;
                    printz_state <= printz_S0;
                end
                default: $stop;
            endcase
        end
    end
`endif
testbench 文件,buffer.txt中装入32个1:
`timescale 1ns / 1ps

// Testbench for window_subband.
// Holds reset low for 100 ns, runs a 10 ns clock, and answers every
// buffer_addr request with the matching word of mem_buffer, which is loaded
// from ./infoin/buffer.txt (hex format).
module tb_winsub;

    // Inputs
    reg               rstn;
    reg               clk;
    reg signed [15:0] buffer;

    // Outputs
    wire        [5:0]  buffer_addr;
    wire signed [39:0] array_z_dout;

    // Unit under test.
    // The original listing instantiated `enc_top`, which is not defined in
    // this note; the port list matches window_subband exactly.  If your
    // project wraps window_subband in an enc_top module, instantiate that
    // wrapper here instead.
    window_subband uut (
        .rstn        (rstn),
        .clk         (clk),
        .buffer      (buffer),
        .buffer_addr (buffer_addr),
        .array_z_dout(array_z_dout)
    );

    initial begin
        // Initialize inputs
        rstn   = 0;
        clk    = 0;
        buffer = 0;

        // Hold reset for 100 ns
        #100;

        // Release reset; the UUT starts fetching samples
        rstn = 1;
    end

    // 10 ns clock period
    always #5 clk <= ~clk;

    // Sample memory: 32 words of 16 bits
    reg signed [15:0] mem_buffer [0:31];

    initial begin
        $readmemh("./infoin/buffer.txt", mem_buffer);
    end

    // Registered read port: the requested sample appears on `buffer` one
    // clock after buffer_addr changes.
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            buffer <= 0;
        end
        else begin
            buffer <= mem_buffer[buffer_addr];
        end
    end

endmodule
仿真将输出的z.txt文件导入matlab,绘图比较,结果正确。
仿真波形:
~~~~~~~~~~~~~~~~ 博文多为个人学习中的笔记,不保证完全正确;
参考摘录了诸多书籍,文档,仅用于学习交流
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~