mp2 encoder--window_subband函数的FPGA实现笔记

MP2编码的协议文件 “ISO 11172-3”

TITLE PAGE PROVIDED BY ISO
CD 11172-3
CODING OF MOVING PICTURES AND ASSOCIATED AUDIO
FOR DIGITAL STORAGE MEDIA AT UP TO ABOUT 1.5 MBIT/s
Part 3 AUDIO
CONTENTS

 

编码流程框图

算法模型

FIGURE 3-C.1 Analysis subband filter flow chart                

子带分析滤波器流程图

子带滤波器的参考代码

1 for (i=0;i<3;i++) 
2     for (j=0;j<SCALE_BLOCK;j++) {    // SCALE_BLOCK = 12
3         for (k=0;k<stereo;k++) {
4            window_subband(&win_buf[k], &(*win_que)[k][0], k);    // window the samples of channel k into (*win_que)[k]
5            filter_subband(&(*win_que)[k][0], &(*sb_sample)[k][i][j][0]);    // filter the windowed data into (*sb_sample)[k][i][j]
6         }
7     }
  • 代码中的 window_subband 给子带加窗,对应 FIGURE 3-C.1 中的 第1,2,3步骤,窗系数存储在 数组C<1x512> (1行512列矩阵)里面,
  • 加窗后的数据存在 Z<1x512>。
  • filter_subband 子带滤波函数,对应 FIGURE 3-C.1 第4,5,6步骤,输出长度为 32 的滤波结果 S<1x32>。
  • 滤波器组 矩阵 M<32x64> 是通过三角函数 cos 计算得到的,可通过定点化存储到 ROM 里面。

子带加窗函数代码:

/************************************************************************
 *
 * window_subband()
 *
 * PURPOSE:  Apply the overlapping analysis window to incoming PCM samples
 *
 * SEMANTICS:
 * Each call consumes 32 16-bit PCM samples through *buffer (the caller's
 * read pointer is advanced past them), divides each one by SCALE and
 * stores them in reverse order into the per-channel history buffer #x#,
 * overwriting the 32 oldest entries.  The history is then read
 * circularly, starting at the channel's current offset, and multiplied
 * element by element with the analysis window #c# to produce the
 * windowed samples #z#.
 *
 * buffer : address of the caller's pointer into the PCM samples
 * z      : receives the HAN_SIZE windowed values
 * k      : channel index (0 or 1); selects the history row and offset
 *
 * The window table and the zeroed history are allocated and filled on
 * the first call only.
 *
 ************************************************************************/
void window_subband(buffer, z, k)
short **buffer;
double z[HAN_SIZE];
int k;
{
    typedef double XX[2][HAN_SIZE];
    static XX *x;
    static double *c;
    static int off[2] = {0,0};
    static char init = 0;
    int n;

    /* one-time setup: load the window coefficients, clear both histories */
    if (!init) {
        int ch;

        c = (double *) mem_alloc(sizeof(double) * HAN_SIZE, "window");
        read_ana_window(c);
        x = (XX *) mem_alloc(sizeof(XX), "x");
        for (ch = 0; ch < 2; ch++) {
            for (n = 0; n < HAN_SIZE; n++) {
                (*x)[ch][n] = 0;
            }
        }
        init = 1;
    }

    {
        double *hist = (*x)[k];   /* history row of this channel */
        int base = off[k];        /* current start of the circular window */

        /* overwrite the 32 oldest entries; the first sample read lands
           at base+31, the last one at base */
        for (n = 31; n >= 0; n--) {
            hist[n + base] = (double) *(*buffer)++ / SCALE;
        }

        /* read the history circularly from base and apply the window */
        for (n = 0; n < HAN_SIZE; n++) {
            z[n] = hist[(n + base) & (HAN_SIZE - 1)] * c[n];
        }

        /* advance the offset by 480 and wrap it with the HAN_SIZE-1 mask
           (the mask acts as a modulo only if HAN_SIZE is a power of two;
           the accompanying notes use 512) */
        off[k] = (base + 480) & (HAN_SIZE - 1);
    }
}
void window_subband(buffer, z, k)
  • 代码中的静态变量 init ,用于控制在初次调用该函数时,初始化读取窗系数C 和清零音频存储数组x。
  • 注意:if代码块执行后,init=1,之后再进入window_subband函数,if代码块不再执行。

void window_subband(buffer, z, k)函数的FPGA实现方法

  •     窗系数c用ROM存,音频采样缓冲buffer 由模块的外部输入,音频存储数组 x<1x512>用RAM<16bit 深度512>。
  •     整个函数分成 2 个大状态。
/* replace 32 oldest samples with 32 new samples */
for (i=0;i<32;i++) (*x)[k][31-i+off[k]] = (double) *(*buffer)++/SCALE;
/* shift samples into proper window positions */
for (i=0;i<HAN_SIZE;i++) z[i] = (*x)[k][(i+off[k])&(HAN_SIZE-1)] * c[i];
off[k] += 480;              /* advance the offset; it is wrapped modulo HAN_SIZE by the following "off[k] &= HAN_SIZE-1" step */
  •    由于在 Verilog 设计中将 off 定义成 reg[8:0](9 bit),加 480 后结果会自动按 512 取模回绕,所以不用增加一个状态来运算 off[k] &= HAN_SIZE-1; 。

状态一,从buffer中取采样数据存放到x,这也要用状态机实现。状态1.1 给出取buffer的地址;状态1.2等待数据buffer取出;状态1.3 给出写入x中的数据 和地址。

状态一 ws_buf_state 中的FSM代码块:

// Stage-1 FSM: copies 32 samples from the external buffer into the x storage,
// mirroring the C loop  (*x)[k][31-i+off[k]] = *(*buffer)++  for i = 0..31.
case(ws_buf_state)
    ws_buf_S0: ws_buf_state <= ws_buf_S0;   // idle: stay here
    ws_buf_S1: if(i<32) begin   // i < 32: request sample i, then go to S2
                   ws_buf_state <= ws_buf_S2;
                   buffer_addr <= i;            //read buffer[addr]
               end
               else begin       // i == 32: all samples copied; park this FSM in S0 and start the windowing stage
                   ws_buf_state <= ws_buf_S0;
                   i <= 0;      //clear i
                   WINSUB_state <= WINSUB_S2;
                   ws_addwin_state <= ws_addwin_S1;
               end
    ws_buf_S2: ws_buf_state <= ws_buf_S3;   //wait data
    ws_buf_S3: begin 
                   arrayx_dina_r <= buffer; //write buffer data to ram_inputx
                   arrayx_addra_r <= 31-i+offk0;   // same index as 31-i+off[k] in the C code
                   i <= i + 1;
                   ws_buf_state <= ws_buf_S1;   // goto S1
               end
    //ws_buf_S4: an extra state may be needed here to wait for the write issued in ws_buf_S3 to complete
    default: ws_buf_state <= ws_buf_S0;
endcase

状态二,分别取出x 和 c,做计算,需用状态机实现。状态2.1 给出读x的地址,c的地址;状态2.2 等待数据读出;状态2.3 计算,写入 Z 中。

状态二 ws_addwin_state 中的FSM代码块:

// Stage-2 FSM: for i = 0..511 reads x and c, multiplies them and stores the
// product as z[i], mirroring the C loop
// z[i] = (*x)[k][(i+off[k])&(HAN_SIZE-1)] * c[i].
case(ws_addwin_state)
    ws_addwin_S0: ws_addwin_state <= ws_addwin_S0;   // idle: stay here
    ws_addwin_S1: if(i < 512) begin // note: 512 = 0x200 needs 10 bits, so i must be at least 10 bits wide
                      ws_addwin_state <= ws_addwin_S2;
                      arrayx_addra_r <= (i+offk0)&(9'h1FF); //x[(i+offk0)&511]
                      arrayc_addra_r <= i;           // c[i]
                  end
                  else begin   // i == 512: all products issued; finish the pass
                      ws_addwin_state <= ws_addwin_S0;
                      offk0 <= offk0+480;  // offk0 is declared 9 bits wide, so the sum wraps modulo 512 and no explicit offk0 &= 511 is needed
                      i <= 0;   //clear i
                      WINSUB_state <= WINSUB_S0;
                      Flag_CalcArrayZ_Done <= 1;    // flag that the whole z array has been computed
                  end
    ws_addwin_S2: ws_addwin_state <= ws_addwin_S3;   //wait read ram data
    ws_addwin_S3: begin 
                      arrayz_dina_r <= rom_encwin_u0douta*ram_inputx_u0douta;   // c[i] * x[...]
                      arrayz_addra_r <= (i&9'h1FF);   // z[i]
                      i <= i + 1;
                      ws_addwin_state <= ws_addwin_S1;  // goto S1 
                  end
    //ws_addwin_S4: an extra state may be needed here to wait for the write issued in ws_addwin_S3 to complete
    default: ws_addwin_state <= ws_addwin_S0;
endcase

完整的模块代码,其中 ram_inputx_u0 存储 音频x, rom_encwin_u0 存储窗系数 C, ram_x512_u0 存储计算后的Z:

// window_subband
//
// Hardware counterpart of the C window_subband() routine for a single
// channel (offk0 plays the role of off[0]).  After reset the block runs one
// pass:
//   stage 1 (WINSUB_S1): fetch 32 samples through buffer_addr/buffer and
//                        store them, unscaled, at x[31-i+offk0];
//   stage 2 (WINSUB_S2): for i = 0..511 compute
//                        z[i] = x[(i+offk0)&511] * c[i] and store it;
// then it advances offk0 by 480 (mod 512), raises Flag_CalcArrayZ_Done and
// idles in WINSUB_S0.
//
// Ports:
//   rstn         - synchronous, active-low reset
//   clk          - clock; everything is clocked on the rising edge
//   buffer       - sample returned for buffer_addr; it is captured two
//                  rising edges after buffer_addr has been updated
//   buffer_addr  - index (0..31) of the sample being requested
//   array_z_dout - data output of the z RAM
//
// NOTE(review): the FSM waits exactly one state between presenting an
// address and using the data, i.e. it assumes the RAM/ROM cores deliver
// read data one clock after the address - confirm against the IP settings.
module window_subband(
    input rstn,
    input clk,
    input signed[15:0] buffer,
    output reg[5:0] buffer_addr,
    output signed [39:0] array_z_dout
    );
parameter   HAN_SIZE = 512;
parameter   HAN_SIZE_DEC = 9'h1FF;  //511

// address / data registers driving the three memories
reg[8:0] arrayx_addra_r;
reg[8:0] arrayc_addra_r;
reg[8:0] arrayz_addra_r;
reg signed[15:0] arrayx_dina_r;
reg signed[39:0] arrayz_dina_r;
// One-cycle write strobes.  The write enables must not be tied high:
// with wea stuck at 1 every read access would also write the stale dina
// value to the address being read.
reg arrayx_wea_r = 1'b0;
reg arrayz_wea_r = 1'b0;
// equivalent of "static int off[2] = {0,0};" for one channel; deliberately
// not cleared by rstn, matching the original design
reg[8:0] offk0 = 9'd0;
// loop counter shared by both stages; 10 bits so that it can reach 512
reg[9:0] i;

parameter WINSUB_S0 = 2'b00,WINSUB_S1 = 2'b01,WINSUB_S2 = 2'b10,WINSUB_S3 = 2'b11;
reg[1:0] WINSUB_state;
reg[8:0] WINSUB_i;

parameter ws_buf_S0 = 2'b00,ws_buf_S1 = 2'b01,ws_buf_S2 = 2'b10,ws_buf_S3 = 2'b11;
reg[1:0] ws_buf_state;

parameter ws_addwin_S0=2'b00,ws_addwin_S1=2'b01,ws_addwin_S2=2'b10,ws_addwin_S3=2'b11;
reg[1:0] ws_addwin_state;

// set once the whole z array has been written
reg  Flag_CalcArrayZ_Done=0;

// ---------------------------------------------------------------------
// Memories.  They are declared ahead of the FSM so that their data-out
// nets exist before the always block refers to them.
// ---------------------------------------------------------------------

// z RAM: 512 x 40 bit, holds the windowed samples
wire ram_x512_rsta;
wire ram_x512_ena;
wire ram_x512_wea;
wire[8:0]  ram_x512_addra;
wire signed[39:0] ram_x512_dina;
wire signed[39:0] ram_x512_douta;
//input
assign ram_x512_rsta  = ~rstn;
assign ram_x512_ena   = 1;
assign ram_x512_wea   = arrayz_wea_r;
assign ram_x512_addra = arrayz_addra_r;
assign ram_x512_dina  = arrayz_dina_r ;
//output
assign array_z_dout = ram_x512_douta;
ram_x512 ram_x512_u0 (
  .clka(clk), // input clka
  .rsta(ram_x512_rsta), // input rsta
  .ena(ram_x512_ena), // input ena
  .wea(ram_x512_wea), // input [0 : 0] wea
  .addra(ram_x512_addra), // input [8 : 0] addra
  .dina(ram_x512_dina), // input [39 : 0] dina
  .douta(ram_x512_douta) // output [39 : 0] douta
);

// window coefficient ROM: 512 x 13 bit
wire rom_encwin_u0clka;
wire [8 : 0] rom_encwin_u0addra;
wire signed[12 : 0] rom_encwin_u0douta;
assign rom_encwin_u0clka  = clk;
assign rom_encwin_u0addra = arrayc_addra_r;
rom_encwin rom_encwin_u0 (
  .clka(rom_encwin_u0clka),      // input clka
  .addra(rom_encwin_u0addra),    // input [8 : 0] addra
  .douta(rom_encwin_u0douta)     // output [12 : 0] douta
);

// x RAM: 512 x 16 bit, holds the sample history
wire ram_inputx_u0clka        ;
wire ram_inputx_u0rsta        ;
wire ram_inputx_u0wea         ;
wire [8:0]  ram_inputx_u0addra;
wire signed[15:0] ram_inputx_u0dina ;
wire signed[15:0] ram_inputx_u0douta;
assign  ram_inputx_u0clka       = clk;
assign  ram_inputx_u0rsta       = ~rstn;
assign  ram_inputx_u0wea        = arrayx_wea_r;
assign  ram_inputx_u0addra      = arrayx_addra_r;
assign  ram_inputx_u0dina       = arrayx_dina_r;
ram_inputx ram_inputx_u0 (
  .clka(ram_inputx_u0clka),     // input clka
  .rsta(ram_inputx_u0rsta),     // input rsta
  .wea(ram_inputx_u0wea),       // input [0 : 0] wea
  .addra(ram_inputx_u0addra),   // input [8 : 0] addra
  .dina(ram_inputx_u0dina),     // input [15 : 0] dina
  .douta(ram_inputx_u0douta)    // output [15 : 0] douta
);

// ---------------------------------------------------------------------
// Control FSM
// ---------------------------------------------------------------------
always @(posedge clk)
begin
    if(~rstn) begin
        Flag_CalcArrayZ_Done <= 0;
        i <= 0;
        buffer_addr <= 6'd0;
        arrayx_wea_r <= 1'b0;
        arrayz_wea_r <= 1'b0;
        WINSUB_state <= WINSUB_S1;
        ws_buf_state <= ws_buf_S1;
        ws_addwin_state <= ws_addwin_S0;
    end
    else begin
        // Write strobes default to inactive; the S3 states below override
        // this for exactly one cycle, together with address and data.
        arrayx_wea_r <= 1'b0;
        arrayz_wea_r <= 1'b0;

        case(WINSUB_state)
            WINSUB_S0: WINSUB_state <= WINSUB_S0;   // idle after one pass
            // for (i=0;i<32;i++) (*x)[k][31-i+off[k]] = (double) *(*buffer)++/SCALE;
            WINSUB_S1:
                case(ws_buf_state)
                    ws_buf_S0: ws_buf_state <= ws_buf_S0;
                    ws_buf_S1: if(i<32) begin   // request sample i
                                   ws_buf_state <= ws_buf_S2;
                                   buffer_addr <= i[5:0];       //read buffer[addr]
                               end
                               else begin       // i == 32: all samples stored, start stage 2
                                   ws_buf_state <= ws_buf_S0;
                                   i <= 0;      //clear i
                                   WINSUB_state <= WINSUB_S2;
                                   ws_addwin_state <= ws_addwin_S1;
                               end
                    ws_buf_S2: ws_buf_state <= ws_buf_S3;   //wait data
                    ws_buf_S3: begin
                                   arrayx_dina_r <= buffer; //write buffer data to ram_inputx
                                   arrayx_addra_r <= 31-i+offk0;
                                   arrayx_wea_r <= 1'b1;    // the write happens on the next edge
                                   i <= i + 1;
                                   ws_buf_state <= ws_buf_S1;   // goto S1
                               end
                    default: ws_buf_state <= ws_buf_S0;
                endcase
            // for (i=0;i<HAN_SIZE;i++) z[i] = (*x)[k][(i+off[k])&(HAN_SIZE-1)] * c[i];
            WINSUB_S2:
                case(ws_addwin_state)
                    ws_addwin_S0: ws_addwin_state <= ws_addwin_S0;
                    ws_addwin_S1: if(i < 512) begin // 512 = 0x200 needs 10 bits, hence the 10-bit i
                                      ws_addwin_state <= ws_addwin_S2;
                                      arrayx_addra_r <= (i+offk0)&(9'h1FF); //x[(i+offk0)&511]
                                      arrayc_addra_r <= i[8:0];      // c[i]
                                  end
                                  else begin    // i == 512: pass finished
                                      ws_addwin_state <= ws_addwin_S0;
                                      offk0 <= offk0+480;  // 9-bit register: wraps modulo 512, no explicit &511 needed
                                      i <= 0;   //clear i
                                      WINSUB_state <= WINSUB_S0;
                                      Flag_CalcArrayZ_Done <= 1;
                                  end
                    ws_addwin_S2: ws_addwin_state <= ws_addwin_S3;   //wait read ram data
                    ws_addwin_S3: begin
                                      // both operands are signed, so this is a signed multiply
                                      arrayz_dina_r <= rom_encwin_u0douta*ram_inputx_u0douta;
                                      arrayz_addra_r <= i[8:0];
                                      arrayz_wea_r <= 1'b1;    // the write happens on the next edge
                                      i <= i + 1;
                                      ws_addwin_state <= ws_addwin_S1;  // goto S1
                                  end
                    default: ws_addwin_state <= ws_addwin_S0;
                endcase
            default: WINSUB_state <= WINSUB_S0;
        endcase
    end
end
View Code

调试时加入的数据打印代码:

`define  DEBUG_PRINT_Z
`ifdef  DEBUG_PRINT_Z
    // Simulation-only dump of the z RAM.
    // Once Flag_CalcArrayZ_Done is set, the 512 entries are read back one
    // at a time into mem_array_z_dout, written to ./infoout/z.txt (one
    // decimal value per line) and the simulation is stopped.
    //
    // NOTE(review): arrayz_addra_r is also assigned by the windowing FSM's
    // always block; having two procedural drivers is tolerated only in
    // simulation, so this block must stay out of synthesis.
    parameter printz_S0=2'b00,printz_S1=2'b01,printz_S2=2'b10,printz_S3=2'b11;
    reg[1:0] printz_state;
    reg[9:0] printz_i;          // 10 bits so that it can reach 512
    integer fid;
    integer dump_idx;           // loop index used only for the file dump
    reg signed[39:0] mem_array_z_dout[0:511];
    
    always @(posedge clk) begin
        if(!Flag_CalcArrayZ_Done) begin
            printz_state <= printz_S0;
            printz_i <= 0;
        end
        else begin
            case (printz_state)
                printz_S0: if(printz_i<10'd512) begin
                               // present the next read address
                               printz_state <= printz_S1;
                               arrayz_addra_r <= printz_i[8:0];
                           end
                           else begin
                                // all entries captured: write them out
                                fid = $fopen("./infoout/z.txt");
                                // $fopen returns 0 when the file cannot be opened
                                if(fid == 0)
                                    $display("fopen z err!\n");
                                else begin
                                    for(dump_idx = 0; dump_idx < 512; dump_idx = dump_idx + 1)
                                        $fdisplay(fid,"%d",mem_array_z_dout[dump_idx]);
                                    $fclose(fid);
                                end
                                $stop;
                           end
                printz_S1: printz_state <= printz_S2; //wait read data from ram
                printz_S2: begin 
                           mem_array_z_dout[printz_i] <= ram_x512_douta;
                           printz_i <= printz_i + 1'b1;
                           printz_state <= printz_S0;
                           end
                default: $stop;
            endcase
        end
    end
`endif

testbench 文件,buffer.txt中装入32个1:

`timescale 1ns / 1ps
// Testbench for the window/subband stage.
// It releases reset after 100 ns, runs a 10 ns clock and acts as the sample
// source: a 32-entry memory loaded from ./infoin/buffer.txt is read at the
// address requested by the DUT and returned on `buffer` one clock later.
// The testbench never finishes by itself.
module tb_winsub;
    // signals driven into the DUT
    reg rstn;
    reg clk;
    reg signed[15:0] buffer;

    // signals driven by the DUT
    wire [5:0] buffer_addr;
    wire signed[39:0] array_z_dout;

    // sample store, filled from a hex text file at time 0
    reg signed[15:0] sample_mem[0:31];

    initial begin
        $readmemh("./infoin/buffer.txt",sample_mem);
    end

    // free-running clock, toggling every 5 ns
    always #5 clk <= ~clk;

    // reset sequence: hold reset for 100 ns, then release it
    initial begin
        rstn = 0;
        clk = 0;
        buffer = 0;

        #100;

        rstn = 1;
    end

    // registered read of the sample store; cleared while reset is active
    always @(posedge clk or negedge rstn) begin
        if(!rstn)
            buffer <= 0;
        else
            buffer <= sample_mem[buffer_addr];
    end

    // unit under test
    enc_top uut (
        .rstn(rstn),
        .clk(clk),
        .buffer(buffer),
        .buffer_addr(buffer_addr),
        .array_z_dout(array_z_dout)
    );

endmodule
View Code

仿真将输出的z.txt文件导入matlab,绘图比较,结果正确。

仿真波形:

posted @ 2017-11-12 21:16  .Think  阅读(529)  评论(1编辑  收藏  举报