verilog 实现8位无符号乘法器

一、移位相加乘法器—串行形式

1、RTL代码

// Serial (multi-cycle) shift-and-add unsigned multiplier.
//
// Parameters:
//   DATAWIDTH - width of each operand in bits; the product is 2*DATAWIDTH bits.
// Ports:
//   clk    - clock; all state changes on the rising edge.
//   x, y   - unsigned operands, sampled only while the FSM is in s0.
//   result - product of the most recently completed multiplication; it is
//            updated in s2 and holds its value until the next s2.
//
// Timing: one multiplication takes DATAWIDTH + 3 clocks
// (1 load + DATAWIDTH add/shift steps + 1 exit check + 1 output), and the
// machine restarts automatically, so x/y are re-sampled every DATAWIDTH + 3
// clocks.
module unsigned_mul_1 #(
            parameter DATAWIDTH=8
)(clk, x, y, result);

    // FSM state encodings. These are internal constants, so they are declared
    // as localparam rather than as overridable parameters.
    localparam s0 = 0,   // load operands, clear accumulator
               s1 = 1,   // add/shift loop
               s2 = 2;   // publish the product

    input clk;
    input    [DATAWIDTH-1:0] x, y;
    output   [DATAWIDTH*2-1:0] result;

    reg      [DATAWIDTH*2-1:0] result;

    reg  [DATAWIDTH-1:0] count = 0;   // number of multiplier bits processed so far
    reg  [1:0] state = 0;             // current FSM state, starts in s0
    reg  [DATAWIDTH*2-1:0] P, T;      // P: running sum, T: shifted multiplicand
    reg  [DATAWIDTH-1:0] y_reg;       // multiplier, consumed LSB first

    always @(posedge clk) begin
        case (state)
            s0: begin
                // Capture the operands and reset the working registers.
                count <= 0;
                P     <= 0;
                y_reg <= y;
                T     <= {{DATAWIDTH{1'b0}}, x};   // zero-extend x to product width
                state <= s1;
            end
            s1: begin
                if (count == DATAWIDTH) begin
                    // Every multiplier bit has been consumed.
                    state <= s2;
                end
                else begin
                    // Add the shifted multiplicand when the current multiplier
                    // bit is 1; otherwise P simply keeps its value.
                    if (y_reg[0] == 1'b1)
                        P <= P + T;
                    y_reg <= y_reg >> 1;   // expose the next multiplier bit
                    T     <= T << 1;       // weight of the next bit
                    count <= count + 1;
                end
            end
            s2: begin
                result <= P;
                state  <= s0;
            end
            // state is 2 bits wide but only three encodings are used. If the
            // register ever holds the fourth value, restart from s0 instead of
            // staying stuck there forever.
            default: state <= s0;
        endcase
    end

endmodule

2、仿真程序

`timescale 1ns / 1ps

// Testbench for unsigned_mul_1: drives a free-running 10 ns clock and feeds a
// new operand pair once per multiplication period of the DUT.
module tb_unsigned_mul1();

parameter DATAWIDTH=8;

// Clocks the DUT spends on one multiplication:
// 1 load + DATAWIDTH add/shift steps + 1 exit check + 1 output.
// The DUT re-samples its inputs once per this many clocks.
localparam CYCLES_PER_OP = DATAWIDTH + 3;

reg clk;

reg   [DATAWIDTH-1:0] Ain,Bin;

wire   [DATAWIDTH*2-1:0] result;

// Initial operands and clock level.
initial
	begin
		Ain = 5;
		Bin = 3;
		clk = 0;
	end

// 10 ns clock period; first rising edge at 5 ns.
always #5 clk = ~clk;

// Stimulus: advance the operands once per DUT multiplication period.
// The update happens 1 ns after a rising edge, never on the edge itself, so
// the DUT never samples x/y at the same simulation time they change (the
// previous "@(posedge clk) #110" form changed them exactly on a rising edge).
always
	begin
		repeat (CYCLES_PER_OP) @(posedge clk);
		#1;
		Ain = Ain+2;
		Bin = Bin+1;
	end

unsigned_mul_1 #(.DATAWIDTH( DATAWIDTH)) u1(clk,Ain,Bin,result);
endmodule

3、仿真结果
在这里插入图片描述
可以看出,从输入在 s0 状态被采样的时钟上升沿算起,输出延迟10个时钟周期(8次移位相加 + 1次计数结束判断 + 1次结果输出),这和程序是对应的。

二、移位相加乘法器—流水线形式

1、RTL代码

// Pipelined shift-and-add unsigned multiplier (8 x 8 -> 16 bits).
//
// Ports:
//   mul_a, mul_b - unsigned operands, sampled on every rising clock edge.
//   mul_out      - registered product.
//   clk          - clock, rising-edge active.
//   rst_n        - asynchronous reset, active low; clears every pipeline register.
//
// Pipeline: partial products (stored0..7) -> pairwise sums (add1..4) ->
// (out1, out2) -> mul_out. A product appears on mul_out three clock edges
// after the edge that sampled its operands, and a new operand pair can be
// accepted on every clock.
//
// NOTE: the partial-product concatenations and the mul_b[0..7] indexing below
// are written for 8-bit operands, so MUL_WIDTH / MUL_RESULT must stay at
// their defaults (8 / 16).
module unsigned_mul_2(
             mul_a,
             mul_b,
             mul_out,
             clk,
             rst_n
             );

   parameter   MUL_WIDTH  = 8;
   parameter   MUL_RESULT = 16;

   input [MUL_WIDTH-1:0]   mul_a;
   input [MUL_WIDTH-1:0]   mul_b;
   input                   clk;
   input                   rst_n;

   output [MUL_RESULT-1:0]   mul_out;

   reg [MUL_RESULT-1:0]   mul_out;

   // Stage 1: one shifted partial product per bit of mul_b.
   reg [MUL_RESULT-1:0]   stored0;
   reg [MUL_RESULT-1:0]   stored1;
   reg [MUL_RESULT-1:0]   stored2;
   reg [MUL_RESULT-1:0]   stored3;
   reg [MUL_RESULT-1:0]   stored4;
   reg [MUL_RESULT-1:0]   stored5;
   reg [MUL_RESULT-1:0]   stored6;
   reg [MUL_RESULT-1:0]   stored7;
   // Stage 3: sums of the stage-2 pairs.
   reg [MUL_RESULT-1:0]   out1,out2;
   // Stage 2: pairwise sums of the partial products.
   reg [MUL_RESULT-1:0]   add1,add2,add3,add4;

 always @ ( posedge clk or negedge rst_n )
 begin
    if ( !rst_n )
       begin
          // Clear every register in the pipeline, including stored7 and
          // mul_out, so no unknown value can reach the output after reset
          // and all flops share the same reset style.
          stored0 <= {MUL_RESULT{1'b0}};
          stored1 <= {MUL_RESULT{1'b0}};
          stored2 <= {MUL_RESULT{1'b0}};
          stored3 <= {MUL_RESULT{1'b0}};
          stored4 <= {MUL_RESULT{1'b0}};
          stored5 <= {MUL_RESULT{1'b0}};
          stored6 <= {MUL_RESULT{1'b0}};
          stored7 <= {MUL_RESULT{1'b0}};

          add1 <= {MUL_RESULT{1'b0}};
          add2 <= {MUL_RESULT{1'b0}};
          add3 <= {MUL_RESULT{1'b0}};
          add4 <= {MUL_RESULT{1'b0}};

          out1 <= {MUL_RESULT{1'b0}};
          out2 <= {MUL_RESULT{1'b0}};

          mul_out <= {MUL_RESULT{1'b0}};
       end
    else
       begin
          // Stage 1: the partial products are registered straight from the
          // input ports on this edge. Bit k of mul_b selects mul_a shifted
          // left by k (zero-padded to 16 bits) or zero.
          stored0 <= mul_b[0] ? {8'b0,mul_a}       : 16'b0;
          stored1 <= mul_b[1] ? {7'b0,mul_a,1'b0}  : 16'b0;
          stored2 <= mul_b[2] ? {6'b0,mul_a,2'b0}  : 16'b0;
          stored3 <= mul_b[3] ? {5'b0,mul_a,3'b0}  : 16'b0;
          stored4 <= mul_b[4] ? {4'b0,mul_a,4'b0}  : 16'b0;
          stored5 <= mul_b[5] ? {3'b0,mul_a,5'b0}  : 16'b0;
          stored6 <= mul_b[6] ? {2'b0,mul_a,6'b0}  : 16'b0;
          stored7 <= mul_b[7] ? {1'b0,mul_a,7'b0}  : 16'b0;

          // Stage 2: add the partial products in pairs. Non-blocking
          // assignments mean each stage reads the previous clock's values.
          add1 <= stored1 + stored0;
          add2 <= stored3 + stored2;
          add3 <= stored5 + stored4;
          add4 <= stored6 + stored7;

          // Stage 3: combine the pair sums.
          out1 <= add1 + add2;
          out2 <= add3 + add4;

          // Stage 4: final sum is the product.
          mul_out <= out1 + out2;
       end

 end

 endmodule

2、仿真程序

`timescale 1ns / 1ps

// Testbench for unsigned_mul_2: applies a short active-low reset, runs a
// 10 ns clock, and presents a new operand pair after every rising edge.
module tb_unsigned_mul2();

    reg         clk, rst_n;
    reg  [7:0]  Ain, Bin;
    wire [15:0] result;

    // Start-up sequence: operands, clock level and reset are driven at 1 ns;
    // reset is released at 4 ns, before the first rising clock edge at 5 ns.
    initial begin
        #1;
        Ain   = 5;
        Bin   = 2;
        clk   = 0;
        rst_n = 0;
        #3;
        rst_n = 1;
    end

    // Clock generator: toggles every 5 ns.
    always begin
        #5;
        clk = ~clk;
    end

    // Stimulus: step the operands 1 ns after each rising edge so they are
    // stable when the next edge samples them.
    always @(posedge clk) begin
        #1;
        Ain = Ain + 2;
        Bin = Bin + 1;
    end

    // Device under test, connected by port name.
    unsigned_mul_2 u2 (
        .mul_a   (Ain),
        .mul_b   (Bin),
        .mul_out (result),
        .clk     (clk),
        .rst_n   (rst_n)
    );

endmodule

3、仿真结果

在这里插入图片描述
可以看出,输入在某个时钟上升沿被采样并寄存为部分积之后,还要经过三级加法寄存器(add、out、mul_out),因此输出相对于采样沿延迟3个时钟周期。

posted @ 2020-09-29 21:51  耐心的小黑  阅读(536)  评论(0编辑  收藏  举报