verilog 实现8位无符号乘法器
一、移位相加乘法器—串行形式
1、RTL代码
// -----------------------------------------------------------------------------
// unsigned_mul_1 : serial shift-and-add unsigned multiplier.
//
// Parameters
//   DATAWIDTH : width of each operand in bits; the product is 2*DATAWIDTH bits.
//
// Ports
//   clk    : clock, all registers update on the rising edge.
//   x, y   : unsigned operands. They are sampled only in the load state, so
//            changes while a multiplication is in flight are ignored.
//   result : registered product of the most recently completed multiplication.
//
// Operation
//   S_LOAD      (1 clock)            : capture x / y, clear the accumulator.
//   S_SHIFT_ADD (DATAWIDTH+1 clocks) : one multiplier bit per clock, plus one
//                                      clock to detect the terminal count.
//   S_DONE      (1 clock)            : copy the accumulator to result.
//   The module free-runs: after S_DONE it goes straight back to S_LOAD, so a
//   new operand pair is taken every DATAWIDTH+3 clocks.
// -----------------------------------------------------------------------------
module unsigned_mul_1 #(
    parameter DATAWIDTH = 8
)(clk, x, y, result);

    // State encodings are internal constants; localparam keeps them from being
    // overridden at instantiation (which could alias two states).
    localparam [1:0] S_LOAD      = 2'd0,
                     S_SHIFT_ADD = 2'd1,
                     S_DONE      = 2'd2;

    input                        clk;
    input      [DATAWIDTH-1:0]   x, y;
    output     [DATAWIDTH*2-1:0] result;

    reg [DATAWIDTH*2-1:0] result;
    reg [DATAWIDTH-1:0]   count = 0;   // number of multiplier bits consumed
    reg [1:0]             state = 0;   // starts in S_LOAD
    reg [DATAWIDTH*2-1:0] P, T;        // P: accumulator, T: shifted multiplicand
    reg [DATAWIDTH-1:0]   y_reg;       // multiplier, shifted right each step

    always @(posedge clk) begin
        case (state)
            S_LOAD: begin
                count <= 0;
                P     <= 0;
                y_reg <= y;
                T     <= {{DATAWIDTH{1'b0}}, x};   // zero-extend the multiplicand
                state <= S_SHIFT_ADD;
            end

            S_SHIFT_ADD: begin
                if (count == DATAWIDTH)
                    state <= S_DONE;
                else begin
                    // Add the current multiplicand weight when the LSB of the
                    // multiplier is set; otherwise the accumulator holds.
                    if (y_reg[0])
                        P <= P + T;
                    y_reg <= y_reg >> 1;
                    T     <= T << 1;
                    count <= count + 1;
                end
            end

            S_DONE: begin
                result <= P;
                state  <= S_LOAD;
            end

            // Encoding 2'd3 is unused. Return to S_LOAD instead of holding, so
            // the FSM cannot lock up if it ever lands here.
            default: state <= S_LOAD;
        endcase
    end

endmodule
2、仿真程序
`timescale 1ns / 1ps
// Testbench for unsigned_mul_1: feeds a new operand pair for every
// multiplication the DUT performs and exposes the product on `result`.
module tb_unsigned_mul1();
parameter DATAWIDTH=8;
// Clocks the DUT spends per multiplication: 1 (load) + DATAWIDTH+1 (shift/add
// loop including the terminal-count check) + 1 (result update).
localparam CYCLES_PER_MUL = DATAWIDTH + 3;

reg clk;
reg [DATAWIDTH-1:0] Ain,Bin;
wire [DATAWIDTH*2-1:0] result;

initial
begin
    Ain = 5;
    Bin = 3;
    clk = 0;
end

// 10 ns clock period.
always #5 clk = ~clk;

// Advance the operands once per DUT multiplication. The update is made with
// nonblocking assignments on a clock edge so the DUT never samples x / y in
// the same time step in which they are being changed by a blocking write
// (the previous "#110" delay landed exactly on the DUT's sampling edge).
always
begin
    repeat (CYCLES_PER_MUL) @(posedge clk);
    Ain <= Ain+2;
    Bin <= Bin+1;
end

unsigned_mul_1 #(.DATAWIDTH( DATAWIDTH)) u1(clk,Ain,Bin,result);
endmodule
3、仿真结果
可以看出,输出延迟输入10个时钟周期,这和程序是对应的。
二、移位相加乘法器—流水线形式
1、RTL代码
// -----------------------------------------------------------------------------
// unsigned_mul_2 : pipelined shift-and-add unsigned multiplier.
//
// Ports
//   mul_a, mul_b : unsigned operands.
//   mul_out      : registered product.
//   clk          : clock, registers update on the rising edge.
//   rst_n        : asynchronous active-low reset; clears every pipeline
//                  register including the output.
//
// Structure (one register stage per line):
//   stored0..7 : eight partial products, mul_a shifted by the bit index and
//                gated by the matching bit of mul_b
//   add1..4    : pairwise sums of the partial products
//   out1..2    : pairwise sums of add1..4
//   mul_out    : final sum
// A new operand pair can be accepted on every clock.
//
// NOTE: the datapath is written out for eight partial products, i.e. for the
// default MUL_WIDTH = 8 / MUL_RESULT = 16.
// -----------------------------------------------------------------------------
module unsigned_mul_2(
    mul_a,
    mul_b,
    mul_out,
    clk,
    rst_n
);
    parameter MUL_WIDTH  = 8;
    parameter MUL_RESULT = 16;

    input  [MUL_WIDTH-1:0]  mul_a;
    input  [MUL_WIDTH-1:0]  mul_b;
    input                   clk;
    input                   rst_n;
    output [MUL_RESULT-1:0] mul_out;

    reg [MUL_RESULT-1:0] mul_out;
    reg [MUL_RESULT-1:0] stored0;
    reg [MUL_RESULT-1:0] stored1;
    reg [MUL_RESULT-1:0] stored2;
    reg [MUL_RESULT-1:0] stored3;
    reg [MUL_RESULT-1:0] stored4;
    reg [MUL_RESULT-1:0] stored5;
    reg [MUL_RESULT-1:0] stored6;
    reg [MUL_RESULT-1:0] stored7;
    reg [MUL_RESULT-1:0] out1, out2;
    reg [MUL_RESULT-1:0] add1, add2, add3, add4;

    // Multiplicand zero-extended to the product width, so each partial product
    // is simply this value shifted left by the multiplier bit index.
    wire [MUL_RESULT-1:0] a_ext = {{(MUL_RESULT-MUL_WIDTH){1'b0}}, mul_a};
    wire [MUL_RESULT-1:0] zero  = {MUL_RESULT{1'b0}};

    always @ (posedge clk or negedge rst_n)
    begin
        if (!rst_n)
        begin
            // Every register written in the clocked branch is also cleared
            // here (stored7 and mul_out included) so nothing comes out of
            // reset undefined.
            stored0 <= zero;
            stored1 <= zero;
            stored2 <= zero;
            stored3 <= zero;
            stored4 <= zero;
            stored5 <= zero;
            stored6 <= zero;
            stored7 <= zero;
            add1    <= zero;
            add2    <= zero;
            add3    <= zero;
            add4    <= zero;
            out1    <= zero;
            out2    <= zero;
            mul_out <= zero;
        end
        else
        begin
            // Stage 1: partial products. Their right-hand sides are the input
            // ports themselves, so they capture the operands presented on
            // this clock edge without any earlier register stage.
            stored0 <= mul_b[0] ? (a_ext     ) : zero;
            stored1 <= mul_b[1] ? (a_ext << 1) : zero;
            stored2 <= mul_b[2] ? (a_ext << 2) : zero;
            stored3 <= mul_b[3] ? (a_ext << 3) : zero;
            stored4 <= mul_b[4] ? (a_ext << 4) : zero;
            stored5 <= mul_b[5] ? (a_ext << 5) : zero;
            stored6 <= mul_b[6] ? (a_ext << 6) : zero;
            stored7 <= mul_b[7] ? (a_ext << 7) : zero;

            // Stage 2: first level of the adder tree.
            add1 <= stored1 + stored0;
            add2 <= stored3 + stored2;
            add3 <= stored5 + stored4;
            add4 <= stored6 + stored7;

            // Stage 3: second level of the adder tree.
            out1 <= add1 + add2;
            out2 <= add3 + add4;

            // Stage 4: final sum.
            mul_out <= out1 + out2;
        end
    end

endmodule
2、仿真程序
`timescale 1ns / 1ps
// Testbench for unsigned_mul_2: releases reset before the first rising clock
// edge, then presents a new operand pair shortly after every rising edge.
module tb_unsigned_mul2();
    reg         clk, rst_n;
    reg  [7:0]  Ain, Bin;
    wire [15:0] result;

    // Device under test, connected by port name.
    unsigned_mul_2 u2 (
        .mul_a   (Ain),
        .mul_b   (Bin),
        .mul_out (result),
        .clk     (clk),
        .rst_n   (rst_n)
    );

    // Clock toggles every 5 ns once it has been given a defined value below.
    always #5 clk = ~clk;

    // Initial operands and reset sequence: reset is asserted at 1 ns and
    // released at 4 ns.
    initial
    begin
        #1;
        Ain   = 5;
        Bin   = 2;
        clk   = 0;
        rst_n = 0;
        #3;
        rst_n = 1;
    end

    // Step the operands 1 ns after each rising edge so they are stable well
    // before the next one.
    always @(posedge clk)
    begin
        #1;
        Ain = Ain + 2;
        Bin = Bin + 1;
    end
endmodule
3、仿真结果
可以看出,输入在时钟上升沿被采样到部分积寄存器后,还要依次经过三级加法流水线(add、out、mul_out),因此输出相对采样时钟沿延迟3个时钟周期。