FPGA直方图均衡化

使用FPGA对图像直方图做出均衡化,公式如下:

$$D_B = f(D_A) = \frac{D_{max}}{A_0} \sum_{i=0}^{D_A} H(i)$$

上式中,$H(i)$ 为第 $i$ 级灰度的像素个数,$A_0$ 为图像的面积,也即像素总数,$D_{max}$ 为灰度值的最大值,$D_A$、$D_B$ 分别为均衡前、后的灰度值。因此,
计算均衡后的图像步骤如下:

  1. 首先计算出当前图像的直方图H(i)
  2. 其次计算像素直方图累计和
  3. 将上式乘以灰度值的最大值
  4. 将上式除以图像像素总数

后两步运算统称为归一化运算。

要把上述算法映射到FPGA中,第一步需要做的工作是什么呢?自然是帧缓存。
这是由于直方图统计需要至少一帧的数据才能完成。第一帧统计完成后,根据上述步骤计算出来的累加和 $\sum_{i=0}^{D_A} H(i)$ 中的 $D_A$,已经是一帧之前的像素值了。要得到这个值并用它去查找累加和,就必须至少缓存一帧图像。

加速操作-近似直方图均衡化
在实际应用中,处理帧缓存是费时、费力、费资源的一件事情。在许多情况下,图像的变化比较慢,此时的一种近似做法是:在统计当前帧直方图的同时,使用由前一帧得到的映射对当前帧进行变换。这样得到的直方图均衡化结果可能不是非常准确,但是消耗的资源和处理的延时都会显著减少。

 

Verilog

/***********************************************************
********* Company: WHUT
********* Engineer:        ZhengXiaoliang
********* Design Name:      
********* Module Name:     hist_equalized.v
********* Project Name:    Image Process 
********* Description:     histogram equalized operation
********* Dependencies:    
***********************************************************/

// Engineer: radiumlrb
// Date: 2023.10.03
// Version: 1.0



`timescale 1ns/1ns

// -----------------------------------------------------------------------------
// hist_equalized
//
// Approximate (frame-buffer-free) histogram equalisation.  Each incoming pixel
// is used as a read address into the histogram_2d instance; the value read back
// (hist_cnt_out) is normalised to
//
//     dout = hist_cnt_out * (2^DW - 1) / (IW * IH)
//
// using shift/add arithmetic only.  Two image sizes are implemented:
//     512 x 512 :  (x * 255) >> 18
//     640 x 512 :  (x *  51) >> 16      (255 / (640*512) == 51 / 65536)
// Both constants assume DW == 8.
//
// Parameters
//     DW : pixel width in bits (the scaling constants require 8)
//     IH : image height in lines
//     IW : image width in pixels
//     TW : width of the histogram count bus (hist_cnt_out)
//
// Ports
//     rst_n      : asynchronous reset, active low
//     clk        : pixel clock
//     din_valid  : qualifies din
//     din        : input pixel
//     vsync      : frame sync, forwarded to histogram_2d and to vsync_out
//     dout       : equalised pixel
//     dout_valid : 'valid' delayed by latency+1 register stages
//     vsync_out  : vsync passed through unchanged
//
// NOTE(review): histogram_2d is not visible from this file.  It is presumed to
// return, for hist_cnt_addr, the cumulative count built from the previous
// frame; confirm its read latency against the stage enables below.
// -----------------------------------------------------------------------------
module hist_equalized(
	rst_n, 
	clk, 
	din_valid, 
	din, 
	dout, 
	vsync, 
	dout_valid, 
	vsync_out
);

parameter  DW = 8;   
parameter  IH = 512;
parameter  IW = 640;
parameter  TW = 32;

localparam TOTAL_CNT = IW * IH;
localparam HALF_WIDTH = (TW>>1);

// dout_valid is taken from valid_r[latency].
localparam latency = 6;

// Number of vsync falling edges to wait for before the output is enabled.
// NOTE(review): 2 is an assumption (one frame to collect the histogram plus
// margin); confirm against histogram_2d.
localparam [1:0] WARMUP_FRAMES = 2'd2;

input  rst_n;
input  clk;
input  din_valid;
input  [DW-1:0]din;
output [DW-1:0]dout;
input  vsync;
output vsync_out;
output dout_valid;

reg [DW-1:0]hist_cnt_addr;
wire [TW-1:0]hist_cnt_out;

// Named parameter override replaces the deprecated defparam statements.
histogram_2d #(
	.DW(DW),
	.IH(IH),
	.IW(IW)
) hist (
	.rst_n(rst_n), 
	.clk(clk), 
	.din_valid(din_valid), 
	.din(din), 
	.vsync(vsync), 
	.hist_cnt_addr(hist_cnt_addr),
	.hist_cnt_out(hist_cnt_out)
);

wire vsync_fall;
wire valid;
reg  [1:0]frame_cnt;
reg  hist_valid_temp;
reg  vsync_r;

reg [latency:0]valid_r;
reg [DW-1:0]din_r;

// -----------------------------------------------------------------------------
// Frame warm-up: the mapping is meaningless until a histogram has been
// collected, so 'valid' stays low until WARMUP_FRAMES frame ends were seen.
// NOTE(review): assumes a frame ends on the falling edge of vsync.
// -----------------------------------------------------------------------------
assign vsync_fall = vsync_r & ~vsync;
assign valid      = hist_valid_temp & din_valid;

always @(posedge clk or negedge rst_n)
if (!rst_n) begin
	vsync_r         <= 1'b0;
	frame_cnt       <= 2'd0;
	hist_valid_temp <= 1'b0;
end
else begin
	vsync_r <= #1 vsync;
	// Saturating count so the 2-bit counter can never wrap.
	if (vsync_fall && (frame_cnt != WARMUP_FRAMES))
		frame_cnt <= #1 frame_cnt + 2'd1;
	// Sticky: once enabled the output stays enabled until reset.
	if (frame_cnt == WARMUP_FRAMES)
		hist_valid_temp <= #1 1'b1;
end

// Shift register that carries 'valid' alongside the data pipeline.
always @(posedge clk or negedge rst_n)
if (!rst_n) begin
	valid_r <= {(latency+1){1'b0}};
end
else begin
	valid_r <= #1 {valid_r[latency-1:0],valid};
end

// Delay the pixel by one clock so that it lines up with valid_r[0].
always @(posedge clk or negedge rst_n)
if (!rst_n) begin
	din_r <= {DW{1'b0}};
end
else begin
	din_r <= #1 din;
end

// The pixel value is the histogram read address.
always @(posedge clk or negedge rst_n)
if (!rst_n) begin
	hist_cnt_addr <= {DW{1'b0}};
end
else begin
	if(valid_r[0])
		hist_cnt_addr <= #1 din_r;
end

reg [2*TW-1:0]mul_temp[0:2];
reg [DW-1:0]dout_temp;

// -----------------------------------------------------------------------------
// Normalisation.  Exactly one branch is elaborated, selected by IW/IH.
// NOTE(review): the stage enables (valid_r[1], valid_r[1], valid_r[2]) are
// kept exactly as in the original; whether they match the read latency of
// histogram_2d at the end of a burst should be confirmed.
// -----------------------------------------------------------------------------
generate
// for 512*512
if ((IW == 512) && (IH == 512)) begin : IW_512

	always @(posedge clk or negedge rst_n)
	if (!rst_n) begin
		mul_temp[0] <= {(2*TW){1'b0}};
		mul_temp[1] <= {(2*TW){1'b0}};
		dout_temp   <= {DW{1'b0}};
	end
	else begin
		if(valid_r[1]) begin
			// hist_cnt_out*255 = (hist_cnt_out << DW) - hist_cnt_out, DW must be 8
			mul_temp[0] <= #1 {{(TW-DW){1'b0}},hist_cnt_out[TW-1:0],{DW{1'b0}}} - {{TW{1'b0}},hist_cnt_out};
			// divide by 512*512 = 2^18
			mul_temp[1] <= #1 {{18{1'b0}},mul_temp[0][2*TW-1:18]};
		end
		if(valid_r[2])
			dout_temp <= #1 mul_temp[1][DW-1:0];
	end
end
// for 640*512
else if ((IW == 640) && (IH == 512)) begin : IW_640
	wire [2*TW-1:0]dout_tmp ;
	// divide by 2^16: 51/65536 == 255/(640*512)
	assign dout_tmp = {{16{1'b0}},mul_temp[2][2*TW-1:16]};

	always @(posedge clk or negedge rst_n)
	if (!rst_n) begin
		mul_temp[0] <= {(2*TW){1'b0}};
		mul_temp[1] <= {(2*TW){1'b0}};
		mul_temp[2] <= {(2*TW){1'b0}};
		dout_temp   <= {DW{1'b0}};
	end
	else begin
		// hist_cnt_out*51 = (x*32 + x*16) + (x*2 + x*1), DW must be 8.
		// All three partial results share one enable so they always
		// belong to the same sample (the original left mul_temp[1]
		// outside the 'if' because of a missing begin/end).
		if(valid_r[1]) begin
			// hist_cnt_out*32 + hist_cnt_out*16
			mul_temp[0] <= #1 {{(TW-5){1'b0}},hist_cnt_out[TW-1:0],{5{1'b0}}} + {{(TW-4){1'b0}},hist_cnt_out[TW-1:0],{4{1'b0}}};
			// hist_cnt_out*2 + hist_cnt_out*1
			mul_temp[1] <= #1 {{TW{1'b0}},hist_cnt_out[TW-1:0]} + {{(TW-1){1'b0}},hist_cnt_out[TW-1:0],{1{1'b0}}};
			// hist_cnt_out*51
			mul_temp[2] <= #1 mul_temp[0] + mul_temp[1];
		end
		if(valid_r[2])
			dout_temp <= #1 dout_tmp[DW-1:0];
	end
end
// any other size: no normalisation constants are implemented
else begin : IW_UNSUPPORTED
	// synthesis translate_off
	initial begin
		$display("hist_equalized: unsupported image size IW=%0d IH=%0d (only 512x512 and 640x512 are implemented)", IW, IH);
		$finish;
	end
	// synthesis translate_on

	// Hold the output at a defined value instead of leaving it undriven.
	always @(posedge clk or negedge rst_n)
	if (!rst_n)
		dout_temp <= {DW{1'b0}};
	else
		dout_temp <= #1 {DW{1'b0}};
end
endgenerate


assign dout = dout_temp;
assign dout_valid  = valid_r[latency];
assign vsync_out  = vsync;

endmodule

 

VHDL&&TESTBENCH

-- This entity performs histogram equalization on an input image. 
-- It takes in an 8-bit input signal and outputs an 8-bit output signal. 
-- The entity also takes in a vsync signal and outputs a vsync_out signal. 
-- The entity uses a histogram_2d component to generate a histogram of the input image. The histogram is then used to perform histogram equalization on the input image. The entity has several generic parameters that can be adjusted to fit the specific needs of the user. The entity has a latency of 6 clock cycles.

-- Engineer: Radiumlrb
-- Create Date:   20231004
-- Design Name:   hist_equalized

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Interface of the histogram-equalisation block.
-- Generic and port names, order, types and defaults are part of the public
-- contract and are therefore unchanged.
entity hist_equalized is
    generic (
        DW      : positive := 8;    -- pixel width in bits
        IH      : positive := 512;  -- image height in lines
        IW      : positive := 640;  -- image width in pixels
        TW      : positive := 32;   -- width of the histogram count bus
        latency : positive := 6     -- index of the valid_r tap used for dout_valid
    );
    port (
        -- clock and asynchronous, active-low reset
        rst_n      : in  std_logic;
        clk        : in  std_logic;
        -- input pixel stream
        din_valid  : in  std_logic;
        din        : in  std_logic_vector(DW-1 downto 0);
        -- output pixel
        dout       : out std_logic_vector(DW-1 downto 0);
        -- frame sync input
        vsync      : in  std_logic;
        -- output qualifier and forwarded frame sync
        dout_valid : out std_logic;
        vsync_out  : out std_logic
    );
end entity hist_equalized;

-- Approximate (frame-buffer-free) histogram equalisation.
--
-- Each incoming pixel is used as a read address into histogram_2d; the count
-- returned on hist_cnt_out is normalised to
--
--     dout = hist_cnt_out * (2**DW - 1) / (IW * IH)
--
-- with shift/add arithmetic.  Two image sizes are implemented:
--     512 x 512 :  (x * 255) >> 18
--     640 x 512 :  (x *  51) >> 16      (255 / (640*512) = 51 / 65536)
-- Both constants assume DW = 8.
--
-- NOTE(review): histogram_2d is not visible from this file.  It is presumed to
-- return, for hist_cnt_addr, the cumulative count built from the previous
-- frame; confirm its read latency against the stage enables used below.
architecture rtl of hist_equalized is
    constant TOTAL_CNT : integer := IW * IH;
    constant HALF_WIDTH : integer := TW/2;

    -- Number of vsync falling edges to wait for before the output is enabled.
    -- NOTE(review): 2 is an assumption; confirm against histogram_2d.
    constant WARMUP_FRAMES : natural := 2;

    signal hist_cnt_addr : std_logic_vector(DW-1 downto 0);
    signal hist_cnt_out : std_logic_vector(TW-1 downto 0);

    component histogram_2d is
        generic (
            DW : positive := 8;
            IH : positive := 512;
            IW : positive := 640
        );
        port (
            rst_n : in std_logic;
            clk : in std_logic;
            din_valid : in std_logic;
            din : in std_logic_vector(DW-1 downto 0);
            vsync : in std_logic;
            -- read address: driven by this architecture, so it is an input
            -- of the component
            hist_cnt_addr : in std_logic_vector(DW-1 downto 0);
            hist_cnt_out : out std_logic_vector(TW-1 downto 0)
        );
    end component histogram_2d;

    signal vsync_fall : std_logic;
    signal valid : std_logic;
    signal frame_cnt : unsigned(1 downto 0);
    signal hist_valid_temp : std_logic;
    signal vsync_r : std_logic;

    signal valid_r : std_logic_vector(latency downto 0);

    signal din_r : std_logic_vector(DW-1 downto 0);

    -- hist_cnt_out zero-extended to the width of the arithmetic pipeline
    signal hist_cnt_ext : unsigned(2*TW-1 downto 0);
    signal dout_temp : std_logic_vector(DW-1 downto 0);

begin

    -- Fail loudly for image sizes without normalisation constants instead of
    -- silently leaving dout undriven.
    assert ((IW = 512) or (IW = 640)) and (IH = 512)
        report "hist_equalized: only 512x512 and 640x512 images are implemented"
        severity failure;

    hist : histogram_2d
        generic map (
            DW => DW,
            IH => IH,
            IW => IW
        )
        port map (
            rst_n => rst_n,
            clk => clk,
            din_valid => din_valid,
            din => din,
            vsync => vsync,
            hist_cnt_addr => hist_cnt_addr,
            hist_cnt_out => hist_cnt_out
        );

    -- Frame warm-up: the mapping is meaningless until a histogram has been
    -- collected, so 'valid' stays low until WARMUP_FRAMES frame ends were seen.
    -- NOTE(review): assumes a frame ends on the falling edge of vsync.
    vsync_fall <= vsync_r and not vsync;
    valid      <= hist_valid_temp and din_valid;

    process (clk, rst_n)
    begin
        if rst_n = '0' then
            vsync_r         <= '0';
            frame_cnt       <= (others => '0');
            hist_valid_temp <= '0';
        elsif rising_edge(clk) then
            vsync_r <= vsync;
            -- saturating count so the 2-bit counter can never wrap
            if vsync_fall = '1' and frame_cnt /= WARMUP_FRAMES then
                frame_cnt <= frame_cnt + 1;
            end if;
            -- sticky: once enabled the output stays enabled until reset
            if frame_cnt = WARMUP_FRAMES then
                hist_valid_temp <= '1';
            end if;
        end if;
    end process;

    -- Shift register that carries 'valid' alongside the data pipeline.
    process (clk, rst_n)
    begin
        if rst_n = '0' then
            valid_r <= (others => '0');
        elsif rising_edge(clk) then
            valid_r <= valid_r(valid_r'high-1 downto 0) & valid;
        end if;
    end process;

    -- Delay the pixel by one clock so that it lines up with valid_r(0).
    process (clk, rst_n)
    begin
        if rst_n = '0' then
            din_r <= (others => '0');
        elsif rising_edge(clk) then
            din_r <= din;
        end if;
    end process;

    -- The pixel value is the histogram read address.
    process (clk, rst_n)
    begin
        if rst_n = '0' then
            hist_cnt_addr <= (others => '0');
        elsif rising_edge(clk) then
            if valid_r(0) = '1' then
                hist_cnt_addr <= din_r;
            end if;
        end if;
    end process;

    hist_cnt_ext <= resize(unsigned(hist_cnt_out), 2*TW);

    -- NOTE(review): the stage enables (valid_r(1), valid_r(1), valid_r(2))
    -- follow the Verilog reference design; confirm them against the read
    -- latency of histogram_2d.

    -- 512 x 512: dout = (hist_cnt_out * 255) >> 18
    gen_512x512 : if (IW = 512) and (IH = 512) generate
        signal scaled     : unsigned(2*TW-1 downto 0);
        signal normalized : unsigned(2*TW-1 downto 0);
    begin
        process (clk, rst_n)
        begin
            if rst_n = '0' then
                scaled     <= (others => '0');
                normalized <= (others => '0');
                dout_temp  <= (others => '0');
            elsif rising_edge(clk) then
                if valid_r(1) = '1' then
                    -- x*255 = (x << DW) - x, DW must be 8
                    scaled <= shift_left(hist_cnt_ext, DW) - hist_cnt_ext;
                    -- divide by 512*512 = 2**18
                    normalized <= shift_right(scaled, 18);
                end if;
                if valid_r(2) = '1' then
                    dout_temp <= std_logic_vector(normalized(DW-1 downto 0));
                end if;
            end if;
        end process;
    end generate gen_512x512;

    -- 640 x 512: dout = (hist_cnt_out * 51) >> 16
    gen_640x512 : if (IW = 640) and (IH = 512) generate
        signal times48 : unsigned(2*TW-1 downto 0);
        signal times3  : unsigned(2*TW-1 downto 0);
        signal times51 : unsigned(2*TW-1 downto 0);
    begin
        process (clk, rst_n)
        begin
            if rst_n = '0' then
                times48   <= (others => '0');
                times3    <= (others => '0');
                times51   <= (others => '0');
                dout_temp <= (others => '0');
            elsif rising_edge(clk) then
                if valid_r(1) = '1' then
                    -- x*32 + x*16
                    times48 <= shift_left(hist_cnt_ext, 5) + shift_left(hist_cnt_ext, 4);
                    -- x*2 + x*1
                    times3 <= shift_left(hist_cnt_ext, 1) + hist_cnt_ext;
                    -- x*51
                    times51 <= times48 + times3;
                end if;
                if valid_r(2) = '1' then
                    -- divide by 2**16 and keep the low DW bits of the quotient
                    dout_temp <= std_logic_vector(times51(16+DW-1 downto 16));
                end if;
            end if;
        end process;
    end generate gen_640x512;

    dout <= dout_temp;
    dout_valid <= valid_r(latency);
    vsync_out <= vsync;

end architecture rtl;




--Testbench for hist_equalized entity

library ieee;
use ieee.std_logic_1164.all;

-- Smoke testbench for hist_equalized: applies reset and two 8-pixel bursts.
-- It is not self-checking; dout/dout_valid are meant to be inspected in the
-- waveform viewer.
entity hist_equalized_tb is
end entity hist_equalized_tb;

architecture sim of hist_equalized_tb is
    -- One stimulus step equals one clock period; stimulus changes on the
    -- falling clock edge so it is stable around every rising edge.
    constant CLK_PERIOD : time := 10 ns;
    -- Idle clocks applied after the last pixel before the clock is stopped.
    constant DRAIN_CYCLES : positive := 20;

    type byte_vector is array (natural range <>) of std_logic_vector(7 downto 0);
    constant BURST_A : byte_vector(0 to 7) :=
        (x"01", x"02", x"03", x"04", x"05", x"06", x"07", x"08");
    constant BURST_B : byte_vector(0 to 7) :=
        (x"09", x"0A", x"0B", x"0C", x"0D", x"0E", x"0F", x"10");

    signal rst_n : std_logic := '0';
    signal clk : std_logic := '0';
    signal din_valid : std_logic := '0';
    signal din : std_logic_vector(7 downto 0) := (others => '0');
    signal dout : std_logic_vector(7 downto 0);
    signal vsync : std_logic := '0';
    signal dout_valid : std_logic;
    signal vsync_out : std_logic;

    -- Set by the stimulus process to stop the clock and end the simulation.
    signal sim_done : boolean := false;

begin

    uut : entity work.hist_equalized
        generic map (
            DW => 8,
            IH => 512,
            IW => 640,
            TW => 32,
            latency => 6
        )
        port map (
            rst_n => rst_n,
            clk => clk,
            din_valid => din_valid,
            din => din,
            dout => dout,
            vsync => vsync,
            dout_valid => dout_valid,
            vsync_out => vsync_out
        );

    -- Free-running clock.  The original testbench only ever assigned '0' to
    -- clk, so the design under test never saw a rising edge.
    clk_gen : process
    begin
        while not sim_done loop
            clk <= '0';
            wait for CLK_PERIOD / 2;
            clk <= '1';
            wait for CLK_PERIOD / 2;
        end loop;
        wait;
    end process clk_gen;

    -- Stimulus: same reset sequence, pixel values and timing as before.
    -- NOTE(review): vsync is held low and only 16 pixels are sent, as in the
    -- original; a meaningful equalised output presumably needs complete
    -- 640x512 frames delimited by vsync - extend the stimulus accordingly.
    stimulus : process
    begin
        rst_n <= '0';
        din_valid <= '0';
        din <= (others => '0');
        vsync <= '0';
        wait for CLK_PERIOD;
        rst_n <= '1';
        wait for CLK_PERIOD;

        -- first burst
        din_valid <= '1';
        for i in BURST_A'range loop
            din <= BURST_A(i);
            wait for CLK_PERIOD;
        end loop;

        -- one idle cycle between the bursts
        din_valid <= '0';
        wait for CLK_PERIOD;

        -- second burst
        din_valid <= '1';
        for i in BURST_B'range loop
            din <= BURST_B(i);
            wait for CLK_PERIOD;
        end loop;
        din_valid <= '0';

        -- keep clocking for a while, then stop so the run terminates
        wait for DRAIN_CYCLES * CLK_PERIOD;
        sim_done <= true;
        wait;
    end process stimulus;

end architecture sim;

 

posted @ 2023-10-03 13:23  Radiumlrb  阅读(216)  评论(0编辑  收藏  举报