FPGA直方图均衡化
使用FPGA对图像直方图做出均衡化,公式如下:
$$D_B = f(D_A) = \frac{D_{max}}{A_0} \sum_{i=0}^{D_A} H(i)$$
上式中,$H(i)$为第$i$级灰度的像素个数,$A_0$为图像的面积,也即像素总数,$D_{max}$为灰度值的最大值。因此,
计算均衡后的图像步骤如下:
- 首先计算出当前图像的直方图H(i)
- 其次计算像素直方图累计和
- 将上式乘以灰度值的最大值
- 将上式除以图像像素总数
后两步运算统称为归一化运算。
要把上述算法映射到FPGA中,第一步需要做的工作是什么呢?自然是帧缓存。
这是由于直方图统计需要至少一帧的数据才能完成。第一帧完成后,根据上述步骤计算出来的累加和$\sum_{i=0}^{D_A} H(i)$中的$D_A$,已经是一帧之前的像素值了。要得到这个值进行累加和查找,就必须要至少缓存一帧图像。
加速操作-近似直方图均衡化
在实际应用中,帧缓存是费时、费力又费资源的一件事情。在许多情况下,图像内容的变化比较缓慢,此时可以采用一种近似做法:在建立当前帧直方图统计结果的同时,使用从前一帧得到的映射对当前帧进行变换。这样得到的直方图均衡化结果可能不是非常准确,但是消耗的资源和处理的延时都会显著减少。
Verilog
/***********************************************************
********* Company: WHUT
********* Engineer: ZhengXiaoliang
********* Design Name:
********* Module Name: hist_equalized.v
********* Project Name: Image Process
********* Description: histogram equalized operation
********* Dependencies:
***********************************************************/
// Engineer: radiumlrb
// Date: 2023.10.03
// Version: 1.0
`timescale 1ns/1ns
// hist_equalized
//
// Streaming histogram-equalisation stage. Each incoming pixel value is used
// as a read address into the histogram_2d instance; the count read back is
// scaled by D_max / (IW*IH) and emitted as the equalised pixel.
// NOTE(review): this assumes hist_cnt_out is the *cumulative* count for grey
// level hist_cnt_addr taken from a previously completed frame - confirm
// against histogram_2d, which is not part of this file.
//
// Parameters
//   DW : pixel width in bits (the 640x512 scaling hard-codes 255, so DW must be 8 there)
//   IH : image height in lines
//   IW : image width in pixels
//   TW : width of the histogram count word
// Only IW x IH = 512x512 and 640x512 have a scaling pipeline; any other size
// elaborates with no driver for dout.
//
// Ports
//   rst_n      : asynchronous reset, active low
//   clk        : pixel clock
//   din_valid  : high when din carries a pixel
//   din        : input pixel
//   vsync      : frame sync, forwarded unchanged to vsync_out
//   dout       : equalised pixel
//   dout_valid : din_valid (after frame gating) delayed by latency+1 clocks
module hist_equalized(
rst_n,
clk,
din_valid,
din,
dout,
vsync,
dout_valid,
vsync_out
);
    parameter DW = 8;
    parameter IH = 512;
    parameter IW = 640;
    parameter TW = 32;

    localparam TOTAL_CNT  = IW * IH;
    localparam HALF_WIDTH = (TW>>1);
    localparam latency    = 6;

    input           rst_n;
    input           clk;
    input           din_valid;
    input  [DW-1:0] din;
    output [DW-1:0] dout;
    input           vsync;
    output          vsync_out;
    output          dout_valid;

    reg  [DW-1:0] hist_cnt_addr;
    wire [TW-1:0] hist_cnt_out;

    // Named parameter override instead of defparam (defparam is deprecated
    // and poorly supported by some tools).
    histogram_2d #(
        .DW(DW),
        .IH(IH),
        .IW(IW)
    ) hist (
        .rst_n(rst_n),
        .clk(clk),
        .din_valid(din_valid),
        .din(din),
        .vsync(vsync),
        .hist_cnt_addr(hist_cnt_addr),
        .hist_cnt_out(hist_cnt_out)
    );

    wire       vsync_fall;
    wire       valid;
    reg  [1:0] frame_cnt;
    reg        hist_valid_temp;
    reg        vsync_r;

    // Frame gating. The mapping read from histogram_2d is only meaningful
    // once a complete frame has been accumulated, so pixels are ignored until
    // two vsync falling edges (which bracket one full frame) have been seen.
    // NOTE(review): the two-edge threshold is an assumption - confirm against
    // the frame timing histogram_2d needs before its counts are usable.
    always @(posedge clk or negedge rst_n)
        if (!rst_n) begin
            vsync_r         <= 1'b0;
            frame_cnt       <= 2'b00;
            hist_valid_temp <= 1'b0;
        end
        else begin
            vsync_r <= #1 vsync;
            // Saturate at 2 so the counter can never wrap back to 0.
            if (vsync_fall && (frame_cnt != 2'b10))
                frame_cnt <= #1 frame_cnt + 2'b01;
            // Sticky: once enabled the stage stays enabled until reset.
            if (frame_cnt == 2'b10)
                hist_valid_temp <= #1 1'b1;
        end

    assign vsync_fall = vsync_r & ~vsync;
    assign valid      = hist_valid_temp & din_valid;

    // Delay line for the valid flag; tap [latency] qualifies dout.
    reg [latency:0] valid_r;
    always @(posedge clk or negedge rst_n)
        if (!rst_n) begin
            valid_r[latency:0] <= {(latency+1){1'b0}};
        end
        else begin
            valid_r <= #1 {valid_r[latency-1:0], valid};
        end

    // Input pixel delayed by one clock so that it lines up with valid_r[0].
    reg [DW-1:0] din_r;
    always @(posedge clk or negedge rst_n)
        if (!rst_n) begin
            din_r <= {DW{1'b0}};
        end
        else begin
            din_r <= #1 din;
        end

    // The pixel value is the histogram read address.
    always @(posedge clk or negedge rst_n)
        if (!rst_n) begin
            hist_cnt_addr <= {DW{1'b0}};
        end
        else begin
            if (valid_r[0])
                hist_cnt_addr <= #1 din_r;
        end

    reg [2*TW-1:0] mul_temp[0:2];
    reg [DW-1:0]   dout_temp;

    // NOTE(review): the stage enables below (valid_r[1], valid_r[2]) are kept
    // exactly as in the original. Whether they line up with hist_cnt_out and
    // with dout_valid = valid_r[latency] depends on the read latency of
    // histogram_2d, which is not visible here - confirm in simulation.

    // 512 x 512: dout = hist_cnt_out * (2^DW - 1) / 2^18
    generate
        if ((IW == 512) & (IH == 512)) begin : IW_512
            always @(posedge clk or negedge rst_n)
                if (!rst_n) begin
                    mul_temp[0] <= {(2*TW){1'b0}};
                    mul_temp[1] <= {(2*TW){1'b0}};
                    dout_temp   <= {DW{1'b0}};
                end
                else begin
                    if (valid_r[1]) begin
                        // (hist_cnt_out << DW) - hist_cnt_out = hist_cnt_out * (2^DW - 1)
                        mul_temp[0] <= #1 {{(TW-DW){1'b0}}, hist_cnt_out[TW-1:0], {DW{1'b0}}}
                                        - {{TW{1'b0}}, hist_cnt_out};
                        // divide by 512*512 = 2^18
                        mul_temp[1] <= #1 {{18{1'b0}}, mul_temp[0][2*TW-1:18]};
                    end
                    if (valid_r[2])
                        dout_temp <= #1 mul_temp[1][DW-1:0];
                end
        end
    endgenerate

    // 640 x 512: 255 / (640*512) = 51 / 2^16, and 51 = 32 + 16 + 2 + 1.
    // DW must be 8 for this branch.
    generate
        if ((IW == 640) & (IH == 512)) begin : IW_640
            wire [2*TW-1:0] dout_tmp;
            // divide by 2^16
            assign dout_tmp = {{16{1'b0}}, mul_temp[2][2*TW-1:16]};

            always @(posedge clk or negedge rst_n)
                if (!rst_n) begin
                    mul_temp[0] <= {(2*TW){1'b0}};
                    mul_temp[1] <= {(2*TW){1'b0}};
                    mul_temp[2] <= {(2*TW){1'b0}};
                    dout_temp   <= {DW{1'b0}};
                end
                else begin
                    if (valid_r[1]) begin
                        // Both partial products belong to the same pixel, so
                        // they are captured under the same enable.
                        // hist_cnt_out*32 + hist_cnt_out*16
                        mul_temp[0] <= #1 {{(TW-5){1'b0}}, hist_cnt_out[TW-1:0], {5{1'b0}}}
                                        + {{(TW-4){1'b0}}, hist_cnt_out[TW-1:0], {4{1'b0}}};
                        // hist_cnt_out*2 + hist_cnt_out*1
                        mul_temp[1] <= #1 {{TW{1'b0}}, hist_cnt_out[TW-1:0]}
                                        + {{(TW-1){1'b0}}, hist_cnt_out[TW-1:0], {1{1'b0}}};
                        // hist_cnt_out * 51
                        mul_temp[2] <= #1 mul_temp[0] + mul_temp[1];
                    end
                    if (valid_r[2])
                        dout_temp <= #1 dout_tmp[DW-1:0];
                end
        end
    endgenerate

    assign dout       = dout_temp;
    assign dout_valid = valid_r[latency];
    assign vsync_out  = vsync;
endmodule
VHDL&&TESTBENCH
-- This entity performs histogram equalization on an input image.
-- It takes in an 8-bit input signal and outputs an 8-bit output signal.
-- The entity also takes in a vsync signal and outputs a vsync_out signal.
-- The entity uses a histogram_2d component to generate a histogram of the input image. The histogram is then used to perform histogram equalization on the input image. The entity has several generic parameters that can be adjusted to fit the specific needs of the user. The entity has a latency of 6 clock cycles.
-- Engineer: Radiumlrb
-- Create Date: 20231004
-- Design Name: hist_equalized
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Port/generic interface of the histogram-equalisation stage.
entity hist_equalized is
  generic (
    DW      : positive := 8;    -- pixel width in bits
    IH      : positive := 512;  -- image height in lines
    IW      : positive := 640;  -- image width in pixels
    TW      : positive := 32;   -- width of the histogram count word
    latency : positive := 6     -- index of the valid delay-line tap that drives dout_valid
  );
  port (
    rst_n      : in  std_logic;                        -- asynchronous reset, active low
    clk        : in  std_logic;                        -- pixel clock
    din_valid  : in  std_logic;                        -- high when din carries a pixel
    din        : in  std_logic_vector(DW-1 downto 0);  -- input pixel
    dout       : out std_logic_vector(DW-1 downto 0);  -- equalised pixel
    vsync      : in  std_logic;                        -- frame sync input
    dout_valid : out std_logic;                        -- qualifies dout
    vsync_out  : out std_logic                         -- frame sync output
  );
end entity hist_equalized;
-- RTL of the streaming histogram-equalisation stage.
--
-- Each incoming pixel value is used as the read address into histogram_2d;
-- the count read back is scaled by D_max / (IW*IH) and emitted as the
-- equalised pixel.
-- NOTE(review): this assumes hist_cnt_out is the cumulative count for grey
-- level hist_cnt_addr from a previously completed frame - confirm against
-- histogram_2d, which is not part of this file.
architecture rtl of hist_equalized is
  constant TOTAL_CNT : integer := IW * IH;

  -- Pipeline registers for the fixed-point scaling.
  type mul_array_t is array (0 to 2) of unsigned(2*TW-1 downto 0);

  component histogram_2d is
    generic (
      DW : positive := 8;
      IH : positive := 512;
      IW : positive := 640
    );
    port (
      rst_n         : in  std_logic;
      clk           : in  std_logic;
      din_valid     : in  std_logic;
      din           : in  std_logic_vector(DW-1 downto 0);
      vsync         : in  std_logic;
      -- The read address is driven by this architecture, so it is an input
      -- of the histogram block.
      hist_cnt_addr : in  std_logic_vector(DW-1 downto 0);
      hist_cnt_out  : out std_logic_vector(TW-1 downto 0)
    );
  end component histogram_2d;

  signal hist_cnt_addr   : std_logic_vector(DW-1 downto 0);
  signal hist_cnt_out    : std_logic_vector(TW-1 downto 0);
  signal hist_cnt_ext    : unsigned(2*TW-1 downto 0);

  signal vsync_fall      : std_logic;
  signal valid           : std_logic;
  signal frame_cnt       : unsigned(1 downto 0);
  signal hist_valid_temp : std_logic;
  signal vsync_r         : std_logic;
  signal valid_r         : std_logic_vector(latency downto 0);
  signal din_r           : std_logic_vector(DW-1 downto 0);
  signal mul_temp        : mul_array_t;
  signal dout_temp       : std_logic_vector(DW-1 downto 0);
begin

  -- Only these two frame sizes have a scaling pipeline below.
  assert (IW = 512 and IH = 512) or (IW = 640 and IH = 512)
    report "hist_equalized: only 512x512 and 640x512 frame sizes are implemented"
    severity failure;

  hist : histogram_2d
    generic map (
      DW => DW,
      IH => IH,
      IW => IW
    )
    port map (
      rst_n         => rst_n,
      clk           => clk,
      din_valid     => din_valid,
      din           => din,
      vsync         => vsync,
      hist_cnt_addr => hist_cnt_addr,
      hist_cnt_out  => hist_cnt_out
    );

  -- Frame gating: ignore pixels until two vsync falling edges (which bracket
  -- one full frame) have been seen.
  -- NOTE(review): the two-edge threshold is an assumption - confirm against
  -- the frame timing histogram_2d needs before its counts are usable.
  process (clk, rst_n)
  begin
    if rst_n = '0' then
      vsync_r         <= '0';
      frame_cnt       <= (others => '0');
      hist_valid_temp <= '0';
    elsif rising_edge(clk) then
      vsync_r <= vsync;
      -- Saturate at 2 so the counter can never wrap back to 0.
      if vsync_fall = '1' and frame_cnt /= 2 then
        frame_cnt <= frame_cnt + 1;
      end if;
      -- Sticky: once enabled the stage stays enabled until reset.
      if frame_cnt = 2 then
        hist_valid_temp <= '1';
      end if;
    end if;
  end process;

  vsync_fall <= vsync_r and not vsync;
  valid      <= hist_valid_temp and din_valid;

  -- Delay line for the valid flag; tap (latency) qualifies dout.
  process (clk, rst_n)
  begin
    if rst_n = '0' then
      valid_r <= (others => '0');
    elsif rising_edge(clk) then
      valid_r <= valid_r(valid_r'high-1 downto 0) & valid;
    end if;
  end process;

  -- Input pixel delayed by one clock so that it lines up with valid_r(0).
  process (clk, rst_n)
  begin
    if rst_n = '0' then
      din_r <= (others => '0');
    elsif rising_edge(clk) then
      din_r <= din;
    end if;
  end process;

  -- The pixel value is the histogram read address.
  process (clk, rst_n)
  begin
    if rst_n = '0' then
      hist_cnt_addr <= (others => '0');
    elsif rising_edge(clk) then
      if valid_r(0) = '1' then
        hist_cnt_addr <= din_r;
      end if;
    end if;
  end process;

  -- Zero-extended count, shared by both scaling pipelines.
  hist_cnt_ext <= resize(unsigned(hist_cnt_out), 2*TW);

  -- NOTE(review): whether the stage enables valid_r(1)/valid_r(2) line up
  -- with hist_cnt_out and with dout_valid = valid_r(latency) depends on the
  -- read latency of histogram_2d - confirm in simulation.

  -- 512 x 512: dout = hist_cnt_out * (2^DW - 1) / 2^18
  gen_512 : if IW = 512 and IH = 512 generate
    process (clk, rst_n)
    begin
      if rst_n = '0' then
        mul_temp(0) <= (others => '0');
        mul_temp(1) <= (others => '0');
        dout_temp   <= (others => '0');
      elsif rising_edge(clk) then
        if valid_r(1) = '1' then
          -- (h << DW) - h = h * (2^DW - 1)
          mul_temp(0) <= shift_left(hist_cnt_ext, DW) - hist_cnt_ext;
          -- divide by 512*512 = 2^18
          mul_temp(1) <= shift_right(mul_temp(0), 18);
        end if;
        if valid_r(2) = '1' then
          dout_temp <= std_logic_vector(mul_temp(1)(DW-1 downto 0));
        end if;
      end if;
    end process;
  end generate gen_512;

  -- 640 x 512: 255 / (640*512) = 51 / 2^16, and 51 = 32 + 16 + 2 + 1.
  -- DW must be 8 for this branch.
  gen_640 : if IW = 640 and IH = 512 generate
    process (clk, rst_n)
    begin
      if rst_n = '0' then
        mul_temp(0) <= (others => '0');
        mul_temp(1) <= (others => '0');
        mul_temp(2) <= (others => '0');
        dout_temp   <= (others => '0');
      elsif rising_edge(clk) then
        if valid_r(1) = '1' then
          -- h*32 + h*16
          mul_temp(0) <= shift_left(hist_cnt_ext, 5) + shift_left(hist_cnt_ext, 4);
          -- h*2 + h*1
          mul_temp(1) <= shift_left(hist_cnt_ext, 1) + hist_cnt_ext;
          -- h*51
          mul_temp(2) <= mul_temp(0) + mul_temp(1);
        end if;
        if valid_r(2) = '1' then
          -- divide by 2^16 and keep the low DW bits of the quotient
          dout_temp <= std_logic_vector(mul_temp(2)(DW+15 downto 16));
        end if;
      end if;
    end process;
  end generate gen_640;

  dout       <= dout_temp;
  dout_valid <= valid_r(latency);
  vsync_out  <= vsync;
end architecture rtl;
--Testbench for hist_equalized entity
library ieee;
use ieee.std_logic_1164.all;
-- Smoke-test bench for hist_equalized: applies reset, a few vsync pulses and
-- two short bursts of pixels (0x01..0x08, then 0x09..0x10) separated by a
-- one-clock gap in din_valid. It does not check dout; inspect the waveform.
entity hist_equalized_tb is
end entity hist_equalized_tb;

architecture sim of hist_equalized_tb is
  constant CLK_PERIOD : time := 10 ns;

  type pixel_array_t is array (natural range <>) of std_logic_vector(7 downto 0);
  constant BURST_1 : pixel_array_t :=
    (x"01", x"02", x"03", x"04", x"05", x"06", x"07", x"08");
  constant BURST_2 : pixel_array_t :=
    (x"09", x"0A", x"0B", x"0C", x"0D", x"0E", x"0F", x"10");

  signal rst_n      : std_logic := '0';
  signal clk        : std_logic := '0';
  signal din_valid  : std_logic := '0';
  signal din        : std_logic_vector(7 downto 0) := (others => '0');
  signal dout       : std_logic_vector(7 downto 0);
  signal vsync      : std_logic := '0';
  signal dout_valid : std_logic;
  signal vsync_out  : std_logic;

  -- Set by the stimulus process to stop the clock so the simulation ends.
  signal sim_done   : boolean := false;
begin

  uut : entity work.hist_equalized
    generic map (
      DW      => 8,
      IH      => 512,
      IW      => 640,
      TW      => 32,
      latency => 6
    )
    port map (
      rst_n      => rst_n,
      clk        => clk,
      din_valid  => din_valid,
      din        => din,
      dout       => dout,
      vsync      => vsync,
      dout_valid => dout_valid,
      vsync_out  => vsync_out
    );

  -- Free-running clock. Rising edges fall at 5 ns, 15 ns, ... so the
  -- stimulus, which changes on 10 ns boundaries, is stable at every edge.
  clk_gen : process
  begin
    while not sim_done loop
      clk <= '0';
      wait for CLK_PERIOD / 2;
      clk <= '1';
      wait for CLK_PERIOD / 2;
    end loop;
    wait;
  end process clk_gen;

  stimulus : process
  begin
    -- Hold reset with all inputs idle.
    rst_n     <= '0';
    din_valid <= '0';
    din       <= (others => '0');
    vsync     <= '0';
    wait for 10 ns;
    rst_n <= '1';
    wait for 10 ns;

    -- A few frame-sync pulses so that a DUT which waits for frame
    -- boundaries before producing output can start up.
    for f in 1 to 2 loop
      vsync <= '1';
      wait for 2 * CLK_PERIOD;
      vsync <= '0';
      wait for 2 * CLK_PERIOD;
    end loop;

    -- First burst of pixels.
    din_valid <= '1';
    for i in BURST_1'range loop
      din <= BURST_1(i);
      wait for CLK_PERIOD;
    end loop;

    -- One-clock gap in the valid signal.
    din_valid <= '0';
    wait for CLK_PERIOD;

    -- Second burst of pixels.
    din_valid <= '1';
    for i in BURST_2'range loop
      din <= BURST_2(i);
      wait for CLK_PERIOD;
    end loop;
    din_valid <= '0';
    wait for CLK_PERIOD;

    -- Let the pipeline drain, then stop the clock.
    wait for 10 * CLK_PERIOD;
    sim_done <= true;
    wait;
  end process stimulus;
end architecture sim;
版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 3.0 许可协议。转载请注明出处!