Verilog实现卷积运算
卷积的运算原理

构造一个3 * 3的卷积核,并利用该卷积核完成与6×6矩阵的卷积运算,数据位宽8bit补码数, 结果位宽20bit补码数。
卷积的基本过程如下:
先对卷积核进行180度翻转(通过数据读写顺序的调度实现),再将3×3卷积核的中心对准6×6矩阵中的每个数,对重叠位置的数据做乘累加得出结果,如此往复。输入数据为8bit补码,实际有效7bit;输出数据为20bit补码,实际有效19bit。卷积累加的位宽增长按3×3=9bit估算(这是保守取值,严格来说9项累加只需增加⌈log₂9⌉=4bit),所以单个乘法最多增加19-7-9=3bit。因此卷积核采用3bit有效位,即4bit补码数。
内置电路图如图二所示:

正如第二部分对本次设计的介绍,我们要做到的是:对模拟信号的采样由A/D转换器来完成,而卷积过程由信号的移位来实现。为了设计卷积运算器,首先要设计RAM和A/D转换器的Verilog HDL模型。在电子工业发达的国家,可以通过商业渠道得到非常准确的外围器件的虚拟模型;如果没有外围器件的虚拟模型,就需要自己编写。因为RAM和A/D转换器不是我们设计的硬件对象,所以需要的只是它们的行为模型。不过,精确的行为模型需要认真细致地编写,并不比可综合模块容易。
运算过程简介
系统内置3×3的4bit补码数的卷积核

外部输入6*6的8比特补码数:

// TESTBENCH
// Simulation-only driver for the CONV module:
//   0. loads a 6x6 matrix from "Data_input.txt" and prints it,
//   1. streams the 36 samples into the DUT, one per clock period,
//   2. waits for CONV_finish,
//   3. reads back the 16 result words, one per clock period, and prints them.
`timescale 1us/1us

module TESTBENCH();

    parameter period  = 10;   // full clock period, in timescale units
    parameter hperiod = 5;    // half clock period

    reg  signed [7:0]  TiData[1:6][1:6];   // Test input Data
    reg  signed [19:0] ToData[1:4][1:4];   // Test output Data
    reg  signed [7:0]  TiDataSingle;       // for transmission
    wire signed [19:0] ToDataSingle;       // for transmission

    reg  clk;
    reg  reset;
    reg  CONV_start;
    wire CONV_finish;

    reg [7:0] i;
    reg [7:0] j;

    CONV CONV_T(
        .reset      (reset),
        .clk        (clk),
        .CONV_start (CONV_start),
        .CONV_finish(CONV_finish),
        .CONV_iData (TiDataSingle),
        .CONV_oData (ToDataSingle)
    );

    // Free-running clock: starts at 0, so rising edges fall on odd multiples
    // of hperiod and the stimulus below (which changes on multiples of
    // period) is aligned to falling edges.
    always #hperiod clk = !clk;

    initial begin
        $display("0.Load Data");
        $readmemh("Data_input.txt", TiData);
        for (i = 1; i < 7; i = i + 1)
            $display("%d %d %d %d %d %d",
                     TiData[i][1], TiData[i][2], TiData[i][3],
                     TiData[i][4], TiData[i][5], TiData[i][6]);

        clk        = 0;
        CONV_start = 0;
        reset      = 1;               // Reset Chip
        #period reset = 0;            // Chip Working
        #period CONV_start = 1;       // CONV start and writing data

        // align test data to the negedge of clk
        $display("1.Write Data");
        for (i = 1; i < 7; i = i + 1)
            for (j = 1; j < 7; j = j + 1) begin
                TiDataSingle = TiData[i][j];
                #period;
            end
        CONV_start = 0;               // finish writing data

        $display("2.Convolution");
        while (!CONV_finish)
            #period;
        #period;                      // let the DUT present its first output word

        $display("3.Read Data");
        for (i = 1; i < 5; i = i + 1)
            for (j = 1; j < 5; j = j + 1) begin
                ToData[i][j] = ToDataSingle;
                // The DUT emits one result per clock, so step one period
                // between reads; without this delay all 16 entries would
                // capture the same word.
                #period;
            end

        for (i = 1; i < 5; i = i + 1)
            $display("%d %d %d %d",
                     ToData[i][1], ToData[i][2], ToData[i][3], ToData[i][4]);

        $display("End");
        $finish;                      // the clock never stops on its own
    end

endmodule
Verilog源码
// CONV: convolves a 6x6 matrix of signed 8-bit samples with a fixed 3x3
// signed 4-bit kernel and produces a 4x4 matrix of signed 20-bit results.
//
// Protocol, as implemented below:
//   * reset (active high, asynchronous) initialises all counters and flags.
//   * While CONV_start is high, one sample per rising clk edge is stored,
//     row by row, until 36 samples have been taken.
//   * The 16 results are then computed, one per clock; CONV_finish goes high
//     together with the last one.
//   * While CONV_finish is high and CONV_start is low, one result per clock
//     is driven on CONV_oData, row by row; after the 16th word the last
//     result keeps being driven.
//   * CONV_finish is cleared only by reset, and the load logic is held in
//     reset while CONV_finish is high, so reset must be pulsed between runs.
module CONV(
    input  wire               reset,
    input  wire               clk,
    input  wire               CONV_start,
    output reg                CONV_finish,
    input  wire signed [7:0]  CONV_iData,
    output reg  signed [19:0] CONV_oData
);

    // Fixed kernel, stored row-major in entries 1..9. Driven as constants so
    // it is valid even if no rising edge of reset is ever observed.
    wire signed [3:0] CONV_core[1:9];
    assign CONV_core[1] = 4'h1;
    assign CONV_core[2] = 4'h2;
    assign CONV_core[3] = 4'hf;
    assign CONV_core[4] = 4'hd;
    assign CONV_core[5] = 4'h5;
    assign CONV_core[6] = 4'h3;
    assign CONV_core[7] = 4'he;
    assign CONV_core[8] = 4'h1;
    assign CONV_core[9] = 4'h2;

    reg [3:0] ii_count;   // input row being written
    reg [3:0] ij_count;   // input column being written
    reg [3:0] ci_count;   // result row being computed
    reg [3:0] cj_count;   // result column being computed
    reg [3:0] oi_count;   // result row being output
    reg [3:0] oj_count;   // result column being output

    reg signed [7:0]  CONV_iArrayData[1:6][1:6];   // input Data
    reg signed [19:0] CONV_oArrayData[1:4][1:4];   // output Data
    reg               CONV_StartCal;               // Start convolution

    wire signed [7:0]  CONV_iReCon[1:9];   // 3x3 input window, row-major
    wire signed [19:0] CONV_mul[1:9];      // per-tap products
    wire signed [19:0] CONV_result;        // sum of the nine products

    // Window extraction and multiplication. Window entry k (1..9) is paired
    // with kernel entry 10-k, i.e. the kernel is applied rotated by 180
    // degrees.
    genvar r, c;
    generate
        for (r = 0; r < 3; r = r + 1) begin : g_row
            for (c = 0; c < 3; c = c + 1) begin : g_col
                assign CONV_iReCon[3*r + c + 1] =
                       CONV_iArrayData[ci_count + r][cj_count + c];
                assign CONV_mul[3*r + c + 1] =
                       CONV_core[9 - (3*r + c)] * CONV_iReCon[3*r + c + 1];
            end
        end
    endgenerate

    assign CONV_result = CONV_mul[1] + CONV_mul[2] + CONV_mul[3]
                       + CONV_mul[4] + CONV_mul[5] + CONV_mul[6]
                       + CONV_mul[7] + CONV_mul[8] + CONV_mul[9];

    // Load input Data
    always @(posedge clk or posedge reset or posedge CONV_finish) begin
        if (reset || CONV_finish) begin
            ii_count      <= 1;
            ij_count      <= 1;
            CONV_StartCal <= 0;
        end
        // Stop accepting samples once all 36 are stored (CONV_StartCal set);
        // otherwise holding CONV_start high would keep overwriting [6][6].
        else if (CONV_start && !CONV_StartCal) begin
            CONV_iArrayData[ii_count][ij_count] <= CONV_iData;   // Load Data
            if (ij_count < 6)
                ij_count <= ij_count + 1;
            else if (ii_count < 6) begin
                ii_count <= ii_count + 1;
                ij_count <= 1;
            end
            else
                CONV_StartCal <= 1;   // last sample taken: start computing
        end
    end

    // Convolution
    always @(posedge clk or posedge reset) begin
        if (reset) begin
            ci_count    <= 1;
            cj_count    <= 1;
            CONV_finish <= 0;
        end
        else if (CONV_StartCal && (ci_count < 5)) begin
            CONV_oArrayData[ci_count][cj_count] <= CONV_result;   // Record the Result
            if (cj_count < 4)
                cj_count <= cj_count + 1;
            else if (ci_count < 4) begin
                ci_count <= ci_count + 1;
                cj_count <= 1;
            end
            else
                CONV_finish <= 1;     // last result stored
        end
    end

    // Output Data
    always @(posedge clk or posedge reset or posedge CONV_start) begin
        if (reset || CONV_start) begin
            oi_count <= 1;
            oj_count <= 1;
        end
        else if (CONV_finish && (oi_count < 5)) begin
            CONV_oData <= CONV_oArrayData[oi_count][oj_count];   // Output Data
            if (oj_count < 4)
                oj_count <= oj_count + 1;
            else if (oi_count < 4) begin
                oi_count <= oi_count + 1;
                oj_count <= 1;
            end
        end
    end

endmodule
python验证
import numpy as np
from scipy import signal
from scipy import misc  # not used below; kept so the import set is unchanged

# 6x6 input matrix of signed 8-bit test values.
input_data = [
    [1, 2, 3, 4, 5, 6],
    [17, 18, 19, 20, 21, 22],
    [33, 34, 35, 36, 37, 38],
    [65, 66, 67, 68, 69, 70],
    [-127, -126, -125, -124, -123, -122],
    [-95, -94, -93, -92, -91, -90],
]
# Matrix dimensions (rows, columns).
heigh, wid = np.shape(input_data)

# 3x3 kernel of signed 4-bit values.
weights_data = [
    [1, 2, -1],
    [-3, 5, 3],
    [-2, 1, 2],
]
heigh1, wid1 = np.shape(weights_data)

# Full 2-D convolution: (6 + 3 - 1) x (6 + 3 - 1) = 8 x 8 result.
grad = signal.convolve2d(weights_data, input_data)

# Keep only the positions where the kernel lies completely inside the input;
# for a 6x6 input and 3x3 kernel this is rows/columns 2..5, a 4x4 block.
print(grad[heigh1 - 1:heigh, wid1 - 1:wid])
小编还在成长,请大家多多指教!
发布者:全栈程序员-站长,转载请注明出处:https://javaforall.net/228108.html原文链接:https://javaforall.net
