Skip to content

Commit

Permalink
fix timing
Browse files Browse the repository at this point in the history
  • Loading branch information
awengz committed Aug 1, 2024
1 parent 3e3bda2 commit 985e8ac
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 86 deletions.
69 changes: 61 additions & 8 deletions deepsocflow/rtl/proc_engine.sv
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,8 @@ module proc_engine #(

logic [COLS-1:0] en_outshift, sel_outshift, outshift_flag;
logic shift_out_ready_last_col_prev;
logic [BITS_COLS-1:0] count_outshift;
logic cnt_en;

logic [BITS_COLS-1:0] cnt_outshift, cnt_acc;
logic cnt_outshift_valid, cnt_acc_valid;
logic [COLS-1:0] s_axis_tvalid;

genvar k2, c_1;
Expand Down Expand Up @@ -322,9 +321,63 @@ endgenerate
assign m_last_pkt = shift_last_pkt [COLS-1];

// -------------- OUTPUT SHIFTER ----------------
//logic [BITS_COLS-1:0] highest_acc, lowest_outshift;
//logic cnt_acc_valid_next, cnt_outshift_valid_next;
// integer a, b;
// always_comb begin // priority encoder - used to find highest acc.
// highest_acc = 0;
// cnt_acc_valid_next = 0;
// for (a = 0; a < COLS-1; a++) begin
// if(acc_m_valid_next[a] == 1) begin
// highest_acc = a;
// cnt_acc_valid_next = 1;
// break;
// end
// end
// end

// always_comb begin //TODO priority encoder - used to find lowest outshift.
// lowest_outshift = 0;
// cnt_outshift_valid_next = 0;
// for (b = 0; b < COLS-1; b++) begin
// if(~shift_out_ready[b]) begin
// lowest_outshift = b;
// cnt_outshift_valid_next = 1;
// break;
// end
// end
// end

// Sequential bookkeeping for the two column counters (cnt_acc, cnt_outshift)
// and their valid flags (cnt_acc_valid, cnt_outshift_valid). These registers
// feed the `freeze` comparison that follows this block.
// All assignments are non-blocking, so every condition below is evaluated
// against the register values from before this clock edge.
always@(posedge clk `OR_NEGEDGE(resetn))begin
if(!resetn) begin
// Reset clears both valid flags and both counters in one concatenated assignment.
{cnt_acc_valid, cnt_outshift_valid, cnt_acc, cnt_outshift} <= 0;
end
else begin
// cnt_acc_valid: set when column 0 is enabled and acc_m_valid_next[0] is high.
// The set has priority over the clear because of the if / else-if ordering.
if(en[0] && acc_m_valid_next[0]) cnt_acc_valid <= 1;
else if (shift_out_ready[COLS-1] && acc_m_valid[COLS-1]) cnt_acc_valid <= 0; // reset condition

// cnt_acc: counts 0 .. COLS-1 and wraps to 0, advancing only while
// cnt_acc_valid (previous-cycle value) and en[0] are both high.
if (cnt_acc_valid && en[0]) begin
if (cnt_acc==COLS-1) cnt_acc <= 0;
else cnt_acc <= cnt_acc + 1;
end

// cnt_acc_valid <= cnt_acc_valid_next;
// cnt_acc <= highest_acc;

//assign en_mac = &(~acc_m_valid | shift_out_ready);
//assign en[0] = ~acc_m_valid[0] | shift_out_ready[0];
// cnt_outshift_valid <= cnt_outshift_valid_next;
// cnt_outshift <= lowest_outshift;
// cnt_outshift_valid: set once sel_outshift[0] is low.
// NOTE(review): nothing in this block clears it again except reset, so it is
// sticky after the first set - confirm this is intended.
if(~sel_outshift[0]) cnt_outshift_valid <= 1;

// cnt_outshift: counts 0 .. COLS-1 and wraps to 0, advancing when m_ready and
// outshift_flag[COLS-1] are both high.
if (m_ready & outshift_flag[COLS-1]) begin
if (cnt_outshift==COLS-1) cnt_outshift <= 0;
else cnt_outshift <= cnt_outshift + 1;
end
end
end

// freeze: combinational flag that is high only when both counters are marked
// valid and cnt_outshift is less than or equal to cnt_acc. It gates en[c]
// for every column c <= cnt_acc (see the `assign en[c]` inside the column loop).
wire freeze;
assign freeze = ((cnt_outshift <= cnt_acc) && cnt_acc_valid && cnt_outshift_valid);

for(c=0; c<COLS; c++) begin : C
if(c<COLS-1) begin
// If the output shifter regs of both the current column and the next column hold valid data, and the accumulator also has valid data, the column gets frozen
Expand All @@ -340,7 +393,7 @@ endgenerate
end
end
else begin // Final Column
assign mac_freeze[c] = (acc_m_valid[c] & ~shift_out_ready[c]);
//assign mac_freeze[c] = (acc_m_valid[c] & ~shift_out_ready[c]);
//assign en[c] = (~acc_m_valid[c] | shift_out_ready[c]);
always_ff @(posedge clk `OR_NEGEDGE(resetn))
if (!resetn) begin
Expand All @@ -353,8 +406,8 @@ endgenerate
end

//assign en[c] = &(~mac_freeze);
assign en[c] = &(~mac_freeze[COLS-1:c]); // all cols to the left of frozen column should freeze.

//assign en[c] = &(~mac_freeze[COLS-1:c]); // all cols to the left of frozen column should freeze.
assign en[c] = ~(freeze && (c <= cnt_acc));
assign acc_m_valid_next[c] = !sel_shift[c] & mul_m_valid[c] & (mul_m_user[c].is_config | mul_m_user[c].is_cin_last);

always_ff @(posedge clk `OR_NEGEDGE(resetn))
Expand Down
2 changes: 1 addition & 1 deletion deepsocflow/tcl/fpga/zcu104.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ create_project ${PROJECT_NAME} ${PROJECT_NAME} -part xczu7ev-ffvc1156-2-e -force
set_property board_part xilinx.com:zcu104:part0:1.1 [current_project]

create_bd_design "design_1"
create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.4 zynq_ultra_ps_e_0
create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.5 zynq_ultra_ps_e_0
apply_bd_automation -rule xilinx.com:bd_rule:zynq_ultra_ps_e -config {apply_board_preset "1" } [get_bd_cells zynq_ultra_ps_e_0]
set_property -dict [list CONFIG.PSU__USE__M_AXI_GP1 {1} CONFIG.PSU__USE__S_AXI_GP0 {1} CONFIG.PSU__USE__S_AXI_GP1 {1} CONFIG.PSU__USE__S_AXI_GP2 {1} CONFIG.PSU__CRL_APB__PL0_REF_CTRL__FREQMHZ $FREQ CONFIG.PSU__USE__M_AXI_GP0 {0} CONFIG.PSU__QSPI__PERIPHERAL__ENABLE {0} CONFIG.PSU__SAXIGP0__DATA_WIDTH $AXI_WIDTH CONFIG.PSU__SAXIGP1__DATA_WIDTH $AXI_WIDTH CONFIG.PSU__SAXIGP2__DATA_WIDTH $AXI_WIDTH] [get_bd_cells zynq_ultra_ps_e_0]

Expand Down
18 changes: 9 additions & 9 deletions run/param_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
from keras.utils import to_categorical
from qkeras.utils import load_qmodel
import numpy as np
# import tensorflow as tf
#tf.keras.utils.set_random_seed(0)
import tensorflow as tf
tf.keras.utils.set_random_seed(0)

from deepsocflow import *

(SIM, SIM_PATH) = ('xsim', "F:/Xilinx/Vivado/2022.2/bin/") if os.name=='nt' else ('verilator', '')
(SIM, SIM_PATH) = ('xsim', "E:/Vivado/2023.2/bin/") if os.name=='nt' else ('verilator', '')

'''
Dataset
Expand Down Expand Up @@ -175,24 +175,24 @@ def product_dict(**kwargs):
yield dict(zip(kwargs.keys(), instance))

@pytest.mark.parametrize("PARAMS", list(product_dict(
processing_elements = [(8,24) ],
processing_elements = [(32,64) ],
frequency_mhz = [ 250 ],
bits_input = [ 4 ],
bits_weights = [ 4 ],
bits_sum = [ 32 ],
bits_sum = [ 20 ],
bits_bias = [ 16 ],
max_batch_size = [ 64 ],
max_channels_in = [ 2048 ],
max_kernel_size = [ 9 ],
max_image_size = [ 512 ],
max_n_bundles = [ 64 ],
ram_weights_depth = [ 20 ],
ram_weights_depth = [ 512 ],
ram_edges_depth = [ 288 ],
axi_width = [ 128 ],
axi_width = [ 64 ],
config_baseaddr = ["B0000000"],
target_cpu_int_bits = [ 32 ],
valid_prob = [ 0.1 ],
ready_prob = [ 0.01 ],
valid_prob = [ 1 ],
ready_prob = [ 1 ],
data_dir = ['vectors'],
)))
def test_dnn_engine(PARAMS):
Expand Down
Loading

0 comments on commit 985e8ac

Please sign in to comment.