Skip to content

Commit

Permalink
Bugfix for the retrans-issue that stopped RDMA-writes at 8k: We now h… (
Browse files Browse the repository at this point in the history
#86)

* Bugfix for the retrans-issue that stopped RDMA-writes at 8k: We now have a ddr-write-access counter in rdma_mux_retrans that allows splitting the incoming data stream into multiple MTU-sized bursts for writing to the retrans-memory in HBM.
  • Loading branch information
maximilianheer authored Nov 19, 2024
1 parent 48572ba commit 6e3d30a
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 51 deletions.
104 changes: 53 additions & 51 deletions hw/hdl/network/rdma/rdma_mux_retrans.sv
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ logic [0:0] state_C, state_N;

// rd_C / actv_C are used below to select the source of axis_net.tlast:
// actv_C gates the selection, rd_C picks s_axis_user_rsp over s_axis_user_req.
// NOTE(review): the _C/_N suffixes presumably denote registered / next-state
// values; the logic that drives them is not visible in this excerpt.
logic rd_C, rd_N;
logic actv_C, actv_N;
logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N;
// cnt_ddr_wr has the same width as cnt_C/cnt_N. It is loaded from
// s_req_net.data.len, decremented on every accepted axis_ddr_wr beat, and
// compared against 1 to generate axis_ddr_wr.tlast.
logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N, cnt_ddr_wr;

// NOTE(review): the driver of tr_done is not visible in this excerpt.
logic tr_done;

Expand Down Expand Up @@ -295,6 +295,22 @@ always_comb begin: DP
endcase
end

// Beat counter for the write path into the retrans buffer.
//
// cnt_ddr_wr holds the number of data beats that still have to be written for
// the current command. It is consumed by the axis_ddr_wr.tlast generation
// (tlast is asserted while the counter equals 1), which cuts the incoming
// stream into one burst per command.
//
//  - Reset   : counter cleared (synchronous, active-low aresetn).
//  - Load    : while s_req_net.valid is high the counter is set to the number
//              of beats needed for s_req_net.data.len bytes, ROUNDED UP. The
//              previous truncating division (len/64) was one beat short for
//              every length that is not a multiple of the beat size, so tlast
//              fired one beat early (and never for len < one beat).
//  - Count   : decremented on every accepted beat (tvalid & tready) on
//              axis_ddr_wr; saturates at 0 instead of wrapping around.
//
// NOTE(review): the beat size is taken from BEAT_LOG_BITS (the parameter that
// already sizes this counter) instead of the literal 64 used before; this
// assumes 2**BEAT_LOG_BITS == 64 bytes — confirm against the parameter
// definition.
// NOTE(review): the load is qualified by s_req_net.valid only (unchanged from
// the original), not by valid & ready — confirm that a stalled command cannot
// overlap with data beats of the previous one.
always_ff @ (posedge aclk) begin
    if (aresetn == 1'b0) begin
        cnt_ddr_wr <= '0;
    end else begin
        if (s_req_net.valid) begin
            // beats = ceil(len / 2**BEAT_LOG_BITS): full beats plus one extra
            // beat if any of the low-order (sub-beat) length bits are set.
            // The counter is one bit wider than the slice, so the +1 cannot overflow.
            cnt_ddr_wr <= s_req_net.data.len[LEN_BITS-1:BEAT_LOG_BITS]
                        + (|s_req_net.data.len[BEAT_LOG_BITS-1:0]);
        end else begin
            // Decrement the counter with every successful write to the
            // retrans memory; hold at zero to avoid wrapping around.
            if (axis_ddr_wr.tvalid & axis_ddr_wr.tready & (cnt_ddr_wr != '0)) begin
                cnt_ddr_wr <= cnt_ddr_wr - 1'b1;
            end
        end
    end
end

// Mux
always_comb begin
if(state_C == ST_MUX) begin
Expand Down Expand Up @@ -343,62 +359,48 @@ assign axis_net.tlast = actv_C ? (rd_C ? s_axis_user_rsp.tlast : s_axis_user_req
// Write-data path on axis_ddr_wr: payload (tdata) and byte enables (tkeep) are
// forwarded unchanged from s_axis_user_req.
// NOTE(review): the original author was unsure about the purpose of this path
// ("seems to loop data back from the top-level module to the top-level
// module") — confirm against the top-level wiring.
assign axis_ddr_wr.tdata = s_axis_user_req.tdata;
assign axis_ddr_wr.tkeep = s_axis_user_req.tkeep;
assign axis_ddr_wr.tlast = s_axis_user_req.tlast;
// tlast is asserted while cnt_ddr_wr equals 1, i.e. it is derived from the
// write-beat counter rather than from the tlast of s_axis_user_req.
assign axis_ddr_wr.tlast = (cnt_ddr_wr == 1);

//
// DEBUG
//


// create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_retrans
// set_property -dict [list CONFIG.C_DATA_DEPTH {8192} CONFIG.C_PROBE29_WIDTH {22} CONFIG.C_PROBE23_WIDTH {28} CONFIG.C_NUM_OF_PROBES {35} CONFIG.Component_Name {ila_retrans} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_PROBE34_MU_CNT {2} CONFIG.C_PROBE33_MU_CNT {2} CONFIG.C_PROBE32_MU_CNT {2} CONFIG.C_PROBE31_MU_CNT {2} CONFIG.C_PROBE30_MU_CNT {2} CONFIG.C_PROBE29_MU_CNT {2} CONFIG.C_PROBE28_MU_CNT {2} CONFIG.C_PROBE27_MU_CNT {2} CONFIG.C_PROBE26_MU_CNT {2} CONFIG.C_PROBE25_MU_CNT {2} CONFIG.C_PROBE24_MU_CNT {2} CONFIG.C_PROBE23_MU_CNT {2} CONFIG.C_PROBE22_MU_CNT {2} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_retrans]

/* ila_retrans inst_ila_retrans (
.clk(aclk),
.probe0(s_req_net.valid),
.probe1(s_req_net.ready),
.probe2(m_req_user.valid),
.probe3(m_req_user.ready),
.probe4(s_axis_user_rsp.tvalid),
.probe5(s_axis_user_rsp.tready),
.probe6(s_axis_user_rsp.tlast),
.probe7(s_axis_user_req.tvalid),
.probe8(s_axis_user_req.tready),
.probe9(s_axis_user_req.tlast),
.probe10(m_axis_net.tvalid),
.probe11(m_axis_net.tready),
.probe12(m_axis_net.tlast),
.probe13(m_req_ddr_rd.valid),
.probe14(m_req_ddr_rd.ready),
.probe15(m_req_ddr_wr.valid),
.probe16(m_req_ddr_wr.ready),
.probe17(s_axis_ddr.tvalid),
.probe18(s_axis_ddr.tready),
.probe19(s_axis_ddr.tlast),
.probe20(m_axis_ddr.tvalid),
.probe21(m_axis_ddr.tready),
.probe22(m_axis_ddr.tlast),
.probe23(len_snk[27:0]), // 28
.probe24(actv_snk),
.probe25(rd_snk),
.probe26(seq_snk_valid),
.probe27(seq_snk_ready),
.probe28(state_C),
.probe29(cnt_C[21:0]), // 22
.probe30(rd_C),
.probe31(actv_C),
.probe32(tr_done),
.probe33(req_user.ready),
.probe34(req_user.valid)
.clk(aclk),
.probe0(s_req_net.valid),
.probe1(s_req_net.data), // 128
.probe2(s_req_net.ready),
.probe3(s_axis_user_req.tvalid),
.probe4(s_axis_user_req.tdata), // 512
.probe5(s_axis_user_req.tkeep), // 64
.probe6(s_axis_user_req.tready),
.probe7(s_axis_user_req.tlast),
.probe8(m_axis_net.tvalid),
.probe9(m_axis_net.tdata), // 512
.probe10(m_axis_net.tkeep), // 64
.probe11(m_axis_net.tready),
.probe12(m_axis_net.tlast),
.probe13(m_req_ddr_wr.valid),
.probe14(m_req_ddr_wr.data), // 128
.probe15(m_req_ddr_wr.ready),
.probe16(m_axis_ddr.tvalid),
.probe17(m_axis_ddr.tdata), // 512
.probe18(m_axis_ddr.tkeep), // 64
.probe19(m_axis_ddr.tready),
.probe20(m_axis_ddr.tlast),
.probe21(seq_snk_valid),
.probe22(seq_snk_ready),
.probe23(rd_snk),
.probe24(actv_snk),
.probe25(cnt_C), // 26
.probe26(state_C),
.probe27(cnt_ddr_wr), // 26
.probe28(tr_done)
); */

/*
create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_retrans
set_property -dict [list CONFIG.C_PROBE29_WIDTH {22} CONFIG.C_PROBE23_WIDTH {28} CONFIG.C_NUM_OF_PROBES {35} CONFIG.Component_Name {ila_retrans} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_PROBE34_MU_CNT {2} CONFIG.C_PROBE33_MU_CNT {2} CONFIG.C_PROBE32_MU_CNT {2} CONFIG.C_PROBE31_MU_CNT {2} CONFIG.C_PROBE30_MU_CNT {2} CONFIG.C_PROBE29_MU_CNT {2} CONFIG.C_PROBE28_MU_CNT {2} CONFIG.C_PROBE27_MU_CNT {2} CONFIG.C_PROBE26_MU_CNT {2} CONFIG.C_PROBE25_MU_CNT {2} CONFIG.C_PROBE24_MU_CNT {2} CONFIG.C_PROBE23_MU_CNT {2} CONFIG.C_PROBE22_MU_CNT {2} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_retrans]
*/

endmodule
34 changes: 34 additions & 0 deletions hw/hdl/network/rdma/roce_stack.sv
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,40 @@ assign rdma_wr_req.ready = m_rdma_wr_req.ready;
// RoCE stack
//

/* ila_rdma inst_ila_rdma (
.clk(nclk),
.probe0(s_rdma_qp_interface.valid),
.probe1(s_rdma_qp_interface.ready),
.probe2(s_rdma_qp_interface.data), // 184
.probe3(s_rdma_conn_interface.valid),
.probe4(s_rdma_conn_interface.ready),
.probe5(s_rdma_conn_interface.data), // 184
.probe6(s_rdma_sq.valid),
.probe7(s_rdma_sq.ready),
.probe8(s_rdma_sq.data), // 256
.probe9(m_rdma_rd_req.valid),
.probe10(m_rdma_rd_req.ready),
.probe11(m_rdma_rd_req.data), // 128
.probe12(m_rdma_wr_req.valid),
.probe13(m_rdma_wr_req.ready),
.probe14(m_rdma_wr_req.data), // 128
.probe15(m_rdma_mem_rd_cmd.valid),
.probe16(m_rdma_mem_rd_cmd.ready),
.probe17(m_rdma_mem_rd_cmd.data), // 96
.probe18(m_rdma_mem_wr_cmd.valid),
.probe19(m_rdma_mem_wr_cmd.ready),
.probe20(m_rdma_mem_wr_cmd.data), // 96
.probe21(s_axis_rdma_rd_req.tvalid),
.probe22(s_axis_rdma_rd_req.tdata), // 512
.probe23(s_axis_rdma_rd_req.tkeep), // 64
.probe24(s_axis_rdma_rd_req.tready),
.probe25(s_axis_rdma_rd_req.tlast),
.probe26(m_axis_rdma_wr.tvalid),
.probe27(m_axis_rdma_wr.tdata), // 512
.probe28(m_axis_rdma_wr.tkeep), // 64
.probe29(m_axis_rdma_wr.tready),
.probe30(m_axis_rdma_wr.tlast)
); */

/*
ila_rdma inst_ila_rdma (
Expand Down

0 comments on commit 6e3d30a

Please sign in to comment.