From 789214db943a1ed3d339e5148f5fb7265bf0f8e1 Mon Sep 17 00:00:00 2001 From: kodario Date: Mon, 12 Jun 2023 18:02:24 +0200 Subject: [PATCH] rdma cmplt compiling, pid hash. --- driver/coyote_dev.h | 6 +- driver/fpga_dev.c | 2 +- driver/fpga_fops.c | 71 +++++++++++--------- driver/fpga_sysfs.c | 20 ++---- hw/config.cmake | 15 ++++- hw/hdl/network/rdma/rdma_req_parser.sv | 17 +---- hw/hdl/network/rdma/roce_stack.sv | 7 +- hw/hdl/network/stack/network_ccross_early.sv | 4 +- hw/hdl/network/stack/network_ccross_late.sv | 4 +- hw/hdl/network/stack/network_slice.sv | 2 +- hw/hdl/network/stack/network_slice_array.sv | 2 +- hw/hdl/network/stack/network_stack.sv | 14 ++-- hw/hdl/slave/static_slave.sv | 63 ++++++++--------- hw/scripts/ip_inst/network_stack.tcl | 24 ++++--- hw/scripts/wr_hdl/template_gen/lynx_pkg.txt | 28 +++++--- 15 files changed, 142 insertions(+), 137 deletions(-) diff --git a/driver/coyote_dev.h b/driver/coyote_dev.h index ad9da51..12b9ccf 100644 --- a/driver/coyote_dev.h +++ b/driver/coyote_dev.h @@ -330,6 +330,7 @@ extern long int eost; #define PR_BATCH_SIZE (2 * 1024 * 1024) #define USER_HASH_TABLE_ORDER 8 +#define PID_HASH_TABLE_ORDER 8 /* PID */ #define N_CPID_MAX 64 @@ -531,7 +532,7 @@ struct xdma_engine { /* Inode */ struct cid_entry { struct hlist_node entry; - uint64_t ino; + pid_t pid; int32_t cpid; }; @@ -556,7 +557,8 @@ struct pr_pages { struct page **pages; }; -extern struct hlist_head cid_map[MAX_N_REGIONS][1 << (USER_HASH_TABLE_ORDER)]; // cid mapping +/* PID tables */ +extern struct hlist_head pid_cpid_map[MAX_N_REGIONS][1 << (PID_HASH_TABLE_ORDER)]; /* User tables */ extern struct hlist_head user_lbuff_map[MAX_N_REGIONS][1 << (USER_HASH_TABLE_ORDER)]; // large alloc diff --git a/driver/fpga_dev.c b/driver/fpga_dev.c index 020716f..611668a 100644 --- a/driver/fpga_dev.c +++ b/driver/fpga_dev.c @@ -443,7 +443,7 @@ int init_fpga_devices(struct bus_drvdata *d) d->fpga_dev[i].cdev.ops = &fpga_fops; // Init hash - hash_init(cid_map[i]); + hash_init(pid_cpid_map[i]); hash_init(user_lbuff_map[i]); hash_init(user_sbuff_map[i]); diff --git a/driver/fpga_fops.c b/driver/fpga_fops.c index e005b77..4540d71 100644 --- a/driver/fpga_fops.c +++ b/driver/fpga_fops.c @@ -36,7 +36,7 @@ |_| */ -struct hlist_head cid_map[MAX_N_REGIONS][1 << (USER_HASH_TABLE_ORDER)]; // cid mapping +struct hlist_head pid_cpid_map[MAX_N_REGIONS][1 << (USER_HASH_TABLE_ORDER)]; // cid mapping /** * @brief Acquire a region @@ -63,19 +63,19 @@ int fpga_open(struct inode *inode, struct file *file) */ int fpga_release(struct inode *inode, struct file *file) { - uint64_t ino; int32_t cpid; struct cid_entry *tmp_cid; + pid_t pid; int minor = iminor(inode); struct fpga_dev *d = container_of(inode->i_cdev, struct fpga_dev, cdev); BUG_ON(!d); - ino = inode->i_ino; + pid = current->pid; - hash_for_each_possible(cid_map[d->id], tmp_cid, entry, ino) { - if(tmp_cid->ino == ino) { + hash_for_each_possible(pid_cpid_map[d->id], tmp_cid, entry, pid) { + if(tmp_cid->pid == pid) { cpid = tmp_cid->cpid; // unamp all leftover user pages @@ -129,6 +129,7 @@ long fpga_ioctl(struct file *file, unsigned int cmd, unsigned long arg) int ret_val, i; uint64_t tmp[MAX_USER_WORDS]; uint64_t cpid; + pid_t pid; struct cid_entry *tmp_cid; struct fpga_dev *d = (struct fpga_dev *)file->private_data; @@ -224,37 +225,32 @@ long fpga_ioctl(struct file *file, unsigned int cmd, unsigned long arg) // register pid case IOCTL_REGISTER_PID: - // read pid - ret_val = copy_from_user(&tmp, (unsigned long *)arg, sizeof(unsigned long)); - if (ret_val != 0) { - pr_info("user data could not be coppied, return %d\n", ret_val); + spin_lock(&pd->stat_lock); + + pid = current->pid; + + cpid = (uint64_t)register_pid(d, pid); + if (cpid == -1) + { + dbg_info("registration failed pid %d\n", pid); + return -1; } - else { - spin_lock(&pd->stat_lock); - cpid = (uint64_t)register_pid(d, tmp[0]); // tmp[0] - pid - if (cpid == -1) - { - dbg_info("registration failed pid %lld\n", tmp[0]); - return -1; - } + dbg_info("registration succeeded pid %d, cpid %lld\n", pid, cpid); - dbg_info("registration succeeded pid %lld, cpid %lld\n", tmp[0], cpid); + tmp_cid = kzalloc(sizeof(struct cid_entry), GFP_KERNEL); + BUG_ON(!tmp_cid); - // inode - tmp_cid = kzalloc(sizeof(struct cid_entry), GFP_KERNEL); - BUG_ON(!tmp_cid); + tmp_cid->pid = pid; + tmp_cid->cpid = cpid; - tmp_cid->ino = file->f_path.dentry->d_inode->i_ino; - tmp_cid->cpid = cpid; + hash_add(pid_cpid_map[d->id], &tmp_cid->entry, pid); - hash_add(cid_map[d->id], &tmp_cid->entry, tmp_cid->ino); + // return cpid + ret_val = copy_to_user((unsigned long *)arg + 1, &cpid, sizeof(unsigned long)); - // return cpid - ret_val = copy_to_user((unsigned long *)arg + 1, &cpid, sizeof(unsigned long)); + spin_unlock(&pd->stat_lock); - spin_unlock(&pd->stat_lock); - } break; // unregister pid @@ -266,16 +262,25 @@ long fpga_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else { spin_lock(&pd->stat_lock); + + cpid = tmp[0]; + pid = d->pid_array[cpid]; - ret_val = unregister_pid(d, tmp[0]); // tmp[0] - cpid + ret_val = unregister_pid(d, cpid); // tmp[0] - cpid if (ret_val == -1) { - dbg_info("unregistration failed cpid %lld\n", tmp[0]); + dbg_info("unregistration failed cpid %lld\n", cpid); return -1; } - // inode - hash_for_each_possible(cid_map[d->id], tmp_cid, entry, file->f_path.dentry->d_inode->i_ino) { - if(tmp_cid->ino == file->f_path.dentry->d_inode->i_ino && tmp_cid->cpid == tmp[0]) { + // map + hash_for_each_possible(pid_cpid_map[d->id], tmp_cid, entry, pid) { + if(tmp_cid->pid == pid && tmp_cid->cpid == cpid) { + // unamp all leftover user pages + tlb_put_user_pages_cpid(d, cpid, 1); + + // unregister (if registered) + unregister_pid(d, cpid); + // Free from hash hash_del(&tmp_cid->entry); } diff --git a/driver/fpga_sysfs.c b/driver/fpga_sysfs.c index f34e9e2..a08d421 100644 --- a/driver/fpga_sysfs.c +++ b/driver/fpga_sysfs.c @@ -173,9 +173,7 @@ ssize_t cyt_attr_nstats_q0_show(struct kobject *kobj, struct kobj_attribute *att pr_info("coyote-sysfs: net stats QSFP0\n"); return sprintf(buf, "\n -- \033[31m\e[1mNET STATS\033[0m\e[0m QSFP0\n\n" - "RX words: %lld\n" "RX pkgs: %lld\n" - "TX words: %lld\n" "TX pkgs: %lld\n" "ARP RX pkgs: %lld\n" "ARP TX pkgs: %lld\n" @@ -187,10 +185,10 @@ ssize_t cyt_attr_nstats_q0_show(struct kobject *kobj, struct kobj_attribute *att "ROCE TX pkgs: %lld\n" "IBV RX pkgs: %lld\n" "IBV TX pkgs: %lld\n" - "CRC drop cnt: %lld\n" "PSN drop cnt: %lld\n" + "Retrans cnt: %lld\n" "TCP session cnt: %lld\n" - "STRM down cnt: %lld\n\n", + "STRM down: %lld\n\n", LOW_32 (pd->fpga_stat_cnfg->net_0_debug[0]), HIGH_32(pd->fpga_stat_cnfg->net_0_debug[0]), @@ -207,9 +205,7 @@ ssize_t cyt_attr_nstats_q0_show(struct kobject *kobj, struct kobj_attribute *att LOW_32 (pd->fpga_stat_cnfg->net_0_debug[6]), HIGH_32(pd->fpga_stat_cnfg->net_0_debug[6]), LOW_32 (pd->fpga_stat_cnfg->net_0_debug[7]), - HIGH_32(pd->fpga_stat_cnfg->net_0_debug[7]), - LOW_32 (pd->fpga_stat_cnfg->net_0_debug[8]), - LOW_32 (pd->fpga_stat_cnfg->net_0_debug[9]) + LOW_32 (pd->fpga_stat_cnfg->net_0_debug[8]) ); } @@ -223,9 +219,7 @@ ssize_t cyt_attr_nstats_q1_show(struct kobject *kobj, struct kobj_attribute *att pr_info("coyote-sysfs: net stats QSFP1\n"); return sprintf(buf, "\n -- \033[31m\e[1mNET STATS\033[0m\e[0m QSFP1\n\n" - "RX words: %lld\n" "RX pkgs: %lld\n" - "TX words: %lld\n" "TX pkgs: %lld\n" "ARP RX pkgs: %lld\n" "ARP TX pkgs: %lld\n" @@ -237,10 +231,10 @@ ssize_t cyt_attr_nstats_q1_show(struct kobject *kobj, struct kobj_attribute *att "ROCE TX pkgs: %lld\n" "IBV RX pkgs: %lld\n" "IBV TX pkgs: %lld\n" - "CRC drop cnt: %lld\n" "PSN drop cnt: %lld\n" + "Retrans cnt: %lld\n" "TCP session cnt: %lld\n" - "STRM down cnt: %lld\n\n", + "STRM down: %lld\n\n", LOW_32 (pd->fpga_stat_cnfg->net_1_debug[0]), HIGH_32(pd->fpga_stat_cnfg->net_1_debug[0]), @@ -257,9 +251,7 @@ ssize_t cyt_attr_nstats_q1_show(struct kobject *kobj, struct kobj_attribute *att LOW_32 (pd->fpga_stat_cnfg->net_1_debug[6]), HIGH_32(pd->fpga_stat_cnfg->net_1_debug[6]), LOW_32 (pd->fpga_stat_cnfg->net_1_debug[7]), - HIGH_32(pd->fpga_stat_cnfg->net_1_debug[7]), - LOW_32 (pd->fpga_stat_cnfg->net_1_debug[8]), - LOW_32 (pd->fpga_stat_cnfg->net_1_debug[9]) + LOW_32 (pd->fpga_stat_cnfg->net_1_debug[8]) ); } diff --git a/hw/config.cmake b/hw/config.cmake index d6c07f3..d847b51 100644 --- a/hw/config.cmake +++ b/hw/config.cmake @@ -4,7 +4,11 @@ ## General -# Max supported regions (could be more if really needed with a bit of hacking) +# Max supported regions +set(MULT_REGIONS 0) +if(N_REGIONS GREATER 1) + set(MULT_REGIONS 1) +endif() if(N_REGIONS GREATER 15) message(FATAL_ERROR "Max 15 regions supported.") endif() @@ -165,6 +169,11 @@ if(DDR_AUTO) endif() endif() +set(MULT_DDR_CHAN 0) +if(N_DDR_CHAN GREATER 1) + set(MULT_DDR_CHAN 1) +endif() + # Compare for mismatch if(EN_DCARD) MATH(EXPR N_DDRS "${DDR_0}+${DDR_1}+${DDR_2}+${DDR_3}") @@ -231,9 +240,13 @@ endif() # Channel designators set(NN 0) +set(MULT_STRM_AXI 0) if(EN_STRM) set(STRM_CHAN ${NN}) MATH(EXPR NN "${NN}+1") + if(N_STRM_AXI GREATER 1) + set(MULT_STRM_AXI 1) + endif() else() set(STRM_CHAN -1) endif() diff --git a/hw/hdl/network/rdma/rdma_req_parser.sv b/hw/hdl/network/rdma/rdma_req_parser.sv index c2f930d..edd93ad 100644 --- a/hw/hdl/network/rdma/rdma_req_parser.sv +++ b/hw/hdl/network/rdma/rdma_req_parser.sv @@ -42,9 +42,7 @@ module rdma_req_parser #( input logic aresetn, metaIntf.s s_req, - metaIntf.m m_req, - - output logic [31:0] used + metaIntf.m m_req ); // FSM @@ -110,17 +108,7 @@ ila_req_parser inst_ila_parser ( ); // Decoupling -axis_data_fifo_cnfg_rdma_512 inst_cmd_queue_in ( - .s_axis_aresetn(aresetn), - .s_axis_aclk(aclk), - .s_axis_tvalid(s_req.valid), - .s_axis_tready(s_req.ready), - .s_axis_tdata(s_req.data), - .m_axis_tvalid(req_pre_parsed.valid), - .m_axis_tready(req_pre_parsed.ready), - .m_axis_tdata(req_pre_parsed.data), - .axis_wr_data_count(used) -); +`META_ASSIGN(s_req, req_pre_parsed) logic [31:0] queue_used_out; @@ -275,6 +263,7 @@ always_comb begin: DP req_parsed.data.last = plast_C; req_parsed.data.cmplt = cmplt_C; req_parsed.data.ssn = ssn_C; + req_parsed.data.offs = 0; req_parsed.data.msg[RDMA_LVADDR_OFFS+:RDMA_VADDR_BITS] = plvaddr_C; req_parsed.data.msg[RDMA_RVADDR_OFFS+:RDMA_VADDR_BITS] = prvaddr_C; req_parsed.data.msg[RDMA_LEN_OFFS+:RDMA_LEN_BITS] = plen_C; diff --git a/hw/hdl/network/rdma/roce_stack.sv b/hw/hdl/network/rdma/roce_stack.sv index 5c09542..75fc899 100644 --- a/hw/hdl/network/rdma/roce_stack.sv +++ b/hw/hdl/network/rdma/roce_stack.sv @@ -89,12 +89,11 @@ always_comb begin rdma_sq_data[32+:RDMA_QPN_BITS] = rdma_sq.data.qpn; rdma_sq_data[32+RDMA_QPN_BITS+0+:1] = rdma_sq.data.host; - rdma_sq_data[32+RDMA_QPN_BITS+2+:1] = rdma_sq.data.last; + rdma_sq_data[32+RDMA_QPN_BITS+1+:1] = rdma_sq.data.last; - rdma_sq_data[32+RDMA_QPN_BITS+4+:RDMA_MSN_BITS] = rdma_sq.data.ssn; - rdma_sq_data[32+RDMA_QPN_BITS+4+RDMA_MSN_BITS+:RDMA_OFFS_BITS] = rdma_sq.data.offs; + rdma_sq_data[32+RDMA_QPN_BITS+2+:RDMA_OFFS_BITS] = rdma_sq.data.offs; - rdma_sq_data[32+RDMA_QPN_BITS+4+RDMA_MSN_BITS+RDMA_OFFS_BITS+:RDMA_MSG_BITS] = rdma_sq.data.msg; + rdma_sq_data[32+RDMA_QPN_BITS+2+RDMA_OFFS_BITS+:RDMA_MSG_BITS] = rdma_sq.data.msg; `else rdma_sq_data = 0; diff --git a/hw/hdl/network/stack/network_ccross_early.sv b/hw/hdl/network/stack/network_ccross_early.sv index cbf397d..d9c9a39 100644 --- a/hw/hdl/network/stack/network_ccross_early.sv +++ b/hw/hdl/network/stack/network_ccross_early.sv @@ -82,7 +82,7 @@ if(ENABLED == 1) begin // Crossings // - axis_data_fifo_net_ccross_512 inst_cross_ns_nr ( + axis_data_fifo_net_ccross_early_512 inst_cross_ns_nr ( .m_axis_aclk(rclk), .s_axis_aclk(nclk), .s_axis_aresetn(nresetn_reg), @@ -98,7 +98,7 @@ if(ENABLED == 1) begin .m_axis_tlast(m_axis_rclk_int.tlast) ); - axis_data_fifo_net_ccross_512 inst_cross_nr_ns ( + axis_data_fifo_net_ccross_early_512 inst_cross_nr_ns ( .m_axis_aclk(nclk), .s_axis_aclk(rclk), .s_axis_aresetn(rresetn_reg), diff --git a/hw/hdl/network/stack/network_ccross_late.sv b/hw/hdl/network/stack/network_ccross_late.sv index f015f9f..0739305 100644 --- a/hw/hdl/network/stack/network_ccross_late.sv +++ b/hw/hdl/network/stack/network_ccross_late.sv @@ -139,7 +139,7 @@ if(ENABLED == 1) begin `ifdef EN_STATS // Stats - axis_clock_converter_net_608 inst_ccross_qp_interface ( + axis_clock_converter_net_512 inst_ccross_qp_interface ( .s_axis_aresetn(nresetn), .m_axis_aresetn(aresetn), .s_axis_aclk(nclk), @@ -255,7 +255,7 @@ else begin `ifdef EN_STATS // Stats - axis_register_slice_net_608 inst_reg_net_stats ( + axis_register_slice_net_512 inst_reg_net_stats ( .aclk(aclk), .aresetn(aresetn), .s_axis_tvalid(1'b1), diff --git a/hw/hdl/network/stack/network_slice.sv b/hw/hdl/network/stack/network_slice.sv index 933b852..780f737 100644 --- a/hw/hdl/network/stack/network_slice.sv +++ b/hw/hdl/network/stack/network_slice.sv @@ -120,7 +120,7 @@ module network_slice ( `ifdef EN_STATS // Stats - axis_register_slice_net_608 inst_reg_net_stats ( + axis_register_slice_net_512 inst_reg_net_stats ( .aclk(aclk), .aresetn(aresetn), .s_axis_tvalid(1'b1), diff --git a/hw/hdl/network/stack/network_slice_array.sv b/hw/hdl/network/stack/network_slice_array.sv index 578d3b6..a9f6c25 100644 --- a/hw/hdl/network/stack/network_slice_array.sv +++ b/hw/hdl/network/stack/network_slice_array.sv @@ -161,7 +161,7 @@ for(genvar i = 0; i < N_STAGES; i++) begin `ifdef EN_STATS // ARP reply - axis_register_slice_net_608 ( + axis_register_slice_net_512 ( .aclk(aclk), .aresetn(aresetn), .s_axis_tvalid(1'b1), diff --git a/hw/hdl/network/stack/network_stack.sv b/hw/hdl/network/stack/network_stack.sv index 9ef57bf..2cca913 100644 --- a/hw/hdl/network/stack/network_stack.sv +++ b/hw/hdl/network/stack/network_stack.sv @@ -860,13 +860,12 @@ end logic[31:0] roce_tx_pkg_counter; logic[31:0] roce_retrans_counter; - logic[31:0] axis_stream_down_counter; + logic[15:0] axis_stream_down_counter; + logic axis_stream_down; net_stat_t[NET_STATS_DELAY-1:0] net_stats_tmp; // Slice - assign net_stats_tmp[0].rx_word_counter = rx_word_counter; assign net_stats_tmp[0].rx_pkg_counter = rx_pkg_counter; - assign net_stats_tmp[0].tx_word_counter = tx_word_counter; assign net_stats_tmp[0].tx_pkg_counter = tx_pkg_counter; assign net_stats_tmp[0].arp_rx_pkg_counter = arp_rx_pkg_counter; assign net_stats_tmp[0].arp_tx_pkg_counter = arp_tx_pkg_counter; @@ -874,15 +873,14 @@ end assign net_stats_tmp[0].icmp_tx_pkg_counter = icmp_tx_pkg_counter; assign net_stats_tmp[0].tcp_rx_pkg_counter = tcp_rx_pkg_counter; assign net_stats_tmp[0].tcp_tx_pkg_counter = tcp_tx_pkg_counter; - assign net_stats_tmp[0].tcp_session_counter = session_count_data; assign net_stats_tmp[0].roce_rx_pkg_counter = roce_rx_pkg_counter; assign net_stats_tmp[0].roce_tx_pkg_counter = roce_tx_pkg_counter; assign net_stats_tmp[0].ibv_rx_pkg_counter = regIbvRxPkgCount; assign net_stats_tmp[0].ibv_tx_pkg_counter = regIbvTxPkgCount; - assign net_stats_tmp[0].roce_crc_drop_counter = regCrcDropPkgCount; assign net_stats_tmp[0].roce_psn_drop_counter = regInvalidPsnDropCount; assign net_stats_tmp[0].roce_retrans_counter = regRetransCount; - assign net_stats_tmp[0].axis_stream_down_counter = axis_stream_down_counter; + assign net_stats_tmp[0].tcp_session_counter = session_count_data; + assign net_stats_tmp[0].axis_stream_down = axis_stream_down; assign m_net_stats = net_stats_tmp[NET_STATS_DELAY-1]; @@ -904,6 +902,7 @@ end roce_tx_pkg_counter <= '0; axis_stream_down_counter <= '0; + axis_stream_down <= 1'b0; end // Reg the stats @@ -978,8 +977,9 @@ end axis_stream_down_counter <= '0; end if (s_axis_net.tvalid && ~s_axis_net.tready) begin - axis_stream_down_counter <= axis_stream_down_counter + 1; + axis_stream_down_counter <= (axis_stream_down_counter == NET_STRM_DOWN_THRS) ? axis_stream_down_counter : axis_stream_down_counter + 1; end + axis_stream_down <= (axis_stream_down_counter == NET_STRM_DOWN_THRS); end diff --git a/hw/hdl/slave/static_slave.sv b/hw/hdl/slave/static_slave.sv index a77ef63..1b07f11 100644 --- a/hw/hdl/slave/static_slave.sv +++ b/hw/hdl/slave/static_slave.sv @@ -145,7 +145,7 @@ module static_slave ( // ------------------------------------------------------------------ // Constants -localparam integer N_REGS = 128; +localparam integer N_REGS = 160; localparam integer ADDR_LSB = $clog2(AXIL_DATA_BITS/8); localparam integer ADDR_MSB = $clog2(N_REGS); localparam integer AXIL_ADDR_BITS = ADDR_LSB + ADDR_MSB; @@ -316,28 +316,27 @@ localparam integer XDMA_STAT_2_AXIS = 72; localparam integer XDMA_STAT_3_BPSS = 73; localparam integer XDMA_STAT_3_CMPL = 74; localparam integer XDMA_STAT_3_AXIS = 75; -// NET STATS -localparam integer NET_STAT_0_RX_REG = 96; -localparam integer NET_STAT_0_TX_REG = 97; -localparam integer NET_STAT_0_ARP_REG = 98; -localparam integer NET_STAT_0_ICMP_REG = 99; -localparam integer NET_STAT_0_TCP_REG = 100; -localparam integer NET_STAT_0_RDMA_REG = 101; -localparam integer NET_STAT_0_IBV_REG = 102; -localparam integer NET_STAT_0_DROP_REG = 103; -localparam integer NET_STAT_0_SESS_REG = 104; -localparam integer NET_STAT_0_DOWN_REG = 105; -localparam integer NET_STAT_1_RX_REG = 112; -localparam integer NET_STAT_1_TX_REG = 113; -localparam integer NET_STAT_1_ARP_REG = 114; -localparam integer NET_STAT_1_ICMP_REG = 115; -localparam integer NET_STAT_1_TCP_REG = 116; -localparam integer NET_STAT_1_RDMA_REG = 117; -localparam integer NET_STAT_1_IBV_REG = 118; -localparam integer NET_STAT_1_DROP_REG = 119; -localparam integer NET_STAT_1_SESS_REG = 120; -localparam integer NET_STAT_1_DOWN_REG = 121; +// NET STATS +localparam integer NET_STAT_0_PKG_REG = 96; +localparam integer NET_STAT_0_ARP_REG = 97; +localparam integer NET_STAT_0_ICMP_REG = 98; +localparam integer NET_STAT_0_TCP_REG = 99; +localparam integer NET_STAT_0_RDMA_REG = 100; +localparam integer NET_STAT_0_IBV_REG = 101; +localparam integer NET_STAT_0_DROP_REG = 102; +localparam integer NET_STAT_0_SESS_REG = 103; +localparam integer NET_STAT_0_DOWN_REG = 104; + +localparam integer NET_STAT_1_PKG_REG = 128; +localparam integer NET_STAT_1_ARP_REG = 129; +localparam integer NET_STAT_1_ICMP_REG = 130; +localparam integer NET_STAT_1_TCP_REG = 131; +localparam integer NET_STAT_1_RDMA_REG = 132; +localparam integer NET_STAT_1_IBV_REG = 133; +localparam integer NET_STAT_1_DROP_REG = 134; +localparam integer NET_STAT_1_SESS_REG = 135; +localparam integer NET_STAT_1_DOWN_REG = 136; // ---------------------------------------------------------------------------------------- // Write process @@ -883,10 +882,8 @@ always_ff @(posedge aclk) begin `endif `ifdef EN_NET_0 - NET_STAT_0_RX_REG: // rx - axi_rdata <= {s_net_stats_0.rx_pkg_counter, s_net_stats_0.rx_word_counter}; - NET_STAT_0_TX_REG: // tx - axi_rdata <= {s_net_stats_0.tx_pkg_counter, s_net_stats_0.tx_word_counter}; + NET_STAT_0_PKG_REG: // rx and tx + axi_rdata <= {s_net_stats_0.tx_pkg_counter, s_net_stats_0.rx_pkg_counter}; NET_STAT_0_ARP_REG: // arp axi_rdata <= {s_net_stats_0.arp_tx_pkg_counter, s_net_stats_0.arp_rx_pkg_counter}; NET_STAT_0_ICMP_REG: // icmp @@ -898,18 +895,16 @@ always_ff @(posedge aclk) begin NET_STAT_0_IBV_REG: // ibv axi_rdata <= {s_net_stats_0.ibv_tx_pkg_counter, s_net_stats_0.ibv_rx_pkg_counter}; NET_STAT_0_DROP_REG: // rdma drop - axi_rdata <= {s_net_stats_0.roce_psn_drop_counter, s_net_stats_0.roce_crc_drop_counter}; + axi_rdata <= {s_net_stats_0.roce_retrans_counter, s_net_stats_0.roce_psn_drop_counter}; NET_STAT_0_SESS_REG: // tcp sessions axi_rdata[31:0] <= s_net_stats_0.tcp_session_counter; NET_STAT_0_DOWN_REG: // rdma - axi_rdata <= {s_net_stats_0.roce_retrans_counter, s_net_stats_0.axis_stream_down_counter}; + axi_rdata[0] <= s_net_stats_0.axis_stream_down; `endif `ifdef EN_NET_1 - NET_STAT_1_RX_REG: // rx - axi_rdata <= {s_net_stats_1.rx_pkg_counter, s_net_stats_1.rx_word_counter}; - NET_STAT_1_TX_REG: // tx - axi_rdata <= {s_net_stats_1.tx_pkg_counter, s_net_stats_1.tx_word_counter}; + NET_STAT_1_PKG_REG: // rx and tx + axi_rdata <= {s_net_stats_1.tx_pkg_counter, s_net_stats_1.rx_pkg_counter}; NET_STAT_1_ARP_REG: // arp axi_rdata <= {s_net_stats_1.arp_tx_pkg_counter, s_net_stats_1.arp_rx_pkg_counter}; NET_STAT_1_ICMP_REG: // icmp @@ -921,11 +916,11 @@ always_ff @(posedge aclk) begin NET_STAT_1_IBV_REG: // ibv axi_rdata <= {s_net_stats_1.ibv_tx_pkg_counter, s_net_stats_1.ibv_rx_pkg_counter}; NET_STAT_1_DROP_REG: // rdma drop - axi_rdata <= {s_net_stats_1.roce_psn_drop_counter, s_net_stats_1.roce_crc_drop_counter}; + axi_rdata <= {s_net_stats_1.roce_retrans_counter, s_net_stats_1.roce_psn_drop_counter}; NET_STAT_1_SESS_REG: // tcp sessions axi_rdata[31:0] <= s_net_stats_1.tcp_session_counter; NET_STAT_1_DOWN_REG: // rdma - axi_rdata <= {s_net_stats_1.roce_retrans_counter, s_net_stats_1.axis_stream_down_counter}; + axi_rdata[0] <= s_net_stats_1.axis_stream_down; `endif `endif diff --git a/hw/scripts/ip_inst/network_stack.tcl b/hw/scripts/ip_inst/network_stack.tcl index dbcf814..4d05f24 100644 --- a/hw/scripts/ip_inst/network_stack.tcl +++ b/hw/scripts/ip_inst/network_stack.tcl @@ -98,6 +98,10 @@ if {$cfg(en_rdma) eq 1} { create_ip -name rocev2 -vendor ethz.systems.fpga -library hls -version 0.82 -module_name rocev2_ip } +# Cmd +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_req_512_used +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.FIFO_DEPTH {32} CONFIG.HAS_WR_DATA_COUNT {1} ] [get_ips axis_data_fifo_req_512_used] + ## Crossings create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_rdma_16 set_property -dict [list CONFIG.TDATA_NUM_BYTES {2} CONFIG.SYNCHRONIZATION_STAGES {4} ] [get_ips axis_clock_converter_rdma_16] @@ -433,8 +437,8 @@ set_property -dict [list CONFIG.AXI_ADDR_WIDTH {64} CONFIG.INTERCONNECT_DATA_WID ## Network top ## -create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_ccross_512 -set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.IS_ACLK_ASYNC {1} CONFIG.FIFO_DEPTH {512} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} ] [get_ips axis_data_fifo_net_ccross_512] +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_ccross_early_512 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.IS_ACLK_ASYNC {1} CONFIG.FIFO_DEPTH {512} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} ] [get_ips axis_data_fifo_net_ccross_early_512] ## Crossings create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_net_8 @@ -449,8 +453,8 @@ set_property -dict [list CONFIG.TDATA_NUM_BYTES {6} CONFIG.SYNCHRONIZATION_STAGE create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_net_56 set_property -dict [list CONFIG.TDATA_NUM_BYTES {7} CONFIG.SYNCHRONIZATION_STAGES {4} ] [get_ips axis_clock_converter_net_56] -create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_net_608 -set_property -dict [list CONFIG.TDATA_NUM_BYTES {76} CONFIG.SYNCHRONIZATION_STAGES {4} ] [get_ips axis_clock_converter_net_608] +create_ip -name axis_clock_converter -vendor xilinx.com -library ip -version 1.1 -module_name axis_clock_converter_net_512 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.SYNCHRONIZATION_STAGES {4} ] [get_ips axis_clock_converter_net_512] ## Crossings FIFO create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_ccross_8 @@ -465,8 +469,8 @@ set_property -dict [list CONFIG.TDATA_NUM_BYTES {6} CONFIG.IS_ACLK_ASYNC {1} CON create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_ccross_56 set_property -dict [list CONFIG.TDATA_NUM_BYTES {7} CONFIG.IS_ACLK_ASYNC {1} CONFIG.FIFO_DEPTH {32} ] [get_ips axis_data_fifo_net_ccross_56] -create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_ccross_608 -set_property -dict [list CONFIG.TDATA_NUM_BYTES {76} CONFIG.IS_ACLK_ASYNC {1} CONFIG.FIFO_DEPTH {32} ] [get_ips axis_data_fifo_net_ccross_608] +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_ccross_512 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.IS_ACLK_ASYNC {1} CONFIG.FIFO_DEPTH {32} ] [get_ips axis_data_fifo_net_ccross_512] ## Slicing create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_net_8 @@ -481,8 +485,8 @@ set_property -dict [list CONFIG.TDATA_NUM_BYTES {6} CONFIG.REG_CONFIG {8} ] [get create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_net_56 set_property -dict [list CONFIG.TDATA_NUM_BYTES {7} CONFIG.REG_CONFIG {8} ] [get_ips axis_register_slice_net_56] -create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_net_608 -set_property -dict [list CONFIG.TDATA_NUM_BYTES {76} CONFIG.REG_CONFIG {8} ] [get_ips axis_register_slice_net_608] +create_ip -name axis_register_slice -vendor xilinx.com -library ip -version 1.1 -module_name axis_register_slice_net_512 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.REG_CONFIG {8} ] [get_ips axis_register_slice_net_512] ## Buffering create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_8 @@ -497,8 +501,8 @@ set_property -dict [list CONFIG.TDATA_NUM_BYTES {6} CONFIG.FIFO_DEPTH {32} ] [ge create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_56 set_property -dict [list CONFIG.TDATA_NUM_BYTES {7} CONFIG.FIFO_DEPTH {32} ] [get_ips axis_data_fifo_net_56] -create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_608 -set_property -dict [list CONFIG.TDATA_NUM_BYTES {76} CONFIG.FIFO_DEPTH {32} ] [get_ips axis_data_fifo_net_608] +create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_data_fifo_net_512 +set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.FIFO_DEPTH {32} ] [get_ips axis_data_fifo_net_512] ## ## Network stack diff --git a/hw/scripts/wr_hdl/template_gen/lynx_pkg.txt b/hw/scripts/wr_hdl/template_gen/lynx_pkg.txt index d90f2b1..5c3517b 100644 --- a/hw/scripts/wr_hdl/template_gen/lynx_pkg.txt +++ b/hw/scripts/wr_hdl/template_gen/lynx_pkg.txt @@ -52,12 +52,6 @@ {% if cnfg.en_net %} `define EN_NET {% endif %} -{% if cnfg.mult_regions %} -`define MULT_REGIONS -{% endif %} -{% if cnfg.mult_ddr_chan %} -`define MULT_DDR_CHAN -{% endif %} {% if cnfg.en_aclk %} `define EN_ACLK {% endif %} @@ -97,9 +91,15 @@ {% if cnfg.vit_hls %} `define VITIS_HLS {% endif %} +{% if cnfg.mult_regions %} +`define MULT_REGIONS +{% endif %} {% if cnfg.mult_strm_axi %} `define MULT_STRM_AXI {% endif %} +{% if cnfg.mult_ddr_chan %} +`define MULT_DDR_CHAN +{% endif %} package lynxTypes; @@ -188,6 +188,7 @@ package lynxTypes; parameter integer MAC_ADDR_BITS = 48; parameter integer DEF_MAC_ADDRESS = 48'hE59D02350A00; // LSB first, 00:0A:35:02:9D:E5 parameter integer DEF_IP_ADDRESS = 32'hD1D4010B; // LSB first, 0B:01:D4:D1 + parameter integer NET_STRM_DOWN_THRS = 256; // Network RDMA parameter integer APP_READ = 0; @@ -195,6 +196,9 @@ package lynxTypes; parameter integer APP_SEND = 2; parameter integer APP_IMMED = 3; + parameter integer RC_SEND_FIRST = 5'h0; + parameter integer RC_SEND_MIDDLE = 5'h1; + parameter integer RC_SEND_LAST = 5'h2; parameter integer RC_SEND_ONLY = 5'h4; parameter integer RC_RDMA_WRITE_FIRST = 5'h6; parameter integer RC_RDMA_WRITE_MIDDLE = 5'h7; @@ -229,6 +233,7 @@ package lynxTypes; parameter integer RDMA_LEN_OFFS = 2*RDMA_VADDR_BITS; parameter integer RDMA_PARAMS_OFFS = 2*RDMA_VADDR_BITS + RDMA_LEN_BITS; parameter integer RDMA_MSN_BITS = 24; + parameter integer RDMA_OFFS_BITS = 4; parameter integer RDMA_SNDRM_BITS = 8; parameter integer RDMA_MAX_OUTSTANDING = 32; parameter integer RDMA_MODE_PARSE = 0; @@ -374,15 +379,19 @@ package lynxTypes; logic host; logic mode; logic last; + logic cmplt; + logic [RDMA_MSN_BITS-1:0] ssn; + logic [RDMA_OFFS_BITS-1:0] offs; logic [RDMA_MSG_BITS-1:0] msg; logic [RDMA_REQ_BITS-RDMA_MSG_BITS-3-RDMA_QPN_BITS-RDMA_OPCODE_BITS-1:0] rsrvd; } rdma_req_t; typedef struct packed { logic rd; + logic cmplt; logic [PID_BITS-1:0] pid; logic [DEST_BITS-1:0] vfid; - logic [RDMA_ACK_MSN_BITS-1:0] psn; + logic [RDMA_ACK_MSN_BITS-1:0] ssn; } rdma_ack_t; typedef struct packed { @@ -450,9 +459,7 @@ package lynxTypes; } xdma_stat_t; typedef struct packed { - logic [31:0] rx_word_counter; logic [31:0] rx_pkg_counter; - logic [31:0] tx_word_counter; logic [31:0] tx_pkg_counter; logic [31:0] arp_rx_pkg_counter; logic [31:0] arp_tx_pkg_counter; @@ -464,10 +471,9 @@ package lynxTypes; logic [31:0] roce_tx_pkg_counter; logic [31:0] ibv_rx_pkg_counter; logic [31:0] ibv_tx_pkg_counter; - logic [31:0] roce_crc_drop_counter; logic [31:0] roce_psn_drop_counter; + logic [31:0] roce_retrans_counter; logic [15:0] tcp_session_counter; - logic [7:0] axis_stream_down_counter; logic axis_stream_down; } net_stat_t;