completion test.

This commit is contained in:
kodario 2023-06-15 11:44:39 +02:00
parent 789214db94
commit 357b2bc35a
16 changed files with 175 additions and 191 deletions

View File

@ -49,7 +49,7 @@ int fpga_open(struct inode *inode, struct file *file)
struct fpga_dev *d = container_of(inode->i_cdev, struct fpga_dev, cdev); struct fpga_dev *d = container_of(inode->i_cdev, struct fpga_dev, cdev);
BUG_ON(!d); BUG_ON(!d);
dbg_info("fpga device %d acquired, inode %ld\n", minor, inode->i_ino); dbg_info("fpga device %d acquired, calling pid %d\n", minor, current->pid);
// set private data // set private data
file->private_data = (void *)d; file->private_data = (void *)d;
@ -89,7 +89,7 @@ int fpga_release(struct inode *inode, struct file *file)
} }
} }
dbg_info("fpga device %d released, inode %ld\n", minor, inode->i_ino); dbg_info("fpga device %d released, pid %d\n", minor, current->pid);
return 0; return 0;
} }
@ -225,32 +225,34 @@ long fpga_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
// register pid // register pid
case IOCTL_REGISTER_PID: case IOCTL_REGISTER_PID:
spin_lock(&pd->stat_lock); ret_val = copy_from_user(&tmp, (unsigned long *)arg, sizeof(unsigned long));
if (ret_val != 0) {
pr_info("user data could not be coppied, return %d\n", ret_val);
} else {
spin_lock(&pd->stat_lock);
pid = current->pid; pid = (pid_t) tmp[0];
cpid = (uint64_t)register_pid(d, pid); cpid = (int32_t)register_pid(d, pid);
if (cpid == -1) if (cpid == -1)
{ {
dbg_info("registration failed pid %d\n", pid); dbg_info("registration failed pid %d\n", pid);
return -1; return -1;
}
tmp_cid = kzalloc(sizeof(struct cid_entry), GFP_KERNEL);
BUG_ON(!tmp_cid);
tmp_cid->pid = pid;
tmp_cid->cpid = cpid;
hash_add(pid_cpid_map[d->id], &tmp_cid->entry, pid);
// return cpid
ret_val = copy_to_user((unsigned long *)arg + 1, &cpid, sizeof(unsigned long));
spin_unlock(&pd->stat_lock);
} }
dbg_info("registration succeeded pid %d, cpid %lld\n", pid, cpid);
tmp_cid = kzalloc(sizeof(struct cid_entry), GFP_KERNEL);
BUG_ON(!tmp_cid);
tmp_cid->pid = pid;
tmp_cid->cpid = cpid;
hash_add(pid_cpid_map[d->id], &tmp_cid->entry, pid);
// return cpid
ret_val = copy_to_user((unsigned long *)arg + 1, &cpid, sizeof(unsigned long));
spin_unlock(&pd->stat_lock);
break; break;
// unregister pid // unregister pid
@ -259,35 +261,30 @@ long fpga_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
ret_val = copy_from_user(&tmp, (unsigned long *)arg, sizeof(unsigned long)); ret_val = copy_from_user(&tmp, (unsigned long *)arg, sizeof(unsigned long));
if (ret_val != 0) { if (ret_val != 0) {
pr_info("user data could not be coppied, return %d\n", ret_val); pr_info("user data could not be coppied, return %d\n", ret_val);
} } else {
else {
spin_lock(&pd->stat_lock); spin_lock(&pd->stat_lock);
cpid = tmp[0]; cpid = tmp[0];
pid = d->pid_array[cpid]; pid = d->pid_array[cpid];
ret_val = unregister_pid(d, cpid); // tmp[0] - cpid
if (ret_val == -1) {
dbg_info("unregistration failed cpid %lld\n", cpid);
return -1;
}
// map // map
hash_for_each_possible(pid_cpid_map[d->id], tmp_cid, entry, pid) { hash_for_each_possible(pid_cpid_map[d->id], tmp_cid, entry, pid) {
if(tmp_cid->pid == pid && tmp_cid->cpid == cpid) { if(tmp_cid->pid == pid && tmp_cid->cpid == cpid) {
// unamp all leftover user pages // unamp all leftover user pages
tlb_put_user_pages_cpid(d, cpid, 1); tlb_put_user_pages_cpid(d, cpid, 1);
// unregister (if registered)
unregister_pid(d, cpid);
// Free from hash // Free from hash
hash_del(&tmp_cid->entry); hash_del(&tmp_cid->entry);
} }
} }
ret_val = unregister_pid(d, cpid); // tmp[0] - cpid
if (ret_val == -1) {
dbg_info("unregistration failed cpid %lld\n", cpid);
return -1;
}
spin_unlock(&pd->stat_lock); spin_unlock(&pd->stat_lock);
dbg_info("unregistration succeeded cpid %lld\n", tmp[0]);
} }
break; break;
@ -807,6 +804,8 @@ int32_t register_pid(struct fpga_dev *d, pid_t pid)
// unlock // unlock
spin_unlock(&d->card_pid_lock); spin_unlock(&d->card_pid_lock);
dbg_info("registration succeeded pid %d, cpid %d\n", pid, cpid);
return cpid; return cpid;
} }
@ -832,6 +831,8 @@ int unregister_pid(struct fpga_dev *d, int32_t cpid)
// release lock // release lock
spin_unlock(&d->card_pid_lock); spin_unlock(&d->card_pid_lock);
dbg_info("unregistration succeeded cpid %d\n", cpid);
return 0; return 0;
} }

View File

@ -538,6 +538,7 @@ end
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// DEBUG // DEBUG
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
//`define DBG_TLB_FSM_RD
`ifdef DBG_TLB_FSM_RD `ifdef DBG_TLB_FSM_RD
ila_fsm inst_ila_rd ( ila_fsm inst_ila_rd (
.clk(aclk), .clk(aclk),

View File

@ -334,31 +334,7 @@ axis_interconnect_tlb inst_mux_stlb (
.S00_ARB_REQ_SUPPRESS(1'b0), .S00_ARB_REQ_SUPPRESS(1'b0),
.S01_ARB_REQ_SUPPRESS(1'b0) .S01_ARB_REQ_SUPPRESS(1'b0)
); );
/*
ila_tlbf_2 inst_ila_tlbf2 (
.clk(aclk),
.probe0(axis_sTlb_0.tvalid),
.probe1(axis_sTlb_0.tready),
.probe2(axis_sTlb_0.tdata), // 128
.probe3(axis_sTlb_0.tlast),
.probe4(axis_sTlb_1.tvalid),
.probe5(axis_sTlb_1.tready),
.probe6(axis_sTlb_1.tdata), // 128
.probe7(axis_sTlb_1.tlast),
.probe8(axis_sTlb.tvalid),
.probe9(axis_sTlb.tready),
.probe10(axis_sTlb.tdata), // 128
.probe11(axis_sTlb.tlast),
.probe12(s_axis_sTlb.tvalid),
.probe13(s_axis_sTlb.tready),
.probe14(s_axis_sTlb.tdata), // 512
.probe15(s_axis_sTlb.tlast),
.probe16(s_axis_sTlb.tkeep) // 64
);
*/
axis_interconnect_tlb inst_mux_ltlb ( axis_interconnect_tlb inst_mux_ltlb (
.ACLK(aclk), .ACLK(aclk),
.ARESETN(aresetn), .ARESETN(aresetn),

View File

@ -40,29 +40,6 @@ logic [PID_BITS-1:0] req_pid;
metaIntf #(.STYPE(rdma_ack_t)) ack_que_in (); metaIntf #(.STYPE(rdma_ack_t)) ack_que_in ();
ila_req inst_ila_req (
.clk(aclk),
.probe0(s_req.valid),
.probe1(s_req.ready),
.probe2(s_req.data), // 512
.probe3(m_req.valid),
.probe4(m_req.ready),
.probe5(head_C[0][0]), // 4
.probe6(tail_C[0][0]), // 4
.probe7(issued_C[0][0]),
.probe8(ssn_rd_C),
.probe9(ack_vfid_C[0]),
.probe10(ack_pid_C[0]), // 6
.probe11(ack_rd_C),
.probe12(s_ack.valid),
.probe13(s_ack.ready),
.probe14(s_ack.data), // 40
.probe15(ssn_wr), // 4
.probe16(ssn_addr), // 12
.probe17(ssn_in), // 32
.probe18(ssn_out) // 32
);
ram_sp_nc #( ram_sp_nc #(
.ADDR_BITS(1+N_REGIONS_BITS+PID_BITS+RDMA_OST_BITS), .ADDR_BITS(1+N_REGIONS_BITS+PID_BITS+RDMA_OST_BITS),
.DATA_BITS(32) .DATA_BITS(32)
@ -111,15 +88,11 @@ always_comb begin
ssn_wr = 0; ssn_wr = 0;
ssn_addr = 0; ssn_addr = 0;
ssn_in = {s_req.data.cmplt, s_req.data.last, s_req.data.ssn}; ssn_in = {6'd0, s_req.data.cmplt, s_req.data.last, s_req.data.ssn};
s_ack.ready = 1'b0; s_ack.ready = 1'b0;
s_req.ready = 1'b0; s_req.ready = 1'b0;
m_req.valid = s_req.valid & s_req.ready;
m_req.data = s_req.data;
m_req.data.offs = head_C[req_rd][req_vfid][req_pid];
if(s_ack.valid) begin if(s_ack.valid) begin
// Service ack // Service ack
s_ack.ready = 1'b1; s_ack.ready = 1'b1;
@ -130,7 +103,7 @@ always_comb begin
end end
ssn_rd_N = 1'b1; ssn_rd_N = 1'b1;
ssn_addr = {ack_rd, ack_vfid, ack_pid, tail_C[ack_rd][ack_vfid][ack_pid]}; ssn_addr = {ack_rd, ack_vfid[N_REGIONS_BITS-1:0], ack_pid, tail_C[ack_rd][ack_vfid][ack_pid]};
ack_vfid_N = ack_vfid; ack_vfid_N = ack_vfid;
ack_pid_N = ack_pid; ack_pid_N = ack_pid;
@ -145,11 +118,18 @@ always_comb begin
issued_N[req_rd][req_vfid][req_pid] = 1'b1; issued_N[req_rd][req_vfid][req_pid] = 1'b1;
ssn_wr = ~0; ssn_wr = ~0;
ssn_addr = {req_rd, req_vfid, req_pid, head_C[req_rd][req_vfid][req_pid]}; ssn_addr = {req_rd, req_vfid[N_REGIONS_BITS-1:0], req_pid, head_C[req_rd][req_vfid][req_pid]};
end end
end end
end end
always_comb begin
m_req.valid = s_req.valid & s_req.ready;
m_req.data = s_req.data;
m_req.data.offs = head_C[req_rd][req_vfid][req_pid];
end
// DP // DP
assign ack_que_in.valid = ssn_rd_C && ssn_out[RDMA_MSN_BITS]; assign ack_que_in.valid = ssn_rd_C && ssn_out[RDMA_MSN_BITS];
assign ack_que_in.data.rd = ack_rd_C; assign ack_que_in.data.rd = ack_rd_C;
@ -177,4 +157,29 @@ assign req_rd = s_req.data.opcode == RC_RDMA_READ_REQUEST;
assign req_pid = s_req.data.qpn[0+:PID_BITS]; assign req_pid = s_req.data.qpn[0+:PID_BITS];
assign req_vfid = s_req.data.qpn[PID_BITS+:N_REGIONS_BITS]; assign req_vfid = s_req.data.qpn[PID_BITS+:N_REGIONS_BITS];
/*
ila_req inst_ila_req (
.clk(aclk),
.probe0(s_req.valid),
.probe1(s_req.ready),
.probe2(s_req.data), // 512
.probe3(m_req.valid),
.probe4(m_req.ready),
.probe5(head_C[0][0][0]), // 4
.probe6(tail_C[0][0][0]), // 4
.probe7(issued_C[0][0][0]),
.probe8(ssn_rd_C),
.probe9(ack_vfid_C[0]),
.probe10(ack_pid_C[0]), // 6
.probe11(ack_rd_C),
.probe12(s_ack.valid),
.probe13(s_ack.ready),
.probe14(s_ack.data), // 40
.probe15(ssn_wr), // 4
.probe16(ssn_addr), // 12
.probe17(ssn_in), // 32
.probe18(ssn_out) // 32
);
*/
endmodule endmodule

View File

@ -82,31 +82,6 @@ logic [RDMA_LEN_BITS-1:0] plen_C, plen_N;
metaIntf #(.STYPE(rdma_req_t)) req_pre_parsed (); metaIntf #(.STYPE(rdma_req_t)) req_pre_parsed ();
metaIntf #(.STYPE(rdma_req_t)) req_parsed (); metaIntf #(.STYPE(rdma_req_t)) req_parsed ();
ila_req_parser inst_ila_parser (
.clk(aclk),
.probe0(s_req.valid),
.probe1(s_req.ready),
.probe2(m_req.valid),
.probe3(m_req.ready),
.probe4(state_C), // 4
.probe5(qp_C), // 10
.probe6(host_C),
.probe7(mode_C),
.probe8(cmplt_C),
.probe9(ssn_C), // 24
.probe10(params_C), // 256
.probe11(op_C), // 5
.probe12(last_C),
.probe13(lvaddr_C[47:0]), // 48
.probe14(rvaddr_C[47:0]), // 48
.probe15(len_C), // 28
.probe16(pop_C), // 5
.probe17(plast_C),
.probe18(plvaddr_C[47:0]), // 48
.probe19(prvaddr_C[47:0]), // 48
.probe20(plen_C) // 32
);
// Decoupling // Decoupling
`META_ASSIGN(s_req, req_pre_parsed) `META_ASSIGN(s_req, req_pre_parsed)

View File

@ -144,7 +144,7 @@ assign rdma_ack.data.rd = ack_meta_data[0];
assign rdma_ack.data.cmplt = 1'b0; assign rdma_ack.data.cmplt = 1'b0;
assign rdma_ack.data.pid = ack_meta_data[1+:PID_BITS]; assign rdma_ack.data.pid = ack_meta_data[1+:PID_BITS];
assign rdma_ack.data.vfid = ack_meta_data[1+PID_BITS+:N_REGIONS_BITS]; assign rdma_ack.data.vfid = ack_meta_data[1+PID_BITS+:N_REGIONS_BITS];
assign rdma_ack.data.ssn = ack_meta_data[1+RDMA_ACK_QPN_BITS+:RDMA_ACK_PSN_BITS]; assign rdma_ack.data.ssn = ack_meta_data[1+RDMA_ACK_QPN_BITS+:RDMA_ACK_PSN_BITS]; // msn
// Flow control // Flow control
rdma_flow inst_rdma_flow ( rdma_flow inst_rdma_flow (
@ -318,4 +318,32 @@ rocev2_ip rocev2_inst(
`endif `endif
); );
/*
ila_ack inst_ila_ack (
.clk(nclk),
.probe0(rdma_ack.valid),
.probe1(rdma_ack.ready),
.probe2(rdma_ack.data), // 36
.probe3(rdma_sq.valid),
.probe4(rdma_sq.ready),
.probe5(rdma_sq.data), // 512
.probe6(s_axis_rx.tvalid),
.probe7(s_axis_rx.tready),
.probe8(s_axis_rx.tlast),
.probe9(m_axis_tx.tvalid),
.probe10(m_axis_tx.tready),
.probe11(m_axis_tx.tlast),
.probe12(m_rdma_wr_req.valid),
.probe13(m_rdma_wr_req.ready),
.probe14(m_rdma_rd_req.valid),
.probe15(m_rdma_rd_req.ready),
.probe16(m_axis_rdma_wr.tvalid),
.probe17(m_axis_rdma_wr.tready),
.probe18(m_axis_rdma_wr.tlast),
.probe19(s_axis_rdma_rd.tvalid),
.probe20(s_axis_rdma_rd.tready),
.probe21(s_axis_rdma_rd.tlast)
);
*/
endmodule endmodule

View File

@ -109,54 +109,5 @@ end
// Slices (RX and TX) // Slices (RX and TX)
axis_reg_array #(.N_STAGES(N_STGS)) inst_rx (.aclk(aclk), .aresetn(aresetn), .s_axis(rx_axis), .m_axis(m_rx_axis)); axis_reg_array #(.N_STAGES(N_STGS)) inst_rx (.aclk(aclk), .aresetn(aresetn), .s_axis(rx_axis), .m_axis(m_rx_axis));
axis_reg_array #(.N_STAGES(N_STGS)) inst_tx (.aclk(aclk), .aresetn(aresetn), .s_axis(s_tx_axis), .m_axis(m_tx_axis)); axis_reg_array #(.N_STAGES(N_STGS)) inst_tx (.aclk(aclk), .aresetn(aresetn), .s_axis(s_tx_axis), .m_axis(m_tx_axis));
/*
logic [31:0] cnt_data_s;
logic [31:0] cnt_data_s_n4k_rx;
logic [31:0] cnt_data_m;
logic [31:0] cnt_data_m_n4k_rx;
always_ff @(posedge aclk) begin
if(~aresetn) begin
cnt_data_s_n4k_rx <= 0;
cnt_data_s <= 0;
cnt_data_m_n4k_rx <= 0;
cnt_data_m <= 0;
end
else begin
cnt_data_s <= (s_rx_axis.tvalid & s_rx_axis.tready & s_rx_axis.tlast) ?
0 : (s_rx_axis.tvalid & s_rx_axis.tready ? cnt_data_s + 1 : cnt_data_s);
cnt_data_s_n4k_rx <= (s_rx_axis.tvalid & s_rx_axis.tready & s_rx_axis.tlast) && (cnt_data_s != 64) ? cnt_data_s_n4k_rx + 1 : cnt_data_s_n4k_rx;
cnt_data_m <= (rx_axis.tvalid & rx_axis.tready & rx_axis.tlast) ?
0 : (rx_axis.tvalid & rx_axis.tready ? cnt_data_m + 1 : cnt_data_m);
cnt_data_m_n4k_rx <= (rx_axis.tvalid & rx_axis.tready & rx_axis.tlast) && (cnt_data_m != 64) ? cnt_data_m_n4k_rx + 1 : cnt_data_m_n4k_rx;
end
end
ila_nstack inst_ila_nstack (
.clk(aclk),
.probe0(s_rx_axis.tvalid),
.probe1(s_rx_axis.tready),
.probe2(s_rx_axis.tdata), // 512
.probe3(s_rx_axis.tlast),
.probe4(s_rx_axis.tkeep), // 64
.probe5(rx_axis.tvalid),
.probe6(rx_axis.tready),
.probe7(rx_axis.tdata), // 512
.probe8(rx_axis.tlast),
.probe9(rx_axis.tkeep), // 64
.probe10(cnt_data_s), // 32
.probe11(cnt_data_m), // 32
.probe12(cnt_data_s_n4k_rx), // 32
.probe13(cnt_data_m_n4k_rx), // 32
.probe14(prog_full),
.probe15(wr_cnt), // 32
.probe16(state_C) // 2
);
*/
endmodule endmodule

View File

@ -649,9 +649,9 @@ always_ff @(posedge aclk) begin
RDMA_0_STAT_POSTED_REG: // Posts RDMA_0_STAT_POSTED_REG: // Posts
axi_rdata[31:0] <= slv_reg[RDMA_0_STAT_POSTED_REG][31:0]; axi_rdata[31:0] <= slv_reg[RDMA_0_STAT_POSTED_REG][31:0];
RDMA_0_CMPLT_REG: begin RDMA_0_CMPLT_REG: begin
axi_rdata[0] <= cmplt_que_rdma_0_out.data.ssn; axi_rdata[0] <= cmplt_que_rdma_0_out.data.valid;
axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_0_out.data.pid; axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_0_out.data.pid;
axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_0_out.valid; axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_0_out.ssn;
end end
`endif `endif
@ -677,9 +677,9 @@ always_ff @(posedge aclk) begin
RDMA_1_STAT_POSTED_REG: // Posts RDMA_1_STAT_POSTED_REG: // Posts
axi_rdata[31:0] <= slv_reg[RDMA_1_STAT_POSTED_REG][31:0]; axi_rdata[31:0] <= slv_reg[RDMA_1_STAT_POSTED_REG][31:0];
RDMA_1_CMPLT_REG: begin RDMA_1_CMPLT_REG: begin
axi_rdata[0] <= cmplt_que_rdma_1_out.data.ssn; axi_rdata[0] <= cmplt_que_rdma_1_out.data.valid;
axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_1_out.data.pid; axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_1_out.data.pid;
axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_1_out.valid; axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_1_out.ssn;
end end
`endif `endif

View File

@ -494,9 +494,9 @@ always_ff @(posedge aclk) begin
[RDMA_0_STAT_REG:RDMA_0_STAT_REG]: [RDMA_0_STAT_REG:RDMA_0_STAT_REG]:
axi_rdata[63:0] <= {slv_reg[RDMA_0_STAT_REG][RDMA_POSTED_OFFS+:32], rdma_0_queue_used[31:0]}; axi_rdata[63:0] <= {slv_reg[RDMA_0_STAT_REG][RDMA_POSTED_OFFS+:32], rdma_0_queue_used[31:0]};
[RDMA_0_CMPLT_REG:RDMA_0_CMPLT_REG]: begin [RDMA_0_CMPLT_REG:RDMA_0_CMPLT_REG]: begin
axi_rdata[0] <= cmplt_que_rdma_0_out.data.ssn; axi_rdata[0] <= cmplt_que_rdma_0_out.data.valid;
axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_0_out.data.pid; axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_0_out.data.pid;
axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_0_out.valid; axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_0_out.ssn;
end end
`endif `endif
@ -508,9 +508,9 @@ always_ff @(posedge aclk) begin
[RDMA_1_STAT_REG:RDMA_1_STAT_REG]: [RDMA_1_STAT_REG:RDMA_1_STAT_REG]:
axi_rdata[63:0] <= {slv_reg[RDMA_1_STAT_REG][RDMA_POSTED_OFFS+:32], rdma_1_queue_used[31:0]}; axi_rdata[63:0] <= {slv_reg[RDMA_1_STAT_REG][RDMA_POSTED_OFFS+:32], rdma_1_queue_used[31:0]};
[RDMA_1_CMPLT_REG:RDMA_1_CMPLT_REG]: begin [RDMA_1_CMPLT_REG:RDMA_1_CMPLT_REG]: begin
axi_rdata[0] <= cmplt_que_rdma_1_out.data.ssn; axi_rdata[0] <= cmplt_que_rdma_1_out.data.valid;
axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_1_out.data.pid; axi_rdata[RDMA_CMPLT_PID_OFFS+:PID_BITS] <= cmplt_que_rdma_1_out.data.pid;
axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_1_out.valid; axi_rdata[RDMA_CMPLT_SSN_OFFS+:RDMA_MSN_BITS] <= cmplt_que_rdma_1_out.ssn;
end end
`endif `endif
@ -953,7 +953,7 @@ metaIntf #(.STYPE(rdma_req_t)) rdma_0_sq();
assign rdma_0_sq_cnfg.data.opcode = slv_reg[RDMA_0_POST_REG][1+:RDMA_OPCODE_BITS]; // opcode assign rdma_0_sq_cnfg.data.opcode = slv_reg[RDMA_0_POST_REG][1+:RDMA_OPCODE_BITS]; // opcode
assign rdma_0_sq_cnfg.data.qpn[0+:PID_BITS] = slv_reg[RDMA_0_POST_REG][1+RDMA_OPCODE_BITS+:PID_BITS]; // local cpid assign rdma_0_sq_cnfg.data.qpn[0+:PID_BITS] = slv_reg[RDMA_0_POST_REG][1+RDMA_OPCODE_BITS+:PID_BITS]; // local cpid
assign rdma_0_sq_cnfg.data.qpn[PID_BITS+:DEST_BITS] = ID_REG; // local region assign rdma_0_sq_cnfg.data.qpn[PID_BITS+:DEST_BITS] = ID_REG; // local region
assign rdma_0_sq_cnfg.data.host = slv_reg[RDMA_0_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS]; // host assign rdma_0_sq_cnfg.data.host = 1'b1;//slv_reg[RDMA_0_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS]; // host
assign rdma_0_sq_cnfg.data.mode = RDMA_MODE_PARSE; // mode assign rdma_0_sq_cnfg.data.mode = RDMA_MODE_PARSE; // mode
assign rdma_0_sq_cnfg.data.last = 1'b1; assign rdma_0_sq_cnfg.data.last = 1'b1;
assign rdma_0_sq_cnfg.data.cmplt = slv_reg[RDMA_0_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS+3]; assign rdma_0_sq_cnfg.data.cmplt = slv_reg[RDMA_0_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS+3];
@ -1097,7 +1097,7 @@ metaIntf #(.STYPE(rdma_req_t)) rdma_1_sq();
assign rdma_1_sq_cnfg.data.opcode = slv_reg[RDMA_1_POST_REG][1+:RDMA_OPCODE_BITS]; // opcode assign rdma_1_sq_cnfg.data.opcode = slv_reg[RDMA_1_POST_REG][1+:RDMA_OPCODE_BITS]; // opcode
assign rdma_1_sq_cnfg.data.qpn[0+:PID_BITS] = slv_reg[RDMA_1_POST_REG][1+RDMA_OPCODE_BITS+:PID_BITS]; // local cpid assign rdma_1_sq_cnfg.data.qpn[0+:PID_BITS] = slv_reg[RDMA_1_POST_REG][1+RDMA_OPCODE_BITS+:PID_BITS]; // local cpid
assign rdma_1_sq_cnfg.data.qpn[PID_BITS+:DEST_BITS] = ID_REG; // local region assign rdma_1_sq_cnfg.data.qpn[PID_BITS+:DEST_BITS] = ID_REG; // local region
assign rdma_1_sq_cnfg.data.host = slv_reg[RDMA_1_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS]; // host assign rdma_1_sq_cnfg.data.host = 1'b1; //slv_reg[RDMA_1_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS]; // host
assign rdma_1_sq_cnfg.data.mode = RDMA_MODE_PARSE; // mode assign rdma_1_sq_cnfg.data.mode = RDMA_MODE_PARSE; // mode
assign rdma_1_sq_cnfg.data.last = 1'b1; assign rdma_1_sq_cnfg.data.last = 1'b1;
assign rdma_1_sq_cnfg.data.cmplt = slv_reg[RDMA_1_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS+3]; assign rdma_1_sq_cnfg.data.cmplt = slv_reg[RDMA_1_POST_REG][1+RDMA_OPCODE_BITS+PID_BITS+DEST_BITS+3];

View File

@ -221,11 +221,11 @@ package lynxTypes;
parameter integer RDMA_BASE_REQ_BITS = 96; parameter integer RDMA_BASE_REQ_BITS = 96;
parameter integer RDMA_VADDR_BITS = 64; parameter integer RDMA_VADDR_BITS = 64;
parameter integer RDMA_LEN_BITS = 32; parameter integer RDMA_LEN_BITS = 32;
parameter integer RDMA_REQ_BITS = 544; parameter integer RDMA_REQ_BITS = 512;
parameter integer RDMA_OPCODE_BITS = 5; parameter integer RDMA_OPCODE_BITS = 5;
parameter integer RDMA_QPN_BITS = 10; parameter integer RDMA_QPN_BITS = 10;
parameter integer RDMA_PARAMS_BITS = 352; parameter integer RDMA_PARAMS_BITS = 288;
parameter integer RDMA_MSG_BITS = 512; parameter integer RDMA_MSG_BITS = 448;
parameter integer RDMA_QP_INTF_BITS = 168; parameter integer RDMA_QP_INTF_BITS = 168;
parameter integer RDMA_QP_CONN_BITS = 184; parameter integer RDMA_QP_CONN_BITS = 184;
parameter integer RDMA_LVADDR_OFFS = 0; parameter integer RDMA_LVADDR_OFFS = 0;

View File

@ -1,9 +1,7 @@
#pragma once #pragma once
#include <stdint.h> #include <stdint.h>
//#if ${RETRANS_EN} #define RETRANS_EN
#define RETRANS_EN
//#endif
const unsigned DATA_WIDTH = ${DATA_WIDTH} * 8; const unsigned DATA_WIDTH = ${DATA_WIDTH} * 8;

View File

@ -64,7 +64,7 @@ int main(int argc, char *argv[])
("tcpaddr,t", boost::program_options::value<string>(), "TCP conn IP") ("tcpaddr,t", boost::program_options::value<string>(), "TCP conn IP")
("benchruns,b", boost::program_options::value<uint32_t>(), "Number of bench runs") ("benchruns,b", boost::program_options::value<uint32_t>(), "Number of bench runs")
("repst,r", boost::program_options::value<uint32_t>(), "Number of throughput repetitions within a run") ("repst,r", boost::program_options::value<uint32_t>(), "Number of throughput repetitions within a run")
("repsl,r", boost::program_options::value<uint32_t>(), "Number of latency repetitions within a run") ("repsl,l", boost::program_options::value<uint32_t>(), "Number of latency repetitions within a run")
("mins,n", boost::program_options::value<uint32_t>(), "Minimum transfer size") ("mins,n", boost::program_options::value<uint32_t>(), "Minimum transfer size")
("maxs,x", boost::program_options::value<uint32_t>(), "Maximum transfer size") ("maxs,x", boost::program_options::value<uint32_t>(), "Maximum transfer size")
("oper,w", boost::program_options::value<bool>(), "Read or Write"); ("oper,w", boost::program_options::value<bool>(), "Read or Write");
@ -112,7 +112,7 @@ int main(int argc, char *argv[])
std::cout << "Max size: " << max_size << std::endl; std::cout << "Max size: " << max_size << std::endl;
std::cout << "Number of throughput reps: " << n_reps_thr << std::endl; std::cout << "Number of throughput reps: " << n_reps_thr << std::endl;
std::cout << "Number of latency reps: " << n_reps_lat << std::endl; std::cout << "Number of latency reps: " << n_reps_lat << std::endl;
// Create queue pairs // Create queue pairs
ibvQpMap ictx; ibvQpMap ictx;
ictx.addQpair(qpId, targetRegion, ibv_ip, n_pages); ictx.addQpair(qpId, targetRegion, ibv_ip, n_pages);
@ -180,6 +180,8 @@ int main(int argc, char *argv[])
std::cout << std::fixed << std::setprecision(2); std::cout << std::fixed << std::setprecision(2);
std::cout << std::setw(8) << sg.type.rdma.len << " [bytes], thoughput: " std::cout << std::setw(8) << sg.type.rdma.len << " [bytes], thoughput: "
<< std::setw(8) << ((1 + oper) * ((1000 * sg.type.rdma.len))) / ((bench.getAvg()) / n_reps_thr) << " [MB/s], latency: "; << std::setw(8) << ((1 + oper) * ((1000 * sg.type.rdma.len))) / ((bench.getAvg()) / n_reps_thr) << " [MB/s], latency: ";
std::cout << std::endl << std::endl << "ACKs: " << cproc->ibvCheckAcks() << std::endl;
#endif #endif
// Reset // Reset

View File

@ -159,6 +159,7 @@ enum class CnfgAvxRegs : uint32_t {
RDMA_POST_REG_0 = 17, RDMA_POST_REG_0 = 17,
RDMA_POST_REG_1 = 18, RDMA_POST_REG_1 = 18,
RDMA_STAT_REG = 19, RDMA_STAT_REG = 19,
RDMA_CMPLT_REG = 20,
STAT_DMA_REG = 64 STAT_DMA_REG = 64
}; };
@ -197,7 +198,9 @@ enum class CnfgLegRegs : uint32_t {
RDMA_POST_REG_7 = 40, RDMA_POST_REG_7 = 40,
RDMA_STAT_CMD_USED_REG = 41, RDMA_STAT_CMD_USED_REG = 41,
RDMA_STAT_POSTED_REG = 42, RDMA_STAT_POSTED_REG = 42,
STAT_DMA_REG = 64 RDMA_CMPLT_REG = 43,
STAT_DMA_REG = 64,
STAT_RDMA_REG = 128,
}; };
/** /**

View File

@ -198,6 +198,8 @@ public:
* @brief Return the number of completed RDMA acks * @brief Return the number of completed RDMA acks
* *
*/ */
uint32_t ibvCheckAcks();
int32_t ibvGetCompleted(int32_t &cpid);
uint32_t checkIbvAcks(); uint32_t checkIbvAcks();
void clearIbvAcks(); void clearIbvAcks();

View File

@ -555,6 +555,50 @@ void cProcess::clearCompleted() {
// Network // Network
// ======------------------------------------------------------------------------------- // ======-------------------------------------------------------------------------------
/**
* @brief Check number of completed RDMA operations
*
* @param cpid - Coyote operation struct
* @return uint32_t - number of completed operations
*/
uint32_t cProcess::ibvCheckAcks() {
if(fcnfg.en_wb) {
return wback[cpid + ((fcnfg.qsfp ? 3 : 2) * nCpidMax)];
} else {
#ifdef EN_AVX
if(fcnfg.en_avx)
return fcnfg.qsfp ? _mm256_extract_epi32(cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::STAT_DMA_REG) + cpid], 3) :
_mm256_extract_epi32(cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::STAT_DMA_REG) + cpid], 2);
else
#endif
return (fcnfg.qsfp ? (HIGH_32(cnfg_reg[static_cast<uint32_t>(CnfgLegRegs::STAT_RDMA_REG) + cpid])) :
( LOW_32(cnfg_reg[static_cast<uint32_t>(CnfgLegRegs::STAT_RDMA_REG) + cpid])));
}
}
/**
* @brief Check completion queue
*
* @param cmplt_cpid - Coyote pid
* @return int32_t - ssn
*/
int32_t cProcess::ibvGetCompleted(int32_t &cpid) {
uint64_t cmplt_meta;
#ifdef EN_AVX
if(fcnfg.en_avx)
cmplt_meta = _mm256_extract_epi64(cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::RDMA_CMPLT_REG)], 0);
else
#endif
cmplt_meta = cnfg_reg[static_cast<uint32_t>(CnfgLegRegs::RDMA_CMPLT_REG)];
if(cmplt_meta & 0x1) {
cpid = (cmplt_meta >> 16) & 0x3f;
return HIGH_32(cmplt_meta);
} else {
return -1;
}
}
/** /**
* @brief Post an IB operation * @brief Post an IB operation
* *
@ -697,7 +741,7 @@ void cProcess::clearIbvAcks() {
*/ */
void cProcess::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0) { void cProcess::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64_t offs_0) {
// Lock // Lock
//dlock.lock(); dlock.lock();
// Check outstanding // Check outstanding
while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) { while (rdma_cmd_cnt > (cmd_fifo_depth - cmd_fifo_thr)) {
@ -714,10 +758,8 @@ void cProcess::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64
// Send // Send
#ifdef EN_AVX #ifdef EN_AVX
if(fcnfg.en_avx) { if(fcnfg.en_avx) {
std::cout << "HERE FIRING" << std::endl;
cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::RDMA_POST_REG) + fcnfg.qsfp_offs] = _mm256_set_epi64x(offs_3, offs_2, offs_1, offs_0); cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::RDMA_POST_REG) + fcnfg.qsfp_offs] = _mm256_set_epi64x(offs_3, offs_2, offs_1, offs_0);
std::cout << "HERE FIRING 2" << std::endl;
// Inc // Inc
rdma_cmd_cnt++; rdma_cmd_cnt++;
} else { } else {
@ -736,7 +778,7 @@ void cProcess::postCmd(uint64_t offs_3, uint64_t offs_2, uint64_t offs_1, uint64
#endif #endif
// Unlock // Unlock
//dlock.unlock(); dlock.unlock();
} }
// ======------------------------------------------------------------------------------- // ======-------------------------------------------------------------------------------

View File

@ -101,7 +101,7 @@ void ibvQpConn::initLocalQueue(string ip_addr) {
qpair->local.rkey = 0; qpair->local.rkey = 0;
// Allocate buffer // Allocate buffer
void *vaddr = fdev->getMem({CoyoteAlloc::HOST_2M, n_pages}); void *vaddr = fdev->getMem({CoyoteAlloc::HUGE_2M, n_pages});
qpair->local.vaddr = (uint64_t) vaddr; qpair->local.vaddr = (uint64_t) vaddr;
qpair->local.size = n_pages * hugePageSize; qpair->local.size = n_pages * hugePageSize;
} }