diff --git a/deploy/sample-backup-configs/sample_config_hwdb.yaml b/deploy/sample-backup-configs/sample_config_hwdb.yaml index f2e20835..d73affdb 100644 --- a/deploy/sample-backup-configs/sample_config_hwdb.yaml +++ b/deploy/sample-backup-configs/sample_config_hwdb.yaml @@ -9,34 +9,25 @@ # If you are using an older version of FireSim, you will need to generate your # own images. -firesim_rocket_singlecore_sha3_no_nic_l2_llc4mb_ddr3_printf: - agfi: agfi-088e72c309ad8bb84 - deploy_triplet_override: null - custom_runtime_config: null - -firesim_boom_singlecore_nic_l2_llc4mb_ddr3: - agfi: agfi-0f8e76f0bae8086fb - deploy_triplet_override: null - custom_runtime_config: null - -firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3: - agfi: agfi-06190ac8ae0f6de18 - deploy_triplet_override: null - custom_runtime_config: null - # DOCREF START: Example HWDB Entry -firesim_rocket_quadcore_nic_l2_llc4mb_ddr3: - agfi: agfi-0d3f979b71eec9b7f +firesim_boom_singlecore_nic_l2_llc4mb_ddr3: + agfi: agfi-0da1eb7805ed745b4 deploy_triplet_override: null custom_runtime_config: null # DOCREF END: Example HWDB Entry - +firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3: + agfi: agfi-0165525d8d88f7a5f + deploy_triplet_override: null + custom_runtime_config: null +firesim_rocket_quadcore_nic_l2_llc4mb_ddr3: + agfi: agfi-07ca3beae463369b3 + deploy_triplet_override: null + custom_runtime_config: null firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3: - agfi: agfi-0467dc13c58dfd13c + agfi: agfi-0018bceeef7cc7809 deploy_triplet_override: null custom_runtime_config: null - firesim_supernode_rocket_singlecore_nic_l2_lbp: - agfi: agfi-0d4b18f24bfedf193 + agfi: agfi-043ef11ebeaf519a6 deploy_triplet_override: null custom_runtime_config: null diff --git a/sim/midas/src/main/cc/bridges/cpu_managed_stream.cc b/sim/midas/src/main/cc/bridges/cpu_managed_stream.cc index 3dd6a973..30ef85fd 100644 --- a/sim/midas/src/main/cc/bridges/cpu_managed_stream.cc +++ b/sim/midas/src/main/cc/bridges/cpu_managed_stream.cc @@ -19,10 
+19,10 @@ size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) { // implement non-multiples of 512b. The FPGA-side queue will take on the // high-order bytes of the final beat in the transaction, and the strobe is // not respected. So put the assertion here and discuss what to do next. - assert((num_bytes % DMA_BEAT_BYTES) == 0); + assert((num_bytes % CPU_MANAGED_AXI4_BEAT_BYTES) == 0); - auto num_beats = num_bytes / DMA_BEAT_BYTES; - auto threshold_beats = required_bytes / DMA_BEAT_BYTES; + auto num_beats = num_bytes / CPU_MANAGED_AXI4_BEAT_BYTES; + auto threshold_beats = required_bytes / CPU_MANAGED_AXI4_BEAT_BYTES; assert(threshold_beats <= this->fpga_buffer_size()); auto space_available = @@ -33,8 +33,9 @@ size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) { } auto push_beats = std::min(space_available, num_beats); - auto push_bytes = push_beats * DMA_BEAT_BYTES; - auto bytes_written = pcis_write(this->dma_addr(), (char *)src, push_bytes); + auto push_bytes = push_beats * CPU_MANAGED_AXI4_BEAT_BYTES; + auto bytes_written = + this->axi4_write(this->dma_addr(), (char *)src, push_bytes); assert(bytes_written == push_bytes); return bytes_written; @@ -54,19 +55,19 @@ size_t StreamToCPU::pull(void *dest, size_t num_bytes, size_t required_bytes) { assert(num_bytes >= required_bytes); // The legacy code is clearly broken for requests that aren't a - // multiple of 512b since DMA_SIZE is fixed to the full width of the AXI4 IF. - // The high-order bytes of the final word will be copied into the destination - // buffer (potentially an overflow, bug 1), and since reads are destructive, - // will not be visible to future pulls (bug 2). So i've put this assertion - // here for now... + // multiple of 512b since CPU_MANAGED_AXI4_SIZE is fixed to the full width of + // the AXI4 IF. 
The high-order bytes of the final word will be copied into the + // destination buffer (potentially an overflow, bug 1), and since reads are + // destructive, will not be visible to future pulls (bug 2). So i've put this + // assertion here for now... // Due to the destructive nature of reads, if we wish to support reads that // aren't a multiple of 512b, we'll need to keep a little buffer around for // the remainder, and prepend this to the destination buffer. - assert((num_bytes % DMA_BEAT_BYTES) == 0); + assert((num_bytes % CPU_MANAGED_AXI4_BEAT_BYTES) == 0); - auto num_beats = num_bytes / DMA_BEAT_BYTES; - auto threshold_beats = required_bytes / DMA_BEAT_BYTES; + auto num_beats = num_bytes / CPU_MANAGED_AXI4_BEAT_BYTES; + auto threshold_beats = required_bytes / CPU_MANAGED_AXI4_BEAT_BYTES; assert(threshold_beats <= this->fpga_buffer_size()); auto count = this->mmio_read(this->count_addr()); @@ -76,8 +77,8 @@ size_t StreamToCPU::pull(void *dest, size_t num_bytes, size_t required_bytes) { } auto pull_beats = std::min(count, num_beats); - auto pull_bytes = pull_beats * DMA_BEAT_BYTES; - auto bytes_read = this->pcis_read(this->dma_addr(), (char *)dest, pull_bytes); + auto pull_bytes = pull_beats * CPU_MANAGED_AXI4_BEAT_BYTES; + auto bytes_read = this->axi4_read(this->dma_addr(), (char *)dest, pull_bytes); assert(bytes_read == pull_bytes); return bytes_read; } diff --git a/sim/midas/src/main/cc/bridges/cpu_managed_stream.h b/sim/midas/src/main/cc/bridges/cpu_managed_stream.h index bfc15b33..6c165b2e 100644 --- a/sim/midas/src/main/cc/bridges/cpu_managed_stream.h +++ b/sim/midas/src/main/cc/bridges/cpu_managed_stream.h @@ -28,15 +28,15 @@ typedef struct CPUManagedStreamParameters { /** * @brief Base class for CPU-managed streams * - * Streams implemented with the CPUManagedStreamingEngine have a common set of + * Streams implemented with the CPUManagedStreamEngine have a common set of * parameters, and use MMIO to measure FPGA-queue occupancy. 
This base class * captures that. * * Children of this class implement the host-independent control for streams. * Generally, this consists of doing an MMIO read to FPGA-side queue capacity, * to determine if a stream request can be served. Host implementations - * instantiate these classes with callbacks to implement MMIO and DMA/PCIS/PCIM - * for their platform. + * instantiate these classes with callbacks to implement MMIO and either CPU- or + * FPGA-managed AXI4 for their platform. * */ class CPUManagedStream { @@ -61,41 +61,41 @@ public: * @brief Implements streams sunk by the driver (sourced by the FPGA) * * Extends CPUManagedStream to provide a pull method, which moves data from the - * FPGA into a user-provided buffer. IO over a CPU-mastered AXI4 IF is - * implemented with pcis_read, and is provided by the host-platform. + * FPGA into a user-provided buffer. IO over a CPU-managed AXI4 IF is + * implemented with axi4_read, and is provided by the host-platform. * */ class StreamToCPU : public CPUManagedStream { public: StreamToCPU(CPUManagedStreamParameters params, std::function mmio_read, - std::function pcis_read) - : CPUManagedStream(params, mmio_read), pcis_read(pcis_read){}; + std::function axi4_read) + : CPUManagedStream(params, mmio_read), axi4_read(axi4_read){}; size_t pull(void *dest, size_t num_bytes, size_t required_bytes); private: - std::function pcis_read; + std::function axi4_read; }; /** * @brief Implements streams sourced by the driver (sunk by the FPGA) * * Extends CPUManagedStream to provide a push method, which moves data to the - * FPGA out of a user-provided buffer. IO over a CPU-mastered AXI4 IF is - * implemented with pcis_write, and is provided by the host-platform. + * FPGA out of a user-provided buffer. IO over a CPU-managed AXI4 IF is + * implemented with axi4_write, and is provided by the host-platform. 
*/ class StreamFromCPU : public CPUManagedStream { public: StreamFromCPU(CPUManagedStreamParameters params, std::function mmio_read, - std::function pcis_write) - : CPUManagedStream(params, mmio_read), pcis_write(pcis_write){}; + std::function axi4_write) + : CPUManagedStream(params, mmio_read), axi4_write(axi4_write){}; size_t push(void *src, size_t num_bytes, size_t required_bytes); private: - std::function pcis_write; + std::function axi4_write; }; #endif // __CPU_MANAGED_STREAM_H diff --git a/sim/midas/src/main/cc/bridges/synthesized_prints.h b/sim/midas/src/main/cc/bridges/synthesized_prints.h index e23e08b2..a16a069d 100644 --- a/sim/midas/src/main/cc/bridges/synthesized_prints.h +++ b/sim/midas/src/main/cc/bridges/synthesized_prints.h @@ -82,10 +82,11 @@ private: ClockInfo clock_info; const int printno; - // DMA batching parameters + // Stream batching parameters static constexpr size_t beat_bytes = BridgeConstants::STREAM_WIDTH_BYTES; - // The number of DMA beats to pull off the FPGA on each invocation of tick() - // This will be set based on the ratio of token_size : desired_batch_beats + // The number of stream beats to pull off the FPGA on each invocation of + // tick() This will be set based on the ratio of token_size : + // desired_batch_beats size_t batch_beats; // This will be modified to be a multiple of the token size const size_t desired_batch_beats = stream_depth / 2; diff --git a/sim/midas/src/main/cc/emul/vcs-harness.cc b/sim/midas/src/main/cc/emul/vcs-harness.cc index 20b6f666..64545a38 100644 --- a/sim/midas/src/main/cc/emul/vcs-harness.cc +++ b/sim/midas/src/main/cc/emul/vcs-harness.cc @@ -11,11 +11,59 @@ extern bool vcs_fin; extern bool vcs_rst; extern uint64_t main_time; -static const size_t CTRL_DATA_SIZE = CTRL_BEAT_BYTES / sizeof(uint32_t); -static const size_t DMA_DATA_SIZE = DMA_BEAT_BYTES / sizeof(uint32_t); -static const size_t DMA_STRB_SIZE = - (DMA_BEAT_BYTES / 8 + sizeof(uint32_t) - 1) / sizeof(uint32_t); -static const size_t 
MEM_DATA_SIZE = MEM_BEAT_BYTES / sizeof(uint32_t); +constexpr size_t CTRL_DATA_SIZE = CTRL_BEAT_BYTES / sizeof(uint32_t); +constexpr size_t CPU_MANAGED_AXI4_DATA_SIZE = + CPU_MANAGED_AXI4_BEAT_BYTES / sizeof(uint32_t); +constexpr size_t CPU_MANAGED_AXI4_STRB_SIZE = + (CPU_MANAGED_AXI4_BEAT_BYTES / 8 + sizeof(uint32_t) - 1) / sizeof(uint32_t); +constexpr size_t FPGA_MANAGED_AXI4_DATA_SIZE = + (FPGA_MANAGED_AXI4_DATA_BITS / 8) / sizeof(uint32_t); +constexpr size_t FPGA_MANAGED_AXI4_STRB_SIZE = + ((FPGA_MANAGED_AXI4_DATA_BITS / 8) / 8 + sizeof(uint32_t) - 1) / + sizeof(uint32_t); +constexpr size_t MEM_DATA_SIZE = MEM_BEAT_BYTES / sizeof(uint32_t); + +/** + * @brief get a uint64_t from a vc_handle that may be a scalar or vector + * + * vc_handles for single bit vs multibit values need to be accessed at runtime + * with different methods. This handles that for fields that might be 1-bit wide + * + * In practise this is just the ID field, so return uint64_t which is what mm + * expects + * + * @param h the vc_handle + * @param width the expected width of the bitvector + * @return uint64_t the bitvector encoded as uint64_t + */ +uint64_t getScalarOrVector(const vc_handle &h, int width) { + assert(width >= 1 && width <= 64); + return (width == 1) ? vc_getScalar(h) : vc_4stVectorRef(h)->d; +} + +/** + * @brief Put the LSBs of @value into a vc_handle that may be a vector or + * scalar. 
+ * + * @param h the vc_handle + * @param value a uint64_t whose LSBs contain a bitvector to drive onto the + * handle + * @param width the width of the bitvector + */ +void putScalarOrVector(const vc_handle &h, uint64_t value, int width) { + assert(width >= 1 && width <= 64); + if (width == 1) { + vc_putScalar(h, value & 1); + } else { + vec32 md[sizeof(uint64_t) / sizeof(uint32_t)]; + md[0].c = 0; + md[0].d = (uint32_t)value; + md[1].c = 0; + md[1].d = (uint32_t)(value >> 32); + vc_put4stVector(h, md); + } +} + extern "C" { void tick(vc_handle reset, vc_handle fin, @@ -52,37 +100,69 @@ void tick(vc_handle reset, vc_handle ctrl_b_bits_resp, vc_handle ctrl_b_bits_id, - vc_handle dma_ar_valid, - vc_handle dma_ar_ready, - vc_handle dma_ar_bits_addr, - vc_handle dma_ar_bits_id, - vc_handle dma_ar_bits_size, - vc_handle dma_ar_bits_len, + vc_handle cpu_managed_axi4_ar_valid, + vc_handle cpu_managed_axi4_ar_ready, + vc_handle cpu_managed_axi4_ar_bits_addr, + vc_handle cpu_managed_axi4_ar_bits_id, + vc_handle cpu_managed_axi4_ar_bits_size, + vc_handle cpu_managed_axi4_ar_bits_len, - vc_handle dma_aw_valid, - vc_handle dma_aw_ready, - vc_handle dma_aw_bits_addr, - vc_handle dma_aw_bits_id, - vc_handle dma_aw_bits_size, - vc_handle dma_aw_bits_len, + vc_handle cpu_managed_axi4_aw_valid, + vc_handle cpu_managed_axi4_aw_ready, + vc_handle cpu_managed_axi4_aw_bits_addr, + vc_handle cpu_managed_axi4_aw_bits_id, + vc_handle cpu_managed_axi4_aw_bits_size, + vc_handle cpu_managed_axi4_aw_bits_len, - vc_handle dma_w_valid, - vc_handle dma_w_ready, - vc_handle dma_w_bits_strb, - vc_handle dma_w_bits_data, - vc_handle dma_w_bits_last, + vc_handle cpu_managed_axi4_w_valid, + vc_handle cpu_managed_axi4_w_ready, + vc_handle cpu_managed_axi4_w_bits_strb, + vc_handle cpu_managed_axi4_w_bits_data, + vc_handle cpu_managed_axi4_w_bits_last, - vc_handle dma_r_valid, - vc_handle dma_r_ready, - vc_handle dma_r_bits_resp, - vc_handle dma_r_bits_id, - vc_handle dma_r_bits_data, - vc_handle 
dma_r_bits_last, + vc_handle cpu_managed_axi4_r_valid, + vc_handle cpu_managed_axi4_r_ready, + vc_handle cpu_managed_axi4_r_bits_resp, + vc_handle cpu_managed_axi4_r_bits_id, + vc_handle cpu_managed_axi4_r_bits_data, + vc_handle cpu_managed_axi4_r_bits_last, - vc_handle dma_b_valid, - vc_handle dma_b_ready, - vc_handle dma_b_bits_resp, - vc_handle dma_b_bits_id, + vc_handle cpu_managed_axi4_b_valid, + vc_handle cpu_managed_axi4_b_ready, + vc_handle cpu_managed_axi4_b_bits_resp, + vc_handle cpu_managed_axi4_b_bits_id, + + vc_handle fpga_managed_axi4_ar_valid, + vc_handle fpga_managed_axi4_ar_ready, + vc_handle fpga_managed_axi4_ar_bits_addr, + vc_handle fpga_managed_axi4_ar_bits_id, + vc_handle fpga_managed_axi4_ar_bits_size, + vc_handle fpga_managed_axi4_ar_bits_len, + + vc_handle fpga_managed_axi4_aw_valid, + vc_handle fpga_managed_axi4_aw_ready, + vc_handle fpga_managed_axi4_aw_bits_addr, + vc_handle fpga_managed_axi4_aw_bits_id, + vc_handle fpga_managed_axi4_aw_bits_size, + vc_handle fpga_managed_axi4_aw_bits_len, + + vc_handle fpga_managed_axi4_w_valid, + vc_handle fpga_managed_axi4_w_ready, + vc_handle fpga_managed_axi4_w_bits_strb, + vc_handle fpga_managed_axi4_w_bits_data, + vc_handle fpga_managed_axi4_w_bits_last, + + vc_handle fpga_managed_axi4_r_valid, + vc_handle fpga_managed_axi4_r_ready, + vc_handle fpga_managed_axi4_r_bits_resp, + vc_handle fpga_managed_axi4_r_bits_id, + vc_handle fpga_managed_axi4_r_bits_data, + vc_handle fpga_managed_axi4_r_bits_last, + + vc_handle fpga_managed_axi4_b_valid, + vc_handle fpga_managed_axi4_b_ready, + vc_handle fpga_managed_axi4_b_bits_resp, + vc_handle fpga_managed_axi4_b_bits_id, vc_handle mem_0_ar_valid, vc_handle mem_0_ar_ready, @@ -214,42 +294,86 @@ void tick(vc_handle reset, try { // The driver ucontext is initialized before spawning the VCS // context, so these pointers should be initialized. 
- assert(simif_emul_t::dma != nullptr); + assert(simif_emul_t::cpu_managed_axi4 != nullptr); assert(simif_emul_t::master != nullptr); - assert(DMA_STRB_SIZE <= 2); + static_assert(CPU_MANAGED_AXI4_STRB_SIZE <= 2); uint32_t ctrl_r_data[CTRL_DATA_SIZE]; for (size_t i = 0; i < CTRL_DATA_SIZE; i++) { ctrl_r_data[i] = vc_4stVectorRef(ctrl_r_bits_data)[i].d; } - uint32_t dma_r_data[DMA_DATA_SIZE]; - for (size_t i = 0; i < DMA_DATA_SIZE; i++) { - dma_r_data[i] = vc_4stVectorRef(dma_r_bits_data)[i].d; - } - simif_emul_t::master->tick(vcs_rst, vc_getScalar(ctrl_ar_ready), vc_getScalar(ctrl_aw_ready), vc_getScalar(ctrl_w_ready), - vc_4stVectorRef(ctrl_r_bits_id)->d, + getScalarOrVector(ctrl_r_bits_id, CTRL_ID_BITS), ctrl_r_data, vc_getScalar(ctrl_r_bits_last), vc_getScalar(ctrl_r_valid), - vc_4stVectorRef(ctrl_b_bits_id)->d, + getScalarOrVector(ctrl_b_bits_id, CTRL_ID_BITS), vc_getScalar(ctrl_b_valid)); - simif_emul_t::dma->tick(vcs_rst, - vc_getScalar(dma_ar_ready), - vc_getScalar(dma_aw_ready), - vc_getScalar(dma_w_ready), - vc_4stVectorRef(dma_r_bits_id)->d, - dma_r_data, - vc_getScalar(dma_r_bits_last), - vc_getScalar(dma_r_valid), - vc_4stVectorRef(dma_b_bits_id)->d, - vc_getScalar(dma_b_valid)); +#ifdef CPU_MANAGED_AXI4_PRESENT + assert(CPU_MANAGED_AXI4_STRB_SIZE <= 2); + uint32_t cpu_managed_axi4_r_data[CPU_MANAGED_AXI4_DATA_SIZE]; + for (size_t i = 0; i < CPU_MANAGED_AXI4_DATA_SIZE; i++) { + cpu_managed_axi4_r_data[i] = + vc_4stVectorRef(cpu_managed_axi4_r_bits_data)[i].d; + } + + simif_emul_t::cpu_managed_axi4->tick( + vcs_rst, + vc_getScalar(cpu_managed_axi4_ar_ready), + vc_getScalar(cpu_managed_axi4_aw_ready), + vc_getScalar(cpu_managed_axi4_w_ready), + vc_4stVectorRef(cpu_managed_axi4_r_bits_id)->d, + cpu_managed_axi4_r_data, + vc_getScalar(cpu_managed_axi4_r_bits_last), + vc_getScalar(cpu_managed_axi4_r_valid), + vc_4stVectorRef(cpu_managed_axi4_b_bits_id)->d, + vc_getScalar(cpu_managed_axi4_b_valid)); +#endif // CPU_MANAGED_AXI4_PRESENT + +#ifdef 
FPGA_MANAGED_AXI4_PRESENT + uint32_t fpga_managed_axi4_w_data[FPGA_MANAGED_AXI4_DATA_SIZE]; + for (size_t i = 0; i < FPGA_MANAGED_AXI4_DATA_SIZE; i++) { + fpga_managed_axi4_w_data[i] = + vc_4stVectorRef(fpga_managed_axi4_w_bits_data)[i].d; + } + + uint64_t fpga_managed_axi4_w_strb; + static_assert(FPGA_MANAGED_AXI4_STRB_SIZE <= 2); + for (size_t i = 0; i < FPGA_MANAGED_AXI4_STRB_SIZE; i++) { + ((uint32_t *)&fpga_managed_axi4_w_strb)[i] = + vc_4stVectorRef(fpga_managed_axi4_w_bits_strb)[i].d; + } + + simif_emul_t::cpu_mem->tick( + vcs_rst, + vc_getScalar(fpga_managed_axi4_ar_valid), + vc_4stVectorRef(fpga_managed_axi4_ar_bits_addr)->d, + getScalarOrVector(fpga_managed_axi4_ar_bits_id, + FPGA_MANAGED_AXI4_ID_BITS), + vc_4stVectorRef(fpga_managed_axi4_ar_bits_size)->d, + vc_4stVectorRef(fpga_managed_axi4_ar_bits_len)->d, + + vc_getScalar(fpga_managed_axi4_aw_valid), + vc_4stVectorRef(fpga_managed_axi4_aw_bits_addr)->d, + getScalarOrVector(fpga_managed_axi4_aw_bits_id, + FPGA_MANAGED_AXI4_ID_BITS), + vc_4stVectorRef(fpga_managed_axi4_aw_bits_size)->d, + vc_4stVectorRef(fpga_managed_axi4_aw_bits_len)->d, + + vc_getScalar(fpga_managed_axi4_w_valid), + fpga_managed_axi4_w_strb, + fpga_managed_axi4_w_data, + vc_getScalar(fpga_managed_axi4_w_bits_last), + + vc_getScalar(fpga_managed_axi4_r_ready), + vc_getScalar(fpga_managed_axi4_b_ready)); +#endif // FPGA_MANAGED_AXI4_PRESENT #define MEMORY_CHANNEL_TICK(IDX) \ uint32_t mem_##IDX##_w_data[MEM_DATA_SIZE]; \ @@ -303,9 +427,6 @@ void tick(vc_handle reset, vec32 md[CTRL_DATA_SIZE]; md[0].c = 0; - md[0].d = simif_emul_t::master->aw_id(); - vc_put4stVector(ctrl_aw_bits_id, md); - md[0].c = 0; md[0].d = simif_emul_t::master->aw_addr(); vc_put4stVector(ctrl_aw_bits_addr, md); md[0].c = 0; @@ -315,9 +436,6 @@ void tick(vc_handle reset, md[0].d = simif_emul_t::master->aw_len(); vc_put4stVector(ctrl_aw_bits_len, md); md[0].c = 0; - md[0].d = simif_emul_t::master->ar_id(); - vc_put4stVector(ctrl_ar_bits_id, md); - md[0].c = 0; md[0].d 
= simif_emul_t::master->ar_addr(); vc_put4stVector(ctrl_ar_bits_addr, md); md[0].c = 0; @@ -336,55 +454,100 @@ void tick(vc_handle reset, } vc_put4stVector(ctrl_w_bits_data, md); - vc_putScalar(dma_aw_valid, simif_emul_t::dma->aw_valid()); - vc_putScalar(dma_ar_valid, simif_emul_t::dma->ar_valid()); - vc_putScalar(dma_w_valid, simif_emul_t::dma->w_valid()); - vc_putScalar(dma_w_bits_last, simif_emul_t::dma->w_last()); - vc_putScalar(dma_r_ready, simif_emul_t::dma->r_ready()); - vc_putScalar(dma_b_ready, simif_emul_t::dma->b_ready()); + putScalarOrVector( + ctrl_aw_bits_id, simif_emul_t::master->aw_id(), CTRL_ID_BITS); + putScalarOrVector( + ctrl_ar_bits_id, simif_emul_t::master->ar_id(), CTRL_ID_BITS); - vec32 dd[DMA_DATA_SIZE]; - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->aw_id(); - vc_put4stVector(dma_aw_bits_id, dd); - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->aw_addr(); - dd[1].c = 0; - dd[1].d = simif_emul_t::dma->aw_addr() >> 32; - vc_put4stVector(dma_aw_bits_addr, dd); - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->aw_size(); - vc_put4stVector(dma_aw_bits_size, dd); - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->aw_len(); - vc_put4stVector(dma_aw_bits_len, dd); - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->ar_id(); - vc_put4stVector(dma_ar_bits_id, dd); - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->ar_addr(); - dd[1].c = 0; - dd[1].d = simif_emul_t::dma->ar_addr() >> 32; - vc_put4stVector(dma_ar_bits_addr, dd); - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->ar_size(); - vc_put4stVector(dma_ar_bits_size, dd); - dd[0].c = 0; - dd[0].d = simif_emul_t::dma->ar_len(); - vc_put4stVector(dma_ar_bits_len, dd); +#ifdef CPU_MANAGED_AXI4_PRESENT + vc_putScalar(cpu_managed_axi4_aw_valid, + simif_emul_t::cpu_managed_axi4->aw_valid()); + vc_putScalar(cpu_managed_axi4_ar_valid, + simif_emul_t::cpu_managed_axi4->ar_valid()); + vc_putScalar(cpu_managed_axi4_w_valid, + simif_emul_t::cpu_managed_axi4->w_valid()); + vc_putScalar(cpu_managed_axi4_w_bits_last, + 
simif_emul_t::cpu_managed_axi4->w_last()); + vc_putScalar(cpu_managed_axi4_r_ready, + simif_emul_t::cpu_managed_axi4->r_ready()); + vc_putScalar(cpu_managed_axi4_b_ready, + simif_emul_t::cpu_managed_axi4->b_ready()); - auto strb = simif_emul_t::dma->w_strb(); - for (size_t i = 0; i < DMA_STRB_SIZE; i++) { + vec32 dd[CPU_MANAGED_AXI4_DATA_SIZE]; + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->aw_id(); + vc_put4stVector(cpu_managed_axi4_aw_bits_id, dd); + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->aw_addr(); + dd[1].c = 0; + dd[1].d = simif_emul_t::cpu_managed_axi4->aw_addr() >> 32; + vc_put4stVector(cpu_managed_axi4_aw_bits_addr, dd); + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->aw_size(); + vc_put4stVector(cpu_managed_axi4_aw_bits_size, dd); + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->aw_len(); + vc_put4stVector(cpu_managed_axi4_aw_bits_len, dd); + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->ar_id(); + vc_put4stVector(cpu_managed_axi4_ar_bits_id, dd); + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->ar_addr(); + dd[1].c = 0; + dd[1].d = simif_emul_t::cpu_managed_axi4->ar_addr() >> 32; + vc_put4stVector(cpu_managed_axi4_ar_bits_addr, dd); + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->ar_size(); + vc_put4stVector(cpu_managed_axi4_ar_bits_size, dd); + dd[0].c = 0; + dd[0].d = simif_emul_t::cpu_managed_axi4->ar_len(); + vc_put4stVector(cpu_managed_axi4_ar_bits_len, dd); + + auto strb = simif_emul_t::cpu_managed_axi4->w_strb(); + for (size_t i = 0; i < CPU_MANAGED_AXI4_STRB_SIZE; i++) { dd[i].c = 0; dd[i].d = ((uint32_t *)(&strb))[i]; } - vc_put4stVector(dma_w_bits_strb, dd); + vc_put4stVector(cpu_managed_axi4_w_bits_strb, dd); - for (size_t i = 0; i < DMA_DATA_SIZE; i++) { + for (size_t i = 0; i < CPU_MANAGED_AXI4_DATA_SIZE; i++) { dd[i].c = 0; - dd[i].d = ((uint32_t *)simif_emul_t::dma->w_data())[i]; + dd[i].d = ((uint32_t *)simif_emul_t::cpu_managed_axi4->w_data())[i]; } - 
vc_put4stVector(dma_w_bits_data, dd); + vc_put4stVector(cpu_managed_axi4_w_bits_data, dd); +#endif // CPU_MANAGED_AXI4_PRESENT + +#ifdef FPGA_MANAGED_AXI4_PRESENT + vc_putScalar(fpga_managed_axi4_aw_ready, simif_emul_t::cpu_mem->aw_ready()); + vc_putScalar(fpga_managed_axi4_ar_ready, simif_emul_t::cpu_mem->ar_ready()); + vc_putScalar(fpga_managed_axi4_w_ready, simif_emul_t::cpu_mem->w_ready()); + vc_putScalar(fpga_managed_axi4_b_valid, simif_emul_t::cpu_mem->b_valid()); + vc_putScalar(fpga_managed_axi4_r_valid, simif_emul_t::cpu_mem->r_valid()); + vc_putScalar(fpga_managed_axi4_r_bits_last, + simif_emul_t::cpu_mem->r_last()); + + vec32 fpga_managed_axi4d[FPGA_MANAGED_AXI4_DATA_SIZE]; + fpga_managed_axi4d[0].c = 0; + fpga_managed_axi4d[0].d = simif_emul_t::cpu_mem->b_resp(); + vc_put4stVector(fpga_managed_axi4_b_bits_resp, fpga_managed_axi4d); + fpga_managed_axi4d[0].c = 0; + fpga_managed_axi4d[0].d = simif_emul_t::cpu_mem->r_resp(); + vc_put4stVector(fpga_managed_axi4_r_bits_resp, fpga_managed_axi4d); + + for (size_t i = 0; i < FPGA_MANAGED_AXI4_DATA_SIZE; i++) { + fpga_managed_axi4d[i].c = 0; + fpga_managed_axi4d[i].d = + ((uint32_t *)simif_emul_t::cpu_mem->r_data())[i]; + } + vc_put4stVector(fpga_managed_axi4_r_bits_data, fpga_managed_axi4d); + + putScalarOrVector(fpga_managed_axi4_b_bits_id, + simif_emul_t::cpu_mem->b_id(), + FPGA_MANAGED_AXI4_ID_BITS); + putScalarOrVector(fpga_managed_axi4_r_bits_id, + simif_emul_t::cpu_mem->r_id(), + FPGA_MANAGED_AXI4_ID_BITS); +#endif // FPGA_MANAGED_AXI4_PRESENT #define MEMORY_CHANNEL_PROP(IDX) \ vc_putScalar(mem_##IDX##_aw_ready, simif_emul_t::slave[IDX]->aw_ready()); \ diff --git a/sim/midas/src/main/cc/emul/verilator-harness.cc b/sim/midas/src/main/cc/emul/verilator-harness.cc index 36005fdc..9786a101 100644 --- a/sim/midas/src/main/cc/emul/verilator-harness.cc +++ b/sim/midas/src/main/cc/emul/verilator-harness.cc @@ -1,6 +1,7 @@ #include "simif_emul.h" #include #include +#include #include #if VM_TRACE #include @@ -13,7 
+14,7 @@ extern VerilatedVcdC *tfp; #endif // VM_TRACE void tick() { - assert(simif_emul_t::dma != nullptr); + assert(simif_emul_t::cpu_managed_axi4 != nullptr); assert(simif_emul_t::master != nullptr); // ASSUMPTION: All models have *no* combinational paths through I/O @@ -39,29 +40,61 @@ void tick() { memcpy( &top->ctrl_w_bits_data, simif_emul_t::master->w_data(), CTRL_BEAT_BYTES); - top->dma_aw_valid = simif_emul_t::dma->aw_valid(); - top->dma_aw_bits_id = simif_emul_t::dma->aw_id(); - top->dma_aw_bits_addr = simif_emul_t::dma->aw_addr(); - top->dma_aw_bits_size = simif_emul_t::dma->aw_size(); - top->dma_aw_bits_len = simif_emul_t::dma->aw_len(); +#ifdef CPU_MANAGED_AXI4_PRESENT + top->cpu_managed_axi4_aw_valid = simif_emul_t::cpu_managed_axi4->aw_valid(); + top->cpu_managed_axi4_aw_bits_id = simif_emul_t::cpu_managed_axi4->aw_id(); + top->cpu_managed_axi4_aw_bits_addr = + simif_emul_t::cpu_managed_axi4->aw_addr(); + top->cpu_managed_axi4_aw_bits_size = + simif_emul_t::cpu_managed_axi4->aw_size(); + top->cpu_managed_axi4_aw_bits_len = simif_emul_t::cpu_managed_axi4->aw_len(); - top->dma_ar_valid = simif_emul_t::dma->ar_valid(); - top->dma_ar_bits_id = simif_emul_t::dma->ar_id(); - top->dma_ar_bits_addr = simif_emul_t::dma->ar_addr(); - top->dma_ar_bits_size = simif_emul_t::dma->ar_size(); - top->dma_ar_bits_len = simif_emul_t::dma->ar_len(); + top->cpu_managed_axi4_ar_valid = simif_emul_t::cpu_managed_axi4->ar_valid(); + top->cpu_managed_axi4_ar_bits_id = simif_emul_t::cpu_managed_axi4->ar_id(); + top->cpu_managed_axi4_ar_bits_addr = + simif_emul_t::cpu_managed_axi4->ar_addr(); + top->cpu_managed_axi4_ar_bits_size = + simif_emul_t::cpu_managed_axi4->ar_size(); + top->cpu_managed_axi4_ar_bits_len = simif_emul_t::cpu_managed_axi4->ar_len(); - top->dma_w_valid = simif_emul_t::dma->w_valid(); - top->dma_w_bits_strb = simif_emul_t::dma->w_strb(); - top->dma_w_bits_last = simif_emul_t::dma->w_last(); + top->cpu_managed_axi4_w_valid = 
simif_emul_t::cpu_managed_axi4->w_valid(); + top->cpu_managed_axi4_w_bits_strb = simif_emul_t::cpu_managed_axi4->w_strb(); + top->cpu_managed_axi4_w_bits_last = simif_emul_t::cpu_managed_axi4->w_last(); - top->dma_r_ready = simif_emul_t::dma->r_ready(); - top->dma_b_ready = simif_emul_t::dma->b_ready(); -#if DMA_DATA_BITS > 64 - memcpy(top->dma_w_bits_data, simif_emul_t::dma->w_data(), DMA_BEAT_BYTES); + top->cpu_managed_axi4_r_ready = simif_emul_t::cpu_managed_axi4->r_ready(); + top->cpu_managed_axi4_b_ready = simif_emul_t::cpu_managed_axi4->b_ready(); +#if CPU_MANAGED_AXI4_DATA_BITS > 64 + memcpy(top->cpu_managed_axi4_w_bits_data, + simif_emul_t::cpu_managed_axi4->w_data(), + CPU_MANAGED_AXI4_BEAT_BYTES); #else - memcpy(&top->dma_w_bits_data, simif_emul_t::dma->w_data(), DMA_BEAT_BYTES); + memcpy(&top->cpu_managed_axi4_w_bits_data, + simif_emul_t::cpu_managed_axi4->w_data(), + CPU_MANAGED_AXI4_BEAT_BYTES); #endif +#endif // CPU_MANAGED_AXI4_PRESENT + +#ifdef FPGA_MANAGED_AXI4_PRESENT + top->fpga_managed_axi4_aw_ready = simif_emul_t::cpu_mem->aw_ready(); + top->fpga_managed_axi4_ar_ready = simif_emul_t::cpu_mem->ar_ready(); + top->fpga_managed_axi4_w_ready = simif_emul_t::cpu_mem->w_ready(); + top->fpga_managed_axi4_b_valid = simif_emul_t::cpu_mem->b_valid(); + top->fpga_managed_axi4_b_bits_id = simif_emul_t::cpu_mem->b_id(); + top->fpga_managed_axi4_b_bits_resp = simif_emul_t::cpu_mem->b_resp(); + top->fpga_managed_axi4_r_valid = simif_emul_t::cpu_mem->r_valid(); + top->fpga_managed_axi4_r_bits_id = simif_emul_t::cpu_mem->r_id(); + top->fpga_managed_axi4_r_bits_resp = simif_emul_t::cpu_mem->r_resp(); + top->fpga_managed_axi4_r_bits_last = simif_emul_t::cpu_mem->r_last(); +#if MEM_DATA_BITS > 64 + memcpy(top->fpga_managed_axi4_r_bits_data, + simif_emul_t::cpu_mem->r_data(), + FPGA_MANAGED_AXI4_DATA_BITS / 8); +#else + memcpy(&top->fpga_managed_axi4_r_bits_data, + simif_emul_t::cpu_mem->r_data(), + FPGA_MANAGED_AXI4_DATA_BITS / 8); +#endif +#endif // 
FPGA_MANAGED_AXI4_PRESENT top->mem_0_aw_ready = simif_emul_t::slave[0]->aw_ready(); top->mem_0_ar_ready = simif_emul_t::slave[0]->ar_ready(); @@ -171,16 +204,49 @@ void tick() { top->ctrl_b_bits_id, top->ctrl_b_valid); - simif_emul_t::dma->tick(top->reset, - top->dma_ar_ready, - top->dma_aw_ready, - top->dma_w_ready, - top->dma_r_bits_id, - &top->dma_r_bits_data, - top->dma_r_bits_last, - top->dma_r_valid, - top->dma_b_bits_id, - top->dma_b_valid); +#ifdef CPU_MANAGED_AXI4_PRESENT + simif_emul_t::cpu_managed_axi4->tick(top->reset, + top->cpu_managed_axi4_ar_ready, + top->cpu_managed_axi4_aw_ready, + top->cpu_managed_axi4_w_ready, + top->cpu_managed_axi4_r_bits_id, + &top->cpu_managed_axi4_r_bits_data, + top->cpu_managed_axi4_r_bits_last, + top->cpu_managed_axi4_r_valid, + top->cpu_managed_axi4_b_bits_id, + top->cpu_managed_axi4_b_valid); +#endif // CPU_MANAGED_AXI4_PRESENT + +#ifdef FPGA_MANAGED_AXI4_PRESENT + simif_emul_t::cpu_mem->tick(top->reset, + top->fpga_managed_axi4_ar_valid, + top->fpga_managed_axi4_ar_bits_addr, + top->fpga_managed_axi4_ar_bits_id, + top->fpga_managed_axi4_ar_bits_size, + top->fpga_managed_axi4_ar_bits_len, + + top->fpga_managed_axi4_aw_valid, + top->fpga_managed_axi4_aw_bits_addr, + top->fpga_managed_axi4_aw_bits_id, + top->fpga_managed_axi4_aw_bits_size, + top->fpga_managed_axi4_aw_bits_len, + + top->fpga_managed_axi4_w_valid, +#if FPGA_MANAGED_AXI4_STRB_BITS > 64 + &top->fpga_managed_axi4_w_bits_strb, +#else + top->fpga_managed_axi4_w_bits_strb, +#endif +#if FPGA_MANAGED_AXI4_DATA_BITS > 64 + &top->fpga_managed_axi4_w_bits_data, +#else + top->fpga_managed_axi4_w_bits_data, +#endif + top->fpga_managed_axi4_w_bits_last, + + top->fpga_managed_axi4_r_ready, + top->fpga_managed_axi4_b_ready); +#endif // FPGA_MANAGED_AXI4_PRESENT simif_emul_t::slave[0]->tick(top->reset, top->mem_0_ar_valid, diff --git a/sim/midas/src/main/cc/simif_emul.cc b/sim/midas/src/main/cc/simif_emul.cc index 7dbddc21..8fd6932b 100644 --- 
a/sim/midas/src/main/cc/simif_emul.cc +++ b/sim/midas/src/main/cc/simif_emul.cc @@ -47,10 +47,21 @@ void handle_sigterm(int sig) { finish(); } simif_emul_t::simif_emul_t() { +#ifdef FPGA_MANAGED_AXI4_PRESENT + // The final parameter, line size, is not used under mm_magic_t + cpu_mem->init((1ULL << FPGA_MANAGED_AXI4_ADDR_BITS), + FPGA_MANAGED_AXI4_DATA_BITS / 8, + 512); +#endif + using namespace std::placeholders; auto mmio_read_func = std::bind(&simif_emul_t::read, this, _1); - auto pcis_read_func = std::bind(&simif_emul_t::pcis_read, this, _1, _2, _3); - auto pcis_write_func = std::bind(&simif_emul_t::pcis_write, this, _1, _2, _3); + +#ifdef CPUMANAGEDSTREAMENGINE_0_PRESENT + auto cpu_managed_axi4_read_func = + std::bind(&simif_emul_t::cpu_managed_axi4_read, this, _1, _2, _3); + auto cpu_managed_axi4_write_func = + std::bind(&simif_emul_t::cpu_managed_axi4_write, this, _1, _2, _3); for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) { auto params = CPUManagedStreamParameters( @@ -60,7 +71,7 @@ simif_emul_t::simif_emul_t() { CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]); from_host_streams.push_back( - StreamFromCPU(params, mmio_read_func, pcis_write_func)); + StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func)); } for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) { @@ -71,8 +82,9 @@ simif_emul_t::simif_emul_t() { CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]); to_host_streams.push_back( - StreamToCPU(params, mmio_read_func, pcis_read_func)); + StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func)); } +#endif // CPUMANAGEDSTREAMENGINE_0_PRESENT } simif_emul_t::~simif_emul_t(){}; @@ -202,32 +214,35 @@ size_t simif_emul_t::push(unsigned stream_idx, src, num_bytes, threshold_bytes); } -size_t simif_emul_t::pcis_read(size_t addr, char *data, size_t size) { - ssize_t len = (size - 1) / DMA_BEAT_BYTES; +size_t +simif_emul_t::cpu_managed_axi4_read(size_t addr, char *data, size_t size) { + 
ssize_t len = (size - 1) / CPU_MANAGED_AXI4_BEAT_BYTES; while (len >= 0) { size_t part_len = len % (MAX_LEN + 1); - dma->read_req(addr, DMA_SIZE, part_len); - wait_read(dma, data); + cpu_managed_axi4->read_req( + addr, log2(CPU_MANAGED_AXI4_BEAT_BYTES), part_len); + wait_read(cpu_managed_axi4, data); len -= (part_len + 1); - addr += (part_len + 1) * DMA_BEAT_BYTES; - data += (part_len + 1) * DMA_BEAT_BYTES; + addr += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES; + data += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES; } return size; } -size_t simif_emul_t::pcis_write(size_t addr, char *data, size_t size) { - ssize_t len = (size - 1) / DMA_BEAT_BYTES; - size_t remaining = size - len * DMA_BEAT_BYTES; +size_t +simif_emul_t::cpu_managed_axi4_write(size_t addr, char *data, size_t size) { + ssize_t len = (size - 1) / CPU_MANAGED_AXI4_BEAT_BYTES; + size_t remaining = size - len * CPU_MANAGED_AXI4_BEAT_BYTES; size_t strb[len + 1]; size_t *strb_ptr = &strb[0]; for (int i = 0; i < len; i++) - strb[i] = (1LL << DMA_BEAT_BYTES) - 1; + strb[i] = (1LL << CPU_MANAGED_AXI4_BEAT_BYTES) - 1; - if (remaining == DMA_BEAT_BYTES) + if (remaining == CPU_MANAGED_AXI4_BEAT_BYTES) strb[len] = strb[0]; else strb[len] = (1LL << remaining) - 1; @@ -235,12 +250,13 @@ size_t simif_emul_t::pcis_write(size_t addr, char *data, size_t size) { while (len >= 0) { size_t part_len = len % (MAX_LEN + 1); - dma->write_req(addr, DMA_SIZE, part_len, data, strb_ptr); - wait_write(dma); + cpu_managed_axi4->write_req( + addr, log2(CPU_MANAGED_AXI4_BEAT_BYTES), part_len, data, strb_ptr); + wait_write(cpu_managed_axi4); len -= (part_len + 1); - addr += (part_len + 1) * DMA_BEAT_BYTES; - data += (part_len + 1) * DMA_BEAT_BYTES; + addr += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES; + data += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES; strb_ptr += (part_len + 1); } diff --git a/sim/midas/src/main/cc/simif_emul.h b/sim/midas/src/main/cc/simif_emul.h index c92fc701..5e42232c 100644 --- 
a/sim/midas/src/main/cc/simif_emul.h +++ b/sim/midas/src/main/cc/simif_emul.h @@ -37,7 +37,8 @@ public: * These have external linkage to enable VCS to easily access them. */ inline static mmio_t *master = new mmio_t(CTRL_BEAT_BYTES); - inline static mmio_t *dma = new mmio_t(DMA_BEAT_BYTES); + inline static mmio_t *cpu_managed_axi4 = + new mmio_t(CPU_MANAGED_AXI4_BEAT_BYTES); /** * @brief Host DRAM models shared across the RTL simulator and driver * contexts. @@ -48,19 +49,27 @@ public: * simif_emul_t::load_mems. */ inline static mm_t *slave[MEM_NUM_CHANNELS] = {nullptr}; + /** + * @brief A model of FPGA-addressable CPU-host memory. + * + * In metasimulations, FPGA-managed AXI4 transactions read and write to this + * AXI4 memory subordinate as a proxy for writing into actual host-CPU DRAM. + * The driver-side of FPGAManagedStreams inspect circular buffers hosted here. + */ + inline static mm_t *cpu_mem = new mm_magic_t; private: // The maximum number of cycles the RTL simulator can advance before // switching back to the driver process. +fuzz-host-timings sets this to a - // value > 1, introducing random delays in MMIO (read, write) and DMA (push, - // pull) requests + // value > 1, introducing random delays in the axi4 transactions that + // implement MMIO and bridge streams. int maximum_host_delay = 1; void advance_target(); void wait_read(mmio_t *mmio, void *data); void wait_write(mmio_t *mmio); - size_t pcis_write(size_t addr, char *data, size_t size); - size_t pcis_read(size_t addr, char *data, size_t size); + size_t cpu_managed_axi4_write(size_t addr, char *data, size_t size); + size_t cpu_managed_axi4_read(size_t addr, char *data, size_t size); // Writes directly into the host DRAM models to initialize them. 
void load_mems(const char *fname); diff --git a/sim/midas/src/main/cc/simif_f1.cc b/sim/midas/src/main/cc/simif_f1.cc index 76d3aefe..75e5cf6d 100644 --- a/sim/midas/src/main/cc/simif_f1.cc +++ b/sim/midas/src/main/cc/simif_f1.cc @@ -30,8 +30,10 @@ simif_f1_t::simif_f1_t(int argc, char **argv) { using namespace std::placeholders; auto mmio_read_func = std::bind(&simif_f1_t::read, this, _1); - auto pcis_read_func = std::bind(&simif_f1_t::pcis_read, this, _1, _2, _3); - auto pcis_write_func = std::bind(&simif_f1_t::pcis_write, this, _1, _2, _3); + auto cpu_managed_axi4_read_func = + std::bind(&simif_f1_t::cpu_managed_axi4_read, this, _1, _2, _3); + auto cpu_managed_axi4_write_func = + std::bind(&simif_f1_t::cpu_managed_axi4_write, this, _1, _2, _3); for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) { auto params = CPUManagedStreamParameters( @@ -41,7 +43,7 @@ simif_f1_t::simif_f1_t(int argc, char **argv) { CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]); from_host_streams.push_back( - StreamFromCPU(params, mmio_read_func, pcis_write_func)); + StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func)); } for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) { @@ -52,7 +54,7 @@ simif_f1_t::simif_f1_t(int argc, char **argv) { CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]); to_host_streams.push_back( - StreamToCPU(params, mmio_read_func, pcis_read_func)); + StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func)); } } @@ -227,7 +229,7 @@ uint32_t simif_f1_t::read(size_t addr) { #endif } -size_t simif_f1_t::pcis_read(size_t addr, char *data, size_t size) { +size_t simif_f1_t::cpu_managed_axi4_read(size_t addr, char *data, size_t size) { #ifdef SIMULATION_XSIM assert(false); // PCIS is unsupported in FPGA-level metasimulation #else @@ -235,7 +237,8 @@ size_t simif_f1_t::pcis_read(size_t addr, char *data, size_t size) { #endif } -size_t simif_f1_t::pcis_write(size_t addr, char *data, size_t size) { +size_t 
+simif_f1_t::cpu_managed_axi4_write(size_t addr, char *data, size_t size) { #ifdef SIMULATION_XSIM assert(false); // PCIS is unsupported in FPGA-level metasimulation #else diff --git a/sim/midas/src/main/cc/simif_f1.h b/sim/midas/src/main/cc/simif_f1.h index 86d8f19c..d0e832c0 100644 --- a/sim/midas/src/main/cc/simif_f1.h +++ b/sim/midas/src/main/cc/simif_f1.h @@ -41,8 +41,8 @@ private: std::vector to_host_streams; std::vector from_host_streams; - size_t pcis_write(size_t addr, char *data, size_t size); - size_t pcis_read(size_t addr, char *data, size_t size); + size_t cpu_managed_axi4_write(size_t addr, char *data, size_t size); + size_t cpu_managed_axi4_read(size_t addr, char *data, size_t size); #ifdef SIMULATION_XSIM char *driver_to_xsim = "/tmp/driver_to_xsim"; diff --git a/sim/midas/src/main/scala/midas/Config.scala b/sim/midas/src/main/scala/midas/Config.scala index 4a00f595..1a079e6e 100644 --- a/sim/midas/src/main/scala/midas/Config.scala +++ b/sim/midas/src/main/scala/midas/Config.scala @@ -10,6 +10,7 @@ import firrtl.stage.TransformManager.TransformDependency import junctions.{NastiKey, NastiParameters} import freechips.rocketchip.config.{Parameters, Config, Field} import freechips.rocketchip.unittest.UnitTests +import freechips.rocketchip.diplomacy.{TransferSizes} import java.io.{File} @@ -79,9 +80,9 @@ class WithoutTLMonitors extends freechips.rocketchip.subsystem.WithoutTLMonitors class SimConfig extends Config (new Config((site, here, up) => { case SynthAsserts => false case SynthPrints => false - case DMANastiKey => NastiParameters(512, 64, 6) case AXIDebugPrint => false - + // TODO remove + case HasDMAChannel => site(CPUManagedAXI4Key).nonEmpty // Remove once AXI4 port is complete case MemNastiKey => { NastiParameters( @@ -94,6 +95,13 @@ class SimConfig extends Config (new Config((site, here, up) => { class F1Config extends Config(new Config((site, here, up) => { case Platform => (p: Parameters) => new F1Shim()(p) case HasDMAChannel => true + case 
StreamEngineInstantiatorKey => (e: StreamEngineParameters, p: Parameters) => new CPUManagedStreamEngine(p, e) + case CPUManagedAXI4Key => Some(CPUManagedAXI4Params( + addrBits = 64, + dataBits = 512, + idBits = 6, + )) + case FPGAManagedAXI4Key => None case CtrlNastiKey => NastiParameters(32, 25, 12) case HostMemChannelKey => HostMemChannelParams( size = 0x400000000L, // 16 GiB @@ -104,13 +112,33 @@ class F1Config extends Config(new Config((site, here, up) => { class VitisConfig extends Config(new Config((site, here, up) => { case Platform => (p: Parameters) => new VitisShim()(p) - case HasDMAChannel => false - // ID Width = 1 to avoid any potential zero-width wire issues. + case CPUManagedAXI4Key => None + case FPGAManagedAXI4Key => + val dataBits = 512 + Some(FPGAManagedAXI4Params( + // This value was chosen arbitrarily. Vitis makes it natural to + // request multiples of 1 GiB, and we may wish to expand this after some + // performance analysis. + size = 4096 * 1024, + dataBits = dataBits, + // This was chosen to match the AXI4 recommendations and could change. + idBits = 4, + // Don't support narrow reads/writes, and cap at a page per the AXI5 spec + writeTransferSizes = TransferSizes(dataBits / 8, 4096), + readTransferSizes = TransferSizes(dataBits / 8, 4096) + )) + case StreamEngineInstantiatorKey => (e: StreamEngineParameters, p: Parameters) => new FPGAManagedStreamEngine(p, e) + // Notes on width selection for the control bus + // Address: This needs further investigation. 12 may not be sufficient when using many auto counters + // ID: AXI4Lite does not use ID bits. Use one here since Nasti (which + // lacks a native AXI4LITE implementation) can't handle 0-width wires. 
case CtrlNastiKey => NastiParameters(32, 12, 1) case HostMemChannelKey => HostMemChannelParams( size = 0x400000000L, // 16 GiB beatBytes = 8, idBits = 16) + // This could be as many as four on a U250, but support for the other + // channels requires adding address offsets in the shim (TODO). case HostMemNumChannels => 1 }) ++ new SimConfig) diff --git a/sim/midas/src/main/scala/midas/core/CPUManagedStreamEngine.scala b/sim/midas/src/main/scala/midas/core/CPUManagedStreamEngine.scala index 2c98c166..62c3d87b 100644 --- a/sim/midas/src/main/scala/midas/core/CPUManagedStreamEngine.scala +++ b/sim/midas/src/main/scala/midas/core/CPUManagedStreamEngine.scala @@ -27,43 +27,46 @@ case class StreamDriverParameters( class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) extends StreamEngine(p) { - val dmaBytes = p(DMANastiKey).dataBits / 8 - val pcisNodeOpt = Some(AXI4SlaveNode( + val cpuManagedAXI4params = p(CPUManagedAXI4Key).get + require(BridgeStreamConstants.streamWidthBits == cpuManagedAXI4params.dataBits, + s"CPU-managed AXI4 IF data width must match the stream width: ${BridgeStreamConstants.streamWidthBits}.") + + val beatBytes = cpuManagedAXI4params.dataBits / 8 + + val cpuManagedAXI4NodeOpt = Some(AXI4SlaveNode( Seq(AXI4SlavePortParameters( slaves = Seq(AXI4SlaveParameters( - address = Seq(AddressSet(0, (BigInt(1) << p(DMANastiKey).dataBits) - 1)), + address = Seq(AddressSet(0, (BigInt(1) << cpuManagedAXI4params.addrBits) - 1)), resources = (new MemoryDevice).reg, regionType = RegionType.UNCACHED, // cacheable executable = false, - supportsWrite = TransferSizes(dmaBytes, 4096), - supportsRead = TransferSizes(dmaBytes, 4096), + supportsWrite = TransferSizes(beatBytes, 4096), + supportsRead = TransferSizes(beatBytes, 4096), interleavedId = Some(0))), // slave does not interleave read responses - beatBytes = dmaBytes) + beatBytes = beatBytes) )) ) - //require(BridgeStreamConstants.streamWidthBits == p(DMANastiKey).dataBits, - // 
s"CPU-mastered AXI4 IF data width must match the stream width ${BridgeStreamConstants.streamWidthBits}".) - val pcimNodeOpt = None + val fpgaManagedAXI4NodeOpt = None lazy val module = new WidgetImp(this) { val io = IO(new WidgetIO) - val dma = pcisNodeOpt.get.in.head._1 + val axi4 = cpuManagedAXI4NodeOpt.get.in.head._1 // FromHostCPU streams are implemented using the AW, W, B channels, which // write into large BRAM FIFOs for each stream. - assert(!dma.aw.valid || dma.aw.bits.size === log2Ceil(dmaBytes).U) - assert(!dma.w.valid || dma.w.bits.strb === ~0.U(dmaBytes.W)) + assert(!axi4.aw.valid || axi4.aw.bits.size === log2Ceil(beatBytes).U) + assert(!axi4.w.valid || axi4.w.bits.strb === ~0.U(beatBytes.W)) - dma.b.bits.resp := 0.U(2.W) - dma.b.bits.id := dma.aw.bits.id - dma.b.bits.user := dma.aw.bits.user + axi4.b.bits.resp := 0.U(2.W) + axi4.b.bits.id := axi4.aw.bits.id + axi4.b.bits.user := axi4.aw.bits.user // This will be set by the channel given the grant using last connect semantics - dma.b.valid := false.B - dma.aw.ready := false.B - dma.w.ready := false.B + axi4.b.valid := false.B + axi4.aw.ready := false.B + axi4.w.ready := false.B // TODO: Chisel naming prefix to indicate what channel this hw belongs to. 
@@ -75,39 +78,39 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) addressSpaceBits: Int): StreamDriverParameters = prefix(chParams.name) { val streamName = chParams.name - val grant = (dma.aw.bits.addr >> addressSpaceBits) === idx.U + val grant = (axi4.aw.bits.addr >> addressSpaceBits) === idx.U val incomingQueue = Module(new BRAMQueue(chParams.fpgaBufferDepth)(UInt(BridgeStreamConstants.streamWidthBits.W))) xdc.RAMStyleHint(incomingQueue.fq.ram, xdc.RAMStyles.ULTRA) channel <> incomingQueue.io.deq - // check to see if pcis is ready to accept data instead of forcing writes + // check to see if axi4 is ready to accept data instead of forcing writes val countAddr = attach(incomingQueue.io.count, s"${chParams.name}_count", ReadOnly) val writeHelper = DecoupledHelper( - dma.aw.valid, - dma.w.valid, - dma.b.ready, + axi4.aw.valid, + axi4.w.valid, + axi4.b.ready, incomingQueue.io.enq.ready ) // TODO: Get rid of this magic number. val writeBeatCounter = RegInit(0.U(9.W)) - val lastWriteBeat = writeBeatCounter === dma.aw.bits.len - when (grant && dma.w.fire) { + val lastWriteBeat = writeBeatCounter === axi4.aw.bits.len + when (grant && axi4.w.fire) { writeBeatCounter := Mux(lastWriteBeat, 0.U, writeBeatCounter + 1.U) } when (grant) { - dma.w.ready := writeHelper.fire(dma.w.valid) - dma.aw.ready := writeHelper.fire(dma.aw.valid, lastWriteBeat) - dma.b.valid := writeHelper.fire(dma.b.ready, lastWriteBeat) + axi4.w.ready := writeHelper.fire(axi4.w.valid) + axi4.aw.ready := writeHelper.fire(axi4.aw.valid, lastWriteBeat) + axi4.b.valid := writeHelper.fire(axi4.b.ready, lastWriteBeat) } incomingQueue.io.enq.valid := grant && writeHelper.fire(incomingQueue.io.enq.ready) - incomingQueue.io.enq.bits := dma.w.bits.data + incomingQueue.io.enq.bits := axi4.w.bits.data StreamDriverParameters( chParams.name, @@ -117,13 +120,13 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) ) } - assert(!dma.ar.valid || 
dma.ar.bits.size === log2Ceil(dmaBytes).U) + assert(!axi4.ar.valid || axi4.ar.bits.size === log2Ceil(beatBytes).U) - dma.r.bits.resp := 0.U(2.W) - dma.r.bits.id := dma.ar.bits.id - dma.r.bits.user := dma.ar.bits.user - dma.r.valid := false.B - dma.ar.ready := false.B + axi4.r.bits.resp := 0.U(2.W) + axi4.r.bits.id := axi4.ar.bits.id + axi4.r.bits.user := axi4.ar.bits.user + axi4.r.valid := false.B + axi4.ar.ready := false.B // This demultiplexes the AW, W, and B channels onto the decoupled ports representing each stream. def elaborateToHostCPUStream( @@ -132,36 +135,36 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) idx: Int, addressSpaceBits: Int): StreamDriverParameters = prefix(chParams.name) { - val grant = (dma.ar.bits.addr >> addressSpaceBits) === idx.U + val grant = (axi4.ar.bits.addr >> addressSpaceBits) === idx.U val outgoingQueue = Module(new BRAMQueue(chParams.fpgaBufferDepth)(UInt(BridgeStreamConstants.streamWidthBits.W))) xdc.RAMStyleHint(outgoingQueue.fq.ram, xdc.RAMStyles.ULTRA) outgoingQueue.io.enq <> channel - // check to see if pcis has valid output instead of waiting for timeouts + // check to see if axi4 has valid output instead of waiting for timeouts val countAddr = attach(outgoingQueue.io.count, s"${chParams.name}_count", ReadOnly) val readHelper = DecoupledHelper( - dma.ar.valid, - dma.r.ready, + axi4.ar.valid, + axi4.r.ready, outgoingQueue.io.deq.valid ) val readBeatCounter = RegInit(0.U(9.W)) - val lastReadBeat = readBeatCounter === dma.ar.bits.len - when (dma.r.fire) { + val lastReadBeat = readBeatCounter === axi4.ar.bits.len + when (axi4.r.fire) { readBeatCounter := Mux(lastReadBeat, 0.U, readBeatCounter + 1.U) } outgoingQueue.io.deq.ready := grant && readHelper.fire(outgoingQueue.io.deq.valid) when (grant) { - dma.r.valid := readHelper.fire(dma.r.ready) - dma.r.bits.data := outgoingQueue.io.deq.bits - dma.r.bits.last := lastReadBeat - dma.ar.ready := readHelper.fire(dma.ar.valid, lastReadBeat) + 
axi4.r.valid := readHelper.fire(axi4.r.ready) + axi4.r.bits.data := outgoingQueue.io.deq.bits + axi4.r.bits.last := lastReadBeat + axi4.ar.ready := readHelper.fire(axi4.ar.valid, lastReadBeat) } StreamDriverParameters( chParams.name, @@ -182,14 +185,10 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) // burst type (which is semantically consistent with draining or filling a queue). // // However, since large DMA transactions initiated by the driver are - // fractured into multiple, smaller AXI4 transactions on the PCIS - // interface*, it is simplest to maintain the illusion that each stream is - // granted an address range at least as large as the largest DMA access. - // - // * On EC2 F1, and likely all XDMA-based systems, requests larger than a - // 4K page are fractured into 4K or smaller transactions. - // treats them as "FIXED" type bursts - def streamASBits = log2Ceil(dmaBytes * streamParameters.map(_.fpgaBufferDepth).max) + // fractured into multiple, smaller AXI4 transactions (<= 4K in size), it + // is simplest to maintain the illusion that each stream is granted an + // address range at least as large as the largest DMA access. 
+ def streamASBits = log2Ceil(beatBytes * streamParameters.map(_.fpgaBufferDepth).max) for (((port, params), idx) <- streamPorts.zip(streamParameters).zipWithIndex) yield { elaborator(port, params, idx, streamASBits) diff --git a/sim/midas/src/main/scala/midas/core/FPGAManagedStreamEngine.scala b/sim/midas/src/main/scala/midas/core/FPGAManagedStreamEngine.scala index e7db7f04..a98fc9f6 100644 --- a/sim/midas/src/main/scala/midas/core/FPGAManagedStreamEngine.scala +++ b/sim/midas/src/main/scala/midas/core/FPGAManagedStreamEngine.scala @@ -14,8 +14,8 @@ import midas.widgets._ * This is a stub to foreshadow the other implementation */ class FPGAManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) extends StreamEngine(p) { - val pcisNodeOpt = None - val pcimNodeOpt = Some(AXI4IdentityNode()) + val cpuManagedAXI4NodeOpt = None + val fpgaManagedAXI4NodeOpt = Some(midas.widgets.AXI4TieOff()(p)) lazy val module = new WidgetImp(this) { val io = IO(new WidgetIO) diff --git a/sim/midas/src/main/scala/midas/core/FPGATop.scala b/sim/midas/src/main/scala/midas/core/FPGATop.scala index 0edb4287..c7c8fac1 100644 --- a/sim/midas/src/main/scala/midas/core/FPGATop.scala +++ b/sim/midas/src/main/scala/midas/core/FPGATop.scala @@ -18,12 +18,24 @@ import scala.collection.immutable.ListMap import scala.collection.mutable /** - * The following case objects define the widths of the three AXI4 bus types presented - * to a simulator. + * The following [[Field]]s capture the parameters of the four AXI4 bus types + * presented to a simulator (in [[FPGATop]]). A [[PlatformShim]] is free to + * adapt these widths, apply address offsets, etc..., but the values set here + * define what is used in metasimulation, since it treats + * [[FPGATop]] as the root of the module hierarchy. */ -// The AXI4 key for the DMA bus -case object DMANastiKey extends Field[NastiParameters] +/** CPU-managed AXI4, aka "pcis" on EC2 F1. Used by the CPU to do DMA into fabric-controlled memories. 
+ * This could include in-fabric RAMs/FIFOs (for bridge streams) or (in the future) FPGA-attached DRAM channels. + */ +case object CPUManagedAXI4Key extends Field[Option[CPUManagedAXI4Params]] + +/** FPGA-managed AXI4, aka "pcim" on F1. Used by the fabric to do DMA into + * the host-CPU's memory. Used to implement bridge streams on platforms that lack a CPU-managed AXI4 interface. + * Set this to None if this interface is not present on the host. + */ +case object FPGAManagedAXI4Key extends Field[Option[FPGAManagedAXI4Params]] + // The AXI4 widths for a single host-DRAM channel case object HostMemChannelKey extends Field[HostMemChannelParams] // The number of host-DRAM channels -> all channels must have the same AXI4 widths @@ -55,10 +67,6 @@ case class AXI4IdSpaceConstraint(idBits: Int = 4, maxFlight: Int = 8) // Legacy: the aggregate memory-space seen by masters wanting DRAM. Derived from HostMemChannelKey case object MemNastiKey extends Field[NastiParameters] -class FPGATopIO(implicit val p: Parameters) extends WidgetIO { - val dma = Flipped(new NastiIO()(p alterPartial ({ case NastiKey => p(DMANastiKey) }))) -} - /** Specifies the size and width of external memory ports */ case class HostMemChannelParams( size: BigInt, @@ -71,6 +79,49 @@ case class HostMemChannelParams( idBits = idBits) } +/** + * Specifies the AXI4 interface for FPGA-driven DMA + * + * @param size The size, in bytes, of the addressable region on the host CPU. + * The addressable region is assumed to span [0, size). Host-specific offsets + * should be handled by the FPGAShim. + * @param dataBits The width of the interface in bits. + * @param idBits The number of ID bits supported by the interface. + * @param writeTransferSizes Supported write transfer sizes in bytes + * @param readTransferSizes Supported read transfer sizes in bytes + * @param interleavedId Set to indicate DMA responses may be interleaved. 
+ */ +case class FPGAManagedAXI4Params( + size: BigInt, + dataBits: Int, + idBits: Int, + writeTransferSizes: TransferSizes, + readTransferSizes: TransferSizes, + interleavedId: Option[Int] = Some(0), + ) { + require(interleavedId == Some(0), "IdDeinterleaver not currently instantiated in FPGATop") + require((isPow2(size)) && (size % 4096 == 0), + "The size of the FPGA-managed DMA regions must be a power of 2, and larger than a page.") + + def axi4BundleParams = AXI4BundleParameters( + addrBits = log2Ceil(size), + dataBits = dataBits, + idBits = idBits, + ) +} + +case class CPUManagedAXI4Params( + addrBits: Int, + dataBits: Int, + idBits: Int, + maxFlight: Option[Int] = None, + ) { + def axi4BundleParams = AXI4BundleParameters( + addrBits = addrBits, + dataBits = dataBits, + idBits = idBits, + ) +} // Platform agnostic wrapper of the simulation models for FPGA class FPGATop(implicit p: Parameters) extends LazyModule with HasWidgets { @@ -225,32 +276,54 @@ class FPGATop(implicit p: Parameters) extends LazyModule with HasWidgets { val toCPUStreamParams = bridgesWithToHostCPUStreams.map { _.streamSourceParams } val fromCPUStreamParams = bridgesWithFromHostCPUStreams.map { _.streamSinkParams } - val pcisAXI4BundleParams = AXI4BundleParameters( - addrBits = p(DMANastiKey).addrBits, - dataBits = p(DMANastiKey).dataBits, - idBits = p(DMANastiKey).idBits) // Dubious... 
- - val pcisNode = AXI4MasterNode( - Seq(AXI4MasterPortParameters( - masters = Seq(AXI4MasterParameters( - name = "cpu-mastered-axi4", - id = IdRange(0, 1 << p(DMANastiKey).idBits), - aligned = false, - maxFlight = None, // None = infinite, else is a per-ID cap - )) - ) - ) - ) - val streamingEngine = addWidget(p(StreamEngineInstantiatorKey)( StreamEngineParameters(toCPUStreamParams.toSeq, fromCPUStreamParams.toSeq), p) ) - streamingEngine.pcisNodeOpt.foreach { - _ := AXI4Buffer() := pcisNode + require(streamingEngine.fpgaManagedAXI4NodeOpt.isEmpty || p(FPGAManagedAXI4Key).nonEmpty, + "Selected StreamEngine uses the FPGA-managed AXI4 interface but it is not available on this platform." + ) + require(streamingEngine.cpuManagedAXI4NodeOpt.isEmpty || p(CPUManagedAXI4Key).nonEmpty, + "Selected StreamEngine uses the CPU-managed AXI4 interface, but it is not available on this platform." + ) + + val cpuManagedAXI4NodeTuple = p(CPUManagedAXI4Key).map { params => + val node = AXI4MasterNode(Seq(AXI4MasterPortParameters( + masters = Seq(AXI4MasterParameters( + name = "cpu-managed-axi4", + id = IdRange(0, 1 << params.idBits), + aligned = false, + maxFlight = params.maxFlight, // None = infinite, else is a per-ID cap + )) + ) + )) + streamingEngine.cpuManagedAXI4NodeOpt.foreach { + _ := AXI4Buffer() := node + } + (node, params) } - override def genHeader(sb: StringBuilder) { + val fpgaManagedAXI4NodeTuple = p(FPGAManagedAXI4Key).map { params => + val node = AXI4SlaveNode( + Seq(AXI4SlavePortParameters( + slaves = Seq(AXI4SlaveParameters( + address = Seq(AddressSet(0, params.size - 1)), + resources = (new MemoryDevice).reg, + regionType = RegionType.UNCACHED, // cacheable + executable = false, + supportsWrite = params.writeTransferSizes, + supportsRead = params.readTransferSizes, + interleavedId = params.interleavedId)), + beatBytes = params.dataBits / 8) + )) + + streamingEngine.fpgaManagedAXI4NodeOpt.foreach { + node := AXI4IdIndexer(params.idBits) := AXI4Buffer() := _ + } + 
(node, params) + } + + override def genHeader(sb: StringBuilder): Unit = { super.genHeader(sb) targetMemoryRegions.foreach(_.serializeToHeader(sb)) } @@ -267,11 +340,24 @@ class FPGATopImp(outer: FPGATop)(implicit p: Parameters) extends LazyModuleImp(o val ctrl = IO(Flipped(WidgetMMIO())) val mem = IO(Vec(p(HostMemNumChannels), AXI4Bundle(p(HostMemChannelKey).axi4BundleParams))) - val dma = IO(Flipped(AXI4Bundle(outer.pcisAXI4BundleParams))) + + val cpu_managed_axi4 = outer.cpuManagedAXI4NodeTuple.map { case (node, params) => + val port = IO(Flipped(AXI4Bundle(params.axi4BundleParams))) + node.out.head._1 <> port + port + } + + val fpga_managed_axi4 = outer.fpgaManagedAXI4NodeTuple.map { case (node, params) => + val port = IO(AXI4Bundle(params.axi4BundleParams)) + port <> node.in.head._1 + port + } // Hack: Don't touch the ports so that we can use FPGATop as top-level in ML simulation dontTouch(ctrl) dontTouch(mem) - dontTouch(dma) + cpu_managed_axi4.foreach(dontTouch(_)) + fpga_managed_axi4.foreach(dontTouch(_)) + (mem zip outer.memAXI4Nodes.map(_.in.head)).foreach { case (io, (bundle, _)) => require(bundle.params.idBits <= p(HostMemChannelKey).idBits, s"""| Required memory channel ID bits exceeds that present on host. 
@@ -280,8 +366,6 @@ class FPGATopImp(outer: FPGATop)(implicit p: Parameters) extends LazyModuleImp(o io <> bundle } - outer.pcisNode.out.head._1 <> dma - val sim = Module(new SimWrapper(p(SimWrapperKey))) val simIo = sim.channelPorts @@ -338,12 +422,18 @@ class FPGATopImp(outer: FPGATop)(implicit p: Parameters) extends LazyModuleImp(o "MEM_LEN_BITS" -> AXI4Parameters.lenBits, "MEM_RESP_BITS" -> AXI4Parameters.respBits, // Address width of the aggregated host-DRAM space - "DMA_ID_BITS" -> dma.params.idBits, - "DMA_ADDR_BITS" -> dma.params.addrBits, - "DMA_DATA_BITS" -> dma.params.dataBits, - "DMA_STRB_BITS" -> dma.params.dataBits / 8, - "DMA_BEAT_BYTES" -> p(DMANastiKey).dataBits / 8, - "DMA_SIZE" -> log2Ceil(p(DMANastiKey).dataBits / 8), - ) ++ Seq.tabulate[(String, Long)](p(HostMemNumChannels))(idx => s"MEM_HAS_CHANNEL${idx}" -> 1) + "CPU_MANAGED_AXI4_ID_BITS" -> cpu_managed_axi4.map(_.params.idBits) .getOrElse(0).toLong, + "CPU_MANAGED_AXI4_ADDR_BITS" -> cpu_managed_axi4.map(_.params.addrBits) .getOrElse(0).toLong, + "CPU_MANAGED_AXI4_DATA_BITS" -> cpu_managed_axi4.map(_.params.dataBits) .getOrElse(0).toLong, + "CPU_MANAGED_AXI4_STRB_BITS" -> cpu_managed_axi4.map(_.params.dataBits / 8).getOrElse(0).toLong, + "CPU_MANAGED_AXI4_BEAT_BYTES" -> cpu_managed_axi4.map(_.params.dataBits / 8).getOrElse(0).toLong, + // Widths of the AXI4 FPGA to CPU channel + "FPGA_MANAGED_AXI4_ID_BITS" -> fpga_managed_axi4.map(_.params.idBits) .getOrElse(0).toLong, + "FPGA_MANAGED_AXI4_ADDR_BITS" -> fpga_managed_axi4.map(_.params.addrBits).getOrElse(0).toLong, + "FPGA_MANAGED_AXI4_DATA_BITS" -> fpga_managed_axi4.map(_.params.dataBits).getOrElse(0).toLong, + ) ++: + cpu_managed_axi4.map { _ => "CPU_MANAGED_AXI4_PRESENT" -> 1.toLong } ++: + fpga_managed_axi4.map { _ => "FPGA_MANAGED_AXI4_PRESENT" -> 1.toLong } ++: + Seq.tabulate[(String, Long)](p(HostMemNumChannels))(idx => s"MEM_HAS_CHANNEL${idx}" -> 1) def genHeader(sb: StringBuilder)(implicit p: Parameters) = outer.genHeader(sb) } diff 
--git a/sim/midas/src/main/scala/midas/core/StreamEngine.scala b/sim/midas/src/main/scala/midas/core/StreamEngine.scala index 53de1b04..6ebd4be8 100644 --- a/sim/midas/src/main/scala/midas/core/StreamEngine.scala +++ b/sim/midas/src/main/scala/midas/core/StreamEngine.scala @@ -16,8 +16,7 @@ import midas.widgets._ * that host. e.g. F1 uses CPU-driven XDMA and so uses an engine that only * uses the AXI4M interface. */ -case object StreamEngineInstantiatorKey extends Field[(StreamEngineParameters, Parameters) => StreamEngine]( - (e: StreamEngineParameters, p: Parameters) => new CPUManagedStreamEngine(p, e)) +case object StreamEngineInstantiatorKey extends Field[(StreamEngineParameters, Parameters) => StreamEngine] /** @@ -52,18 +51,20 @@ case class StreamEngineParameters( * the transport using an AXI4 slave and / or AXI4 master port, which is * presented by the host platform. * - * Implementations that require an AXI4 slave set pcisNodeOpt = Some() - * Implementations that require an AXI4 master set pcimNodeOpt = Some() + * Implementations that require an AXI4 subordinate set cpuManagedAXI4NodeOpt = Some() + * Implementations that require an AXI4 manager set fpgaManagedAXI4NodeOpt = Some() * */ abstract class StreamEngine( p: Parameters, ) extends Widget()(p) { def params: StreamEngineParameters - def pcisNodeOpt: Option[AXI4InwardNode] - def pcimNodeOpt: Option[AXI4OutwardNode] + def cpuManagedAXI4NodeOpt: Option[AXI4InwardNode] + def fpgaManagedAXI4NodeOpt: Option[AXI4OutwardNode] + lazy val StreamEngineParameters(sourceParams, sinkParams) = params + def hasStreams: Boolean = sourceParams.nonEmpty || sinkParams.nonEmpty // Connections to bridges that drive streams val streamsToHostCPU = InModuleBody { diff --git a/sim/midas/src/main/scala/midas/platform/F1Shim.scala b/sim/midas/src/main/scala/midas/platform/F1Shim.scala index 35e9e822..e80c17b1 100644 --- a/sim/midas/src/main/scala/midas/platform/F1Shim.scala +++ 
b/sim/midas/src/main/scala/midas/platform/F1Shim.scala @@ -8,38 +8,35 @@ import freechips.rocketchip.config.{Parameters, Field} import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp} import freechips.rocketchip.util.HeterogeneousBag -import midas.core.{DMANastiKey} +import midas.core.{CPUManagedAXI4Key} import midas.widgets.{AXI4Printf, CtrlNastiKey} import midas.stage.GoldenGateOutputFileAnnotation import midas.targetutils.xdc._ case object AXIDebugPrint extends Field[Boolean] -class F1ShimIO(implicit val p: Parameters) extends Bundle { - val master = Flipped(new NastiIO()(p alterPartial ({ case NastiKey => p(CtrlNastiKey) }))) - val dma = Flipped(new NastiIO()(p alterPartial ({ case NastiKey => p(DMANastiKey) }))) -} - class F1Shim(implicit p: Parameters) extends PlatformShim { lazy val module = new LazyModuleImp(this) { - val io = IO(new F1ShimIO) + val io_master = IO(Flipped(new NastiIO()(p alterPartial { case NastiKey => p(CtrlNastiKey) }))) + val io_dma = IO(Flipped(new NastiIO()(p alterPartial { + case NastiKey => NastiParameters(p(CPUManagedAXI4Key).get.axi4BundleParams) }))) val io_slave = IO(HeterogeneousBag(top.module.mem.map(x => x.cloneType))) if (p(AXIDebugPrint)) { - AXI4Printf(io.master, "master") - AXI4Printf(io.dma, "dma") + AXI4Printf(io_master, "master") + AXI4Printf(io_dma, "dma") io_slave.zipWithIndex foreach { case (io, idx) => AXI4Printf(io, s"slave_${idx}") } } - top.module.ctrl <> io.master - AXI4NastiAssigner.toAXI4(top.module.dma, io.dma) + top.module.ctrl <> io_master + AXI4NastiAssigner.toAXI4(top.module.cpu_managed_axi4.get, io_dma) io_slave.zip(top.module.mem).foreach({ case (io, bundle) => io <> bundle }) // Biancolin: It would be good to put in writing why ID is being reassigned... 
- val (wCounterValue, wCounterWrap) = Counter(io.master.aw.fire, 1 << p(CtrlNastiKey).idBits) + val (wCounterValue, wCounterWrap) = Counter(io_master.aw.fire, 1 << p(CtrlNastiKey).idBits) top.module.ctrl.aw.bits.id := wCounterValue - val (rCounterValue, rCounterWrap) = Counter(io.master.ar.fire, 1 << p(CtrlNastiKey).idBits) + val (rCounterValue, rCounterWrap) = Counter(io_master.ar.fire, 1 << p(CtrlNastiKey).idBits) top.module.ctrl.ar.bits.id := rCounterValue // Capture FPGA-toolflow related verilog defines diff --git a/sim/midas/src/main/scala/midas/platform/VitisShim.scala b/sim/midas/src/main/scala/midas/platform/VitisShim.scala index db9241b8..53701d8f 100644 --- a/sim/midas/src/main/scala/midas/platform/VitisShim.scala +++ b/sim/midas/src/main/scala/midas/platform/VitisShim.scala @@ -9,7 +9,7 @@ import freechips.rocketchip.config.{Field, Parameters} import freechips.rocketchip.diplomacy.{LazyModule, LazyRawModuleImp} import freechips.rocketchip.util.HeterogeneousBag -import midas.core.{DMANastiKey, HostMemChannelKey} +import midas.core.HostMemChannelKey import midas.widgets.{AXI4Printf, CtrlNastiKey} import midas.stage.GoldenGateOutputFileAnnotation import midas.platform.xilinx._ @@ -48,13 +48,6 @@ class VitisShim(implicit p: Parameters) extends PlatformShim { top.module.reset := hostSyncReset top.module.clock := hostClock - // tie-off dma/io_slave interfaces - top.module.dma.ar.valid := false.B - top.module.dma.aw.valid := false.B - top.module.dma.w.valid := false.B - top.module.dma.r.ready := false.B - top.module.dma.b.ready := false.B - top.module.mem.foreach({ case bundle => bundle.ar.ready := false.B bundle.aw.ready := false.B @@ -92,6 +85,16 @@ class VitisShim(implicit p: Parameters) extends PlatformShim { host_mem_cdc.io.m_axi_aclk := ap_clk host_mem_cdc.io.m_axi_aresetn := ap_rst_n + top.module.fpga_managed_axi4.map { axi4 => + axi4.ar.ready := false.B + axi4.aw.ready := false.B + axi4.w.ready := false.B + axi4.r <> DontCare + axi4.b <> DontCare + 
axi4.r.valid := false.B + axi4.b.valid := false.B + } + GoldenGateOutputFileAnnotation.annotateFromChisel( s"// Vitis Shim requires no dynamically generated macros \n", fileSuffix = ".defines.vh", diff --git a/sim/midas/src/main/scala/midas/widgets/PrintBridge.scala b/sim/midas/src/main/scala/midas/widgets/PrintBridge.scala index 2df696ed..650b06cb 100644 --- a/sim/midas/src/main/scala/midas/widgets/PrintBridge.scala +++ b/sim/midas/src/main/scala/midas/widgets/PrintBridge.scala @@ -11,8 +11,6 @@ import chisel3.experimental.{DataMirror, Direction} import freechips.rocketchip.config.{Parameters} import freechips.rocketchip.util.{DecoupledHelper} -import midas.core.{DMANastiKey} - class PrintRecord(portType: firrtl.ir.BundleType, val formatString: String) extends Record { def regenLeafType(tpe: firrtl.ir.Type): Data = tpe match { case firrtl.ir.UIntType(width: firrtl.ir.IntWidth) => UInt(width.width.toInt.W) @@ -188,7 +186,7 @@ class PrintBridgeModule(key: PrintBridgeParameters)(implicit p: Parameters) val argumentOffsets = printPort.printRecords.map(_._2.argumentOffsets.map(UInt32(_))) val formatStrings = printPort.printRecords.map(_._2.formatString).map(CStrLit) - override def genHeader(base: BigInt, sb: StringBuilder) { + override def genHeader(base: BigInt, sb: StringBuilder): Unit = { import CppGenerationUtils._ val headerWidgetName = getWName.toUpperCase super.genHeader(base, sb) diff --git a/sim/midas/src/main/verilog/vcs_top.v b/sim/midas/src/main/verilog/vcs_top.v index 4300b066..dda8c766 100644 --- a/sim/midas/src/main/verilog/vcs_top.v +++ b/sim/midas/src/main/verilog/vcs_top.v @@ -35,37 +35,70 @@ extern "A" void tick input reg [1:0] ctrl_b_resp, input reg [`CTRL_ID_BITS-1:0] ctrl_b_id, - output reg dma_ar_valid, - input reg dma_ar_ready, - output reg [`DMA_ADDR_BITS-1:0] dma_ar_addr, - output reg [`DMA_ID_BITS-1:0] dma_ar_id, - output reg [2:0] dma_ar_size, - output reg [7:0] dma_ar_len, + output reg cpu_managed_axi4_ar_valid, + input reg 
cpu_managed_axi4_ar_ready, + output reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_addr, + output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_id, + output reg [2:0] cpu_managed_axi4_ar_size, + output reg [7:0] cpu_managed_axi4_ar_len, - output reg dma_aw_valid, - input reg dma_aw_ready, - output reg [`DMA_ADDR_BITS-1:0] dma_aw_addr, - output reg [`DMA_ID_BITS-1:0] dma_aw_id, - output reg [2:0] dma_aw_size, - output reg [7:0] dma_aw_len, + output reg cpu_managed_axi4_aw_valid, + input reg cpu_managed_axi4_aw_ready, + output reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_addr, + output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_id, + output reg [2:0] cpu_managed_axi4_aw_size, + output reg [7:0] cpu_managed_axi4_aw_len, - output reg dma_w_valid, - input reg dma_w_ready, - output reg [`DMA_STRB_BITS-1:0] dma_w_strb, - output reg [`DMA_DATA_BITS-1:0] dma_w_data, - output reg dma_w_last, + output reg cpu_managed_axi4_w_valid, + input reg cpu_managed_axi4_w_ready, + output reg [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_strb, + output reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_data, + output reg cpu_managed_axi4_w_last, - input reg dma_r_valid, - output reg dma_r_ready, - input reg [1:0] dma_r_resp, - input reg [`DMA_ID_BITS-1:0] dma_r_id, - input reg [`DMA_DATA_BITS-1:0] dma_r_data, - input reg dma_r_last, + input reg cpu_managed_axi4_r_valid, + output reg cpu_managed_axi4_r_ready, + input reg [1:0] cpu_managed_axi4_r_resp, + input reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_id, + input reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_data, + input reg cpu_managed_axi4_r_last, + + input reg cpu_managed_axi4_b_valid, + output reg cpu_managed_axi4_b_ready, + input reg [1:0] cpu_managed_axi4_b_resp, + input reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_b_id, + + input reg fpga_managed_axi4_ar_valid, + output reg fpga_managed_axi4_ar_ready, + input reg 
[`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_addr, + input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_id, + input reg [2:0] fpga_managed_axi4_ar_size, + input reg [7:0] fpga_managed_axi4_ar_len, + + input reg fpga_managed_axi4_aw_valid, + output reg fpga_managed_axi4_aw_ready, + input reg [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_addr, + input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_id, + input reg [2:0] fpga_managed_axi4_aw_size, + input reg [7:0] fpga_managed_axi4_aw_len, + + input reg fpga_managed_axi4_w_valid, + output reg fpga_managed_axi4_w_ready, + input reg [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_strb, + input reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_data, + input reg fpga_managed_axi4_w_last, + + output reg fpga_managed_axi4_r_valid, + input reg fpga_managed_axi4_r_ready, + output reg [1:0] fpga_managed_axi4_r_resp, + output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_id, + output reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_data, + output reg fpga_managed_axi4_r_last, + + output reg fpga_managed_axi4_b_valid, + input reg fpga_managed_axi4_b_ready, + output reg [1:0] fpga_managed_axi4_b_resp, + output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_id, - input reg dma_b_valid, - output reg dma_b_ready, - input reg [1:0] dma_b_resp, - input reg [`DMA_ID_BITS-1:0] dma_b_id, input reg mem_0_ar_valid, output reg mem_0_ar_ready, @@ -262,37 +295,69 @@ module emul; wire [1:0] ctrl_b_resp; wire [`CTRL_ID_BITS-1:0] ctrl_b_id; - reg dma_ar_valid; - wire dma_ar_ready; - reg [`DMA_ADDR_BITS-1:0] dma_ar_addr; - reg [`DMA_ID_BITS-1:0] dma_ar_id; - reg [2:0] dma_ar_size; - reg [7:0] dma_ar_len; + reg cpu_managed_axi4_ar_valid; + wire cpu_managed_axi4_ar_ready; + reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_addr; + reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_id; + reg [2:0] cpu_managed_axi4_ar_size; + reg [7:0] 
cpu_managed_axi4_ar_len; - reg dma_aw_valid; - wire dma_aw_ready; - reg [`DMA_ADDR_BITS-1:0] dma_aw_addr; - reg [`DMA_ID_BITS-1:0] dma_aw_id; - reg [2:0] dma_aw_size; - reg [7:0] dma_aw_len; + reg cpu_managed_axi4_aw_valid; + wire cpu_managed_axi4_aw_ready; + reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_addr; + reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_id; + reg [2:0] cpu_managed_axi4_aw_size; + reg [7:0] cpu_managed_axi4_aw_len; - reg dma_w_valid; - wire dma_w_ready; - reg [`DMA_STRB_BITS-1:0] dma_w_strb; - reg [`DMA_DATA_BITS-1:0] dma_w_data; - reg dma_w_last; + reg cpu_managed_axi4_w_valid; + wire cpu_managed_axi4_w_ready; + reg [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_strb; + reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_data; + reg cpu_managed_axi4_w_last; - wire dma_r_valid; - reg dma_r_ready; - wire [1:0] dma_r_resp; - wire [`DMA_ID_BITS-1:0] dma_r_id; - wire [`DMA_DATA_BITS-1:0] dma_r_data; - wire dma_r_last; + wire cpu_managed_axi4_r_valid; + reg cpu_managed_axi4_r_ready; + wire [1:0] cpu_managed_axi4_r_resp; + wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_id; + wire [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_data; + wire cpu_managed_axi4_r_last; - wire dma_b_valid; - reg dma_b_ready; - wire [1:0] dma_b_resp; - wire [`DMA_ID_BITS-1:0] dma_b_id; + wire cpu_managed_axi4_b_valid; + reg cpu_managed_axi4_b_ready; + wire [1:0] cpu_managed_axi4_b_resp; + wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_b_id; + + wire fpga_managed_axi4_ar_valid; + reg fpga_managed_axi4_ar_ready; + wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_addr; + wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_id; + wire [2:0] fpga_managed_axi4_ar_size; + wire [7:0] fpga_managed_axi4_ar_len; + + wire fpga_managed_axi4_aw_valid; + reg fpga_managed_axi4_aw_ready; + wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_addr; + wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_id; + 
wire [2:0] fpga_managed_axi4_aw_size; + wire [7:0] fpga_managed_axi4_aw_len; + + wire fpga_managed_axi4_w_valid; + reg fpga_managed_axi4_w_ready; + wire [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_strb; + wire [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_data; + wire fpga_managed_axi4_w_last; + + reg fpga_managed_axi4_r_valid; + wire fpga_managed_axi4_r_ready; + reg [1:0] fpga_managed_axi4_r_resp; + reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_id; + reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_data; + reg fpga_managed_axi4_r_last; + + reg fpga_managed_axi4_b_valid; + wire fpga_managed_axi4_b_ready; + reg [1:0] fpga_managed_axi4_b_resp; + reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_id; wire mem_0_ar_valid; reg mem_0_ar_ready; @@ -456,37 +521,69 @@ module emul; wire [1:0] ctrl_b_resp_delay; wire [`CTRL_ID_BITS-1:0] ctrl_b_id_delay; - wire dma_ar_valid_delay; - wire dma_ar_ready_delay; - wire [`DMA_ADDR_BITS-1:0] dma_ar_addr_delay; - wire [`DMA_ID_BITS-1:0] dma_ar_id_delay; - wire [2:0] dma_ar_size_delay; - wire [7:0] dma_ar_len_delay; + wire cpu_managed_axi4_ar_valid_delay; + wire cpu_managed_axi4_ar_ready_delay; + wire [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_addr_delay; + wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_id_delay; + wire [2:0] cpu_managed_axi4_ar_size_delay; + wire [7:0] cpu_managed_axi4_ar_len_delay; - wire dma_aw_valid_delay; - wire dma_aw_ready_delay; - wire [`DMA_ADDR_BITS-1:0] dma_aw_addr_delay; - wire [`DMA_ID_BITS-1:0] dma_aw_id_delay; - wire [2:0] dma_aw_size_delay; - wire [7:0] dma_aw_len_delay; + wire cpu_managed_axi4_aw_valid_delay; + wire cpu_managed_axi4_aw_ready_delay; + wire [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_addr_delay; + wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_id_delay; + wire [2:0] cpu_managed_axi4_aw_size_delay; + wire [7:0] cpu_managed_axi4_aw_len_delay; - wire dma_w_valid_delay; - wire dma_w_ready_delay; - 
wire [`DMA_STRB_BITS-1:0] dma_w_strb_delay; - wire [`DMA_DATA_BITS-1:0] dma_w_data_delay; - wire dma_w_last_delay; + wire cpu_managed_axi4_w_valid_delay; + wire cpu_managed_axi4_w_ready_delay; + wire [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_strb_delay; + wire [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_data_delay; + wire cpu_managed_axi4_w_last_delay; - wire dma_r_valid_delay; - wire dma_r_ready_delay; - wire [1:0] dma_r_resp_delay; - wire [`DMA_ID_BITS-1:0] dma_r_id_delay; - wire [`DMA_DATA_BITS-1:0] dma_r_data_delay; - wire dma_r_last_delay; + wire cpu_managed_axi4_r_valid_delay; + wire cpu_managed_axi4_r_ready_delay; + wire [1:0] cpu_managed_axi4_r_resp_delay; + wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_id_delay; + wire [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_data_delay; + wire cpu_managed_axi4_r_last_delay; - wire dma_b_valid_delay; - wire dma_b_ready_delay; - wire [1:0] dma_b_resp_delay; - wire [`DMA_ID_BITS-1:0] dma_b_id_delay; + wire cpu_managed_axi4_b_valid_delay; + wire cpu_managed_axi4_b_ready_delay; + wire [1:0] cpu_managed_axi4_b_resp_delay; + wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_b_id_delay; + + wire fpga_managed_axi4_ar_valid_delay; + wire fpga_managed_axi4_ar_ready_delay; + wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_addr_delay; + wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_id_delay; + wire [2:0] fpga_managed_axi4_ar_size_delay; + wire [7:0] fpga_managed_axi4_ar_len_delay; + + wire fpga_managed_axi4_aw_valid_delay; + wire fpga_managed_axi4_aw_ready_delay; + wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_addr_delay; + wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_id_delay; + wire [2:0] fpga_managed_axi4_aw_size_delay; + wire [7:0] fpga_managed_axi4_aw_len_delay; + + wire fpga_managed_axi4_w_valid_delay; + wire fpga_managed_axi4_w_ready_delay; + wire [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_strb_delay; + wire 
[`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_data_delay; + wire fpga_managed_axi4_w_last_delay; + + wire fpga_managed_axi4_r_valid_delay; + wire fpga_managed_axi4_r_ready_delay; + wire [1:0] fpga_managed_axi4_r_resp_delay; + wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_id_delay; + wire [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_data_delay; + wire fpga_managed_axi4_r_last_delay; + + wire fpga_managed_axi4_b_valid_delay; + wire fpga_managed_axi4_b_ready_delay; + wire [1:0] fpga_managed_axi4_b_resp_delay; + wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_id_delay; wire mem_0_ar_valid_delay; wire mem_0_ar_ready_delay; @@ -648,37 +745,69 @@ module emul; assign #0.1 ctrl_b_resp = ctrl_b_resp_delay; assign #0.1 ctrl_b_id = ctrl_b_id_delay; - assign #0.1 dma_ar_valid_delay = dma_ar_valid; - assign #0.1 dma_ar_ready = dma_ar_ready_delay; - assign #0.1 dma_ar_addr_delay = dma_ar_addr; - assign #0.1 dma_ar_id_delay = dma_ar_id; - assign #0.1 dma_ar_size_delay = dma_ar_size; - assign #0.1 dma_ar_len_delay = dma_ar_len; + assign #0.1 cpu_managed_axi4_ar_valid_delay = cpu_managed_axi4_ar_valid; + assign #0.1 cpu_managed_axi4_ar_ready = cpu_managed_axi4_ar_ready_delay; + assign #0.1 cpu_managed_axi4_ar_addr_delay = cpu_managed_axi4_ar_addr; + assign #0.1 cpu_managed_axi4_ar_id_delay = cpu_managed_axi4_ar_id; + assign #0.1 cpu_managed_axi4_ar_size_delay = cpu_managed_axi4_ar_size; + assign #0.1 cpu_managed_axi4_ar_len_delay = cpu_managed_axi4_ar_len; - assign #0.1 dma_aw_valid_delay = dma_aw_valid; - assign #0.1 dma_aw_ready = dma_aw_ready_delay; - assign #0.1 dma_aw_addr_delay = dma_aw_addr; - assign #0.1 dma_aw_id_delay = dma_aw_id; - assign #0.1 dma_aw_size_delay = dma_aw_size; - assign #0.1 dma_aw_len_delay = dma_aw_len; + assign #0.1 cpu_managed_axi4_aw_valid_delay = cpu_managed_axi4_aw_valid; + assign #0.1 cpu_managed_axi4_aw_ready = cpu_managed_axi4_aw_ready_delay; + assign #0.1 cpu_managed_axi4_aw_addr_delay = 
cpu_managed_axi4_aw_addr; + assign #0.1 cpu_managed_axi4_aw_id_delay = cpu_managed_axi4_aw_id; + assign #0.1 cpu_managed_axi4_aw_size_delay = cpu_managed_axi4_aw_size; + assign #0.1 cpu_managed_axi4_aw_len_delay = cpu_managed_axi4_aw_len; - assign #0.1 dma_w_valid_delay = dma_w_valid; - assign #0.1 dma_w_ready = dma_w_ready_delay; - assign #0.1 dma_w_strb_delay = dma_w_strb; - assign #0.1 dma_w_data_delay = dma_w_data; - assign #0.1 dma_w_last_delay = dma_w_last; + assign #0.1 cpu_managed_axi4_w_valid_delay = cpu_managed_axi4_w_valid; + assign #0.1 cpu_managed_axi4_w_ready = cpu_managed_axi4_w_ready_delay; + assign #0.1 cpu_managed_axi4_w_strb_delay = cpu_managed_axi4_w_strb; + assign #0.1 cpu_managed_axi4_w_data_delay = cpu_managed_axi4_w_data; + assign #0.1 cpu_managed_axi4_w_last_delay = cpu_managed_axi4_w_last; - assign #0.1 dma_r_valid = dma_r_valid_delay; - assign #0.1 dma_r_ready_delay = dma_r_ready; - assign #0.1 dma_r_resp = dma_r_resp_delay; - assign #0.1 dma_r_id = dma_r_id_delay; - assign #0.1 dma_r_data = dma_r_data_delay; - assign #0.1 dma_r_last = dma_r_last_delay; + assign #0.1 cpu_managed_axi4_r_valid = cpu_managed_axi4_r_valid_delay; + assign #0.1 cpu_managed_axi4_r_ready_delay = cpu_managed_axi4_r_ready; + assign #0.1 cpu_managed_axi4_r_resp = cpu_managed_axi4_r_resp_delay; + assign #0.1 cpu_managed_axi4_r_id = cpu_managed_axi4_r_id_delay; + assign #0.1 cpu_managed_axi4_r_data = cpu_managed_axi4_r_data_delay; + assign #0.1 cpu_managed_axi4_r_last = cpu_managed_axi4_r_last_delay; - assign #0.1 dma_b_valid = dma_b_valid_delay; - assign #0.1 dma_b_ready_delay = dma_b_ready; - assign #0.1 dma_b_resp = dma_b_resp_delay; - assign #0.1 dma_b_id = dma_b_id_delay; + assign #0.1 cpu_managed_axi4_b_valid = cpu_managed_axi4_b_valid_delay; + assign #0.1 cpu_managed_axi4_b_ready_delay = cpu_managed_axi4_b_ready; + assign #0.1 cpu_managed_axi4_b_resp = cpu_managed_axi4_b_resp_delay; + assign #0.1 cpu_managed_axi4_b_id = cpu_managed_axi4_b_id_delay; + + assign 
#0.1 fpga_managed_axi4_ar_valid = fpga_managed_axi4_ar_valid_delay; + assign #0.1 fpga_managed_axi4_ar_ready_delay = fpga_managed_axi4_ar_ready; + assign #0.1 fpga_managed_axi4_ar_addr = fpga_managed_axi4_ar_addr_delay; + assign #0.1 fpga_managed_axi4_ar_id = fpga_managed_axi4_ar_id_delay; + assign #0.1 fpga_managed_axi4_ar_size = fpga_managed_axi4_ar_size_delay; + assign #0.1 fpga_managed_axi4_ar_len = fpga_managed_axi4_ar_len_delay; + + assign #0.1 fpga_managed_axi4_aw_valid = fpga_managed_axi4_aw_valid_delay; + assign #0.1 fpga_managed_axi4_aw_ready_delay = fpga_managed_axi4_aw_ready; + assign #0.1 fpga_managed_axi4_aw_addr = fpga_managed_axi4_aw_addr_delay; + assign #0.1 fpga_managed_axi4_aw_id = fpga_managed_axi4_aw_id_delay; + assign #0.1 fpga_managed_axi4_aw_size = fpga_managed_axi4_aw_size_delay; + assign #0.1 fpga_managed_axi4_aw_len = fpga_managed_axi4_aw_len_delay; + + assign #0.1 fpga_managed_axi4_w_valid = fpga_managed_axi4_w_valid_delay; + assign #0.1 fpga_managed_axi4_w_ready_delay = fpga_managed_axi4_w_ready; + assign #0.1 fpga_managed_axi4_w_strb = fpga_managed_axi4_w_strb_delay; + assign #0.1 fpga_managed_axi4_w_data = fpga_managed_axi4_w_data_delay; + assign #0.1 fpga_managed_axi4_w_last = fpga_managed_axi4_w_last_delay; + + assign #0.1 fpga_managed_axi4_r_valid_delay = fpga_managed_axi4_r_valid; + assign #0.1 fpga_managed_axi4_r_ready = fpga_managed_axi4_r_ready_delay; + assign #0.1 fpga_managed_axi4_r_resp_delay = fpga_managed_axi4_r_resp; + assign #0.1 fpga_managed_axi4_r_id_delay = fpga_managed_axi4_r_id; + assign #0.1 fpga_managed_axi4_r_data_delay = fpga_managed_axi4_r_data; + assign #0.1 fpga_managed_axi4_r_last_delay = fpga_managed_axi4_r_last; + + assign #0.1 fpga_managed_axi4_b_valid_delay = fpga_managed_axi4_b_valid; + assign #0.1 fpga_managed_axi4_b_ready = fpga_managed_axi4_b_ready_delay; + assign #0.1 fpga_managed_axi4_b_resp_delay = fpga_managed_axi4_b_resp; + assign #0.1 fpga_managed_axi4_b_id_delay = fpga_managed_axi4_b_id; 
assign #0.1 mem_0_ar_valid = mem_0_ar_valid_delay; assign #0.1 mem_0_ar_ready_delay = mem_0_ar_ready; @@ -843,38 +972,72 @@ module emul; .ctrl_b_ready(ctrl_b_ready_delay), .ctrl_b_bits_resp(ctrl_b_resp_delay), .ctrl_b_bits_id(ctrl_b_id_delay), +`ifdef CPU_MANAGED_AXI4_PRESENT + .cpu_managed_axi4_ar_valid(cpu_managed_axi4_ar_valid_delay), + .cpu_managed_axi4_ar_ready(cpu_managed_axi4_ar_ready_delay), + .cpu_managed_axi4_ar_bits_addr(cpu_managed_axi4_ar_addr_delay), + .cpu_managed_axi4_ar_bits_id(cpu_managed_axi4_ar_id_delay), + .cpu_managed_axi4_ar_bits_size(cpu_managed_axi4_ar_size_delay), + .cpu_managed_axi4_ar_bits_len(cpu_managed_axi4_ar_len_delay), - .dma_ar_valid(dma_ar_valid_delay), - .dma_ar_ready(dma_ar_ready_delay), - .dma_ar_bits_addr(dma_ar_addr_delay), - .dma_ar_bits_id(dma_ar_id_delay), - .dma_ar_bits_size(dma_ar_size_delay), - .dma_ar_bits_len(dma_ar_len_delay), + .cpu_managed_axi4_aw_valid(cpu_managed_axi4_aw_valid_delay), + .cpu_managed_axi4_aw_ready(cpu_managed_axi4_aw_ready_delay), + .cpu_managed_axi4_aw_bits_addr(cpu_managed_axi4_aw_addr_delay), + .cpu_managed_axi4_aw_bits_id(cpu_managed_axi4_aw_id_delay), + .cpu_managed_axi4_aw_bits_size(cpu_managed_axi4_aw_size_delay), + .cpu_managed_axi4_aw_bits_len(cpu_managed_axi4_aw_len_delay), - .dma_aw_valid(dma_aw_valid_delay), - .dma_aw_ready(dma_aw_ready_delay), - .dma_aw_bits_addr(dma_aw_addr_delay), - .dma_aw_bits_id(dma_aw_id_delay), - .dma_aw_bits_size(dma_aw_size_delay), - .dma_aw_bits_len(dma_aw_len_delay), + .cpu_managed_axi4_w_valid(cpu_managed_axi4_w_valid_delay), + .cpu_managed_axi4_w_ready(cpu_managed_axi4_w_ready_delay), + .cpu_managed_axi4_w_bits_strb(cpu_managed_axi4_w_strb_delay), + .cpu_managed_axi4_w_bits_data(cpu_managed_axi4_w_data_delay), + .cpu_managed_axi4_w_bits_last(cpu_managed_axi4_w_last_delay), - .dma_w_valid(dma_w_valid_delay), - .dma_w_ready(dma_w_ready_delay), - .dma_w_bits_strb(dma_w_strb_delay), - .dma_w_bits_data(dma_w_data_delay), - .dma_w_bits_last(dma_w_last_delay), 
+ .cpu_managed_axi4_r_valid(cpu_managed_axi4_r_valid_delay), + .cpu_managed_axi4_r_ready(cpu_managed_axi4_r_ready_delay), + .cpu_managed_axi4_r_bits_resp(cpu_managed_axi4_r_resp_delay), + .cpu_managed_axi4_r_bits_id(cpu_managed_axi4_r_id_delay), + .cpu_managed_axi4_r_bits_data(cpu_managed_axi4_r_data_delay), + .cpu_managed_axi4_r_bits_last(cpu_managed_axi4_r_last_delay), - .dma_r_valid(dma_r_valid_delay), - .dma_r_ready(dma_r_ready_delay), - .dma_r_bits_resp(dma_r_resp_delay), - .dma_r_bits_id(dma_r_id_delay), - .dma_r_bits_data(dma_r_data_delay), - .dma_r_bits_last(dma_r_last_delay), + .cpu_managed_axi4_b_valid(cpu_managed_axi4_b_valid_delay), + .cpu_managed_axi4_b_ready(cpu_managed_axi4_b_ready_delay), + .cpu_managed_axi4_b_bits_resp(cpu_managed_axi4_b_resp_delay), + .cpu_managed_axi4_b_bits_id(cpu_managed_axi4_b_id_delay), +`endif +`ifdef FPGA_MANAGED_AXI4_PRESENT + .fpga_managed_axi4_ar_valid(fpga_managed_axi4_ar_valid_delay), + .fpga_managed_axi4_ar_ready(fpga_managed_axi4_ar_ready_delay), + .fpga_managed_axi4_ar_bits_addr(fpga_managed_axi4_ar_addr_delay), + .fpga_managed_axi4_ar_bits_id(fpga_managed_axi4_ar_id_delay), + .fpga_managed_axi4_ar_bits_size(fpga_managed_axi4_ar_size_delay), + .fpga_managed_axi4_ar_bits_len(fpga_managed_axi4_ar_len_delay), - .dma_b_valid(dma_b_valid_delay), - .dma_b_ready(dma_b_ready_delay), - .dma_b_bits_resp(dma_b_resp_delay), - .dma_b_bits_id(dma_b_id_delay), + .fpga_managed_axi4_aw_valid(fpga_managed_axi4_aw_valid_delay), + .fpga_managed_axi4_aw_ready(fpga_managed_axi4_aw_ready_delay), + .fpga_managed_axi4_aw_bits_addr(fpga_managed_axi4_aw_addr_delay), + .fpga_managed_axi4_aw_bits_id(fpga_managed_axi4_aw_id_delay), + .fpga_managed_axi4_aw_bits_size(fpga_managed_axi4_aw_size_delay), + .fpga_managed_axi4_aw_bits_len(fpga_managed_axi4_aw_len_delay), + + .fpga_managed_axi4_w_valid(fpga_managed_axi4_w_valid_delay), + .fpga_managed_axi4_w_ready(fpga_managed_axi4_w_ready_delay), + 
.fpga_managed_axi4_w_bits_strb(fpga_managed_axi4_w_strb_delay), + .fpga_managed_axi4_w_bits_data(fpga_managed_axi4_w_data_delay), + .fpga_managed_axi4_w_bits_last(fpga_managed_axi4_w_last_delay), + + .fpga_managed_axi4_r_valid(fpga_managed_axi4_r_valid_delay), + .fpga_managed_axi4_r_ready(fpga_managed_axi4_r_ready_delay), + .fpga_managed_axi4_r_bits_resp(fpga_managed_axi4_r_resp_delay), + .fpga_managed_axi4_r_bits_id(fpga_managed_axi4_r_id_delay), + .fpga_managed_axi4_r_bits_data(fpga_managed_axi4_r_data_delay), + .fpga_managed_axi4_r_bits_last(fpga_managed_axi4_r_last_delay), + + .fpga_managed_axi4_b_valid(fpga_managed_axi4_b_valid_delay), + .fpga_managed_axi4_b_ready(fpga_managed_axi4_b_ready_delay), + .fpga_managed_axi4_b_bits_resp(fpga_managed_axi4_b_resp_delay), + .fpga_managed_axi4_b_bits_id(fpga_managed_axi4_b_id_delay), +`endif .mem_0_ar_valid(mem_0_ar_valid_delay), .mem_0_ar_ready(mem_0_ar_ready_delay), @@ -907,7 +1070,6 @@ module emul; .mem_0_b_ready(mem_0_b_ready_delay), .mem_0_b_bits_resp(mem_0_b_resp_delay), .mem_0_b_bits_id(mem_0_b_id_delay), - `ifdef MEM_HAS_CHANNEL1 .mem_1_ar_valid(mem_1_ar_valid_delay), .mem_1_ar_ready(mem_1_ar_ready_delay), @@ -1049,37 +1211,69 @@ module emul; ctrl_b_resp, ctrl_b_id, - dma_ar_valid, - dma_ar_ready, - dma_ar_addr, - dma_ar_id, - dma_ar_size, - dma_ar_len, + cpu_managed_axi4_ar_valid, + cpu_managed_axi4_ar_ready, + cpu_managed_axi4_ar_addr, + cpu_managed_axi4_ar_id, + cpu_managed_axi4_ar_size, + cpu_managed_axi4_ar_len, - dma_aw_valid, - dma_aw_ready, - dma_aw_addr, - dma_aw_id, - dma_aw_size, - dma_aw_len, + cpu_managed_axi4_aw_valid, + cpu_managed_axi4_aw_ready, + cpu_managed_axi4_aw_addr, + cpu_managed_axi4_aw_id, + cpu_managed_axi4_aw_size, + cpu_managed_axi4_aw_len, - dma_w_valid, - dma_w_ready, - dma_w_strb, - dma_w_data, - dma_w_last, + cpu_managed_axi4_w_valid, + cpu_managed_axi4_w_ready, + cpu_managed_axi4_w_strb, + cpu_managed_axi4_w_data, + cpu_managed_axi4_w_last, - dma_r_valid, - dma_r_ready, - 
dma_r_resp, - dma_r_id, - dma_r_data, - dma_r_last, + cpu_managed_axi4_r_valid, + cpu_managed_axi4_r_ready, + cpu_managed_axi4_r_resp, + cpu_managed_axi4_r_id, + cpu_managed_axi4_r_data, + cpu_managed_axi4_r_last, - dma_b_valid, - dma_b_ready, - dma_b_resp, - dma_b_id, + cpu_managed_axi4_b_valid, + cpu_managed_axi4_b_ready, + cpu_managed_axi4_b_resp, + cpu_managed_axi4_b_id, + + fpga_managed_axi4_ar_valid, + fpga_managed_axi4_ar_ready, + fpga_managed_axi4_ar_addr, + fpga_managed_axi4_ar_id, + fpga_managed_axi4_ar_size, + fpga_managed_axi4_ar_len, + + fpga_managed_axi4_aw_valid, + fpga_managed_axi4_aw_ready, + fpga_managed_axi4_aw_addr, + fpga_managed_axi4_aw_id, + fpga_managed_axi4_aw_size, + fpga_managed_axi4_aw_len, + + fpga_managed_axi4_w_valid, + fpga_managed_axi4_w_ready, + fpga_managed_axi4_w_strb, + fpga_managed_axi4_w_data, + fpga_managed_axi4_w_last, + + fpga_managed_axi4_r_valid, + fpga_managed_axi4_r_ready, + fpga_managed_axi4_r_resp, + fpga_managed_axi4_r_id, + fpga_managed_axi4_r_data, + fpga_managed_axi4_r_last, + + fpga_managed_axi4_b_valid, + fpga_managed_axi4_b_ready, + fpga_managed_axi4_b_resp, + fpga_managed_axi4_b_id, mem_0_ar_valid, mem_0_ar_ready, diff --git a/sim/midas/src/main/verilog/verilator_top.sv b/sim/midas/src/main/verilog/verilator_top.sv index 614e376d..d79ccfca 100644 --- a/sim/midas/src/main/verilog/verilator_top.sv +++ b/sim/midas/src/main/verilog/verilator_top.sv @@ -35,37 +35,73 @@ module verilator_top ( output reg [1:0] ctrl_b_bits_resp, output reg [`CTRL_ID_BITS-1:0] ctrl_b_bits_id, - input reg dma_ar_valid, - output reg dma_ar_ready, - input reg [`DMA_ADDR_BITS-1:0] dma_ar_bits_addr, - input reg [`DMA_ID_BITS-1:0] dma_ar_bits_id, - input reg [2:0] dma_ar_bits_size, - input reg [7:0] dma_ar_bits_len, +`ifdef CPU_MANAGED_AXI4_PRESENT + input reg cpu_managed_axi4_ar_valid, + output reg cpu_managed_axi4_ar_ready, + input reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_bits_addr, + input reg 
[`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_bits_id, + input reg [2:0] cpu_managed_axi4_ar_bits_size, + input reg [7:0] cpu_managed_axi4_ar_bits_len, - input reg dma_aw_valid, - output reg dma_aw_ready, - input reg [`DMA_ADDR_BITS-1:0] dma_aw_bits_addr, - input reg [`DMA_ID_BITS-1:0] dma_aw_bits_id, - input reg [2:0] dma_aw_bits_size, - input reg [7:0] dma_aw_bits_len, + input reg cpu_managed_axi4_aw_valid, + output reg cpu_managed_axi4_aw_ready, + input reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_bits_addr, + input reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_bits_id, + input reg [2:0] cpu_managed_axi4_aw_bits_size, + input reg [7:0] cpu_managed_axi4_aw_bits_len, - input reg dma_w_valid, - output reg dma_w_ready, - input reg [`DMA_STRB_BITS-1:0] dma_w_bits_strb, - input reg [`DMA_DATA_BITS-1:0] dma_w_bits_data, - input reg dma_w_bits_last, + input reg cpu_managed_axi4_w_valid, + output reg cpu_managed_axi4_w_ready, + input reg [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_bits_strb, + input reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_bits_data, + input reg cpu_managed_axi4_w_bits_last, - output reg dma_r_valid, - input reg dma_r_ready, - output reg [1:0] dma_r_bits_resp, - output reg [`DMA_ID_BITS-1:0] dma_r_bits_id, - output reg [`DMA_DATA_BITS-1:0] dma_r_bits_data, - output reg dma_r_bits_last, + output reg cpu_managed_axi4_r_valid, + input reg cpu_managed_axi4_r_ready, + output reg [1:0] cpu_managed_axi4_r_bits_resp, + output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_bits_id, + output reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_bits_data, + output reg cpu_managed_axi4_r_bits_last, - output reg dma_b_valid, - input reg dma_b_ready, - output reg [1:0] dma_b_bits_resp, - output reg [`DMA_ID_BITS-1:0] dma_b_bits_id, + output reg cpu_managed_axi4_b_valid, + input reg cpu_managed_axi4_b_ready, + output reg [1:0] cpu_managed_axi4_b_bits_resp, + output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] 
cpu_managed_axi4_b_bits_id, +`endif // CPU_MANAGED_AXI4_PRESENT + +`ifdef FPGA_MANAGED_AXI4_PRESENT + output reg fpga_managed_axi4_ar_valid, + input reg fpga_managed_axi4_ar_ready, + output reg [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_bits_addr, + output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_bits_id, + output reg [2:0] fpga_managed_axi4_ar_bits_size, + output reg [7:0] fpga_managed_axi4_ar_bits_len, + + output reg fpga_managed_axi4_aw_valid, + input reg fpga_managed_axi4_aw_ready, + output reg [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_bits_addr, + output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_bits_id, + output reg [2:0] fpga_managed_axi4_aw_bits_size, + output reg [7:0] fpga_managed_axi4_aw_bits_len, + + output reg fpga_managed_axi4_w_valid, + input reg fpga_managed_axi4_w_ready, + output reg [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_bits_strb, + output reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_bits_data, + output reg fpga_managed_axi4_w_bits_last, + + input reg fpga_managed_axi4_r_valid, + output reg fpga_managed_axi4_r_ready, + input reg [1:0] fpga_managed_axi4_r_bits_resp, + input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_bits_id, + input reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_bits_data, + input reg fpga_managed_axi4_r_bits_last, + + input reg fpga_managed_axi4_b_valid, + output reg fpga_managed_axi4_b_ready, + input reg [1:0] fpga_managed_axi4_b_bits_resp, + input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_bits_id, +`endif output reg mem_0_ar_valid, input reg mem_0_ar_ready, @@ -237,37 +273,73 @@ module verilator_top ( .ctrl_b_bits_resp(ctrl_b_bits_resp), .ctrl_b_bits_id(ctrl_b_bits_id), - .dma_ar_valid(dma_ar_valid), - .dma_ar_ready(dma_ar_ready), - .dma_ar_bits_addr(dma_ar_bits_addr), - .dma_ar_bits_id(dma_ar_bits_id), - .dma_ar_bits_size(dma_ar_bits_size), - .dma_ar_bits_len(dma_ar_bits_len), +`ifdef 
CPU_MANAGED_AXI4_PRESENT + .cpu_managed_axi4_ar_valid(cpu_managed_axi4_ar_valid), + .cpu_managed_axi4_ar_ready(cpu_managed_axi4_ar_ready), + .cpu_managed_axi4_ar_bits_addr(cpu_managed_axi4_ar_bits_addr), + .cpu_managed_axi4_ar_bits_id(cpu_managed_axi4_ar_bits_id), + .cpu_managed_axi4_ar_bits_size(cpu_managed_axi4_ar_bits_size), + .cpu_managed_axi4_ar_bits_len(cpu_managed_axi4_ar_bits_len), - .dma_aw_valid(dma_aw_valid), - .dma_aw_ready(dma_aw_ready), - .dma_aw_bits_addr(dma_aw_bits_addr), - .dma_aw_bits_id(dma_aw_bits_id), - .dma_aw_bits_size(dma_aw_bits_size), - .dma_aw_bits_len(dma_aw_bits_len), + .cpu_managed_axi4_aw_valid(cpu_managed_axi4_aw_valid), + .cpu_managed_axi4_aw_ready(cpu_managed_axi4_aw_ready), + .cpu_managed_axi4_aw_bits_addr(cpu_managed_axi4_aw_bits_addr), + .cpu_managed_axi4_aw_bits_id(cpu_managed_axi4_aw_bits_id), + .cpu_managed_axi4_aw_bits_size(cpu_managed_axi4_aw_bits_size), + .cpu_managed_axi4_aw_bits_len(cpu_managed_axi4_aw_bits_len), - .dma_w_valid(dma_w_valid), - .dma_w_ready(dma_w_ready), - .dma_w_bits_strb(dma_w_bits_strb), - .dma_w_bits_data(dma_w_bits_data), - .dma_w_bits_last(dma_w_bits_last), + .cpu_managed_axi4_w_valid(cpu_managed_axi4_w_valid), + .cpu_managed_axi4_w_ready(cpu_managed_axi4_w_ready), + .cpu_managed_axi4_w_bits_strb(cpu_managed_axi4_w_bits_strb), + .cpu_managed_axi4_w_bits_data(cpu_managed_axi4_w_bits_data), + .cpu_managed_axi4_w_bits_last(cpu_managed_axi4_w_bits_last), - .dma_r_valid(dma_r_valid), - .dma_r_ready(dma_r_ready), - .dma_r_bits_resp(dma_r_bits_resp), - .dma_r_bits_id(dma_r_bits_id), - .dma_r_bits_data(dma_r_bits_data), - .dma_r_bits_last(dma_r_bits_last), + .cpu_managed_axi4_r_valid(cpu_managed_axi4_r_valid), + .cpu_managed_axi4_r_ready(cpu_managed_axi4_r_ready), + .cpu_managed_axi4_r_bits_resp(cpu_managed_axi4_r_bits_resp), + .cpu_managed_axi4_r_bits_id(cpu_managed_axi4_r_bits_id), + .cpu_managed_axi4_r_bits_data(cpu_managed_axi4_r_bits_data), + 
.cpu_managed_axi4_r_bits_last(cpu_managed_axi4_r_bits_last), - .dma_b_valid(dma_b_valid), - .dma_b_ready(dma_b_ready), - .dma_b_bits_resp(dma_b_bits_resp), - .dma_b_bits_id(dma_b_bits_id), + .cpu_managed_axi4_b_valid(cpu_managed_axi4_b_valid), + .cpu_managed_axi4_b_ready(cpu_managed_axi4_b_ready), + .cpu_managed_axi4_b_bits_resp(cpu_managed_axi4_b_bits_resp), + .cpu_managed_axi4_b_bits_id(cpu_managed_axi4_b_bits_id), +`endif + +`ifdef FPGA_MANAGED_AXI4_PRESENT + .fpga_managed_axi4_ar_valid(fpga_managed_axi4_ar_valid), + .fpga_managed_axi4_ar_ready(fpga_managed_axi4_ar_ready), + .fpga_managed_axi4_ar_bits_addr(fpga_managed_axi4_ar_bits_addr), + .fpga_managed_axi4_ar_bits_id(fpga_managed_axi4_ar_bits_id), + .fpga_managed_axi4_ar_bits_size(fpga_managed_axi4_ar_bits_size), + .fpga_managed_axi4_ar_bits_len(fpga_managed_axi4_ar_bits_len), + + .fpga_managed_axi4_aw_valid(fpga_managed_axi4_aw_valid), + .fpga_managed_axi4_aw_ready(fpga_managed_axi4_aw_ready), + .fpga_managed_axi4_aw_bits_addr(fpga_managed_axi4_aw_bits_addr), + .fpga_managed_axi4_aw_bits_id(fpga_managed_axi4_aw_bits_id), + .fpga_managed_axi4_aw_bits_size(fpga_managed_axi4_aw_bits_size), + .fpga_managed_axi4_aw_bits_len(fpga_managed_axi4_aw_bits_len), + + .fpga_managed_axi4_w_valid(fpga_managed_axi4_w_valid), + .fpga_managed_axi4_w_ready(fpga_managed_axi4_w_ready), + .fpga_managed_axi4_w_bits_strb(fpga_managed_axi4_w_bits_strb), + .fpga_managed_axi4_w_bits_data(fpga_managed_axi4_w_bits_data), + .fpga_managed_axi4_w_bits_last(fpga_managed_axi4_w_bits_last), + + .fpga_managed_axi4_r_valid(fpga_managed_axi4_r_valid), + .fpga_managed_axi4_r_ready(fpga_managed_axi4_r_ready), + .fpga_managed_axi4_r_bits_resp(fpga_managed_axi4_r_bits_resp), + .fpga_managed_axi4_r_bits_id(fpga_managed_axi4_r_bits_id), + .fpga_managed_axi4_r_bits_data(fpga_managed_axi4_r_bits_data), + .fpga_managed_axi4_r_bits_last(fpga_managed_axi4_r_bits_last), + + .fpga_managed_axi4_b_valid(fpga_managed_axi4_b_valid), + 
.fpga_managed_axi4_b_ready(fpga_managed_axi4_b_ready), + .fpga_managed_axi4_b_bits_resp(fpga_managed_axi4_b_bits_resp), + .fpga_managed_axi4_b_bits_id(fpga_managed_axi4_b_bits_id), +`endif .mem_0_ar_valid(mem_0_ar_valid), .mem_0_ar_ready(mem_0_ar_ready), diff --git a/sim/src/main/scala/midasexamples/Config.scala b/sim/src/main/scala/midasexamples/Config.scala index 63817028..390047c8 100644 --- a/sim/src/main/scala/midasexamples/Config.scala +++ b/sim/src/main/scala/midasexamples/Config.scala @@ -11,19 +11,29 @@ import firesim.configs.{WithDefaultMemModel, WithWiringTransform} class NoConfig extends Config(Parameters.empty) // This is incomplete and must be mixed into a complete platform config -class DefaultF1Config extends Config(new Config((site, here, up) => { - case DesiredHostFrequency => 75 - case SynthAsserts => true - case GenerateMultiCycleRamModels => true - case EnableModelMultiThreading => true - case EnableAutoILA => true - case SynthPrints => true - case EnableAutoCounter => true -}) ++ new Config( - new firesim.configs.WithEC2F1Artefacts ++ +class BaseMidasExamplesConfig extends Config( new WithDefaultMemModel ++ new WithWiringTransform ++ - new midas.F1Config)) + new Config((site, here, up) => { + case DesiredHostFrequency => 75 + case SynthAsserts => true + case GenerateMultiCycleRamModels => true + case EnableModelMultiThreading => true + case EnableAutoILA => true + case SynthPrints => true + case EnableAutoCounter => true + }) +) +class DefaultF1Config extends Config( + new firesim.configs.WithEC2F1Artefacts ++ + new BaseMidasExamplesConfig ++ + new midas.F1Config +) + +class DefaultVitisConfig extends Config( + new BaseMidasExamplesConfig ++ + new midas.VitisConfig +) class PointerChaserConfig extends Config((site, here, up) => { case MemSize => BigInt(1 << 30) // 1 GB diff --git a/sim/src/test/scala/midasexamples/TutorialSuite.scala b/sim/src/test/scala/midasexamples/TutorialSuite.scala index c313bec3..628bed08 100644 --- 
a/sim/src/test/scala/midasexamples/TutorialSuite.scala +++ b/sim/src/test/scala/midasexamples/TutorialSuite.scala @@ -187,6 +187,11 @@ class ParityF1Test extends TutorialSuite("Parity") { runTest("verilator", true) runTest("vcs", true) } + +class ParityVitisTest extends TutorialSuite("Parity", platformConfigs = classOf[DefaultVitisConfig].getSimpleName) { + runTest("verilator", true) + runTest("vcs", true) +} class ShiftRegisterF1Test extends TutorialSuite("ShiftRegister") class ResetShiftRegisterF1Test extends TutorialSuite("ResetShiftRegister") class EnableShiftRegisterF1Test extends TutorialSuite("EnableShiftRegister")