Merge pull request #1191 from firesim/fpga-managed-axi4-in-metasim

Support FPGA-managed AXI4/DMA in metasimulation
This commit is contained in:
David Biancolin 2022-09-26 22:13:29 -07:00 committed by GitHub
commit d3c45e005d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 1214 additions and 567 deletions

View File

@ -9,34 +9,25 @@
# If you are using an older version of FireSim, you will need to generate your
# own images.
firesim_rocket_singlecore_sha3_no_nic_l2_llc4mb_ddr3_printf:
agfi: agfi-088e72c309ad8bb84
deploy_triplet_override: null
custom_runtime_config: null
firesim_boom_singlecore_nic_l2_llc4mb_ddr3:
agfi: agfi-0f8e76f0bae8086fb
deploy_triplet_override: null
custom_runtime_config: null
firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3:
agfi: agfi-06190ac8ae0f6de18
deploy_triplet_override: null
custom_runtime_config: null
# DOCREF START: Example HWDB Entry
firesim_rocket_quadcore_nic_l2_llc4mb_ddr3:
agfi: agfi-0d3f979b71eec9b7f
firesim_boom_singlecore_nic_l2_llc4mb_ddr3:
agfi: agfi-0da1eb7805ed745b4
deploy_triplet_override: null
custom_runtime_config: null
# DOCREF END: Example HWDB Entry
firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3:
agfi: agfi-0165525d8d88f7a5f
deploy_triplet_override: null
custom_runtime_config: null
firesim_rocket_quadcore_nic_l2_llc4mb_ddr3:
agfi: agfi-07ca3beae463369b3
deploy_triplet_override: null
custom_runtime_config: null
firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3:
agfi: agfi-0467dc13c58dfd13c
agfi: agfi-0018bceeef7cc7809
deploy_triplet_override: null
custom_runtime_config: null
firesim_supernode_rocket_singlecore_nic_l2_lbp:
agfi: agfi-0d4b18f24bfedf193
agfi: agfi-043ef11ebeaf519a6
deploy_triplet_override: null
custom_runtime_config: null

View File

@ -19,10 +19,10 @@ size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) {
// implement non-multiples of 512b. The FPGA-side queue will take on the
// high-order bytes of the final beat in the transaction, and the strobe is
// not respected. So put the assertion here and discuss what to do next.
assert((num_bytes % DMA_BEAT_BYTES) == 0);
assert((num_bytes % CPU_MANAGED_AXI4_BEAT_BYTES) == 0);
auto num_beats = num_bytes / DMA_BEAT_BYTES;
auto threshold_beats = required_bytes / DMA_BEAT_BYTES;
auto num_beats = num_bytes / CPU_MANAGED_AXI4_BEAT_BYTES;
auto threshold_beats = required_bytes / CPU_MANAGED_AXI4_BEAT_BYTES;
assert(threshold_beats <= this->fpga_buffer_size());
auto space_available =
@ -33,8 +33,9 @@ size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) {
}
auto push_beats = std::min(space_available, num_beats);
auto push_bytes = push_beats * DMA_BEAT_BYTES;
auto bytes_written = pcis_write(this->dma_addr(), (char *)src, push_bytes);
auto push_bytes = push_beats * CPU_MANAGED_AXI4_BEAT_BYTES;
auto bytes_written =
this->axi4_write(this->dma_addr(), (char *)src, push_bytes);
assert(bytes_written == push_bytes);
return bytes_written;
@ -54,19 +55,19 @@ size_t StreamToCPU::pull(void *dest, size_t num_bytes, size_t required_bytes) {
assert(num_bytes >= required_bytes);
// The legacy code is clearly broken for requests that aren't a
// multiple of 512b since DMA_SIZE is fixed to the full width of the AXI4 IF.
// The high-order bytes of the final word will be copied into the destination
// buffer (potentially an overflow, bug 1), and since reads are destructive,
// will not be visible to future pulls (bug 2). So i've put this assertion
// here for now...
// multiple of 512b since CPU_MANAGED_AXI4_SIZE is fixed to the full width of
// the AXI4 IF. The high-order bytes of the final word will be copied into the
// destination buffer (potentially an overflow, bug 1), and since reads are
// destructive, will not be visible to future pulls (bug 2). So I've put this
// assertion here for now...
// Due to the destructive nature of reads, if we wish to support reads that
// aren't a multiple of 512b, we'll need to keep a little buffer around for
// the remainder, and prepend this to the destination buffer.
assert((num_bytes % DMA_BEAT_BYTES) == 0);
assert((num_bytes % CPU_MANAGED_AXI4_BEAT_BYTES) == 0);
auto num_beats = num_bytes / DMA_BEAT_BYTES;
auto threshold_beats = required_bytes / DMA_BEAT_BYTES;
auto num_beats = num_bytes / CPU_MANAGED_AXI4_BEAT_BYTES;
auto threshold_beats = required_bytes / CPU_MANAGED_AXI4_BEAT_BYTES;
assert(threshold_beats <= this->fpga_buffer_size());
auto count = this->mmio_read(this->count_addr());
@ -76,8 +77,8 @@ size_t StreamToCPU::pull(void *dest, size_t num_bytes, size_t required_bytes) {
}
auto pull_beats = std::min(count, num_beats);
auto pull_bytes = pull_beats * DMA_BEAT_BYTES;
auto bytes_read = this->pcis_read(this->dma_addr(), (char *)dest, pull_bytes);
auto pull_bytes = pull_beats * CPU_MANAGED_AXI4_BEAT_BYTES;
auto bytes_read = this->axi4_read(this->dma_addr(), (char *)dest, pull_bytes);
assert(bytes_read == pull_bytes);
return bytes_read;
}

View File

@ -28,15 +28,15 @@ typedef struct CPUManagedStreamParameters {
/**
* @brief Base class for CPU-managed streams
*
* Streams implemented with the CPUManagedStreamingEngine have a common set of
* Streams implemented with the CPUManagedStreamEngine have a common set of
* parameters, and use MMIO to measure FPGA-queue occupancy. This base class
* captures that.
*
* Children of this class implement the host-independent control for streams.
* Generally, this consists of doing an MMIO read to FPGA-side queue capacity,
* to determine if a stream request can be served. Host implementations
* instantiate these classes with callbacks to implement MMIO and DMA/PCIS/PCIM
* for their platform.
* instantiate these classes with callbacks to implement MMIO and either CPU- or
* FPGA-managed AXI4 for their platform.
*
*/
class CPUManagedStream {
@ -61,41 +61,41 @@ public:
* @brief Implements streams sunk by the driver (sourced by the FPGA)
*
* Extends CPUManagedStream to provide a pull method, which moves data from the
* FPGA into a user-provided buffer. IO over a CPU-mastered AXI4 IF is
* implemented with pcis_read, and is provided by the host-platform.
* FPGA into a user-provided buffer. IO over a CPU-managed AXI4 IF is
* implemented with axi4_read, and is provided by the host-platform.
*
*/
class StreamToCPU : public CPUManagedStream {
public:
StreamToCPU(CPUManagedStreamParameters params,
std::function<uint32_t(size_t)> mmio_read,
std::function<size_t(size_t, char *, size_t)> pcis_read)
: CPUManagedStream(params, mmio_read), pcis_read(pcis_read){};
std::function<size_t(size_t, char *, size_t)> axi4_read)
: CPUManagedStream(params, mmio_read), axi4_read(axi4_read){};
size_t pull(void *dest, size_t num_bytes, size_t required_bytes);
private:
std::function<size_t(size_t, char *, size_t)> pcis_read;
std::function<size_t(size_t, char *, size_t)> axi4_read;
};
/**
* @brief Implements streams sourced by the driver (sunk by the FPGA)
*
* Extends CPUManagedStream to provide a push method, which moves data to the
* FPGA out of a user-provided buffer. IO over a CPU-mastered AXI4 IF is
* implemented with pcis_write, and is provided by the host-platform.
* FPGA out of a user-provided buffer. IO over a CPU-managed AXI4 IF is
* implemented with axi4_write, and is provided by the host-platform.
*/
class StreamFromCPU : public CPUManagedStream {
public:
StreamFromCPU(CPUManagedStreamParameters params,
std::function<uint32_t(size_t)> mmio_read,
std::function<size_t(size_t, char *, size_t)> pcis_write)
: CPUManagedStream(params, mmio_read), pcis_write(pcis_write){};
std::function<size_t(size_t, char *, size_t)> axi4_write)
: CPUManagedStream(params, mmio_read), axi4_write(axi4_write){};
size_t push(void *src, size_t num_bytes, size_t required_bytes);
private:
std::function<size_t(size_t, char *, size_t)> pcis_write;
std::function<size_t(size_t, char *, size_t)> axi4_write;
};
#endif // __CPU_MANAGED_STREAM_H

View File

@ -82,10 +82,11 @@ private:
ClockInfo clock_info;
const int printno;
// DMA batching parameters
// Stream batching parameters
static constexpr size_t beat_bytes = BridgeConstants::STREAM_WIDTH_BYTES;
// The number of DMA beats to pull off the FPGA on each invocation of tick()
// This will be set based on the ratio of token_size : desired_batch_beats
// The number of stream beats to pull off the FPGA on each invocation of
// tick(). This will be set based on the ratio of token_size :
// desired_batch_beats
size_t batch_beats;
// This will be modified to be a multiple of the token size
const size_t desired_batch_beats = stream_depth / 2;

View File

@ -11,11 +11,59 @@ extern bool vcs_fin;
extern bool vcs_rst;
extern uint64_t main_time;
static const size_t CTRL_DATA_SIZE = CTRL_BEAT_BYTES / sizeof(uint32_t);
static const size_t DMA_DATA_SIZE = DMA_BEAT_BYTES / sizeof(uint32_t);
static const size_t DMA_STRB_SIZE =
(DMA_BEAT_BYTES / 8 + sizeof(uint32_t) - 1) / sizeof(uint32_t);
static const size_t MEM_DATA_SIZE = MEM_BEAT_BYTES / sizeof(uint32_t);
constexpr size_t CTRL_DATA_SIZE = CTRL_BEAT_BYTES / sizeof(uint32_t);
constexpr size_t CPU_MANAGED_AXI4_DATA_SIZE =
CPU_MANAGED_AXI4_BEAT_BYTES / sizeof(uint32_t);
constexpr size_t CPU_MANAGED_AXI4_STRB_SIZE =
(CPU_MANAGED_AXI4_BEAT_BYTES / 8 + sizeof(uint32_t) - 1) / sizeof(uint32_t);
constexpr size_t FPGA_MANAGED_AXI4_DATA_SIZE =
(FPGA_MANAGED_AXI4_DATA_BITS / 8) / sizeof(uint32_t);
constexpr size_t FPGA_MANAGED_AXI4_STRB_SIZE =
((FPGA_MANAGED_AXI4_DATA_BITS / 8) / 8 + sizeof(uint32_t) - 1) /
sizeof(uint32_t);
constexpr size_t MEM_DATA_SIZE = MEM_BEAT_BYTES / sizeof(uint32_t);
/**
 * @brief get a uint64_t from a vc_handle that may be a scalar or vector
 *
 * vc_handles for single bit vs multibit values need to be accessed at runtime
 * with different methods. This handles that for fields that might be 1-bit wide
 *
 * In practice this is just the ID field, so return uint64_t which is what mm
 * expects
 *
 * Vector handles are read one 32-bit word at a time. For widths above 32 the
 * second word supplies bits 63:32, mirroring the two-word layout that
 * putScalarOrVector drives; previously only the low word was returned and the
 * upper bits were silently dropped.
 *
 * @param h the vc_handle
 * @param width the expected width of the bitvector (1 to 64 inclusive)
 * @return uint64_t the bitvector encoded as uint64_t
 */
uint64_t getScalarOrVector(const vc_handle &h, int width) {
  assert(width >= 1 && width <= 64);
  if (width == 1) {
    return vc_getScalar(h);
  }

  const auto *words = vc_4stVectorRef(h);
  uint64_t value = words[0].d;
  // Only touch the second word when the vector is actually wide enough to
  // have one; narrower vectors are backed by a single word.
  if (width > 32) {
    value |= static_cast<uint64_t>(words[1].d) << 32;
  }
  return value;
}
/**
* @brief Put the LSBs of @value into a vc_handle that may be a vector or
* scalar.
*
* @param h the vc_handle
* @param value a uint64_t whose LSBs contain a bitvector to drive onto the
* handle
* @param width the width of the bitvector
*/
void putScalarOrVector(const vc_handle &h, uint64_t value, int width) {
assert(width >= 1 && width <= 64);
if (width == 1) {
vc_putScalar(h, value & 1);
} else {
vec32 md[sizeof(uint64_t) / sizeof(uint32_t)];
md[0].c = 0;
md[0].d = (uint32_t)value;
md[1].c = 0;
md[1].d = (uint32_t)(value >> 32);
vc_put4stVector(h, md);
}
}
extern "C" {
void tick(vc_handle reset,
vc_handle fin,
@ -52,37 +100,69 @@ void tick(vc_handle reset,
vc_handle ctrl_b_bits_resp,
vc_handle ctrl_b_bits_id,
vc_handle dma_ar_valid,
vc_handle dma_ar_ready,
vc_handle dma_ar_bits_addr,
vc_handle dma_ar_bits_id,
vc_handle dma_ar_bits_size,
vc_handle dma_ar_bits_len,
vc_handle cpu_managed_axi4_ar_valid,
vc_handle cpu_managed_axi4_ar_ready,
vc_handle cpu_managed_axi4_ar_bits_addr,
vc_handle cpu_managed_axi4_ar_bits_id,
vc_handle cpu_managed_axi4_ar_bits_size,
vc_handle cpu_managed_axi4_ar_bits_len,
vc_handle dma_aw_valid,
vc_handle dma_aw_ready,
vc_handle dma_aw_bits_addr,
vc_handle dma_aw_bits_id,
vc_handle dma_aw_bits_size,
vc_handle dma_aw_bits_len,
vc_handle cpu_managed_axi4_aw_valid,
vc_handle cpu_managed_axi4_aw_ready,
vc_handle cpu_managed_axi4_aw_bits_addr,
vc_handle cpu_managed_axi4_aw_bits_id,
vc_handle cpu_managed_axi4_aw_bits_size,
vc_handle cpu_managed_axi4_aw_bits_len,
vc_handle dma_w_valid,
vc_handle dma_w_ready,
vc_handle dma_w_bits_strb,
vc_handle dma_w_bits_data,
vc_handle dma_w_bits_last,
vc_handle cpu_managed_axi4_w_valid,
vc_handle cpu_managed_axi4_w_ready,
vc_handle cpu_managed_axi4_w_bits_strb,
vc_handle cpu_managed_axi4_w_bits_data,
vc_handle cpu_managed_axi4_w_bits_last,
vc_handle dma_r_valid,
vc_handle dma_r_ready,
vc_handle dma_r_bits_resp,
vc_handle dma_r_bits_id,
vc_handle dma_r_bits_data,
vc_handle dma_r_bits_last,
vc_handle cpu_managed_axi4_r_valid,
vc_handle cpu_managed_axi4_r_ready,
vc_handle cpu_managed_axi4_r_bits_resp,
vc_handle cpu_managed_axi4_r_bits_id,
vc_handle cpu_managed_axi4_r_bits_data,
vc_handle cpu_managed_axi4_r_bits_last,
vc_handle dma_b_valid,
vc_handle dma_b_ready,
vc_handle dma_b_bits_resp,
vc_handle dma_b_bits_id,
vc_handle cpu_managed_axi4_b_valid,
vc_handle cpu_managed_axi4_b_ready,
vc_handle cpu_managed_axi4_b_bits_resp,
vc_handle cpu_managed_axi4_b_bits_id,
vc_handle fpga_managed_axi4_ar_valid,
vc_handle fpga_managed_axi4_ar_ready,
vc_handle fpga_managed_axi4_ar_bits_addr,
vc_handle fpga_managed_axi4_ar_bits_id,
vc_handle fpga_managed_axi4_ar_bits_size,
vc_handle fpga_managed_axi4_ar_bits_len,
vc_handle fpga_managed_axi4_aw_valid,
vc_handle fpga_managed_axi4_aw_ready,
vc_handle fpga_managed_axi4_aw_bits_addr,
vc_handle fpga_managed_axi4_aw_bits_id,
vc_handle fpga_managed_axi4_aw_bits_size,
vc_handle fpga_managed_axi4_aw_bits_len,
vc_handle fpga_managed_axi4_w_valid,
vc_handle fpga_managed_axi4_w_ready,
vc_handle fpga_managed_axi4_w_bits_strb,
vc_handle fpga_managed_axi4_w_bits_data,
vc_handle fpga_managed_axi4_w_bits_last,
vc_handle fpga_managed_axi4_r_valid,
vc_handle fpga_managed_axi4_r_ready,
vc_handle fpga_managed_axi4_r_bits_resp,
vc_handle fpga_managed_axi4_r_bits_id,
vc_handle fpga_managed_axi4_r_bits_data,
vc_handle fpga_managed_axi4_r_bits_last,
vc_handle fpga_managed_axi4_b_valid,
vc_handle fpga_managed_axi4_b_ready,
vc_handle fpga_managed_axi4_b_bits_resp,
vc_handle fpga_managed_axi4_b_bits_id,
vc_handle mem_0_ar_valid,
vc_handle mem_0_ar_ready,
@ -214,42 +294,86 @@ void tick(vc_handle reset,
try {
// The driver ucontext is initialized before spawning the VCS
// context, so these pointers should be initialized.
assert(simif_emul_t::dma != nullptr);
assert(simif_emul_t::cpu_managed_axi4 != nullptr);
assert(simif_emul_t::master != nullptr);
assert(DMA_STRB_SIZE <= 2);
static_assert(CPU_MANAGED_AXI4_STRB_SIZE <= 2);
uint32_t ctrl_r_data[CTRL_DATA_SIZE];
for (size_t i = 0; i < CTRL_DATA_SIZE; i++) {
ctrl_r_data[i] = vc_4stVectorRef(ctrl_r_bits_data)[i].d;
}
uint32_t dma_r_data[DMA_DATA_SIZE];
for (size_t i = 0; i < DMA_DATA_SIZE; i++) {
dma_r_data[i] = vc_4stVectorRef(dma_r_bits_data)[i].d;
}
simif_emul_t::master->tick(vcs_rst,
vc_getScalar(ctrl_ar_ready),
vc_getScalar(ctrl_aw_ready),
vc_getScalar(ctrl_w_ready),
vc_4stVectorRef(ctrl_r_bits_id)->d,
getScalarOrVector(ctrl_r_bits_id, CTRL_ID_BITS),
ctrl_r_data,
vc_getScalar(ctrl_r_bits_last),
vc_getScalar(ctrl_r_valid),
vc_4stVectorRef(ctrl_b_bits_id)->d,
getScalarOrVector(ctrl_b_bits_id, CTRL_ID_BITS),
vc_getScalar(ctrl_b_valid));
simif_emul_t::dma->tick(vcs_rst,
vc_getScalar(dma_ar_ready),
vc_getScalar(dma_aw_ready),
vc_getScalar(dma_w_ready),
vc_4stVectorRef(dma_r_bits_id)->d,
dma_r_data,
vc_getScalar(dma_r_bits_last),
vc_getScalar(dma_r_valid),
vc_4stVectorRef(dma_b_bits_id)->d,
vc_getScalar(dma_b_valid));
#ifdef CPU_MANAGED_AXI4_PRESENT
assert(CPU_MANAGED_AXI4_STRB_SIZE <= 2);
uint32_t cpu_managed_axi4_r_data[CPU_MANAGED_AXI4_DATA_SIZE];
for (size_t i = 0; i < CPU_MANAGED_AXI4_DATA_SIZE; i++) {
cpu_managed_axi4_r_data[i] =
vc_4stVectorRef(cpu_managed_axi4_r_bits_data)[i].d;
}
simif_emul_t::cpu_managed_axi4->tick(
vcs_rst,
vc_getScalar(cpu_managed_axi4_ar_ready),
vc_getScalar(cpu_managed_axi4_aw_ready),
vc_getScalar(cpu_managed_axi4_w_ready),
vc_4stVectorRef(cpu_managed_axi4_r_bits_id)->d,
cpu_managed_axi4_r_data,
vc_getScalar(cpu_managed_axi4_r_bits_last),
vc_getScalar(cpu_managed_axi4_r_valid),
vc_4stVectorRef(cpu_managed_axi4_b_bits_id)->d,
vc_getScalar(cpu_managed_axi4_b_valid));
#endif // CPU_MANAGED_AXI4_PRESENT
#ifdef FPGA_MANAGED_AXI4_PRESENT
uint32_t fpga_managed_axi4_w_data[FPGA_MANAGED_AXI4_DATA_SIZE];
for (size_t i = 0; i < FPGA_MANAGED_AXI4_DATA_SIZE; i++) {
fpga_managed_axi4_w_data[i] =
vc_4stVectorRef(fpga_managed_axi4_w_bits_data)[i].d;
}
// Assemble the write strobe from the 32-bit words of the strobe vector.
// Start from zero so that, when the strobe fits in a single word
// (FPGA_MANAGED_AXI4_STRB_SIZE == 1), the upper half is well defined rather
// than indeterminate. Word i supplies bits [32*i + 31 : 32*i].
uint64_t fpga_managed_axi4_w_strb = 0;
static_assert(FPGA_MANAGED_AXI4_STRB_SIZE <= 2);
for (size_t i = 0; i < FPGA_MANAGED_AXI4_STRB_SIZE; i++) {
  fpga_managed_axi4_w_strb |=
      static_cast<uint64_t>(
          vc_4stVectorRef(fpga_managed_axi4_w_bits_strb)[i].d)
      << (32 * i);
}
simif_emul_t::cpu_mem->tick(
vcs_rst,
vc_getScalar(fpga_managed_axi4_ar_valid),
vc_4stVectorRef(fpga_managed_axi4_ar_bits_addr)->d,
getScalarOrVector(fpga_managed_axi4_ar_bits_id,
FPGA_MANAGED_AXI4_ID_BITS),
vc_4stVectorRef(fpga_managed_axi4_ar_bits_size)->d,
vc_4stVectorRef(fpga_managed_axi4_ar_bits_len)->d,
vc_getScalar(fpga_managed_axi4_aw_valid),
vc_4stVectorRef(fpga_managed_axi4_aw_bits_addr)->d,
getScalarOrVector(fpga_managed_axi4_aw_bits_id,
FPGA_MANAGED_AXI4_ID_BITS),
vc_4stVectorRef(fpga_managed_axi4_aw_bits_size)->d,
vc_4stVectorRef(fpga_managed_axi4_aw_bits_len)->d,
vc_getScalar(fpga_managed_axi4_w_valid),
fpga_managed_axi4_w_strb,
fpga_managed_axi4_w_data,
vc_getScalar(fpga_managed_axi4_w_bits_last),
vc_getScalar(fpga_managed_axi4_r_ready),
vc_getScalar(fpga_managed_axi4_b_ready));
#endif // FPGA_MANAGED_AXI4_PRESENT
#define MEMORY_CHANNEL_TICK(IDX) \
uint32_t mem_##IDX##_w_data[MEM_DATA_SIZE]; \
@ -303,9 +427,6 @@ void tick(vc_handle reset,
vec32 md[CTRL_DATA_SIZE];
md[0].c = 0;
md[0].d = simif_emul_t::master->aw_id();
vc_put4stVector(ctrl_aw_bits_id, md);
md[0].c = 0;
md[0].d = simif_emul_t::master->aw_addr();
vc_put4stVector(ctrl_aw_bits_addr, md);
md[0].c = 0;
@ -315,9 +436,6 @@ void tick(vc_handle reset,
md[0].d = simif_emul_t::master->aw_len();
vc_put4stVector(ctrl_aw_bits_len, md);
md[0].c = 0;
md[0].d = simif_emul_t::master->ar_id();
vc_put4stVector(ctrl_ar_bits_id, md);
md[0].c = 0;
md[0].d = simif_emul_t::master->ar_addr();
vc_put4stVector(ctrl_ar_bits_addr, md);
md[0].c = 0;
@ -336,55 +454,100 @@ void tick(vc_handle reset,
}
vc_put4stVector(ctrl_w_bits_data, md);
vc_putScalar(dma_aw_valid, simif_emul_t::dma->aw_valid());
vc_putScalar(dma_ar_valid, simif_emul_t::dma->ar_valid());
vc_putScalar(dma_w_valid, simif_emul_t::dma->w_valid());
vc_putScalar(dma_w_bits_last, simif_emul_t::dma->w_last());
vc_putScalar(dma_r_ready, simif_emul_t::dma->r_ready());
vc_putScalar(dma_b_ready, simif_emul_t::dma->b_ready());
putScalarOrVector(
ctrl_aw_bits_id, simif_emul_t::master->aw_id(), CTRL_ID_BITS);
putScalarOrVector(
ctrl_ar_bits_id, simif_emul_t::master->ar_id(), CTRL_ID_BITS);
vec32 dd[DMA_DATA_SIZE];
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->aw_id();
vc_put4stVector(dma_aw_bits_id, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->aw_addr();
dd[1].c = 0;
dd[1].d = simif_emul_t::dma->aw_addr() >> 32;
vc_put4stVector(dma_aw_bits_addr, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->aw_size();
vc_put4stVector(dma_aw_bits_size, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->aw_len();
vc_put4stVector(dma_aw_bits_len, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->ar_id();
vc_put4stVector(dma_ar_bits_id, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->ar_addr();
dd[1].c = 0;
dd[1].d = simif_emul_t::dma->ar_addr() >> 32;
vc_put4stVector(dma_ar_bits_addr, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->ar_size();
vc_put4stVector(dma_ar_bits_size, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::dma->ar_len();
vc_put4stVector(dma_ar_bits_len, dd);
#ifdef CPU_MANAGED_AXI4_PRESENT
vc_putScalar(cpu_managed_axi4_aw_valid,
simif_emul_t::cpu_managed_axi4->aw_valid());
vc_putScalar(cpu_managed_axi4_ar_valid,
simif_emul_t::cpu_managed_axi4->ar_valid());
vc_putScalar(cpu_managed_axi4_w_valid,
simif_emul_t::cpu_managed_axi4->w_valid());
vc_putScalar(cpu_managed_axi4_w_bits_last,
simif_emul_t::cpu_managed_axi4->w_last());
vc_putScalar(cpu_managed_axi4_r_ready,
simif_emul_t::cpu_managed_axi4->r_ready());
vc_putScalar(cpu_managed_axi4_b_ready,
simif_emul_t::cpu_managed_axi4->b_ready());
auto strb = simif_emul_t::dma->w_strb();
for (size_t i = 0; i < DMA_STRB_SIZE; i++) {
vec32 dd[CPU_MANAGED_AXI4_DATA_SIZE];
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->aw_id();
vc_put4stVector(cpu_managed_axi4_aw_bits_id, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->aw_addr();
dd[1].c = 0;
dd[1].d = simif_emul_t::cpu_managed_axi4->aw_addr() >> 32;
vc_put4stVector(cpu_managed_axi4_aw_bits_addr, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->aw_size();
vc_put4stVector(cpu_managed_axi4_aw_bits_size, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->aw_len();
vc_put4stVector(cpu_managed_axi4_aw_bits_len, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->ar_id();
vc_put4stVector(cpu_managed_axi4_ar_bits_id, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->ar_addr();
dd[1].c = 0;
dd[1].d = simif_emul_t::cpu_managed_axi4->ar_addr() >> 32;
vc_put4stVector(cpu_managed_axi4_ar_bits_addr, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->ar_size();
vc_put4stVector(cpu_managed_axi4_ar_bits_size, dd);
dd[0].c = 0;
dd[0].d = simif_emul_t::cpu_managed_axi4->ar_len();
vc_put4stVector(cpu_managed_axi4_ar_bits_len, dd);
auto strb = simif_emul_t::cpu_managed_axi4->w_strb();
for (size_t i = 0; i < CPU_MANAGED_AXI4_STRB_SIZE; i++) {
dd[i].c = 0;
dd[i].d = ((uint32_t *)(&strb))[i];
}
vc_put4stVector(dma_w_bits_strb, dd);
vc_put4stVector(cpu_managed_axi4_w_bits_strb, dd);
for (size_t i = 0; i < DMA_DATA_SIZE; i++) {
for (size_t i = 0; i < CPU_MANAGED_AXI4_DATA_SIZE; i++) {
dd[i].c = 0;
dd[i].d = ((uint32_t *)simif_emul_t::dma->w_data())[i];
dd[i].d = ((uint32_t *)simif_emul_t::cpu_managed_axi4->w_data())[i];
}
vc_put4stVector(dma_w_bits_data, dd);
vc_put4stVector(cpu_managed_axi4_w_bits_data, dd);
#endif // CPU_MANAGED_AXI4_PRESENT
#ifdef FPGA_MANAGED_AXI4_PRESENT
vc_putScalar(fpga_managed_axi4_aw_ready, simif_emul_t::cpu_mem->aw_ready());
vc_putScalar(fpga_managed_axi4_ar_ready, simif_emul_t::cpu_mem->ar_ready());
vc_putScalar(fpga_managed_axi4_w_ready, simif_emul_t::cpu_mem->w_ready());
vc_putScalar(fpga_managed_axi4_b_valid, simif_emul_t::cpu_mem->b_valid());
vc_putScalar(fpga_managed_axi4_r_valid, simif_emul_t::cpu_mem->r_valid());
vc_putScalar(fpga_managed_axi4_r_bits_last,
simif_emul_t::cpu_mem->r_last());
vec32 fpga_managed_axi4d[FPGA_MANAGED_AXI4_DATA_SIZE];
fpga_managed_axi4d[0].c = 0;
fpga_managed_axi4d[0].d = simif_emul_t::cpu_mem->b_resp();
vc_put4stVector(fpga_managed_axi4_b_bits_resp, fpga_managed_axi4d);
fpga_managed_axi4d[0].c = 0;
fpga_managed_axi4d[0].d = simif_emul_t::cpu_mem->r_resp();
vc_put4stVector(fpga_managed_axi4_r_bits_resp, fpga_managed_axi4d);
for (size_t i = 0; i < FPGA_MANAGED_AXI4_DATA_SIZE; i++) {
fpga_managed_axi4d[i].c = 0;
fpga_managed_axi4d[i].d =
((uint32_t *)simif_emul_t::cpu_mem->r_data())[i];
}
vc_put4stVector(fpga_managed_axi4_r_bits_data, fpga_managed_axi4d);
putScalarOrVector(fpga_managed_axi4_b_bits_id,
simif_emul_t::cpu_mem->b_id(),
FPGA_MANAGED_AXI4_ID_BITS);
putScalarOrVector(fpga_managed_axi4_r_bits_id,
simif_emul_t::cpu_mem->r_id(),
FPGA_MANAGED_AXI4_ID_BITS);
#endif // FPGA_MANAGED_AXI4_PRESENT
#define MEMORY_CHANNEL_PROP(IDX) \
vc_putScalar(mem_##IDX##_aw_ready, simif_emul_t::slave[IDX]->aw_ready()); \

View File

@ -1,6 +1,7 @@
#include "simif_emul.h"
#include <cassert>
#include <cmath>
#include <memory>
#include <verilated.h>
#if VM_TRACE
#include <verilated_vcd_c.h>
@ -13,7 +14,7 @@ extern VerilatedVcdC *tfp;
#endif // VM_TRACE
void tick() {
assert(simif_emul_t::dma != nullptr);
assert(simif_emul_t::cpu_managed_axi4 != nullptr);
assert(simif_emul_t::master != nullptr);
// ASSUMPTION: All models have *no* combinational paths through I/O
@ -39,29 +40,61 @@ void tick() {
memcpy(
&top->ctrl_w_bits_data, simif_emul_t::master->w_data(), CTRL_BEAT_BYTES);
top->dma_aw_valid = simif_emul_t::dma->aw_valid();
top->dma_aw_bits_id = simif_emul_t::dma->aw_id();
top->dma_aw_bits_addr = simif_emul_t::dma->aw_addr();
top->dma_aw_bits_size = simif_emul_t::dma->aw_size();
top->dma_aw_bits_len = simif_emul_t::dma->aw_len();
#ifdef CPU_MANAGED_AXI4_PRESENT
top->cpu_managed_axi4_aw_valid = simif_emul_t::cpu_managed_axi4->aw_valid();
top->cpu_managed_axi4_aw_bits_id = simif_emul_t::cpu_managed_axi4->aw_id();
top->cpu_managed_axi4_aw_bits_addr =
simif_emul_t::cpu_managed_axi4->aw_addr();
top->cpu_managed_axi4_aw_bits_size =
simif_emul_t::cpu_managed_axi4->aw_size();
top->cpu_managed_axi4_aw_bits_len = simif_emul_t::cpu_managed_axi4->aw_len();
top->dma_ar_valid = simif_emul_t::dma->ar_valid();
top->dma_ar_bits_id = simif_emul_t::dma->ar_id();
top->dma_ar_bits_addr = simif_emul_t::dma->ar_addr();
top->dma_ar_bits_size = simif_emul_t::dma->ar_size();
top->dma_ar_bits_len = simif_emul_t::dma->ar_len();
top->cpu_managed_axi4_ar_valid = simif_emul_t::cpu_managed_axi4->ar_valid();
top->cpu_managed_axi4_ar_bits_id = simif_emul_t::cpu_managed_axi4->ar_id();
top->cpu_managed_axi4_ar_bits_addr =
simif_emul_t::cpu_managed_axi4->ar_addr();
top->cpu_managed_axi4_ar_bits_size =
simif_emul_t::cpu_managed_axi4->ar_size();
top->cpu_managed_axi4_ar_bits_len = simif_emul_t::cpu_managed_axi4->ar_len();
top->dma_w_valid = simif_emul_t::dma->w_valid();
top->dma_w_bits_strb = simif_emul_t::dma->w_strb();
top->dma_w_bits_last = simif_emul_t::dma->w_last();
top->cpu_managed_axi4_w_valid = simif_emul_t::cpu_managed_axi4->w_valid();
top->cpu_managed_axi4_w_bits_strb = simif_emul_t::cpu_managed_axi4->w_strb();
top->cpu_managed_axi4_w_bits_last = simif_emul_t::cpu_managed_axi4->w_last();
top->dma_r_ready = simif_emul_t::dma->r_ready();
top->dma_b_ready = simif_emul_t::dma->b_ready();
#if DMA_DATA_BITS > 64
memcpy(top->dma_w_bits_data, simif_emul_t::dma->w_data(), DMA_BEAT_BYTES);
top->cpu_managed_axi4_r_ready = simif_emul_t::cpu_managed_axi4->r_ready();
top->cpu_managed_axi4_b_ready = simif_emul_t::cpu_managed_axi4->b_ready();
#if CPU_MANAGED_AXI4_DATA_BITS > 64
memcpy(top->cpu_managed_axi4_w_bits_data,
simif_emul_t::cpu_managed_axi4->w_data(),
CPU_MANAGED_AXI4_BEAT_BYTES);
#else
memcpy(&top->dma_w_bits_data, simif_emul_t::dma->w_data(), DMA_BEAT_BYTES);
memcpy(&top->cpu_managed_axi4_w_bits_data,
simif_emul_t::cpu_managed_axi4->w_data(),
CPU_MANAGED_AXI4_BEAT_BYTES);
#endif
#endif // CPU_MANAGED_AXI4_PRESENT
#ifdef FPGA_MANAGED_AXI4_PRESENT
top->fpga_managed_axi4_aw_ready = simif_emul_t::cpu_mem->aw_ready();
top->fpga_managed_axi4_ar_ready = simif_emul_t::cpu_mem->ar_ready();
top->fpga_managed_axi4_w_ready = simif_emul_t::cpu_mem->w_ready();
top->fpga_managed_axi4_b_valid = simif_emul_t::cpu_mem->b_valid();
top->fpga_managed_axi4_b_bits_id = simif_emul_t::cpu_mem->b_id();
top->fpga_managed_axi4_b_bits_resp = simif_emul_t::cpu_mem->b_resp();
top->fpga_managed_axi4_r_valid = simif_emul_t::cpu_mem->r_valid();
top->fpga_managed_axi4_r_bits_id = simif_emul_t::cpu_mem->r_id();
top->fpga_managed_axi4_r_bits_resp = simif_emul_t::cpu_mem->r_resp();
top->fpga_managed_axi4_r_bits_last = simif_emul_t::cpu_mem->r_last();
// Copy one beat of read data onto the FPGA-managed AXI4 R channel. The port is
// passed as-is or by address depending on whether it is wider than 64 bits,
// so the guard must test the width of this interface
// (FPGA_MANAGED_AXI4_DATA_BITS), not the unrelated target-memory channel
// width (MEM_DATA_BITS).
#if FPGA_MANAGED_AXI4_DATA_BITS > 64
memcpy(top->fpga_managed_axi4_r_bits_data,
       simif_emul_t::cpu_mem->r_data(),
       FPGA_MANAGED_AXI4_DATA_BITS / 8);
#else
memcpy(&top->fpga_managed_axi4_r_bits_data,
       simif_emul_t::cpu_mem->r_data(),
       FPGA_MANAGED_AXI4_DATA_BITS / 8);
#endif
#endif // FPGA_MANAGED_AXI4_PRESENT
top->mem_0_aw_ready = simif_emul_t::slave[0]->aw_ready();
top->mem_0_ar_ready = simif_emul_t::slave[0]->ar_ready();
@ -171,16 +204,49 @@ void tick() {
top->ctrl_b_bits_id,
top->ctrl_b_valid);
simif_emul_t::dma->tick(top->reset,
top->dma_ar_ready,
top->dma_aw_ready,
top->dma_w_ready,
top->dma_r_bits_id,
&top->dma_r_bits_data,
top->dma_r_bits_last,
top->dma_r_valid,
top->dma_b_bits_id,
top->dma_b_valid);
#ifdef CPU_MANAGED_AXI4_PRESENT
simif_emul_t::cpu_managed_axi4->tick(top->reset,
top->cpu_managed_axi4_ar_ready,
top->cpu_managed_axi4_aw_ready,
top->cpu_managed_axi4_w_ready,
top->cpu_managed_axi4_r_bits_id,
&top->cpu_managed_axi4_r_bits_data,
top->cpu_managed_axi4_r_bits_last,
top->cpu_managed_axi4_r_valid,
top->cpu_managed_axi4_b_bits_id,
top->cpu_managed_axi4_b_valid);
#endif // CPU_MANAGED_AXI4_PRESENT
#ifdef FPGA_MANAGED_AXI4_PRESENT
simif_emul_t::cpu_mem->tick(top->reset,
top->fpga_managed_axi4_ar_valid,
top->fpga_managed_axi4_ar_bits_addr,
top->fpga_managed_axi4_ar_bits_id,
top->fpga_managed_axi4_ar_bits_size,
top->fpga_managed_axi4_ar_bits_len,
top->fpga_managed_axi4_aw_valid,
top->fpga_managed_axi4_aw_bits_addr,
top->fpga_managed_axi4_aw_bits_id,
top->fpga_managed_axi4_aw_bits_size,
top->fpga_managed_axi4_aw_bits_len,
top->fpga_managed_axi4_w_valid,
#if FPGA_MANAGED_AXI4_STRB_BITS > 64
&top->fpga_managed_axi4_w_bits_strb,
#else
top->fpga_managed_axi4_w_bits_strb,
#endif
#if FPGA_MANAGED_AXI4_DATA_BITS > 64
&top->fpga_managed_axi4_w_bits_data,
#else
top->fpga_managed_axi4_w_bits_data,
#endif
top->fpga_managed_axi4_w_bits_last,
top->fpga_managed_axi4_r_ready,
top->fpga_managed_axi4_b_ready);
#endif // FPGA_MANAGED_AXI4_PRESENT
simif_emul_t::slave[0]->tick(top->reset,
top->mem_0_ar_valid,

View File

@ -47,10 +47,21 @@ void handle_sigterm(int sig) { finish(); }
simif_emul_t::simif_emul_t() {
#ifdef FPGA_MANAGED_AXI4_PRESENT
// The final parameter, line size, is not used under mm_magic_t
cpu_mem->init((1ULL << FPGA_MANAGED_AXI4_ADDR_BITS),
FPGA_MANAGED_AXI4_DATA_BITS / 8,
512);
#endif
using namespace std::placeholders;
auto mmio_read_func = std::bind(&simif_emul_t::read, this, _1);
auto pcis_read_func = std::bind(&simif_emul_t::pcis_read, this, _1, _2, _3);
auto pcis_write_func = std::bind(&simif_emul_t::pcis_write, this, _1, _2, _3);
#ifdef CPUMANAGEDSTREAMENGINE_0_PRESENT
auto cpu_managed_axi4_read_func =
std::bind(&simif_emul_t::cpu_managed_axi4_read, this, _1, _2, _3);
auto cpu_managed_axi4_write_func =
std::bind(&simif_emul_t::cpu_managed_axi4_write, this, _1, _2, _3);
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
auto params = CPUManagedStreamParameters(
@ -60,7 +71,7 @@ simif_emul_t::simif_emul_t() {
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
from_host_streams.push_back(
StreamFromCPU(params, mmio_read_func, pcis_write_func));
StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func));
}
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
@ -71,8 +82,9 @@ simif_emul_t::simif_emul_t() {
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
to_host_streams.push_back(
StreamToCPU(params, mmio_read_func, pcis_read_func));
StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func));
}
#endif // CPUMANAGEDSTREAMENGINE_0_PRESENT
}
simif_emul_t::~simif_emul_t(){};
@ -202,32 +214,35 @@ size_t simif_emul_t::push(unsigned stream_idx,
src, num_bytes, threshold_bytes);
}
size_t simif_emul_t::pcis_read(size_t addr, char *data, size_t size) {
ssize_t len = (size - 1) / DMA_BEAT_BYTES;
size_t
simif_emul_t::cpu_managed_axi4_read(size_t addr, char *data, size_t size) {
ssize_t len = (size - 1) / CPU_MANAGED_AXI4_BEAT_BYTES;
while (len >= 0) {
size_t part_len = len % (MAX_LEN + 1);
dma->read_req(addr, DMA_SIZE, part_len);
wait_read(dma, data);
cpu_managed_axi4->read_req(
addr, log2(CPU_MANAGED_AXI4_BEAT_BYTES), part_len);
wait_read(cpu_managed_axi4, data);
len -= (part_len + 1);
addr += (part_len + 1) * DMA_BEAT_BYTES;
data += (part_len + 1) * DMA_BEAT_BYTES;
addr += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES;
data += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES;
}
return size;
}
size_t simif_emul_t::pcis_write(size_t addr, char *data, size_t size) {
ssize_t len = (size - 1) / DMA_BEAT_BYTES;
size_t remaining = size - len * DMA_BEAT_BYTES;
size_t
simif_emul_t::cpu_managed_axi4_write(size_t addr, char *data, size_t size) {
ssize_t len = (size - 1) / CPU_MANAGED_AXI4_BEAT_BYTES;
size_t remaining = size - len * CPU_MANAGED_AXI4_BEAT_BYTES;
size_t strb[len + 1];
size_t *strb_ptr = &strb[0];
for (int i = 0; i < len; i++)
strb[i] = (1LL << DMA_BEAT_BYTES) - 1;
strb[i] = (1LL << CPU_MANAGED_AXI4_BEAT_BYTES) - 1;
if (remaining == DMA_BEAT_BYTES)
if (remaining == CPU_MANAGED_AXI4_BEAT_BYTES)
strb[len] = strb[0];
else
strb[len] = (1LL << remaining) - 1;
@ -235,12 +250,13 @@ size_t simif_emul_t::pcis_write(size_t addr, char *data, size_t size) {
while (len >= 0) {
size_t part_len = len % (MAX_LEN + 1);
dma->write_req(addr, DMA_SIZE, part_len, data, strb_ptr);
wait_write(dma);
cpu_managed_axi4->write_req(
addr, log2(CPU_MANAGED_AXI4_BEAT_BYTES), part_len, data, strb_ptr);
wait_write(cpu_managed_axi4);
len -= (part_len + 1);
addr += (part_len + 1) * DMA_BEAT_BYTES;
data += (part_len + 1) * DMA_BEAT_BYTES;
addr += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES;
data += (part_len + 1) * CPU_MANAGED_AXI4_BEAT_BYTES;
strb_ptr += (part_len + 1);
}

View File

@ -37,7 +37,8 @@ public:
* These have external linkage to enable VCS to easily access them.
*/
inline static mmio_t *master = new mmio_t(CTRL_BEAT_BYTES);
inline static mmio_t *dma = new mmio_t(DMA_BEAT_BYTES);
inline static mmio_t *cpu_managed_axi4 =
new mmio_t(CPU_MANAGED_AXI4_BEAT_BYTES);
/**
* @brief Host DRAM models shared across the RTL simulator and driver
* contexts.
@ -48,19 +49,27 @@ public:
* simif_emul_t::load_mems.
*/
inline static mm_t *slave[MEM_NUM_CHANNELS] = {nullptr};
/**
* @brief A model of FPGA-addressable CPU-host memory.
*
* In metasimulations, FPGA-managed AXI4 transactions read and write to this
* AXI4 memory subordinate as a proxy for writing into actual host-CPU DRAM.
* The driver side of FPGAManagedStreams inspects circular buffers hosted here.
*/
inline static mm_t *cpu_mem = new mm_magic_t;
private:
// The maximum number of cycles the RTL simulator can advance before
// switching back to the driver process. +fuzz-host-timings sets this to a
// value > 1, introducing random delays in MMIO (read, write) and DMA (push,
// pull) requests
// value > 1, introducing random delays in the AXI4 transactions that underlie
// MMIO and bridge streams.
int maximum_host_delay = 1;
void advance_target();
void wait_read(mmio_t *mmio, void *data);
void wait_write(mmio_t *mmio);
size_t pcis_write(size_t addr, char *data, size_t size);
size_t pcis_read(size_t addr, char *data, size_t size);
size_t cpu_managed_axi4_write(size_t addr, char *data, size_t size);
size_t cpu_managed_axi4_read(size_t addr, char *data, size_t size);
// Writes directly into the host DRAM models to initialize them.
void load_mems(const char *fname);

View File

@ -30,8 +30,10 @@ simif_f1_t::simif_f1_t(int argc, char **argv) {
using namespace std::placeholders;
auto mmio_read_func = std::bind(&simif_f1_t::read, this, _1);
auto pcis_read_func = std::bind(&simif_f1_t::pcis_read, this, _1, _2, _3);
auto pcis_write_func = std::bind(&simif_f1_t::pcis_write, this, _1, _2, _3);
auto cpu_managed_axi4_read_func =
std::bind(&simif_f1_t::cpu_managed_axi4_read, this, _1, _2, _3);
auto cpu_managed_axi4_write_func =
std::bind(&simif_f1_t::cpu_managed_axi4_write, this, _1, _2, _3);
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
auto params = CPUManagedStreamParameters(
@ -41,7 +43,7 @@ simif_f1_t::simif_f1_t(int argc, char **argv) {
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
from_host_streams.push_back(
StreamFromCPU(params, mmio_read_func, pcis_write_func));
StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func));
}
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
@ -52,7 +54,7 @@ simif_f1_t::simif_f1_t(int argc, char **argv) {
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
to_host_streams.push_back(
StreamToCPU(params, mmio_read_func, pcis_read_func));
StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func));
}
}
@ -227,7 +229,7 @@ uint32_t simif_f1_t::read(size_t addr) {
#endif
}
size_t simif_f1_t::pcis_read(size_t addr, char *data, size_t size) {
size_t simif_f1_t::cpu_managed_axi4_read(size_t addr, char *data, size_t size) {
#ifdef SIMULATION_XSIM
assert(false); // PCIS is unsupported in FPGA-level metasimulation
#else
@ -235,7 +237,8 @@ size_t simif_f1_t::pcis_read(size_t addr, char *data, size_t size) {
#endif
}
size_t simif_f1_t::pcis_write(size_t addr, char *data, size_t size) {
size_t
simif_f1_t::cpu_managed_axi4_write(size_t addr, char *data, size_t size) {
#ifdef SIMULATION_XSIM
assert(false); // PCIS is unsupported in FPGA-level metasimulation
#else

View File

@ -41,8 +41,8 @@ private:
std::vector<StreamToCPU> to_host_streams;
std::vector<StreamFromCPU> from_host_streams;
size_t pcis_write(size_t addr, char *data, size_t size);
size_t pcis_read(size_t addr, char *data, size_t size);
size_t cpu_managed_axi4_write(size_t addr, char *data, size_t size);
size_t cpu_managed_axi4_read(size_t addr, char *data, size_t size);
#ifdef SIMULATION_XSIM
char *driver_to_xsim = "/tmp/driver_to_xsim";

View File

@ -10,6 +10,7 @@ import firrtl.stage.TransformManager.TransformDependency
import junctions.{NastiKey, NastiParameters}
import freechips.rocketchip.config.{Parameters, Config, Field}
import freechips.rocketchip.unittest.UnitTests
import freechips.rocketchip.diplomacy.{TransferSizes}
import java.io.{File}
@ -79,9 +80,9 @@ class WithoutTLMonitors extends freechips.rocketchip.subsystem.WithoutTLMonitors
class SimConfig extends Config (new Config((site, here, up) => {
case SynthAsserts => false
case SynthPrints => false
case DMANastiKey => NastiParameters(512, 64, 6)
case AXIDebugPrint => false
// TODO remove
case HasDMAChannel => site(CPUManagedAXI4Key).nonEmpty
// Remove once AXI4 port is complete
case MemNastiKey => {
NastiParameters(
@ -94,6 +95,13 @@ class SimConfig extends Config (new Config((site, here, up) => {
class F1Config extends Config(new Config((site, here, up) => {
case Platform => (p: Parameters) => new F1Shim()(p)
case HasDMAChannel => true
case StreamEngineInstantiatorKey => (e: StreamEngineParameters, p: Parameters) => new CPUManagedStreamEngine(p, e)
case CPUManagedAXI4Key => Some(CPUManagedAXI4Params(
addrBits = 64,
dataBits = 512,
idBits = 6,
))
case FPGAManagedAXI4Key => None
case CtrlNastiKey => NastiParameters(32, 25, 12)
case HostMemChannelKey => HostMemChannelParams(
size = 0x400000000L, // 16 GiB
@ -104,13 +112,33 @@ class F1Config extends Config(new Config((site, here, up) => {
class VitisConfig extends Config(new Config((site, here, up) => {
case Platform => (p: Parameters) => new VitisShim()(p)
case HasDMAChannel => false
// ID Width = 1 to avoid any potential zero-width wire issues.
case CPUManagedAXI4Key => None
case FPGAManagedAXI4Key =>
val dataBits = 512
Some(FPGAManagedAXI4Params(
// This value was chosen arbitrarily. Vitis makes it natural to
// request multiples of 1 GiB, and we may wish to expand this after some
// performance analysis.
size = 4096 * 1024,
dataBits = dataBits,
// This was chosen to match the AXI4 recommendations and could change.
idBits = 4,
// Don't support narrow reads/writes, and cap at a page per the AXI5 spec
writeTransferSizes = TransferSizes(dataBits / 8, 4096),
readTransferSizes = TransferSizes(dataBits / 8, 4096)
))
case StreamEngineInstantiatorKey => (e: StreamEngineParameters, p: Parameters) => new FPGAManagedStreamEngine(p, e)
// Notes on width selection for the control bus
// Address: This needs further investigation. 12 may not be sufficient when using many auto counters
// ID: AXI4Lite does not use ID bits. Use one here since Nasti (which
// lacks a native AXI4LITE implementation) can't handle 0-width wires.
case CtrlNastiKey => NastiParameters(32, 12, 1)
case HostMemChannelKey => HostMemChannelParams(
size = 0x400000000L, // 16 GiB
beatBytes = 8,
idBits = 16)
// This could be as many as four on a U250, but support for the other
// channels requires adding address offsets in the shim (TODO).
case HostMemNumChannels => 1
}) ++ new SimConfig)

View File

@ -27,43 +27,46 @@ case class StreamDriverParameters(
class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) extends StreamEngine(p) {
val dmaBytes = p(DMANastiKey).dataBits / 8
val pcisNodeOpt = Some(AXI4SlaveNode(
val cpuManagedAXI4params = p(CPUManagedAXI4Key).get
require(BridgeStreamConstants.streamWidthBits == cpuManagedAXI4params.dataBits,
s"CPU-managed AXI4 IF data width must match the stream width: ${BridgeStreamConstants.streamWidthBits}.")
val beatBytes = cpuManagedAXI4params.dataBits / 8
val cpuManagedAXI4NodeOpt = Some(AXI4SlaveNode(
Seq(AXI4SlavePortParameters(
slaves = Seq(AXI4SlaveParameters(
address = Seq(AddressSet(0, (BigInt(1) << p(DMANastiKey).dataBits) - 1)),
address = Seq(AddressSet(0, (BigInt(1) << cpuManagedAXI4params.addrBits) - 1)),
resources = (new MemoryDevice).reg,
regionType = RegionType.UNCACHED, // cacheable
executable = false,
supportsWrite = TransferSizes(dmaBytes, 4096),
supportsRead = TransferSizes(dmaBytes, 4096),
supportsWrite = TransferSizes(beatBytes, 4096),
supportsRead = TransferSizes(beatBytes, 4096),
interleavedId = Some(0))), // slave does not interleave read responses
beatBytes = dmaBytes)
beatBytes = beatBytes)
))
)
//require(BridgeStreamConstants.streamWidthBits == p(DMANastiKey).dataBits,
// s"CPU-mastered AXI4 IF data width must match the stream width ${BridgeStreamConstants.streamWidthBits}".)
val pcimNodeOpt = None
val fpgaManagedAXI4NodeOpt = None
lazy val module = new WidgetImp(this) {
val io = IO(new WidgetIO)
val dma = pcisNodeOpt.get.in.head._1
val axi4 = cpuManagedAXI4NodeOpt.get.in.head._1
// FromHostCPU streams are implemented using the AW, W, B channels, which
// write into large BRAM FIFOs for each stream.
assert(!dma.aw.valid || dma.aw.bits.size === log2Ceil(dmaBytes).U)
assert(!dma.w.valid || dma.w.bits.strb === ~0.U(dmaBytes.W))
assert(!axi4.aw.valid || axi4.aw.bits.size === log2Ceil(beatBytes).U)
assert(!axi4.w.valid || axi4.w.bits.strb === ~0.U(beatBytes.W))
dma.b.bits.resp := 0.U(2.W)
dma.b.bits.id := dma.aw.bits.id
dma.b.bits.user := dma.aw.bits.user
axi4.b.bits.resp := 0.U(2.W)
axi4.b.bits.id := axi4.aw.bits.id
axi4.b.bits.user := axi4.aw.bits.user
// This will be set by the channel given the grant using last connect semantics
dma.b.valid := false.B
dma.aw.ready := false.B
dma.w.ready := false.B
axi4.b.valid := false.B
axi4.aw.ready := false.B
axi4.w.ready := false.B
// TODO: Chisel naming prefix to indicate what channel this hw belongs to.
@ -75,39 +78,39 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters)
addressSpaceBits: Int): StreamDriverParameters = prefix(chParams.name) {
val streamName = chParams.name
val grant = (dma.aw.bits.addr >> addressSpaceBits) === idx.U
val grant = (axi4.aw.bits.addr >> addressSpaceBits) === idx.U
val incomingQueue = Module(new BRAMQueue(chParams.fpgaBufferDepth)(UInt(BridgeStreamConstants.streamWidthBits.W)))
xdc.RAMStyleHint(incomingQueue.fq.ram, xdc.RAMStyles.ULTRA)
channel <> incomingQueue.io.deq
// check to see if pcis is ready to accept data instead of forcing writes
// check to see if axi4 is ready to accept data instead of forcing writes
val countAddr =
attach(incomingQueue.io.count, s"${chParams.name}_count", ReadOnly)
val writeHelper = DecoupledHelper(
dma.aw.valid,
dma.w.valid,
dma.b.ready,
axi4.aw.valid,
axi4.w.valid,
axi4.b.ready,
incomingQueue.io.enq.ready
)
// TODO: Get rid of this magic number.
val writeBeatCounter = RegInit(0.U(9.W))
val lastWriteBeat = writeBeatCounter === dma.aw.bits.len
when (grant && dma.w.fire) {
val lastWriteBeat = writeBeatCounter === axi4.aw.bits.len
when (grant && axi4.w.fire) {
writeBeatCounter := Mux(lastWriteBeat, 0.U, writeBeatCounter + 1.U)
}
when (grant) {
dma.w.ready := writeHelper.fire(dma.w.valid)
dma.aw.ready := writeHelper.fire(dma.aw.valid, lastWriteBeat)
dma.b.valid := writeHelper.fire(dma.b.ready, lastWriteBeat)
axi4.w.ready := writeHelper.fire(axi4.w.valid)
axi4.aw.ready := writeHelper.fire(axi4.aw.valid, lastWriteBeat)
axi4.b.valid := writeHelper.fire(axi4.b.ready, lastWriteBeat)
}
incomingQueue.io.enq.valid := grant && writeHelper.fire(incomingQueue.io.enq.ready)
incomingQueue.io.enq.bits := dma.w.bits.data
incomingQueue.io.enq.bits := axi4.w.bits.data
StreamDriverParameters(
chParams.name,
@ -117,13 +120,13 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters)
)
}
assert(!dma.ar.valid || dma.ar.bits.size === log2Ceil(dmaBytes).U)
assert(!axi4.ar.valid || axi4.ar.bits.size === log2Ceil(beatBytes).U)
dma.r.bits.resp := 0.U(2.W)
dma.r.bits.id := dma.ar.bits.id
dma.r.bits.user := dma.ar.bits.user
dma.r.valid := false.B
dma.ar.ready := false.B
axi4.r.bits.resp := 0.U(2.W)
axi4.r.bits.id := axi4.ar.bits.id
axi4.r.bits.user := axi4.ar.bits.user
axi4.r.valid := false.B
axi4.ar.ready := false.B
// This demultiplexes the AR and R channels onto the decoupled ports representing each stream.
def elaborateToHostCPUStream(
@ -132,36 +135,36 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters)
idx: Int,
addressSpaceBits: Int): StreamDriverParameters = prefix(chParams.name) {
val grant = (dma.ar.bits.addr >> addressSpaceBits) === idx.U
val grant = (axi4.ar.bits.addr >> addressSpaceBits) === idx.U
val outgoingQueue = Module(new BRAMQueue(chParams.fpgaBufferDepth)(UInt(BridgeStreamConstants.streamWidthBits.W)))
xdc.RAMStyleHint(outgoingQueue.fq.ram, xdc.RAMStyles.ULTRA)
outgoingQueue.io.enq <> channel
// check to see if pcis has valid output instead of waiting for timeouts
// check to see if axi4 has valid output instead of waiting for timeouts
val countAddr =
attach(outgoingQueue.io.count, s"${chParams.name}_count", ReadOnly)
val readHelper = DecoupledHelper(
dma.ar.valid,
dma.r.ready,
axi4.ar.valid,
axi4.r.ready,
outgoingQueue.io.deq.valid
)
val readBeatCounter = RegInit(0.U(9.W))
val lastReadBeat = readBeatCounter === dma.ar.bits.len
when (dma.r.fire) {
val lastReadBeat = readBeatCounter === axi4.ar.bits.len
when (axi4.r.fire) {
readBeatCounter := Mux(lastReadBeat, 0.U, readBeatCounter + 1.U)
}
outgoingQueue.io.deq.ready := grant && readHelper.fire(outgoingQueue.io.deq.valid)
when (grant) {
dma.r.valid := readHelper.fire(dma.r.ready)
dma.r.bits.data := outgoingQueue.io.deq.bits
dma.r.bits.last := lastReadBeat
dma.ar.ready := readHelper.fire(dma.ar.valid, lastReadBeat)
axi4.r.valid := readHelper.fire(axi4.r.ready)
axi4.r.bits.data := outgoingQueue.io.deq.bits
axi4.r.bits.last := lastReadBeat
axi4.ar.ready := readHelper.fire(axi4.ar.valid, lastReadBeat)
}
StreamDriverParameters(
chParams.name,
@ -182,14 +185,10 @@ class CPUManagedStreamEngine(p: Parameters, val params: StreamEngineParameters)
// burst type (which is semantically consistent with draining or filling a queue).
//
// However, since large DMA transactions initiated by the driver are
// fractured into multiple, smaller AXI4 transactions on the PCIS
// interface*, it is simplest to maintain the illusion that each stream is
// granted an address range at least as large as the largest DMA access.
//
// * On EC2 F1, and likely all XDMA-based systems, requests larger than a
// 4K page are fractured into 4K or smaller transactions.
// treats them as "FIXED" type bursts
def streamASBits = log2Ceil(dmaBytes * streamParameters.map(_.fpgaBufferDepth).max)
// fractured into multiple, smaller AXI4 transactions (<= 4K in size), it
// is simplest to maintain the illusion that each stream is granted an
// address range at least as large as the largest DMA access.
def streamASBits = log2Ceil(beatBytes * streamParameters.map(_.fpgaBufferDepth).max)
for (((port, params), idx) <- streamPorts.zip(streamParameters).zipWithIndex) yield {
elaborator(port, params, idx, streamASBits)

View File

@ -14,8 +14,8 @@ import midas.widgets._
* This is a stub to foreshadow the other implementation
*/
class FPGAManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) extends StreamEngine(p) {
val pcisNodeOpt = None
val pcimNodeOpt = Some(AXI4IdentityNode())
val cpuManagedAXI4NodeOpt = None
val fpgaManagedAXI4NodeOpt = Some(midas.widgets.AXI4TieOff()(p))
lazy val module = new WidgetImp(this) {
val io = IO(new WidgetIO)

View File

@ -18,12 +18,24 @@ import scala.collection.immutable.ListMap
import scala.collection.mutable
/**
* The following case objects define the widths of the three AXI4 bus types presented
* to a simulator.
* The following [[Field]]s capture the parameters of the four AXI4 bus types
* presented to a simulator (in [[FPGATop]]). A [[PlatformShim]] is free to
* adapt these widths, apply address offsets, etc., but the values set here
* define what is used in metasimulation, since it treats
* [[FPGATop]] as the root of the module hierarchy.
*/
// The AXI4 key for the DMA bus
case object DMANastiKey extends Field[NastiParameters]
/** CPU-managed AXI4, aka "pcis" on EC2 F1. Used by the CPU to do DMA into fabric-controlled memories.
* This could include in-fabric RAMs/FIFOs (for bridge streams) or (in the future) FPGA-attached DRAM channels.
*/
case object CPUManagedAXI4Key extends Field[Option[CPUManagedAXI4Params]]
/** FPGA-managed AXI4, aka "pcim" on F1. Used by the fabric to do DMA into
* the host-CPU's memory. Used to implement bridge streams on platforms that lack a CPU-managed AXI4 interface.
* Set this to None if this interface is not present on the host.
*/
case object FPGAManagedAXI4Key extends Field[Option[FPGAManagedAXI4Params]]
// The AXI4 widths for a single host-DRAM channel
case object HostMemChannelKey extends Field[HostMemChannelParams]
// The number of host-DRAM channels -> all channels must have the same AXI4 widths
@ -55,10 +67,6 @@ case class AXI4IdSpaceConstraint(idBits: Int = 4, maxFlight: Int = 8)
// Legacy: the aggregate memory-space seen by masters wanting DRAM. Derived from HostMemChannelKey
case object MemNastiKey extends Field[NastiParameters]
class FPGATopIO(implicit val p: Parameters) extends WidgetIO {
val dma = Flipped(new NastiIO()(p alterPartial ({ case NastiKey => p(DMANastiKey) })))
}
/** Specifies the size and width of external memory ports */
case class HostMemChannelParams(
size: BigInt,
@ -71,6 +79,49 @@ case class HostMemChannelParams(
idBits = idBits)
}
/**
* Specifies the AXI4 interface for FPGA-driven DMA
*
* @param size The size, in bytes, of the addressable region on the host CPU.
* The addressable region is assumed to span [0, size). Host-specific offsets
* should be handled by the FPGAShim.
* @param dataBits The width of the interface in bits.
* @param idBits The number of ID bits supported by the interface.
* @param writeTransferSizes Supported write transfer sizes in bytes
* @param readTransferSizes Supported read transfer sizes in bytes
* @param interleavedId Set to indicate DMA responses may be interleaved.
*/
case class FPGAManagedAXI4Params(
size: BigInt,
dataBits: Int,
idBits: Int,
writeTransferSizes: TransferSizes,
readTransferSizes: TransferSizes,
interleavedId: Option[Int] = Some(0),
) {
require(interleavedId == Some(0), "IdDeinterleaver not currently instantiated in FPGATop")
require((isPow2(size)) && (size % 4096 == 0),
"The size of the FPGA-managed DMA regions must be a power of 2, and larger than a page.")
def axi4BundleParams = AXI4BundleParameters(
addrBits = log2Ceil(size),
dataBits = dataBits,
idBits = idBits,
)
}
case class CPUManagedAXI4Params(
addrBits: Int,
dataBits: Int,
idBits: Int,
maxFlight: Option[Int] = None,
) {
def axi4BundleParams = AXI4BundleParameters(
addrBits = addrBits,
dataBits = dataBits,
idBits = idBits,
)
}
// Platform agnostic wrapper of the simulation models for FPGA
class FPGATop(implicit p: Parameters) extends LazyModule with HasWidgets {
@ -225,32 +276,54 @@ class FPGATop(implicit p: Parameters) extends LazyModule with HasWidgets {
val toCPUStreamParams = bridgesWithToHostCPUStreams.map { _.streamSourceParams }
val fromCPUStreamParams = bridgesWithFromHostCPUStreams.map { _.streamSinkParams }
val pcisAXI4BundleParams = AXI4BundleParameters(
addrBits = p(DMANastiKey).addrBits,
dataBits = p(DMANastiKey).dataBits,
idBits = p(DMANastiKey).idBits) // Dubious...
val pcisNode = AXI4MasterNode(
Seq(AXI4MasterPortParameters(
masters = Seq(AXI4MasterParameters(
name = "cpu-mastered-axi4",
id = IdRange(0, 1 << p(DMANastiKey).idBits),
aligned = false,
maxFlight = None, // None = infinite, else is a per-ID cap
))
)
)
)
val streamingEngine = addWidget(p(StreamEngineInstantiatorKey)(
StreamEngineParameters(toCPUStreamParams.toSeq, fromCPUStreamParams.toSeq), p)
)
streamingEngine.pcisNodeOpt.foreach {
_ := AXI4Buffer() := pcisNode
require(streamingEngine.fpgaManagedAXI4NodeOpt.isEmpty || p(FPGAManagedAXI4Key).nonEmpty,
"Selected StreamEngine uses the FPGA-managed AXI4 interface but it is not available on this platform."
)
require(streamingEngine.cpuManagedAXI4NodeOpt.isEmpty || p(CPUManagedAXI4Key).nonEmpty,
"Selected StreamEngine uses the CPU-managed AXI4 interface, but it is not available on this platform."
)
val cpuManagedAXI4NodeTuple = p(CPUManagedAXI4Key).map { params =>
val node = AXI4MasterNode(Seq(AXI4MasterPortParameters(
masters = Seq(AXI4MasterParameters(
name = "cpu-managed-axi4",
id = IdRange(0, 1 << params.idBits),
aligned = false,
maxFlight = params.maxFlight, // None = infinite, else is a per-ID cap
))
)
))
streamingEngine.cpuManagedAXI4NodeOpt.foreach {
_ := AXI4Buffer() := node
}
(node, params)
}
override def genHeader(sb: StringBuilder) {
val fpgaManagedAXI4NodeTuple = p(FPGAManagedAXI4Key).map { params =>
val node = AXI4SlaveNode(
Seq(AXI4SlavePortParameters(
slaves = Seq(AXI4SlaveParameters(
address = Seq(AddressSet(0, params.size - 1)),
resources = (new MemoryDevice).reg,
regionType = RegionType.UNCACHED, // cacheable
executable = false,
supportsWrite = params.writeTransferSizes,
supportsRead = params.readTransferSizes,
interleavedId = params.interleavedId)),
beatBytes = params.dataBits / 8)
))
streamingEngine.fpgaManagedAXI4NodeOpt.foreach {
node := AXI4IdIndexer(params.idBits) := AXI4Buffer() := _
}
(node, params)
}
override def genHeader(sb: StringBuilder): Unit = {
super.genHeader(sb)
targetMemoryRegions.foreach(_.serializeToHeader(sb))
}
@ -267,11 +340,24 @@ class FPGATopImp(outer: FPGATop)(implicit p: Parameters) extends LazyModuleImp(o
val ctrl = IO(Flipped(WidgetMMIO()))
val mem = IO(Vec(p(HostMemNumChannels), AXI4Bundle(p(HostMemChannelKey).axi4BundleParams)))
val dma = IO(Flipped(AXI4Bundle(outer.pcisAXI4BundleParams)))
val cpu_managed_axi4 = outer.cpuManagedAXI4NodeTuple.map { case (node, params) =>
val port = IO(Flipped(AXI4Bundle(params.axi4BundleParams)))
node.out.head._1 <> port
port
}
val fpga_managed_axi4 = outer.fpgaManagedAXI4NodeTuple.map { case (node, params) =>
val port = IO(AXI4Bundle(params.axi4BundleParams))
port <> node.in.head._1
port
}
// Hack: Don't touch the ports so that we can use FPGATop as top-level in ML simulation
dontTouch(ctrl)
dontTouch(mem)
dontTouch(dma)
cpu_managed_axi4.foreach(dontTouch(_))
fpga_managed_axi4.foreach(dontTouch(_))
(mem zip outer.memAXI4Nodes.map(_.in.head)).foreach { case (io, (bundle, _)) =>
require(bundle.params.idBits <= p(HostMemChannelKey).idBits,
s"""| Required memory channel ID bits exceeds that present on host.
@ -280,8 +366,6 @@ class FPGATopImp(outer: FPGATop)(implicit p: Parameters) extends LazyModuleImp(o
io <> bundle
}
outer.pcisNode.out.head._1 <> dma
val sim = Module(new SimWrapper(p(SimWrapperKey)))
val simIo = sim.channelPorts
@ -338,12 +422,18 @@ class FPGATopImp(outer: FPGATop)(implicit p: Parameters) extends LazyModuleImp(o
"MEM_LEN_BITS" -> AXI4Parameters.lenBits,
"MEM_RESP_BITS" -> AXI4Parameters.respBits,
// Address width of the aggregated host-DRAM space
"DMA_ID_BITS" -> dma.params.idBits,
"DMA_ADDR_BITS" -> dma.params.addrBits,
"DMA_DATA_BITS" -> dma.params.dataBits,
"DMA_STRB_BITS" -> dma.params.dataBits / 8,
"DMA_BEAT_BYTES" -> p(DMANastiKey).dataBits / 8,
"DMA_SIZE" -> log2Ceil(p(DMANastiKey).dataBits / 8),
) ++ Seq.tabulate[(String, Long)](p(HostMemNumChannels))(idx => s"MEM_HAS_CHANNEL${idx}" -> 1)
"CPU_MANAGED_AXI4_ID_BITS" -> cpu_managed_axi4.map(_.params.idBits) .getOrElse(0).toLong,
"CPU_MANAGED_AXI4_ADDR_BITS" -> cpu_managed_axi4.map(_.params.addrBits) .getOrElse(0).toLong,
"CPU_MANAGED_AXI4_DATA_BITS" -> cpu_managed_axi4.map(_.params.dataBits) .getOrElse(0).toLong,
"CPU_MANAGED_AXI4_STRB_BITS" -> cpu_managed_axi4.map(_.params.dataBits / 8).getOrElse(0).toLong,
"CPU_MANAGED_AXI4_BEAT_BYTES" -> cpu_managed_axi4.map(_.params.dataBits / 8).getOrElse(0).toLong,
// Widths of the AXI4 FPGA to CPU channel
"FPGA_MANAGED_AXI4_ID_BITS" -> fpga_managed_axi4.map(_.params.idBits) .getOrElse(0).toLong,
"FPGA_MANAGED_AXI4_ADDR_BITS" -> fpga_managed_axi4.map(_.params.addrBits).getOrElse(0).toLong,
"FPGA_MANAGED_AXI4_DATA_BITS" -> fpga_managed_axi4.map(_.params.dataBits).getOrElse(0).toLong,
) ++:
cpu_managed_axi4.map { _ => "CPU_MANAGED_AXI4_PRESENT" -> 1.toLong } ++:
fpga_managed_axi4.map { _ => "FPGA_MANAGED_AXI4_PRESENT" -> 1.toLong } ++:
Seq.tabulate[(String, Long)](p(HostMemNumChannels))(idx => s"MEM_HAS_CHANNEL${idx}" -> 1)
def genHeader(sb: StringBuilder)(implicit p: Parameters) = outer.genHeader(sb)
}

View File

@ -16,8 +16,7 @@ import midas.widgets._
* that host. e.g. F1 uses CPU-driven XDMA and so uses an engine that only
* uses the AXI4M interface.
*/
case object StreamEngineInstantiatorKey extends Field[(StreamEngineParameters, Parameters) => StreamEngine](
(e: StreamEngineParameters, p: Parameters) => new CPUManagedStreamEngine(p, e))
case object StreamEngineInstantiatorKey extends Field[(StreamEngineParameters, Parameters) => StreamEngine]
/**
@ -52,18 +51,20 @@ case class StreamEngineParameters(
* the transport using an AXI4 slave and / or AXI4 master port, which is
* presented by the host platform.
*
* Implementations that require an AXI4 slave set pcisNodeOpt = Some(<node graph>)
* Implementations that require an AXI4 master set pcimNodeOpt = Some(<node graph>)
* Implementations that require an AXI4 subordinate set cpuManagedAXI4NodeOpt = Some(<node graph>)
* Implementations that require an AXI4 manager set fpgaManagedAXI4NodeOpt = Some(<node graph>)
*
*/
abstract class StreamEngine(
p: Parameters,
) extends Widget()(p) {
def params: StreamEngineParameters
def pcisNodeOpt: Option[AXI4InwardNode]
def pcimNodeOpt: Option[AXI4OutwardNode]
def cpuManagedAXI4NodeOpt: Option[AXI4InwardNode]
def fpgaManagedAXI4NodeOpt: Option[AXI4OutwardNode]
lazy val StreamEngineParameters(sourceParams, sinkParams) = params
def hasStreams: Boolean = sourceParams.nonEmpty || sinkParams.nonEmpty
// Connections to bridges that drive streams
val streamsToHostCPU = InModuleBody {

View File

@ -8,38 +8,35 @@ import freechips.rocketchip.config.{Parameters, Field}
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.util.HeterogeneousBag
import midas.core.{DMANastiKey}
import midas.core.{CPUManagedAXI4Key}
import midas.widgets.{AXI4Printf, CtrlNastiKey}
import midas.stage.GoldenGateOutputFileAnnotation
import midas.targetutils.xdc._
case object AXIDebugPrint extends Field[Boolean]
class F1ShimIO(implicit val p: Parameters) extends Bundle {
val master = Flipped(new NastiIO()(p alterPartial ({ case NastiKey => p(CtrlNastiKey) })))
val dma = Flipped(new NastiIO()(p alterPartial ({ case NastiKey => p(DMANastiKey) })))
}
class F1Shim(implicit p: Parameters) extends PlatformShim {
lazy val module = new LazyModuleImp(this) {
val io = IO(new F1ShimIO)
val io_master = IO(Flipped(new NastiIO()(p alterPartial { case NastiKey => p(CtrlNastiKey) })))
val io_dma = IO(Flipped(new NastiIO()(p alterPartial {
case NastiKey => NastiParameters(p(CPUManagedAXI4Key).get.axi4BundleParams) })))
val io_slave = IO(HeterogeneousBag(top.module.mem.map(x => x.cloneType)))
if (p(AXIDebugPrint)) {
AXI4Printf(io.master, "master")
AXI4Printf(io.dma, "dma")
AXI4Printf(io_master, "master")
AXI4Printf(io_dma, "dma")
io_slave.zipWithIndex foreach { case (io, idx) => AXI4Printf(io, s"slave_${idx}") }
}
top.module.ctrl <> io.master
AXI4NastiAssigner.toAXI4(top.module.dma, io.dma)
top.module.ctrl <> io_master
AXI4NastiAssigner.toAXI4(top.module.cpu_managed_axi4.get, io_dma)
io_slave.zip(top.module.mem).foreach({ case (io, bundle) => io <> bundle })
// Biancolin: It would be good to put in writing why ID is being reassigned...
val (wCounterValue, wCounterWrap) = Counter(io.master.aw.fire, 1 << p(CtrlNastiKey).idBits)
val (wCounterValue, wCounterWrap) = Counter(io_master.aw.fire, 1 << p(CtrlNastiKey).idBits)
top.module.ctrl.aw.bits.id := wCounterValue
val (rCounterValue, rCounterWrap) = Counter(io.master.ar.fire, 1 << p(CtrlNastiKey).idBits)
val (rCounterValue, rCounterWrap) = Counter(io_master.ar.fire, 1 << p(CtrlNastiKey).idBits)
top.module.ctrl.ar.bits.id := rCounterValue
// Capture FPGA-toolflow related verilog defines

View File

@ -9,7 +9,7 @@ import freechips.rocketchip.config.{Field, Parameters}
import freechips.rocketchip.diplomacy.{LazyModule, LazyRawModuleImp}
import freechips.rocketchip.util.HeterogeneousBag
import midas.core.{DMANastiKey, HostMemChannelKey}
import midas.core.HostMemChannelKey
import midas.widgets.{AXI4Printf, CtrlNastiKey}
import midas.stage.GoldenGateOutputFileAnnotation
import midas.platform.xilinx._
@ -48,13 +48,6 @@ class VitisShim(implicit p: Parameters) extends PlatformShim {
top.module.reset := hostSyncReset
top.module.clock := hostClock
// tie-off dma/io_slave interfaces
top.module.dma.ar.valid := false.B
top.module.dma.aw.valid := false.B
top.module.dma.w.valid := false.B
top.module.dma.r.ready := false.B
top.module.dma.b.ready := false.B
top.module.mem.foreach({ case bundle =>
bundle.ar.ready := false.B
bundle.aw.ready := false.B
@ -92,6 +85,16 @@ class VitisShim(implicit p: Parameters) extends PlatformShim {
host_mem_cdc.io.m_axi_aclk := ap_clk
host_mem_cdc.io.m_axi_aresetn := ap_rst_n
top.module.fpga_managed_axi4.map { axi4 =>
axi4.ar.ready := false.B
axi4.aw.ready := false.B
axi4.w.ready := false.B
axi4.r <> DontCare
axi4.b <> DontCare
axi4.r.valid := false.B
axi4.b.valid := false.B
}
GoldenGateOutputFileAnnotation.annotateFromChisel(
s"// Vitis Shim requires no dynamically generated macros \n",
fileSuffix = ".defines.vh",

View File

@ -11,8 +11,6 @@ import chisel3.experimental.{DataMirror, Direction}
import freechips.rocketchip.config.{Parameters}
import freechips.rocketchip.util.{DecoupledHelper}
import midas.core.{DMANastiKey}
class PrintRecord(portType: firrtl.ir.BundleType, val formatString: String) extends Record {
def regenLeafType(tpe: firrtl.ir.Type): Data = tpe match {
case firrtl.ir.UIntType(width: firrtl.ir.IntWidth) => UInt(width.width.toInt.W)
@ -188,7 +186,7 @@ class PrintBridgeModule(key: PrintBridgeParameters)(implicit p: Parameters)
val argumentOffsets = printPort.printRecords.map(_._2.argumentOffsets.map(UInt32(_)))
val formatStrings = printPort.printRecords.map(_._2.formatString).map(CStrLit)
override def genHeader(base: BigInt, sb: StringBuilder) {
override def genHeader(base: BigInt, sb: StringBuilder): Unit = {
import CppGenerationUtils._
val headerWidgetName = getWName.toUpperCase
super.genHeader(base, sb)

View File

@ -35,37 +35,70 @@ extern "A" void tick
input reg [1:0] ctrl_b_resp,
input reg [`CTRL_ID_BITS-1:0] ctrl_b_id,
output reg dma_ar_valid,
input reg dma_ar_ready,
output reg [`DMA_ADDR_BITS-1:0] dma_ar_addr,
output reg [`DMA_ID_BITS-1:0] dma_ar_id,
output reg [2:0] dma_ar_size,
output reg [7:0] dma_ar_len,
output reg cpu_managed_axi4_ar_valid,
input reg cpu_managed_axi4_ar_ready,
output reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_addr,
output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_id,
output reg [2:0] cpu_managed_axi4_ar_size,
output reg [7:0] cpu_managed_axi4_ar_len,
output reg dma_aw_valid,
input reg dma_aw_ready,
output reg [`DMA_ADDR_BITS-1:0] dma_aw_addr,
output reg [`DMA_ID_BITS-1:0] dma_aw_id,
output reg [2:0] dma_aw_size,
output reg [7:0] dma_aw_len,
output reg cpu_managed_axi4_aw_valid,
input reg cpu_managed_axi4_aw_ready,
output reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_addr,
output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_id,
output reg [2:0] cpu_managed_axi4_aw_size,
output reg [7:0] cpu_managed_axi4_aw_len,
output reg dma_w_valid,
input reg dma_w_ready,
output reg [`DMA_STRB_BITS-1:0] dma_w_strb,
output reg [`DMA_DATA_BITS-1:0] dma_w_data,
output reg dma_w_last,
output reg cpu_managed_axi4_w_valid,
input reg cpu_managed_axi4_w_ready,
output reg [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_strb,
output reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_data,
output reg cpu_managed_axi4_w_last,
input reg dma_r_valid,
output reg dma_r_ready,
input reg [1:0] dma_r_resp,
input reg [`DMA_ID_BITS-1:0] dma_r_id,
input reg [`DMA_DATA_BITS-1:0] dma_r_data,
input reg dma_r_last,
input reg cpu_managed_axi4_r_valid,
output reg cpu_managed_axi4_r_ready,
input reg [1:0] cpu_managed_axi4_r_resp,
input reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_id,
input reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_data,
input reg cpu_managed_axi4_r_last,
input reg cpu_managed_axi4_b_valid,
output reg cpu_managed_axi4_b_ready,
input reg [1:0] cpu_managed_axi4_b_resp,
input reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_b_id,
input reg fpga_managed_axi4_ar_valid,
output reg fpga_managed_axi4_ar_ready,
input reg [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_addr,
input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_id,
input reg [2:0] fpga_managed_axi4_ar_size,
input reg [7:0] fpga_managed_axi4_ar_len,
input reg fpga_managed_axi4_aw_valid,
output reg fpga_managed_axi4_aw_ready,
input reg [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_addr,
input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_id,
input reg [2:0] fpga_managed_axi4_aw_size,
input reg [7:0] fpga_managed_axi4_aw_len,
input reg fpga_managed_axi4_w_valid,
output reg fpga_managed_axi4_w_ready,
input reg [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_strb,
input reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_data,
input reg fpga_managed_axi4_w_last,
output reg fpga_managed_axi4_r_valid,
input reg fpga_managed_axi4_r_ready,
output reg [1:0] fpga_managed_axi4_r_resp,
output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_id,
output reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_data,
output reg fpga_managed_axi4_r_last,
output reg fpga_managed_axi4_b_valid,
input reg fpga_managed_axi4_b_ready,
output reg [1:0] fpga_managed_axi4_b_resp,
output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_id,
input reg dma_b_valid,
output reg dma_b_ready,
input reg [1:0] dma_b_resp,
input reg [`DMA_ID_BITS-1:0] dma_b_id,
input reg mem_0_ar_valid,
output reg mem_0_ar_ready,
@ -262,37 +295,69 @@ module emul;
wire [1:0] ctrl_b_resp;
wire [`CTRL_ID_BITS-1:0] ctrl_b_id;
reg dma_ar_valid;
wire dma_ar_ready;
reg [`DMA_ADDR_BITS-1:0] dma_ar_addr;
reg [`DMA_ID_BITS-1:0] dma_ar_id;
reg [2:0] dma_ar_size;
reg [7:0] dma_ar_len;
reg cpu_managed_axi4_ar_valid;
wire cpu_managed_axi4_ar_ready;
reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_addr;
reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_id;
reg [2:0] cpu_managed_axi4_ar_size;
reg [7:0] cpu_managed_axi4_ar_len;
reg dma_aw_valid;
wire dma_aw_ready;
reg [`DMA_ADDR_BITS-1:0] dma_aw_addr;
reg [`DMA_ID_BITS-1:0] dma_aw_id;
reg [2:0] dma_aw_size;
reg [7:0] dma_aw_len;
reg cpu_managed_axi4_aw_valid;
wire cpu_managed_axi4_aw_ready;
reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_addr;
reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_id;
reg [2:0] cpu_managed_axi4_aw_size;
reg [7:0] cpu_managed_axi4_aw_len;
reg dma_w_valid;
wire dma_w_ready;
reg [`DMA_STRB_BITS-1:0] dma_w_strb;
reg [`DMA_DATA_BITS-1:0] dma_w_data;
reg dma_w_last;
reg cpu_managed_axi4_w_valid;
wire cpu_managed_axi4_w_ready;
reg [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_strb;
reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_data;
reg cpu_managed_axi4_w_last;
wire dma_r_valid;
reg dma_r_ready;
wire [1:0] dma_r_resp;
wire [`DMA_ID_BITS-1:0] dma_r_id;
wire [`DMA_DATA_BITS-1:0] dma_r_data;
wire dma_r_last;
wire cpu_managed_axi4_r_valid;
reg cpu_managed_axi4_r_ready;
wire [1:0] cpu_managed_axi4_r_resp;
wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_id;
wire [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_data;
wire cpu_managed_axi4_r_last;
wire dma_b_valid;
reg dma_b_ready;
wire [1:0] dma_b_resp;
wire [`DMA_ID_BITS-1:0] dma_b_id;
wire cpu_managed_axi4_b_valid;
reg cpu_managed_axi4_b_ready;
wire [1:0] cpu_managed_axi4_b_resp;
wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_b_id;
wire fpga_managed_axi4_ar_valid;
reg fpga_managed_axi4_ar_ready;
wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_addr;
wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_id;
wire [2:0] fpga_managed_axi4_ar_size;
wire [7:0] fpga_managed_axi4_ar_len;
wire fpga_managed_axi4_aw_valid;
reg fpga_managed_axi4_aw_ready;
wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_addr;
wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_id;
wire [2:0] fpga_managed_axi4_aw_size;
wire [7:0] fpga_managed_axi4_aw_len;
wire fpga_managed_axi4_w_valid;
reg fpga_managed_axi4_w_ready;
wire [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_strb;
wire [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_data;
wire fpga_managed_axi4_w_last;
reg fpga_managed_axi4_r_valid;
wire fpga_managed_axi4_r_ready;
reg [1:0] fpga_managed_axi4_r_resp;
reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_id;
reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_data;
reg fpga_managed_axi4_r_last;
reg fpga_managed_axi4_b_valid;
wire fpga_managed_axi4_b_ready;
reg [1:0] fpga_managed_axi4_b_resp;
reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_id;
wire mem_0_ar_valid;
reg mem_0_ar_ready;
@ -456,37 +521,69 @@ module emul;
wire [1:0] ctrl_b_resp_delay;
wire [`CTRL_ID_BITS-1:0] ctrl_b_id_delay;
wire dma_ar_valid_delay;
wire dma_ar_ready_delay;
wire [`DMA_ADDR_BITS-1:0] dma_ar_addr_delay;
wire [`DMA_ID_BITS-1:0] dma_ar_id_delay;
wire [2:0] dma_ar_size_delay;
wire [7:0] dma_ar_len_delay;
wire cpu_managed_axi4_ar_valid_delay;
wire cpu_managed_axi4_ar_ready_delay;
wire [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_addr_delay;
wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_id_delay;
wire [2:0] cpu_managed_axi4_ar_size_delay;
wire [7:0] cpu_managed_axi4_ar_len_delay;
wire dma_aw_valid_delay;
wire dma_aw_ready_delay;
wire [`DMA_ADDR_BITS-1:0] dma_aw_addr_delay;
wire [`DMA_ID_BITS-1:0] dma_aw_id_delay;
wire [2:0] dma_aw_size_delay;
wire [7:0] dma_aw_len_delay;
wire cpu_managed_axi4_aw_valid_delay;
wire cpu_managed_axi4_aw_ready_delay;
wire [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_addr_delay;
wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_id_delay;
wire [2:0] cpu_managed_axi4_aw_size_delay;
wire [7:0] cpu_managed_axi4_aw_len_delay;
wire dma_w_valid_delay;
wire dma_w_ready_delay;
wire [`DMA_STRB_BITS-1:0] dma_w_strb_delay;
wire [`DMA_DATA_BITS-1:0] dma_w_data_delay;
wire dma_w_last_delay;
wire cpu_managed_axi4_w_valid_delay;
wire cpu_managed_axi4_w_ready_delay;
wire [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_strb_delay;
wire [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_data_delay;
wire cpu_managed_axi4_w_last_delay;
wire dma_r_valid_delay;
wire dma_r_ready_delay;
wire [1:0] dma_r_resp_delay;
wire [`DMA_ID_BITS-1:0] dma_r_id_delay;
wire [`DMA_DATA_BITS-1:0] dma_r_data_delay;
wire dma_r_last_delay;
wire cpu_managed_axi4_r_valid_delay;
wire cpu_managed_axi4_r_ready_delay;
wire [1:0] cpu_managed_axi4_r_resp_delay;
wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_id_delay;
wire [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_data_delay;
wire cpu_managed_axi4_r_last_delay;
wire dma_b_valid_delay;
wire dma_b_ready_delay;
wire [1:0] dma_b_resp_delay;
wire [`DMA_ID_BITS-1:0] dma_b_id_delay;
wire cpu_managed_axi4_b_valid_delay;
wire cpu_managed_axi4_b_ready_delay;
wire [1:0] cpu_managed_axi4_b_resp_delay;
wire [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_b_id_delay;
wire fpga_managed_axi4_ar_valid_delay;
wire fpga_managed_axi4_ar_ready_delay;
wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_addr_delay;
wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_id_delay;
wire [2:0] fpga_managed_axi4_ar_size_delay;
wire [7:0] fpga_managed_axi4_ar_len_delay;
wire fpga_managed_axi4_aw_valid_delay;
wire fpga_managed_axi4_aw_ready_delay;
wire [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_addr_delay;
wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_id_delay;
wire [2:0] fpga_managed_axi4_aw_size_delay;
wire [7:0] fpga_managed_axi4_aw_len_delay;
wire fpga_managed_axi4_w_valid_delay;
wire fpga_managed_axi4_w_ready_delay;
wire [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_strb_delay;
wire [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_data_delay;
wire fpga_managed_axi4_w_last_delay;
wire fpga_managed_axi4_r_valid_delay;
wire fpga_managed_axi4_r_ready_delay;
wire [1:0] fpga_managed_axi4_r_resp_delay;
wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_id_delay;
wire [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_data_delay;
wire fpga_managed_axi4_r_last_delay;
wire fpga_managed_axi4_b_valid_delay;
wire fpga_managed_axi4_b_ready_delay;
wire [1:0] fpga_managed_axi4_b_resp_delay;
wire [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_id_delay;
wire mem_0_ar_valid_delay;
wire mem_0_ar_ready_delay;
@ -648,37 +745,69 @@ module emul;
assign #0.1 ctrl_b_resp = ctrl_b_resp_delay;
assign #0.1 ctrl_b_id = ctrl_b_id_delay;
assign #0.1 dma_ar_valid_delay = dma_ar_valid;
assign #0.1 dma_ar_ready = dma_ar_ready_delay;
assign #0.1 dma_ar_addr_delay = dma_ar_addr;
assign #0.1 dma_ar_id_delay = dma_ar_id;
assign #0.1 dma_ar_size_delay = dma_ar_size;
assign #0.1 dma_ar_len_delay = dma_ar_len;
assign #0.1 cpu_managed_axi4_ar_valid_delay = cpu_managed_axi4_ar_valid;
assign #0.1 cpu_managed_axi4_ar_ready = cpu_managed_axi4_ar_ready_delay;
assign #0.1 cpu_managed_axi4_ar_addr_delay = cpu_managed_axi4_ar_addr;
assign #0.1 cpu_managed_axi4_ar_id_delay = cpu_managed_axi4_ar_id;
assign #0.1 cpu_managed_axi4_ar_size_delay = cpu_managed_axi4_ar_size;
assign #0.1 cpu_managed_axi4_ar_len_delay = cpu_managed_axi4_ar_len;
assign #0.1 dma_aw_valid_delay = dma_aw_valid;
assign #0.1 dma_aw_ready = dma_aw_ready_delay;
assign #0.1 dma_aw_addr_delay = dma_aw_addr;
assign #0.1 dma_aw_id_delay = dma_aw_id;
assign #0.1 dma_aw_size_delay = dma_aw_size;
assign #0.1 dma_aw_len_delay = dma_aw_len;
assign #0.1 cpu_managed_axi4_aw_valid_delay = cpu_managed_axi4_aw_valid;
assign #0.1 cpu_managed_axi4_aw_ready = cpu_managed_axi4_aw_ready_delay;
assign #0.1 cpu_managed_axi4_aw_addr_delay = cpu_managed_axi4_aw_addr;
assign #0.1 cpu_managed_axi4_aw_id_delay = cpu_managed_axi4_aw_id;
assign #0.1 cpu_managed_axi4_aw_size_delay = cpu_managed_axi4_aw_size;
assign #0.1 cpu_managed_axi4_aw_len_delay = cpu_managed_axi4_aw_len;
assign #0.1 dma_w_valid_delay = dma_w_valid;
assign #0.1 dma_w_ready = dma_w_ready_delay;
assign #0.1 dma_w_strb_delay = dma_w_strb;
assign #0.1 dma_w_data_delay = dma_w_data;
assign #0.1 dma_w_last_delay = dma_w_last;
assign #0.1 cpu_managed_axi4_w_valid_delay = cpu_managed_axi4_w_valid;
assign #0.1 cpu_managed_axi4_w_ready = cpu_managed_axi4_w_ready_delay;
assign #0.1 cpu_managed_axi4_w_strb_delay = cpu_managed_axi4_w_strb;
assign #0.1 cpu_managed_axi4_w_data_delay = cpu_managed_axi4_w_data;
assign #0.1 cpu_managed_axi4_w_last_delay = cpu_managed_axi4_w_last;
assign #0.1 dma_r_valid = dma_r_valid_delay;
assign #0.1 dma_r_ready_delay = dma_r_ready;
assign #0.1 dma_r_resp = dma_r_resp_delay;
assign #0.1 dma_r_id = dma_r_id_delay;
assign #0.1 dma_r_data = dma_r_data_delay;
assign #0.1 dma_r_last = dma_r_last_delay;
assign #0.1 cpu_managed_axi4_r_valid = cpu_managed_axi4_r_valid_delay;
assign #0.1 cpu_managed_axi4_r_ready_delay = cpu_managed_axi4_r_ready;
assign #0.1 cpu_managed_axi4_r_resp = cpu_managed_axi4_r_resp_delay;
assign #0.1 cpu_managed_axi4_r_id = cpu_managed_axi4_r_id_delay;
assign #0.1 cpu_managed_axi4_r_data = cpu_managed_axi4_r_data_delay;
assign #0.1 cpu_managed_axi4_r_last = cpu_managed_axi4_r_last_delay;
assign #0.1 dma_b_valid = dma_b_valid_delay;
assign #0.1 dma_b_ready_delay = dma_b_ready;
assign #0.1 dma_b_resp = dma_b_resp_delay;
assign #0.1 dma_b_id = dma_b_id_delay;
assign #0.1 cpu_managed_axi4_b_valid = cpu_managed_axi4_b_valid_delay;
assign #0.1 cpu_managed_axi4_b_ready_delay = cpu_managed_axi4_b_ready;
assign #0.1 cpu_managed_axi4_b_resp = cpu_managed_axi4_b_resp_delay;
assign #0.1 cpu_managed_axi4_b_id = cpu_managed_axi4_b_id_delay;
assign #0.1 fpga_managed_axi4_ar_valid = fpga_managed_axi4_ar_valid_delay;
assign #0.1 fpga_managed_axi4_ar_ready_delay = fpga_managed_axi4_ar_ready;
assign #0.1 fpga_managed_axi4_ar_addr = fpga_managed_axi4_ar_addr_delay;
assign #0.1 fpga_managed_axi4_ar_id = fpga_managed_axi4_ar_id_delay;
assign #0.1 fpga_managed_axi4_ar_size = fpga_managed_axi4_ar_size_delay;
assign #0.1 fpga_managed_axi4_ar_len = fpga_managed_axi4_ar_len_delay;
assign #0.1 fpga_managed_axi4_aw_valid = fpga_managed_axi4_aw_valid_delay;
assign #0.1 fpga_managed_axi4_aw_ready_delay = fpga_managed_axi4_aw_ready;
assign #0.1 fpga_managed_axi4_aw_addr = fpga_managed_axi4_aw_addr_delay;
assign #0.1 fpga_managed_axi4_aw_id = fpga_managed_axi4_aw_id_delay;
assign #0.1 fpga_managed_axi4_aw_size = fpga_managed_axi4_aw_size_delay;
assign #0.1 fpga_managed_axi4_aw_len = fpga_managed_axi4_aw_len_delay;
assign #0.1 fpga_managed_axi4_w_valid = fpga_managed_axi4_w_valid_delay;
assign #0.1 fpga_managed_axi4_w_ready_delay = fpga_managed_axi4_w_ready;
assign #0.1 fpga_managed_axi4_w_strb = fpga_managed_axi4_w_strb_delay;
assign #0.1 fpga_managed_axi4_w_data = fpga_managed_axi4_w_data_delay;
assign #0.1 fpga_managed_axi4_w_last = fpga_managed_axi4_w_last_delay;
assign #0.1 fpga_managed_axi4_r_valid_delay = fpga_managed_axi4_r_valid;
assign #0.1 fpga_managed_axi4_r_ready = fpga_managed_axi4_r_ready_delay;
assign #0.1 fpga_managed_axi4_r_resp_delay = fpga_managed_axi4_r_resp;
assign #0.1 fpga_managed_axi4_r_id_delay = fpga_managed_axi4_r_id;
assign #0.1 fpga_managed_axi4_r_data_delay = fpga_managed_axi4_r_data;
assign #0.1 fpga_managed_axi4_r_last_delay = fpga_managed_axi4_r_last;
assign #0.1 fpga_managed_axi4_b_valid_delay = fpga_managed_axi4_b_valid;
assign #0.1 fpga_managed_axi4_b_ready = fpga_managed_axi4_b_ready_delay;
assign #0.1 fpga_managed_axi4_b_resp_delay = fpga_managed_axi4_b_resp;
assign #0.1 fpga_managed_axi4_b_id_delay = fpga_managed_axi4_b_id;
assign #0.1 mem_0_ar_valid = mem_0_ar_valid_delay;
assign #0.1 mem_0_ar_ready_delay = mem_0_ar_ready;
@ -843,38 +972,72 @@ module emul;
.ctrl_b_ready(ctrl_b_ready_delay),
.ctrl_b_bits_resp(ctrl_b_resp_delay),
.ctrl_b_bits_id(ctrl_b_id_delay),
`ifdef CPU_MANAGED_AXI4_PRESENT
.cpu_managed_axi4_ar_valid(cpu_managed_axi4_ar_valid_delay),
.cpu_managed_axi4_ar_ready(cpu_managed_axi4_ar_ready_delay),
.cpu_managed_axi4_ar_bits_addr(cpu_managed_axi4_ar_addr_delay),
.cpu_managed_axi4_ar_bits_id(cpu_managed_axi4_ar_id_delay),
.cpu_managed_axi4_ar_bits_size(cpu_managed_axi4_ar_size_delay),
.cpu_managed_axi4_ar_bits_len(cpu_managed_axi4_ar_len_delay),
.dma_ar_valid(dma_ar_valid_delay),
.dma_ar_ready(dma_ar_ready_delay),
.dma_ar_bits_addr(dma_ar_addr_delay),
.dma_ar_bits_id(dma_ar_id_delay),
.dma_ar_bits_size(dma_ar_size_delay),
.dma_ar_bits_len(dma_ar_len_delay),
.cpu_managed_axi4_aw_valid(cpu_managed_axi4_aw_valid_delay),
.cpu_managed_axi4_aw_ready(cpu_managed_axi4_aw_ready_delay),
.cpu_managed_axi4_aw_bits_addr(cpu_managed_axi4_aw_addr_delay),
.cpu_managed_axi4_aw_bits_id(cpu_managed_axi4_aw_id_delay),
.cpu_managed_axi4_aw_bits_size(cpu_managed_axi4_aw_size_delay),
.cpu_managed_axi4_aw_bits_len(cpu_managed_axi4_aw_len_delay),
.dma_aw_valid(dma_aw_valid_delay),
.dma_aw_ready(dma_aw_ready_delay),
.dma_aw_bits_addr(dma_aw_addr_delay),
.dma_aw_bits_id(dma_aw_id_delay),
.dma_aw_bits_size(dma_aw_size_delay),
.dma_aw_bits_len(dma_aw_len_delay),
.cpu_managed_axi4_w_valid(cpu_managed_axi4_w_valid_delay),
.cpu_managed_axi4_w_ready(cpu_managed_axi4_w_ready_delay),
.cpu_managed_axi4_w_bits_strb(cpu_managed_axi4_w_strb_delay),
.cpu_managed_axi4_w_bits_data(cpu_managed_axi4_w_data_delay),
.cpu_managed_axi4_w_bits_last(cpu_managed_axi4_w_last_delay),
.dma_w_valid(dma_w_valid_delay),
.dma_w_ready(dma_w_ready_delay),
.dma_w_bits_strb(dma_w_strb_delay),
.dma_w_bits_data(dma_w_data_delay),
.dma_w_bits_last(dma_w_last_delay),
.cpu_managed_axi4_r_valid(cpu_managed_axi4_r_valid_delay),
.cpu_managed_axi4_r_ready(cpu_managed_axi4_r_ready_delay),
.cpu_managed_axi4_r_bits_resp(cpu_managed_axi4_r_resp_delay),
.cpu_managed_axi4_r_bits_id(cpu_managed_axi4_r_id_delay),
.cpu_managed_axi4_r_bits_data(cpu_managed_axi4_r_data_delay),
.cpu_managed_axi4_r_bits_last(cpu_managed_axi4_r_last_delay),
.dma_r_valid(dma_r_valid_delay),
.dma_r_ready(dma_r_ready_delay),
.dma_r_bits_resp(dma_r_resp_delay),
.dma_r_bits_id(dma_r_id_delay),
.dma_r_bits_data(dma_r_data_delay),
.dma_r_bits_last(dma_r_last_delay),
.cpu_managed_axi4_b_valid(cpu_managed_axi4_b_valid_delay),
.cpu_managed_axi4_b_ready(cpu_managed_axi4_b_ready_delay),
.cpu_managed_axi4_b_bits_resp(cpu_managed_axi4_b_resp_delay),
.cpu_managed_axi4_b_bits_id(cpu_managed_axi4_b_id_delay),
`endif
`ifdef FPGA_MANAGED_AXI4_PRESENT
.fpga_managed_axi4_ar_valid(fpga_managed_axi4_ar_valid_delay),
.fpga_managed_axi4_ar_ready(fpga_managed_axi4_ar_ready_delay),
.fpga_managed_axi4_ar_bits_addr(fpga_managed_axi4_ar_addr_delay),
.fpga_managed_axi4_ar_bits_id(fpga_managed_axi4_ar_id_delay),
.fpga_managed_axi4_ar_bits_size(fpga_managed_axi4_ar_size_delay),
.fpga_managed_axi4_ar_bits_len(fpga_managed_axi4_ar_len_delay),
.dma_b_valid(dma_b_valid_delay),
.dma_b_ready(dma_b_ready_delay),
.dma_b_bits_resp(dma_b_resp_delay),
.dma_b_bits_id(dma_b_id_delay),
.fpga_managed_axi4_aw_valid(fpga_managed_axi4_aw_valid_delay),
.fpga_managed_axi4_aw_ready(fpga_managed_axi4_aw_ready_delay),
.fpga_managed_axi4_aw_bits_addr(fpga_managed_axi4_aw_addr_delay),
.fpga_managed_axi4_aw_bits_id(fpga_managed_axi4_aw_id_delay),
.fpga_managed_axi4_aw_bits_size(fpga_managed_axi4_aw_size_delay),
.fpga_managed_axi4_aw_bits_len(fpga_managed_axi4_aw_len_delay),
.fpga_managed_axi4_w_valid(fpga_managed_axi4_w_valid_delay),
.fpga_managed_axi4_w_ready(fpga_managed_axi4_w_ready_delay),
.fpga_managed_axi4_w_bits_strb(fpga_managed_axi4_w_strb_delay),
.fpga_managed_axi4_w_bits_data(fpga_managed_axi4_w_data_delay),
.fpga_managed_axi4_w_bits_last(fpga_managed_axi4_w_last_delay),
.fpga_managed_axi4_r_valid(fpga_managed_axi4_r_valid_delay),
.fpga_managed_axi4_r_ready(fpga_managed_axi4_r_ready_delay),
.fpga_managed_axi4_r_bits_resp(fpga_managed_axi4_r_resp_delay),
.fpga_managed_axi4_r_bits_id(fpga_managed_axi4_r_id_delay),
.fpga_managed_axi4_r_bits_data(fpga_managed_axi4_r_data_delay),
.fpga_managed_axi4_r_bits_last(fpga_managed_axi4_r_last_delay),
.fpga_managed_axi4_b_valid(fpga_managed_axi4_b_valid_delay),
.fpga_managed_axi4_b_ready(fpga_managed_axi4_b_ready_delay),
.fpga_managed_axi4_b_bits_resp(fpga_managed_axi4_b_resp_delay),
.fpga_managed_axi4_b_bits_id(fpga_managed_axi4_b_id_delay),
`endif
.mem_0_ar_valid(mem_0_ar_valid_delay),
.mem_0_ar_ready(mem_0_ar_ready_delay),
@ -907,7 +1070,6 @@ module emul;
.mem_0_b_ready(mem_0_b_ready_delay),
.mem_0_b_bits_resp(mem_0_b_resp_delay),
.mem_0_b_bits_id(mem_0_b_id_delay),
`ifdef MEM_HAS_CHANNEL1
.mem_1_ar_valid(mem_1_ar_valid_delay),
.mem_1_ar_ready(mem_1_ar_ready_delay),
@ -1049,37 +1211,69 @@ module emul;
ctrl_b_resp,
ctrl_b_id,
dma_ar_valid,
dma_ar_ready,
dma_ar_addr,
dma_ar_id,
dma_ar_size,
dma_ar_len,
cpu_managed_axi4_ar_valid,
cpu_managed_axi4_ar_ready,
cpu_managed_axi4_ar_addr,
cpu_managed_axi4_ar_id,
cpu_managed_axi4_ar_size,
cpu_managed_axi4_ar_len,
dma_aw_valid,
dma_aw_ready,
dma_aw_addr,
dma_aw_id,
dma_aw_size,
dma_aw_len,
cpu_managed_axi4_aw_valid,
cpu_managed_axi4_aw_ready,
cpu_managed_axi4_aw_addr,
cpu_managed_axi4_aw_id,
cpu_managed_axi4_aw_size,
cpu_managed_axi4_aw_len,
dma_w_valid,
dma_w_ready,
dma_w_strb,
dma_w_data,
dma_w_last,
cpu_managed_axi4_w_valid,
cpu_managed_axi4_w_ready,
cpu_managed_axi4_w_strb,
cpu_managed_axi4_w_data,
cpu_managed_axi4_w_last,
dma_r_valid,
dma_r_ready,
dma_r_resp,
dma_r_id,
dma_r_data,
dma_r_last,
cpu_managed_axi4_r_valid,
cpu_managed_axi4_r_ready,
cpu_managed_axi4_r_resp,
cpu_managed_axi4_r_id,
cpu_managed_axi4_r_data,
cpu_managed_axi4_r_last,
dma_b_valid,
dma_b_ready,
dma_b_resp,
dma_b_id,
cpu_managed_axi4_b_valid,
cpu_managed_axi4_b_ready,
cpu_managed_axi4_b_resp,
cpu_managed_axi4_b_id,
fpga_managed_axi4_ar_valid,
fpga_managed_axi4_ar_ready,
fpga_managed_axi4_ar_addr,
fpga_managed_axi4_ar_id,
fpga_managed_axi4_ar_size,
fpga_managed_axi4_ar_len,
fpga_managed_axi4_aw_valid,
fpga_managed_axi4_aw_ready,
fpga_managed_axi4_aw_addr,
fpga_managed_axi4_aw_id,
fpga_managed_axi4_aw_size,
fpga_managed_axi4_aw_len,
fpga_managed_axi4_w_valid,
fpga_managed_axi4_w_ready,
fpga_managed_axi4_w_strb,
fpga_managed_axi4_w_data,
fpga_managed_axi4_w_last,
fpga_managed_axi4_r_valid,
fpga_managed_axi4_r_ready,
fpga_managed_axi4_r_resp,
fpga_managed_axi4_r_id,
fpga_managed_axi4_r_data,
fpga_managed_axi4_r_last,
fpga_managed_axi4_b_valid,
fpga_managed_axi4_b_ready,
fpga_managed_axi4_b_resp,
fpga_managed_axi4_b_id,
mem_0_ar_valid,
mem_0_ar_ready,

View File

@ -35,37 +35,73 @@ module verilator_top (
output reg [1:0] ctrl_b_bits_resp,
output reg [`CTRL_ID_BITS-1:0] ctrl_b_bits_id,
input reg dma_ar_valid,
output reg dma_ar_ready,
input reg [`DMA_ADDR_BITS-1:0] dma_ar_bits_addr,
input reg [`DMA_ID_BITS-1:0] dma_ar_bits_id,
input reg [2:0] dma_ar_bits_size,
input reg [7:0] dma_ar_bits_len,
`ifdef CPU_MANAGED_AXI4_PRESENT
input reg cpu_managed_axi4_ar_valid,
output reg cpu_managed_axi4_ar_ready,
input reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_ar_bits_addr,
input reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_ar_bits_id,
input reg [2:0] cpu_managed_axi4_ar_bits_size,
input reg [7:0] cpu_managed_axi4_ar_bits_len,
input reg dma_aw_valid,
output reg dma_aw_ready,
input reg [`DMA_ADDR_BITS-1:0] dma_aw_bits_addr,
input reg [`DMA_ID_BITS-1:0] dma_aw_bits_id,
input reg [2:0] dma_aw_bits_size,
input reg [7:0] dma_aw_bits_len,
input reg cpu_managed_axi4_aw_valid,
output reg cpu_managed_axi4_aw_ready,
input reg [`CPU_MANAGED_AXI4_ADDR_BITS-1:0] cpu_managed_axi4_aw_bits_addr,
input reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_aw_bits_id,
input reg [2:0] cpu_managed_axi4_aw_bits_size,
input reg [7:0] cpu_managed_axi4_aw_bits_len,
input reg dma_w_valid,
output reg dma_w_ready,
input reg [`DMA_STRB_BITS-1:0] dma_w_bits_strb,
input reg [`DMA_DATA_BITS-1:0] dma_w_bits_data,
input reg dma_w_bits_last,
input reg cpu_managed_axi4_w_valid,
output reg cpu_managed_axi4_w_ready,
input reg [`CPU_MANAGED_AXI4_STRB_BITS-1:0] cpu_managed_axi4_w_bits_strb,
input reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_w_bits_data,
input reg cpu_managed_axi4_w_bits_last,
output reg dma_r_valid,
input reg dma_r_ready,
output reg [1:0] dma_r_bits_resp,
output reg [`DMA_ID_BITS-1:0] dma_r_bits_id,
output reg [`DMA_DATA_BITS-1:0] dma_r_bits_data,
output reg dma_r_bits_last,
output reg cpu_managed_axi4_r_valid,
input reg cpu_managed_axi4_r_ready,
output reg [1:0] cpu_managed_axi4_r_bits_resp,
output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_r_bits_id,
output reg [`CPU_MANAGED_AXI4_DATA_BITS-1:0] cpu_managed_axi4_r_bits_data,
output reg cpu_managed_axi4_r_bits_last,
output reg dma_b_valid,
input reg dma_b_ready,
output reg [1:0] dma_b_bits_resp,
output reg [`DMA_ID_BITS-1:0] dma_b_bits_id,
output reg cpu_managed_axi4_b_valid,
input reg cpu_managed_axi4_b_ready,
output reg [1:0] cpu_managed_axi4_b_bits_resp,
output reg [`CPU_MANAGED_AXI4_ID_BITS-1:0] cpu_managed_axi4_b_bits_id,
`endif // CPU_MANAGED_AXI4_PRESENT
`ifdef FPGA_MANAGED_AXI4_PRESENT
output reg fpga_managed_axi4_ar_valid,
input reg fpga_managed_axi4_ar_ready,
output reg [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_ar_bits_addr,
output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_ar_bits_id,
output reg [2:0] fpga_managed_axi4_ar_bits_size,
output reg [7:0] fpga_managed_axi4_ar_bits_len,
output reg fpga_managed_axi4_aw_valid,
input reg fpga_managed_axi4_aw_ready,
output reg [`FPGA_MANAGED_AXI4_ADDR_BITS-1:0] fpga_managed_axi4_aw_bits_addr,
output reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_aw_bits_id,
output reg [2:0] fpga_managed_axi4_aw_bits_size,
output reg [7:0] fpga_managed_axi4_aw_bits_len,
output reg fpga_managed_axi4_w_valid,
input reg fpga_managed_axi4_w_ready,
output reg [(`FPGA_MANAGED_AXI4_DATA_BITS/8)-1:0] fpga_managed_axi4_w_bits_strb,
output reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_w_bits_data,
output reg fpga_managed_axi4_w_bits_last,
input reg fpga_managed_axi4_r_valid,
output reg fpga_managed_axi4_r_ready,
input reg [1:0] fpga_managed_axi4_r_bits_resp,
input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_r_bits_id,
input reg [`FPGA_MANAGED_AXI4_DATA_BITS-1:0] fpga_managed_axi4_r_bits_data,
input reg fpga_managed_axi4_r_bits_last,
input reg fpga_managed_axi4_b_valid,
output reg fpga_managed_axi4_b_ready,
input reg [1:0] fpga_managed_axi4_b_bits_resp,
input reg [`FPGA_MANAGED_AXI4_ID_BITS-1:0] fpga_managed_axi4_b_bits_id,
`endif
output reg mem_0_ar_valid,
input reg mem_0_ar_ready,
@ -237,37 +273,73 @@ module verilator_top (
.ctrl_b_bits_resp(ctrl_b_bits_resp),
.ctrl_b_bits_id(ctrl_b_bits_id),
.dma_ar_valid(dma_ar_valid),
.dma_ar_ready(dma_ar_ready),
.dma_ar_bits_addr(dma_ar_bits_addr),
.dma_ar_bits_id(dma_ar_bits_id),
.dma_ar_bits_size(dma_ar_bits_size),
.dma_ar_bits_len(dma_ar_bits_len),
`ifdef CPU_MANAGED_AXI4_PRESENT
.cpu_managed_axi4_ar_valid(cpu_managed_axi4_ar_valid),
.cpu_managed_axi4_ar_ready(cpu_managed_axi4_ar_ready),
.cpu_managed_axi4_ar_bits_addr(cpu_managed_axi4_ar_bits_addr),
.cpu_managed_axi4_ar_bits_id(cpu_managed_axi4_ar_bits_id),
.cpu_managed_axi4_ar_bits_size(cpu_managed_axi4_ar_bits_size),
.cpu_managed_axi4_ar_bits_len(cpu_managed_axi4_ar_bits_len),
.dma_aw_valid(dma_aw_valid),
.dma_aw_ready(dma_aw_ready),
.dma_aw_bits_addr(dma_aw_bits_addr),
.dma_aw_bits_id(dma_aw_bits_id),
.dma_aw_bits_size(dma_aw_bits_size),
.dma_aw_bits_len(dma_aw_bits_len),
.cpu_managed_axi4_aw_valid(cpu_managed_axi4_aw_valid),
.cpu_managed_axi4_aw_ready(cpu_managed_axi4_aw_ready),
.cpu_managed_axi4_aw_bits_addr(cpu_managed_axi4_aw_bits_addr),
.cpu_managed_axi4_aw_bits_id(cpu_managed_axi4_aw_bits_id),
.cpu_managed_axi4_aw_bits_size(cpu_managed_axi4_aw_bits_size),
.cpu_managed_axi4_aw_bits_len(cpu_managed_axi4_aw_bits_len),
.dma_w_valid(dma_w_valid),
.dma_w_ready(dma_w_ready),
.dma_w_bits_strb(dma_w_bits_strb),
.dma_w_bits_data(dma_w_bits_data),
.dma_w_bits_last(dma_w_bits_last),
.cpu_managed_axi4_w_valid(cpu_managed_axi4_w_valid),
.cpu_managed_axi4_w_ready(cpu_managed_axi4_w_ready),
.cpu_managed_axi4_w_bits_strb(cpu_managed_axi4_w_bits_strb),
.cpu_managed_axi4_w_bits_data(cpu_managed_axi4_w_bits_data),
.cpu_managed_axi4_w_bits_last(cpu_managed_axi4_w_bits_last),
.dma_r_valid(dma_r_valid),
.dma_r_ready(dma_r_ready),
.dma_r_bits_resp(dma_r_bits_resp),
.dma_r_bits_id(dma_r_bits_id),
.dma_r_bits_data(dma_r_bits_data),
.dma_r_bits_last(dma_r_bits_last),
.cpu_managed_axi4_r_valid(cpu_managed_axi4_r_valid),
.cpu_managed_axi4_r_ready(cpu_managed_axi4_r_ready),
.cpu_managed_axi4_r_bits_resp(cpu_managed_axi4_r_bits_resp),
.cpu_managed_axi4_r_bits_id(cpu_managed_axi4_r_bits_id),
.cpu_managed_axi4_r_bits_data(cpu_managed_axi4_r_bits_data),
.cpu_managed_axi4_r_bits_last(cpu_managed_axi4_r_bits_last),
.dma_b_valid(dma_b_valid),
.dma_b_ready(dma_b_ready),
.dma_b_bits_resp(dma_b_bits_resp),
.dma_b_bits_id(dma_b_bits_id),
.cpu_managed_axi4_b_valid(cpu_managed_axi4_b_valid),
.cpu_managed_axi4_b_ready(cpu_managed_axi4_b_ready),
.cpu_managed_axi4_b_bits_resp(cpu_managed_axi4_b_bits_resp),
.cpu_managed_axi4_b_bits_id(cpu_managed_axi4_b_bits_id),
`endif
`ifdef FPGA_MANAGED_AXI4_PRESENT
.fpga_managed_axi4_ar_valid(fpga_managed_axi4_ar_valid),
.fpga_managed_axi4_ar_ready(fpga_managed_axi4_ar_ready),
.fpga_managed_axi4_ar_bits_addr(fpga_managed_axi4_ar_bits_addr),
.fpga_managed_axi4_ar_bits_id(fpga_managed_axi4_ar_bits_id),
.fpga_managed_axi4_ar_bits_size(fpga_managed_axi4_ar_bits_size),
.fpga_managed_axi4_ar_bits_len(fpga_managed_axi4_ar_bits_len),
.fpga_managed_axi4_aw_valid(fpga_managed_axi4_aw_valid),
.fpga_managed_axi4_aw_ready(fpga_managed_axi4_aw_ready),
.fpga_managed_axi4_aw_bits_addr(fpga_managed_axi4_aw_bits_addr),
.fpga_managed_axi4_aw_bits_id(fpga_managed_axi4_aw_bits_id),
.fpga_managed_axi4_aw_bits_size(fpga_managed_axi4_aw_bits_size),
.fpga_managed_axi4_aw_bits_len(fpga_managed_axi4_aw_bits_len),
.fpga_managed_axi4_w_valid(fpga_managed_axi4_w_valid),
.fpga_managed_axi4_w_ready(fpga_managed_axi4_w_ready),
.fpga_managed_axi4_w_bits_strb(fpga_managed_axi4_w_bits_strb),
.fpga_managed_axi4_w_bits_data(fpga_managed_axi4_w_bits_data),
.fpga_managed_axi4_w_bits_last(fpga_managed_axi4_w_bits_last),
.fpga_managed_axi4_r_valid(fpga_managed_axi4_r_valid),
.fpga_managed_axi4_r_ready(fpga_managed_axi4_r_ready),
.fpga_managed_axi4_r_bits_resp(fpga_managed_axi4_r_bits_resp),
.fpga_managed_axi4_r_bits_id(fpga_managed_axi4_r_bits_id),
.fpga_managed_axi4_r_bits_data(fpga_managed_axi4_r_bits_data),
.fpga_managed_axi4_r_bits_last(fpga_managed_axi4_r_bits_last),
.fpga_managed_axi4_b_valid(fpga_managed_axi4_b_valid),
.fpga_managed_axi4_b_ready(fpga_managed_axi4_b_ready),
.fpga_managed_axi4_b_bits_resp(fpga_managed_axi4_b_bits_resp),
.fpga_managed_axi4_b_bits_id(fpga_managed_axi4_b_bits_id),
`endif
.mem_0_ar_valid(mem_0_ar_valid),
.mem_0_ar_ready(mem_0_ar_ready),

View File

@ -11,19 +11,29 @@ import firesim.configs.{WithDefaultMemModel, WithWiringTransform}
// A configuration that contributes no settings: it simply wraps Parameters.empty.
class NoConfig extends Config(Parameters.empty)
// This is incomplete and must be mixed into a complete platform config
class DefaultF1Config extends Config(new Config((site, here, up) => {
case DesiredHostFrequency => 75
case SynthAsserts => true
case GenerateMultiCycleRamModels => true
case EnableModelMultiThreading => true
case EnableAutoILA => true
case SynthPrints => true
case EnableAutoCounter => true
}) ++ new Config(
new firesim.configs.WithEC2F1Artefacts ++
class BaseMidasExamplesConfig extends Config(
new WithDefaultMemModel ++
new WithWiringTransform ++
new midas.F1Config))
new Config((site, here, up) => {
case DesiredHostFrequency => 75
case SynthAsserts => true
case GenerateMultiCycleRamModels => true
case EnableModelMultiThreading => true
case EnableAutoILA => true
case SynthPrints => true
case EnableAutoCounter => true
})
)
// Composes, in this order, firesim's WithEC2F1Artefacts, the shared
// BaseMidasExamplesConfig, and midas.F1Config into a single config.
// NOTE(review): the order of the `++` chain presumably determines which
// fragment wins when keys overlap -- confirm before reordering.
class DefaultF1Config extends Config(
new firesim.configs.WithEC2F1Artefacts ++
new BaseMidasExamplesConfig ++
new midas.F1Config
)
// Composes the shared BaseMidasExamplesConfig with midas.VitisConfig.
// Unlike DefaultF1Config, no artefact fragment is mixed in here.
class DefaultVitisConfig extends Config(
new BaseMidasExamplesConfig ++
new midas.VitisConfig
)
class PointerChaserConfig extends Config((site, here, up) => {
case MemSize => BigInt(1 << 30) // 1 GB

View File

@ -187,6 +187,11 @@ class ParityF1Test extends TutorialSuite("Parity") {
runTest("verilator", true)
runTest("vcs", true)
}
// Runs the "Parity" TutorialSuite with DefaultVitisConfig selected as the
// platform config (passed by simple class name), once per backend:
// "verilator" and "vcs".
// NOTE(review): the meaning of the second `runTest` argument (`true`) is not
// visible from here -- check TutorialSuite.runTest.
class ParityVitisTest extends TutorialSuite("Parity", platformConfigs = classOf[DefaultVitisConfig].getSimpleName) {
runTest("verilator", true)
runTest("vcs", true)
}
// Suites for the shift-register examples. Each only instantiates TutorialSuite
// with the example's name and adds no explicit runTest calls of its own; the
// platform config is whatever TutorialSuite uses when none is passed.
class ShiftRegisterF1Test extends TutorialSuite("ShiftRegister")
class ResetShiftRegisterF1Test extends TutorialSuite("ResetShiftRegister")
class EnableShiftRegisterF1Test extends TutorialSuite("EnableShiftRegister")