FPGA-managed bridge stream support in metasimulation (#1181)
* metasim-able FPGA-controlled bridge streams * simif: Add a virtual method to permit doing streamengine init * Remove unneeded vitis kernel def changes * Address some of nandors comments
This commit is contained in:
parent
d74c8d639d
commit
fdb5d6d439
|
@ -29,7 +29,6 @@ project {
|
||||||
"glob:**midas/src/main/scala/midas/SynthUnitTests.scala",
|
"glob:**midas/src/main/scala/midas/SynthUnitTests.scala",
|
||||||
"glob:**midas/src/main/scala/midas/core/CPUManagedStreamEngine.scala",
|
"glob:**midas/src/main/scala/midas/core/CPUManagedStreamEngine.scala",
|
||||||
"glob:**midas/src/main/scala/midas/core/Channel.scala",
|
"glob:**midas/src/main/scala/midas/core/Channel.scala",
|
||||||
"glob:**midas/src/main/scala/midas/core/FPGAManagedStreamEngine.scala",
|
|
||||||
"glob:**midas/src/main/scala/midas/core/FPGATop.scala",
|
"glob:**midas/src/main/scala/midas/core/FPGATop.scala",
|
||||||
"glob:**midas/src/main/scala/midas/core/Interfaces.scala",
|
"glob:**midas/src/main/scala/midas/core/Interfaces.scala",
|
||||||
"glob:**midas/src/main/scala/midas/core/LIBDNUnitTest.scala",
|
"glob:**midas/src/main/scala/midas/core/LIBDNUnitTest.scala",
|
||||||
|
|
|
@ -292,6 +292,7 @@ void tracerv_t::tick() {
|
||||||
|
|
||||||
// Pull in any remaining tokens and flush them to file
|
// Pull in any remaining tokens and flush them to file
|
||||||
void tracerv_t::flush() {
|
void tracerv_t::flush() {
|
||||||
|
pull_flush(stream_idx);
|
||||||
while (this->trace_enabled && (process_tokens(this->stream_depth, 0) > 0))
|
while (this->trace_enabled && (process_tokens(this->stream_depth, 0) > 0))
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,6 +72,7 @@ protected:
|
||||||
return 0;
|
return 0;
|
||||||
return sim->push(stream_idx, data, size, minimum_batch_size);
|
return sim->push(stream_idx, data, size, minimum_batch_size);
|
||||||
}
|
}
|
||||||
|
// Forwards a flush request for the FPGA-to-CPU stream identified by
// stream_idx to the simulation interface this driver is attached to.
void pull_flush(unsigned stream_idx) {
  sim->pull_flush(stream_idx);
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
simif_t *sim;
|
simif_t *sim;
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
// See LICENSE for license details.
|
||||||
|
|
||||||
|
#ifndef __BRIDGES_BRIDGE_STREAM_DRIVER_H
|
||||||
|
#define __BRIDGES_BRIDGE_STREAM_DRIVER_H
|
||||||
|
|
||||||
|
/**
 * @brief Abstract driver-side interface to one FPGA-to-CPU bridge stream.
 *
 * Each stream-engine flavour supplies its own implementation; users of a
 * stream only ever talk to it through these three operations.
 */
class FPGAToCPUStreamDriver {
public:
  virtual ~FPGAToCPUStreamDriver() = default;

  /// One-time setup hook, invoked before the stream is used.
  virtual void init() = 0;

  /**
   * @brief Dequeues stream data into a caller-provided buffer.
   *
   * @param dest Buffer that receives the dequeued bytes
   * @param num_bytes Number of bytes the caller would like to dequeue
   * @param required_bytes Minimum acceptable amount; if fewer bytes would be
   *        dequeued, none are and 0 is returned
   * @return size_t Number of bytes dequeued
   */
  virtual size_t pull(void *dest, size_t num_bytes, size_t required_bytes) = 0;

  /// Asks the implementation to make any data still queued on the host
  /// available to a subsequent pull.
  virtual void flush() = 0;
};
|
||||||
|
|
||||||
|
/**
 * @brief Abstract driver-side interface to one CPU-to-FPGA bridge stream.
 *
 * Mirror image of the FPGA-to-CPU interface: data flows from a
 * caller-provided buffer towards the FPGA.
 */
class CPUToFPGAStreamDriver {
public:
  virtual ~CPUToFPGAStreamDriver() = default;

  /// One-time setup hook, invoked before the stream is used.
  virtual void init() = 0;

  /**
   * @brief Enqueues data from a caller-provided buffer into the stream.
   *
   * @param src Buffer holding the bytes to enqueue
   * @param num_bytes Number of bytes the caller would like to enqueue
   * @param required_bytes Minimum acceptable amount; if fewer bytes would be
   *        enqueued, none are and 0 is returned
   * @return size_t Number of bytes enqueued
   */
  virtual size_t push(void *src, size_t num_bytes, size_t required_bytes) = 0;

  /// Asks the implementation to bypass any batching for already-pushed data.
  virtual void flush() = 0;
};
|
||||||
|
|
||||||
|
#endif // __BRIDGES_BRIDGE_STREAM_DRIVER_H
|
|
@ -12,7 +12,9 @@
|
||||||
* would be enqueued, this method enqueues none and returns 0.
|
* would be enqueued, this method enqueues none and returns 0.
|
||||||
* @return size_t
|
* @return size_t
|
||||||
*/
|
*/
|
||||||
size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) {
|
size_t CPUManagedStreams::CPUToFPGADriver::push(void *src,
|
||||||
|
size_t num_bytes,
|
||||||
|
size_t required_bytes) {
|
||||||
assert(num_bytes >= required_bytes);
|
assert(num_bytes >= required_bytes);
|
||||||
|
|
||||||
// Similarly to above, the legacy implementation of DMA does not correctly
|
// Similarly to above, the legacy implementation of DMA does not correctly
|
||||||
|
@ -51,7 +53,9 @@ size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) {
|
||||||
* would be dequeued, dequeue none and return 0.
|
* would be dequeued, dequeue none and return 0.
|
||||||
* @return size_t Number of bytes successfully dequeued
|
* @return size_t Number of bytes successfully dequeued
|
||||||
*/
|
*/
|
||||||
size_t StreamToCPU::pull(void *dest, size_t num_bytes, size_t required_bytes) {
|
size_t CPUManagedStreams::FPGAToCPUDriver::pull(void *dest,
|
||||||
|
size_t num_bytes,
|
||||||
|
size_t required_bytes) {
|
||||||
assert(num_bytes >= required_bytes);
|
assert(num_bytes >= required_bytes);
|
||||||
|
|
||||||
// The legacy code is clearly broken for requests that aren't a
|
// The legacy code is clearly broken for requests that aren't a
|
||||||
|
|
|
@ -1,29 +1,33 @@
|
||||||
// See LICENSE for license details.
|
// See LICENSE for license details.
|
||||||
|
|
||||||
#ifndef __CPU_MANAGED_STREAM_H
|
#ifndef __BRIDGES_CPU_MANAGED_STREAM_H
|
||||||
#define __CPU_MANAGED_STREAM_H
|
#define __BRIDGES_CPU_MANAGED_STREAM_H
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "bridge_stream_driver.h"
|
||||||
|
|
||||||
|
namespace CPUManagedStreams {
|
||||||
/**
|
/**
|
||||||
* @brief Parameters emitted for a CPU-managed stream emitted by Golden Gate.
|
* @brief Parameters for a CPU-managed stream, emitted by Golden Gate.
|
||||||
*
|
*
|
||||||
* This will be replaced by a protobuf-derived class, and re-used across both
|
* This will be replaced by a protobuf-derived class, and re-used across both
|
||||||
* Scala and C++.
|
* Scala and C++.
|
||||||
*/
|
*/
|
||||||
typedef struct CPUManagedStreamParameters {
|
typedef struct StreamParameters {
|
||||||
std::string stream_name;
|
std::string stream_name;
|
||||||
uint64_t dma_addr;
|
uint64_t dma_addr;
|
||||||
uint64_t count_addr;
|
uint64_t count_addr;
|
||||||
uint32_t fpga_buffer_size;
|
uint32_t fpga_buffer_size;
|
||||||
|
|
||||||
CPUManagedStreamParameters(std::string stream_name,
|
StreamParameters(std::string stream_name,
|
||||||
uint64_t dma_addr,
|
uint64_t dma_addr,
|
||||||
uint64_t count_addr,
|
uint64_t count_addr,
|
||||||
int fpga_buffer_size)
|
int fpga_buffer_size)
|
||||||
: stream_name(stream_name), dma_addr(dma_addr), count_addr(count_addr),
|
: stream_name(stream_name), dma_addr(dma_addr), count_addr(count_addr),
|
||||||
fpga_buffer_size(fpga_buffer_size){};
|
fpga_buffer_size(fpga_buffer_size){};
|
||||||
} CPUManagedStreamParameters;
|
} StreamParameters;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Base class for CPU-managed streams
|
* @brief Base class for CPU-managed streams
|
||||||
|
@ -39,14 +43,15 @@ typedef struct CPUManagedStreamParameters {
|
||||||
* FPGA-managed AXI4 for their platform.
|
* FPGA-managed AXI4 for their platform.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
class CPUManagedStream {
|
class CPUManagedDriver {
|
||||||
public:
|
public:
|
||||||
CPUManagedStream(CPUManagedStreamParameters params,
|
CPUManagedDriver(StreamParameters params,
|
||||||
std::function<uint32_t(size_t)> mmio_read_func)
|
std::function<uint32_t(size_t)> mmio_read_func)
|
||||||
: params(params), mmio_read_func(mmio_read_func){};
|
: params(params), mmio_read_func(mmio_read_func){};
|
||||||
|
virtual ~CPUManagedDriver(){};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CPUManagedStreamParameters params;
|
StreamParameters params;
|
||||||
std::function<uint32_t(size_t)> mmio_read_func;
|
std::function<uint32_t(size_t)> mmio_read_func;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -65,14 +70,20 @@ public:
|
||||||
* implemented with axi4_read, and is provided by the host-platform.
|
* implemented with axi4_read, and is provided by the host-platform.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
class StreamToCPU : public CPUManagedStream {
|
class FPGAToCPUDriver final : public CPUManagedDriver,
|
||||||
|
public FPGAToCPUStreamDriver {
|
||||||
public:
|
public:
|
||||||
StreamToCPU(CPUManagedStreamParameters params,
|
FPGAToCPUDriver(StreamParameters params,
|
||||||
std::function<uint32_t(size_t)> mmio_read,
|
std::function<uint32_t(size_t)> mmio_read,
|
||||||
std::function<size_t(size_t, char *, size_t)> axi4_read)
|
std::function<size_t(size_t, char *, size_t)> axi4_read)
|
||||||
: CPUManagedStream(params, mmio_read), axi4_read(axi4_read){};
|
: CPUManagedDriver(params, mmio_read), axi4_read(axi4_read){};
|
||||||
|
|
||||||
size_t pull(void *dest, size_t num_bytes, size_t required_bytes);
|
virtual size_t
|
||||||
|
pull(void *dest, size_t num_bytes, size_t required_bytes) override;
|
||||||
|
// The CPU-managed stream engine makes all beats available to the bridge,
|
||||||
|
// hence the NOP.
|
||||||
|
virtual void flush() override{};
|
||||||
|
virtual void init() override{};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::function<size_t(size_t, char *, size_t)> axi4_read;
|
std::function<size_t(size_t, char *, size_t)> axi4_read;
|
||||||
|
@ -85,17 +96,24 @@ private:
|
||||||
* FPGA out of a user-provided buffer. IO over a CPU-managed AXI4 IF is
|
* FPGA out of a user-provided buffer. IO over a CPU-managed AXI4 IF is
|
||||||
* implemented with axi4_write, and is provided by the host-platform.
|
* implemented with axi4_write, and is provided by the host-platform.
|
||||||
*/
|
*/
|
||||||
class StreamFromCPU : public CPUManagedStream {
|
class CPUToFPGADriver final : public CPUManagedDriver,
|
||||||
|
public CPUToFPGAStreamDriver {
|
||||||
public:
|
public:
|
||||||
StreamFromCPU(CPUManagedStreamParameters params,
|
CPUToFPGADriver(StreamParameters params,
|
||||||
std::function<uint32_t(size_t)> mmio_read,
|
std::function<uint32_t(size_t)> mmio_read,
|
||||||
std::function<size_t(size_t, char *, size_t)> axi4_write)
|
std::function<size_t(size_t, char *, size_t)> axi4_write)
|
||||||
: CPUManagedStream(params, mmio_read), axi4_write(axi4_write){};
|
: CPUManagedDriver(params, mmio_read), axi4_write(axi4_write){};
|
||||||
|
|
||||||
size_t push(void *src, size_t num_bytes, size_t required_bytes);
|
virtual size_t
|
||||||
|
push(void *src, size_t num_bytes, size_t required_bytes) override;
|
||||||
|
// On a push all beats are delivered to the FPGA, so a NOP is sufficient here.
|
||||||
|
virtual void flush() override{};
|
||||||
|
virtual void init() override{};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::function<size_t(size_t, char *, size_t)> axi4_write;
|
std::function<size_t(size_t, char *, size_t)> axi4_write;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __CPU_MANAGED_STREAM_H
|
} // namespace CPUManagedStreams
|
||||||
|
|
||||||
|
#endif // __BRIDGES_CPU_MANAGED_STREAM_H
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
#include "fpga_managed_stream.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
/**
 * @brief Tells the FPGA where this stream's buffer lives.
 *
 * buffer_base_fpga is a 64-bit address but MMIO writes carry 32 bits, so it
 * is split in two and written to the toHostPhysAddrHigh / toHostPhysAddrLow
 * registers, upper half first.
 */
void FPGAManagedStreams::FPGAToCPUDriver::init() {
  const auto upper_word = static_cast<uint32_t>(buffer_base_fpga >> 32);
  const auto lower_word = static_cast<uint32_t>(buffer_base_fpga);
  mmio_write(params.toHostPhysAddrHighAddr, upper_word);
  mmio_write(params.toHostPhysAddrLowAddr, lower_word);
}
|
||||||
|
/**
|
||||||
|
* @brief Dequeues as much as num_bytes of data from the associated bridge
|
||||||
|
* stream.
|
||||||
|
*
|
||||||
|
* @param dest Buffer into which to copy dequeued stream data
|
||||||
|
* @param num_bytes Bytes of data to dequeue
|
||||||
|
* @param required_bytes Minimum number of bytes to dequeue. If fewer bytes
|
||||||
|
* would be dequeued, dequeue none and return 0.
|
||||||
|
* @return size_t Number of bytes successfully dequeued
|
||||||
|
*/
|
||||||
|
size_t FPGAManagedStreams::FPGAToCPUDriver::pull(void *dest,
|
||||||
|
size_t num_bytes,
|
||||||
|
size_t required_bytes) {
|
||||||
|
assert(num_bytes >= required_bytes);
|
||||||
|
size_t bytes_in_buffer = mmio_read(params.bytesAvailableAddr);
|
||||||
|
if (bytes_in_buffer < required_bytes) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *src_addr = (char *)buffer_base + buffer_offset;
|
||||||
|
size_t first_copy_bytes =
|
||||||
|
((buffer_offset + bytes_in_buffer) > params.buffer_capacity)
|
||||||
|
? params.buffer_capacity - buffer_offset
|
||||||
|
: bytes_in_buffer;
|
||||||
|
std::memcpy(dest, src_addr, first_copy_bytes);
|
||||||
|
if (first_copy_bytes < bytes_in_buffer) {
|
||||||
|
std::memcpy((char *)dest + first_copy_bytes,
|
||||||
|
buffer_base,
|
||||||
|
bytes_in_buffer - first_copy_bytes);
|
||||||
|
}
|
||||||
|
buffer_offset = (buffer_offset + bytes_in_buffer) % params.buffer_capacity;
|
||||||
|
mmio_write(params.bytesConsumedAddr, bytes_in_buffer);
|
||||||
|
return bytes_in_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FPGAManagedStreams::FPGAToCPUDriver::flush() {
|
||||||
|
mmio_write(params.toHostStreamFlushAddr, 1);
|
||||||
|
// TODO: Consider if this should be made non-blocking // alternate API
|
||||||
|
auto flush_done = false;
|
||||||
|
int attempts = 0;
|
||||||
|
while (!flush_done) {
|
||||||
|
flush_done = (mmio_read(params.toHostStreamFlushDoneAddr) & 1);
|
||||||
|
if (++attempts > 256) {
|
||||||
|
exit(1); // Bridge stream flush appears to deadlock
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,88 @@
|
||||||
|
#ifndef __BRIDGES_FPGA_MANAGED_STREAM_H
|
||||||
|
#define __BRIDGES_FPGA_MANAGED_STREAM_H
|
||||||
|
|
||||||
|
// See LICENSE for license details.
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "bridge_stream_driver.h"
|
||||||
|
|
||||||
|
namespace FPGAManagedStreams {
|
||||||
|
/**
 * @brief Parameters for one FPGA-managed stream, emitted by Golden Gate.
 *
 * All fields whose name ends in "Addr" are MMIO addresses of per-stream
 * control registers.
 *
 * This will be replaced by a protobuf-derived class, and re-used across both
 * Scala and C++.
 */
struct StreamParameters {
  // Name of the stream.
  std::string stream_name;
  // Size, in bytes, of the circular buffer the driver reads from.
  uint32_t buffer_capacity;
  // Registers receiving the upper / lower 32 bits of the buffer's address.
  uint64_t toHostPhysAddrHighAddr;
  uint64_t toHostPhysAddrLowAddr;
  // Register read to learn how many bytes are ready to be pulled.
  uint64_t bytesAvailableAddr;
  // Register written with the number of bytes a pull has copied out.
  uint64_t bytesConsumedAddr;
  // NOTE(review): not referenced by the driver code alongside this struct;
  // presumably signals that initialization has finished -- confirm.
  uint64_t toHostStreamDoneInitAddr;
  // Register written with 1 to request a flush.
  uint64_t toHostStreamFlushAddr;
  // Register whose bit 0 is polled to detect that a flush has completed.
  uint64_t toHostStreamFlushDoneAddr;

  StreamParameters(std::string stream_name,
                   uint32_t buffer_capacity,
                   uint64_t toHostPhysAddrHighAddr,
                   uint64_t toHostPhysAddrLowAddr,
                   uint64_t bytesAvailableAddr,
                   uint64_t bytesConsumedAddr,
                   uint64_t toHostStreamDoneInitAddr,
                   uint64_t toHostStreamFlushAddr,
                   uint64_t toHostStreamFlushDoneAddr)
      : stream_name(stream_name),
        buffer_capacity(buffer_capacity),
        toHostPhysAddrHighAddr(toHostPhysAddrHighAddr),
        toHostPhysAddrLowAddr(toHostPhysAddrLowAddr),
        bytesAvailableAddr(bytesAvailableAddr),
        bytesConsumedAddr(bytesConsumedAddr),
        toHostStreamDoneInitAddr(toHostStreamDoneInitAddr),
        toHostStreamFlushAddr(toHostStreamFlushAddr),
        toHostStreamFlushDoneAddr(toHostStreamFlushDoneAddr) {}
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Implements streams sunk by the driver (sourced by the FPGA)
|
||||||
|
*
|
||||||
|
* Extends FPGAManagedStream to provide a pull method, which moves data from the
|
||||||
|
* FPGA into a user-provided buffer. IO over a FPGA-mastered AXI4 IF is
|
||||||
|
* implemented with pcis_read, and is provided by the host-platform.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class FPGAToCPUDriver : public FPGAToCPUStreamDriver {
|
||||||
|
public:
|
||||||
|
FPGAToCPUDriver(StreamParameters params,
|
||||||
|
void *buffer_base,
|
||||||
|
uint64_t buffer_base_fpga,
|
||||||
|
std::function<uint32_t(size_t)> mmio_read,
|
||||||
|
std::function<void(size_t, uint32_t)> mmio_write)
|
||||||
|
: params(params), buffer_base(buffer_base),
|
||||||
|
buffer_base_fpga(buffer_base_fpga), mmio_read_func(mmio_read),
|
||||||
|
mmio_write_func(mmio_write){};
|
||||||
|
|
||||||
|
virtual size_t
|
||||||
|
pull(void *dest, size_t num_bytes, size_t required_bytes) override;
|
||||||
|
virtual void flush() override;
|
||||||
|
virtual void init() override;
|
||||||
|
|
||||||
|
size_t mmio_read(size_t addr) { return mmio_read_func(addr); };
|
||||||
|
void mmio_write(size_t addr, uint32_t data) { mmio_write_func(addr, data); };
|
||||||
|
|
||||||
|
private:
|
||||||
|
StreamParameters params;
|
||||||
|
void *buffer_base;
|
||||||
|
uint64_t buffer_base_fpga;
|
||||||
|
std::function<uint32_t(size_t)> mmio_read_func;
|
||||||
|
std::function<void(size_t, uint32_t)> mmio_write_func;
|
||||||
|
|
||||||
|
// A read pointer offset from the base, in bytes
|
||||||
|
int buffer_offset = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace FPGAManagedStreams
|
||||||
|
|
||||||
|
#endif // __BRIDGES_FPGA_MANAGED_STREAM_H
|
|
@ -233,7 +233,7 @@ size_t synthesized_prints_t::process_tokens(size_t beats,
|
||||||
// See FireSim issue #208
|
// See FireSim issue #208
|
||||||
// This needs to be page aligned, as a DMA request that spans a page is
|
// This needs to be page aligned, as a DMA request that spans a page is
|
||||||
// fractured into a pair, and for reasons unknown, first beat of the second
|
// fractured into a pair, and for reasons unknown, first beat of the second
|
||||||
// request is lost. Once aligned, qequests larger than a page will be
|
// request is lost. Once aligned, requests larger than a page will be
|
||||||
// fractured into page-size (64-beat) requests and these seem to behave
|
// fractured into page-size (64-beat) requests and these seem to behave
|
||||||
// correctly.
|
// correctly.
|
||||||
alignas(4096) char buf[maximum_batch_bytes];
|
alignas(4096) char buf[maximum_batch_bytes];
|
||||||
|
@ -307,11 +307,14 @@ void synthesized_prints_t::flush() {
|
||||||
// empty. It might be safer to put a bound on this though.
|
// empty. It might be safer to put a bound on this though.
|
||||||
while (process_tokens(batch_beats, 0) != 0)
|
while (process_tokens(batch_beats, 0) != 0)
|
||||||
;
|
;
|
||||||
|
pull_flush(stream_idx);
|
||||||
|
process_tokens(batch_beats, 0);
|
||||||
|
|
||||||
// If multiple tokens are being packed into a single stream beat, force the
|
// If multiple tokens are being packed into a single stream beat, force the
|
||||||
// widget to write out any incomplete beat
|
// widget to write out any incomplete beat
|
||||||
if (token_bytes < beat_bytes) {
|
if (token_bytes < beat_bytes) {
|
||||||
write(mmio_addrs.flushNarrowPacket, 1);
|
write(mmio_addrs.flushNarrowPacket, 1);
|
||||||
|
pull_flush(stream_idx);
|
||||||
|
|
||||||
// On an FPGA reading from the stream will have enough latency that
|
// On an FPGA reading from the stream will have enough latency that
|
||||||
// process_tokens will return non-zero on the first attempt, introducing no
|
// process_tokens will return non-zero on the first attempt, introducing no
|
||||||
|
|
|
@ -52,6 +52,7 @@ void simif_t::target_init() {
|
||||||
if (!fastloadmem && !load_mem_path.empty()) {
|
if (!fastloadmem && !load_mem_path.empty()) {
|
||||||
loadmem.load_mem_from_file(load_mem_path);
|
loadmem.load_mem_from_file(load_mem_path);
|
||||||
}
|
}
|
||||||
|
host_mmio_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
int simif_t::simulation_run() {
|
int simif_t::simulation_run() {
|
||||||
|
|
|
@ -76,25 +76,22 @@ protected:
|
||||||
*
|
*
|
||||||
* Historically this god class wrapped all of the features presented by FireSim
|
* Historically this god class wrapped all of the features presented by FireSim
|
||||||
* / MIDAS-derived simulators. Critically, it declares an interface for
|
* / MIDAS-derived simulators. Critically, it declares an interface for
|
||||||
interacting with
|
* interacting with the host-FPGA, which consist of methods for implementing
|
||||||
* the host-FPGA, which consist of methods for implementing 32b MMIO (read,
|
* 32b MMIO (read, write), and latency-insensitive bridge streams (push, pull).
|
||||||
* write), and latency-insensitive bridge streams (push, pull). Concrete
|
* Concrete subclasses of simif_t must be written for metasimulation and each
|
||||||
* subclasses of simif_t must be written for metasimulation and each supported
|
* supported host platform. See simif_f1_t for an example.
|
||||||
* host plaform. See simif_f1_t for an example.
|
|
||||||
|
|
||||||
* simif_t also provides a few core functions that are tied to bridges and
|
* simif_t also provides a few core functions that are tied to bridges and
|
||||||
widgets that
|
* widgets that must be present in all simulators:
|
||||||
* must be present in all simulators:
|
|
||||||
*
|
*
|
||||||
* - To track simulation time, it provides methods to interact with the
|
* - To track simulation time, it provides methods to interact with the
|
||||||
* ClockBridge. This bridge is solely responsible for defining a schedule of
|
* ClockBridge. This bridge is solely responsible for defining a schedule of
|
||||||
* clock edges to simulate, and must be instantiated in all targets. See
|
* clock edges to simulate, and must be instantiated in all targets. See
|
||||||
actual_tcycle() and hcycle().
|
* actual_tcycle() and hcycle(). Utilities to report performance are based
|
||||||
* Utilities to report performance are based off these measures of time.
|
* off these measures of time.
|
||||||
*
|
*
|
||||||
* - To read and write into FPGA DRAM, the LoadMem widget provides a
|
* - To read and write into FPGA DRAM, the LoadMem widget provides a
|
||||||
* low-bandwidth side channel via MMIO. See read_mem, write_mem,
|
* low-bandwidth side channel via MMIO. See read_mem, write_mem,
|
||||||
zero_out_dram.
|
* zero_out_dram.
|
||||||
*/
|
*/
|
||||||
class simif_t {
|
class simif_t {
|
||||||
public:
|
public:
|
||||||
|
@ -122,6 +119,14 @@ public:
|
||||||
|
|
||||||
/** Bridge / Widget MMIO methods */
|
/** Bridge / Widget MMIO methods */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Provides a hook to do mmio-related initialization _before_ bridges.
|
||||||
|
*
|
||||||
|
* This permits setting up core simulation widgets (like stream engines) in a
|
||||||
|
* fashion that may vary across different specializations of simif_t.
|
||||||
|
*/
|
||||||
|
virtual void host_mmio_init() = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief 32b MMIO write, issued over the simulation control bus (AXI4-lite).
|
* @brief 32b MMIO write, issued over the simulation control bus (AXI4-lite).
|
||||||
*
|
*
|
||||||
|
@ -180,6 +185,23 @@ public:
|
||||||
void *src,
|
void *src,
|
||||||
size_t num_bytes,
|
size_t num_bytes,
|
||||||
size_t required_bytes) = 0;
|
size_t required_bytes) = 0;
|
||||||
|
/**
|
||||||
|
* @brief Hint that a stream should bypass any underlying batching
|
||||||
|
* optimizations.
|
||||||
|
*
|
||||||
|
* A user-directed hint that a stream should bypass any underlying batching
|
||||||
|
* optimizations. This may permit a future pull to read data that may
|
||||||
|
* otherwise remain queued in parts of the host.
|
||||||
|
*
|
||||||
|
* @param stream_no The index of the stream to flush
|
||||||
|
*/
|
||||||
|
virtual void pull_flush(unsigned int stream_no) = 0;
|
||||||
|
/**
|
||||||
|
* @brief Analogous to pull_flush but for CPU-to-FPGA streams
|
||||||
|
*
|
||||||
|
* @param stream_no The index of the stream to flush
|
||||||
|
*/
|
||||||
|
virtual void push_flush(unsigned int stream_no) = 0;
|
||||||
|
|
||||||
// End host-platform interface.
|
// End host-platform interface.
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include "simif_emul.h"
|
#include "simif_emul.h"
|
||||||
|
|
||||||
#include "bridges/cpu_managed_stream.h"
|
#include "bridges/cpu_managed_stream.h"
|
||||||
|
#include "bridges/fpga_managed_stream.h"
|
||||||
|
|
||||||
simif_emul_t::simif_emul_t(const std::vector<std::string> &args)
|
simif_emul_t::simif_emul_t(const std::vector<std::string> &args)
|
||||||
: simif_t(args) {
|
: simif_t(args) {
|
||||||
|
@ -40,6 +41,7 @@ simif_emul_t::simif_emul_t(const std::vector<std::string> &args)
|
||||||
|
|
||||||
using namespace std::placeholders;
|
using namespace std::placeholders;
|
||||||
auto mmio_read_func = std::bind(&simif_emul_t::read, this, _1);
|
auto mmio_read_func = std::bind(&simif_emul_t::read, this, _1);
|
||||||
|
auto mmio_write_func = std::bind(&simif_emul_t::write, this, _1, _2);
|
||||||
|
|
||||||
#ifdef CPUMANAGEDSTREAMENGINE_0_PRESENT
|
#ifdef CPUMANAGEDSTREAMENGINE_0_PRESENT
|
||||||
auto cpu_managed_axi4_read_func =
|
auto cpu_managed_axi4_read_func =
|
||||||
|
@ -48,31 +50,69 @@ simif_emul_t::simif_emul_t(const std::vector<std::string> &args)
|
||||||
std::bind(&simif_emul_t::cpu_managed_axi4_write, this, _1, _2, _3);
|
std::bind(&simif_emul_t::cpu_managed_axi4_write, this, _1, _2, _3);
|
||||||
|
|
||||||
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
|
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
|
||||||
auto params = CPUManagedStreamParameters(
|
auto params = CPUManagedStreams::StreamParameters(
|
||||||
std::string(CPUMANAGEDSTREAMENGINE_0_from_cpu_names[i]),
|
std::string(CPUMANAGEDSTREAMENGINE_0_from_cpu_names[i]),
|
||||||
CPUMANAGEDSTREAMENGINE_0_from_cpu_dma_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_from_cpu_dma_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_from_cpu_count_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_from_cpu_count_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
|
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
|
||||||
|
|
||||||
from_host_streams.push_back(
|
cpu_to_fpga_streams.push_back(
|
||||||
StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func));
|
std::make_unique<CPUManagedStreams::CPUToFPGADriver>(
|
||||||
|
params, mmio_read_func, cpu_managed_axi4_write_func));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
|
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
|
||||||
auto params = CPUManagedStreamParameters(
|
auto params = CPUManagedStreams::StreamParameters(
|
||||||
std::string(CPUMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
|
std::string(CPUMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
|
||||||
CPUMANAGEDSTREAMENGINE_0_to_cpu_dma_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_to_cpu_dma_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_to_cpu_count_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_to_cpu_count_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
|
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
|
||||||
|
|
||||||
to_host_streams.push_back(
|
fpga_to_cpu_streams.push_back(
|
||||||
StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func));
|
std::make_unique<CPUManagedStreams::FPGAToCPUDriver>(
|
||||||
|
params, mmio_read_func, cpu_managed_axi4_read_func));
|
||||||
}
|
}
|
||||||
#endif // CPUMANAGEDSTREAMENGINE_0_PRESENT
|
#endif // CPUMANAGEDSTREAMENGINE_0_PRESENT
|
||||||
|
#ifdef FPGAMANAGEDSTREAMENGINE_0_PRESENT
|
||||||
|
auto fpga_address_memory_base = ((char *)cpu_mem->get_data());
|
||||||
|
auto offset = 0;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < FPGAMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
|
||||||
|
auto params = FPGAManagedStreams::StreamParameters(
|
||||||
|
std::string(FPGAMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_fpgaBufferDepth[i],
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostPhysAddrHighAddrs[i],
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostPhysAddrLowAddrs[i],
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_bytesAvailableAddrs[i],
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_bytesConsumedAddrs[i],
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostStreamDoneInitAddrs[i],
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostStreamFlushAddrs[i],
|
||||||
|
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostStreamFlushDoneAddrs[i]);
|
||||||
|
|
||||||
|
fpga_to_cpu_streams.push_back(
|
||||||
|
std::make_unique<FPGAManagedStreams::FPGAToCPUDriver>(
|
||||||
|
params,
|
||||||
|
(void *)(fpga_address_memory_base + offset),
|
||||||
|
offset,
|
||||||
|
mmio_read_func,
|
||||||
|
mmio_write_func));
|
||||||
|
offset += params.buffer_capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // FPGAMANAGEDSTREAMENGINE_0_PRESENT
|
||||||
}
|
}
|
||||||
|
|
||||||
simif_emul_t::~simif_emul_t(){};
|
simif_emul_t::~simif_emul_t(){};
|
||||||
|
|
||||||
|
void simif_emul_t::host_mmio_init() {
|
||||||
|
for (auto &stream : this->fpga_to_cpu_streams) {
|
||||||
|
stream->init();
|
||||||
|
}
|
||||||
|
for (auto &stream : this->cpu_to_fpga_streams) {
|
||||||
|
stream->init();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
int simif_emul_t::run() {
|
int simif_emul_t::run() {
|
||||||
if (fastloadmem && !load_mem_path.empty()) {
|
if (fastloadmem && !load_mem_path.empty()) {
|
||||||
fprintf(stdout, "[fast loadmem] %s\n", load_mem_path.c_str());
|
fprintf(stdout, "[fast loadmem] %s\n", load_mem_path.c_str());
|
||||||
|
@ -119,8 +159,8 @@ size_t simif_emul_t::pull(unsigned stream_idx,
|
||||||
void *dest,
|
void *dest,
|
||||||
size_t num_bytes,
|
size_t num_bytes,
|
||||||
size_t threshold_bytes) {
|
size_t threshold_bytes) {
|
||||||
assert(stream_idx < to_host_streams.size());
|
assert(stream_idx < fpga_to_cpu_streams.size());
|
||||||
return this->to_host_streams[stream_idx].pull(
|
return this->fpga_to_cpu_streams[stream_idx]->pull(
|
||||||
dest, num_bytes, threshold_bytes);
|
dest, num_bytes, threshold_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -128,11 +168,21 @@ size_t simif_emul_t::push(unsigned stream_idx,
|
||||||
void *src,
|
void *src,
|
||||||
size_t num_bytes,
|
size_t num_bytes,
|
||||||
size_t threshold_bytes) {
|
size_t threshold_bytes) {
|
||||||
assert(stream_idx < from_host_streams.size());
|
assert(stream_idx < cpu_to_fpga_streams.size());
|
||||||
return this->from_host_streams[stream_idx].push(
|
return this->cpu_to_fpga_streams[stream_idx]->push(
|
||||||
src, num_bytes, threshold_bytes);
|
src, num_bytes, threshold_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Flushes the FPGA-to-CPU stream with the given index.
 *
 * @param stream_idx Index into the registered FPGA-to-CPU stream drivers;
 *        asserted to be in range.
 */
void simif_emul_t::pull_flush(unsigned stream_idx) {
  assert(stream_idx < fpga_to_cpu_streams.size());
  auto &driver = *fpga_to_cpu_streams[stream_idx];
  driver.flush();
}
|
||||||
|
|
||||||
|
/**
 * @brief Flushes the CPU-to-FPGA stream with the given index.
 *
 * @param stream_idx Index into the registered CPU-to-FPGA stream drivers;
 *        asserted to be in range.
 */
void simif_emul_t::push_flush(unsigned stream_idx) {
  assert(stream_idx < cpu_to_fpga_streams.size());
  auto &driver = *cpu_to_fpga_streams[stream_idx];
  driver.flush();
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
simif_emul_t::cpu_managed_axi4_read(size_t addr, char *data, size_t size) {
|
simif_emul_t::cpu_managed_axi4_read(size_t addr, char *data, size_t size) {
|
||||||
ssize_t len = (size - 1) / CPU_MANAGED_AXI4_BEAT_BYTES;
|
ssize_t len = (size - 1) / CPU_MANAGED_AXI4_BEAT_BYTES;
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#ifndef __SIMIF_EMUL_H
|
#ifndef __SIMIF_EMUL_H
|
||||||
#define __SIMIF_EMUL_H
|
#define __SIMIF_EMUL_H
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "bridges/cpu_managed_stream.h"
|
#include "bridges/cpu_managed_stream.h"
|
||||||
|
@ -21,6 +22,8 @@ public:
|
||||||
|
|
||||||
virtual void sim_init() = 0;
|
virtual void sim_init() = 0;
|
||||||
|
|
||||||
|
void host_mmio_init() override;
|
||||||
|
|
||||||
void write(size_t addr, uint32_t data) override;
|
void write(size_t addr, uint32_t data) override;
|
||||||
uint32_t read(size_t addr) override;
|
uint32_t read(size_t addr) override;
|
||||||
|
|
||||||
|
@ -32,6 +35,10 @@ public:
|
||||||
void *src,
|
void *src,
|
||||||
size_t num_bytes,
|
size_t num_bytes,
|
||||||
size_t threshold_bytes) override;
|
size_t threshold_bytes) override;
|
||||||
|
|
||||||
|
void pull_flush(unsigned int stream_no) override;
|
||||||
|
void push_flush(unsigned int stream_no) override;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Pointers to inter-context (i.e., between VCS/verilator and driver)
|
* @brief Pointers to inter-context (i.e., between VCS/verilator and driver)
|
||||||
* AXI4 transaction channels
|
* AXI4 transaction channels
|
||||||
|
@ -89,8 +96,8 @@ protected:
|
||||||
// Writes directly into the host DRAM models to initialize them.
|
// Writes directly into the host DRAM models to initialize them.
|
||||||
void load_mems(const char *fname);
|
void load_mems(const char *fname);
|
||||||
|
|
||||||
std::vector<StreamToCPU> to_host_streams;
|
std::vector<std::unique_ptr<FPGAToCPUStreamDriver>> fpga_to_cpu_streams;
|
||||||
std::vector<StreamFromCPU> from_host_streams;
|
std::vector<std::unique_ptr<CPUToFPGAStreamDriver>> cpu_to_fpga_streams;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __SIMIF_EMUL_H
|
#endif // __SIMIF_EMUL_H
|
||||||
|
|
|
@ -36,25 +36,25 @@ simif_f1_t::simif_f1_t(const std::vector<std::string> &args) : simif_t(args) {
|
||||||
std::bind(&simif_f1_t::cpu_managed_axi4_write, this, _1, _2, _3);
|
std::bind(&simif_f1_t::cpu_managed_axi4_write, this, _1, _2, _3);
|
||||||
|
|
||||||
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
|
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
|
||||||
auto params = CPUManagedStreamParameters(
|
auto params = CPUManagedStreams::StreamParameters(
|
||||||
std::string(CPUMANAGEDSTREAMENGINE_0_from_cpu_names[i]),
|
std::string(CPUMANAGEDSTREAMENGINE_0_from_cpu_names[i]),
|
||||||
CPUMANAGEDSTREAMENGINE_0_from_cpu_dma_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_from_cpu_dma_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_from_cpu_count_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_from_cpu_count_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
|
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
|
||||||
|
|
||||||
from_host_streams.push_back(
|
from_host_streams.push_back(CPUManagedStreams::CPUToFPGADriver(
|
||||||
StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func));
|
params, mmio_read_func, cpu_managed_axi4_write_func));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
|
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
|
||||||
auto params = CPUManagedStreamParameters(
|
auto params = CPUManagedStreams::StreamParameters(
|
||||||
std::string(CPUMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
|
std::string(CPUMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
|
||||||
CPUMANAGEDSTREAMENGINE_0_to_cpu_dma_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_to_cpu_dma_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_to_cpu_count_addrs[i],
|
CPUMANAGEDSTREAMENGINE_0_to_cpu_count_addrs[i],
|
||||||
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
|
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
|
||||||
|
|
||||||
to_host_streams.push_back(
|
to_host_streams.push_back(CPUManagedStreams::FPGAToCPUDriver(
|
||||||
StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func));
|
params, mmio_read_func, cpu_managed_axi4_read_func));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,8 +14,9 @@ public:
|
||||||
simif_f1_t(const std::vector<std::string> &args);
|
simif_f1_t(const std::vector<std::string> &args);
|
||||||
~simif_f1_t();
|
~simif_f1_t();
|
||||||
|
|
||||||
// Unused by F1 since initialization / deinitization is done in the
|
// Unused since no F1-specific MMIO is required to setup the simulation.
|
||||||
// constructor
|
void host_mmio_init() override{};
|
||||||
|
|
||||||
int run() override { return simulation_run(); }
|
int run() override { return simulation_run(); }
|
||||||
|
|
||||||
void write(size_t addr, uint32_t data) override;
|
void write(size_t addr, uint32_t data) override;
|
||||||
|
@ -28,6 +29,10 @@ public:
|
||||||
void *src,
|
void *src,
|
||||||
size_t num_bytes,
|
size_t num_bytes,
|
||||||
size_t threshold_bytes) override;
|
size_t threshold_bytes) override;
|
||||||
|
|
||||||
|
void pull_flush(unsigned int stream_no) override {}
|
||||||
|
void push_flush(unsigned int stream_no) override {}
|
||||||
|
|
||||||
uint32_t is_write_ready();
|
uint32_t is_write_ready();
|
||||||
void check_rc(int rc, char *infostr);
|
void check_rc(int rc, char *infostr);
|
||||||
void fpga_shutdown();
|
void fpga_shutdown();
|
||||||
|
@ -37,8 +42,8 @@ private:
|
||||||
char in_buf[CTRL_BEAT_BYTES];
|
char in_buf[CTRL_BEAT_BYTES];
|
||||||
char out_buf[CTRL_BEAT_BYTES];
|
char out_buf[CTRL_BEAT_BYTES];
|
||||||
|
|
||||||
std::vector<StreamToCPU> to_host_streams;
|
std::vector<CPUManagedStreams::FPGAToCPUDriver> to_host_streams;
|
||||||
std::vector<StreamFromCPU> from_host_streams;
|
std::vector<CPUManagedStreams::CPUToFPGADriver> from_host_streams;
|
||||||
|
|
||||||
size_t cpu_managed_axi4_write(size_t addr, char *data, size_t size);
|
size_t cpu_managed_axi4_write(size_t addr, char *data, size_t size);
|
||||||
size_t cpu_managed_axi4_read(size_t addr, char *data, size_t size);
|
size_t cpu_managed_axi4_read(size_t addr, char *data, size_t size);
|
||||||
|
|
|
@ -12,8 +12,10 @@ public:
|
||||||
simif_vitis_t(const std::vector<std::string> &args);
|
simif_vitis_t(const std::vector<std::string> &args);
|
||||||
~simif_vitis_t() {}
|
~simif_vitis_t() {}
|
||||||
|
|
||||||
// Unused by Vitis since initialization / deinitization is done in the
|
// Will be used once FPGA-managed AXI4 is fully plumbed through the shim
|
||||||
// constructor
|
// to setup the FPGAManagedStream engine.
|
||||||
|
void host_mmio_init() override{};
|
||||||
|
|
||||||
int run() override { return simulation_run(); }
|
int run() override { return simulation_run(); }
|
||||||
|
|
||||||
void write(size_t addr, uint32_t data) override;
|
void write(size_t addr, uint32_t data) override;
|
||||||
|
|
|
@ -5,20 +5,280 @@ package midas.core
|
||||||
import chisel3._
|
import chisel3._
|
||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
import freechips.rocketchip.amba.axi4._
|
import freechips.rocketchip.amba.axi4._
|
||||||
import freechips.rocketchip.config.{Parameters, Field}
|
import freechips.rocketchip.config.{Field, Parameters}
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy._
|
||||||
|
|
||||||
import midas.widgets._
|
import midas.widgets._
|
||||||
|
import midas.widgets.CppGenerationUtils._
|
||||||
|
|
||||||
|
/**
 * Metadata recorded for one AXI4 write request issued by the
 * FPGAManagedStreamEngine. An entry is enqueued when the write address
 * fires and dequeued when the matching write response fires.
 *
 * @param numBeatsWidth Bit width of the numBeats field.
 */
class WriteMetadata(val numBeatsWidth: Int) extends Bundle {
|
||||||
|
// Number of data beats covered by the write request.
val numBeats = Output(UInt(numBeatsWidth.W))
|
||||||
|
// Set when the request was issued while flush beats were still pending issue.
val isFlush = Output(Bool())
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a stub to foreshadow the other implementation
|
|
||||||
*/
|
|
||||||
class FPGAManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) extends StreamEngine(p) {
|
class FPGAManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) extends StreamEngine(p) {
|
||||||
|
require(sinkParams.isEmpty, "FPGAManagedStreamEngine does not currently support FPGA-sunk streams.")
|
||||||
|
|
||||||
|
// Beats refers to 512b words moving over a stream
|
||||||
|
val pageBytes = 4096
|
||||||
|
val beatBytes = BridgeStreamConstants.streamWidthBits / 8
|
||||||
|
val pageBeats = pageBytes / beatBytes
|
||||||
|
|
||||||
|
def maxFlightForStream(params: StreamSourceParameters): Int =
|
||||||
|
(params.fpgaBufferDepth * beatBytes) / pageBytes
|
||||||
|
|
||||||
val cpuManagedAXI4NodeOpt = None
|
val cpuManagedAXI4NodeOpt = None
|
||||||
val fpgaManagedAXI4NodeOpt = Some(midas.widgets.AXI4TieOff()(p))
|
|
||||||
|
val (fpgaManagedAXI4NodeOpt, toCPUNode) = if (hasStreams) {
|
||||||
|
// The implicit val defined in StreamEngine is not accessible here; Make a
|
||||||
|
// duplicate that can be referenced by diplomatic nodes
|
||||||
|
implicit val pShadow = p
|
||||||
|
val xbar = AXI4Xbar()
|
||||||
|
val toCPUNode = AXI4MasterNode(
|
||||||
|
sourceParams.map { p =>
|
||||||
|
AXI4MasterPortParameters(Seq(AXI4MasterParameters(name = p.name, maxFlight = Some(maxFlightForStream(p)))))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
xbar :=* AXI4Buffer() :=* toCPUNode
|
||||||
|
(Some(xbar), Some(toCPUNode))
|
||||||
|
} else {
|
||||||
|
(None, None)
|
||||||
|
}
|
||||||
|
|
||||||
lazy val module = new WidgetImp(this) {
|
lazy val module = new WidgetImp(this) {
|
||||||
val io = IO(new WidgetIO)
|
val io = IO(new WidgetIO)
|
||||||
|
|
||||||
|
/**
 * Per-stream parameters produced by elaborateToHostCPUStream and serialized
 * into the generated header by genHeader. Every field ending in "Addr" holds
 * the value returned by attach(...) for the named register; genHeader adds
 * the widget's base to it before emission.
 */
case class ToCPUStreamDriverParameters(
|
||||||
|
// Stream name (taken from the StreamSourceParameters).
name: String,
|
||||||
|
// NOTE: despite the name, elaborateToHostCPUStream populates this with
// cpuBufferSizeBytes, i.e. the size in bytes of the CPU-side buffer.
fpgaBufferDepth: Int,
|
||||||
|
// Register holding the upper 32 bits of the host buffer's physical address.
toHostPhysAddrHighAddr: Int,
|
||||||
|
// Register holding the lower 32 bits of the host buffer's physical address.
toHostPhysAddrLowAddr: Int,
|
||||||
|
// Read-only register exposing readCredits (bytes available to the driver).
bytesAvailableAddr: Int,
|
||||||
|
// Register the driver writes with the number of bytes it has consumed.
bytesConsumedAddr: Int,
|
||||||
|
// Register backing the doneInit flag.
toHostStreamDoneInitAddr: Int,
|
||||||
|
// Register backing the doFlush flag (written to request a flush).
toHostStreamFlushAddr: Int,
|
||||||
|
// Read-only register that reads true when no flush is requested or in progress.
toHostStreamFlushDoneAddr: Int,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Invoke this in the module implementation
|
||||||
|
def elaborateToHostCPUStream(
|
||||||
|
channel: DecoupledIO[UInt],
|
||||||
|
axi4: AXI4Bundle,
|
||||||
|
chParams: StreamSourceParameters,
|
||||||
|
): ToCPUStreamDriverParameters = {
|
||||||
|
|
||||||
|
require(
|
||||||
|
BridgeStreamConstants.streamWidthBits == axi4.params.dataBits,
|
||||||
|
s"FPGAManagedStreamEngine requires stream widths to match FPGA-managed AXI4 data width",
|
||||||
|
)
|
||||||
|
val cpuBufferDepthBeats = chParams.fpgaBufferDepth
|
||||||
|
require(cpuBufferDepthBeats > pageBeats)
|
||||||
|
val cpuBufferSizeBytes = (1 << log2Ceil(cpuBufferDepthBeats)) * (BridgeStreamConstants.streamWidthBits / 8)
|
||||||
|
// This is to simplify the hardware
|
||||||
|
require(isPow2(cpuBufferSizeBytes))
|
||||||
|
|
||||||
|
val toHostPhysAddrHigh = Reg(UInt(32.W))
|
||||||
|
val toHostPhysAddrLow = Reg(UInt(32.W))
|
||||||
|
val bytesConsumedByCPU = RegInit(0.U(log2Ceil(cpuBufferSizeBytes + 1).W))
|
||||||
|
|
||||||
|
// This sets up a double buffer that should give full throughput for a
|
||||||
|
// single stream system. This queue could be grown under a multi-stream system.
|
||||||
|
val outgoingQueue = Module(new BRAMQueue(2 * pageBeats)(UInt(BridgeStreamConstants.streamWidthBits.W)))
|
||||||
|
outgoingQueue.io.enq <> channel
|
||||||
|
|
||||||
|
val writeCredits = RegInit(cpuBufferSizeBytes.U(log2Ceil(cpuBufferSizeBytes + 1).W))
|
||||||
|
val readCredits = RegInit(0.U(log2Ceil(cpuBufferSizeBytes + 1).W))
|
||||||
|
val writePtr = RegInit(0.U(log2Ceil(cpuBufferSizeBytes).W))
|
||||||
|
val doneInit = RegInit(false.B)
|
||||||
|
// Key assumption: write acknowledgements can be used as a synchronization
|
||||||
|
// point, after which the CPU can read new data written into its circular
|
||||||
|
// buffer. This tracks inflight requests, to increment read credits on
|
||||||
|
// write acknowledgement, and to cap maxflight.
|
||||||
|
val inflightBeatCounts = Module(
|
||||||
|
new Queue(new WriteMetadata(log2Ceil(pageBeats + 1)), maxFlightForStream(chParams))
|
||||||
|
)
|
||||||
|
|
||||||
|
val idle :: sendAddress :: sendData :: Nil = Enum(3)
|
||||||
|
val state = RegInit(idle)
|
||||||
|
val beatsToSendMinus1 = RegInit(0.U(log2Ceil(pageBeats).W))
|
||||||
|
|
||||||
|
// Ensure we do not cross page boundaries per AXI4 spec.
|
||||||
|
val beatsToPageBoundary =
|
||||||
|
pageBeats.U - writePtr(log2Ceil(pageBytes) - 1, log2Ceil(beatBytes))
|
||||||
|
assert((beatsToPageBoundary > 0.U) && (beatsToPageBoundary <= (pageBeats.U)))
|
||||||
|
|
||||||
|
// Establish the largest AXI4 write request we can make, by doing a min
|
||||||
|
// reduction over the following bounds:
|
||||||
|
val writeBounds = Seq(
|
||||||
|
outgoingQueue.io.count, // Beats available for enqueue in local FPGA buffer
|
||||||
|
writeCredits >> log2Ceil(beatBytes).U, // Space available in cpu buffer
|
||||||
|
beatsToPageBoundary,
|
||||||
|
) // Length to end of page
|
||||||
|
// NB: BeatsToPageBoundary covers the end of the circular buffer only because
|
||||||
|
// we ensure the buffer size is a multiple of page size
|
||||||
|
|
||||||
|
val writeableBeats = writeBounds.reduce { (a, b) => Mux(a < b, a, b) }
|
||||||
|
val writeableBeatsMinus1 = writeableBeats - 1.U
|
||||||
|
|
||||||
|
// This register resets itself to 0 on cycles it is not set by the host
|
||||||
|
// CPU. If it is non-zero it was written to in the last cycle, and so we
|
||||||
|
// know we can update credits.
|
||||||
|
assert(
|
||||||
|
!doneInit || (!(RegNext(bytesConsumedByCPU) =/= 0.U) || (bytesConsumedByCPU === 0.U)),
|
||||||
|
"Back-to-back MMIO accesses, or incorrect toggling on bytesConsumedByCPU",
|
||||||
|
)
|
||||||
|
when(bytesConsumedByCPU =/= 0.U) {
|
||||||
|
bytesConsumedByCPU := 0.U
|
||||||
|
writeCredits := writeCredits + bytesConsumedByCPU
|
||||||
|
readCredits := readCredits - bytesConsumedByCPU
|
||||||
|
}
|
||||||
|
|
||||||
|
val doFlush, inFlush = RegInit(false.B)
|
||||||
|
val flushBeatsToIssue, flushBeatsToAck = RegInit(0.U(log2Ceil(cpuBufferDepthBeats + 1).W))
|
||||||
|
|
||||||
|
assert(readCredits >= bytesConsumedByCPU, "Driver read more bytes than available in circular buffer.")
|
||||||
|
assert(
|
||||||
|
(writeCredits + bytesConsumedByCPU) <= cpuBufferSizeBytes.U,
|
||||||
|
"Driver granted more write credit than physically allowable.",
|
||||||
|
)
|
||||||
|
|
||||||
|
switch(state) {
|
||||||
|
is(idle) {
|
||||||
|
doFlush := false.B
|
||||||
|
when(doFlush && !inFlush && (outgoingQueue.io.count > 0.U)) {
|
||||||
|
inFlush := true.B
|
||||||
|
flushBeatsToIssue := outgoingQueue.io.count
|
||||||
|
flushBeatsToAck := outgoingQueue.io.count
|
||||||
|
}
|
||||||
|
val start =
|
||||||
|
(inflightBeatCounts.io.enq.ready) &&
|
||||||
|
((flushBeatsToIssue =/= 0.U) || (writeableBeats === beatsToPageBoundary))
|
||||||
|
|
||||||
|
when(start) { state := sendAddress }
|
||||||
|
}
|
||||||
|
is(sendAddress) {
|
||||||
|
when(axi4.aw.fire) {
|
||||||
|
state := sendData
|
||||||
|
beatsToSendMinus1 := writeableBeatsMinus1
|
||||||
|
writePtr := writePtr + (writeableBeats * beatBytes.U)
|
||||||
|
writeCredits := writeCredits + bytesConsumedByCPU - (writeableBeats * beatBytes.U)
|
||||||
|
flushBeatsToIssue := Mux(flushBeatsToIssue < writeableBeats, 0.U, flushBeatsToIssue - writeableBeats)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
is(sendData) {
|
||||||
|
when(axi4.w.fire) {
|
||||||
|
state := Mux(axi4.w.bits.last, idle, sendData)
|
||||||
|
beatsToSendMinus1 := beatsToSendMinus1 - 1.U
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
axi4.aw.valid := (state === sendAddress)
|
||||||
|
axi4.aw.bits.id := 0.U
|
||||||
|
axi4.aw.bits.addr := Cat(toHostPhysAddrHigh, toHostPhysAddrLow) + writePtr
|
||||||
|
axi4.aw.bits.len := writeableBeatsMinus1
|
||||||
|
axi4.aw.bits.size := (log2Ceil(beatBytes)).U
|
||||||
|
// This is assumed but not exposed by the PCIM interface, and is the
|
||||||
|
// default transaction type supported by XDMA-backed AXI4 IFs anyways
|
||||||
|
axi4.aw.bits.burst := AXI4Parameters.BURST_INCR
|
||||||
|
// This is to permit intermediate width adapters, etc., to pack narrower
|
||||||
|
// transactions into larger ones, in the event we make this IF narrower than 512b
|
||||||
|
axi4.aw.bits.cache := AXI4Parameters.CACHE_MODIFIABLE
|
||||||
|
// Assume page-sized transfers for now
|
||||||
|
// These fields are unused by F1 PCIM, but pick reasonable default values for future proofing
|
||||||
|
axi4.aw.bits.prot := 0.U // Unprivileged, secure, data access
|
||||||
|
axi4.aw.bits.qos := 0.U // Default; unused
|
||||||
|
axi4.aw.bits.lock := 0.U // Normal, non-exclusive
|
||||||
|
|
||||||
|
inflightBeatCounts.io.enq.valid := axi4.aw.fire
|
||||||
|
inflightBeatCounts.io.enq.bits.numBeats := writeableBeats
|
||||||
|
inflightBeatCounts.io.enq.bits.isFlush := flushBeatsToIssue =/= 0.U
|
||||||
|
|
||||||
|
axi4.w.valid := (state === sendData) && outgoingQueue.io.deq.valid
|
||||||
|
axi4.w.bits.data := outgoingQueue.io.deq.bits
|
||||||
|
axi4.w.bits.strb := ((BigInt(1) << beatBytes) - 1).U
|
||||||
|
axi4.w.bits.last := beatsToSendMinus1 === 0.U
|
||||||
|
outgoingQueue.io.deq.ready := (state === sendData) && axi4.w.ready
|
||||||
|
|
||||||
|
// Write Response handling
|
||||||
|
axi4.b.ready := true.B
|
||||||
|
|
||||||
|
val ackBeats = inflightBeatCounts.io.deq.bits.numBeats
|
||||||
|
val ackFlush = inflightBeatCounts.io.deq.bits.isFlush
|
||||||
|
when(axi4.b.fire) {
|
||||||
|
readCredits := readCredits + (ackBeats * beatBytes.U) - bytesConsumedByCPU
|
||||||
|
when(ackFlush) {
|
||||||
|
val remainingBeatsToAck = Mux(ackBeats < flushBeatsToAck, flushBeatsToAck - ackBeats, 0.U)
|
||||||
|
flushBeatsToAck := remainingBeatsToAck
|
||||||
|
inFlush := remainingBeatsToAck =/= 0.U
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inflightBeatCounts.io.deq.ready := axi4.b.fire
|
||||||
|
assert(!axi4.b.valid || inflightBeatCounts.io.deq.valid)
|
||||||
|
|
||||||
|
// We only use the write channels to implement FPGA-to-CPU streams
|
||||||
|
axi4.ar.valid := false.B
|
||||||
|
axi4.r.ready := false.B
|
||||||
|
|
||||||
|
// Register Driver-programmable MMIO registers
|
||||||
|
ToCPUStreamDriverParameters(
|
||||||
|
chParams.name,
|
||||||
|
cpuBufferSizeBytes,
|
||||||
|
attach(toHostPhysAddrHigh, s"${chParams.name}_toHostPhysAddrHigh"),
|
||||||
|
attach(toHostPhysAddrLow, s"${chParams.name}_toHostPhysAddrLow"),
|
||||||
|
attach(readCredits, s"${chParams.name}_bytesAvailable", ReadOnly),
|
||||||
|
attach(bytesConsumedByCPU, s"${chParams.name}_bytesConsumed"),
|
||||||
|
attach(doneInit, s"${chParams.name}_toHostStreamDoneInit"),
|
||||||
|
attach(doFlush, s"${chParams.name}_toHostStreamFlush"),
|
||||||
|
attach(!(doFlush || inFlush), s"${chParams.name}_toHostStreamFlushDone", ReadOnly),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
val sourceDriverParameters = if (hasStreams) {
|
||||||
|
val axi4Bundles = toCPUNode.get.out.map(_._1)
|
||||||
|
(for (((axi4IF, streamIF), params) <- axi4Bundles.zip(streamsToHostCPU).zip(sourceParams)) yield {
|
||||||
|
chisel3.experimental.prefix(params.name) {
|
||||||
|
elaborateToHostCPUStream(streamIF, axi4IF, params)
|
||||||
|
}
|
||||||
|
}).toSeq
|
||||||
|
} else {
|
||||||
|
Seq()
|
||||||
|
}
|
||||||
|
|
||||||
genCRFile()
|
genCRFile()
|
||||||
|
|
||||||
|
override def genHeader(base: BigInt, sb: StringBuilder) {
|
||||||
|
val headerWidgetName = getWName.toUpperCase
|
||||||
|
super.genHeader(base, sb)
|
||||||
|
|
||||||
|
def serializeStreamParameters(prefix: String, params: Seq[ToCPUStreamDriverParameters]): Unit = {
|
||||||
|
val numStreams = params.size
|
||||||
|
sb.append(genConstStatic(s"${headerWidgetName}_${prefix}_stream_count", UInt32(numStreams)))
|
||||||
|
|
||||||
|
// Hack: avoid emitting a zero-sized array by providing a dummy set of
|
||||||
|
// parameters when no streams are generated. This is a limitation of the
|
||||||
|
// current C emission strategy. Note, the actual number of streams is still reported above.
|
||||||
|
val placeholder = ToCPUStreamDriverParameters("UNUSED", 0, 0, 0, 0, 0, 0, 0, 0)
|
||||||
|
val nonEmptyParams = if (numStreams == 0) Seq(placeholder) else params
|
||||||
|
|
||||||
|
val arraysToEmit = Seq(
|
||||||
|
"names" -> nonEmptyParams.map { p => CStrLit(p.name) },
|
||||||
|
"fpgaBufferDepth" -> nonEmptyParams.map { p => UInt32(p.fpgaBufferDepth) },
|
||||||
|
"toHostPhysAddrHighAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostPhysAddrHighAddr) },
|
||||||
|
"toHostPhysAddrLowAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostPhysAddrLowAddr) },
|
||||||
|
"bytesAvailableAddrs" -> nonEmptyParams.map { p => UInt64(base + p.bytesAvailableAddr) },
|
||||||
|
"bytesConsumedAddrs" -> nonEmptyParams.map { p => UInt64(base + p.bytesConsumedAddr) },
|
||||||
|
"toHostStreamDoneInitAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostStreamDoneInitAddr) },
|
||||||
|
"toHostStreamFlushAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostStreamFlushAddr) },
|
||||||
|
"toHostStreamFlushDoneAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostStreamFlushDoneAddr) },
|
||||||
|
)
|
||||||
|
|
||||||
|
for ((name, values) <- arraysToEmit) {
|
||||||
|
sb.append(genArray(s"${headerWidgetName}_${prefix}_${name}", values))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
serializeStreamParameters("to_cpu", sourceDriverParameters)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -317,8 +317,11 @@ class FPGATop(implicit p: Parameters) extends LazyModule with HasWidgets {
|
||||||
beatBytes = params.dataBits / 8)
|
beatBytes = params.dataBits / 8)
|
||||||
))
|
))
|
||||||
|
|
||||||
streamingEngine.fpgaManagedAXI4NodeOpt.foreach {
|
streamingEngine.fpgaManagedAXI4NodeOpt match {
|
||||||
node := AXI4IdIndexer(params.idBits) := AXI4Buffer() := _
|
case Some(engineNode) =>
|
||||||
|
node := AXI4IdIndexer(params.idBits) := AXI4Buffer() := engineNode
|
||||||
|
case None =>
|
||||||
|
node := AXI4TieOff()
|
||||||
}
|
}
|
||||||
(node, params)
|
(node, params)
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ import midas.core.{
|
||||||
/**
|
/**
|
||||||
* Bridge Streams serve as means to do bulk transport from BridgeDriver to
|
* Bridge Streams serve as means to do bulk transport from BridgeDriver to
|
||||||
* BridgeModule and vice versa. Abstractly, they can be thought of as a 512b
|
* BridgeModule and vice versa. Abstractly, they can be thought of as a 512b
|
||||||
* wide latency-insensitive channel (i.e., queue).
|
* wide latency-insensitive channel (i.e., a queue with some unknown latency).
|
||||||
*
|
*
|
||||||
* The two mixins in this file implement the two directions of
|
* The two mixins in this file implement the two directions of
|
||||||
* producer-consumer relationships: [[StreamFromHostCPU]] add a stream in
|
* producer-consumer relationships: [[StreamFromHostCPU]] add a stream in
|
||||||
|
|
|
@ -14,6 +14,7 @@ class NoConfig extends Config(Parameters.empty)
|
||||||
class BaseMidasExamplesConfig extends Config(
|
class BaseMidasExamplesConfig extends Config(
|
||||||
new WithDefaultMemModel ++
|
new WithDefaultMemModel ++
|
||||||
new WithWiringTransform ++
|
new WithWiringTransform ++
|
||||||
|
new HostDebugFeatures ++
|
||||||
new Config((site, here, up) => {
|
new Config((site, here, up) => {
|
||||||
case SynthAsserts => true
|
case SynthAsserts => true
|
||||||
case GenerateMultiCycleRamModels => true
|
case GenerateMultiCycleRamModels => true
|
||||||
|
|
|
@ -6,22 +6,31 @@ import scala.util.matching.Regex
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
import org.scalatest.Suites
|
import org.scalatest.Suites
|
||||||
import org.scalatest.matchers.should._
|
import org.scalatest.matchers.should._
|
||||||
|
import freechips.rocketchip.config.Config
|
||||||
|
|
||||||
|
/**
 * Base platform configuration classes, one list per host platform.
 * TutorialSuite uses f1 as its default basePlatformConfig, appends that list
 * after any suite-specific platformConfigs, and joins the classes' simple
 * names with "_" to build the PLATFORM_CONFIG string.
 */
object BaseConfigs {
|
||||||
|
// Base configuration for F1 suites.
def f1 = Seq(classOf[DefaultF1Config])
|
||||||
|
// Base configuration for Vitis suites.
def vitis = Seq(classOf[DefaultVitisConfig])
|
||||||
|
}
|
||||||
|
|
||||||
abstract class TutorialSuite(
|
abstract class TutorialSuite(
|
||||||
val targetName: String, // See GeneratorUtils
|
val targetName: String, // See GeneratorUtils
|
||||||
targetConfigs: String = "NoConfig",
|
targetConfigs: String = "NoConfig",
|
||||||
platformConfigs: String = "HostDebugFeatures_DefaultF1Config",
|
platformConfigs: Seq[Class[_ <: Config]] = Seq(),
|
||||||
tracelen: Int = 8,
|
tracelen: Int = 8,
|
||||||
simulationArgs: Seq[String] = Seq()
|
simulationArgs: Seq[String] = Seq()
|
||||||
) extends firesim.TestSuiteCommon with Matchers {
|
) extends firesim.TestSuiteCommon with Matchers {
|
||||||
|
|
||||||
|
lazy val basePlatformConfig = BaseConfigs.f1.asInstanceOf[Seq[Class[_ <: Config]]]
|
||||||
val backendSimulator = "verilator"
|
val backendSimulator = "verilator"
|
||||||
|
def platformConfigString = (platformConfigs ++ basePlatformConfig).map(_.getSimpleName).mkString("_")
|
||||||
|
|
||||||
val targetTuple = s"$targetName-$targetConfigs-$platformConfigs"
|
|
||||||
|
val targetTuple = s"$targetName-$targetConfigs-${platformConfigString}"
|
||||||
val commonMakeArgs = Seq(s"TARGET_PROJECT=midasexamples",
|
val commonMakeArgs = Seq(s"TARGET_PROJECT=midasexamples",
|
||||||
s"DESIGN=$targetName",
|
s"DESIGN=$targetName",
|
||||||
s"TARGET_CONFIG=${targetConfigs}",
|
s"TARGET_CONFIG=${targetConfigs}",
|
||||||
s"PLATFORM_CONFIG=${platformConfigs}")
|
s"PLATFORM_CONFIG=${platformConfigString}")
|
||||||
|
|
||||||
def run(backend: String,
|
def run(backend: String,
|
||||||
debug: Boolean = false,
|
debug: Boolean = false,
|
||||||
|
@ -181,14 +190,17 @@ abstract class TutorialSuite(
|
||||||
|
|
||||||
//class PointerChaserF1Test extends TutorialSuite(
|
//class PointerChaserF1Test extends TutorialSuite(
|
||||||
// "PointerChaser", "PointerChaserConfig", simulationArgs = Seq("`cat runtime.conf`"))
|
// "PointerChaser", "PointerChaserConfig", simulationArgs = Seq("`cat runtime.conf`"))
|
||||||
|
|
||||||
class GCDF1Test extends TutorialSuite("GCD")
|
class GCDF1Test extends TutorialSuite("GCD")
|
||||||
|
class GCDVitisTest extends GCDF1Test { override lazy val basePlatformConfig = BaseConfigs.vitis }
|
||||||
|
|
||||||
// Hijack Parity to test all of the Midas-level backends
|
// Hijack Parity to test all of the Midas-level backends
|
||||||
class ParityF1Test extends TutorialSuite("Parity") {
|
class ParityF1Test extends TutorialSuite("Parity") {
|
||||||
runTest("verilator", true)
|
runTest("verilator", true)
|
||||||
runTest("vcs", true)
|
runTest("vcs", true)
|
||||||
}
|
}
|
||||||
|
class ParityVitisTest extends TutorialSuite("Parity") {
|
||||||
class ParityVitisTest extends TutorialSuite("Parity", platformConfigs = classOf[DefaultVitisConfig].getSimpleName) {
|
override lazy val basePlatformConfig = BaseConfigs.vitis
|
||||||
runTest("verilator", true)
|
runTest("verilator", true)
|
||||||
runTest("vcs", true)
|
runTest("vcs", true)
|
||||||
}
|
}
|
||||||
|
@ -254,7 +266,7 @@ class AutoCounterCoverModuleF1Test extends TutorialSuite("AutoCounterCoverModule
|
||||||
}
|
}
|
||||||
class AutoCounterPrintfF1Test extends TutorialSuite("AutoCounterPrintfModule",
|
class AutoCounterPrintfF1Test extends TutorialSuite("AutoCounterPrintfModule",
|
||||||
simulationArgs = Seq("+print-file=synthprinttest.out"),
|
simulationArgs = Seq("+print-file=synthprinttest.out"),
|
||||||
platformConfigs = "AutoCounterPrintf_HostDebugFeatures_DefaultF1Config") {
|
platformConfigs = classOf[AutoCounterPrintf] +: BaseConfigs.f1) {
|
||||||
diffSynthesizedLog("synthprinttest.out0", stdoutPrefix = "AUTOCOUNTER_PRINT CYCLE", synthPrefix = "CYCLE")
|
diffSynthesizedLog("synthprinttest.out0", stdoutPrefix = "AUTOCOUNTER_PRINT CYCLE", synthPrefix = "CYCLE")
|
||||||
}
|
}
|
||||||
class AutoCounterGlobalResetConditionF1Test extends TutorialSuite("AutoCounterGlobalResetCondition",
|
class AutoCounterGlobalResetConditionF1Test extends TutorialSuite("AutoCounterGlobalResetCondition",
|
||||||
|
@ -282,8 +294,12 @@ class AutoCounterGlobalResetConditionF1Test extends TutorialSuite("AutoCounterGl
|
||||||
|
|
||||||
class PrintfModuleF1Test extends TutorialSuite("PrintfModule",
|
class PrintfModuleF1Test extends TutorialSuite("PrintfModule",
|
||||||
simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) {
|
simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) {
|
||||||
|
runTest("vcs", true)
|
||||||
diffSynthesizedLog("synthprinttest.out0")
|
diffSynthesizedLog("synthprinttest.out0")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class PrintfModuleVitisTest extends PrintfModuleF1Test { override lazy val basePlatformConfig = BaseConfigs.vitis }
|
||||||
|
|
||||||
class NarrowPrintfModuleF1Test extends TutorialSuite("NarrowPrintfModule",
|
class NarrowPrintfModuleF1Test extends TutorialSuite("NarrowPrintfModule",
|
||||||
simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) {
|
simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) {
|
||||||
diffSynthesizedLog("synthprinttest.out0")
|
diffSynthesizedLog("synthprinttest.out0")
|
||||||
|
@ -353,6 +369,8 @@ class MulticlockPrintF1Test extends TutorialSuite("MulticlockPrintfModule",
|
||||||
synthLinesToDrop = 4)
|
synthLinesToDrop = 4)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class MulticlockPrintVitisTest extends MulticlockPrintF1Test { override lazy val basePlatformConfig = BaseConfigs.vitis }
|
||||||
|
|
||||||
class MulticlockAutoCounterF1Test extends TutorialSuite("MulticlockAutoCounterModule",
|
class MulticlockAutoCounterF1Test extends TutorialSuite("MulticlockAutoCounterModule",
|
||||||
simulationArgs = Seq("+autocounter-readrate=1000", "+autocounter-filename-base=autocounter")) {
|
simulationArgs = Seq("+autocounter-readrate=1000", "+autocounter-filename-base=autocounter")) {
|
||||||
checkAutoCounterCSV("autocounter0.csv", "AUTOCOUNTER_PRINT ")
|
checkAutoCounterCSV("autocounter0.csv", "AUTOCOUNTER_PRINT ")
|
||||||
|
@ -395,7 +413,7 @@ class PassthroughModelBridgeSourceTest extends TutorialSuite("PassthroughModelBr
|
||||||
class ResetPulseBridgeActiveHighTest extends TutorialSuite(
|
class ResetPulseBridgeActiveHighTest extends TutorialSuite(
|
||||||
"ResetPulseBridgeTest",
|
"ResetPulseBridgeTest",
|
||||||
// Disable assertion synthesis to rely on native chisel assertions to catch bad behavior
|
// Disable assertion synthesis to rely on native chisel assertions to catch bad behavior
|
||||||
platformConfigs = "NoSynthAsserts_HostDebugFeatures_DefaultF1Config",
|
platformConfigs = classOf[NoSynthAsserts] +: BaseConfigs.f1,
|
||||||
simulationArgs = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength}")) {
|
simulationArgs = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength}")) {
|
||||||
runTest(backendSimulator,
|
runTest(backendSimulator,
|
||||||
args = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength + 1}"),
|
args = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength + 1}"),
|
||||||
|
@ -405,7 +423,7 @@ class ResetPulseBridgeActiveHighTest extends TutorialSuite(
|
||||||
class ResetPulseBridgeActiveLowTest extends TutorialSuite(
|
class ResetPulseBridgeActiveLowTest extends TutorialSuite(
|
||||||
"ResetPulseBridgeTest",
|
"ResetPulseBridgeTest",
|
||||||
targetConfigs = "ResetPulseBridgeActiveLowConfig",
|
targetConfigs = "ResetPulseBridgeActiveLowConfig",
|
||||||
platformConfigs = "NoSynthAsserts_HostDebugFeatures_DefaultF1Config",
|
platformConfigs = classOf[NoSynthAsserts] +: BaseConfigs.f1,
|
||||||
simulationArgs = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength}")) {
|
simulationArgs = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength}")) {
|
||||||
runTest(backendSimulator,
|
runTest(backendSimulator,
|
||||||
args = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength + 1}"),
|
args = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength + 1}"),
|
||||||
|
@ -434,8 +452,7 @@ class CustomConstraintsF1Test extends TutorialSuite("CustomConstraints") {
|
||||||
atLeast (1, xdc) should fullyMatch regex "constrain_impl2 \\[reg WRAPPER_INST/CL/firesim_top/.*/dut/r1]".r
|
atLeast (1, xdc) should fullyMatch regex "constrain_impl2 \\[reg WRAPPER_INST/CL/firesim_top/.*/dut/r1]".r
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Midasexample Suite Collections
|
||||||
// Suite Collections
|
|
||||||
class ChiselExampleDesigns extends Suites(
|
class ChiselExampleDesigns extends Suites(
|
||||||
new GCDF1Test,
|
new GCDF1Test,
|
||||||
new ParityF1Test,
|
new ParityF1Test,
|
||||||
|
@ -499,6 +516,13 @@ class FMRCITests extends Suites(
|
||||||
new PassthroughModelBridgeSourceTest,
|
new PassthroughModelBridgeSourceTest,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
class VitisCITests extends Suites (
|
||||||
|
new GCDVitisTest,
|
||||||
|
new ParityVitisTest,
|
||||||
|
new PrintfModuleVitisTest,
|
||||||
|
new MulticlockPrintVitisTest,
|
||||||
|
)
|
||||||
|
|
||||||
// These groups are vestigial from CircleCI container limits
|
// These groups are vestigial from CircleCI container limits
|
||||||
class CIGroupA extends Suites(
|
class CIGroupA extends Suites(
|
||||||
new ChiselExampleDesigns,
|
new ChiselExampleDesigns,
|
||||||
|
@ -515,5 +539,6 @@ class CIGroupB extends Suites(
|
||||||
new firesim.fasedtests.CIGroupB,
|
new firesim.fasedtests.CIGroupB,
|
||||||
new firesim.AllMidasUnitTests,
|
new firesim.AllMidasUnitTests,
|
||||||
new firesim.FailingUnitTests,
|
new firesim.FailingUnitTests,
|
||||||
new FMRCITests
|
new FMRCITests,
|
||||||
|
new VitisCITests
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue