FPGA-managed bridge stream support in metasimulation (#1181)

* metasim-able FPGA-controlled bridge streams

* simif: Add a virtual method to permit doing streamengine init

* Remove unneeded vitis kernel def changes

* Address some of nandors comments
This commit is contained in:
David Biancolin 2022-12-24 11:18:03 -05:00 committed by GitHub
parent d74c8d639d
commit fdb5d6d439
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 651 additions and 82 deletions

View File

@ -29,7 +29,6 @@ project {
"glob:**midas/src/main/scala/midas/SynthUnitTests.scala",
"glob:**midas/src/main/scala/midas/core/CPUManagedStreamEngine.scala",
"glob:**midas/src/main/scala/midas/core/Channel.scala",
"glob:**midas/src/main/scala/midas/core/FPGAManagedStreamEngine.scala",
"glob:**midas/src/main/scala/midas/core/FPGATop.scala",
"glob:**midas/src/main/scala/midas/core/Interfaces.scala",
"glob:**midas/src/main/scala/midas/core/LIBDNUnitTest.scala",

View File

@ -292,6 +292,7 @@ void tracerv_t::tick() {
// Pull in any remaining tokens and flush them to file
void tracerv_t::flush() {
  // Hint the stream engine to make any internally buffered beats visible,
  // then drain tokens until process_tokens reports the stream is empty.
  pull_flush(stream_idx);
  while (this->trace_enabled && (process_tokens(this->stream_depth, 0) > 0))
    ;
}

View File

@ -72,6 +72,7 @@ protected:
return 0;
return sim->push(stream_idx, data, size, minimum_batch_size);
}
// Hint that stream stream_idx should bypass any batching so a subsequent
// pull can observe queued data; forwards to simif_t::pull_flush.
void pull_flush(unsigned stream_idx) { return sim->pull_flush(stream_idx); }
private:
simif_t *sim;

View File

@ -0,0 +1,22 @@
// See LICENSE for license details.
#ifndef __BRIDGES_BRIDGE_STREAM_DRIVER_H
#define __BRIDGES_BRIDGE_STREAM_DRIVER_H
/**
 * @brief Abstract interface for a bridge stream sourced by the FPGA and
 * sunk by the driver (CPU).
 */
class FPGAToCPUStreamDriver {
public:
  virtual ~FPGAToCPUStreamDriver() = default;
  // One-time setup of the stream (e.g., programming MMIO registers).
  virtual void init() = 0;
  /**
   * @brief Dequeue up to num_bytes of stream data into dest.
   *
   * If fewer than required_bytes are available, dequeue nothing and
   * return 0; otherwise return the number of bytes dequeued.
   */
  virtual size_t pull(void *dest, size_t num_bytes, size_t required_bytes) = 0;
  // Hint that buffered data should be made visible to a future pull.
  virtual void flush() = 0;
};
/**
 * @brief Abstract interface for a bridge stream sourced by the driver (CPU)
 * and sunk by the FPGA.
 */
class CPUToFPGAStreamDriver {
public:
  virtual ~CPUToFPGAStreamDriver() = default;
  // One-time setup of the stream (e.g., programming MMIO registers).
  virtual void init() = 0;
  /**
   * @brief Enqueue up to num_bytes of data from src onto the stream.
   *
   * If fewer than required_bytes could be enqueued, enqueue nothing and
   * return 0; otherwise return the number of bytes enqueued.
   */
  virtual size_t push(void *src, size_t num_bytes, size_t required_bytes) = 0;
  // Hint that buffered data should be delivered to the FPGA.
  virtual void flush() = 0;
};
#endif // __BRIDGES_BRIDGE_STREAM_DRIVER_H

View File

@ -12,7 +12,9 @@
* would be enqueued, this method enqueues none and returns 0.
* @return size_t
*/
size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) {
size_t CPUManagedStreams::CPUToFPGADriver::push(void *src,
size_t num_bytes,
size_t required_bytes) {
assert(num_bytes >= required_bytes);
// Similarly to above, the legacy implementation of DMA does not correctly
@ -51,7 +53,9 @@ size_t StreamFromCPU::push(void *src, size_t num_bytes, size_t required_bytes) {
* would be dequeued, dequeue none and return 0.
* @return size_t Number of bytes successfully dequeued
*/
size_t StreamToCPU::pull(void *dest, size_t num_bytes, size_t required_bytes) {
size_t CPUManagedStreams::FPGAToCPUDriver::pull(void *dest,
size_t num_bytes,
size_t required_bytes) {
assert(num_bytes >= required_bytes);
// The legacy code is clearly broken for requests that aren't a

View File

@ -1,29 +1,33 @@
// See LICENSE for license details.
#ifndef __CPU_MANAGED_STREAM_H
#define __CPU_MANAGED_STREAM_H
#ifndef __BRIDGES_CPU_MANAGED_STREAM_H
#define __BRIDGES_CPU_MANAGED_STREAM_H
#include <functional>
#include <string>
#include "bridge_stream_driver.h"
namespace CPUManagedStreams {
/**
* @brief Parameters emitted for a CPU-managed stream emitted by Golden Gate.
*
* This will be replaced by a protobuf-derived class, and re-used across both
* Scala and C++.
*/
typedef struct CPUManagedStreamParameters {
typedef struct StreamParameters {
std::string stream_name;
uint64_t dma_addr;
uint64_t count_addr;
uint32_t fpga_buffer_size;
CPUManagedStreamParameters(std::string stream_name,
uint64_t dma_addr,
uint64_t count_addr,
int fpga_buffer_size)
StreamParameters(std::string stream_name,
uint64_t dma_addr,
uint64_t count_addr,
int fpga_buffer_size)
: stream_name(stream_name), dma_addr(dma_addr), count_addr(count_addr),
fpga_buffer_size(fpga_buffer_size){};
} CPUManagedStreamParameters;
} StreamParameters;
/**
* @brief Base class for CPU-managed streams
@ -39,14 +43,15 @@ typedef struct CPUManagedStreamParameters {
* FPGA-managed AXI4 for their platform.
*
*/
class CPUManagedStream {
class CPUManagedDriver {
public:
CPUManagedStream(CPUManagedStreamParameters params,
CPUManagedDriver(StreamParameters params,
std::function<uint32_t(size_t)> mmio_read_func)
: params(params), mmio_read_func(mmio_read_func){};
virtual ~CPUManagedDriver(){};
private:
CPUManagedStreamParameters params;
StreamParameters params;
std::function<uint32_t(size_t)> mmio_read_func;
public:
@ -65,14 +70,20 @@ public:
* implemented with axi4_read, and is provided by the host-platform.
*
*/
class StreamToCPU : public CPUManagedStream {
class FPGAToCPUDriver final : public CPUManagedDriver,
public FPGAToCPUStreamDriver {
public:
StreamToCPU(CPUManagedStreamParameters params,
std::function<uint32_t(size_t)> mmio_read,
std::function<size_t(size_t, char *, size_t)> axi4_read)
: CPUManagedStream(params, mmio_read), axi4_read(axi4_read){};
FPGAToCPUDriver(StreamParameters params,
std::function<uint32_t(size_t)> mmio_read,
std::function<size_t(size_t, char *, size_t)> axi4_read)
: CPUManagedDriver(params, mmio_read), axi4_read(axi4_read){};
size_t pull(void *dest, size_t num_bytes, size_t required_bytes);
virtual size_t
pull(void *dest, size_t num_bytes, size_t required_bytes) override;
// The CPU-managed stream engine makes all beats available to the bridge,
// hence the NOP.
virtual void flush() override{};
virtual void init() override{};
private:
std::function<size_t(size_t, char *, size_t)> axi4_read;
@ -85,17 +96,24 @@ private:
* FPGA out of a user-provided buffer. IO over a CPU-managed AXI4 IF is
* implemented with axi4_write, and is provided by the host-platform.
*/
class StreamFromCPU : public CPUManagedStream {
class CPUToFPGADriver final : public CPUManagedDriver,
public CPUToFPGAStreamDriver {
public:
StreamFromCPU(CPUManagedStreamParameters params,
std::function<uint32_t(size_t)> mmio_read,
std::function<size_t(size_t, char *, size_t)> axi4_write)
: CPUManagedStream(params, mmio_read), axi4_write(axi4_write){};
CPUToFPGADriver(StreamParameters params,
std::function<uint32_t(size_t)> mmio_read,
std::function<size_t(size_t, char *, size_t)> axi4_write)
: CPUManagedDriver(params, mmio_read), axi4_write(axi4_write){};
size_t push(void *src, size_t num_bytes, size_t required_bytes);
virtual size_t
push(void *src, size_t num_bytes, size_t required_bytes) override;
// On a push all beats are delivered to the FPGA, so a NOP is sufficient here.
virtual void flush() override{};
virtual void init() override{};
private:
std::function<size_t(size_t, char *, size_t)> axi4_write;
};
#endif // __CPU_MANAGED_STREAM_H
} // namespace CPUManagedStreams
#endif // __BRIDGES_CPU_MANAGED_STREAM_H

View File

@ -0,0 +1,57 @@
#include "fpga_managed_stream.h"
#include <assert.h>
#include <cstring>
#include <iostream>
// Program the stream engine with the FPGA-visible physical base address of
// the host circular buffer, split across the high and low 32b MMIO registers.
void FPGAManagedStreams::FPGAToCPUDriver::init() {
  const uint64_t phys_base = buffer_base_fpga;
  mmio_write(params.toHostPhysAddrHighAddr,
             static_cast<uint32_t>(phys_base >> 32));
  mmio_write(params.toHostPhysAddrLowAddr, static_cast<uint32_t>(phys_base));
}
/**
 * @brief Dequeues as much as num_bytes of data from the associated bridge
 * stream.
 *
 * @param dest Buffer into which to copy dequeued stream data
 * @param num_bytes Bytes of data to dequeue
 * @param required_bytes Minimum number of bytes to dequeue. If fewer bytes
 * would be dequeued, dequeue none and return 0.
 * @return size_t Number of bytes successfully dequeued
 */
size_t FPGAManagedStreams::FPGAToCPUDriver::pull(void *dest,
                                                 size_t num_bytes,
                                                 size_t required_bytes) {
  assert(num_bytes >= required_bytes);
  size_t bytes_in_buffer = mmio_read(params.bytesAvailableAddr);
  if (bytes_in_buffer < required_bytes) {
    return 0;
  }
  // Bug fix: never copy more than the caller's buffer can hold. Previously
  // all available bytes were copied into dest, overrunning it whenever
  // bytes_in_buffer > num_bytes. Unconsumed bytes stay in the circular
  // buffer (and un-returned credit) for a future pull.
  size_t bytes_to_copy =
      (bytes_in_buffer < num_bytes) ? bytes_in_buffer : num_bytes;

  // The circular buffer may wrap: copy up to the end of the buffer first...
  void *src_addr = (char *)buffer_base + buffer_offset;
  size_t first_copy_bytes =
      ((buffer_offset + bytes_to_copy) > params.buffer_capacity)
          ? params.buffer_capacity - buffer_offset
          : bytes_to_copy;
  std::memcpy(dest, src_addr, first_copy_bytes);
  // ...then copy any remainder from the start of the buffer.
  if (first_copy_bytes < bytes_to_copy) {
    std::memcpy((char *)dest + first_copy_bytes,
                buffer_base,
                bytes_to_copy - first_copy_bytes);
  }
  buffer_offset = (buffer_offset + bytes_to_copy) % params.buffer_capacity;
  // Return credit to the FPGA-side engine for exactly what was consumed.
  mmio_write(params.bytesConsumedAddr, bytes_to_copy);
  return bytes_to_copy;
}
// Request that the engine push all buffered beats to the host, then spin on
// MMIO until the engine reports the flush has completed.
void FPGAManagedStreams::FPGAToCPUDriver::flush() {
  mmio_write(params.toHostStreamFlushAddr, 1);
  // TODO: Consider if this should be made non-blocking // alternate API
  bool flush_done = false;
  int attempts = 0;
  while (!flush_done) {
    flush_done = ((mmio_read(params.toHostStreamFlushDoneAddr) & 1) != 0);
    if (++attempts > 256) {
      // Fail loudly instead of silently hanging the simulation.
      std::cerr << "FPGAToCPUDriver: flush of stream " << params.stream_name
                << " appears to deadlock" << std::endl;
      exit(1);
    }
  }
}

View File

@ -0,0 +1,88 @@
#ifndef __BRIDGES_FPGA_MANAGED_STREAM_H
#define __BRIDGES_FPGA_MANAGED_STREAM_H
// See LICENSE for license details.
#include <functional>
#include <string>
#include "bridge_stream_driver.h"
namespace FPGAManagedStreams {
/**
* @brief Parameters emitted for a FPGA-managed stream emitted by Golden Gate.
*
* This will be replaced by a protobuf-derived class, and re-used across both
* Scala and C++.
*/
/**
 * @brief Parameters emitted for an FPGA-managed stream emitted by Golden
 * Gate: the stream's name, the capacity of its host-side circular buffer,
 * and the MMIO addresses of the registers that control the stream engine.
 *
 * This will be replaced by a protobuf-derived class, and re-used across both
 * Scala and C++.
 */
// C++ note: a plain struct definition suffices; the C-style
// `typedef struct ... X;` dance is redundant here.
struct StreamParameters {
  std::string stream_name;
  // Capacity, in bytes, of the circular buffer in host memory.
  uint32_t buffer_capacity;
  // MMIO register addresses, as emitted by Golden Gate.
  uint64_t toHostPhysAddrHighAddr;
  uint64_t toHostPhysAddrLowAddr;
  uint64_t bytesAvailableAddr;
  uint64_t bytesConsumedAddr;
  uint64_t toHostStreamDoneInitAddr;
  uint64_t toHostStreamFlushAddr;
  uint64_t toHostStreamFlushDoneAddr;
  StreamParameters(std::string stream_name,
                   uint32_t buffer_capacity,
                   uint64_t toHostPhysAddrHighAddr,
                   uint64_t toHostPhysAddrLowAddr,
                   uint64_t bytesAvailableAddr,
                   uint64_t bytesConsumedAddr,
                   uint64_t toHostStreamDoneInitAddr,
                   uint64_t toHostStreamFlushAddr,
                   uint64_t toHostStreamFlushDoneAddr)
      : stream_name(stream_name), buffer_capacity(buffer_capacity),
        toHostPhysAddrHighAddr(toHostPhysAddrHighAddr),
        toHostPhysAddrLowAddr(toHostPhysAddrLowAddr),
        bytesAvailableAddr(bytesAvailableAddr),
        bytesConsumedAddr(bytesConsumedAddr),
        toHostStreamDoneInitAddr(toHostStreamDoneInitAddr),
        toHostStreamFlushAddr(toHostStreamFlushAddr),
        toHostStreamFlushDoneAddr(toHostStreamFlushDoneAddr) {}
};
/**
 * @brief Implements streams sunk by the driver (sourced by the FPGA)
 *
 * Provides a pull method which moves data from a circular buffer in host
 * memory (written by the FPGA over its FPGA-managed AXI4 IF) into a
 * user-provided buffer. Control and credit exchange with the stream engine
 * is done over MMIO via the provided read / write functions.
 */
class FPGAToCPUDriver : public FPGAToCPUStreamDriver {
public:
  FPGAToCPUDriver(StreamParameters params,
                  void *buffer_base,
                  uint64_t buffer_base_fpga,
                  std::function<uint32_t(size_t)> mmio_read,
                  std::function<void(size_t, uint32_t)> mmio_write)
      : params(params), buffer_base(buffer_base),
        buffer_base_fpga(buffer_base_fpga), mmio_read_func(mmio_read),
        mmio_write_func(mmio_write){};
  virtual size_t
  pull(void *dest, size_t num_bytes, size_t required_bytes) override;
  virtual void flush() override;
  virtual void init() override;
  size_t mmio_read(size_t addr) { return mmio_read_func(addr); };
  void mmio_write(size_t addr, uint32_t data) { mmio_write_func(addr, data); };

private:
  StreamParameters params;
  // Host (virtual) address of the circular buffer.
  void *buffer_base;
  // Address of the same buffer in the FPGA's view of the memory space.
  uint64_t buffer_base_fpga;
  std::function<uint32_t(size_t)> mmio_read_func;
  std::function<void(size_t, uint32_t)> mmio_write_func;
  // A read pointer offset from the base, in bytes.
  // size_t (was int): avoids mixing a signed offset into the unsigned
  // wrap-around arithmetic against params.buffer_capacity done in pull().
  size_t buffer_offset = 0;
};
} // namespace FPGAManagedStreams
#endif // __BRIDGES_FPGA_MANAGED_STREAM_H

View File

@ -233,7 +233,7 @@ size_t synthesized_prints_t::process_tokens(size_t beats,
// See FireSim issue #208
// This needs to be page aligned, as a DMA request that spans a page is
// fractured into a pair, and for reasons unknown, first beat of the second
// request is lost. Once aligned, qequests larger than a page will be
// request is lost. Once aligned, requests larger than a page will be
// fractured into page-size (64-beat) requests and these seem to behave
// correctly.
alignas(4096) char buf[maximum_batch_bytes];
@ -307,11 +307,14 @@ void synthesized_prints_t::flush() {
// empty. It might be safer to put a bound on this though.
while (process_tokens(batch_beats, 0) != 0)
;
pull_flush(stream_idx);
process_tokens(batch_beats, 0);
// If multiple tokens are being packed into a single stream beat, force the
// widget to write out any incomplete beat
if (token_bytes < beat_bytes) {
write(mmio_addrs.flushNarrowPacket, 1);
pull_flush(stream_idx);
// On an FPGA reading from the stream will have enough latency that
// process_tokens will return non-zero on the first attempt, introducing no

View File

@ -52,6 +52,7 @@ void simif_t::target_init() {
if (!fastloadmem && !load_mem_path.empty()) {
loadmem.load_mem_from_file(load_mem_path);
}
host_mmio_init();
}
int simif_t::simulation_run() {

View File

@ -76,25 +76,22 @@ protected:
*
* Historically this god class wrapped all of the features presented by FireSim
* / MIDAS-derived simulators. Critically, it declares an interface for
interacting with
* the host-FPGA, which consist of methods for implementing 32b MMIO (read,
* write), and latency-insensitive bridge streams (push, pull). Concrete
* subclasses of simif_t must be written for metasimulation and each supported
* host plaform. See simif_f1_t for an example.
* interacting with the host-FPGA, which consist of methods for implementing
* 32b MMIO (read, write), and latency-insensitive bridge streams (push, pull).
* Concrete subclasses of simif_t must be written for metasimulation and each
* supported host platform. See simif_f1_t for an example.
* simif_t also provides a few core functions that are tied to bridges and
widgets that
* must be present in all simulators:
* widgets that must be present in all simulators:
*
* - To track simulation time, it provides methods to interact with the
* ClockBridge. This bridge is solely responsible for defining a schedule of
* clock edges to simulate, and must be instantiated in all targets. See
actual_tcycle() and hcycle().
* Utilities to report performance are based off these measures of time.
* actual_tcycle() and hcycle(). Utilities to report performance are based
* off these measures of time.
*
* - To read and write into FPGA DRAM, the LoadMem widget provides a
* low-bandwidth side channel via MMIO. See read_mem, write_mem,
zero_out_dram.
* zero_out_dram.
*/
class simif_t {
public:
@ -122,6 +119,14 @@ public:
/** Bridge / Widget MMIO methods */
/**
* @brief Provides a hook to do mmio-related initialization _before_ bridges.
*
* This permits setting up core simulation widgets (like stream engines) in a
* fashion that may vary across different specializations of simif_t.
*/
virtual void host_mmio_init() = 0;
/**
* @brief 32b MMIO write, issued over the simulation control bus (AXI4-lite).
*
@ -180,6 +185,23 @@ public:
void *src,
size_t num_bytes,
size_t required_bytes) = 0;
/**
* @brief Hint that a stream should bypass any underlying batching
* optimizations.
*
* A user-directed hint that a stream should bypass any underlying batching
* optimizations. This may permit a future pull to read data that may
* otherwise remain queued in parts of the host.
*
* @param stream_no The index of the stream to flush
*/
virtual void pull_flush(unsigned int stream_no) = 0;
/**
* @brief Analogous to pull_flush but for CPU-to-FPGA streams
*
* @param stream_no The index of the stream to flush
*/
virtual void push_flush(unsigned int stream_no) = 0;
// End host-platform interface.

View File

@ -3,6 +3,7 @@
#include "simif_emul.h"
#include "bridges/cpu_managed_stream.h"
#include "bridges/fpga_managed_stream.h"
simif_emul_t::simif_emul_t(const std::vector<std::string> &args)
: simif_t(args) {
@ -40,6 +41,7 @@ simif_emul_t::simif_emul_t(const std::vector<std::string> &args)
using namespace std::placeholders;
auto mmio_read_func = std::bind(&simif_emul_t::read, this, _1);
auto mmio_write_func = std::bind(&simif_emul_t::write, this, _1, _2);
#ifdef CPUMANAGEDSTREAMENGINE_0_PRESENT
auto cpu_managed_axi4_read_func =
@ -48,31 +50,69 @@ simif_emul_t::simif_emul_t(const std::vector<std::string> &args)
std::bind(&simif_emul_t::cpu_managed_axi4_write, this, _1, _2, _3);
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
auto params = CPUManagedStreamParameters(
auto params = CPUManagedStreams::StreamParameters(
std::string(CPUMANAGEDSTREAMENGINE_0_from_cpu_names[i]),
CPUMANAGEDSTREAMENGINE_0_from_cpu_dma_addrs[i],
CPUMANAGEDSTREAMENGINE_0_from_cpu_count_addrs[i],
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
from_host_streams.push_back(
StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func));
cpu_to_fpga_streams.push_back(
std::make_unique<CPUManagedStreams::CPUToFPGADriver>(
params, mmio_read_func, cpu_managed_axi4_write_func));
}
for (size_t i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
auto params = CPUManagedStreamParameters(
auto params = CPUManagedStreams::StreamParameters(
std::string(CPUMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
CPUMANAGEDSTREAMENGINE_0_to_cpu_dma_addrs[i],
CPUMANAGEDSTREAMENGINE_0_to_cpu_count_addrs[i],
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
to_host_streams.push_back(
StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func));
fpga_to_cpu_streams.push_back(
std::make_unique<CPUManagedStreams::FPGAToCPUDriver>(
params, mmio_read_func, cpu_managed_axi4_read_func));
}
#endif // CPUMANAGEDSTREAMENGINE_0_PRESENT
#ifdef FPGAMANAGEDSTREAMENGINE_0_PRESENT
auto fpga_address_memory_base = ((char *)cpu_mem->get_data());
auto offset = 0;
for (size_t i = 0; i < FPGAMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
auto params = FPGAManagedStreams::StreamParameters(
std::string(FPGAMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
FPGAMANAGEDSTREAMENGINE_0_to_cpu_fpgaBufferDepth[i],
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostPhysAddrHighAddrs[i],
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostPhysAddrLowAddrs[i],
FPGAMANAGEDSTREAMENGINE_0_to_cpu_bytesAvailableAddrs[i],
FPGAMANAGEDSTREAMENGINE_0_to_cpu_bytesConsumedAddrs[i],
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostStreamDoneInitAddrs[i],
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostStreamFlushAddrs[i],
FPGAMANAGEDSTREAMENGINE_0_to_cpu_toHostStreamFlushDoneAddrs[i]);
fpga_to_cpu_streams.push_back(
std::make_unique<FPGAManagedStreams::FPGAToCPUDriver>(
params,
(void *)(fpga_address_memory_base + offset),
offset,
mmio_read_func,
mmio_write_func));
offset += params.buffer_capacity;
}
#endif // FPGAMANAGEDSTREAMENGINE_0_PRESENT
}
simif_emul_t::~simif_emul_t(){};
// Program each stream engine's MMIO registers before any bridge begins
// driving stream traffic (the hook simif_t::target_init invokes for this).
void simif_emul_t::host_mmio_init() {
  for (auto &stream : this->fpga_to_cpu_streams) {
    stream->init();
  }
  for (auto &stream : this->cpu_to_fpga_streams) {
    stream->init();
  }
};
int simif_emul_t::run() {
if (fastloadmem && !load_mem_path.empty()) {
fprintf(stdout, "[fast loadmem] %s\n", load_mem_path.c_str());
@ -119,8 +159,8 @@ size_t simif_emul_t::pull(unsigned stream_idx,
void *dest,
size_t num_bytes,
size_t threshold_bytes) {
assert(stream_idx < to_host_streams.size());
return this->to_host_streams[stream_idx].pull(
assert(stream_idx < fpga_to_cpu_streams.size());
return this->fpga_to_cpu_streams[stream_idx]->pull(
dest, num_bytes, threshold_bytes);
}
@ -128,11 +168,21 @@ size_t simif_emul_t::push(unsigned stream_idx,
void *src,
size_t num_bytes,
size_t threshold_bytes) {
assert(stream_idx < from_host_streams.size());
return this->from_host_streams[stream_idx].push(
assert(stream_idx < cpu_to_fpga_streams.size());
return this->cpu_to_fpga_streams[stream_idx]->push(
src, num_bytes, threshold_bytes);
}
// Forward a flush hint to the selected FPGA-to-CPU stream driver.
void simif_emul_t::pull_flush(unsigned stream_idx) {
  assert(stream_idx < fpga_to_cpu_streams.size());
  fpga_to_cpu_streams[stream_idx]->flush();
}
// Forward a flush hint to the selected CPU-to-FPGA stream driver.
void simif_emul_t::push_flush(unsigned stream_idx) {
  assert(stream_idx < cpu_to_fpga_streams.size());
  cpu_to_fpga_streams[stream_idx]->flush();
}
size_t
simif_emul_t::cpu_managed_axi4_read(size_t addr, char *data, size_t size) {
ssize_t len = (size - 1) / CPU_MANAGED_AXI4_BEAT_BYTES;

View File

@ -3,6 +3,7 @@
#ifndef __SIMIF_EMUL_H
#define __SIMIF_EMUL_H
#include <memory>
#include <vector>
#include "bridges/cpu_managed_stream.h"
@ -21,6 +22,8 @@ public:
virtual void sim_init() = 0;
void host_mmio_init() override;
void write(size_t addr, uint32_t data) override;
uint32_t read(size_t addr) override;
@ -32,6 +35,10 @@ public:
void *src,
size_t num_bytes,
size_t threshold_bytes) override;
void pull_flush(unsigned int stream_no) override;
void push_flush(unsigned int stream_no) override;
/**
* @brief Pointers to inter-context (i.e., between VCS/verilator and driver)
* AXI4 transaction channels
@ -89,8 +96,8 @@ protected:
// Writes directly into the host DRAM models to initialize them.
void load_mems(const char *fname);
std::vector<StreamToCPU> to_host_streams;
std::vector<StreamFromCPU> from_host_streams;
std::vector<std::unique_ptr<FPGAToCPUStreamDriver>> fpga_to_cpu_streams;
std::vector<std::unique_ptr<CPUToFPGAStreamDriver>> cpu_to_fpga_streams;
};
#endif // __SIMIF_EMUL_H

View File

@ -36,25 +36,25 @@ simif_f1_t::simif_f1_t(const std::vector<std::string> &args) : simif_t(args) {
std::bind(&simif_f1_t::cpu_managed_axi4_write, this, _1, _2, _3);
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_from_cpu_stream_count; i++) {
auto params = CPUManagedStreamParameters(
auto params = CPUManagedStreams::StreamParameters(
std::string(CPUMANAGEDSTREAMENGINE_0_from_cpu_names[i]),
CPUMANAGEDSTREAMENGINE_0_from_cpu_dma_addrs[i],
CPUMANAGEDSTREAMENGINE_0_from_cpu_count_addrs[i],
CPUMANAGEDSTREAMENGINE_0_from_cpu_buffer_sizes[i]);
from_host_streams.push_back(
StreamFromCPU(params, mmio_read_func, cpu_managed_axi4_write_func));
from_host_streams.push_back(CPUManagedStreams::CPUToFPGADriver(
params, mmio_read_func, cpu_managed_axi4_write_func));
}
for (int i = 0; i < CPUMANAGEDSTREAMENGINE_0_to_cpu_stream_count; i++) {
auto params = CPUManagedStreamParameters(
auto params = CPUManagedStreams::StreamParameters(
std::string(CPUMANAGEDSTREAMENGINE_0_to_cpu_names[i]),
CPUMANAGEDSTREAMENGINE_0_to_cpu_dma_addrs[i],
CPUMANAGEDSTREAMENGINE_0_to_cpu_count_addrs[i],
CPUMANAGEDSTREAMENGINE_0_to_cpu_buffer_sizes[i]);
to_host_streams.push_back(
StreamToCPU(params, mmio_read_func, cpu_managed_axi4_read_func));
to_host_streams.push_back(CPUManagedStreams::FPGAToCPUDriver(
params, mmio_read_func, cpu_managed_axi4_read_func));
}
}

View File

@ -14,8 +14,9 @@ public:
simif_f1_t(const std::vector<std::string> &args);
~simif_f1_t();
// Unused by F1 since initialization / deinitization is done in the
// constructor
// Unused since no F1-specific MMIO is required to setup the simulation.
void host_mmio_init() override{};
int run() override { return simulation_run(); }
void write(size_t addr, uint32_t data) override;
@ -28,6 +29,10 @@ public:
void *src,
size_t num_bytes,
size_t threshold_bytes) override;
void pull_flush(unsigned int stream_no) override {}
void push_flush(unsigned int stream_no) override {}
uint32_t is_write_ready();
void check_rc(int rc, char *infostr);
void fpga_shutdown();
@ -37,8 +42,8 @@ private:
char in_buf[CTRL_BEAT_BYTES];
char out_buf[CTRL_BEAT_BYTES];
std::vector<StreamToCPU> to_host_streams;
std::vector<StreamFromCPU> from_host_streams;
std::vector<CPUManagedStreams::FPGAToCPUDriver> to_host_streams;
std::vector<CPUManagedStreams::CPUToFPGADriver> from_host_streams;
size_t cpu_managed_axi4_write(size_t addr, char *data, size_t size);
size_t cpu_managed_axi4_read(size_t addr, char *data, size_t size);

View File

@ -12,8 +12,10 @@ public:
simif_vitis_t(const std::vector<std::string> &args);
~simif_vitis_t() {}
// Unused by Vitis since initialization / deinitization is done in the
// constructor
// Will be used once FPGA-managed AXI4 is fully plumbed through the shim
// to setup the FPGAManagedStream engine.
void host_mmio_init() override{};
int run() override { return simulation_run(); }
void write(size_t addr, uint32_t data) override;

View File

@ -5,20 +5,280 @@ package midas.core
import chisel3._
import chisel3.util._
import freechips.rocketchip.amba.axi4._
import freechips.rocketchip.config.{Parameters, Field}
import freechips.rocketchip.config.{Field, Parameters}
import freechips.rocketchip.diplomacy._
import midas.widgets._
import midas.widgets.CppGenerationUtils._
class WriteMetadata(val numBeatsWidth: Int) extends Bundle {
val numBeats = Output(UInt(numBeatsWidth.W))
val isFlush = Output(Bool())
}
/**
* This is a stub to foreshadow the other implementation
*/
class FPGAManagedStreamEngine(p: Parameters, val params: StreamEngineParameters) extends StreamEngine(p) {
require(sinkParams.isEmpty, "FPGAManagedStreamEngine does not currently support FPGA-sunk streams.")
// Beats refers to 512b words moving over a stream
val pageBytes = 4096
val beatBytes = BridgeStreamConstants.streamWidthBits / 8
val pageBeats = pageBytes / beatBytes
def maxFlightForStream(params: StreamSourceParameters): Int =
(params.fpgaBufferDepth * beatBytes) / pageBytes
val cpuManagedAXI4NodeOpt = None
val fpgaManagedAXI4NodeOpt = Some(midas.widgets.AXI4TieOff()(p))
val (fpgaManagedAXI4NodeOpt, toCPUNode) = if (hasStreams) {
// The implicit val defined in StreamEngine is not accessible here; Make a
// duplicate that can be referenced by diplomatic nodes
implicit val pShadow = p
val xbar = AXI4Xbar()
val toCPUNode = AXI4MasterNode(
sourceParams.map { p =>
AXI4MasterPortParameters(Seq(AXI4MasterParameters(name = p.name, maxFlight = Some(maxFlightForStream(p)))))
}
)
xbar :=* AXI4Buffer() :=* toCPUNode
(Some(xbar), Some(toCPUNode))
} else {
(None, None)
}
lazy val module = new WidgetImp(this) {
val io = IO(new WidgetIO)
case class ToCPUStreamDriverParameters(
name: String,
fpgaBufferDepth: Int,
toHostPhysAddrHighAddr: Int,
toHostPhysAddrLowAddr: Int,
bytesAvailableAddr: Int,
bytesConsumedAddr: Int,
toHostStreamDoneInitAddr: Int,
toHostStreamFlushAddr: Int,
toHostStreamFlushDoneAddr: Int,
)
// Invoke this in the module implementation
def elaborateToHostCPUStream(
channel: DecoupledIO[UInt],
axi4: AXI4Bundle,
chParams: StreamSourceParameters,
): ToCPUStreamDriverParameters = {
require(
BridgeStreamConstants.streamWidthBits == axi4.params.dataBits,
s"FPGAManagedStreamEngine requires stream widths to match FPGA-managed AXI4 data width",
)
val cpuBufferDepthBeats = chParams.fpgaBufferDepth
require(cpuBufferDepthBeats > pageBeats)
val cpuBufferSizeBytes = (1 << log2Ceil(cpuBufferDepthBeats)) * (BridgeStreamConstants.streamWidthBits / 8)
// This to simplify the hardware
require(isPow2(cpuBufferSizeBytes))
val toHostPhysAddrHigh = Reg(UInt(32.W))
val toHostPhysAddrLow = Reg(UInt(32.W))
val bytesConsumedByCPU = RegInit(0.U(log2Ceil(cpuBufferSizeBytes + 1).W))
// This sets up a double buffer that should give full throughput for a
// single stream system. This queue could be grown under a multi-stream system.
val outgoingQueue = Module(new BRAMQueue(2 * pageBeats)(UInt(BridgeStreamConstants.streamWidthBits.W)))
outgoingQueue.io.enq <> channel
val writeCredits = RegInit(cpuBufferSizeBytes.U(log2Ceil(cpuBufferSizeBytes + 1).W))
val readCredits = RegInit(0.U(log2Ceil(cpuBufferSizeBytes + 1).W))
val writePtr = RegInit(0.U(log2Ceil(cpuBufferSizeBytes).W))
val doneInit = RegInit(false.B)
// Key assumption: write acknowledgements can be used as a synchronization
// point, after which the CPU can read new data written into its circular
// buffer. This tracks inflight requests, to increment read credits on
// write acknowledgement, and to cap maxflight.
val inflightBeatCounts = Module(
new Queue(new WriteMetadata(log2Ceil(pageBeats + 1)), maxFlightForStream(chParams))
)
val idle :: sendAddress :: sendData :: Nil = Enum(3)
val state = RegInit(idle)
val beatsToSendMinus1 = RegInit(0.U(log2Ceil(pageBeats).W))
// Ensure we do not cross page boundaries per AXI4 spec.
val beatsToPageBoundary =
pageBeats.U - writePtr(log2Ceil(pageBytes) - 1, log2Ceil(beatBytes))
assert((beatsToPageBoundary > 0.U) && (beatsToPageBoundary <= (pageBeats.U)))
// Establish the largest AXI4 write request we can make, by doing a min
// reduction over the following bounds:
val writeBounds = Seq(
outgoingQueue.io.count, // Beats available for enqueue in local FPGA buffer
writeCredits >> log2Ceil(beatBytes).U, // Space available in cpu buffer
beatsToPageBoundary,
) // Length to end of page
// NB: BeatsToPageBoundary covers the end of the circular buffer only because
// we ensure the buffer size is a multiple of page size
val writeableBeats = writeBounds.reduce { (a, b) => Mux(a < b, a, b) }
val writeableBeatsMinus1 = writeableBeats - 1.U
// This register resets itself to 0 on cycles it is not set by the host
// CPU. If it is non-zero it was written to in the last cycle, and so we
// know we can update credits.
assert(
!doneInit || (!(RegNext(bytesConsumedByCPU) =/= 0.U) || (bytesConsumedByCPU === 0.U)),
"Back-to-back MMIO accesses, or incorrect toggling on bytesConsumedByCPU",
)
when(bytesConsumedByCPU =/= 0.U) {
bytesConsumedByCPU := 0.U
writeCredits := writeCredits + bytesConsumedByCPU
readCredits := readCredits - bytesConsumedByCPU
}
val doFlush, inFlush = RegInit(false.B)
val flushBeatsToIssue, flushBeatsToAck = RegInit(0.U(log2Ceil(cpuBufferDepthBeats + 1).W))
assert(readCredits >= bytesConsumedByCPU, "Driver read more bytes than available in circular buffer.")
assert(
(writeCredits + bytesConsumedByCPU) <= cpuBufferSizeBytes.U,
"Driver granted more write credit than physically allowable.",
)
switch(state) {
is(idle) {
doFlush := false.B
when(doFlush && !inFlush && (outgoingQueue.io.count > 0.U)) {
inFlush := true.B
flushBeatsToIssue := outgoingQueue.io.count
flushBeatsToAck := outgoingQueue.io.count
}
val start =
(inflightBeatCounts.io.enq.ready) &&
((flushBeatsToIssue =/= 0.U) || (writeableBeats === beatsToPageBoundary))
when(start) { state := sendAddress }
}
is(sendAddress) {
when(axi4.aw.fire) {
state := sendData
beatsToSendMinus1 := writeableBeatsMinus1
writePtr := writePtr + (writeableBeats * beatBytes.U)
writeCredits := writeCredits + bytesConsumedByCPU - (writeableBeats * beatBytes.U)
flushBeatsToIssue := Mux(flushBeatsToIssue < writeableBeats, 0.U, flushBeatsToIssue - writeableBeats)
}
}
is(sendData) {
when(axi4.w.fire) {
state := Mux(axi4.w.bits.last, idle, sendData)
beatsToSendMinus1 := beatsToSendMinus1 - 1.U
}
}
}
axi4.aw.valid := (state === sendAddress)
axi4.aw.bits.id := 0.U
axi4.aw.bits.addr := Cat(toHostPhysAddrHigh, toHostPhysAddrLow) + writePtr
axi4.aw.bits.len := writeableBeatsMinus1
axi4.aw.bits.size := (log2Ceil(beatBytes)).U
// This is assumed but not exposed by the PCIM interface, and is the
// default transaction type supported by XDMA-backed AXI4 IFs anyways
axi4.aw.bits.burst := AXI4Parameters.BURST_INCR
// This to permit intermediate width adapters, etc, to pack narrower
// transactions into larger ones, in the event we make this IF narrower than 512b
axi4.aw.bits.cache := AXI4Parameters.CACHE_MODIFIABLE
// Assume page-sized transfers for now
// These fields are unused by F1 PCIM, but pick reasonable default values for future proofing
axi4.aw.bits.prot := 0.U // Unpriviledged, secure, data access
axi4.aw.bits.qos := 0.U // Default; unused
axi4.aw.bits.lock := 0.U // Normal, non-exclusive
inflightBeatCounts.io.enq.valid := axi4.aw.fire
inflightBeatCounts.io.enq.bits.numBeats := writeableBeats
inflightBeatCounts.io.enq.bits.isFlush := flushBeatsToIssue =/= 0.U
axi4.w.valid := (state === sendData) && outgoingQueue.io.deq.valid
axi4.w.bits.data := outgoingQueue.io.deq.bits
axi4.w.bits.strb := ((BigInt(1) << beatBytes) - 1).U
axi4.w.bits.last := beatsToSendMinus1 === 0.U
outgoingQueue.io.deq.ready := (state === sendData) && axi4.w.ready
// Write Response handling
axi4.b.ready := true.B
val ackBeats = inflightBeatCounts.io.deq.bits.numBeats
val ackFlush = inflightBeatCounts.io.deq.bits.isFlush
when(axi4.b.fire) {
readCredits := readCredits + (ackBeats * beatBytes.U) - bytesConsumedByCPU
when(ackFlush) {
val remainingBeatsToAck = Mux(ackBeats < flushBeatsToAck, flushBeatsToAck - ackBeats, 0.U)
flushBeatsToAck := remainingBeatsToAck
inFlush := remainingBeatsToAck =/= 0.U
}
}
inflightBeatCounts.io.deq.ready := axi4.b.fire
assert(!axi4.b.valid || inflightBeatCounts.io.deq.valid)
// We only use the write channels to implement FPGA-to-CPU streams
axi4.ar.valid := false.B
axi4.r.ready := false.B
// Register Driver-programmable MMIO registers
ToCPUStreamDriverParameters(
chParams.name,
cpuBufferSizeBytes,
attach(toHostPhysAddrHigh, s"${chParams.name}_toHostPhysAddrHigh"),
attach(toHostPhysAddrLow, s"${chParams.name}_toHostPhysAddrLow"),
attach(readCredits, s"${chParams.name}_bytesAvailable", ReadOnly),
attach(bytesConsumedByCPU, s"${chParams.name}_bytesConsumed"),
attach(doneInit, s"${chParams.name}_toHostStreamDoneInit"),
attach(doFlush, s"${chParams.name}_toHostStreamFlush"),
attach(!(doFlush || inFlush), s"${chParams.name}_toHostStreamFlushDone", ReadOnly),
)
}
// Elaborate one FPGA-to-CPU stream per source channel, collecting the
// MMIO register/driver parameters each stream's software driver needs.
// Empty when this engine hosts no streams (no AXI4 node to pull ports from).
val sourceDriverParameters =
  if (!hasStreams) {
    Seq()
  } else {
    val axi4Ports = toCPUNode.get.out.map { case (bundle, _) => bundle }
    axi4Ports
      .zip(streamsToHostCPU)
      .zip(sourceParams)
      .map { case ((axi4Port, streamPort), chParams) =>
        // Prefix generated hardware names with the channel name so signals
        // from different streams remain distinguishable after elaboration.
        chisel3.experimental.prefix(chParams.name) {
          elaborateToHostCPUStream(streamPort, axi4Port, chParams)
        }
      }
      .toSeq
  }
genCRFile()
// Emits C header constants describing every FPGA-to-CPU stream so the
// software driver can locate and program each stream's MMIO registers.
//
// @param base MMIO base address assigned to this widget; added to each
//             register offset so the driver sees absolute addresses
// @param sb   accumulator the generated header text is appended to
override def genHeader(base: BigInt, sb: StringBuilder) {
val headerWidgetName = getWName.toUpperCase
super.genHeader(base, sb)
// Serializes per-stream parameters as parallel C arrays: index i of every
// array describes stream i, so array order here is load-bearing — the
// driver-side consumer must stay in sync with the names and order below.
def serializeStreamParameters(prefix: String, params: Seq[ToCPUStreamDriverParameters]): Unit = {
val numStreams = params.size
sb.append(genConstStatic(s"${headerWidgetName}_${prefix}_stream_count", UInt32(numStreams)))
// Hack: avoid emitting a zero-sized array by providing a dummy set of
// parameters when no streams are generated. This is a limitation of the
// current C emission strategy. Note, the actual number of streams is still reported above.
val placeholder = ToCPUStreamDriverParameters("UNUSED", 0, 0, 0, 0, 0, 0, 0, 0)
val nonEmptyParams = if (numStreams == 0) Seq(placeholder) else params
// Each pair is (array-name suffix, per-stream values). Address-bearing
// entries are rebased onto `base`; name and depth are emitted verbatim.
val arraysToEmit = Seq(
"names" -> nonEmptyParams.map { p => CStrLit(p.name) },
"fpgaBufferDepth" -> nonEmptyParams.map { p => UInt32(p.fpgaBufferDepth) },
"toHostPhysAddrHighAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostPhysAddrHighAddr) },
"toHostPhysAddrLowAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostPhysAddrLowAddr) },
"bytesAvailableAddrs" -> nonEmptyParams.map { p => UInt64(base + p.bytesAvailableAddr) },
"bytesConsumedAddrs" -> nonEmptyParams.map { p => UInt64(base + p.bytesConsumedAddr) },
"toHostStreamDoneInitAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostStreamDoneInitAddr) },
"toHostStreamFlushAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostStreamFlushAddr) },
"toHostStreamFlushDoneAddrs" -> nonEmptyParams.map { p => UInt64(base + p.toHostStreamFlushDoneAddr) },
)
for ((name, values) <- arraysToEmit) {
sb.append(genArray(s"${headerWidgetName}_${prefix}_${name}", values))
}
}
serializeStreamParameters("to_cpu", sourceDriverParameters)
}
}
}

View File

@ -317,8 +317,11 @@ class FPGATop(implicit p: Parameters) extends LazyModule with HasWidgets {
beatBytes = params.dataBits / 8)
))
streamingEngine.fpgaManagedAXI4NodeOpt.foreach {
node := AXI4IdIndexer(params.idBits) := AXI4Buffer() := _
streamingEngine.fpgaManagedAXI4NodeOpt match {
case Some(engineNode) =>
node := AXI4IdIndexer(params.idBits) := AXI4Buffer() := engineNode
case None =>
node := AXI4TieOff()
}
(node, params)
}

View File

@ -17,7 +17,7 @@ import midas.core.{
/**
* Bridge Streams serve as means to do bulk transport from BridgeDriver to
* BridgeModule and vice versa. Abstractly, they can be thought of as a 512b
* wide latency-insensitive channel (i.e., queue).
* wide latency-insensitive channel (i.e., a queue with some unknown latency).
*
* The two mixins in this file implement the two directions of
* producer-consumer relationships: [[StreamFromHostCPU]] add a stream in

View File

@ -14,6 +14,7 @@ class NoConfig extends Config(Parameters.empty)
class BaseMidasExamplesConfig extends Config(
new WithDefaultMemModel ++
new WithWiringTransform ++
new HostDebugFeatures ++
new Config((site, here, up) => {
case SynthAsserts => true
case GenerateMultiCycleRamModels => true

View File

@ -6,22 +6,31 @@ import scala.util.matching.Regex
import scala.io.Source
import org.scalatest.Suites
import org.scalatest.matchers.should._
import freechips.rocketchip.config.Config
object BaseConfigs {
def f1 = Seq(classOf[DefaultF1Config])
def vitis = Seq(classOf[DefaultVitisConfig])
}
abstract class TutorialSuite(
val targetName: String, // See GeneratorUtils
targetConfigs: String = "NoConfig",
platformConfigs: String = "HostDebugFeatures_DefaultF1Config",
platformConfigs: Seq[Class[_ <: Config]] = Seq(),
tracelen: Int = 8,
simulationArgs: Seq[String] = Seq()
) extends firesim.TestSuiteCommon with Matchers {
lazy val basePlatformConfig = BaseConfigs.f1.asInstanceOf[Seq[Class[_ <: Config]]]
val backendSimulator = "verilator"
def platformConfigString = (platformConfigs ++ basePlatformConfig).map(_.getSimpleName).mkString("_")
val targetTuple = s"$targetName-$targetConfigs-$platformConfigs"
val targetTuple = s"$targetName-$targetConfigs-${platformConfigString}"
val commonMakeArgs = Seq(s"TARGET_PROJECT=midasexamples",
s"DESIGN=$targetName",
s"TARGET_CONFIG=${targetConfigs}",
s"PLATFORM_CONFIG=${platformConfigs}")
s"PLATFORM_CONFIG=${platformConfigString}")
def run(backend: String,
debug: Boolean = false,
@ -181,14 +190,17 @@ abstract class TutorialSuite(
//class PointerChaserF1Test extends TutorialSuite(
// "PointerChaser", "PointerChaserConfig", simulationArgs = Seq("`cat runtime.conf`"))
class GCDF1Test extends TutorialSuite("GCD")
class GCDVitisTest extends GCDF1Test { override lazy val basePlatformConfig = BaseConfigs.vitis }
// Hijack Parity to test all of the Midas-level backends
class ParityF1Test extends TutorialSuite("Parity") {
runTest("verilator", true)
runTest("vcs", true)
}
class ParityVitisTest extends TutorialSuite("Parity", platformConfigs = classOf[DefaultVitisConfig].getSimpleName) {
class ParityVitisTest extends TutorialSuite("Parity") {
override lazy val basePlatformConfig = BaseConfigs.vitis
runTest("verilator", true)
runTest("vcs", true)
}
@ -254,7 +266,7 @@ class AutoCounterCoverModuleF1Test extends TutorialSuite("AutoCounterCoverModule
}
class AutoCounterPrintfF1Test extends TutorialSuite("AutoCounterPrintfModule",
simulationArgs = Seq("+print-file=synthprinttest.out"),
platformConfigs = "AutoCounterPrintf_HostDebugFeatures_DefaultF1Config") {
platformConfigs = classOf[AutoCounterPrintf] +: BaseConfigs.f1) {
diffSynthesizedLog("synthprinttest.out0", stdoutPrefix = "AUTOCOUNTER_PRINT CYCLE", synthPrefix = "CYCLE")
}
class AutoCounterGlobalResetConditionF1Test extends TutorialSuite("AutoCounterGlobalResetCondition",
@ -282,8 +294,12 @@ class AutoCounterGlobalResetConditionF1Test extends TutorialSuite("AutoCounterGl
class PrintfModuleF1Test extends TutorialSuite("PrintfModule",
simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) {
runTest("vcs", true)
diffSynthesizedLog("synthprinttest.out0")
}
class PrintfModuleVitisTest extends PrintfModuleF1Test { override lazy val basePlatformConfig = BaseConfigs.vitis }
class NarrowPrintfModuleF1Test extends TutorialSuite("NarrowPrintfModule",
simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) {
diffSynthesizedLog("synthprinttest.out0")
@ -353,6 +369,8 @@ class MulticlockPrintF1Test extends TutorialSuite("MulticlockPrintfModule",
synthLinesToDrop = 4)
}
class MulticlockPrintVitisTest extends MulticlockPrintF1Test { override lazy val basePlatformConfig = BaseConfigs.vitis }
class MulticlockAutoCounterF1Test extends TutorialSuite("MulticlockAutoCounterModule",
simulationArgs = Seq("+autocounter-readrate=1000", "+autocounter-filename-base=autocounter")) {
checkAutoCounterCSV("autocounter0.csv", "AUTOCOUNTER_PRINT ")
@ -395,7 +413,7 @@ class PassthroughModelBridgeSourceTest extends TutorialSuite("PassthroughModelBr
class ResetPulseBridgeActiveHighTest extends TutorialSuite(
"ResetPulseBridgeTest",
// Disable assertion synthesis to rely on native chisel assertions to catch bad behavior
platformConfigs = "NoSynthAsserts_HostDebugFeatures_DefaultF1Config",
platformConfigs = classOf[NoSynthAsserts] +: BaseConfigs.f1,
simulationArgs = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength}")) {
runTest(backendSimulator,
args = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength + 1}"),
@ -405,7 +423,7 @@ class ResetPulseBridgeActiveHighTest extends TutorialSuite(
class ResetPulseBridgeActiveLowTest extends TutorialSuite(
"ResetPulseBridgeTest",
targetConfigs = "ResetPulseBridgeActiveLowConfig",
platformConfigs = "NoSynthAsserts_HostDebugFeatures_DefaultF1Config",
platformConfigs = classOf[NoSynthAsserts] +: BaseConfigs.f1,
simulationArgs = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength}")) {
runTest(backendSimulator,
args = Seq(s"+reset-pulse-length0=${ResetPulseBridgeTestConsts.maxPulseLength + 1}"),
@ -434,8 +452,7 @@ class CustomConstraintsF1Test extends TutorialSuite("CustomConstraints") {
atLeast (1, xdc) should fullyMatch regex "constrain_impl2 \\[reg WRAPPER_INST/CL/firesim_top/.*/dut/r1]".r
}
}
// Suite Collections
// Midasexample Suite Collections
class ChiselExampleDesigns extends Suites(
new GCDF1Test,
new ParityF1Test,
@ -499,6 +516,13 @@ class FMRCITests extends Suites(
new PassthroughModelBridgeSourceTest,
)
class VitisCITests extends Suites (
new GCDVitisTest,
new ParityVitisTest,
new PrintfModuleVitisTest,
new MulticlockPrintVitisTest,
)
// These groups are vestigial from CircleCI container limits
class CIGroupA extends Suites(
new ChiselExampleDesigns,
@ -515,5 +539,6 @@ class CIGroupB extends Suites(
new firesim.fasedtests.CIGroupB,
new firesim.AllMidasUnitTests,
new firesim.FailingUnitTests,
new FMRCITests
new FMRCITests,
new VitisCITests
)