Support commit widths greater than 2

This commit is contained in:
abejgonzalez 2023-09-04 13:05:29 -07:00
parent f805350c27
commit c072673b89
4 changed files with 102 additions and 77 deletions

View File

@ -16,7 +16,7 @@
(((__ITYPE__)-1) >> ((sizeof(__ITYPE__) * CHAR_BIT) - (__ONE_COUNT__))))
#define TO_BYTES(__BITS__) ((__BITS__) / 8)
// #define DEBUG
#define DEBUG
char cospike_t::KIND;
@ -32,6 +32,7 @@ cospike_t::cospike_t(simif_t &sim,
uint32_t cause_width,
uint32_t wdata_width,
uint32_t num_commit_insts,
uint32_t bits_per_trace,
const char *isa,
uint32_t vlen,
const char *priv,
@ -47,7 +48,8 @@ cospike_t::cospike_t(simif_t &sim,
_vlen(vlen), _priv(priv), _pmp_regions(pmp_regions),
_mem0_base(mem0_base), _mem0_size(mem0_size), _nharts(nharts),
_bootrom(bootrom), _hartid(hartid), _num_commit_insts(num_commit_insts),
stream_idx(stream_idx), stream_depth(stream_depth) {
_bits_per_trace(bits_per_trace), stream_idx(stream_idx),
stream_depth(stream_depth) {
this->_valid_width = 1;
this->_iaddr_width = TO_BYTES(iaddr_width);
this->_insn_width = TO_BYTES(insn_width);
@ -67,9 +69,6 @@ cospike_t::cospike_t(simif_t &sim,
this->_cause_offset = this->_interrupt_offset + this->_interrupt_width;
this->_wdata_offset = this->_cause_offset + this->_cause_width;
// setup misc. state variables
this->_commit_inst_idx = 0;
this->cospike_failed = false;
this->cospike_exit_code = 0;
}
@ -131,23 +130,23 @@ int cospike_t::invoke_cospike(uint8_t *buf) {
: 0;
uint8_t priv = buf[this->_priv_offset];
if (valid || exception || cause) {
#ifdef DEBUG
fprintf(stderr,
"C[%d] V(%d) PC(0x%x) Insn(0x%x) EIC(%d:%d:%d) Wdata(%d:0x%x) "
"Priv(%d)\n",
this->_hartid,
valid,
iaddr,
insn,
exception,
interrupt,
cause,
(this->_wdata_width != 0),
wdata,
priv);
fprintf(stderr,
"C[%d] V(%d) PC(0x%x) Insn(0x%x) EIC(%d:%d:%d) Wdata(%d:0x%x) "
"Priv(%d)\n",
this->_hartid,
valid,
iaddr,
insn,
exception,
interrupt,
cause,
(this->_wdata_width != 0),
wdata,
priv);
#endif
if (valid || exception || cause) {
return cospike_cosim(0, // TODO: No cycle given
this->_hartid,
(this->_wdata_width != 0),
@ -170,13 +169,29 @@ int cospike_t::invoke_cospike(uint8_t *buf) {
size_t cospike_t::process_tokens(int num_beats, size_t minimum_batch_beats) {
const size_t maximum_batch_bytes = num_beats * STREAM_WIDTH_BYTES;
const size_t minimum_batch_bytes = minimum_batch_beats * STREAM_WIDTH_BYTES;
const size_t bits_per_trace = STREAM_WIDTH_BYTES / 2;
// TODO: as an optimization, we could mmap the file and load directly into it.
page_aligned_sized_array(OUTBUF, maximum_batch_bytes);
auto bytes_received =
pull(stream_idx, OUTBUF, maximum_batch_bytes, minimum_batch_bytes);
const size_t bytes_per_trace = this->_bits_per_trace / 8;
for (uint32_t offset = 0; offset < bytes_received;
offset += bytes_per_trace) {
#ifdef DEBUG
fprintf(stderr,
"Off(%d/%d:%d) token(",
offset,
bytes_received,
offset / bytes_per_trace);
for (int32_t i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
fprintf(stderr, "%02x", (OUTBUF + offset)[i]);
if (i == bytes_per_trace)
fprintf(stderr, " ");
}
fprintf(stderr, ")\n");
#endif
for (uint32_t offset = 0; offset < bytes_received; offset += bits_per_trace) {
// invoke cospike (requires that buffer is aligned properly)
int rval = this->invoke_cospike(((uint8_t *)OUTBUF) + offset);
if (rval) {
@ -185,45 +200,27 @@ size_t cospike_t::process_tokens(int num_beats, size_t minimum_batch_beats) {
printf("[ERROR] Cospike: Errored during simulation with %d\n", rval);
#ifdef DEBUG
fprintf(stderr,
"C[%d] off(%d) token(",
this->_commit_inst_idx,
offset / bits_per_trace);
fprintf(stderr, "Off(%d) token(", offset / bytes_per_trace);
for (int32_t i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
fprintf(stderr, "%02x", (OUTBUF + offset)[i]);
if (i == bits_per_trace)
if (i == bytes_per_trace)
fprintf(stderr, " ");
}
fprintf(stderr, ")\n");
fprintf(stderr, "get_next_token token(");
uint32_t next_off = offset += STREAM_WIDTH_BYTES;
auto next_off = offset + STREAM_WIDTH_BYTES;
for (int32_t i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
for (auto i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
fprintf(stderr, "%02x", (OUTBUF + next_off)[i]);
if (i == bits_per_trace)
if (i == bytes_per_trace)
fprintf(stderr, " ");
}
fprintf(stderr, ")\n");
#endif
return bytes_received;
}
// move to next inst. trace
this->_commit_inst_idx =
(this->_commit_inst_idx + 1) % this->_num_commit_insts;
// add an extra STREAM_WIDTH_BYTES if there is an odd number of traces
if (this->_commit_inst_idx == 0 && (this->_num_commit_insts % 2 == 1)) {
#ifdef DEBUG
fprintf(stderr,
"off(%d + 1) = %d\n",
offset / bits_per_trace,
(offset + bits_per_trace) / bits_per_trace);
#endif
offset += bits_per_trace;
break;
}
}

View File

@ -20,6 +20,7 @@ public:
uint32_t cause_width,
uint32_t wdata_width,
uint32_t num_commit_insts,
uint32_t bits_per_trace,
const char *isa,
uint32_t vlen,
const char *priv,
@ -81,7 +82,7 @@ private:
// other misc members
uint32_t _num_commit_insts;
uint8_t _commit_inst_idx;
uint32_t _bits_per_trace;
bool cospike_failed;
int cospike_exit_code;

View File

@ -4,6 +4,7 @@ package firesim.bridges
import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import freechips.rocketchip.util.DecoupledHelper
import testchipip.{SerializableTileTraceIO, SpikeCosimConfig, TileTraceIO, TraceBundleWidths}
@ -108,42 +109,66 @@ class CospikeBridgeModule(params: CospikeBridgeParams)(implicit p: Parameters)
}
}
val maxTraceSize = paddedTraces.map(t => t.getWidth).max
val outDataSzBits = streamEnq.bits.getWidth
val maxTraceSize = paddedTraces.map(t => t.getWidth).max
val outDataSzBits = streamEnq.bits.getWidth
val totalTracesPerToken = (outDataSzBits / maxTraceSize).toInt
val bitsPerTrace = roundUp(outDataSzBits / totalTracesPerToken, 8)
// constant
// TODO: match tracerv in supporting commitWidth > 2
val totalTracesPerToken = 2 // minTraceSz==190b so round up to nearest is 256b
// constant
require(
maxTraceSize < bitsPerTrace,
f"All instruction trace bits (i.e. valid, pc, instBits...) (${maxTraceSize}b) must fit in ${bitsPerTrace}b",
)
require(
bitsPerTrace * totalTracesPerToken <= outDataSzBits,
f"All traces must fit in single token (${bitsPerTrace * totalTracesPerToken} > ${outDataSzBits})",
)
val bitsPerTrace = outDataSzBits / totalTracesPerToken
val armCount = (traces.length + totalTracesPerToken - 1) / totalTracesPerToken
require(maxTraceSize < bitsPerTrace, "All instruction trace bits (i.e. valid, pc, instBits...) must fit in 256b")
// Literally each arm of the mux, these are directly the bits that get put into the bump
val allStreamBits =
paddedTraces.grouped(totalTracesPerToken).toSeq.map(grp => Cat(grp.map(t => t.asUInt.pad(bitsPerTrace)).reverse))
// how many traces being sent over
val numTraces = traces.size
// num tokens needed to display full set of instructions from one cycle
val numTokenForAll = ((numTraces - 1) / totalTracesPerToken) + 1
// Number of bits to use for the counter, the +1 is required because the counter will count 1 past the number of arms
val counterBits = log2Ceil(armCount + 1)
// only increment the counter when something is sent (this implies that the input is valid and the output is available on the other side)
val counterFire = streamEnq.fire
val (cnt, wrap) = Counter(counterFire, numTokenForAll)
// This counter acts to select the mux arm
val counter = RegInit(0.U(counterBits.W))
val paddedTracesAligned = paddedTraces.map(t => t.asUInt.pad(bitsPerTrace))
val paddedTracesTruncated = if (numTraces == 1) {
(VecInit(paddedTracesAligned).asUInt >> (outDataSzBits.U * cnt))
} else {
(VecInit(paddedTracesAligned).asUInt >> (outDataSzBits.U * cnt))(outDataSzBits - 1, 0)
// The main mux where the input arms are different possible valid traces, and the output goes to streamEnq
val streamMux = MuxLookup(counter, allStreamBits(0), Seq.tabulate(armCount)(x => x.U -> allStreamBits(x)))
// a parallel set of arms to a parallel mux, true if any instructions in the arm are valid (OR reduction)
val anyValid =
traces
.grouped(totalTracesPerToken)
.toSeq
.map(arm => arm.map(trace => trace.valid | trace.exception | (trace.cause =/= 0.U)).reduce((a, b) => (a | b)))
// all of the valids of the larger indexed arms are OR reduced
val anyValidRemain =
Seq.tabulate(armCount)(idx => (idx until armCount).map(x => anyValid(x)).reduce((a, b) => (a | b)))
val anyValidRemainMux = MuxLookup(counter, false.B, Seq.tabulate(armCount)(x => x.U -> anyValidRemain(x)))
streamEnq.bits := streamMux
val maybeFire = !anyValidRemainMux || (counter === (armCount - 1).U)
val maybeEnq = anyValidRemainMux
val commonPredicates = Seq(hPort.toHost.hValid, streamEnq.ready)
val do_enq_helper = DecoupledHelper((maybeEnq +: commonPredicates): _*)
val do_fire_helper = DecoupledHelper((maybeFire +: commonPredicates): _*)
// Note, if we dequeue a token that wins out over the increment below
when(do_fire_helper.fire()) {
counter := 0.U
}.elsewhen(do_enq_helper.fire()) {
counter := counter + 1.U
}
streamEnq.valid := hPort.toHost.hValid
streamEnq.bits := paddedTracesTruncated
// tell the host that you are ready to get more
hPort.toHost.hReady := streamEnq.ready && wrap
// This is uni-directional. We don't drive tokens back to the target.
hPort.fromHost.hValid := true.B
streamEnq.valid := do_enq_helper.fire(streamEnq.ready)
hPort.toHost.hReady := do_fire_helper.fire(hPort.toHost.hValid)
hPort.fromHost.hValid := true.B // this is uni-directional. we don't drive tokens back to target
genCRFile()
@ -159,7 +184,8 @@ class CospikeBridgeModule(params: CospikeBridgeParams)(implicit p: Parameters)
UInt32(insnWidth),
UInt32(causeWidth),
UInt32(wdataWidth),
UInt32(numTraces),
UInt32(traces.length),
UInt32(bitsPerTrace),
CStrLit(params.cfg.isa),
UInt32(params.cfg.vlen),
CStrLit(params.cfg.priv),
@ -178,6 +204,7 @@ class CospikeBridgeModule(params: CospikeBridgeParams)(implicit p: Parameters)
// general information printout
println(s"Cospike Bridge Information")
println(s" Total Inst. Traces / Commit Width: ${numTraces}")
println(s" Total Inst. Traces (i.e. Commit Width): ${traces.length}")
println(s" Total Traces Per Token: ${totalTracesPerToken}")
}
}

@ -1 +1 @@
Subproject commit 3c42e63732bbb65ea6a2cf4a1b9c3f920b44f351
Subproject commit 5541582639f8c5feb578b91b75e5d660e37ed006