Support > 2 commit width
This commit is contained in:
parent
f805350c27
commit
c072673b89
|
@ -16,7 +16,7 @@
|
|||
(((__ITYPE__)-1) >> ((sizeof(__ITYPE__) * CHAR_BIT) - (__ONE_COUNT__))))
|
||||
#define TO_BYTES(__BITS__) ((__BITS__) / 8)
|
||||
|
||||
// #define DEBUG
|
||||
#define DEBUG
|
||||
|
||||
char cospike_t::KIND;
|
||||
|
||||
|
@ -32,6 +32,7 @@ cospike_t::cospike_t(simif_t &sim,
|
|||
uint32_t cause_width,
|
||||
uint32_t wdata_width,
|
||||
uint32_t num_commit_insts,
|
||||
uint32_t bits_per_trace,
|
||||
const char *isa,
|
||||
uint32_t vlen,
|
||||
const char *priv,
|
||||
|
@ -47,7 +48,8 @@ cospike_t::cospike_t(simif_t &sim,
|
|||
_vlen(vlen), _priv(priv), _pmp_regions(pmp_regions),
|
||||
_mem0_base(mem0_base), _mem0_size(mem0_size), _nharts(nharts),
|
||||
_bootrom(bootrom), _hartid(hartid), _num_commit_insts(num_commit_insts),
|
||||
stream_idx(stream_idx), stream_depth(stream_depth) {
|
||||
_bits_per_trace(bits_per_trace), stream_idx(stream_idx),
|
||||
stream_depth(stream_depth) {
|
||||
this->_valid_width = 1;
|
||||
this->_iaddr_width = TO_BYTES(iaddr_width);
|
||||
this->_insn_width = TO_BYTES(insn_width);
|
||||
|
@ -67,9 +69,6 @@ cospike_t::cospike_t(simif_t &sim,
|
|||
this->_cause_offset = this->_interrupt_offset + this->_interrupt_width;
|
||||
this->_wdata_offset = this->_cause_offset + this->_cause_width;
|
||||
|
||||
// setup misc. state variables
|
||||
this->_commit_inst_idx = 0;
|
||||
|
||||
this->cospike_failed = false;
|
||||
this->cospike_exit_code = 0;
|
||||
}
|
||||
|
@ -131,23 +130,23 @@ int cospike_t::invoke_cospike(uint8_t *buf) {
|
|||
: 0;
|
||||
uint8_t priv = buf[this->_priv_offset];
|
||||
|
||||
if (valid || exception || cause) {
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr,
|
||||
"C[%d] V(%d) PC(0x%x) Insn(0x%x) EIC(%d:%d:%d) Wdata(%d:0x%x) "
|
||||
"Priv(%d)\n",
|
||||
this->_hartid,
|
||||
valid,
|
||||
iaddr,
|
||||
insn,
|
||||
exception,
|
||||
interrupt,
|
||||
cause,
|
||||
(this->_wdata_width != 0),
|
||||
wdata,
|
||||
priv);
|
||||
fprintf(stderr,
|
||||
"C[%d] V(%d) PC(0x%x) Insn(0x%x) EIC(%d:%d:%d) Wdata(%d:0x%x) "
|
||||
"Priv(%d)\n",
|
||||
this->_hartid,
|
||||
valid,
|
||||
iaddr,
|
||||
insn,
|
||||
exception,
|
||||
interrupt,
|
||||
cause,
|
||||
(this->_wdata_width != 0),
|
||||
wdata,
|
||||
priv);
|
||||
#endif
|
||||
|
||||
if (valid || exception || cause) {
|
||||
return cospike_cosim(0, // TODO: No cycle given
|
||||
this->_hartid,
|
||||
(this->_wdata_width != 0),
|
||||
|
@ -170,13 +169,29 @@ int cospike_t::invoke_cospike(uint8_t *buf) {
|
|||
size_t cospike_t::process_tokens(int num_beats, size_t minimum_batch_beats) {
|
||||
const size_t maximum_batch_bytes = num_beats * STREAM_WIDTH_BYTES;
|
||||
const size_t minimum_batch_bytes = minimum_batch_beats * STREAM_WIDTH_BYTES;
|
||||
const size_t bits_per_trace = STREAM_WIDTH_BYTES / 2;
|
||||
// TODO: as an optimization, mmap the file and load directly into it.
|
||||
page_aligned_sized_array(OUTBUF, maximum_batch_bytes);
|
||||
auto bytes_received =
|
||||
pull(stream_idx, OUTBUF, maximum_batch_bytes, minimum_batch_bytes);
|
||||
const size_t bytes_per_trace = this->_bits_per_trace / 8;
|
||||
|
||||
for (uint32_t offset = 0; offset < bytes_received;
|
||||
offset += bytes_per_trace) {
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr,
|
||||
"Off(%d/%d:%d) token(",
|
||||
offset,
|
||||
bytes_received,
|
||||
offset / bytes_per_trace);
|
||||
|
||||
for (int32_t i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
|
||||
fprintf(stderr, "%02x", (OUTBUF + offset)[i]);
|
||||
if (i == bytes_per_trace)
|
||||
fprintf(stderr, " ");
|
||||
}
|
||||
fprintf(stderr, ")\n");
|
||||
#endif
|
||||
|
||||
for (uint32_t offset = 0; offset < bytes_received; offset += bits_per_trace) {
|
||||
// invoke cospike (requires that buffer is aligned properly)
|
||||
int rval = this->invoke_cospike(((uint8_t *)OUTBUF) + offset);
|
||||
if (rval) {
|
||||
|
@ -185,45 +200,27 @@ size_t cospike_t::process_tokens(int num_beats, size_t minimum_batch_beats) {
|
|||
printf("[ERROR] Cospike: Errored during simulation with %d\n", rval);
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr,
|
||||
"C[%d] off(%d) token(",
|
||||
this->_commit_inst_idx,
|
||||
offset / bits_per_trace);
|
||||
fprintf(stderr, "Off(%d) token(", offset / bytes_per_trace);
|
||||
|
||||
for (int32_t i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
|
||||
fprintf(stderr, "%02x", (OUTBUF + offset)[i]);
|
||||
if (i == bits_per_trace)
|
||||
if (i == bytes_per_trace)
|
||||
fprintf(stderr, " ");
|
||||
}
|
||||
fprintf(stderr, ")\n");
|
||||
|
||||
fprintf(stderr, "get_next_token token(");
|
||||
uint32_t next_off = offset += STREAM_WIDTH_BYTES;
|
||||
auto next_off = offset + STREAM_WIDTH_BYTES;
|
||||
|
||||
for (int32_t i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
|
||||
for (auto i = STREAM_WIDTH_BYTES - 1; i >= 0; --i) {
|
||||
fprintf(stderr, "%02x", (OUTBUF + next_off)[i]);
|
||||
if (i == bits_per_trace)
|
||||
if (i == bytes_per_trace)
|
||||
fprintf(stderr, " ");
|
||||
}
|
||||
fprintf(stderr, ")\n");
|
||||
#endif
|
||||
|
||||
return bytes_received;
|
||||
}
|
||||
|
||||
// move to next inst. trace
|
||||
this->_commit_inst_idx =
|
||||
(this->_commit_inst_idx + 1) % this->_num_commit_insts;
|
||||
|
||||
// add an extra STREAM_WIDTH_BYTES if there is an odd number of traces
|
||||
if (this->_commit_inst_idx == 0 && (this->_num_commit_insts % 2 == 1)) {
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr,
|
||||
"off(%d + 1) = %d\n",
|
||||
offset / bits_per_trace,
|
||||
(offset + bits_per_trace) / bits_per_trace);
|
||||
#endif
|
||||
offset += bits_per_trace;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ public:
|
|||
uint32_t cause_width,
|
||||
uint32_t wdata_width,
|
||||
uint32_t num_commit_insts,
|
||||
uint32_t bits_per_trace,
|
||||
const char *isa,
|
||||
uint32_t vlen,
|
||||
const char *priv,
|
||||
|
@ -81,7 +82,7 @@ private:
|
|||
|
||||
// other misc members
|
||||
uint32_t _num_commit_insts;
|
||||
uint8_t _commit_inst_idx;
|
||||
uint32_t _bits_per_trace;
|
||||
bool cospike_failed;
|
||||
int cospike_exit_code;
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ package firesim.bridges
|
|||
import chisel3._
|
||||
import chisel3.util._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import freechips.rocketchip.util.DecoupledHelper
|
||||
|
||||
import testchipip.{SerializableTileTraceIO, SpikeCosimConfig, TileTraceIO, TraceBundleWidths}
|
||||
|
||||
|
@ -108,42 +109,66 @@ class CospikeBridgeModule(params: CospikeBridgeParams)(implicit p: Parameters)
|
|||
}
|
||||
}
|
||||
|
||||
val maxTraceSize = paddedTraces.map(t => t.getWidth).max
|
||||
val outDataSzBits = streamEnq.bits.getWidth
|
||||
val maxTraceSize = paddedTraces.map(t => t.getWidth).max
|
||||
val outDataSzBits = streamEnq.bits.getWidth
|
||||
val totalTracesPerToken = (outDataSzBits / maxTraceSize).toInt
|
||||
val bitsPerTrace = roundUp(outDataSzBits / totalTracesPerToken, 8)
|
||||
|
||||
// constant
|
||||
// TODO: match tracerv in supporting commitWidth > 2
|
||||
val totalTracesPerToken = 2 // minTraceSz==190b so round up to nearest is 256b
|
||||
// constant
|
||||
require(
|
||||
maxTraceSize < bitsPerTrace,
|
||||
f"All instruction trace bits (i.e. valid, pc, instBits...) (${maxTraceSize}b) must fit in ${bitsPerTrace}b",
|
||||
)
|
||||
require(
|
||||
bitsPerTrace * totalTracesPerToken <= outDataSzBits,
|
||||
f"All traces must fit in single token (${bitsPerTrace * totalTracesPerToken} > ${outDataSzBits})",
|
||||
)
|
||||
|
||||
val bitsPerTrace = outDataSzBits / totalTracesPerToken
|
||||
val armCount = (traces.length + totalTracesPerToken - 1) / totalTracesPerToken
|
||||
|
||||
require(maxTraceSize < bitsPerTrace, "All instruction trace bits (i.e. valid, pc, instBits...) must fit in 256b")
|
||||
// Literally each arm of the mux, these are directly the bits that get put into the bump
|
||||
val allStreamBits =
|
||||
paddedTraces.grouped(totalTracesPerToken).toSeq.map(grp => Cat(grp.map(t => t.asUInt.pad(bitsPerTrace)).reverse))
|
||||
|
||||
// how many traces being sent over
|
||||
val numTraces = traces.size
|
||||
// num tokens needed to display full set of instructions from one cycle
|
||||
val numTokenForAll = ((numTraces - 1) / totalTracesPerToken) + 1
|
||||
// Number of bits to use for the counter, the +1 is required because the counter will count 1 past the number of arms
|
||||
val counterBits = log2Ceil(armCount + 1)
|
||||
|
||||
// only increment the counter when something is sent (this implies that the input is valid and the output is available on the other side)
|
||||
val counterFire = streamEnq.fire
|
||||
val (cnt, wrap) = Counter(counterFire, numTokenForAll)
|
||||
// This counter acts to select the mux arm
|
||||
val counter = RegInit(0.U(counterBits.W))
|
||||
|
||||
val paddedTracesAligned = paddedTraces.map(t => t.asUInt.pad(bitsPerTrace))
|
||||
val paddedTracesTruncated = if (numTraces == 1) {
|
||||
(VecInit(paddedTracesAligned).asUInt >> (outDataSzBits.U * cnt))
|
||||
} else {
|
||||
(VecInit(paddedTracesAligned).asUInt >> (outDataSzBits.U * cnt))(outDataSzBits - 1, 0)
|
||||
// The main mux where the input arms are different possible valid traces, and the output goes to streamEnq
|
||||
val streamMux = MuxLookup(counter, allStreamBits(0), Seq.tabulate(armCount)(x => x.U -> allStreamBits(x)))
|
||||
|
||||
// a parallel set of arms to a parallel mux, true if any instructions in the arm are valid (OR reduction)
|
||||
val anyValid =
|
||||
traces
|
||||
.grouped(totalTracesPerToken)
|
||||
.toSeq
|
||||
.map(arm => arm.map(trace => trace.valid | trace.exception | (trace.cause =/= 0.U)).reduce((a, b) => (a | b)))
|
||||
|
||||
// all of the valids of the larger indexed arms are OR reduced
|
||||
val anyValidRemain =
|
||||
Seq.tabulate(armCount)(idx => (idx until armCount).map(x => anyValid(x)).reduce((a, b) => (a | b)))
|
||||
val anyValidRemainMux = MuxLookup(counter, false.B, Seq.tabulate(armCount)(x => x.U -> anyValidRemain(x)))
|
||||
|
||||
streamEnq.bits := streamMux
|
||||
|
||||
val maybeFire = !anyValidRemainMux || (counter === (armCount - 1).U)
|
||||
val maybeEnq = anyValidRemainMux
|
||||
|
||||
val commonPredicates = Seq(hPort.toHost.hValid, streamEnq.ready)
|
||||
val do_enq_helper = DecoupledHelper((maybeEnq +: commonPredicates): _*)
|
||||
val do_fire_helper = DecoupledHelper((maybeFire +: commonPredicates): _*)
|
||||
|
||||
// Note, if we dequeue a token that wins out over the increment below
|
||||
when(do_fire_helper.fire()) {
|
||||
counter := 0.U
|
||||
}.elsewhen(do_enq_helper.fire()) {
|
||||
counter := counter + 1.U
|
||||
}
|
||||
|
||||
streamEnq.valid := hPort.toHost.hValid
|
||||
streamEnq.bits := paddedTracesTruncated
|
||||
|
||||
// tell the host that you are ready to get more
|
||||
hPort.toHost.hReady := streamEnq.ready && wrap
|
||||
|
||||
// This is uni-directional. We don't drive tokens back to the target.
|
||||
hPort.fromHost.hValid := true.B
|
||||
streamEnq.valid := do_enq_helper.fire(streamEnq.ready)
|
||||
hPort.toHost.hReady := do_fire_helper.fire(hPort.toHost.hValid)
|
||||
hPort.fromHost.hValid := true.B // this is uni-directional. we don't drive tokens back to target
|
||||
|
||||
genCRFile()
|
||||
|
||||
|
@ -159,7 +184,8 @@ class CospikeBridgeModule(params: CospikeBridgeParams)(implicit p: Parameters)
|
|||
UInt32(insnWidth),
|
||||
UInt32(causeWidth),
|
||||
UInt32(wdataWidth),
|
||||
UInt32(numTraces),
|
||||
UInt32(traces.length),
|
||||
UInt32(bitsPerTrace),
|
||||
CStrLit(params.cfg.isa),
|
||||
UInt32(params.cfg.vlen),
|
||||
CStrLit(params.cfg.priv),
|
||||
|
@ -178,6 +204,7 @@ class CospikeBridgeModule(params: CospikeBridgeParams)(implicit p: Parameters)
|
|||
|
||||
// general information printout
|
||||
println(s"Cospike Bridge Information")
|
||||
println(s" Total Inst. Traces / Commit Width: ${numTraces}")
|
||||
println(s" Total Inst. Traces (i.e. Commit Width): ${traces.length}")
|
||||
println(s" Total Traces Per Token: ${totalTracesPerToken}")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 3c42e63732bbb65ea6a2cf4a1b9c3f920b44f351
|
||||
Subproject commit 5541582639f8c5feb578b91b75e5d660e37ed006
|
Loading…
Reference in New Issue