clock-gate the pipeline during D$ misses
This commit is contained in:
parent
c30c89b798
commit
dbe65d79e7
|
@ -828,6 +828,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||
io.cpu.perf.release := edge.done(tl_out_c)
|
||||
io.cpu.perf.grant := d_done
|
||||
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
||||
// Drives the `blocked` flag of the perf bundle exposed on io.cpu.
io.cpu.perf.blocked := {
|
||||
// stop reporting blocked just before unblocking to avoid overly conservative stalling
|
||||
// `cycles` scales beatBytes to form the early cut-off below: it is 1 when
// outer.bufferUncachedRequests is defined and greater than 1, otherwise 2.
val cycles = outer.bufferUncachedRequests.map(n => if (n > 1) 1 else 2).getOrElse(2)
|
||||
// Asserted while cached_grant_wait holds and d_address_inc is still below
// cacheBlockBytes minus `cycles` beats; `max 0` keeps that bound non-negative.
cached_grant_wait && d_address_inc < ((cacheBlockBytes - cycles * beatBytes) max 0)
|
||||
}
|
||||
|
||||
// report errors
|
||||
val (data_error, data_error_uncorrectable, data_error_addr) =
|
||||
|
|
|
@ -137,6 +137,7 @@ class HellaCachePerfEvents extends Bundle {
|
|||
val release = Bool()
|
||||
val grant = Bool()
|
||||
val tlbMiss = Bool()
|
||||
val blocked = Bool()
|
||||
}
|
||||
|
||||
// interface between D$ and processor/DTLB
|
||||
|
|
|
@ -64,6 +64,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
|||
io.requestor(i).perf := io.mem.perf
|
||||
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
|
||||
io.requestor(i).s2_nack_cause_raw := io.mem.s2_nack_cause_raw
|
||||
io.requestor(i).clock_enabled := io.mem.clock_enabled
|
||||
resp.bits := io.mem.resp.bits
|
||||
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
|
||||
|
||||
|
|
|
@ -87,6 +87,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||
with HasCoreIO {
|
||||
|
||||
val clock_en_reg = RegInit(true.B)
|
||||
val long_latency_stall = Reg(Bool())
|
||||
val imem_might_request_reg = Reg(Bool())
|
||||
val clock_en = Wire(init=true.B)
|
||||
val gated_clock =
|
||||
|
@ -671,8 +672,12 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||
checkHazards(fp_hazard_targets, fp_sboard.read _)
|
||||
} else Bool(false)
|
||||
|
||||
// NOTE(review): `dcache_blocked` is defined a second time by the block
// expression a few lines below. The enclosing hunk header (8 lines -> 12 lines)
// suggests these two lines are the pre-change version shown without diff
// markers -- confirm before treating both as live code.
val dcache_blocked = Reg(Bool())
|
||||
// Next value: set when the D$ request port is not ready while io.dmem.clock_enabled
// is high and a request is valid; once set it holds (via the self term) until
// req.ready rises or clock_enabled drops.
dcache_blocked := !io.dmem.req.ready && io.dmem.clock_enabled && (io.dmem.req.valid || dcache_blocked)
|
||||
// `dcache_blocked` is the value of this block: an internal `blocked` register
// masked by the absence of a grant in the current cycle.
val dcache_blocked = {
|
||||
// speculate that a blocked D$ will unblock the cycle after a Grant
|
||||
val blocked = Reg(Bool())
|
||||
// Next value: req not ready, D$ clock enabled, and no grant this cycle, combined
// with either already being blocked, a valid request, or an s2_nack.
// A grant therefore clears the register regardless of the other terms.
blocked := !io.dmem.req.ready && io.dmem.clock_enabled && !io.dmem.perf.grant && (blocked || io.dmem.req.valid || io.dmem.s2_nack)
|
||||
// Result also drops combinationally in the same cycle a grant is observed.
blocked && !io.dmem.perf.grant
|
||||
}
|
||||
// Registered flag tracking a RoCC command that is valid but has not been accepted.
val rocc_blocked = Reg(Bool())
|
||||
// Next value: set when cmd.valid is high while cmd.ready is low and wb_xcpt is
// not asserted; once set it holds (via the self term) until cmd.ready rises or
// wb_xcpt is asserted.
rocc_blocked := !wb_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
|
||||
|
||||
|
@ -762,14 +767,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||
|
||||
// gate the clock
|
||||
// Clock-gating logic, only elaborated when rocketParams.clockGate is set.
// NOTE(review): `clock_en` is assigned twice below and the "instruction pending"
// term appears twice (once with csr.io.csr_stall, once with long_latency_stall).
// The enclosing hunk header (14 lines -> 15 lines) suggests the csr.io.csr_stall
// variants are the pre-change lines shown without diff markers -- confirm.
if (rocketParams.clockGate) {
|
||||
clock_en := clock_en_reg || (!csr.io.csr_stall && io.imem.resp.valid)
|
||||
// long_latency_stall is declared as a Reg(Bool()), so this captures a CSR stall
// or a blocked D$ (io.dmem.perf.blocked) for use in the following cycle.
long_latency_stall := csr.io.csr_stall || io.dmem.perf.blocked
|
||||
// Enable the clock when the registered enable is set, or when an instruction
// response is valid and no long-latency stall is recorded.
clock_en := clock_en_reg || (!long_latency_stall && io.imem.resp.valid)
|
||||
// Registered enable (clock_en_reg is a RegInit(true.B)): stays set while any of
// the conditions listed below holds.
clock_en_reg :=
|
||||
ex_pc_valid || mem_pc_valid || wb_pc_valid || // instruction in flight
|
||||
io.ptw.customCSRs.disableCoreClockGate || // chicken bit
|
||||
!div.io.req.ready || // mul/div in flight
|
||||
usingFPU && !io.fpu.fcsr_rdy || // long-latency FPU in flight
|
||||
io.dmem.replay_next || // long-latency load replaying
|
||||
(!csr.io.csr_stall && (ibuf.io.inst(0).valid || io.imem.resp.valid)) // instruction pending
|
||||
(!long_latency_stall && (ibuf.io.inst(0).valid || io.imem.resp.valid)) // instruction pending
|
||||
}
|
||||
|
||||
// evaluate performance counters
|
||||
|
|
Loading…
Reference in New Issue