clock-gate the pipeline during D$ misses

This commit is contained in:
Andrew Waterman 2018-10-01 20:13:55 -07:00
parent c30c89b798
commit dbe65d79e7
4 changed files with 17 additions and 4 deletions

View File

@ -828,6 +828,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
io.cpu.perf.release := edge.done(tl_out_c)
io.cpu.perf.grant := d_done
io.cpu.perf.tlbMiss := io.ptw.req.fire()
// Report to the CPU-side perf interface that the D$ is blocked: asserted while
// cached_grant_wait is set and d_address_inc is still below the threshold
// computed from the block size below.
io.cpu.perf.blocked := {
// stop reporting blocked just before unblocking to avoid overly conservative stalling
// `cycles` selects how early to drop the signal: 1 when bufferUncachedRequests is
// defined and greater than 1, otherwise (undefined, or <= 1) 2.
val cycles = outer.bufferUncachedRequests.map(n => if (n > 1) 1 else 2).getOrElse(2)
// Threshold is `cycles * beatBytes` short of cacheBlockBytes; `max 0` clamps it so it
// never goes negative when the block is smaller than that margin.
// NOTE(review): presumably d_address_inc is the byte offset of the refill in
// progress -- confirm against its definition, which is not visible here.
cached_grant_wait && d_address_inc < ((cacheBlockBytes - cycles * beatBytes) max 0)
}
// report errors
val (data_error, data_error_uncorrectable, data_error_addr) =

View File

@ -137,6 +137,7 @@ class HellaCachePerfEvents extends Bundle {
val release = Bool()
val grant = Bool()
val tlbMiss = Bool()
val blocked = Bool()
}
// interface between D$ and processor/DTLB

View File

@ -64,6 +64,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
io.requestor(i).perf := io.mem.perf
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
io.requestor(i).s2_nack_cause_raw := io.mem.s2_nack_cause_raw
io.requestor(i).clock_enabled := io.mem.clock_enabled
resp.bits := io.mem.resp.bits
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)

View File

@ -87,6 +87,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
with HasCoreIO {
val clock_en_reg = RegInit(true.B)
val long_latency_stall = Reg(Bool())
val imem_might_request_reg = Reg(Bool())
val clock_en = Wire(init=true.B)
val gated_clock =
@ -671,8 +672,12 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
checkHazards(fp_hazard_targets, fp_sboard.read _)
} else Bool(false)
val dcache_blocked = Reg(Bool())
dcache_blocked := !io.dmem.req.ready && io.dmem.clock_enabled && (io.dmem.req.valid || dcache_blocked)
// D$-blocked indication: a registered "blocked" state that is additionally masked
// combinationally by io.dmem.perf.grant (see the final expression of the block).
val dcache_blocked = {
// speculate that a blocked D$ will unblock the cycle after a Grant
val blocked = Reg(Bool())
// Next state is true only while the D$ is not ready, its clock is enabled, and no
// Grant is being reported, and one of: already blocked, a request is being
// presented (req.valid), or a request was nacked (s2_nack).
blocked := !io.dmem.req.ready && io.dmem.clock_enabled && !io.dmem.perf.grant && (blocked || io.dmem.req.valid || io.dmem.s2_nack)
// Mask with the Grant in the current cycle as well, so the value seen by users of
// dcache_blocked drops in the Grant cycle itself rather than one cycle later.
blocked && !io.dmem.perf.grant
}
val rocc_blocked = Reg(Bool())
rocc_blocked := !wb_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
@ -762,14 +767,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
// gate the clock
// Clock-gating control, only elaborated when rocketParams.clockGate is set.
// NOTE(review): this listing contains both a csr_stall-based and a
// long_latency_stall-based form of the clock_en assignment and of the
// "instruction pending" term; this looks like diff residue with the +/- markers
// lost -- confirm against the actual file before relying on it.
if (rocketParams.clockGate) {
clock_en := clock_en_reg || (!csr.io.csr_stall && io.imem.resp.valid)
// Registered stall condition: a CSR stall or the D$ reporting itself blocked.
long_latency_stall := csr.io.csr_stall || io.dmem.perf.blocked
// Enable the clock if the registered enable is set, or if no long-latency stall
// is recorded and a fetch response is valid.
clock_en := clock_en_reg || (!long_latency_stall && io.imem.resp.valid)
// Next value of the registered enable: true if any of the conditions below holds.
clock_en_reg :=
ex_pc_valid || mem_pc_valid || wb_pc_valid || // instruction in flight
io.ptw.customCSRs.disableCoreClockGate || // chicken bit
!div.io.req.ready || // mul/div in flight
usingFPU && !io.fpu.fcsr_rdy || // long-latency FPU in flight
io.dmem.replay_next || // long-latency load replaying
(!csr.io.csr_stall && (ibuf.io.inst(0).valid || io.imem.resp.valid)) // instruction pending
(!long_latency_stall && (ibuf.io.inst(0).valid || io.imem.resp.valid)) // instruction pending
}
// evaluate performance counters