clock-gate the pipeline during D$ misses
This commit is contained in:
parent
c30c89b798
commit
dbe65d79e7
|
@ -828,6 +828,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||||
io.cpu.perf.release := edge.done(tl_out_c)
|
io.cpu.perf.release := edge.done(tl_out_c)
|
||||||
io.cpu.perf.grant := d_done
|
io.cpu.perf.grant := d_done
|
||||||
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
||||||
|
io.cpu.perf.blocked := {
|
||||||
|
// stop reporting blocked `cycles` beats before the end of the cache-block grant (i.e. just before unblocking), to avoid overly conservative stalling
|
||||||
|
val cycles = outer.bufferUncachedRequests.map(n => if (n > 1) 1 else 2).getOrElse(2)
|
||||||
|
cached_grant_wait && d_address_inc < ((cacheBlockBytes - cycles * beatBytes) max 0)
|
||||||
|
}
|
||||||
|
|
||||||
// report errors
|
// report errors
|
||||||
val (data_error, data_error_uncorrectable, data_error_addr) =
|
val (data_error, data_error_uncorrectable, data_error_addr) =
|
||||||
|
|
|
@ -137,6 +137,7 @@ class HellaCachePerfEvents extends Bundle {
|
||||||
val release = Bool()
|
val release = Bool()
|
||||||
val grant = Bool()
|
val grant = Bool()
|
||||||
val tlbMiss = Bool()
|
val tlbMiss = Bool()
|
||||||
|
val blocked = Bool()
|
||||||
}
|
}
|
||||||
|
|
||||||
// interface between D$ and processor/DTLB
|
// interface between D$ and processor/DTLB
|
||||||
|
|
|
@ -64,6 +64,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
||||||
io.requestor(i).perf := io.mem.perf
|
io.requestor(i).perf := io.mem.perf
|
||||||
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
|
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
|
||||||
io.requestor(i).s2_nack_cause_raw := io.mem.s2_nack_cause_raw
|
io.requestor(i).s2_nack_cause_raw := io.mem.s2_nack_cause_raw
|
||||||
|
io.requestor(i).clock_enabled := io.mem.clock_enabled
|
||||||
resp.bits := io.mem.resp.bits
|
resp.bits := io.mem.resp.bits
|
||||||
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
|
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
|
||||||
|
|
||||||
|
|
|
@ -87,6 +87,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||||
with HasCoreIO {
|
with HasCoreIO {
|
||||||
|
|
||||||
val clock_en_reg = RegInit(true.B)
|
val clock_en_reg = RegInit(true.B)
|
||||||
|
val long_latency_stall = Reg(Bool())
|
||||||
val imem_might_request_reg = Reg(Bool())
|
val imem_might_request_reg = Reg(Bool())
|
||||||
val clock_en = Wire(init=true.B)
|
val clock_en = Wire(init=true.B)
|
||||||
val gated_clock =
|
val gated_clock =
|
||||||
|
@ -671,8 +672,12 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||||
checkHazards(fp_hazard_targets, fp_sboard.read _)
|
checkHazards(fp_hazard_targets, fp_sboard.read _)
|
||||||
} else Bool(false)
|
} else Bool(false)
|
||||||
|
|
||||||
val dcache_blocked = Reg(Bool())
|
val dcache_blocked = {
|
||||||
dcache_blocked := !io.dmem.req.ready && io.dmem.clock_enabled && (io.dmem.req.valid || dcache_blocked)
|
// speculate that a blocked D$ will unblock the cycle after a Grant, so stop reporting blocked as soon as perf.grant is asserted
|
||||||
|
val blocked = Reg(Bool())
|
||||||
|
blocked := !io.dmem.req.ready && io.dmem.clock_enabled && !io.dmem.perf.grant && (blocked || io.dmem.req.valid || io.dmem.s2_nack)
|
||||||
|
blocked && !io.dmem.perf.grant
|
||||||
|
}
|
||||||
val rocc_blocked = Reg(Bool())
|
val rocc_blocked = Reg(Bool())
|
||||||
rocc_blocked := !wb_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
|
rocc_blocked := !wb_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
|
||||||
|
|
||||||
|
@ -762,14 +767,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||||
|
|
||||||
// gate the clock
|
// gate the clock
|
||||||
if (rocketParams.clockGate) {
|
if (rocketParams.clockGate) {
|
||||||
clock_en := clock_en_reg || (!csr.io.csr_stall && io.imem.resp.valid)
|
long_latency_stall := csr.io.csr_stall || io.dmem.perf.blocked
|
||||||
|
clock_en := clock_en_reg || (!long_latency_stall && io.imem.resp.valid)
|
||||||
clock_en_reg :=
|
clock_en_reg :=
|
||||||
ex_pc_valid || mem_pc_valid || wb_pc_valid || // instruction in flight
|
ex_pc_valid || mem_pc_valid || wb_pc_valid || // instruction in flight
|
||||||
io.ptw.customCSRs.disableCoreClockGate || // chicken bit
|
io.ptw.customCSRs.disableCoreClockGate || // chicken bit
|
||||||
!div.io.req.ready || // mul/div in flight
|
!div.io.req.ready || // mul/div in flight
|
||||||
usingFPU && !io.fpu.fcsr_rdy || // long-latency FPU in flight
|
usingFPU && !io.fpu.fcsr_rdy || // long-latency FPU in flight
|
||||||
io.dmem.replay_next || // long-latency load replaying
|
io.dmem.replay_next || // long-latency load replaying
|
||||||
(!csr.io.csr_stall && (ibuf.io.inst(0).valid || io.imem.resp.valid)) // instruction pending
|
(!long_latency_stall && (ibuf.io.inst(0).valid || io.imem.resp.valid)) // instruction pending
|
||||||
}
|
}
|
||||||
|
|
||||||
// evaluate performance counters
|
// evaluate performance counters
|
||||||
|
|
Loading…
Reference in New Issue