Merge pull request #1786 from freechipsproject/qor

Minor QoR improvements
This commit is contained in:
Andrew Waterman 2019-01-31 19:27:13 -08:00 committed by GitHub
commit cf048eb481
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 69 additions and 51 deletions

View File

@ -10,6 +10,7 @@ import freechips.rocketchip.tile.LookupByHartId
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._
import freechips.rocketchip.util.property._
import chisel3.core.{DontCare, WireInit}
import chisel3.internal.sourceinfo.SourceInfo
import chisel3.experimental._
import TLMessages._
@ -32,7 +33,6 @@ class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
val addr = Bits(width = untagBits)
val write = Bool()
val wdata = UInt(width = encBits * rowBytes / eccBytes)
val poison = Bool()
val wordMask = UInt(width = rowBytes / wordBytes)
val eccMask = UInt(width = wordBytes / eccBytes)
val way_en = Bits(width = nWays)
@ -116,7 +116,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val dataArb = Module(new Arbiter(new DCacheDataReq, 4) with InlineInstance)
dataArb.io.in.tail.foreach(_.bits.wdata := dataArb.io.in.head.bits.wdata) // tie off write ports by default
data.io.req <> dataArb.io.out
data.io.req.bits.wdata := encodeData(dataArb.io.out.bits.wdata(rowBits-1, 0), dataArb.io.out.bits.poison)
dataArb.io.out.ready := true
metaArb.io.out.ready := clock_en_reg
@ -161,6 +160,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_waw_hazard = Wire(Bool())
val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_retry :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8)
val supports_flush = outer.flushOnFenceI || coreParams.haveCFlush
val flushed = Reg(init=Bool(true))
val cached_grant_wait = Reg(init=Bool(false))
val release_ack_wait = Reg(init=Bool(false))
val can_acquire_before_release = !release_ack_wait && release_queue_empty
@ -173,6 +174,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// I/O MSHRs
val uncachedInFlight = RegInit(Vec.fill(maxUncachedInFlight)(false.B))
val uncachedReqs = Reg(Vec(maxUncachedInFlight, new HellaCacheReq))
val uncachedResp = WireInit(new HellaCacheReq, DontCare)
// hit initiation path
val s0_read = isRead(io.cpu.req.bits.cmd)
@ -236,8 +238,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
(s1_meta_hit_way, s1_meta_hit_state, s1_meta, s1_meta_uncorrected(s1_victim_way))
}
val s1_data_way = Wire(init = if (nWays == 1) 1.U else Mux(inWriteback, releaseWay, s1_hit_way))
val s1_all_data_ways = Vec(data.io.resp :+ dummyEncodeData(tl_out.d.bits.data))
val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).mask)
val s1_all_data_ways = Vec(data.io.resp :+ encodeData(tl_out.d.bits.data, tl_out.d.bits.corrupt))
val s1_mask_xwr = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).mask
val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, s1_mask_xwr)
// for partial writes, s1_data.mask must be a subset of s1_mask_xwr
assert(!(s1_valid_masked && s1_req.cmd === M_PWR) || (s1_mask_xwr | ~io.cpu.s1_data.mask).andR)
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false))
val s2_valid_no_xcpt = s2_valid && !io.cpu.s2_xcpt.asUInt.orR
@ -267,13 +272,22 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s2_meta_error = (s2_meta_uncorrectable_errors | s2_meta_correctable_errors).orR
val s2_flush_valid = s2_flush_valid_pre_tag_ecc && !s2_meta_error
val s2_data = {
val en = s1_valid || inWriteback
val wordsPerRow = rowBits / wordBits
val en = s1_valid || inWriteback || io.cpu.replay_next
val word_en = Mux(inWriteback, Fill(wordsPerRow, 1.U), Mux(!s1_did_read, 0.U, UIntToOH(s1_req.addr.extract((rowBits/8).log2-1, wordBytes.log2), wordsPerRow)))
val s1_way_words = s1_all_data_ways.map(_.grouped(dECC.width(eccBits) * (wordBits / eccBits)))
if (cacheParams.pipelineWayMux) {
val s2_data_way = RegEnable(s1_data_way, en)
val s2_all_data_ways = (0 until nWays).map(i => RegEnable(s1_all_data_ways(i), en))
Mux1H(s2_data_way, s2_all_data_ways)
val s1_word_en = Mux(io.cpu.replay_next, 0.U, word_en)
(for (i <- 0 until wordsPerRow) yield {
val s2_way_en = RegEnable(Mux(s1_word_en(i), s1_data_way, 0.U), en)
val s2_way_words = (0 until nWays).map(j => RegEnable(s1_way_words(j)(i), en))
(0 until nWays).map(j => Mux(s2_way_en(j), s2_way_words(j), 0.U)).reduce(_|_)
}).asUInt
} else {
RegEnable(Mux1H(s1_data_way, s1_all_data_ways), en || tl_out.d.fire())
val s1_word_en = Mux(!io.cpu.replay_next, word_en, UIntToOH(uncachedResp.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes)), wordsPerRow))
(for (i <- 0 until wordsPerRow) yield {
RegEnable(Mux1H(Mux(s1_word_en(i), s1_data_way, 0.U), s1_way_words.map(_(i))), en)
}).asUInt
}
}
val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
@ -286,14 +300,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd)
val s2_data_decoded = decodeData(s2_data)
val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes))
val s2_did_read = RegEnable(s1_did_read, s1_valid_not_nacked)
val s2_data_error = s2_did_read && (s2_data_decoded.map(_.error).grouped(wordBits/eccBits).map(_.reduce(_||_)).toSeq)(s2_word_idx)
val s2_data_error_uncorrectable = (s2_data_decoded.map(_.uncorrectable).grouped(wordBits/eccBits).map(_.reduce(_||_)).toSeq)(s2_word_idx)
val s2_data_error = s2_data_decoded.map(_.error).orR
val s2_data_error_uncorrectable = s2_data_decoded.map(_.uncorrectable).orR
val s2_data_corrected = (s2_data_decoded.map(_.corrected): Seq[UInt]).asUInt
val s2_data_uncorrected = (s2_data_decoded.map(_.uncorrected): Seq[UInt]).asUInt
val s2_valid_hit_pre_data_ecc = s2_valid_masked && s2_readwrite && !s2_meta_error && s2_hit
val s2_valid_data_error = s2_valid_hit_pre_data_ecc && s2_data_error && can_acquire_before_release
val s2_valid_hit = s2_valid_hit_pre_data_ecc && !s2_data_error && (!s2_waw_hazard || s2_store_merge)
val s2_valid_hit_pre_data_ecc_and_waw = s2_valid_masked && s2_readwrite && !s2_meta_error && s2_hit
val s2_valid_hit_pre_data_ecc = s2_valid_hit_pre_data_ecc_and_waw && (!s2_waw_hazard || s2_store_merge)
val s2_valid_data_error = s2_valid_hit_pre_data_ecc_and_waw && s2_data_error && can_acquire_before_release
val s2_valid_hit = s2_valid_hit_pre_data_ecc && !s2_data_error
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_meta_error && !s2_hit && can_acquire_before_release
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached && !uncachedInFlight.asUInt.orR
dontTouch(s2_valid_cached_miss)
@ -309,8 +323,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val (s2_victim_dirty, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH)
dontTouch(s2_victim_dirty)
val s2_update_meta = s2_hit_state =/= s2_new_hit_state
io.cpu.s2_nack := s2_valid_no_xcpt && !s2_valid_hit && !(s2_valid_uncached_pending && tl_out_a.ready)
when (io.cpu.s2_nack || (s2_valid_hit && s2_update_meta)) { s1_nack := true }
io.cpu.s2_nack := s2_valid_no_xcpt && !(s2_valid_uncached_pending && tl_out_a.ready || supports_flush && s2_req.cmd === M_FLUSH_ALL && flushed) && !s2_valid_hit
when (io.cpu.s2_nack || (s2_valid_hit_pre_data_ecc_and_waw && s2_update_meta)) { s1_nack := true }
// tag updates on ECC errors
val s2_first_meta_corrected = PriorityMux(s2_meta_correctable_errors, s2_meta_corrected)
@ -326,7 +340,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
}
// tag updates on hit
metaArb.io.in(2).valid := s2_valid_hit_pre_data_ecc && s2_update_meta
metaArb.io.in(2).valid := s2_valid_hit_pre_data_ecc_and_waw && s2_update_meta
metaArb.io.in(2).bits.write := !s2_data_error && !io.cpu.s2_kill
metaArb.io.in(2).bits.way_en := s2_victim_way
metaArb.io.in(2).bits.idx := s2_vaddr(idxMSB, idxLSB)
@ -353,7 +367,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// don't perform data correction if it might clobber a recent store
val s2_correct = s2_data_error && !any_pstore_valid && !RegNext(any_pstore_valid) && Bool(usingDataScratchpad)
// pending store buffer
val s2_valid_correct = s2_valid_hit_pre_data_ecc && s2_correct && !io.cpu.s2_kill
val s2_valid_correct = s2_valid_hit_pre_data_ecc_and_waw && s2_correct && !io.cpu.s2_kill
def s2_store_valid_pre_kill = s2_valid_hit && s2_write && !s2_sc_fail
def s2_store_valid = s2_store_valid_pre_kill && !io.cpu.s2_kill
val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write)
@ -370,7 +384,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val pstore_drain_on_miss = releaseInFlight || RegNext(io.cpu.s2_nack)
val pstore1_held = Reg(Bool())
val pstore1_valid_likely = s2_valid && s2_write || pstore1_held
def pstore1_valid_not_rmw(s2_kill: Bool) = s2_valid_hit_pre_data_ecc && (!s2_waw_hazard || s2_store_merge) && s2_write && !s2_sc_fail && !s2_kill || pstore1_held
def pstore1_valid_not_rmw(s2_kill: Bool) = s2_valid_hit_pre_data_ecc && s2_write && !s2_kill || pstore1_held
val pstore1_valid = s2_store_valid || pstore1_held
any_pstore_valid := pstore1_held || pstore2_valid
val pstore_drain_structural = pstore1_valid_likely && pstore2_valid && ((s1_valid && s1_write) || pstore1_rmw)
@ -415,15 +429,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
dataArb.io.in(0).bits.write := pstore_drain
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_data))
dataArb.io.in(0).bits.poison := false
dataArb.io.in(0).bits.wdata := encodeData(Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_data)), false.B)
dataArb.io.in(0).bits.wordMask := UIntToOH(Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb))
dataArb.io.in(0).bits.eccMask := eccMask(Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask))
// store->load RAW hazard detection
def s1Depends(addr: UInt, mask: UInt) =
addr(idxMSB, wordOffBits) === s1_req.addr(idxMSB, wordOffBits) &&
Mux(s1_write, (eccByteMask(mask) & eccByteMask(s1_mask)).orR, (mask & s1_mask).orR)
Mux(s1_write, (eccByteMask(mask) & eccByteMask(s1_mask_xwr)).orR, (mask & s1_mask_xwr).orR)
val s1_hazard =
(pstore1_valid_likely && s1Depends(pstore1_addr, pstore1_mask)) ||
(pstore2_valid && s1Depends(pstore2_addr, pstore2_storegen_mask))
@ -517,7 +530,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
}
} .elsewhen (grantIsUncached) {
val d_sel = UIntToOH(tl_out.d.bits.source, maxUncachedInFlight+mmioOffset) >> mmioOffset
val req = Mux1H(d_sel, uncachedReqs)
uncachedResp := Mux1H(d_sel, uncachedReqs)
(d_sel.asBools zip uncachedInFlight) foreach { case (s, f) =>
when (s && d_last) {
assert(f, "An AccessAck was unexpected by the dcache.") // TODO must handle Ack coming back on same cycle!
@ -528,14 +541,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
if (!cacheParams.pipelineWayMux)
s1_data_way := 1.U << nWays
s2_req.cmd := M_XRD
s2_req.typ := req.typ
s2_req.tag := req.tag
s2_req.typ := uncachedResp.typ
s2_req.tag := uncachedResp.tag
s2_req.addr := {
require(rowOffBits >= beatOffBits)
val dontCareBits = s1_paddr >> rowOffBits << rowOffBits
dontCareBits | req.addr(beatOffBits-1, 0)
dontCareBits | uncachedResp.addr(beatOffBits-1, 0)
}
s2_uncached_resp_addr := req.addr
s2_uncached_resp_addr := uncachedResp.addr
}
} .elsewhen (grantIsVoluntary) {
assert(release_ack_wait, "A ReleaseAck was unexpected by the dcache.") // TODO should handle Ack coming back on same cycle!
@ -560,8 +573,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
dataArb.io.in(1).bits.write := true
dataArb.io.in(1).bits.addr := (s2_vaddr >> idxLSB) << idxLSB | d_address_inc
dataArb.io.in(1).bits.way_en := s2_victim_way
dataArb.io.in(1).bits.wdata := tl_out.d.bits.data
dataArb.io.in(1).bits.poison := tl_out.d.bits.corrupt
dataArb.io.in(1).bits.wdata := s1_all_data_ways.last
dataArb.io.in(1).bits.wordMask := ~UInt(0, rowBytes / wordBytes)
dataArb.io.in(1).bits.eccMask := ~UInt(0, wordBytes / eccBytes)
} else {
@ -610,8 +622,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
val releaseDataBeat = Cat(UInt(0), c_count) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
val writeback_data_error = s2_data_decoded.map(_.error).reduce(_||_)
val writeback_data_uncorrectable = s2_data_decoded.map(_.uncorrectable).reduce(_||_)
val nackResponseMessage = edge.ProbeAck(b = probe_bits, reportPermissions = TLPermissions.NtoN)
val cleanReleaseMessage = edge.ProbeAck(b = probe_bits, reportPermissions = s2_report_param)
@ -681,7 +691,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
tl_out_c.bits.source := probe_bits.source
tl_out_c.bits.address := probe_bits.address
tl_out_c.bits.data := s2_data_corrected
tl_out_c.bits.corrupt := inWriteback && writeback_data_uncorrectable
tl_out_c.bits.corrupt := inWriteback && s2_data_error_uncorrectable
}
dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
@ -700,7 +710,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
when (metaArb.io.in(4).fire()) { release_state := s_ready }
// cached response
io.cpu.resp.valid := s2_valid_hit
io.cpu.resp.bits <> s2_req
io.cpu.resp.bits.has_data := s2_read
io.cpu.resp.bits.replay := false
@ -727,21 +736,24 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
}
// uncached response
io.cpu.replay_next := tl_out.d.fire() && grantIsUncachedData
val s2_uncached_data_word = {
val word_idx = uncachedResp.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes))
val words = tl_out.d.bits.data.grouped(wordBits)
RegEnable(words(word_idx), io.cpu.replay_next)
}
val doUncachedResp = Reg(next = io.cpu.replay_next)
val s2_uncached_data_beat = RegEnable(tl_out.d.bits.data, io.cpu.replay_next)
io.cpu.resp.valid := (s2_valid_hit_pre_data_ecc || doUncachedResp) && !s2_data_error
io.cpu.replay_next := tl_out.d.fire() && grantIsUncachedData
when (doUncachedResp) {
assert(!s2_valid_hit)
io.cpu.resp.valid := true
io.cpu.resp.bits.replay := true
io.cpu.resp.bits.addr := s2_uncached_resp_addr
}
// load data subword mux/sign extension
val s2_data_word = ((0 until rowBits by wordBits).map(i => s2_data_uncorrected(wordBits+i-1,i)): Seq[UInt])(s2_word_idx)
val s2_data_word_corrected = ((0 until rowBits by wordBits).map(i => s2_data_corrected(wordBits+i-1,i)): Seq[UInt])(s2_word_idx)
val s2_uncached_data_word = ((0 until cacheDataBits by wordBits).map(i => s2_uncached_data_beat(wordBits+i-1,i)): Seq[UInt])(s2_word_idx)
val s2_data_word_possibly_uncached = Mux(cacheParams.pipelineWayMux && doUncachedResp, s2_uncached_data_word, s2_data_word)
val s2_data_word = (0 until rowBits by wordBits).map(i => s2_data_uncorrected(wordBits+i-1,i)).reduce(_|_)
val s2_data_word_corrected = (0 until rowBits by wordBits).map(i => s2_data_corrected(wordBits+i-1,i)).reduce(_|_)
val s2_data_word_possibly_uncached = Mux(cacheParams.pipelineWayMux && doUncachedResp, s2_uncached_data_word, 0.U) | s2_data_word
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word_possibly_uncached, s2_sc, wordBytes)
io.cpu.resp.bits.data := loadgen.data | s2_sc_fail
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
@ -768,7 +780,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val resetting = RegInit(false.B)
if (!usingDataScratchpad)
when (RegNext(reset)) { resetting := true }
val flushed = Reg(init=Bool(true))
val flushing = Reg(init=Bool(false))
val flushCounter = Reg(init=UInt(nSets * (nWays-1), log2Ceil(nSets * nWays)))
val flushCounterNext = flushCounter +& 1
@ -785,10 +796,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
metaArb.io.in(5).bits.data := metaArb.io.in(4).bits.data
// Only flush D$ on FENCE.I if some cached executable regions are untracked.
val supports_flush = outer.flushOnFenceI || coreParams.haveCFlush
if (supports_flush) {
when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
io.cpu.s2_nack := !flushed
when (!flushed) {
flushing := !io.cpu.s2_kill && !release_ack_wait && !uncachedInFlight.asUInt.orR
}
@ -867,8 +876,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// report errors
val (data_error, data_error_uncorrectable, data_error_addr) =
if (usingDataScratchpad) (s2_valid_data_error, s2_data_error_uncorrectable, s2_req.addr) else {
(RegNext(tl_out_c.fire() && inWriteback && writeback_data_error),
RegNext(writeback_data_uncorrectable),
(RegNext(tl_out_c.fire() && inWriteback && s2_data_error),
RegNext(s2_data_error_uncorrectable),
probe_bits.address) // This is stable for a cycle after tl_out_c.fire, so don't need a register
}
{

View File

@ -94,7 +94,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
def entry_data = data.map(_.asTypeOf(new EntryData))
private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2-1, 0)
def getData(vpn: UInt) = data(sectorIdx(vpn)).asTypeOf(new EntryData)
def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new EntryData))
def sectorHit(vpn: UInt) = valid.orR && sectorTagMatch(vpn)
def sectorTagMatch(vpn: UInt) = ((tag ^ vpn) >> nSectors.log2) === 0
def hit(vpn: UInt) = {

View File

@ -101,8 +101,8 @@ class BusErrorUnit[T <: BusErrors](t: => T, params: BusErrorUnitParams)(implicit
}
when (cause === 0 && cause_wen) {
cause := OptimizationBarrier(new_cause)
value := OptimizationBarrier(new_value)
cause := new_cause
value := new_value
}
val (int_out, _) = intNode.out(0)

View File

@ -207,6 +207,15 @@ package object util {
helper(1, x)(width-1, 0)
}
def OptimizationBarrier(x: UInt): UInt = ~(~x)
def OptimizationBarrier[T <: Data](x: T): T = OptimizationBarrier(x.asUInt).asTypeOf(x)
def OptimizationBarrier[T <: Data](in: T): T = {
val foo = Module(new Module {
val io = IO(new Bundle {
val x = Input(in)
val y = Output(in)
})
io.y := io.x
})
foo.io.x := in
foo.io.y
}
}