package memControl

import Chisel._
import Chisel.ImplicitConversions._
import freechips.rocketchip.tile._
import freechips.rocketchip.config._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.rocket._
import freechips.rocketchip.tilelink._
//import freechips.rocketchip.util.InOrderArbiter
import freechips.rocketchip.util._
import freechips.rocketchip.system._
import vivadoHLS._
import controlUtils._

/** Ports of [[RequestParser]]: one incoming ap_bus request, the rewritten
  * outgoing request, and the lines used to load the address offset register.
  *
  * @param dataWidth width in bits of the request data field
  * @param addrWidth width in bits of the request address field
  */
class RequestParserIO(dataWidth: Int, addrWidth: Int) extends Bundle{
  val reqIn = new ApBusReq(dataWidth, addrWidth).flip
  val reqOut = new ApBusReq(dataWidth, addrWidth)
  val offsetAddr = UInt(INPUT, width=addrWidth)
  val loadOffset = Bool(INPUT)

  override def cloneType: this.type = new RequestParserIO(dataWidth, addrWidth).asInstanceOf[this.type]
}

/** Translates a bus-relative element address into an absolute byte address:
  * `address * (dataWidth / 8) + offset`, where `offset` is a register loaded
  * from `io.offsetAddr` whenever `io.loadOffset` is high. All other request
  * fields are passed through unchanged.
  */
class RequestParser(dataWidth: Int, addrWidth: Int) extends Module{
  val io = IO(new RequestParserIO(dataWidth, addrWidth))

  val offsetReg = Reg(init = UInt(0, width=addrWidth))
  //Number of bytes per bus element; element indices are scaled by this amount
  val multAmt = UInt(dataWidth/8)

  when(io.loadOffset){
    offsetReg := io.offsetAddr
  }

  io.reqOut.din := io.reqIn.din
  io.reqOut.dataout := io.reqIn.dataout
  io.reqOut.size := io.reqIn.size
  io.reqOut.address := (io.reqIn.address*multAmt) + offsetReg
}

/** A request bundle paired with a timestamp, built from the directioned
  * [[ApBusReq]] type. The timestamp is `log2Up(counterSize)` bits wide.
  */
class TimestampedRequestIO(dataWidth:Int, addrWidth:Int, counterSize: Int) extends Bundle{
  val req = new ApBusReq(dataWidth, addrWidth)
  val timestamp = UInt(INPUT, log2Up(counterSize))

  override def cloneType: this.type = new TimestampedRequestIO(dataWidth, addrWidth, counterSize).asInstanceOf[this.type]
}

/** Direction-less request payload (usable as a queue element type).
  * NOTE(review): field names mirror the ones this file reads from ApBusReq
  * (din, address, dataout, size); ApBusReq itself is defined elsewhere.
  */
class ApBusReqType (dataWidth:Int, addrWidth:Int) extends Bundle{
  //Req specific lines
  //Specifies a write request
  val din = Bool() //req_din in verilog
  //Lines used for req
  val address = UInt(width = addrWidth)
  val dataout = UInt(width = dataWidth)
  val size = UInt(width = addrWidth)

  override def cloneType: this.type = new ApBusReqType(dataWidth, addrWidth).asInstanceOf[this.type]
}

/** Direction-less request payload plus timestamp; the element type of the
  * per-bus ingest queues.
  *
  * @param counterSize number of distinct timestamp values (NOT the bit width);
  *                    the timestamp field is `log2Up(counterSize)` bits wide
  */
class TimestampedRequestIOType(dataWidth:Int, addrWidth:Int, counterSize: Int) extends Bundle{
  val req = new ApBusReqType(dataWidth, addrWidth)
  val timestamp = UInt(width = log2Up(counterSize))

  override def cloneType: this.type = new TimestampedRequestIOType(dataWidth, addrWidth, counterSize).asInstanceOf[this.type]
}

/** Ports of [[RequestIngest]]. `dataWidth` and `addrWidth` hold one entry per
  * ap_bus; the number of buses is `dataWidth.length`.
  */
class RequestIngestIO(dataWidth: Seq[Int], addrWidth: Seq[Int], counterSize: Int, inputBufferLen: Int) extends Bundle{
  //val reqsIn = Vec.tabulate(dataWidth.length)((i) => Wire(new ApBusReq(dataWidth(i), addrWidth(i)).flip))
  val reqsIn = HeterogeneousBag(dataWidth.zip(addrWidth).map { case (dw, aw) => new ApBusReq(dw, aw) }).flip
  val reqsFullN = Vec(dataWidth.length, Output(Bool()))
  //val reqsWrite = Vec(dataWidth.length, Bool(INPUT)).flip
  val reqsWrite = Vec(dataWidth.length, Input(Bool()))

  //val offsetAddrs = Vec.tabulate(dataWidth.length)((i) => UInt(INPUT, width=addrWidth(i)))
  //val offsetAddrs = HeterogeneousBag(addrWidth.map(aw => UInt(INPUT, width = aw)))
  val offsetAddrs = HeterogeneousBag(addrWidth.map(aw => Input(UInt(aw.W))))
  //val loadOffsets = Vec(dataWidth.length, Bool(INPUT)).flip
  val loadOffsets = Vec(dataWidth.length, Input(Bool()))

  //The number of new requests received in this cycle (used to track number of outstanding requests)
  val newRequests = UInt(OUTPUT, width = log2Up(dataWidth.length)+1)

  //The widths are the maximums of all of the input widths
  val reqOut = Decoupled(new ApBusReq(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0))
  val selectedBus = UInt(OUTPUT, log2Up(dataWidth.length))

  override def cloneType: this.type = new RequestIngestIO(dataWidth, addrWidth, counterSize, inputBufferLen).asInstanceOf[this.type]
}

/** Buffers requests from every ap_bus in a per-bus queue, stamps each with the
  * value of a shared counter at enqueue time, and forwards one request at a
  * time through a PriorityArbiter whose per-input priority is the distance of
  * the head timestamp from the last serviced timestamp.
  *
  * @param dataWidth      data width in bits of each bus
  * @param addrWidth      address width in bits of each bus
  * @param inputBufferLen depth of each per-bus request queue
  */
class RequestIngest(dataWidth: Seq[Int], addrWidth: Seq[Int], inputBufferLen: Int) extends Module{
  val busCount = dataWidth.length

  //We only need enough counter values to account for the worst case scenario when memory is stalled and each buffer is filled one at a time
  //Adding an extra 1 is probably overly conservative as the extra one may be reassigned but is not at the front of the queue. If it was, then a queue is not
  //Taking the base 2 log and raising 2 to that power ensures that the overflow semantics are what is expected. This is important for the priority difference
  val counterSize = BigInt(2).pow(log2Up(inputBufferLen*busCount)).toInt

  val io = IO(new RequestIngestIO(dataWidth, addrWidth, counterSize, inputBufferLen))

  val counter = Counter(counterSize)
  val prevTimestamp = Reg(init = UInt(0, width=log2Up(counterSize)))

  val parsers = Seq.tabulate(busCount)((i) => Module(new RequestParser(dataWidth(i), addrWidth(i))).io)

  //val queues = Vec.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIOType(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen)))
  //TimestampedRequestIOType takes the NUMBER of counter values and applies log2Up itself.
  //Passing log2Up(counterSize) here made the stored timestamp only log2Up(log2Up(counterSize))
  //bits wide, silently truncating counter.value and corrupting the priority computation below.
  val queues = Seq.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIOType(dataWidth(i), addrWidth(i), counterSize), inputBufferLen)).io)
  /*val queues = HeterogeneousBag(dataWidth.zip(addrWidth).map{ case (dw, aw) =>
      Module(new Queue(new TimestampedRequestIOType(dw, aw, log2Up(counterSize)), inputBufferLen))
    })*/
  //Vec.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIO(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen)).io)

  //NOTE(review): the third PriorityArbiter argument is presumably the priority bit width - confirm against controlUtils
  val arbiter = Module(new PriorityArbiter(new ApBusReq(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0), busCount, log2Up(counterSize)))

  val incrCounter = Wire(Bool(false))
  if (dataWidth.length > 0) {
    incrCounter := io.reqsWrite.reduce(_||_)
  }

  //+1 is because we need to be able to represent 0 and BusCount not BusCount-1
  //Both Mux arms use the same literal width (the Mux already widened to busCount+1 before)
  val requestsUInt = io.reqsWrite.map((x) => Mux(x, UInt(1, width=busCount+1), UInt(0, width=busCount+1)))
  if (dataWidth.length > 0) {
    io.newRequests := requestsUInt.reduce(_+_)
  }

  //Increment counter when any request comes in
  when(incrCounter){
    counter.inc()
  }

  for(i <- 0 until busCount){
    parsers(i).offsetAddr := io.offsetAddrs(i)
    parsers(i).loadOffset := io.loadOffsets(i)
    parsers(i).reqIn := io.reqsIn(i)
    //parsers(i).reqIn <> io.reqsIn(i)

    queues(i).enq.bits.req := parsers(i).reqOut
    queues(i).enq.bits.timestamp := counter.value //put in the timestamp!
    io.reqsFullN(i) := queues(i).enq.ready
    queues(i).enq.valid := io.reqsWrite(i)

    queues(i).deq.ready := arbiter.io.in(i).ready
    arbiter.io.in(i).valid := queues(i).deq.valid
    arbiter.io.in(i).bits := queues(i).deq.bits.req
    //Priority is oldest first. It is defined as the difference from the last serviced timestamp
    //This works even with overflow so long as counterSize is a power of 2
    arbiter.io.priority(i) := queues(i).deq.bits.timestamp - prevTimestamp
  }

  //Advance the reference timestamp by the serviced request's priority once it is accepted downstream
  when(arbiter.io.out.valid && io.reqOut.ready){
    prevTimestamp := prevTimestamp + arbiter.io.priorityOut
  }

  //io.reqOut := arbiter.io.out
  io.reqOut.valid := arbiter.io.out.valid
  arbiter.io.out.ready := io.reqOut.ready
  io.reqOut.bits := arbiter.io.out.bits
  io.selectedBus := arbiter.io.chosen
}

/** Ports of [[RequestIssuer]].
  *
  * @param maxReqWidth bit width of the fields that carry a request size in bytes
  * @param numBus      number of ap_bus interfaces feeding the arbiter
  */
class RequestIssuerIO(dataWidth: Int, addrWidth:Int, maxReqWidth:Int, numBus:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int) extends Bundle{
  //Incoming request from arbiter
  //val reqIn = Decoupled(new ApBusReq(dataWidth, addrWidth)).flip
  val reqIn = Flipped(Decoupled(new ApBusReq(dataWidth, addrWidth)))
  //The bus that the request came from
  val reqBus = UInt(INPUT, width=log2Up(numBus))

  //Lines for table address check and for updating the table
  val accessWidth = UInt(OUTPUT, width=maxReqWidth)
  val accessRead = Bool(INPUT)
  val conflict = Bool(INPUT)
  //val tagQueueIO = Decoupled(UInt(OUTPUT, width=roccTagWidth)).flip
  val tagQueueIO = Flipped(Decoupled(Output(UInt(roccTagWidth.W))))
  val busNum = UInt(OUTPUT, width=log2Up(numBus))

  //RoCC Lines
  val roCCReqAddr = UInt(OUTPUT, width = roccAddrWidth) // coreMaxAddrBits)
  val roCCReqTag = UInt(OUTPUT, width = roccTagWidth) //coreDCacheReqTagBits)
  val roCCReqCmd = UInt(OUTPUT, width = roccCmdWidth) //M_SZ)
  val roCCReqTyp = UInt(OUTPUT, width = roccTypWidth) //
  val roCCReqData = UInt(OUTPUT, width = roccDataWidth) //coreDataBits)
  val roCCReqValid = Bool(OUTPUT)
  val roCCReqRdy = Bool(INPUT)

  val reqWidth = UInt(OUTPUT, width = maxReqWidth) //Pass to table to specify width of request
  val reqSent = Bool(OUTPUT)

  override def cloneType: this.type = new RequestIssuerIO(dataWidth, addrWidth, maxReqWidth, numBus, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth).asInstanceOf[this.type]
}

/** Turns the arbitrated ap_bus request into a RoCC memory request. A request is
  * offered when the incoming request is valid, the table reports no conflict
  * and a tag is available; it is consumed (together with the tag) in the cycle
  * the RoCC side is also ready.
  *
  * @param maxReqBytes   largest request size in bytes (8, i.e. 64 bit, in our case)
  * @param busDataWidths data width in bits of each bus, indexed by bus number
  */
class RequestIssuer(dataWidth: Int, addrWidth:Int, maxReqBytes:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int, busDataWidths:Seq[Int]) extends Module{
  //The size fields carry a byte COUNT in 0..maxReqBytes, so maxReqBytes itself must be
  //representable. log2Up(maxReqBytes) is one bit short for powers of two (8 truncated to 0).
  val maxReqWidth = log2Up(maxReqBytes + 1)
  val numBus = busDataWidths.length

  val io = IO(new RequestIssuerIO(dataWidth, addrWidth, maxReqWidth, numBus, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth))

  if (busDataWidths.length > 0) {
    //Byte width of every bus, selected by the bus the current request came from
    val busByteWidth = Vec(busDataWidths.map((x) => UInt(x)/UInt(8)))
    val currentByteWidth = busByteWidth(io.reqBus)

    io.busNum := io.reqBus
    io.roCCReqAddr := io.reqIn.bits.address
    io.roCCReqData := io.reqIn.bits.dataout
    io.roCCReqTag := io.tagQueueIO.bits
    io.roCCReqCmd := Mux(io.reqIn.bits.din, M_XWR, M_XRD) //Set the transaction type (Write / Read)
    //Set transaction width: log2 of the byte count, defaulting to a single byte
    io.roCCReqTyp := Mux(currentByteWidth===UInt(8), log2Ceil(8).U,
                     Mux(currentByteWidth===UInt(4), log2Ceil(4).U,
                     Mux(currentByteWidth===UInt(2), log2Ceil(2).U, log2Ceil(1).U)))

    io.reqWidth := currentByteWidth
    io.accessWidth := currentByteWidth

    io.roCCReqValid := io.reqIn.valid && !io.conflict && io.tagQueueIO.valid

    val memTransactSent = io.reqIn.valid && io.roCCReqRdy && !io.conflict && io.tagQueueIO.valid
    io.reqIn.ready := memTransactSent
    io.tagQueueIO.ready := memTransactSent //We used a tag when we issued a request
    io.reqSent := memTransactSent
  }
}

/** Ports of [[RoutingTable]]: a write port for issued requests (`req*`), a
  * combinational conflict check (`check*` / `conflict`) and a response lookup
  * (`resp*`).
  */
class RoutingTableIO(tagWidth:Int, numBus:Int, addrWidth: Int, maxReqWidth: Int) extends Bundle{
  val reqValid = Bool(INPUT)
  val reqTag = UInt(INPUT, width = tagWidth)
  val reqWrite = Bool(INPUT) //If the transaction is a write
  val reqAddr = UInt(INPUT, width = addrWidth)
  val reqBus = UInt(INPUT, log2Up(numBus))
  val reqWidth = UInt(INPUT, width = maxReqWidth)

  val checkAddr = UInt(INPUT, width = addrWidth)
  val checkWidth = UInt(INPUT, width = maxReqWidth)
  val checkRead = Bool(INPUT)
  val conflict = Bool(OUTPUT)

  val respTag = UInt(INPUT, width = tagWidth)
  val respVaid = Bool(INPUT)
  val respBus = UInt(OUTPUT, width = log2Up(numBus))

  override def cloneType: this.type = new RoutingTableIO(tagWidth, numBus, addrWidth, maxReqWidth).asInstanceOf[this.type]
}

/** Table of outstanding memory requests, indexed by tag. It records the
  * address range, direction and originating bus of every issued request,
  * reports whether a candidate access overlaps an outstanding one, and returns
  * the originating bus for a response tag.
  *
  * @param maxReqBytes largest request size in bytes (8, i.e. 64 bit, in our case)
  */
class RoutingTable(tagWidth:Int, numTags:Int, numBus:Int, addrWidth: Int, maxReqBytes: Int) extends Module{
  //Must be able to hold the value maxReqBytes itself (see RequestIssuer); with
  //log2Up(maxReqBytes) a full-width request was stored with width 0 and never conflicted.
  val maxReqWidth = log2Up(maxReqBytes + 1)
  //val numTags = BigInt(2).pow(tagWidth).toInt

  val io = IO(new RoutingTableIO(tagWidth, numBus, addrWidth, maxReqWidth))

  if (addrWidth > 0) {
    val v = Reg(Vec.fill(numTags)(Bool(false))) //valid (outstanding memory request)
    val write = Reg(Vec.fill(numTags)(Bool(false))) //outstanding request is a write
    val addr = Reg(Vec.fill(numTags)(UInt(0, width=addrWidth)))
    val width = Reg(Vec.fill(numTags)(UInt(0, width=maxReqWidth)))
    val bus = Reg(Vec.fill(numTags)(UInt(0, width=log2Up(numBus))))

    //clear returned transaction. The tag queue prevents accidentally clearing a tag that has not yet returned
    //This must come BEFORE the set below: if a tag is returned and re-issued in the same
    //cycle, the later connection wins and the entry correctly stays valid for the new request.
    when(io.respVaid){
      v(io.respTag) := Bool(false) //This entry is no longer valid
    }

    //write Req into table
    when(io.reqValid){
      v(io.reqTag) := Bool(true) //Set table entry as valid
      write(io.reqTag) := io.reqWrite //Set write flag
      addr(io.reqTag) := io.reqAddr //Set addr
      width(io.reqTag) := io.reqWidth //Set transaction width (in bytes)
      bus(io.reqTag) := io.reqBus //Set the requesting bus
    }

    //Return the bus that requested the returning transaction
    io.respBus := bus(io.respTag)

    //Reads can execute if there is no outstanding write to the address (v==false || write==false)
    //  Reads stall when v==true and write==true
    //Writes can only occur if there are no outstanding ops (v==false)
    //  Writes stall when v==true
    //Two byte ranges overlap unless one of them ends at or before the start of the other
    val addrsConflicting = Vec.tabulate(numTags)((i) => !((io.checkAddr + io.checkWidth <= addr(i)) || (addr(i) + width(i) <= io.checkAddr)))
    //There is a conflict if: the address is conflicting, the entry is valid, and if(reading, there is an outstanding write)
    val conflicting = Vec.tabulate(numTags)((i) => addrsConflicting(i) && v(i) && Mux(io.checkRead, write(i), Bool(true)))

    io.conflict := conflicting.reduce(_||_)
  }
}

/** Ports of [[MemController]]: one ap_bus request/response pair per bus, the
  * offset-load lines, a busy flag, and the RoCC memory request/response lines.
  */
class MemControllerIO(dataWidth:Seq[Int], addrWidth:Seq[Int], roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int) extends Bundle{
  //----ap_bus requests----
  val reqsIn = HeterogeneousBag(dataWidth.zip(addrWidth).map { case (dw, aw) => new ApBusReq(dw, aw) }).flip
  //Decoupled signals for requests
  val reqsFullN = Vec(dataWidth.length, Bool(OUTPUT))
  //val reqsWrite = Vec(dataWidth.length, Bool(INPUT)).flip
  val reqsWrite = Vec(dataWidth.length, Input(Bool()))

  //Offset address lines
  //val offsetAddrs = Vec.tabulate(dataWidth.length)((i) => UInt(INPUT, width=addrWidth(i)))
  val offsetAddrs = HeterogeneousBag(addrWidth.map(aw => UInt(INPUT, width = aw)))
  //val loadOffsets = Vec(dataWidth.length, Bool(INPUT)).flip
  val loadOffsets = Vec(dataWidth.length,Input(Bool()))

  //----status line----
  val memBusy = Bool(OUTPUT)

  //----ap_bus response----
  //val rspOut = Vec.tabulate(dataWidth.length)((i) => new ApBusRsp(dataWidth(i)).flip)
  val rspOut = HeterogeneousBag(dataWidth.map(dw => new ApBusRsp(dw))).flip
  val rsp_empty_n = Vec(dataWidth.length, Output(Bool())) //This is the same as valid
  val rsp_read = Vec(dataWidth.length, Input(Bool())) //This is the same as ready

  //----RoCC Mem Req----
  val roCCReqAddr = UInt(OUTPUT, width = roccAddrWidth) // coreMaxAddrBits)
  val roCCReqTag = UInt(OUTPUT, width = roccTagWidth) //coreDCacheReqTagBits)
  val roCCReqCmd = UInt(OUTPUT, width = roccCmdWidth) //M_SZ)
  val roCCReqTyp = UInt(OUTPUT, width = roccTypWidth) //MT_SZ)
  val roCCReqData = UInt(OUTPUT, width = roccDataWidth) //
  val roCCReqValid = Bool(OUTPUT)
  val roCCReqRdy = Bool(INPUT)

  //val roCCRespAddr = UInt(INPUT, width = roccAddrWidth) // coreMaxAddrBits)
  val roCCRspTag = UInt(INPUT, width = roccTagWidth) //coreDCacheReqTagBits)
  val roCCRspCmd = UInt(INPUT, width = roccCmdWidth) //M_SZ)
  val roCCRspData = UInt(INPUT, width = roccDataWidth) //MT_SZ)
  //val roCCRespTyp = UInt(INPUT, width = roccTypWidth) //MT_SZ)
  val roCCRspValid = Bool(INPUT)

  override def cloneType: this.type = new MemControllerIO(dataWidth, addrWidth, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth).asInstanceOf[this.type]
}

/** Top level of the memory controller. Wires together the request ingest
  * stage, the request issuer, the routing table (scoreboard), a queue of free
  * tags, a valid demultiplexer and one response queue per bus. When there are
  * no buses nothing is instantiated.
  *
  * @param reqBufferLen depth of each per-bus request queue
  * @param rspBufferLen depth of each per-bus response queue
  * @param maxReqBytes  largest request size in bytes
  * @param numTags      number of tags (outstanding requests) available
  * @param tagOffset    first tag value owned by this controller
  */
class MemController(dataWidth:Seq[Int], addrWidth:Seq[Int], reqBufferLen:Int, rspBufferLen:Int, maxReqBytes:Int,
                    roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int,
                    numTags:Int, tagOffset:Int ) extends Module{
  val io = IO(new MemControllerIO(dataWidth, addrWidth, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth))

  val numBus = dataWidth.length

  if (numBus > 0){
    val reqIngest = Module(new RequestIngest(dataWidth, addrWidth, reqBufferLen))
    val reqIssuer = Module(new RequestIssuer(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0, maxReqBytes, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth, dataWidth))
    val scoreboard = Module(new RoutingTable(roccTagWidth, numTags, numBus, if (addrWidth.length > 0) addrWidth.max else 0, maxReqBytes))

    //Tags may have an offset if this is not the only accelerator in the system
    val tags = (tagOffset until (tagOffset+numTags))
    val tagUInts = tags.map((x) => UInt(x, width=roccTagWidth).asUInt)
    val tagQueue = Module(new RegisterQueue(gen=UInt(width=roccTagWidth), entries=numTags, initVals=tagUInts, flow=true))

    val rspQueues = Seq.tabulate(numBus)((i) => Module(new Queue(UInt(width=dataWidth(i)), rspBufferLen)).io)

    val validDemux = Module(new ValidDemux(numBus))

    //NOTE(review): this is a bit WIDTH, so the register is far wider than the count seems to need - confirm before narrowing
    val currentRequestNum = Reg(init=UInt(0, width=(reqBufferLen*numBus+1+numTags)))

    //Logic for number of outstanding requests
    currentRequestNum := currentRequestNum + reqIngest.io.newRequests - Mux(io.roCCRspValid, UInt(1), UInt(0))
    io.memBusy := currentRequestNum =/= UInt(0)

    //==== Ingest Logic ====
    //Hook up ap_bus request lines to ingest logic
    //reqIngest.io.reqsIn <> io.reqsIn
    reqIngest.io.reqsIn := io.reqsIn
    io.reqsFullN := reqIngest.io.reqsFullN
    reqIngest.io.reqsWrite := io.reqsWrite
    //reqIngest.io.offsetAddrs := io.offsetAddrs
    //NOTE(review): operand order is deliberately left as the author wrote it; presumably a
    //compatibility-mode aggregate-connect workaround for the per-element INPUT fields - confirm before "fixing"
    io.offsetAddrs := reqIngest.io.offsetAddrs
    reqIngest.io.loadOffsets := io.loadOffsets
    //val newRequests
    //val reqOut
    //val selectedBus

    //====Req Issuer ====
    //Incoming request from arbiter
    //reqIssuer.io.reqIn <> reqIngest.io.reqOut
    reqIssuer.io.reqIn.bits := reqIngest.io.reqOut.bits
    reqIssuer.io.reqIn.valid := reqIngest.io.reqOut.valid
    reqIngest.io.reqOut.ready := reqIssuer.io.reqIn.ready
    //The bus that the request came from
    reqIssuer.io.reqBus := reqIngest.io.selectedBus

    //Lines for table address check and for updating the table
    //val accessWidth
    //val containsAddr
    //reqIssuer.io.tagQueue <> tagQueue.io.deq
    reqIssuer.io.tagQueueIO.bits := tagQueue.io.deq.bits
    reqIssuer.io.tagQueueIO.valid := tagQueue.io.deq.valid
    tagQueue.io.deq.ready := reqIssuer.io.tagQueueIO.ready
    //val busNum
    //val reqWidth //Pass to table to specify width of request

    //RoCC Lines
    io.roCCReqAddr := reqIssuer.io.roCCReqAddr
    io.roCCReqTag := reqIssuer.io.roCCReqTag
    io.roCCReqCmd := reqIssuer.io.roCCReqCmd
    io.roCCReqTyp := reqIssuer.io.roCCReqTyp
    io.roCCReqValid := reqIssuer.io.roCCReqValid
    reqIssuer.io.roCCReqRdy := io.roCCReqRdy
    io.roCCReqData := reqIssuer.io.roCCReqData

    //====Scoreboard====
    scoreboard.io.reqValid := reqIssuer.io.reqSent //Do not commit into the table unless there is a fire
    scoreboard.io.reqTag := reqIssuer.io.roCCReqTag
    scoreboard.io.reqWrite := (reqIssuer.io.roCCReqCmd === M_XWR) //If the transaction is a write
    scoreboard.io.reqAddr := reqIssuer.io.roCCReqAddr
    scoreboard.io.reqBus := reqIssuer.io.busNum
    scoreboard.io.reqWidth := reqIssuer.io.reqWidth

    scoreboard.io.checkAddr := reqIssuer.io.roCCReqAddr
    scoreboard.io.checkWidth := reqIssuer.io.accessWidth
    //NOTE(review): accessRead is declared as an INPUT of RequestIssuerIO and nothing in this
    //file drives it, so checkRead has no visible source - confirm the intended driver
    scoreboard.io.checkRead := reqIssuer.io.accessRead
    reqIssuer.io.conflict := scoreboard.io.conflict

    scoreboard.io.respTag := io.roCCRspTag
    scoreboard.io.respVaid := io.roCCRspValid
    //val scoreboard.io.respBus

    //====TagQueue====
    //Every returned tag goes back into the pool of free tags
    tagQueue.io.enq.bits := io.roCCRspTag
    tagQueue.io.enq.valid := io.roCCRspValid

    //====Demux=====
    //Only return to the bus if this is a response to a read request
    validDemux.io.validIn := io.roCCRspValid && (io.roCCRspCmd===M_XRD)
    validDemux.io.validSelect := scoreboard.io.respBus
    //val validOut = Vec.fill(fanout)(Bool(OUTPUT))

    //====RespQueues====
    for(i <- 0 until numBus){
      //Pass the data to all output queues (slicing to appropriate width), only give one the valid signal
      rspQueues(i).enq.bits := io.roCCRspData((dataWidth(i)-1),0)
      rspQueues(i).enq.valid := validDemux.io.validOut(i)

      io.rspOut(i).datain := rspQueues(i).deq.bits
      io.rsp_empty_n(i) := rspQueues(i).deq.valid
      rspQueues(i).deq.ready := io.rsp_read(i)
    }
  }
}