From 63abcdc980964376543808913129f420d1b7228f Mon Sep 17 00:00:00 2001 From: Jenny Huang Date: Thu, 31 Oct 2019 03:06:27 -0700 Subject: [PATCH] Add perl scripts --- scripts/chisel_rocc_aux/ap_bus.scala | 47 ++ scripts/chisel_rocc_aux/controlUtils.scala | 511 ++++++++++++ .../memControllerComponents.scala | 430 +++++++++++ scripts/generate_accel.pl | 86 +++ scripts/generate_build_sbt.pl | 80 ++ scripts/generate_config.pl | 119 +++ scripts/generate_soc.pl | 165 ++++ scripts/generate_wrapper.pl | 327 ++++++++ scripts/generate_wrapper_tl.pl | 208 +++++ scripts/parse_json.pl | 59 ++ scripts/run_chisel.pl | 724 ++++++++++++++++++ scripts/run_chisel_tl.pl | 560 ++++++++++++++ scripts/run_hls.pl | 100 +++ 13 files changed, 3416 insertions(+) create mode 100644 scripts/chisel_rocc_aux/ap_bus.scala create mode 100644 scripts/chisel_rocc_aux/controlUtils.scala create mode 100644 scripts/chisel_rocc_aux/memControllerComponents.scala create mode 100644 scripts/generate_accel.pl create mode 100644 scripts/generate_build_sbt.pl create mode 100644 scripts/generate_config.pl create mode 100644 scripts/generate_soc.pl create mode 100644 scripts/generate_wrapper.pl create mode 100644 scripts/generate_wrapper_tl.pl create mode 100644 scripts/parse_json.pl create mode 100644 scripts/run_chisel.pl create mode 100644 scripts/run_chisel_tl.pl create mode 100644 scripts/run_hls.pl diff --git a/scripts/chisel_rocc_aux/ap_bus.scala b/scripts/chisel_rocc_aux/ap_bus.scala new file mode 100644 index 0000000..28a3791 --- /dev/null +++ b/scripts/chisel_rocc_aux/ap_bus.scala @@ -0,0 +1,47 @@ +package vivadoHLS + +import Chisel._ + +//These are definitions of the bus standards used on Vivado HLS generated accelerators + +//Request Packet Format +class ApBusReq(dataWidth:Int, addrWidth:Int) extends Bundle{ + //Req specific lines + //Specifies a write request + val din = Bool(OUTPUT) //req_din in verilog + + //Lines used for req + val address = UInt(OUTPUT, width = addrWidth) + val dataout = 
UInt(OUTPUT, width = dataWidth) + val size = UInt(OUTPUT, width = addrWidth) + override def cloneType: this.type = new ApBusReq(dataWidth, addrWidth).asInstanceOf[this.type] +} + +//Response Packet Format +class ApBusRsp(dataWidth:Int) extends Bundle{ + val datain = UInt(INPUT , width = dataWidth) + override def cloneType: this.type = new ApBusRsp(dataWidth).asInstanceOf[this.type] +} + +class ApBusIO(dataWidth:Int = 64, addrWidth:Int = 32) extends Bundle{ + val req = new ApBusReq(dataWidth, addrWidth) + val req_full_n = Bool(INPUT ) //req_full_n in verilog + //Write the request + val req_write = Bool(OUTPUT) //req_write in verilog + + val rsp = new ApBusRsp(dataWidth) + val rsp_empty_n = Bool(INPUT ) + val rsp_read = Bool(OUTPUT) + override def cloneType: this.type = new ApBusIO(dataWidth, addrWidth).asInstanceOf[this.type] +} + +class ApCtrlIO(dataWidth:Int = 64) extends Bundle{ + //val clk = Bool(INPUT ) + //val rst = Bool(INPUT ) + val start = Bool(INPUT ) + val done = Bool(OUTPUT) + val idle = Bool(OUTPUT) + val ready = Bool(OUTPUT) + val rtn = UInt(OUTPUT, width = dataWidth) + override def cloneType: this.type = new ApCtrlIO(dataWidth).asInstanceOf[this.type] +} diff --git a/scripts/chisel_rocc_aux/controlUtils.scala b/scripts/chisel_rocc_aux/controlUtils.scala new file mode 100644 index 0000000..047b867 --- /dev/null +++ b/scripts/chisel_rocc_aux/controlUtils.scala @@ -0,0 +1,511 @@ +package controlUtils + +import Chisel._ + +//This is based on the existing chisel arbiters but has a different implementation +class PriorityArbiterIO[T <: Data](gen: T, n: Int, priorityBits: Int) extends Bundle{ + val in = Vec(n, Flipped(Decoupled(gen))) + val out = Decoupled(gen) + val chosen = UInt(OUTPUT, log2Up(n)) + //Priorities of the inputs (0 is max priority) + val priority = Vec(n,Input(UInt(priorityBits.W))) + //Priority of the outputed value + val priorityOut = UInt(OUTPUT, width = priorityBits) + override def cloneType: this.type = new PriorityArbiterIO(gen, n, 
priorityBits).asInstanceOf[this.type] +} + +class PriorityArbiter[T <: Data](gen: T, n: Int, priorityBits: Int) extends Module{ + val io = IO(new PriorityArbiterIO(gen, n, priorityBits)) + + //val indexedPriority = io.priority.zipWithIndex; + + //val indexedPriorityValid = indexedPriority.zip(io.in.map(_.valid)) + + if(n == 1){ + //No arbitration required! + io.out.valid := io.in(0).valid + io.out.bits := io.in(0).bits + io.chosen := UInt(0) + io.priorityOut := io.priority(0) + io.in(0).ready := io.out.ready + } + else{ + + //val minPriorityValue = indexedPriorityValid.slice(1, n).foldLeft(indexedPriorityValid(0))((a, b) => Mux((a._2 === Bool(false) && b._2 === Bool(true) || (b._2 === Bool(true) && (b._1)._1 < (a._1)._1)), b, a)) //the max priority (smaller priority values are more important) + + //The slice is because we favor the left element in this simple arbiter + //We scan from left to right (from the first element to the last) and check + //if the next element has a smaller priority value. If it does, it becomes + //the element that is checked against for the remainder of the list. The slice + //is because an initial value has to be given for fold and it makes little sense + //to compare the first element with itself. The (a._1)._1 is accessing the 1st + //element of the zipped tuple (priority, index). The first part of the boolean + //function also ensures that if in(0) is not valid but in(y) is valid, that in(y) + //is selected even if it has a higher priority value than in(0). This in effect + //allows us to avoid filtering the list to only include valid inputs as a separate + //step. + + //If no element is valid, in(0) is selected and is connected to the output. + //Since its valid signal is low, this should have no effect and the priorityOut + //value should be ignored. + + //val chosenPriority = (minPriorityValue._1)._1 + //val chosenInd = UInt((minPriorityValue._1)._2) + + //Well ... 
I was having a bunch of type errors trying to do this functionally (chisel/scala type system), so let's do it iteratively + + //The use of var here is based on the chisel implementation of the basic arbiter + //Normally, we would use a val but, in this case, it appears we can use a var as we are basically + //procedurally constructing a tree of muxes. The initial node is 0 but, on each iteration, + //it is set to the mux of the current node and the selected one. The final value is then connected + //to the output and contains the whole tree of muxes + var chosenInd = UInt(0) + for(i <- 1 until n){ + val chooseThis = (io.in(chosenInd).valid === Bool(false) && io.in(i).valid === Bool(true)) || (io.in(i).valid === Bool(true) && io.priority(i) < io.priority(chosenInd)) + chosenInd = Mux(chooseThis, UInt(i), chosenInd) + } + + //Set IO lines + for(i <- 0 to n-1 by 1){ + //set the ready lines going to the input ports so that the chosen one + //gets the value of io.out.ready and the rest get a value of Bool(false) + io.in(i).ready := (chosenInd === UInt(i)) && io.out.ready + } + + io.out.valid := io.in(chosenInd).valid + io.out.bits := io.in(chosenInd).bits + io.chosen := chosenInd + io.priorityOut := io.priority(chosenInd) + } +} + +class InitCounter(val n: Int, val initVal: Int) { + /** current value of the counter */ + val value = if (n == 1) UInt(0) else Reg(init=UInt(initVal, log2Up(n))) + /** increment the counter + * @return if the counter is at the max value */ + def inc(): Bool = { + if (n == 1) Bool(true) + else { + val wrap = value === UInt(n-1) + value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1)) + wrap + } + } +} + +object InitCounter +{ + def apply(n: Int, initVal: Int): InitCounter = new InitCounter(n, initVal) + /** Get a counter which takes an input Bool of when to increment + * @return a UInt which is the value of the counter and a Bool indicating when the counter resets + */ + def apply(cond: Bool, n: Int, initVal: Int): (UInt, Bool) = { + 
val c = new InitCounter(n, initVal) + var wrap: Bool = null + when (cond) { wrap = c.inc() } + (c.value, cond && wrap) + } +} + +//Really want a way to initialize memory but this will do for now +/*class RegisterQueue[T <: Data](gen: T, val entries: Int, initVals: Seq[T], + pipe: Boolean = false, + flow: Boolean = false, + _reset: Option[Bool] = None) + extends Module(_reset=_reset)*/ + +class RegisterQueue[T <: Data](gen: T, val entries: Int, initVals: Seq[T], pipe: Boolean = false, flow: Boolean = false) extends Module{ + //override val ram = Vec.tabulate(entries)((i) => (if(i enq_ptr.value, + UInt(entries) + ptr_diff, ptr_diff)) + } +} + +class ValidDemuxIO(fanout:Int) extends Bundle{ + val validIn = Bool(INPUT) + val validSelect = UInt(INPUT, log2Up(fanout)) + val validOut = Vec(fanout, Bool(OUTPUT)) + //val validOut = Vec.fill(fanout)(Bool(OUTPUT)) + + override def cloneType: this.type = new ValidDemuxIO(fanout).asInstanceOf[this.type] +} + +class ValidDemux(fanout:Int) extends Module{ + val io = IO(new ValidDemuxIO(fanout)) + + for(i <- 0 until fanout){ + io.validOut(i) := io.validIn && (UInt(i) === io.validSelect) + } +} + +/* +class RegisterQueueTestWrapper(val entries:Int, initVals: Seq[Int]) extends Module{ + val io = IO(new QueueIO(UInt(width=32), entries)) + + val uintInit = initVals.map(UInt(_)) + val c = Module(new RegisterQueue(UInt(width=32), entries, uintInit)) + io.enq.valid <> c.io.enq.valid + io.enq.ready <> c.io.enq.ready + io.enq.bits <> c.io.enq.bits + + io.deq.valid <> c.io.deq.valid + io.deq.ready <> c.io.deq.ready + io.deq.bits <> c.io.deq.bits + + io.count <> c.io.count + +} + +class RegisterQueueTests(c: RegisterQueueTestWrapper) extends Tester(c){ + poke(c.io.deq.ready, 1) + + expect(c.io.deq.valid, 1) + expect(c.io.deq.bits, 2) + expect(c.io.count, 4) + + step(1) + expect(c.io.deq.valid, 1) + expect(c.io.deq.bits, 4) + expect(c.io.count, 3) + + step(1) + expect(c.io.deq.valid, 1) + expect(c.io.deq.bits, 6) + expect(c.io.count, 2) + + 
step(1) + expect(c.io.deq.valid, 1) + expect(c.io.deq.bits, 8) + expect(c.io.count, 1) + + step(1) + expect(c.io.deq.valid, 0) + expect(c.io.count, 0) + +} + +class PriorityArbiterTests(c: PriorityArbiter[UInt]) extends Tester(c) { + //Test when no inputs are valid and output is not ready + poke(c.io.in(0).valid, 0) + poke(c.io.in(1).valid, 0) + poke(c.io.in(2).valid, 0) + poke(c.io.in(3).valid, 0) + + poke(c.io.in(0).bits, 10) + poke(c.io.in(1).bits, 11) + poke(c.io.in(2).bits, 12) + poke(c.io.in(3).bits, 13) + + poke(c.io.priority(0), 20) + poke(c.io.priority(1), 21) + poke(c.io.priority(2), 22) + poke(c.io.priority(3), 23) + + poke(c.io.out.ready, 0) + + step(1) + expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 10) + + expect(c.io.priorityOut, 20) + + expect(c.io.out.valid, 0) + + step(1) + //Test when no inputs are valid and output is ready + poke(c.io.out.ready, 1) + + step(1) + expect(c.io.in(0).ready, 1) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 10) + + expect(c.io.priorityOut, 20) + + expect(c.io.out.valid, 0) + + step(1) + + //Test when one input is valid and output is not ready + poke(c.io.in(0).valid, 0) + poke(c.io.in(1).valid, 0) + poke(c.io.in(2).valid, 1) + poke(c.io.in(3).valid, 0) + + poke(c.io.in(0).bits, 10) + poke(c.io.in(1).bits, 11) + poke(c.io.in(2).bits, 12) + poke(c.io.in(3).bits, 13) + + poke(c.io.priority(0), 20) + poke(c.io.priority(1), 21) + poke(c.io.priority(2), 22) + poke(c.io.priority(3), 23) + + poke(c.io.out.ready, 0) + + step(1) + expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 12) + + expect(c.io.priorityOut, 22) + + expect(c.io.out.valid, 1) + + step(1) + + //Test when one input is valid and output is ready + poke(c.io.out.ready, 1) + + step(1) + 
expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 1) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 12) + + expect(c.io.priorityOut, 22) + + expect(c.io.out.valid, 1) + + step(1) + + //Test when multiple inputs are valid and one has the lowest priority value, and output is not ready + poke(c.io.in(0).valid, 1) + poke(c.io.in(1).valid, 1) + poke(c.io.in(2).valid, 1) + poke(c.io.in(3).valid, 1) + + poke(c.io.in(0).bits, 10) + poke(c.io.in(1).bits, 11) + poke(c.io.in(2).bits, 12) + poke(c.io.in(3).bits, 13) + + poke(c.io.priority(0), 20) + poke(c.io.priority(1), 10) + poke(c.io.priority(2), 20) + poke(c.io.priority(3), 20) + + poke(c.io.out.ready, 0) + + step(1) + expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 11) + + expect(c.io.priorityOut, 10) + + expect(c.io.out.valid, 1) + + step(1) + + //Test when multiple inputs are valid and one has the lowest priority value, and output is ready + poke(c.io.out.ready, 1) + + step(1) + + expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 1) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 11) + + expect(c.io.priorityOut, 10) + + expect(c.io.out.valid, 1) + + step(1) + + //Test when multiple inputs are valid and multiple have the lowest priority value, and output is not ready + poke(c.io.in(0).valid, 1) + poke(c.io.in(1).valid, 1) + poke(c.io.in(2).valid, 1) + poke(c.io.in(3).valid, 1) + + poke(c.io.in(0).bits, 10) + poke(c.io.in(1).bits, 11) + poke(c.io.in(2).bits, 12) + poke(c.io.in(3).bits, 13) + + poke(c.io.priority(0), 30) + poke(c.io.priority(1), 30) + poke(c.io.priority(2), 20) + poke(c.io.priority(3), 20) + + poke(c.io.out.ready, 0) + + step(1) + expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 12) + + expect(c.io.priorityOut, 20) + + 
expect(c.io.out.valid, 1) + + step(1) + + //Test when multiple inputs are valid and multiple has the lowest priority value, and output is ready + poke(c.io.out.ready, 1) + + step(1) + + expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 1) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 12) + + expect(c.io.priorityOut, 20) + + expect(c.io.out.valid, 1) + + step(1) + + //0 is valid lowest priority value, and output is not ready + poke(c.io.in(0).valid, 1) + poke(c.io.in(1).valid, 1) + poke(c.io.in(2).valid, 1) + poke(c.io.in(3).valid, 1) + + poke(c.io.in(0).bits, 10) + poke(c.io.in(1).bits, 11) + poke(c.io.in(2).bits, 12) + poke(c.io.in(3).bits, 13) + + poke(c.io.priority(0), 10) + poke(c.io.priority(1), 20) + poke(c.io.priority(2), 30) + poke(c.io.priority(3), 40) + + poke(c.io.out.ready, 0) + + step(1) + expect(c.io.in(0).ready, 0) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 10) + + expect(c.io.priorityOut, 10) + + expect(c.io.out.valid, 1) + + step(1) + + //0 is valid lowest priority value, and output is ready + //Test when multiple inputs are valid and multiple has the lowest priority value, and output is ready + poke(c.io.out.ready, 1) + + step(1) + + expect(c.io.in(0).ready, 1) + expect(c.io.in(1).ready, 0) + expect(c.io.in(2).ready, 0) + expect(c.io.in(3).ready, 0) + + expect(c.io.out.bits, 10) + + expect(c.io.priorityOut, 10) + + expect(c.io.out.valid, 1) + +} + +object PriorityArbiterMain { + def main(args: Array[String]): Unit = { + chiselMainTest(args, () => Module(new PriorityArbiter(UInt(width = 32), 4, 32))){c => new PriorityArbiterTests(c)} + chiselMainTest(args, () => Module(new RegisterQueueTestWrapper(4, List(2, 4, 6, 8)))){ + c => new RegisterQueueTests(c) + } + } +}*/ diff --git a/scripts/chisel_rocc_aux/memControllerComponents.scala b/scripts/chisel_rocc_aux/memControllerComponents.scala new file mode 100644 index 0000000..45c5e7c 
--- /dev/null +++ b/scripts/chisel_rocc_aux/memControllerComponents.scala @@ -0,0 +1,430 @@ +package memControl + +import Chisel._ +import Chisel.ImplicitConversions._ +import freechips.rocketchip.tile._ +import freechips.rocketchip.config._ +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.rocket._ +import freechips.rocketchip.tilelink._ +//import freechips.rocketchip.util.InOrderArbiter +import freechips.rocketchip.util._ +import freechips.rocketchip.system._ + +import vivadoHLS._ +import controlUtils._ + +class RequestParserIO(dataWidth: Int, addrWidth: Int) extends Bundle{ + val reqIn = new ApBusReq(dataWidth, addrWidth).flip + val reqOut = new ApBusReq(dataWidth, addrWidth) + val offsetAddr = UInt(INPUT, width=addrWidth) + val loadOffset = Bool(INPUT) + override def cloneType: this.type = new RequestParserIO(dataWidth, addrWidth).asInstanceOf[this.type] +} +class RequestParser(dataWidth: Int, addrWidth: Int) extends Module{ + val io = IO(new RequestParserIO(dataWidth, addrWidth)) + + val offsetReg = Reg(init = UInt(0, width=addrWidth)) + + val multAmt = UInt(dataWidth/8) + + when(io.loadOffset){ + offsetReg := io.offsetAddr + } + + io.reqOut.din := io.reqIn.din + io.reqOut.dataout := io.reqIn.dataout + io.reqOut.size := io.reqIn.size + + io.reqOut.address := (io.reqIn.address*multAmt) + offsetReg +} + +class TimestampedRequestIO(dataWidth:Int, addrWidth:Int, counterSize: Int) extends Bundle{ + val req = new ApBusReq(dataWidth, addrWidth) + val timestamp = UInt(INPUT, log2Up(counterSize)) + override def cloneType: this.type = new TimestampedRequestIO(dataWidth, addrWidth, counterSize).asInstanceOf[this.type] +} + +class ApBusReqType (dataWidth:Int, addrWidth:Int) extends Bundle{ + //Req specific lines + //Specifies a write request + val din = Bool() //req_din in verilog + + //Lines used for req + val address = UInt(width = addrWidth) + val dataout = UInt(width = dataWidth) + val size = UInt(width = addrWidth) + + override def cloneType: 
this.type = new ApBusReqType(dataWidth, addrWidth).asInstanceOf[this.type] +} + +class TimestampedRequestIOType(dataWidth:Int, addrWidth:Int, counterSize: Int) extends Bundle{ + val req = new ApBusReqType(dataWidth, addrWidth) + val timestamp = UInt(width = log2Up(counterSize)) + + override def cloneType: this.type = new TimestampedRequestIOType(dataWidth, addrWidth, counterSize).asInstanceOf[this.type] +} + + +class RequestIngestIO(dataWidth: Seq[Int], addrWidth: Seq[Int], counterSize: Int, inputBufferLen: Int) extends Bundle{ + //val reqsIn = Vec.tabulate(dataWidth.length)((i) => Wire(new ApBusReq(dataWidth(i), addrWidth(i)).flip)) + val reqsIn = HeterogeneousBag(dataWidth.zip(addrWidth).map { + case (dw, aw) => new ApBusReq(dw, aw) + }).flip + + val reqsFullN = Vec(dataWidth.length, Output(Bool())) + //val reqsWrite = Vec(dataWidth.length, Bool(INPUT)).flip + val reqsWrite = Vec(dataWidth.length, Input(Bool())) + //val offsetAddrs = Vec.tabulate(dataWidth.length)((i) => UInt(INPUT, width=addrWidth(i))) + //val offsetAddrs = HeterogeneousBag(addrWidth.map(aw => UInt(INPUT, width = aw))) + val offsetAddrs = HeterogeneousBag(addrWidth.map(aw => Input(UInt(aw.W)))) + //val loadOffsets = Vec(dataWidth.length, Bool(INPUT)).flip + val loadOffsets = Vec(dataWidth.length, Input(Bool())) + + val newRequests = UInt(OUTPUT, width = log2Up(dataWidth.length)+1) //The number of new requests recieved in this cycle (used to track number of outstanding requests) + + //The widths are the maximums of all of the input widths + val reqOut = Decoupled(new ApBusReq(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0)) + val selectedBus = UInt(OUTPUT, log2Up(dataWidth.length)) + override def cloneType: this.type = new RequestIngestIO(dataWidth, addrWidth, counterSize, inputBufferLen).asInstanceOf[this.type] +} + +class RequestIngest(dataWidth: Seq[Int], addrWidth: Seq[Int], inputBufferLen: Int) extends Module{ + val busCount = dataWidth.length 
+ //We only need enough counter values to account for the worst case scenario when memory is stalled and each buffer is filled one at a time + //Adding an extra 1 is probably overly conservative as the extra one may be reassigned but is not at the front of the queue. If it was, then a queue is not + //Taking the base 2 log and rasing 2 to that power ensures that the overflow semantics are what is expected. This is important for the priority difference + val counterSize = BigInt(2).pow(log2Up(inputBufferLen*busCount)).toInt + + val io = IO(new RequestIngestIO(dataWidth, addrWidth, counterSize, inputBufferLen)) + + val counter = Counter(counterSize) + + val prevTimestamp = Reg(init = UInt(0, width=log2Up(counterSize))) + + val parsers = Seq.tabulate(busCount)((i) => Module(new RequestParser(dataWidth(i), addrWidth(i))).io) + + //val queues = Vec.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIOType(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen))) + val queues = Seq.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIOType(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen)).io) + /*val queues = HeterogeneousBag(dataWidth.zip(addrWidth).map{ + case (dw, aw) => Module(new Queue(new TimestampedRequestIOType(dw, aw, log2Up(counterSize)), inputBufferLen)) + })*/ + //Vec.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIO(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen)).io) + + val arbiter = Module(new PriorityArbiter(new ApBusReq(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0), busCount, log2Up(counterSize))) + + val incrCounter = Wire(Bool(false)) + if (dataWidth.length > 0) { + incrCounter := io.reqsWrite.reduce(_||_) + } + //+1 is because we need to be able to represent 0 and BusCount not BusCount-1 + val requestsUInt = io.reqsWrite.map((x) => Mux(x, UInt(1, width=busCount), UInt(0, width=busCount+1))) + + if 
(dataWidth.length > 0) { + io.newRequests := requestsUInt.reduce(_+_) + } + + //Increment counter when any request comes in + when(incrCounter){ + counter.inc() + } + + for(i <- 0 until busCount){ + parsers(i).offsetAddr := io.offsetAddrs(i) + parsers(i).loadOffset := io.loadOffsets(i) + parsers(i).reqIn := io.reqsIn(i) + //parsers(i).reqIn <> io.reqsIn(i) + queues(i).enq.bits.req := parsers(i).reqOut + queues(i).enq.bits.timestamp := counter.value //put in the timestamp! + io.reqsFullN(i) := queues(i).enq.ready + queues(i).enq.valid := io.reqsWrite(i) + queues(i).deq.ready := arbiter.io.in(i).ready + arbiter.io.in(i).valid := queues(i).deq.valid + arbiter.io.in(i).bits := queues(i).deq.bits.req + //Priority is oldest first. It is defined as the difference from the last serviced timestamp + arbiter.io.priority(i) := queues(i).deq.bits.timestamp - prevTimestamp //This works even with overlfow so long as counterSize is a power of 2 + } + + when(arbiter.io.out.valid && io.reqOut.ready){ + prevTimestamp := prevTimestamp + arbiter.io.priorityOut + } + + //io.reqOut := arbiter.io.out + io.reqOut.valid := arbiter.io.out.valid + arbiter.io.out.ready := io.reqOut.ready + io.reqOut.bits := arbiter.io.out.bits + io.selectedBus := arbiter.io.chosen +} + +class RequestIssuerIO(dataWidth: Int, addrWidth:Int, maxReqWidth:Int, numBus:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int) extends Bundle{ + //Incoming request from arbiter + //val reqIn = Decoupled(new ApBusReq(dataWidth, addrWidth)).flip + val reqIn = Flipped(Decoupled(new ApBusReq(dataWidth, addrWidth))) + //The bus that the request came from + val reqBus = UInt(INPUT, width=log2Up(numBus)) + + //Lines for table address check and for updating the table + val accessWidth = UInt(OUTPUT, width=maxReqWidth) + val accessRead = Bool(INPUT) + val conflict = Bool(INPUT) + //val tagQueueIO = Decoupled(UInt(OUTPUT, width=roccTagWidth)).flip + val tagQueueIO = 
Flipped(Decoupled(Output(UInt(roccTagWidth.W)))) + val busNum = UInt(OUTPUT, width=log2Up(numBus)) + + //RoCC Lines + val roCCReqAddr = UInt(OUTPUT, width = roccAddrWidth) // coreMaxAddrBits) + val roCCReqTag = UInt(OUTPUT, width = roccTagWidth) //coreDCacheReqTagBits) + val roCCReqCmd = UInt(OUTPUT, width = roccCmdWidth) //M_SZ) + val roCCReqTyp = UInt(OUTPUT, width = roccTypWidth) // + val roCCReqData = UInt(OUTPUT, width = roccDataWidth) //coreDataBits) + val roCCReqValid = Bool(OUTPUT) + val roCCReqRdy = Bool(INPUT) + + val reqWidth = UInt(OUTPUT, width = maxReqWidth) //Pass to table to specify width of request + + val reqSent = Bool(OUTPUT) + + override def cloneType: this.type = new RequestIssuerIO(dataWidth, addrWidth, maxReqWidth, numBus, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth).asInstanceOf[this.type] +} + +//maxReqBytes = 8 (64 bit) in our case +class RequestIssuer(dataWidth: Int, addrWidth:Int, maxReqBytes:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int, busDataWidths:Seq[Int]) extends Module{ + val maxReqWidth = log2Up(maxReqBytes) + val numBus = busDataWidths.length + val io = IO(new RequestIssuerIO(dataWidth, addrWidth, maxReqWidth, numBus, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth)) + +if (busDataWidths.length > 0) { + val busByteWidth = Vec(busDataWidths.map((x) => UInt(x)/UInt(8))) + + val currentByteWidth = busByteWidth(io.reqBus) + + + io.busNum := io.reqBus + io.roCCReqAddr := io.reqIn.bits.address + io.roCCReqData := io.reqIn.bits.dataout + io.roCCReqTag := io.tagQueueIO.bits + io.roCCReqCmd := Mux(io.reqIn.bits.din, M_XWR, M_XRD) //Set the transaction type (Write / Read) + io.roCCReqTyp := Mux(currentByteWidth===UInt(8), log2Ceil(8).U, Mux(currentByteWidth===UInt(4), log2Ceil(4).U, Mux(currentByteWidth===UInt(2), log2Ceil(2).U, log2Ceil(1).U))) //Set transaction width + io.reqWidth := currentByteWidth + + io.accessWidth := 
currentByteWidth + + io.roCCReqValid := io.reqIn.valid && !io.conflict && io.tagQueueIO.valid + + val memTransactSent = io.reqIn.valid && io.roCCReqRdy && !io.conflict && io.tagQueueIO.valid + + io.reqIn.ready := memTransactSent + io.tagQueueIO.ready := memTransactSent //We used a tag when we issued a request + io.reqSent := memTransactSent +} +} + +class RoutingTableIO(tagWidth:Int, numBus:Int, addrWidth: Int, maxReqWidth: Int) extends Bundle{ + val reqValid = Bool(INPUT) + val reqTag = UInt(INPUT, width = tagWidth) + val reqWrite = Bool(INPUT) //If the transaction is a write + val reqAddr = UInt(INPUT, width = addrWidth) + val reqBus = UInt(INPUT, log2Up(numBus)) + val reqWidth = UInt(INPUT, width = maxReqWidth) + + val checkAddr = UInt(INPUT, width = addrWidth) + val checkWidth = UInt(INPUT, width = maxReqWidth) + val checkRead = Bool(INPUT) + val conflict = Bool(OUTPUT) + + val respTag = UInt(INPUT, width = tagWidth) + val respVaid = Bool(INPUT) + val respBus = UInt(OUTPUT, width = log2Up(numBus)) + override def cloneType: this.type = new RoutingTableIO(tagWidth, numBus, addrWidth, maxReqWidth).asInstanceOf[this.type] +} + +//maxReqBytes = 8 (64 bit) in our case +class RoutingTable(tagWidth:Int, numTags:Int, numBus:Int, addrWidth: Int, maxReqBytes: Int) extends Module{ + val maxReqWidth = log2Up(maxReqBytes) + //val numTags = BigInt(2).pow(tagWidth).toInt + + val io = IO(new RoutingTableIO(tagWidth, numBus, addrWidth, maxReqWidth)) + +if (addrWidth > 0) { + + val v = Reg(Vec.fill(numTags)(Bool(false))) //valid (outstanding memory request) + val write = Reg(Vec.fill(numTags)(Bool(false))) //outstanding request is a write + val addr = Reg(Vec.fill(numTags)(UInt(0, width=addrWidth))) + val width = Reg(Vec.fill(numTags)(UInt(0, width=maxReqWidth))) + val bus = Reg(Vec.fill(numTags)(UInt(0, width=log2Up(numBus)))) + + //write Req into table + when(io.reqValid){ + v(io.reqTag) := Bool(true) //Set table entry as valid + write(io.reqTag) := io.reqWrite //Set write flag 
+ addr(io.reqTag) := io.reqAddr //Set addr + width(io.reqTag) := io.reqWidth //Set transaction width (in bytes) + bus(io.reqTag) := io.reqBus //Set the requesting bus + } + + //clear returned transaction. The tag queue prevents accidentally clearing a tag that has not yet returned + when(io.respVaid){ + v(io.respTag) := Bool(false) //This entry is no longer valid + } + + //Return the bus that requested the returning transaction + io.respBus := bus(io.respTag) + + //Reads can execute if there is no outstanding write to the address (v==false || write==false) + // Reads stall when v==true and write==true + + //Writes can only occur if there are no outstanding ops (v==false) + // Writes stall when v==true + + val addrsConflicting = Vec.tabulate(numTags)((i) => !((io.checkAddr + io.checkWidth <= addr(i)) || (addr(i) + width(i) <= io.checkAddr))) + + //There is a conflict if: the address is conflicting, the entry is valid, and if(reading, there is an outstanding write) + val conflicting = Vec.tabulate(numTags)((i) => addrsConflicting(i) && v(i) && Mux(io.checkRead, write(i), Bool(true))) + + io.conflict := conflicting.reduce(_||_) +} +} + +class MemControllerIO(dataWidth:Seq[Int], addrWidth:Seq[Int], roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int) extends Bundle{ + //----ap_bus requests---- + val reqsIn = HeterogeneousBag(dataWidth.zip(addrWidth).map { + case (dw, aw) => new ApBusReq(dw, aw) + }).flip + //Decoupled signals for requests + val reqsFullN = Vec(dataWidth.length, Bool(OUTPUT)) + //val reqsWrite = Vec(dataWidth.length, Bool(INPUT)).flip + val reqsWrite = Vec(dataWidth.length, Input(Bool())) +//Offset address lines + //val offsetAddrs = Vec.tabulate(dataWidth.length)((i) => UInt(INPUT, width=addrWidth(i))) + val offsetAddrs = HeterogeneousBag(addrWidth.map(aw => UInt(INPUT, width = aw))) + //val loadOffsets = Vec(dataWidth.length, Bool(INPUT)).flip + val loadOffsets = Vec(dataWidth.length,Input(Bool())) + + //----status 
/** Memory controller sitting between the ap_bus ports exposed on `io` and the
  * RoCC memory request/response signals exposed on `io`.
  *
  * It instantiates a RequestIngest, a RequestIssuer, a RoutingTable
  * ("scoreboard"), a RegisterQueue of tags, one response Queue per bus and a
  * ValidDemux, and wires them together. All hardware is only generated when
  * `dataWidth` is non-empty (`numBus > 0`).
  *
  * @param dataWidth     per-bus data width; its length defines the number of buses
  * @param addrWidth     per-bus address width
  * @param reqBufferLen  passed to RequestIngest; also used to size the outstanding-request counter
  * @param rspBufferLen  depth of each per-bus response Queue
  * @param maxReqBytes   passed to RequestIssuer and RoutingTable
  * @param roccAddrWidth width of the RoCC request address
  * @param roccDataWidth width of the RoCC request/response data
  * @param roccTagWidth  width of the RoCC tag
  * @param roccCmdWidth  width of the RoCC command
  * @param roccTypWidth  width of the RoCC type field
  * @param numTags       number of tags preloaded into the tag queue
  * @param tagOffset     first tag value; tags are tagOffset until tagOffset+numTags
  */
class MemController(dataWidth:Seq[Int], addrWidth:Seq[Int], reqBufferLen:Int, rspBufferLen:Int, maxReqBytes:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int, numTags:Int, tagOffset:Int ) extends Module{
  val io = IO(new MemControllerIO(dataWidth, addrWidth, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth))

  val numBus = dataWidth.length
//With no buses nothing below is instantiated and no io signal is driven here
if (numBus > 0){
  val reqIngest = Module(new RequestIngest(dataWidth, addrWidth, reqBufferLen))
  //The issuer and the routing table are sized for the widest bus
  val reqIssuer = Module(new RequestIssuer(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0, maxReqBytes, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth, dataWidth))
  val scoreboard = Module(new RoutingTable(roccTagWidth, numTags, numBus, if (addrWidth.length > 0) addrWidth.max else 0, maxReqBytes))

  //Tags may have an offset if this is not the only accelerator in the system
  val tags = (tagOffset until (tagOffset+numTags))
  val tagUInts = tags.map((x) => UInt(x, width=roccTagWidth).asUInt)
  //Queue of tags, initialised with every tag in the range above
  val tagQueue = Module(new RegisterQueue(gen=UInt(width=roccTagWidth), entries=numTags, initVals=tagUInts, flow=true))
  //One response queue per bus, each as wide as that bus
  val rspQueues = Seq.tabulate(numBus)((i) => Module(new Queue(UInt(width=dataWidth(i)), rspBufferLen)).io)
  val validDemux = Module(new ValidDemux(numBus))

  val currentRequestNum = Reg(init=UInt(0, width=(reqBufferLen*numBus+1+numTags)))

  //Logic for number of outstanding requests:
  //add the ingest's newRequests count, subtract one for every valid RoCC response
  currentRequestNum := currentRequestNum + reqIngest.io.newRequests - Mux(io.roCCRspValid, UInt(1), UInt(0))
  //Busy while the counter is non-zero
  io.memBusy := currentRequestNum =/= UInt(0)

  //==== Ingest Logic ====
  //Hook up ap_bus request lines to ingest logic
  //reqIngest.io.reqsIn <> io.reqsIn
  reqIngest.io.reqsIn := io.reqsIn
  io.reqsFullN := reqIngest.io.reqsFullN
  reqIngest.io.reqsWrite := io.reqsWrite
  //reqIngest.io.offsetAddrs := io.offsetAddrs
  //Note the direction: offsetAddrs is driven by the ingest block
  io.offsetAddrs := reqIngest.io.offsetAddrs
  reqIngest.io.loadOffsets := io.loadOffsets

  //val newRequests

  //val reqOut
  //val selectedBus

  //====Req Issuer ====
  //Incoming request from arbiter
  //reqIssuer.io.reqIn <> reqIngest.io.reqOut
  //Decoupled connection written out field by field (bits/valid forward, ready backward)
  reqIssuer.io.reqIn.bits := reqIngest.io.reqOut.bits
  reqIssuer.io.reqIn.valid := reqIngest.io.reqOut.valid
  reqIngest.io.reqOut.ready := reqIssuer.io.reqIn.ready
  //The bus that the request came from
  reqIssuer.io.reqBus := reqIngest.io.selectedBus

  //Lines for table address check and for updating the table
  //val accessWidth
  //val containsAddr
  //reqIssuer.io.tagQueue <> tagQueue.io.deq
  //The issuer dequeues tags from the tag queue
  reqIssuer.io.tagQueueIO.bits := tagQueue.io.deq.bits
  reqIssuer.io.tagQueueIO.valid := tagQueue.io.deq.valid
  tagQueue.io.deq.ready := reqIssuer.io.tagQueueIO.ready
  //val busNum
  //val reqWidth //Pass to table to specify width of request

  //RoCC Lines
  io.roCCReqAddr := reqIssuer.io.roCCReqAddr
  io.roCCReqTag := reqIssuer.io.roCCReqTag
  io.roCCReqCmd := reqIssuer.io.roCCReqCmd
  io.roCCReqTyp := reqIssuer.io.roCCReqTyp
  io.roCCReqValid := reqIssuer.io.roCCReqValid
  reqIssuer.io.roCCReqRdy := io.roCCReqRdy
  io.roCCReqData := reqIssuer.io.roCCReqData

  //====Scoreboard====
  //Record each issued request (tag, write flag, address, bus, width) in the table
  scoreboard.io.reqValid := reqIssuer.io.reqSent //Do not commit into the table unless there is a fire
  scoreboard.io.reqTag := reqIssuer.io.roCCReqTag
  scoreboard.io.reqWrite := (reqIssuer.io.roCCReqCmd === M_XWR) //If the transaction is a write
  scoreboard.io.reqAddr := reqIssuer.io.roCCReqAddr
  scoreboard.io.reqBus := reqIssuer.io.busNum
  scoreboard.io.reqWidth := reqIssuer.io.reqWidth

  //Conflict check of the request address against the table; the result goes back to the issuer
  scoreboard.io.checkAddr := reqIssuer.io.roCCReqAddr
  scoreboard.io.checkWidth := reqIssuer.io.accessWidth
  scoreboard.io.checkRead := reqIssuer.io.accessRead
  reqIssuer.io.conflict := scoreboard.io.conflict

  //Responses are looked up in the table by tag
  //NOTE(review): "respVaid" is the port name used here; presumably it matches a
  //(misspelled) port declared on RoutingTable, which is not visible in this chunk
  scoreboard.io.respTag := io.roCCRspTag
  scoreboard.io.respVaid := io.roCCRspValid
  //val scoreboard.io.respBus

  //====TagQueue====
  //Every valid RoCC response enqueues its tag back into the tag queue
  //(enq.ready is not consulted here)
  tagQueue.io.enq.bits := io.roCCRspTag
  tagQueue.io.enq.valid := io.roCCRspValid

  //====Demux=====
  validDemux.io.validIn := io.roCCRspValid && io.roCCRspCmd===M_XRD //Only return to the bus if this is a response to a read request
  //The table supplies the bus the response belongs to
  validDemux.io.validSelect := scoreboard.io.respBus
  //val validOut = Vec.fill(fanout)(Bool(OUTPUT))

  //====RespQueues====
  for(i <- 0 until numBus){
    rspQueues(i).enq.bits := io.roCCRspData((dataWidth(i)-1),0) //Pass the data to all output queues (slicing to appropriate width), only give one the valid signal
    rspQueues(i).enq.valid := validDemux.io.validOut(i)
    //Dequeue side drives the ap_bus response port: deq.valid is rsp_empty_n, rsp_read is deq.ready
    io.rspOut(i).datain := rspQueues(i).deq.bits
    io.rsp_empty_n(i) := rspQueues(i).deq.valid
    rspQueues(i).deq.ready := io.rsp_read(i)
  }
}
}
#!/usr/bin/perl
use warnings;
use strict;
use Cwd;
use File::Copy;

# Root of the checkout, taken from the RDIR environment variable.
# Validate it BEFORE printing so an unset RDIR does not trigger an
# "uninitialized value" warning, and exit non-zero so callers see the error.
my $rdir = $ENV{'RDIR'};
if ((not defined($rdir)) or $rdir eq '') {
    print("Please source sourceme-f1-manager.sh!\n");
    exit(1);
}
print $rdir;

# Run one shell command and report a failure on STDERR.
# The pipeline keeps going afterwards (as it always did), but a broken step
# is no longer silent. Returns the raw system() status.
sub _accel_run_step {
    my ($cmd) = @_;
    my $status = system($cmd);
    if ($status == -1) {
        warn "generate_accel: could not start '$cmd': $!\n";
    }
    elsif ($status != 0) {
        warn "generate_accel: '$cmd' failed (exit status " . ($status >> 8) . ")\n";
    }
    return $status;
}

# generate_accel(\@accel_tuples)
#
# For every accelerator tuple
#   [ pgm, func, bm_path, is_rocc, idx_or_addr, prefix (optional) ]
# copy the example sources and helper scripts into <bm_path>/src/main/c,
# run HLS there, and then run the RoCC (is_rocc true) or TileLink
# (is_rocc false) Chisel and wrapper generators.
#
# Side effects: sets $ENV{PGM} and $ENV{FUNC}, changes the working directory,
# creates and modifies files under bm_path. Dies if the working directory
# cannot be entered.
sub generate_accel {
    my @accel_tuples = @{ $_[0] };

    foreach my $accel_tuple_ref (@accel_tuples) {
        my ($pgm, $func, $bm_path, $is_rocc, $idx_addr, $prefix) = @{$accel_tuple_ref};

        # A missing prefix becomes a single blank, which the shell drops when
        # the command lines below are split into words.
        $prefix = " " if scalar(@{$accel_tuple_ref}) <= 5;

        my $bm_path_c = $bm_path . '/src/main/c/';
        my $scripts   = "$rdir/tools/centrifuge/scripts";

        print("Pgm: " . $pgm . "\n");
        print("Func: " . $func . "\n");
        print("Path: " . $bm_path . "\n");
        print("Is RoCC not TL?: " . $is_rocc . "\n");
        print("RoCC Idx or TL Addr: " . $idx_addr . "\n");
        print("Prefix: " . $prefix . "\n");

        # The child scripts and Makefile read these.
        $ENV{'PGM'}  = $pgm;
        $ENV{'FUNC'} = $func;

        _accel_run_step("mkdir -p $bm_path/src/main/c");
        chdir("$bm_path/src/main/c/")
            or die "generate_accel: cannot chdir to $bm_path/src/main/c/: $!\n";

        _accel_run_step("cp $rdir/tools/centrifuge/examples/${pgm}/* $bm_path_c");
        _accel_run_step("cp $scripts/run_hls.pl $bm_path_c");

        # Specialize the Makefile for this function
        _accel_run_step("sed -i 's/^FUNC=.*/FUNC=$func/g' $bm_path_c/Makefile");

        my $dir = getcwd;
        print("$dir\n");

        _accel_run_step("perl run_hls.pl $pgm $func $prefix");

        # Flow-specific generators: [ script, arguments ].
        my @generators = $is_rocc
            ? ( [ 'run_chisel.pl',          "$pgm $func $prefix" ],
                [ 'generate_wrapper.pl',    "$pgm $func $idx_addr $prefix" ] )
            : ( [ 'run_chisel_tl.pl',       "$pgm $func $idx_addr $prefix" ],
                [ 'generate_wrapper_tl.pl', "$pgm $func $idx_addr $prefix" ] );

        # Copy both scripts first, then run them in order.
        foreach my $gen (@generators) {
            _accel_run_step("cp $scripts/$gen->[0] $bm_path_c");
        }
        foreach my $gen (@generators) {
            _accel_run_step("perl $gen->[0] $gen->[1]");
        }
    }
}

# Example with RoCC and TL accel
#my @input = (["vadd", "vadd", "$rdir/sim/target-rtl/firechip/hls_vadd_vadd/src/main/c", 1, "0", "rocc0_"], ["vadd_tl", "vadd", "$rdir/sim/target-rtl/firechip/hls_vadd_tl_vadd/src/main/c", 0, "0x20000", "tl0_"]);
#generate_accel(\@input);
1;
#!/usr/bin/perl
use warnings;
use strict;
use Cwd;
use File::Copy;

# generate_build_sbt($soc_name, \%bm_path)
#
# Regenerates $RDIR/build.sbt from
# $RDIR/tools/centrifuge/scripts/build_sbt_template, appending
#   * one sbt project per HLS accelerator (hash key = project name,
#     hash value = project directory),
#   * the SoC project, which depends on every accelerator project,
#   * the "example" and "firechip" projects, which depend on the SoC.
# The previous build.sbt is saved as build.sbt.bk.
#
# The full text is assembled in memory first, so a missing template can no
# longer leave a truncated build.sbt behind. Names and paths are interpolated
# directly instead of substituted into placeholders one after another, so an
# accelerator name containing "PATH" or a SoC name containing "BMS" is no
# longer corrupted. Projects are emitted in sorted order so the output is
# reproducible.
#
# Dies when the template cannot be read or build.sbt cannot be backed up or
# written; exits with status 1 when RDIR is not set.
sub generate_build_sbt {
    my ($soc_name, $bm_path_ref) = @_;
    my %bm_path = %{$bm_path_ref};

    my $rdir = $ENV{'RDIR'};
    if ((not defined($rdir)) or $rdir eq '') {
        print("Please source sourceme-f1-manager.sh!\n");
        exit(1);
    }
    print $rdir;

    my $build_sbt_file     = "$rdir/build.sbt";
    my $build_sbt_template = "$rdir/tools/centrifuge/scripts/build_sbt_template";

    open my $template_fh, '<', $build_sbt_template
        or die "error opening $build_sbt_template $!";
    my $build_sbt = do { local $/; <$template_fh> };
    close $template_fh;

    # One project per HLS accelerator.
    my @bms = sort keys %bm_path;
    foreach my $bm (@bms) {
        my $path = $bm_path{$bm};
        $build_sbt .= "\n"
            . "lazy val $bm = (project in file(\"$path\"))\n"
            . "  .dependsOn(rocketchip, testchipip, midasTargetUtils, icenet)\n"
            . "  .settings(commonSettings)\n";
    }

    # ", a, b, ..." appended after "utilities"; empty when there are no accelerators.
    my $bm_deps = join '', map { ", $_" } @bms;

    $build_sbt .= "\n"
        . "lazy val $soc_name = conditionalDependsOn(project in file(\"generators/$soc_name\"))\n"
        . "  .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities$bm_deps)\n"
        . "  .settings(commonSettings)\n";

    $build_sbt .= "\n"
        . "lazy val example = conditionalDependsOn(project in file(\"generators/example\"))\n"
        . "  .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, sha3, $soc_name)\n"
        . "  .settings(commonSettings)\n"
        . "\n"
        . "lazy val firechip = (project in file(\"generators/firechip\"))\n"
        . "  .dependsOn(example, icenet, testchipip, tracegen, midasTargetUtils, midas, firesimLib % \"test->test;compile->compile\")\n"
        . "  .settings(\n"
        . "    commonSettings,\n"
        . "    testGrouping in Test := isolateAllTests( (definedTests in Test).value )\n"
        . "  )\n";

    # back up the build sbt file, then replace it
    copy($build_sbt_file, "$build_sbt_file.bk") or die "Copy failed: $!";

    open my $sbt_fh, '>', $build_sbt_file
        or die "error opening $build_sbt_file for writing: $!";
    print {$sbt_fh} $build_sbt or die "error writing $build_sbt_file: $!";
    close $sbt_fh or die "error closing $build_sbt_file: $!";

    return 1;
}

1;
#!/usr/bin/perl
use warnings;
use strict;
use Cwd;
use File::Copy;

# generate_config(\@rocc_func_names, \@tll2_func_names, $postfix)
#
# Writes $RDIR/generators/example/src/main/scala/HLSConfig.scala containing
#   * WithHLSRoCCExample: one RoCC builder per name in @rocc_func_names
#     (the i-th name uses OpcodeSet.custom<i>) followed by the
#     TranslatorExample on OpcodeSet.custom3,
#   * HLSRocketConfig, WithHLSTop,
#   * TopWithHLS / TopWithHLSModule mixing in HasPeripheryHLS<name>AXI and
#     HasPeripheryHLS<name>AXIImp for every name in @tll2_func_names.
#
# $postfix is optional and accepted for interface compatibility; the
# generated file does not depend on it.
#
# Dies when the output file cannot be written; exits with status 1 when RDIR
# is not set.
sub generate_config {
    my ($rocc_ref, $tll2_ref, $postfix) = @_;
    my @rocc_func_names = @{$rocc_ref};
    my @tll2_func_names = @{$tll2_ref};
    $postfix = "" unless defined $postfix;

    my $rdir = $ENV{'RDIR'};
    if ((not defined($rdir)) or $rdir eq '') {
        print("Please source sourceme-f1-manager.sh!\n");
        exit(1);
    }
    print $rdir;

    # custom3 is always handed to the translator below, so a fourth HLS
    # accelerator would share its opcode. Keep generating, but say so.
    if (@rocc_func_names > 3) {
        warn "generate_config: more than 3 RoCC accelerators; "
           . "OpcodeSet.custom3 is also used by TranslatorExample\n";
    }

    # ---- RoCC builders -------------------------------------------------
    my @builders;
    for my $i (0 .. $#rocc_func_names) {
        my $name = $rocc_func_names[$i];
        push @builders,
              "\n    (p: Parameters) => {\n"
            . "      val hls_$name = LazyModule(new HLS${name}Control(OpcodeSet.custom$i)(p))\n"
            . "      hls_$name\n"
            . "    }";
    }
    push @builders,
          "\n    (p: Parameters) => {\n"
        . "      val translator = LazyModule(new TranslatorExample(OpcodeSet.custom3)(p))\n"
        . "      translator\n"
        . "    }";

    my $rocc = "\nclass WithHLSRoCCExample extends Config((site, here, up) => {\n"
             . "  case BuildRoCC => Seq(\n"
             . join(",\n", @builders)
             . ")\n})\n";

    # ---- File header and imports ----------------------------------------
    my $config = <<'END_HEADER';
package example
import chisel3._
import freechips.rocketchip.diplomacy.{LazyModule, ValName}
import freechips.rocketchip.config.{Parameters, Config}
import testchipip.{WithBlockDevice, BlockDeviceKey, BlockDeviceConfig}
import freechips.rocketchip.tile._
import freechips.rocketchip.subsystem._
import freechips.rocketchip.system.DefaultConfig
import freechips.rocketchip.rocket._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.devices.tilelink._
import freechips.rocketchip._
import testchipip._
import sifive.blocks.devices.uart.{PeripheryUARTKey,UARTParams}

import sifive.blocks.devices.uart._
import java.io.File
import ConfigValName._
END_HEADER

    foreach my $func_name (@rocc_func_names) {
        $config .= "import hls_$func_name.HLS${func_name}Control\n";
    }
    foreach my $func_name (@tll2_func_names) {
        $config .= "import hls_$func_name._\n";
    }

    $config .= $rocc;

    # ---- Fixed configuration classes ------------------------------------
    $config .= <<'END_CONFIG';

class HLSRocketConfig extends Config(
  new WithHLSTop ++
  new WithBootROM ++
  new freechips.rocketchip.subsystem.WithInclusiveCache ++
  new WithHLSRoCCExample ++
  new freechips.rocketchip.subsystem.WithNBigCores(1) ++
  new freechips.rocketchip.system.BaseConfig)


class WithHLSTop extends Config((site, here, up) => {
  case BuildTop => (clock: Clock, reset: Bool, p: Parameters) =>
    Module(LazyModule(new TopWithHLS()(p)).module)
  })

END_CONFIG

    # ---- Top-level with one mixin per TileLink accelerator ---------------
    $config .= "class TopWithHLS(implicit p: Parameters) extends Top ";
    foreach my $func_name (@tll2_func_names) {
        $config .= "\n  with HasPeripheryHLS${func_name}AXI";
    }
    $config .= " {\n"
             . "  override lazy val module = new TopWithHLSModule(this)\n"
             . "}\n"
             . "\n"
             . "class TopWithHLSModule(l: TopWithHLS) extends TopModule(l)\n";
    foreach my $func_name (@tll2_func_names) {
        $config .= "  with HasPeripheryHLS${func_name}AXIImp\n";
    }

    my $config_file = "$rdir/generators/example/src/main/scala/HLSConfig.scala";
    open my $config_fh, '>', $config_file or die "cannot open $config_file: $!\n";
    print {$config_fh} $config or die "cannot write $config_file: $!\n";
    close $config_fh or die "cannot close $config_file: $!\n";

    return 1;
}
1;
#!/usr/bin/perl
use strict;
use warnings;
use Cwd;
use File::Copy;

# Usage: generate_soc.pl <config.json> [postfix]
#
# Reads the accelerator description from the JSON file, then
#   1. runs HLS plus Chisel/wrapper generation for every accelerator,
#   2. regenerates build.sbt,
#   3. regenerates HLSConfig.scala.
# The SoC name is the JSON file name without its ".json" extension.

my $json_fn = $ARGV[0];
my $rdir    = $ENV{'RDIR'};

my $postfix = "";
if (@ARGV > 1) {
    $postfix = $ARGV[1];
}

# Validate the inputs before anything is derived from them, and exit
# non-zero so a calling shell or Makefile notices the failure.
if ((not defined($rdir)) or $rdir eq '') {
    print("Please source sourceme-f1-manager.sh!\n");
    exit(1);
}

if (not defined($json_fn)) {
    print("Please specify a json config file\!\n");
    exit(1);
}

# Strip only a trailing, literal ".json". The old pattern /.json/ removed the
# first "<any char>json" anywhere in the name, e.g. "my_json_soc.json"
# became "my_soc.json".
my $soc_name = $json_fn;
$soc_name =~ s/\.json\z//;

my $scripts_dir = $rdir . '/tools/centrifuge/scripts/';
require $scripts_dir . 'parse_json.pl';
require $scripts_dir . 'generate_accel.pl';
require $scripts_dir . 'generate_build_sbt.pl';
require $scripts_dir . 'generate_config.pl';
require $scripts_dir . 'generate_f1_scripts.pl';
require $scripts_dir . 'generate_xsim_scripts.pl';

# Parse Json file
my ($RoCC_ref, $TLL2_ref) = parse_json($json_fn);
my @RoCC_accels = @$RoCC_ref;
my @TLL2_accels = @$TLL2_ref;

my %hls_bm       = ();    # sbt project name => project directory
my @Accel_tuples = ();    # input for generate_accel
my @RoCC_names   = ();    # prefixed function names of RoCC accelerators
my @TLL2_names   = ();    # prefixed function names of TileLink accelerators

# RoCC accelerators: [ pgm, func, optional path ]
my $idx = 0;
foreach my $RoCC_accel (@RoCC_accels) {
    my @arr  = @{$RoCC_accel};
    my $pgm  = $arr[0];
    my $func = $arr[1];

    my $bm_path = (scalar @arr > 2)
        ? $arr[2]
        : $rdir . "/generators/$soc_name/hls_$pgm" . "_$func";

    my $prefix = "rocc" . $idx . "_";
    # 4th element (is_rocc) is set to 1; 5th is the RoCC index
    push(@Accel_tuples, [$pgm, $func, $bm_path, 1, $idx, $prefix]);

    $func = $prefix . $func;
    $hls_bm{"hls_$func"} = $bm_path;
    push(@RoCC_names, $func);
    $idx += 1;
}

# TileLink accelerators: [ pgm, func, addr, optional path ]
$idx = 0;
foreach my $TLL2_accel (@TLL2_accels) {
    my @arr  = @{$TLL2_accel};
    my $pgm  = $arr[0];
    my $func = $arr[1];
    my $addr = $arr[2];

    my $bm_path = (scalar @arr > 3)
        ? $arr[3]
        : $rdir . "/generators/$soc_name/hls_$pgm" . "_$func";

    my $prefix = "tl" . $idx . "_";
    # 4th element (is_rocc) is set to 0; 5th is the base address
    push(@Accel_tuples, [$pgm, $func, $bm_path, 0, $addr, $prefix]);

    $func = $prefix . $func;
    $hls_bm{"hls_$func"} = $bm_path;
    push(@TLL2_names, $func);
    $idx += 1;
}

# Generate the verilog and chisel code
generate_accel(\@Accel_tuples);
# Generate build.sbt under firesim/sim
generate_build_sbt($soc_name, \%hls_bm);
# Generate HLSConfig file for RoCC Accelerators
generate_config(\@RoCC_names, \@TLL2_names, $postfix);

# F1
#generate_f1_scripts(\%hls_bm);
#generate_xsim_scripts(\%hls_bm);
#compile_xsim_libs($postfix, "clean", 0);
#compile_replace_rtl($postfix, "clean", 0);
#print_xsim_cmd($postfix, 0);

# Ax machines
#compile_vcs("clean");
#copy_verilog(\%hls_bm, "$rdir/sim/generated-src/f1/FireSimHLS-HLSFireSimRocketChipConfig-FireSimConfig/FPGATop.v");
# Design-name suffix: empty when the NIC is included, "NoNIC" otherwise.
sub _soc_nic_suffix {
    my ($with_nic) = @_;
    return $with_nic ? "" : "NoNIC";
}

# Run "make <args>" inside $rdir/<subdir>.
# Dies when the directory cannot be entered, so make is never run from
# whatever directory the script happened to be in. Returns the system() status.
sub _soc_make_in {
    my ($subdir, $make_args) = @_;
    chdir("$rdir/$subdir") or die "Cannot chdir to $rdir/$subdir: $!\n";
    return system("make $make_args");
}

# print_xsim_cmd($postfix, $with_nic)
# Prints the shell commands a user runs by hand for an XSim flow. Nothing is
# executed; the $RDIR / ${PGM} / ${FUNC} placeholders are printed literally.
sub print_xsim_cmd {
    my ($postfix, $with_nic) = @_;
    my $nic    = _soc_nic_suffix($with_nic);
    my $target = "DESIGN=FireSimHLS$nic TARGET_CONFIG=HLSFireSimRocketChipConfig$postfix PLATFORM_CONFIG=FireSimConfig";

    print("\n");
    print("Source Full Env:\n source sourceme-f1-full.sh\n");
    print("XSim Compile:\n" . 'cd $RDIR/sim ' . "&& make $target xsim\n");
    #cl_FireSimHLSNoNIC-HLSFireSimRocketChipConfig-FireSimConfig/verif
    print("Remove Sim Folder:\n" . 'rm -rf cl_' . "FireSimHLS$nic-HLSFireSimRocketChipConfig$postfix-FireSimConfig/verif/sim\n");
    print("XSim Run Driver:\n" . 'cd $RDIR/sim ' . "&& make $target xsim-dut\n");
    print("XSim Run Test:\n" . 'cd $RDIR/sim ' . "&& make $target run-xsim SIM_BINARY=" . '$RDIR/sim/target-rtl/firechip/hls_${PGM}_${FUNC}/src/main/c/${PGM}.riscv');
    print("\n");
    #print("LD_LIBRARY_PATH=output/f1/FireSimHLS$nic-HLSFireSimRocketChipConfig$postfix-FireSimConfig/ output/f1/FireSimHLS$nic-HLSFireSimRocketChipConfig$postfix-FireSimConfig/FireSimHLS$nic-f1 ".'+mm_readLatency=10 +mm_writeLatency=10 +mm_readMaxReqs=4 +mm_writeMaxReqs=4 +netburst=8 +slotid=0 $RDIR/sim/target-rtl/firechip/hls_${PGM}_${FUNC}/src/main/c/${PGM}.riscv');
}

# compile_xsim_libs($postfix, $clean, $with_nic)
# Runs "make ... $clean xsim" in $rdir/sim. $clean is an extra make target
# (for example "clean") placed before "xsim".
sub compile_xsim_libs {
    my ($postfix, $clean, $with_nic) = @_;
    my $nic = _soc_nic_suffix($with_nic);
    return _soc_make_in("sim",
        "DESIGN=FireSimHLS$nic TARGET_CONFIG=HLSFireSimRocketChipConfig$postfix PLATFORM_CONFIG=FireSimConfig $clean xsim");
}

# compile_replace_rtl($postfix, $clean, $with_nic)
# Runs "make ... $clean replace-rtl" in $rdir/sim.
sub compile_replace_rtl {
    my ($postfix, $clean, $with_nic) = @_;
    my $nic = _soc_nic_suffix($with_nic);
    return _soc_make_in("sim",
        "DESIGN=FireSimHLS$nic TARGET_CONFIG=HLSFireSimRocketChipConfig$postfix PLATFORM_CONFIG=FireSimConfig $clean replace-rtl");
}

# compile_vcs($clean)
# Runs "make $clean debug CONFIG=" in $rdir/sims/vcs.
sub compile_vcs {
    my ($clean) = @_;
    return _soc_make_in("sims/vcs", "$clean debug CONFIG=");
}

# copy_verilog(\%bm_path, $FPGATop_path)
# Appends every <path>/src/main/verilog/*.v of every accelerator project to
# $FPGATop_path. Projects are processed in sorted name order; a failing
# append is reported and the remaining projects are still processed.
sub copy_verilog {
    my %bm_path      = %{ $_[0] };
    my $FPGATop_path = $_[1];

    foreach my $bm (sort keys %bm_path) {
        my $path = $bm_path{$bm};
        system("cat $path/src/main/verilog/*.v >> $FPGATop_path") == 0
            or warn "copy_verilog: appending Verilog of $bm failed\n";
    }
}
#!/usr/bin/perl
use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);

# Generates bm_wrapper.h: a C wrapper that invokes a RoCC HLS accelerator
# through the ROCC_INSTRUCTION_* macros of rocc.h.
#
# Inputs: file_name, func_name, rocc_index, prefix(Optional)
#
# The argument list of the wrapper is derived from the input ports of
# ../verilog/<prefix><func_name>.v (relative to the current directory).
# The black-box bookkeeping that used to be computed here and then thrown
# away is gone; run_chisel.pl is the script that emits the black box.
my $dir        = getcwd;
my $file_name  = $ARGV[0];
my $func_name  = $ARGV[1];
my $rocc_index = $ARGV[2];
my $prefix     = $ARGV[3];    # undef when not given

my $rdir = $ENV{'RDIR'};
if ((not defined($rdir)) or $rdir eq '') {
    print("Please source sourceme-f1.sh!\n");
    exit(1);
}
if ((not defined($func_name)) or (not defined($rocc_index))) {
    die "Usage: generate_wrapper.pl file_name func_name rocc_index [prefix]\n";
}

# The C function keeps the unprefixed name; only the Verilog module is prefixed.
my $wrapper_func_name = $func_name . "_wrapper";
my $wrapper_header    = "bm_wrapper.h";
if ($prefix) {
    $func_name = $prefix . $func_name;
}

#############################PARSE Verilog##############################
my $verilog_file   = "$dir/../verilog/$func_name" . ".v";
my @verilog_input  = ();
my @verilog_output = ();

print "Parsing " . $verilog_file . "\n";
# Without the port list a wrapper with no arguments and no return value
# would be written, so an unreadable Verilog file is fatal.
open my $verilog_fh, '<', $verilog_file
    or die "Cannot open $verilog_file: $!\n";
while (my $line = <$verilog_fh>) {
    my ($direction, $decl);
    if ($line =~ m/^\s*input\s+(.*)/) {
        ($direction, $decl) = ('input', $1);
    }
    elsif ($line =~ m/^\s*output\s+(.*)/) {
        ($direction, $decl) = ('output', $1);
    }
    else {
        next;
    }

    # "[msb:lsb] name;" or plain "name;"
    my $port_name;
    if ($decl =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/) {
        $port_name = $3;
    }
    elsif ($decl =~ m/\s*(.*)\s*;/) {
        $port_name = $1;
    }
    else {
        next;
    }

    if ($direction eq 'input') {
        push(@verilog_input, $port_name);
    }
    else {
        push(@verilog_output, $port_name);
    }
}
close $verilog_fh;

print("Inputs:");
print join(' ', @verilog_input) . "\n";
print("Outputs:");
print join(' ', @verilog_output) . "\n";

#creat scala folder
my $scala_dir = "$dir/../scala";
mkdir $scala_dir unless (-d $scala_dir);

#############################CLASSIFY PORTS##############################
# An input whose name does not contain "ap_" is assumed to be an argument.
my @verilog_input_scalar      = ();
my %verilog_input_pointer     = ();    # ap_bus argument => number of its input signals seen
my @verilog_input_pointer_arg = ();    # An ordered list of args

foreach my $input_name (@verilog_input) {
    if ($input_name =~ m/ap_clk/ or $input_name =~ m/ap_rst/) {
        next;
    }
    # If the input is a ap_bus port, the signals should match the following format
    # There should be 3 different input signals
    elsif ($input_name =~ m/(\S+)_req_full_n/
        or $input_name =~ m/(\S+)_rsp_empty_n/
        or $input_name =~ m/(\S+)_datain/)
    {
        my $arg_name = $1;
        if ($input_name =~ m/(\S+)_datain/) {
            push(@verilog_input_pointer_arg, $arg_name);
        }
        $verilog_input_pointer{$arg_name} += 1;
    }
    # NOTE(review): this matches "ap_" anywhere in the name, so an argument
    # such as "bitmap_x" is reported as "Not func args" and left out. It is
    # kept as is because run_chisel.pl classifies ports the same way --
    # confirm and fix both scripts together.
    elsif (!($input_name =~ m/ap_/)) {
        push(@verilog_input_scalar, $input_name);
    }
    else {
        print("Not func args: $input_name\n");
    }
}

foreach my $arg (@verilog_input_pointer_arg) {
    print("pointer_arg: $arg\n");
}
my $hash_count = keys %verilog_input_pointer;
print("hash_count: $hash_count\n");
if (@verilog_input_scalar + $hash_count > 2) {
    print "verilog_input_scalar: ";
    print join(' ', @verilog_input_scalar) . "\n";
    die "Only accept function with no more than 2 arguments!\n";
}

foreach my $arg (keys %verilog_input_pointer) {
    if ($verilog_input_pointer{$arg} != 3) {
        die "The AP bus interfance did not generate expected number of inputs!\n";
    }
}

# The function returns a value when the module has an ap_return output.
my $ap_return = 0;
foreach my $output_name (@verilog_output) {
    if ($output_name =~ m/ap_return/) {
        $ap_return = 1;
    }
}

#############################GENERATE Wrapper##############################
# NOTE(review): scalar arguments are listed before pointer arguments,
# regardless of their position in the port list -- confirm this matches the
# rs1/rs2 mapping generated by run_chisel.pl.
my @args       = (@verilog_input_scalar, @verilog_input_pointer_arg);
my $total_args = @verilog_input_scalar + $hash_count;
my $arg_str    = join ', ', @args;

my $return_type = $ap_return ? "uint64_t " : "void ";

my $wrapper = "\n#ifdef CUSTOM_INST\n#include \"rocc.h\"\n#endif\n";
$wrapper .= "$return_type $wrapper_func_name(";
$wrapper .= join(', ', map { "uint64_t $_" } @args);
$wrapper .= ") {\n";

if ($ap_return) {
    # Initialised so the wrapper returns a defined value when CUSTOM_INST
    # is not set and the accelerator is never invoked.
    $wrapper .= "    uint64_t ret_val = 0;\n";
}
$wrapper .= "\n    #ifdef CUSTOM_INST\n    #define XCUSTOM_ACC " . $rocc_index . "\n";

# Macro name: ROCC_INSTRUCTION[_[D][S[S]]] -- D when a value is returned,
# one S per source operand (at most two).
my $suffix = ($ap_return ? 'D' : '') . ('S' x ($total_args > 2 ? 2 : $total_args));
my $macro  = 'ROCC_INSTRUCTION' . ($suffix ne '' ? "_$suffix" : '');
my @operands = ('XCUSTOM_ACC');
push(@operands, 'ret_val') if $ap_return;
push(@operands, $arg_str)  if $total_args > 0;
push(@operands, '0');
$wrapper .= "    $macro(" . join(', ', @operands) . ");\n";

$wrapper .= "    ROCC_BARRIER();\n";
$wrapper .= "    #endif\n";
if ($ap_return) {
    $wrapper .= "    return ret_val;\n";
}
$wrapper .= "}";

open my $header_fh, '>', $wrapper_header
    or die "Cannot open $wrapper_header for writing: $!\n";
print {$header_fh} $wrapper or die "Cannot write $wrapper_header: $!\n";
close $header_fh or die "Cannot close $wrapper_header: $!\n";
#!/usr/bin/perl
use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);
use Tie::IxHash;

# Generates bm_wrapper.h: a bare-metal C wrapper that drives a TileLink/AXI
# HLS accelerator through memory-mapped registers.
#
# Inputs: file_name, func_name, func_base_addr, prefix(Optional)
#
# The register map is taken from the "Address Info" comment block of
# ../verilog/<prefix><func_name>_control_s_axi.v (relative to the current
# directory): every "0x.. : Data signal of <var>" line contributes one 32-bit
# register of <var>. A variable with two registers is treated as 64 bit.
my $dir            = getcwd;
my $file_name      = $ARGV[0];
my $func_name      = $ARGV[1];
my $func_base_addr = $ARGV[2];
my $prefix         = $ARGV[3];    # undef when not given

my $rdir = $ENV{'RDIR'};
if ((not defined($rdir)) or $rdir eq '') {
    print("Please source sourceme-f1.sh!\n");
    exit(1);
}
if ((not defined($func_name)) or (not defined($func_base_addr))) {
    die "Usage: generate_wrapper_tl.pl file_name func_name func_base_addr [prefix]\n";
}

# The C function keeps the unprefixed name; only the Verilog module is prefixed.
my $wrapper_func_name = $func_name . "_wrapper";
my $wrapper_header    = "bm_wrapper.h";

if ($prefix) {
    $func_name = $prefix . $func_name;
}

my $bm_inc_path = $rdir . "/hls/sw/bm/";
#############################PARSE Verilog##############################

# variable name => [ register offsets, low word first ], in file order
my %var_dict;
tie %var_dict, "Tie::IxHash";
my $verilog_file = "$dir/../verilog/$func_name" . "_control_s_axi.v";
print "Parsing " . $verilog_file . "\n";

# Without the register map a wrapper that passes no arguments would be
# written, so an unreadable Verilog file is fatal.
open my $verilog_fh, '<', $verilog_file
    or die "Cannot open $verilog_file: $!\n";
my $in_address_info = 0;
while (my $line = <$verilog_fh>) {
    # The "Parameter" banner ends the address block.
    if ($line =~ m/------------------------Parameter----------------------/) {
        $in_address_info = 0;
    }
    if ($in_address_info and $line =~ m/(0x\S+) : Data signal of (\S+)/) {
        my $base_addr = $1;
        my $var       = $2;
        if (exists $var_dict{$var}) {
            push(@{ $var_dict{$var} }, $base_addr);
        }
        else {
            $var_dict{$var} = [$base_addr];
        }
    }
    # The "Address Info" banner starts the address block (from the next line on).
    if ($line =~ m/------------------------Address Info------------------/) {
        $in_address_info = 1;
    }
}
close $verilog_fh;

#############################GENERATE Software Bare-metal Wrappers##############################
# Kept at file level (not in a sub) so the ordered hash is used directly.
foreach my $var (keys %var_dict) {
    print($var . ": ");
    foreach my $base_addr (@{ $var_dict{$var} }) {
        print($base_addr . "\t");
    }
    print("\n");
}

# C type of a variable: 64 bit when it occupies two registers.
sub _tl_c_type {
    my ($addr_ref) = @_;
    return (scalar(@{$addr_ref}) == 2) ? "uint64_t" : "uint32_t";
}

my $wrapper = '#include "' . $bm_inc_path . '/mmio.h"' . "\n";
#$wrapper .= '#include "'.$bm_inc_path.'/time.h"'."\n";

$wrapper .= '#define ACCEL_BASE ' . $func_base_addr . "\n";
$wrapper .= "#define AP_DONE_MASK 0b10\n";
$wrapper .= "#define ACCEL_INT 0x4\n";
foreach my $var (keys %var_dict) {
    my @addr = @{ $var_dict{$var} };
    foreach my $idx (0 .. $#addr) {
        $wrapper .= "#define ACCEL_${var}_$idx $addr[$idx]\n";
    }
}

my $ap_return      = exists $var_dict{"ap_return"} ? 1 : 0;
my $ap_return_type = $ap_return ? _tl_c_type($var_dict{"ap_return"}) : "uint32_t";

# Everything except ap_return is a function argument, in register-map order.
my @arg_names = grep { $_ ne "ap_return" } keys %var_dict;

$wrapper .= ($ap_return ? $ap_return_type : "void") . " $wrapper_func_name(";
$wrapper .= join(', ', map { _tl_c_type($var_dict{$_}) . " $_" } @arg_names);
$wrapper .= ") {";

$wrapper .= "\n    // Disable Interrupt\n"
          . "    reg_write32(ACCEL_BASE + ACCEL_INT, 0x0);\n";

# Write every argument, low word first; the high word is the value shifted
# right by 32.
foreach my $var (@arg_names) {
    my @addr = @{ $var_dict{$var} };
    foreach my $idx (0 .. $#addr) {
        die "Index exceeds limit!\n" if $idx > 1;
        my $shift = ($idx == 1) ? " >> 32" : "";
        $wrapper .= "    reg_write32(ACCEL_BASE + ACCEL_${var}_$idx, (uint32_t) ($var$shift));\n";
    }
}

$wrapper .= "\n    // Write to ap_start to start the execution\n"
          . "    reg_write32(ACCEL_BASE, 0x1);\n"
          . "\n"
          . "    // Done?\n"
          . "    int done = 0;\n"
          . "    while (!done){\n"
          . "        done = reg_read32(ACCEL_BASE) & AP_DONE_MASK;\n"
          . "    }\n";

# If there a return value
if ($ap_return) {
    my @addr = @{ $var_dict{"ap_return"} };

    $wrapper .= "\n    $ap_return_type ret_val = 0;\n";
    foreach my $idx (0 .. $#addr) {
        die "Index exceeds limit!\n" if $idx > 1;
        my $word = "reg_read32(ACCEL_BASE + ACCEL_ap_return_$idx)";
        # The second register holds bits 63..32, so it is widened and moved
        # UP by 32 bits. The previous code shifted it down, which discarded
        # the upper half of every 64-bit result.
        if ($idx == 1) {
            $word = "(($ap_return_type) $word << 32)";
        }
        $wrapper .= "    ret_val = $word | ret_val;\n";
    }
    $wrapper .= "    return ret_val;\n";
}

$wrapper .= "}\n";

open my $header_fh, '>', $wrapper_header
    or die "Cannot open $wrapper_header for writing: $!\n";
print {$header_fh} $wrapper or die "Cannot write $wrapper_header: $!\n";
close $header_fh or die "Cannot close $wrapper_header: $!\n";
#!/usr/bin/perl
use strict;
use warnings;
use JSON qw( decode_json );
use Cwd;
use File::Copy;

# parse_json($json_fn)
#
# Reads the accelerator description in the JSON file $json_fn and returns two
# array references:
#   1. RoCC accelerators, one [ pgm, func ] pair for each of
#      RoCC.custom0 .. RoCC.custom3 that has non-empty "pgm" and "func",
#   2. TLL2 accelerators, one [ pgm, func, addr ] triple for each element of
#      the TLL2 list that has non-empty "pgm", "func" and "addr".
# Every accepted accelerator is also printed to STDOUT.
# Dies when the file cannot be opened.
sub parse_json {
    my ($json_fn) = @_;

    open my $fh, '<', $json_fn or die "error opening $json_fn: $!";
    my $json = do { local $/; <$fh> };

    my $decoded = decode_json($json);

    # ---- RoCC: fixed slots custom0 .. custom3 ----
    my @RoCC_accels;
    print("\nRoCC Accels: \n");
    foreach my $slot (0 .. 3) {
        my $entry = $decoded->{'RoCC'}{"custom$slot"};
        next unless (exists $entry->{'pgm'}) and (exists $entry->{'func'});

        my ($pgm, $func) = ($entry->{'pgm'}, $entry->{'func'});
        next unless ($pgm ne "") and ($func ne "");

        print("\tpgm: $pgm\t func: $func\n");
        push(@RoCC_accels, [ $pgm, $func ]);
    }

    # ---- TLL2: list of accelerator objects ----
    print("TLL2 Accels: \n");
    my @TLL2_accels;
    if (exists $decoded->{'TLL2'}) {
        foreach my $accel (@{ $decoded->{'TLL2'} }) {
            next unless (exists $accel->{'pgm'})
                    and (exists $accel->{'func'})
                    and (exists $accel->{'addr'});

            my ($pgm, $func, $addr) = ($accel->{'pgm'}, $accel->{'func'}, $accel->{'addr'});
            next unless $pgm ne "" and $func ne "" and $addr ne "";

            print("\tpgm: $pgm\t func: $func\t addr: $addr\n");
            push(@TLL2_accels, [ $pgm, $func, $addr ]);
        }
    }

    return (\@RoCC_accels, \@TLL2_accels);
}

1;
"$dir/../verilog/$func_name".".v"; +my $line = undef; +my @verilog_input = (); +my @verilog_input_size = (); +my @verilog_output = (); +my @verilog_output_size = (); + +print "Parsing ".$verilog_file."\n"; +# parse the verilog file to get the info we need +if(!open VERILOG, "$verilog_file"){ + print $!; +} else { + while(){ + $line = $_; + if($line =~ m/^\s*input\s+(.*)/){ + my $input = $1; + #print "input:$input\n"; + if($input =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){ + my $end = $1; + my $start = $2; + my $input_name = $3; + #print "here!"."$input_name\n"; + push (@verilog_input, $input_name); + my $size = $end - $start + 1; + push(@verilog_input_size, $size); + }elsif ($input =~ m/\s*(.*)\s*;/){ + my $input_name = $1; + #print "here!"."$input_name\n"; + push (@verilog_input, $input_name); + push(@verilog_input_size, 1); + } + + }elsif($line =~ m/^\s*output\s+(.*)/){ + my $output = $1; + #print "output:$output\n"; + if($output =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){ + my $end = $1; + my $start = $2; + my $output_name = $3; + #print "here!"."$output_name\n"; + push(@verilog_output, $output_name); + my $size = $end - $start + 1; + push(@verilog_output_size, $size); + }elsif ($output =~ m/\s*(.*)\s*;/){ + my $output_name = $1; + #print "here!"."$output_name\n"; + push (@verilog_output, $output_name); + push(@verilog_output_size, 1); + } + } + } + print("Inputs:"); + my $in_str = join ' ', @verilog_input; + print $in_str."\n"; + print("Outputs:"); + my $out_str = join ' ', @verilog_output; + print $out_str."\n"; +} + +#creat scala folder +my $scala_dir = "$dir/../scala"; +mkdir $scala_dir unless (-d $scala_dir); + +############################################################################################################################## +print "Generating BlackBox file ...\n"; +# should be under scala folder +open BB, ">$scala_dir/$func_name"."_blackbox.scala"; + +my $blackbox1 = " +package hls_test_c +import Chisel._ +import chisel3.experimental.dontTouch +import 
freechips.rocketchip.config.{Parameters, Field} +import freechips.rocketchip.tile._ +import freechips.rocketchip.util._ +import vivadoHLS._ + +class test_c() extends BlackBox() { +"; +$blackbox1 =~ s/test_c/$func_name/g; + +print BB $blackbox1; + +print BB "\tval io = new Bundle {\n"; +my $i = undef; +my $bb_body = ""; + +# now if the input name does not start with ap, we assume it is an arg +my $ap_return = 0; +my $ap_clk = 0; +my $ap_rst = 0; +my @verilog_input_scalar = (); +my %verilog_input_pointer = (); +my @verilog_input_pointer_arg = (); # An ordered list of args + +my $arg_count = 0; +my @sindices = (); +my @pindices = (); + +for( $i = 0; $i < @verilog_input; $i = $i + 1 ){ + my $input_name = $verilog_input[$i]; + my $input_size = $verilog_input_size[$i]; + + if ($input_name =~ m/ap_clk(.*)/){ + $ap_clk = 1; + } + + + elsif ($input_name =~ m/ap_rst(.*)/){ + $ap_rst = 1; + } + + # If the input is a ap_bus port, the signals should match the following format + # There should be 3 different input signals + elsif($input_name =~ m/(\S+)_req_full_n/ or $input_name =~ m/(\S+)_rsp_empty_n/ or $input_name =~ m/(\S+)_datain/){ + my $arg_name = $1; + if ($input_name =~ m/(\S+)_datain/) { + push(@pindices, $arg_count); + $arg_count = $arg_count + 1; + push(@verilog_input_pointer_arg, $arg_name); + } + if (defined $verilog_input_pointer{$arg_name}) { + $verilog_input_pointer{$arg_name} += 1; + } else { + $verilog_input_pointer{$arg_name} = 1; + } + } + elsif(!($input_name =~ m/ap_(,*)/)){ + push (@verilog_input_scalar, $input_name); + push(@sindices, $arg_count); + $arg_count = $arg_count + 1; + } + else{ + print("Not func args: $input_name\n"); + } + + print BB "\t\tval $input_name = "; + if ($input_name =~ m/ap_clk(.*)/){ + print BB "Clock\(INPUT\)\n"; + }else{ + if ($input_size == 1){ + print BB "Bool\(INPUT\)\n"; + }else{ + print BB "Bits\(INPUT, width = $input_size\)\n"; + } + } + if($input_name ne "ap_clk" && $input_name ne "ap_rst"){ + $bb_body = 
$bb_body."\tio.".$input_name.".setName(\"".$input_name."\")\n";
+    }
+}
+
+# Report the pointer (ap_bus) arguments in the order they were first seen.
+#foreach my $arg (keys %verilog_input_pointer) {
+foreach my $arg (@verilog_input_pointer_arg) {
+    print("pointer_arg: $arg\n");
+}
+my $hash_count = keys %verilog_input_pointer;
+print("hash_count: $hash_count\n");
+if(@verilog_input_scalar + $hash_count > 2){
+    print "verilog_input_scalar: ";
+    my $in_str = join ' ', @verilog_input_scalar;
+    print $in_str."\n";
+    die "Only accept function with no more than 2 arguments!\n";
+}
+
+# Every ap_bus argument must have contributed exactly its three input ports.
+foreach my $arg (keys %verilog_input_pointer) {
+    if ($verilog_input_pointer{$arg} != 3) {
+        die "The AP bus interfance did not generate expected number of inputs!\n";
+    }
+}
+
+# Declare the output ports in the BlackBox io bundle.
+for( $i = 0; $i < @verilog_output; $i = $i + 1 ){
+
+    my $output_name = $verilog_output[$i];
+    my $output_size = $verilog_output_size[$i];
+
+    if ($output_name =~ m/ap_return(.*)/){
+        $ap_return = 1;
+    }
+
+    print BB "\t\tval $output_name = ";
+    if ($output_size == 1){
+        print BB "Bool(OUTPUT)\n";
+    }else{
+        print BB "Bits(OUTPUT, width = $output_size)\n";
+    }
+
+    $bb_body = $bb_body."\tio.".$output_name.".setName(\"".$output_name."\")\n";
+}
+
+if ($ap_clk == 1){
+    $bb_body = $bb_body."addClock(Driver\.implicitClock)\n".'renameClock("clk", "ap_clk")'."\n";
+}
+
+if ($ap_rst == 1){
+    $bb_body = $bb_body.'renameReset("ap_rst")'."\n";
+}
+
+print BB "\t}\n";
+#print BB "$bb_body\n";
+#print BB "moduleName = "."\"$func_name\"\n";
+print BB "}\n";
+
+# $bb_def accumulates the wrapper module that adapts the raw BlackBox ports
+# to the ApCtrlIO / ApBusIO bundles.
+my $bb_def = "class HLS$func_name"."Blackbox() extends Module {\n";
+
+# Scalar IO Parameter
+my @sdata_widths = ();
+#my @sindices = ();
+#my $sidx = 0;
+foreach my $arg (@verilog_input_scalar) {
+    my $sdata_idx = first { $verilog_input[$_] eq $arg} 0..$#verilog_input;
+    my $sdata_width = $verilog_input_size[$sdata_idx];
+    push(@sdata_widths, $sdata_width);
+    #push(@sindices, $sidx);
+    #$sidx += 1;
+}
+my $sindices_str = join ',',@sindices;
+my $sdata_widths_str = join ',',@sdata_widths;
+print "scalar data_widths: $sdata_widths_str\n";
+
+$bb_def .= "\tval scalar_io_dataWidths = List($sdata_widths_str)\n";
+$bb_def .= "\tval scalar_io_argLoc = List($sindices_str) //Lists the argument number of the scalar_io\n";
+
+# Pointer IO Parameter
+# Walk the pointer args in first-seen order, NOT in sorted hash-key order:
+# ap_bus_argLoc (@pindices) and the io.ap_bus(N) wiring below are both in
+# first-seen order, so the width lists must line up with them index by index.
+my @addr_widths = ();
+my @data_widths = ();
+#my @indices = ();
+my $idx = 0;
+foreach my $arg (@verilog_input_pointer_arg) {
+    my $data_signal = $arg."_dataout";
+    my $data_idx = first { $verilog_output[$_] eq $data_signal } 0..$#verilog_output;
+    die "No $data_signal output found for ap_bus argument $arg!\n"
+        unless defined $data_idx;
+
+    # The address width is fixed at 64 rather than taken from <arg>_address.
+    #my $addr_width = $verilog_output_size[$addr_idx];
+    my $addr_width = "64";
+
+    my $data_width = $verilog_output_size[$data_idx];
+    push(@addr_widths, $addr_width);
+    push(@data_widths, $data_width);
+    #push(@indices, $idx);
+}
+#my $indices_str = join ',',@indices;
+my $pindices_str = join ',',@pindices;
+my $addr_widths_str = join ',',@addr_widths;
+print "addr_widths: $addr_widths_str\n";
+my $data_widths_str = join ',',@data_widths;
+print "data_widths: $data_widths_str\n";
+
+$bb_def .= "\tval ap_bus_addrWidths = List(".$addr_widths_str.")\n";
+$bb_def .= "\tval ap_bus_dataWidths = List(".$data_widths_str.")\n";
+
+#$bb_def .= "\tval ap_bus_argLoc = List(".$indices_str.")\n";
+$bb_def .= "\tval ap_bus_argLoc = List(".$pindices_str.")\n";
+
+# Width of the return value; stays 1 when the function has no ap_return port.
+my $ret_width = 1;
+if ($ap_return == 1){
+    my $ret_idx = first { $verilog_output[$_] eq 'ap_return'} 0..$#verilog_output;
+    $ret_width = $verilog_output_size[$ret_idx];
+}
+
+$bb_def .= "\tval io = new Bundle {
+\tval ap = new ApCtrlIO(dataWidth = $ret_width)
+\tval ap_bus = HeterogeneousBag(ap_bus_addrWidths.zip(ap_bus_dataWidths).map {
+    case (aw, dw) => new ApBusIO(dw, aw)
+    })
+";
+
+if (@verilog_input_scalar > 0){
+    $bb_def .="\tval scalar_io = HeterogeneousBag(scalar_io_dataWidths.map(w => UInt(INPUT, width = w)))";
+}
+$bb_def .="
+}
+
+\tval bb = Module(new $func_name())
+
+\tbb.io.ap_start := io.ap.start
+\tio.ap.done := bb.io.ap_done
+\tio.ap.idle := bb.io.ap_idle
+\tio.ap.ready := bb.io.ap_ready
+";
+
+if ($ap_return == 1) {
+    $bb_def .= "\tio.ap.rtn := bb.io.ap_return\n";
+}
+
+if ($ap_rst == 1) {
+    $bb_def .= "\tbb.io.ap_rst := reset\n";
+}
+if ($ap_clk == 1) {
+    $bb_def .= "\tbb.io.ap_clk := clock\n";
+}
+
+# Wire each ap_bus argument to its ApBusIO bundle, in first-seen order.
+$idx = 0;
+#foreach my $arg (keys %verilog_input_pointer) {
+foreach my $arg (@verilog_input_pointer_arg) {
+    $bb_def .= "\tio.ap_bus($idx).req.din := bb.io.${arg}_req_din
+\tbb.io.${arg}_req_full_n := io.ap_bus($idx).req_full_n
+\tio.ap_bus($idx).req_write := bb.io.${arg}_req_write
+\tbb.io.${arg}_rsp_empty_n := io.ap_bus($idx).rsp_empty_n
+\tio.ap_bus($idx).rsp_read := bb.io.${arg}_rsp_read
+\tio.ap_bus($idx).req.address := bb.io.${arg}_address
+\tbb.io.${arg}_datain := io.ap_bus($idx).rsp.datain
+\tio.ap_bus($idx).req.dataout := bb.io.${arg}_dataout
+\tio.ap_bus($idx).req.size := bb.io.${arg}_size
+";
+    $idx += 1;
+}
+# Wire each scalar argument to its scalar_io slot.
+$idx = 0;
+foreach my $arg (@verilog_input_scalar) {
+    $bb_def .="\tbb.io.$arg := io.scalar_io($idx)\n";
+    $idx += 1;
+}
+
+$bb_def .= "}";
+
+print BB $bb_def;
+# Buffered write errors only surface at close, so check it.
+close(BB) or die "error closing BlackBox file: $!\n";
+
+##############################################################################################################################
+print "Copying Vivado HLS Interface file ...\n";
+copy("$rdir/tools/centrifuge/scripts/chisel_rocc_aux/ap_bus.scala", "$scala_dir/") or die "Copy failed: $!";
+
+##############################################################################################################################
+print "Copying ROCC Memory Controller file ...\n";
+copy("$rdir/tools/centrifuge/scripts/chisel_rocc_aux/memControllerComponents.scala", "$scala_dir/") or die "Copy failed: $!";
+
+##############################################################################################################################
+print "Copying Controller Utilities file ...\n";
+copy("$rdir/tools/centrifuge/scripts/chisel_rocc_aux/controlUtils.scala", "$scala_dir/") or die "Copy failed: $!";
+##############################################################################################################################
+print "Generating Control file ...\n";
+
+# CT stays a bareword handle because the rest of the script prints to it.
+# Opening is checked: an unchecked failure would silently drop every
+# later "print CT".
+my $control_file = "$scala_dir/$func_name"."_accel.scala";
+open(CT, '>', $control_file) or die "error opening $control_file: $!\n";
+
+# Header of the generated RoCC control module; every "test_c" placeholder is
+# replaced with the real function name below.
+my $control1 = '
+package hls_test_c
+import Chisel._
+import chisel3.experimental.dontTouch
+import freechips.rocketchip.config.{Parameters, Field}
+import freechips.rocketchip.tile._
+import freechips.rocketchip.config._
+import freechips.rocketchip.diplomacy._
+import freechips.rocketchip.rocket._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+import freechips.rocketchip.system._
+
+import vivadoHLS._
+import memControl._
+import hls_test_c._
+
+class HLStest_cControl(opcodes: OpcodeSet)(implicit p: Parameters) extends LazyRoCC(opcodes) {
+    override lazy val module = new HLStest_cControlModule(this)
+}
+
+class HLStest_cControlModule(outer: HLStest_cControl)(implicit p: Parameters) extends LazyRoCCModuleImp(outer)
+    with HasCoreParameters {
+';
+
+$control1 =~ s/test_c/$func_name/g;
+print CT $control1;
+#TODO modify accelerator arg!
+my $control2 = ' +val result = Reg(init=Bits(0, width=xLen)) +val respValid = Reg(init=Bool(false)) +val rdy = Reg(init=Bool(true)) +val busy = Reg(init=Bool(false)) +val bufferedCmd = Reg(init=Wire( new RoCCCommand()(p))) + +val cmd = Queue(io.cmd) +val funct = bufferedCmd.inst.funct +val rs1 = bufferedCmd.rs1 +val rs2 = bufferedCmd.rs2 +val rdTag = bufferedCmd.inst.rd +val doAdd = funct === UInt(0) + +val rs1_unbuffered = cmd.bits.rs1 +val rs2_unbuffered = cmd.bits.rs2 + +val idle :: working :: Nil = Enum(UInt(),2) +val state = Reg(init=idle) + +when(reset.toBool){ + bufferedCmd.inst.funct := 0.asUInt(7.W) + bufferedCmd.inst.rs1 := 0.asUInt(5.W) + bufferedCmd.inst.rs2 := 0.asUInt(5.W) + bufferedCmd.inst.rd := 0.asUInt(5.W) + bufferedCmd.inst.opcode := 0.asUInt(5.W) + bufferedCmd.rs1 := 0.asUInt(64.W) + bufferedCmd.rs2 := 0.asUInt(64.W) +} + +// Assign Outputs to Appropriate registers +io.resp.valid := respValid && bufferedCmd.inst.xd + +//need to set rd to the value in the request. Otherwise bad things happen +//in this case, processor stalls +io.resp.bits.rd := rdTag +io.resp.bits.data := result +io.busy := busy +cmd.ready := rdy + +//===== Begin Accelerator ===== +val accel = Module(new HLStest_cBlackbox()) + +//Acclerator Registers (we buffer inputs to accelerator) +val ap_start = Reg(init=Bool(false)) + +//Assign Inputs to Accelerator +accel.io.ap.start := ap_start +'; +#accel.io.test_c_rs1 := rs1 //ACCEL IO NAME CAN CHANGE (NAMED IN C) +#accel.io.test_c_rs2 := rs2 //ACCEL IO NAME CAN CHANGE (NAMED IN C) +#my $rs1 = $verilog_input_scalar[0]; +#my $rs2 = $verilog_input_scalar[1]; + +for( $i = 0; $i < @verilog_input_scalar; $i = $i + 1 ){ + my $number = $i + 1; + $control2 = $control2."accel.io.scalar_io($i) := rs$number\n"; +} + +if ($ap_return eq 1){ + $control2 = $control2."val ap_return = accel.io.ap.rtn\n"; +}else{ + $control2 = $control2."val ap_return = UInt(4)\n"; +} + + +$control2 = $control2.'//Accelerator Outputs +val ap_done = accel.io.ap.done +val 
ap_idle = accel.io.ap.idle +val ap_ready = accel.io.ap.ready + +//===== End Accelerator ===== + +//===== Begin Mem Controller ===== +//The following are specific to the accelerator. They set the address and data widths of the ap_bus interfaces +val dataWidth = accel.ap_bus_dataWidths +val addrWidth = accel.ap_bus_addrWidths +val reqBufferLen = 4 +val rspBufferLen = 4 +val maxReqBytes = xLen/8 +val roccAddrWidth = coreMaxAddrBits +val roccDataWidth = coreDataBits +val roccTagWidth = coreDCacheReqTagBits +val roccCmdWidth = M_SZ +val roccTypWidth = log2Ceil(coreDataBytes.log2 + 1) +//val numTags = p(RoccMaxTaggedMemXacts) +val numTags = 16 +val tagOffset = 0 //Used if multiple accelerators to avoid tag collisions + +//Instantiate Controller +val memControl = Module(new MemController(dataWidth, addrWidth, reqBufferLen, rspBufferLen, maxReqBytes, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth, numTags, tagOffset)) + +if(accel.io.ap_bus.length > 0){ +//We have memory bus interfaces on the accelerator, create a memory controller + +//Hook up controller +for(i <- 0 until accel.io.ap_bus.length){ + //memControl.io.reqsIn(i) <> accel.io.ap_bus(i).req + memControl.io.reqsIn(i) := accel.io.ap_bus(i).req + + accel.io.ap_bus(i).req_full_n := memControl.io.reqsFullN(i) + + memControl.io.reqsWrite(i) := accel.io.ap_bus(i).req_write + + accel.io.ap_bus(i).rsp.datain := memControl.io.rspOut(i).datain + accel.io.ap_bus(i).rsp_empty_n := memControl.io.rsp_empty_n(i) + memControl.io.rsp_read(i) := accel.io.ap_bus(i).rsp_read +} +io.mem.req.bits.addr := memControl.io.roCCReqAddr +io.mem.req.bits.tag := memControl.io.roCCReqTag +io.mem.req.bits.cmd := memControl.io.roCCReqCmd +io.mem.req.bits.size := memControl.io.roCCReqTyp + +// If the address is not a mulitple of 8 byte which the coreDataBits width, +// We have to shift the N-bit data to the right place in a 64-bit word +val shift = (memControl.io.roCCReqAddr & UInt( log2Up(coreDataBits) - 1 )) << UInt(3) 
+io.mem.req.bits.data := memControl.io.roCCReqData << shift(7,0) +io.mem.req.valid := memControl.io.roCCReqValid +memControl.io.roCCReqRdy := io.mem.req.ready +//io.mem.req.bits.phys := Bool(true) + +//val roCCRespAddr = UInt(INPUT, width = roccAddrWidth) // coreMaxAddrBits) +memControl.io.roCCRspTag := io.mem.resp.bits.tag +memControl.io.roCCRspCmd := io.mem.resp.bits.cmd +memControl.io.roCCRspData := io.mem.resp.bits.data +//val roCCRespTyp +memControl.io.roCCRspValid := io.mem.resp.valid +} + +//===== End Mem Controller ===== +'; +# The sequence of arg 1 and 2 depends on the sequence they show up in the verilog file +# TODO think about a better way to add this + +$control2 .= '//===== Begin Argument Handling ===== +//TODO: currently only works for 2 argument calls. Generalize + +val cArgs = List(rs1, rs2) +val cArgsUnbuffered = List(rs1_unbuffered, rs2_unbuffered) +//Argument numbers are specified in the blackbox +'; + +if (@verilog_input_scalar > 0){ + $control2 .= '//Scalar values +for(i <- 0 until accel.io.scalar_io.length){ +accel.io.scalar_io(i) := cArgs(accel.scalar_io_argLoc(i)) +} +'; +} + +$control2 .= '//ap_bus offsets +for(i <- 0 until memControl.io.offsetAddrs.length){ +//ap_bus uses the unbuffered input because it is buffered on the first cycle +memControl.io.offsetAddrs(i) := cArgsUnbuffered(accel.ap_bus_argLoc(i)) +} +//===== End Argument Handling ===== +'; + + +$control2 .=' +if(accel.io.ap_bus.length > 0){ +//Will run ap_start after offsets loaded +for(i <- 0 until memControl.io.loadOffsets.length){ + memControl.io.loadOffsets(i) := (state === idle) && cmd.fire() +} +} + + +//===== Begin Controller State Machine Logic ===== +switch(state){ +is (idle){ + //Waiting for command + + when(cmd.fire()){ + //We have a valid, unserviced command. 
This code takes ready low so + //we should not accedently cause an infinite loop + + bufferedCmd := cmd.bits //Accelerator takes from bufferedCmd directly + busy := Bool(true) + rdy := Bool(false) + + //Load the offsets + /*if(accel.io.ap_bus.length > 0){ + //Will run ap_start after offsets loaded + for(i <- 0 until memControl.io.loadOffsets.length){ + memControl.io.loadOffsets(i) := Bool(true) + } + }*/ + + ap_start := Bool(true) //Set next state + state := working + //Note: Based on timing diagram in Vivado HLS user guide (pg 157), read occurs + //AFTER the 1st cycle. There will be a 1 cycle delay before input read as + //ap_start will be seen on next cycle. Idealy, ap_start would be raised 1 cycle + //earlier (ie. not using a register) or it would read the input immediatly + //when ap_start is raised (I assume this is due to an internal state machine). + //However, this would ruin the sequential nature of the state machine. It is + //possible to save a cycle by assigning ap_start as cmd.valid && state===idle + //&& !returned which would be asyncronous and probably trigger 1 cycle earlier. + //There would be more stringent timing requirements in this case though as the + //result would need to propogate before the next posEdge of the clk. + + + } + when(respValid && io.resp.ready){ + //The processor has read the response. 
There is no more data for it + //Drive resp.valid low to avoid stalling processor + respValid := Bool(false) + } +} +is (working){ + + //Stop Loading offsets + /*if(accel.io.ap_bus.length > 0){ + //Will run ap_start after offsets loaded + for(i <- 0 until memControl.io.loadOffsets.length){ + memControl.io.loadOffsets(i) := Bool(false) + } + }*/ + + //Waiting for accelerator to finish + + //All of the conditionals below can occure simultaniously + //and should be kept as seperart when statements + when(ap_done){ + //The accelerator has completed operation (user guidepg 156) and has + //has optionally generated a result (not not all accelerators will + //generated a result. This is technically not the same as ap_idle + //which signals when the accelerator is no longer busy. It is actually + //ap_ready actually determines when the accelerator is ready to accept + //more inputs. This is important for accelerators that do not operate + //in a syncronous mode. This is not true for the types of accelerators + //we are creating. + + result := ap_return + respValid := Bool(true) + } + when(ap_ready){ + //The accelerator has read the inputs and is ready to accept new ones. + //According to the timing diagram, + //ap_start should be deasserted for the next posedge. + ap_start := Bool(false) + } + + + if(accel.io.ap_bus.length == 0){ + when(ap_idle){ + //if the operation was completed (result valid), and the accelerator is ready + //the accerator is ready for the next operation and the controller is + //returned to the idle state to wait for a new command. the ready line + //is pulled high to advertise that the accelerator is ready. + + //from the manual, it appears that ap_done is always asserted when the + //accelerator is finished. if this is true, using ap_done && ap_ready + //should save one cycle over using ap_idle as the trigger. 
this is because, + //according to the timing diagram in the user manual (pg + + rdy := Bool(true) // ready to accept new commands + busy := Bool(false) // operation complete, no longer busy + state := idle + + //note: this code could possibly be placed in the ap_done action to save + //one wasted cycle. it is not clear from the user guide (pg 157), ap_idle + //is asserted one cycle after ap_done. if this arrangment has problems, + //transitioning on ap_idle should work but will result in an unnessicary + //extra cycle. + } + } + else + { + when(ap_idle && !memControl.io.memBusy){ + //if the operation was completed (result valid), and the accelerator is ready + //the accerator is ready for the next operation and the controller is + //returned to the idle state to wait for a new command. the ready line + //is pulled high to advertise that the accelerator is ready. + + //from the manual, it appears that ap_done is always asserted when the + //accelerator is finished. if this is true, using ap_done && ap_ready + //should save one cycle over using ap_idle as the trigger. this is because, + //according to the timing diagram in the user manual (pg + + rdy := Bool(true) // ready to accept new commands + busy := Bool(false) // operation complete, no longer busy + state := idle + + //note: this code could possibly be placed in the ap_done action to save + //one wasted cycle. it is not clear from the user guide (pg 157), ap_idle + //is asserted one cycle after ap_done. if this arrangment has problems, + //transitioning on ap_idle should work but will result in an unnessicary + //extra cycle. + } + } + when(respValid && io.resp.ready){ + //The processor has read the response. 
There is no more data for it + //Drive resp.valid low to avoid stalling processor + respValid := Bool(false) + } +} +} + +// ===== End Controller State Machine Logic ===== + +// ===== Tie off these lines ===== + io.interrupt := Bool(false) + // Set this true to trigger an interrupt on the processor (please refer to supervisor documentation) + + // MEMORY REQUEST INTERFACE + if(accel.io.ap_bus.length == 0){ + // No connected memory bus lines on accelerator + // We will not be doing any memory ops in this accelerator + io.mem.req.valid := Bool(false) + io.mem.req.bits.addr := UInt(0) + io.mem.req.bits.tag := UInt(0) + io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) + io.mem.req.bits.size := log2Ceil(8).U + io.mem.req.bits.signed := Bool(false) + io.mem.req.bits.data := UInt(0) // not performing any stores + } + //io.mem.invalidate_lr := Bool(false) + + //If enable physical addr, make sure to use pmp instr to set the right permission on addr range + io.mem.req.bits.phys := Bool(false) +'; + + +$control2 .= "}\n"; +# TODO no clock and reset signal +$control2 =~ s/test_c/$func_name/g; + +print CT $control2; + + diff --git a/scripts/run_chisel_tl.pl b/scripts/run_chisel_tl.pl new file mode 100644 index 0000000..61ad120 --- /dev/null +++ b/scripts/run_chisel_tl.pl @@ -0,0 +1,560 @@ +#!/usr/bin/perl +use warnings; +use strict; +use Cwd; +use File::Copy; +use List::Util qw(first); + +# Inputs: file_name, func_name, func_base_addr, prefix(Optional) +my $dir = getcwd; +my $file_name = $ARGV[0]; +my $func_name = $ARGV[1]; +my $func_base_addr = $ARGV[2]; +my $rdir = $ENV{'RDIR'}; + +my $prefix = undef; +my $i = undef; + +my $num_args = $#ARGV + 1; +if ($num_args > 3) { + $prefix = $ARGV[3]; +} + +#my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name"; +if ($prefix) { + $func_name = $prefix.$func_name; +} + +#print $rdir; +if ((not defined($rdir)) or $rdir eq '') { + print("Please source sourceme-f1.sh!\n"); + exit(); +} + +# my $build_sbt 
= ' +# organization := "edu.berkeley.cs" +# +# version := "1.0" +# +# name := "hls_test_c"'; +# +# $build_sbt=~ s/test_c/$func_name/g; +# my $build_sbt_path= "$bm_path/"."build.sbt"; +# open BUILD, ">$build_sbt_path"; +# print BUILD $build_sbt; +# close BUILD; + +my $verilog_file = "$dir/../verilog/$func_name".".v"; +my $line = undef; +my @verilog_param = (); +my @param_val = (); +my @verilog_input = (); +my @verilog_input_size = (); +my @verilog_output = (); +my @verilog_output_size = (); + +#my $m_axi_data_width = undef; +#my $s_axi_data_width = undef; + +my @bus_names=(); +my @m_axi_data_widths = (); +my $s_axi_data_width = undef; + +print "Parsing ".$verilog_file."\n"; +# parse the verilog file to get the info we need +if(!open VERILOG, "$verilog_file"){ + print $!; +} else { + while(){ + $line = $_; + + # Match AXI4 parameter + if($line =~ m/parameter\s+(C_\S+) =\s+(.*);/){ + my $param = $1; + my $val = $2; + $param .=""; + if($param =~ m/C_M_AXI_(\S+)_DATA_WIDTH/){ + my $bus_name = lc $1; + #$m_axi_data_width = $val; + push(@bus_names, $bus_name); + push(@m_axi_data_widths, $val); + } + if ($param eq "C_S_AXI_DATA_WIDTH") { + $s_axi_data_width = $val; + } + push (@verilog_param, $param); + push (@param_val, $val); + } elsif($line =~ m/^\s*input\s+(.*)/){ + my $input = $1; + #print "input:$input\n"; + if($input =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){ + my $end = $1; + my $start = $2; + my $input_name = $3; + #print "here!"."$input_name\n"; + push (@verilog_input, $input_name); + my $size = 0; + if ($end =~ m/^\d+$/){ + $size = $end - $start + 1; + $size = "".$size; + } elsif($end =~m/(\S+) - 1/) { + $size = $1; + } + push(@verilog_input_size, $size); + }elsif ($input =~ m/\s*(.*)\s*;/){ + my $input_name = $1; + #print "here!"."$input_name\n"; + push (@verilog_input, $input_name); + push(@verilog_input_size, "1"); + } + + }elsif($line =~ m/^\s*output\s+(.*)/){ + my $output = $1; + #print "output:$output\n"; + if($output =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){ + my $end 
= $1; + my $start = $2; + my $output_name = $3; + #print "here!"."$output_name\n"; + push(@verilog_output, $output_name); + my $size = 0; + if ($end =~ m/^\d+$/){ + $size = $end - $start + 1; + $size = "".$size; + } elsif($end =~m/(\S+) - 1/) { + $size = $1; + } + push(@verilog_output_size, $size); + }elsif ($output =~ m/\s*(.*)\s*;/){ + my $output_name = $1; + #print "here!"."$output_name\n"; + push (@verilog_output, $output_name); + push(@verilog_output_size, "1"); + } + } + } + + + print("Parameters: "); + my $param_str = join ' ', @verilog_param; + print $param_str."\n"; + + print("Inputs: "); + my $in_str = join ' ', @verilog_input; + print $in_str."\n"; + print("Outputs: "); + my $out_str = join ' ', @verilog_output; + print $out_str."\n"; +} + +#creat scala folder +my $scala_dir = "$dir/../scala"; +mkdir $scala_dir unless (-d $scala_dir); + +############################################################################################################################## +if(@m_axi_data_widths < 1){ + push(@bus_names, "gmem_dummy"); + push(@m_axi_data_widths, 32); +} + +if(not defined($s_axi_data_width)) { + $s_axi_data_width=32 +} + +print "Generating BlackBox file ...\n"; +for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){ + print "m_axi_data_width_ $bus_names[$i]= $m_axi_data_widths[$i]\n"; +} + +print "s_axi_data_width = $s_axi_data_width\n"; +# should be under scala folder +open BB, ">$scala_dir/$func_name"."_blackbox.scala"; + +my $blackbox1 = " +package hls_test_c +import Chisel._ +import freechips.rocketchip.config.{Parameters, Field} +import freechips.rocketchip.tile._ +import freechips.rocketchip.util._ + +class test_c() extends BlackBox() { +"; +$blackbox1 =~ s/test_c/$func_name/g; + +# Print parameters +for( $i = 0; $i < @verilog_param; $i = $i + 1 ){ + $blackbox1 .= "val $verilog_param[$i] = $param_val[$i]\n"; +} + +print BB $blackbox1; + + +print BB "\tval io = new Bundle {\n"; +my $bb_body = ""; + +# now if the input name does not start with ap, 
we assume it is an arg +my $ap_return = 0; +my $ap_clk = 0; +my $ap_rst = 0; +my $ap_rst_n = 0; + +my @verilog_axi_io = (); + +for( $i = 0; $i < @verilog_input; $i = $i + 1 ){ + my $input_name = $verilog_input[$i]; + my $input_size = $verilog_input_size[$i]; + if ($input_name =~ m/^ap_clk$/){ + $ap_clk = 1; + } + elsif ($input_name =~ m/^ap_rst$/){ + $ap_rst = 1; + } + elsif ($input_name =~ m/^ap_rst_n$/){ + $ap_rst_n = 1; + } + elsif($input_name =~ m/^(m_axi|s_axi)\S+$/){ + push (@verilog_axi_io, $input_name); + } + + print BB "\t\tval $input_name = "; + if ($input_name =~ m/ap_clk(.*)/){ + print BB "Clock\(INPUT\)\n"; + }else{ + print BB "Bits\(INPUT, width = $input_size\)\n"; + } +} + +for( $i = 0; $i < @verilog_output; $i = $i + 1 ){ + + my $output_name = $verilog_output[$i]; + my $output_size = $verilog_output_size[$i]; + + if ($output_name =~ m/ap_return(.*)/){ + $ap_return = 1; + } + elsif($output_name =~ m/^(m_axi|s_axi)\S+$/){ + push (@verilog_axi_io, $output_name); + } + + print BB "\t\tval $output_name = "; + print BB "Bits(OUTPUT, width = $output_size)\n"; + +} + +print BB "\t}\n"; +print BB "}\n"; + +close BB; +############################################################################################################################## +print "Generating Control file ...\n"; + +open CT, ">$scala_dir/$func_name"."_accel.scala"; + +#TODO Fix AXI4 params +my $control1 = ' +package hls_test_c + +import chisel3._ +import chisel3.util._ + +import freechips.rocketchip.config.{Field, Parameters} +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.amba.axi4._ +import freechips.rocketchip.util._ +import freechips.rocketchip.subsystem._ + +class HLStest_cAXI (address: BigInt = 0x20000, beatBytes: Int = 8) (implicit p: Parameters) extends LazyModule { + + val numInFlight = 8 +'; + +for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){ + $control1 .=" + val node_$bus_names[$i] = 
AXI4MasterNode(Seq(AXI4MasterPortParameters( + masters = Seq(AXI4MasterParameters( + name = \"axil_hub_mem_out_$i\", + id = IdRange(0, numInFlight), + aligned = true, + maxFlight = Some(8) + )), + userBits = 0 + ) + ))"; +} +$control1 .=' + val slave_node = AXI4SlaveNode(Seq(AXI4SlavePortParameters( + slaves = Seq(AXI4SlaveParameters( + address = List(AddressSet(address,0x4000-1)), + regionType = RegionType.UNCACHED, + supportsWrite = TransferSizes(1, beatBytes), + supportsRead = TransferSizes(1, beatBytes), + interleavedId = Some(0) + )), + beatBytes = beatBytes + ))) + + lazy val module = new HLStest_cAXIModule(this) +} + +class HLStest_cAXIModule(outer: HLStest_cAXI) extends LazyModuleImp(outer) { + + //val (out, edge) = outer.node.out(0) + val (slave_in, slave_edge) = outer.slave_node.in(0) + + val bId = Reg(UInt(32.W)) + val rId = Reg(UInt(32.W)) + + val bb = Module(new test_c()) +'; + +for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){ + $control1 .=" + val (out_$bus_names[$i], edge_$bus_names[$i]) = outer.node_$bus_names[$i].out(0)"; +} +$control1 .= "\n"; +$control1 =~ s/s_axi_data_width/$s_axi_data_width/g; + +if ($ap_clk eq 1){ + $control1 .= "\tbb.io.ap_clk := clock\n"; +} +if ($ap_rst eq 1){ + $control1 .= "\tbb.io.ap_rst := reset\n"; +} +if ($ap_rst_n eq 1){ + $control1 .= "\tbb.io.ap_rst_n := !reset.toBool() \n"; +} + +$control1 =~ s/test_c/$func_name/g; +print CT $control1; +#TODO modify accelerator arg! 
+my $control2 = '
+';
+
+# TODO: add support for multiple AXI buses.
+# Ports driven by the generated Scala: each emitted line is "bb.io.<port> := <AXI field>".
+foreach my $sig (@verilog_axi_io) {
+    # In every branch $1 is the bus name and $2 the AXI channel, lower-cased into $type.
+    if ($sig =~ m/m_axi_(.*)_(AW|W|AR)READY$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := out_$bus_name.$type.ready\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(R|B)VALID$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := out_$bus_name.$type.valid\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(R)DATA$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := out_$bus_name.$type.bits.data\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(R)LAST$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := out_$bus_name.$type.bits.last\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(R|B)ID$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := out_$bus_name.$type.bits.id\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(R|B)RESP$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := out_$bus_name.$type.bits.resp\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(AW|W|AR)VALID$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := slave_in.$type.valid\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(AW|AR)ADDR$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := slave_in.$type.bits.addr\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(W)DATA$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := slave_in.$type.bits.data\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(W)STRB$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := slave_in.$type.bits.strb\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(R|B)READY$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tbb.io.$sig := slave_in.$type.ready\n";
+    }
+}
+
+foreach my $sig (@verilog_axi_io) {
+    # Opposite direction: each emitted line is "<AXI field> := bb.io.<port>".
+    if ($sig =~ m/m_axi_(.*)_(AW|W|AR)VALID$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.valid := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(R|B)READY$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.ready := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)ADDR$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.addr := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)ID$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.id := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)LEN$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.len := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)SIZE$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.size := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)BURST$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.burst := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)LOCK$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.lock := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)CACHE$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.cache := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)PROT$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.prot := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)QOS$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.qos := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(AW|AR)REGION$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\t//out_$bus_name.$type.bits.region := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(W)DATA$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.data := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(W)STRB$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.strb := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/m_axi_(.*)_(W)LAST$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tout_$bus_name.$type.bits.last := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(AW|W|AR)READY$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tslave_in.$type.ready := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(R|B)VALID$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tslave_in.$type.valid := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(R)DATA$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tslave_in.$type.bits.data := bb.io.$sig\n";
+    }
+    elsif ($sig =~ m/s_axi_(.*)_(R|B)RESP$/){
+        my $bus_name = $1;
+        my $type = lc $2;
+        $control2 .= "\tslave_in.$type.bits.resp := bb.io.$sig\n";
+    }
+}
+
+if ($ap_return eq 1){
+    $control2 = $control2."\tval ap_return = accel.io.ap.rtn\n";
+}
+$control2 .= "
+    // For AXI4lite, these two signals are always True
+    slave_in.r.bits.last := true.B
+
+    when(slave_in.aw.fire()){
+        bId := slave_in.aw.bits.id
+    }
+
+    when(slave_in.ar.fire()){
+        rId := slave_in.ar.bits.id
+    }
+    slave_in.r.bits.id := rId
+    slave_in.b.bits.id := bId
+}
+";
+
+# TODO: fix the width here.
+$control2 .='
+trait HasPeripheryHLStest_cAXI { this: BaseSubsystem =>
+  private val address = BigInt(base_addr)
+  private val axi_m_portName = "HLS-Accelerator-test_c-master"
+  private val axilite_s_portName = "HLS-Accelerator-test_c-slave"
+
+  //val accel_s_axi_width = s_axi_data_width
+  //val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, sbus.beatBytes))
+  val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, s_axi_data_width >> 3))
+';
+
+# One sbus master attachment per entry of @m_axi_data_widths, paired by index with @bus_names.
+for my $bus_idx (0 .. $#m_axi_data_widths) {
+    $control2 .="
+  sbus.fromPort(Some(axi_m_portName)) {
+    (TLWidthWidget($m_axi_data_widths[$bus_idx]>> 3 )
+    := AXI4ToTL()
+    := AXI4UserYanker()
+    := AXI4Fragmenter()
+    := AXI4IdIndexer(1))
+  }:=* hls_test_c_accel.node_$bus_names[$bus_idx]
+  ";
+}
+
+$control2 .='
+  hls_test_c_accel.slave_node :=* sbus.toFixedWidthPort(Some(axilite_s_portName)) {
+    (AXI4Buffer()
+    := AXI4UserYanker()
+    //:= AXI4IdIndexer(params.idBits)
+    //:= AXI4Deinterleaver(sbus.blockBytes) // Assume there is no iterleaved requests, iterleaveId = Some(0)
+    := TLToAXI4()
+    := TLBuffer()
+    //:= TLWidthWidget(s_axi_data_width >> 3)
+    // Compared to TLWidthWidget, TLFragmenter saves the id info?
+    := TLFragmenter(s_axi_data_width >> 3, 64, true))
+  }
+}
+
+trait HasPeripheryHLStest_cAXIImp extends LazyModuleImp {
+  val outer: HasPeripheryHLStest_cAXI
+}';
+
+# Fill the placeholders in one pass so text inserted for one is never re-scanned for another.
+my %control2_subst = (test_c => $func_name, base_addr => $func_base_addr, s_axi_data_width => $s_axi_data_width);
+$control2 =~ s/(test_c|base_addr|s_axi_data_width)/$control2_subst{$1}/g;
+print CT $control2;
+
diff --git a/scripts/run_hls.pl b/scripts/run_hls.pl
new file mode 100644
index 0000000..f62460b
--- /dev/null
+++ b/scripts/run_hls.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/perl
+# Generate a Vivado HLS TCL script for one C/C++ source, run vivado_hls on it
+# and copy the synthesized Verilog into ../verilog (relative to the cwd).
+#
+# Usage: run_hls.pl <file_name> <func_name> [rtl_prefix]
+#   file_name   source file name without its .c/.cpp extension
+#   func_name   top-level function passed to set_top
+#   rtl_prefix  optional; emitted as "config_rtl -prefix <rtl_prefix>"
+use warnings;
+use strict;
+use Cwd;
+use File::Copy;
+
+die "Usage: $0 <file_name> <func_name> [rtl_prefix]\n" if @ARGV < 2;
+my ($file_name, $func_name, $prefix) = @ARGV;
+
+#############################GENERATE HLS##############################
+
+my $prefix_tcl = "";
+if ($prefix) {
+    $prefix_tcl = "config_rtl -prefix ".$prefix."\n";
+}
+
+# A .cpp source takes precedence over a .c one and is compiled with -std=c++0x.
+my $hls_pgm;
+if (-f "$file_name.cpp") {
+    $hls_pgm = $file_name.'.cpp -cflags "-std=c++0x" ';
+} else {
+    $hls_pgm = $file_name.".c";
+}
+
+# The names are interpolated directly into the script. Filling placeholders
+# with successive s///g passes is order dependent: a later pass would rewrite
+# text an earlier one inserted (e.g. a function name containing "test_c").
+# TODO: add every .c file of the design, not only the top-level one.
+my $hls_tcl = <<"END_TCL";
+open_project -reset ${file_name}_prj
+set_top $func_name
+add_files $hls_pgm
+open_solution -reset "solution1"
+set_part {xcvu9p-flgb2104-2-i}
+config_compile -ignore_long_run_time
+create_clock -period 10 -name default
+$prefix_tcl
+#source "./${file_name}_prj/solution1/directives.tcl"
+#config_interface -clock_enable
+config_interface -m_axi_addr64
+csynth_design
+#export_design -format ip_catalog
+exit
+END_TCL
+
+my $dir = getcwd;
+my $tcl_path = "$dir/run_hls.tcl";
+
+# Close the script before launching vivado_hls so it is completely on disk.
+open(my $tcl_fh, '>', $tcl_path) or die "Can't write $tcl_path: $!\n";
+print {$tcl_fh} $hls_tcl;
+close($tcl_fh) or die "Can't close $tcl_path: $!\n";
+
+# Run vivado hls; stop on failure instead of publishing stale Verilog.
+system("vivado_hls", "-f", "run_hls.tcl") == 0
+    or die "vivado_hls failed (status $?)\n";
+
+my $vivado_dir = "$dir/$file_name"."_prj/solution1/syn/verilog/";
+my $verilog_dir = "$dir/../verilog/";
+
+# List the results first, so a missing HLS output directory aborts the run
+# before the previous contents of $verilog_dir are deleted.
+opendir(my $vivado_dh, $vivado_dir) or die "Can't opendir $vivado_dir: $! \n";
+my @files = readdir($vivado_dh);
+closedir($vivado_dh);
+
+unless (-d $verilog_dir) {
+    mkdir $verilog_dir or die "Can't mkdir $verilog_dir: $!\n";
+}
+unlink glob "$verilog_dir/*";
+
+foreach my $v_file (@files){
+    print "$v_file\n";
+    my $v_path = "$vivado_dir/$v_file";
+    next unless -f $v_path;
+    fix_generated_verilog($v_path, $vivado_dir);
+    copy($v_path, $verilog_dir) or die "File cannot be copied! $v_file $verilog_dir\n";
+}
+
+# Rewrite one generated file in place:
+#   - the "./" in $readmemh("./...") is replaced by $data_dir;
+#   - every 'bx literal becomes 1'b0.
+# The file is only written back when a substitution actually changed it.
+sub fix_generated_verilog {
+    my ($path, $data_dir) = @_;
+
+    open(my $in, '<', $path) or die "Can't read $path: $!\n";
+    my $text = do { local $/; <$in> };
+    close($in);
+    return unless defined $text;
+
+    my $fixed = $text;
+    $fixed =~ s{\$readmemh\("\./}{\$readmemh("$data_dir}g;
+    $fixed =~ s/'bx/1'b0/g;
+    return if $fixed eq $text;
+
+    open(my $out, '>', $path) or die "Can't write $path: $!\n";
+    print {$out} $fixed;
+    close($out) or die "Can't close $path: $!\n";
+    return;
+}