package vivadoHLS
import Chisel._
//These are definitions of the bus standards used on Vivado HLS generated accelerators
//Request Packet Format
//Request packet of the ApBus interface.
//Every field is declared OUTPUT, so an unflipped instance drives the request.
//  dataWidth: bit width of dataout
//  addrWidth: bit width of address and of size
class ApBusReq(dataWidth:Int, addrWidth:Int) extends Bundle{
//Req specific lines
//Specifies a write request
val din = Bool(OUTPUT) //req_din in verilog
//Lines used for req
val address = UInt(OUTPUT, width = addrWidth)
val dataout = UInt(OUTPUT, width = dataWidth)
//Note: size is declared with the address width, not the data width
val size = UInt(OUTPUT, width = addrWidth)
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new ApBusReq(dataWidth, addrWidth).asInstanceOf[this.type]
//Response Packet Format
//Response packet of the ApBus interface: a single data word declared INPUT.
//  dataWidth: bit width of datain
class ApBusRsp(dataWidth:Int) extends Bundle{
val datain = UInt(INPUT , width = dataWidth)
//Rebuilds the bundle with the same constructor parameter
override def cloneType: this.type = new ApBusRsp(dataWidth).asInstanceOf[this.type]
//Complete ApBus port: a request packet and a response packet, each with its own
//pair of handshake lines (req_full_n/req_write and rsp_empty_n/rsp_read).
//  dataWidth: width of the request and response data (default 64)
//  addrWidth: width of the request address (default 32)
class ApBusIO(dataWidth:Int = 64, addrWidth:Int = 32) extends Bundle{
val req = new ApBusReq(dataWidth, addrWidth)
val req_full_n = Bool(INPUT ) //req_full_n in verilog
//Write the request
val req_write = Bool(OUTPUT) //req_write in verilog
val rsp = new ApBusRsp(dataWidth)
val rsp_empty_n = Bool(INPUT )
val rsp_read = Bool(OUTPUT)
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new ApBusIO(dataWidth, addrWidth).asInstanceOf[this.type]
//Control port of an accelerator: a start input, done/idle/ready status outputs
//and a return value output.
//  dataWidth: bit width of the return value rtn (default 64)
class ApCtrlIO(dataWidth:Int = 64) extends Bundle{
//Clock and reset are not part of this bundle (left commented out)
//val clk = Bool(INPUT )
//val rst = Bool(INPUT )
val start = Bool(INPUT )
val done = Bool(OUTPUT)
val idle = Bool(OUTPUT)
val ready = Bool(OUTPUT)
val rtn = UInt(OUTPUT, width = dataWidth)
//Rebuilds the bundle with the same constructor parameter
override def cloneType: this.type = new ApCtrlIO(dataWidth).asInstanceOf[this.type]

package controlUtils
import Chisel._
//This is based on the existing chisel arbiters but has a different implementation
//I/O of the priority arbiter.
//  gen:          payload type carried by every input and by the output
//  n:            number of decoupled inputs
//  priorityBits: bit width of each priority value
class PriorityArbiterIO[T <: Data](gen: T, n: Int, priorityBits: Int) extends Bundle{
val in = Vec(n, Flipped(Decoupled(gen)))
val out = Decoupled(gen)
//Index of the input that was selected
val chosen = UInt(OUTPUT, log2Up(n))
//Priorities of the inputs (0 is max priority)
val priority = Vec(n,Input(UInt(priorityBits.W)))
//Priority of the value that was output
val priorityOut = UInt(OUTPUT, width = priorityBits)
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new PriorityArbiterIO(gen, n, priorityBits).asInstanceOf[this.type]
//Arbiter that forwards the valid input with the smallest priority value.
//The selection is built as a chain of muxes over the inputs, scanning from index 1
//upwards; the comparison on priorities is strict, so on a tie the lower index stays selected.
//  gen:          payload type
//  n:            number of inputs (n == 1 bypasses arbitration)
//  priorityBits: bit width of each priority value
//NOTE(review): several statements in this class are incomplete in this copy of the
//file (e.g. "io.out.valid :=" has no right-hand side and the operands of the valid
//comparisons are missing); restore them before relying on the description above.
class PriorityArbiter[T <: Data](gen: T, n: Int, priorityBits: Int) extends Module{
val io = IO(new PriorityArbiterIO(gen, n, priorityBits))
//val indexedPriority = io.priority.zipWithIndex;
//val indexedPriorityValid =
if(n == 1){
//No arbitration required!
io.out.valid :=
io.out.bits :=
io.chosen := UInt(0)
io.priorityOut := io.priority(0) := io.out.ready
//val minPriorityValue = indexedPriorityValid.slice(1, n).foldLeft(indexedPriorityValid(0))((a, b) => Mux((a._2 === Bool(false) && b._2 === Bool(true) || (b._2 === Bool(true) && (b._1)._1 < (a._1)._1)), b, a)) //the max priority (smaller priority values are more important)
//The slice is because we favor the left element in this simple arbiter
//We scan from left to right (from the first element to the last) and check
//if the next element has a smaller priority value. If it does, it becomes
//the element that is checked against for the remainder of the list. The slice
//is because an initial value has to be given for fold and it makes little sense
//to compare the first element with itself. The (a._1)._1 is accessing the 1st
//element of the zipped tuple (priority, index). The first part of the boolean
//function also ensures that if in(0) is not valid but in(y) is valid, that in(y)
//is selected even if it has a higher priority value than in(0). This in effect
//allows us to avoid filtering the list to only include valid inputs as a separate
//If no element is valid, in(0) is selected and is connected to the output.
//Since its valid signal is low, this should have no effect and the priorityOut
//value should be ignored.
//val chosenPriority = (minPriorityValue._1)._1
//val chosenInd = UInt((minPriorityValue._1)._2)
//Well ... I was having a bunch of type errors trying to do this functionally (chisel/scala type system), so let's do it iteratively
//The use of var here is based on the chisel implementation of the basic arbiter
//Normally, we would use a val but, in this case, it appears we can use a var as we are basically
//procedurally constructing a tree of muxes. The initial node is 0 but, on each iteration,
//it is set to the mux of the current node and the selected one. The final value is then connected
//to the output and contains the whole tree of muxes
var chosenInd = UInt(0)
for(i <- 1 until n){
//Input i takes over when the current choice is not valid and i is, or when i is valid with a strictly smaller priority value
val chooseThis = ( === Bool(false) && === Bool(true)) || ( === Bool(true) && io.priority(i) < io.priority(chosenInd))
chosenInd = Mux(chooseThis, UInt(i), chosenInd)
//Set IO lines
for(i <- 0 to n-1 by 1){
//set the ready lines going to the input ports so that the chosen one
//gets the value of io.out.ready and the rest get a value of Bool(false) := (chosenInd === UInt(i)) && io.out.ready
io.out.valid :=
io.out.bits :=
io.chosen := chosenInd
io.priorityOut := io.priority(chosenInd)
//Counter over n states whose register resets to initVal instead of 0.
//  n:       number of counter states; n == 1 degenerates to the constant 0
//  initVal: reset value of the counter register
//NOTE(review): the end of inc() (its return value and the closing braces) is not
//present in this copy of the file.
class InitCounter(val n: Int, val initVal: Int) {
/** current value of the counter */
val value = if (n == 1) UInt(0) else Reg(init=UInt(initVal, log2Up(n)))
/** increment the counter
* @return if the counter is at the max value */
def inc(): Bool = {
if (n == 1) Bool(true)
else {
//wrap is high while the counter holds its last state, n-1
val wrap = value === UInt(n-1)
//Only a non-power-of-two n is reset to 0 explicitly; otherwise value + 1 is stored as is
value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1))
//Factory methods for InitCounter: one returns the counter object itself, the other
//takes an increment condition and returns the counter value paired with cond && wrap.
//NOTE(review): the doc comment opened inside this object is never closed and the
//statement "wrap = " has no right-hand side in this copy of the file.
object InitCounter
def apply(n: Int, initVal: Int): InitCounter = new InitCounter(n, initVal)
/** Get a counter which takes an input Bool of when to increment
* @return a UInt which is the value of the counter and a Bool indicating when the counter resets
def apply(cond: Bool, n: Int, initVal: Int): (UInt, Bool) = {
val c = new InitCounter(n, initVal)
var wrap: Bool = null
when (cond) { wrap = }
(c.value, cond && wrap)
//Really want a way to initialize memory but this will do for now
/*class RegisterQueue[T <: Data](gen: T, val entries: Int, initVals: Seq[T],
pipe: Boolean = false,
flow: Boolean = false,
_reset: Option[Bool] = None)
extends Module(_reset=_reset)*/
//Queue whose storage is a register vector, so that it can come out of reset already
//holding initVals.
//  gen:      element type
//  entries:  queue capacity
//  initVals: values stored at reset, at indices 0 until initVals.length
//  pipe:     when true, enq.ready is also asserted while full if deq.ready is high
//  flow:     when true, enq.bits is passed straight to deq while the queue is empty
//NOTE(review): the statements inside "when (do_enq)" / "when (do_deq)" that advance
//the pointers, and part of the non-power-of-two count expression, are not present
//in this copy of the file.
class RegisterQueue[T <: Data](gen: T, val entries: Int, initVals: Seq[T], pipe: Boolean = false, flow: Boolean = false) extends Module{
//override val ram = Vec.tabulate(entries)((i) => (if(i<initVals.length) Reg(init=initVals(i)) else Reg(gen)))
/** The I/O for this queue */
val io = IO(new QueueIO(gen, entries))
//val ending = Vec.fill(entries-initVals.length)(gen)
//Pad the initial values with gen up to the full number of entries
val ending = Vec(entries-initVals.length, gen)
val fullInitVals = Vec(initVals ++ ending)
val ram = Reg(init=fullInitVals)
//val ram = Mem(entries, gen)
//var registers = new ArraySeq(entries)
//for(i <- 0 until entries){
//if(i < initVals.length){
//registers(i) = Reg(init=initVals(i))
//registers(i) = Reg(gen)
//override val ram = Vec(registers)
//if(initVals.length < entries)
//enq_ptr.value := UInt(initVals.length)
//maybe_full := Bool(true)
//The enqueue pointer starts just past the initial values; when they fill every
//entry it starts at 0 and maybe_full resets to true instead
val enq_ptr = InitCounter(entries, if(initVals.length < entries) initVals.length else 0)
val deq_ptr = Counter(entries)
val maybe_full = Reg(init=(if(initVals.length < entries) Bool(false) else Bool(true)))
//Equal pointers mean either empty or full; maybe_full tells the two apart
val ptr_match = enq_ptr.value === deq_ptr.value
val empty = ptr_match && !maybe_full
val full = ptr_match && maybe_full
val maybe_flow = Bool(flow) && empty
val do_flow = maybe_flow && io.deq.ready
val do_enq = io.enq.ready && io.enq.valid && !do_flow
val do_deq = io.deq.ready && io.deq.valid && !do_flow
when (do_enq) {
ram(enq_ptr.value) := io.enq.bits
when (do_deq) {
//maybe_full only changes when exactly one of enqueue/dequeue happens
when (do_enq =/= do_deq) {
maybe_full := do_enq
io.deq.valid := !empty || Bool(flow) && io.enq.valid
io.enq.ready := !full || Bool(pipe) && io.deq.ready
io.deq.bits := Mux(maybe_flow, io.enq.bits, ram(deq_ptr.value))
val ptr_diff = enq_ptr.value - deq_ptr.value
if (isPow2(entries)) {
//Power-of-two depth: the full flag becomes the top bit of the count
io.count := Cat(maybe_full && ptr_match, ptr_diff)
} else {
io.count := Mux(ptr_match,
UInt(entries), UInt(0)),
Mux(deq_ptr.value > enq_ptr.value,
UInt(entries) + ptr_diff, ptr_diff))
//I/O of the valid demultiplexer: one valid input, a select index and one valid
//output per destination.
//  fanout: number of valid outputs
class ValidDemuxIO(fanout:Int) extends Bundle{
val validIn = Bool(INPUT)
val validSelect = UInt(INPUT, log2Up(fanout))
val validOut = Vec(fanout, Bool(OUTPUT))
//val validOut = Vec.fill(fanout)(Bool(OUTPUT))
//Rebuilds the bundle with the same constructor parameter
override def cloneType: this.type = new ValidDemuxIO(fanout).asInstanceOf[this.type]
//Routes validIn to the single output whose index equals validSelect; every other
//output is low.
//  fanout: number of valid outputs
class ValidDemux(fanout:Int) extends Module{
val io = IO(new ValidDemuxIO(fanout))
for(i <- 0 until fanout){
//Output i is high only when the input is valid and i is the selected index
io.validOut(i) := io.validIn && (UInt(i) === io.validSelect)
//Test harness that instantiates a 32-bit RegisterQueue and exposes a QueueIO of the
//same shape.
//  entries:  queue capacity
//  initVals: integer initial contents for the queue
//NOTE(review): the right-hand side of uintInit and of every "<>" connection is
//missing in this copy of the file; presumably each line connects to the matching
//port of c - confirm against version control.
class RegisterQueueTestWrapper(val entries:Int, initVals: Seq[Int]) extends Module{
val io = IO(new QueueIO(UInt(width=32), entries))
val uintInit =
val c = Module(new RegisterQueue(UInt(width=32), entries, uintInit))
io.enq.valid <>
io.enq.ready <>
io.enq.bits <>
io.deq.valid <>
io.deq.ready <>
io.deq.bits <>
io.count <>
//Tester for RegisterQueueTestWrapper: a single poke followed by a series of expects.
//NOTE(review): the signal argument of every poke/expect is missing in this copy of
//the file, so which port each value refers to cannot be determined from here.
class RegisterQueueTests(c: RegisterQueueTestWrapper) extends Tester(c){
poke(, 1)
expect(, 1)
expect(, 2)
expect(, 4)
expect(, 1)
expect(, 4)
expect(, 3)
expect(, 1)
expect(, 6)
expect(, 2)
expect(, 1)
expect(, 8)
expect(, 1)
expect(, 0)
expect(, 0)
//Tester for PriorityArbiter. Each scenario is run twice: first with the last poked
//value set to 0 and then with it set to 1 (per the scenario comments, output not
//ready and then output ready).
//NOTE(review): the signal argument of every poke/expect is missing in this copy of
//the file, so the mapping of values to ports cannot be determined from here.
class PriorityArbiterTests(c: PriorityArbiter[UInt]) extends Tester(c) {
//Test when no inputs are valid and output is not ready
poke(, 0)
poke(, 0)
poke(, 0)
poke(, 0)
poke(, 10)
poke(, 11)
poke(, 12)
poke(, 13)
poke(, 20)
poke(, 21)
poke(, 22)
poke(, 23)
poke(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 10)
expect(, 20)
expect(, 0)
//Test when no inputs are valid and output is ready
poke(, 1)
expect(, 1)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 10)
expect(, 20)
expect(, 0)
//Test when one input is valid and output is not ready
poke(, 0)
poke(, 0)
poke(, 1)
poke(, 0)
poke(, 10)
poke(, 11)
poke(, 12)
poke(, 13)
poke(, 20)
poke(, 21)
poke(, 22)
poke(, 23)
poke(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 12)
expect(, 22)
expect(, 1)
//Test when one input is valid and output is ready
poke(, 1)
expect(, 0)
expect(, 0)
expect(, 1)
expect(, 0)
expect(, 12)
expect(, 22)
expect(, 1)
//Test when multiple inputs are valid and one has the lowest priority value, and output is not ready
poke(, 1)
poke(, 1)
poke(, 1)
poke(, 1)
poke(, 10)
poke(, 11)
poke(, 12)
poke(, 13)
poke(, 20)
poke(, 10)
poke(, 20)
poke(, 20)
poke(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 11)
expect(, 10)
expect(, 1)
//Test when multiple inputs are valid and one has the lowest priority value, and output is ready
poke(, 1)
expect(, 0)
expect(, 1)
expect(, 0)
expect(, 0)
expect(, 11)
expect(, 10)
expect(, 1)
//Test when multiple inputs are valid and multiple have the lowest priority value, and output is not ready
poke(, 1)
poke(, 1)
poke(, 1)
poke(, 1)
poke(, 10)
poke(, 11)
poke(, 12)
poke(, 13)
poke(, 30)
poke(, 30)
poke(, 20)
poke(, 20)
poke(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 12)
expect(, 20)
expect(, 1)
//Test when multiple inputs are valid and multiple have the lowest priority value, and output is ready
poke(, 1)
expect(, 0)
expect(, 0)
expect(, 1)
expect(, 0)
expect(, 12)
expect(, 20)
expect(, 1)
//0 is valid lowest priority value, and output is not ready
poke(, 1)
poke(, 1)
poke(, 1)
poke(, 1)
poke(, 10)
poke(, 11)
poke(, 12)
poke(, 13)
poke(, 10)
poke(, 20)
poke(, 30)
poke(, 40)
poke(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 10)
expect(, 10)
expect(, 1)
//0 is valid lowest priority value, and output is ready
//Test when multiple inputs are valid and multiple have the lowest priority value, and output is ready
poke(, 1)
expect(, 1)
expect(, 0)
expect(, 0)
expect(, 0)
expect(, 10)
expect(, 10)
expect(, 1)
//Entry point that runs both testers through chiselMainTest.
object PriorityArbiterMain {
def main(args: Array[String]): Unit = {
//4-input arbiter with a 32-bit payload and 32-bit priorities
chiselMainTest(args, () => Module(new PriorityArbiter(UInt(width = 32), 4, 32))){c => new PriorityArbiterTests(c)}
//4-entry register queue that starts out full with the values 2, 4, 6, 8
chiselMainTest(args, () => Module(new RegisterQueueTestWrapper(4, List(2, 4, 6, 8)))){
c => new RegisterQueueTests(c)

package memControl
import Chisel._
import Chisel.ImplicitConversions._
import freechips.rocketchip.tile._
import freechips.rocketchip.config._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.rocket._
import freechips.rocketchip.tilelink._
//import freechips.rocketchip.util.InOrderArbiter
import freechips.rocketchip.util._
import freechips.rocketchip.system._
import vivadoHLS._
import controlUtils._
//I/O of RequestParser: an incoming request (flipped), the rewritten outgoing request,
//and the offset address with its load strobe.
//  dataWidth: data width of both request bundles
//  addrWidth: address width of both request bundles and of offsetAddr
class RequestParserIO(dataWidth: Int, addrWidth: Int) extends Bundle{
val reqIn = new ApBusReq(dataWidth, addrWidth).flip
val reqOut = new ApBusReq(dataWidth, addrWidth)
val offsetAddr = UInt(INPUT, width=addrWidth)
val loadOffset = Bool(INPUT)
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new RequestParserIO(dataWidth, addrWidth).asInstanceOf[this.type]
//Rewrites the address of an incoming request: the address is multiplied by the bus
//width in bytes (dataWidth/8) and a registered offset is added. The din, dataout and
//size fields pass through unchanged.
//NOTE(review): io.loadOffset is not referenced in the lines shown; offsetReg is
//rewritten from io.offsetAddr on every cycle - confirm this is intended.
class RequestParser(dataWidth: Int, addrWidth: Int) extends Module{
val io = IO(new RequestParserIO(dataWidth, addrWidth))
val offsetReg = Reg(init = UInt(0, width=addrWidth))
//Number of bytes per bus word
val multAmt = UInt(dataWidth/8)
offsetReg := io.offsetAddr
io.reqOut.din := io.reqIn.din
io.reqOut.dataout := io.reqIn.dataout
io.reqOut.size := io.reqIn.size
//presumably converts a word index into a byte address before adding the offset - TODO confirm
io.reqOut.address := (io.reqIn.address*multAmt) + offsetReg
//A request bundle (directioned ApBusReq) paired with a timestamp.
//  dataWidth/addrWidth: widths of the embedded request
//  counterSize:         number of counter values; the timestamp is log2Up(counterSize) bits wide
class TimestampedRequestIO(dataWidth:Int, addrWidth:Int, counterSize: Int) extends Bundle{
val req = new ApBusReq(dataWidth, addrWidth)
//Note: the timestamp is declared INPUT while the request fields are OUTPUT
val timestamp = UInt(INPUT, log2Up(counterSize))
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new TimestampedRequestIO(dataWidth, addrWidth, counterSize).asInstanceOf[this.type]
//Direction-less version of ApBusReq: the same four fields declared without
//INPUT/OUTPUT.
//  dataWidth: bit width of dataout
//  addrWidth: bit width of address and of size
class ApBusReqType (dataWidth:Int, addrWidth:Int) extends Bundle{
//Req specific lines
//Specifies a write request
val din = Bool() //req_din in verilog
//Lines used for req
val address = UInt(width = addrWidth)
val dataout = UInt(width = dataWidth)
val size = UInt(width = addrWidth)
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new ApBusReqType(dataWidth, addrWidth).asInstanceOf[this.type]
//Direction-less version of TimestampedRequestIO: a request plus a timestamp.
//  dataWidth/addrWidth: widths of the embedded request
//  counterSize:         the timestamp is log2Up(counterSize) bits wide
class TimestampedRequestIOType(dataWidth:Int, addrWidth:Int, counterSize: Int) extends Bundle{
val req = new ApBusReqType(dataWidth, addrWidth)
val timestamp = UInt(width = log2Up(counterSize))
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new TimestampedRequestIOType(dataWidth, addrWidth, counterSize).asInstanceOf[this.type]
//I/O of RequestIngest: one request port per bus (widths may differ per bus), the
//per-bus handshake and offset lines, and a single decoupled output request.
//  dataWidth/addrWidth: per-bus widths; the number of buses is dataWidth.length
//  counterSize, inputBufferLen: only used to rebuild the bundle in cloneType
//NOTE(review): the arguments of the HeterogeneousBag calls are incomplete in this
//copy of the file.
class RequestIngestIO(dataWidth: Seq[Int], addrWidth: Seq[Int], counterSize: Int, inputBufferLen: Int) extends Bundle{
//val reqsIn = Vec.tabulate(dataWidth.length)((i) => Wire(new ApBusReq(dataWidth(i), addrWidth(i)).flip))
val reqsIn = HeterogeneousBag( {
case (dw, aw) => new ApBusReq(dw, aw)
val reqsFullN = Vec(dataWidth.length, Output(Bool()))
//val reqsWrite = Vec(dataWidth.length, Bool(INPUT)).flip
val reqsWrite = Vec(dataWidth.length, Input(Bool()))
//val offsetAddrs = Vec.tabulate(dataWidth.length)((i) => UInt(INPUT, width=addrWidth(i)))
//val offsetAddrs = HeterogeneousBag( => UInt(INPUT, width = aw)))
val offsetAddrs = HeterogeneousBag( => Input(UInt(aw.W))))
//val loadOffsets = Vec(dataWidth.length, Bool(INPUT)).flip
val loadOffsets = Vec(dataWidth.length, Input(Bool()))
val newRequests = UInt(OUTPUT, width = log2Up(dataWidth.length)+1) //The number of new requests received in this cycle (used to track number of outstanding requests)
//The widths are the maximums of all of the input widths
val reqOut = Decoupled(new ApBusReq(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0))
//Index of the bus whose request is presented on reqOut
val selectedBus = UInt(OUTPUT, log2Up(dataWidth.length))
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new RequestIngestIO(dataWidth, addrWidth, counterSize, inputBufferLen).asInstanceOf[this.type]
//Front end for the per-bus requests: each bus gets a RequestParser and a Queue of
//timestamped requests, and a PriorityArbiter picks among the queue heads.
//  dataWidth/addrWidth: per-bus widths; the number of buses is dataWidth.length
//  inputBufferLen:      depth of each per-bus queue
//NOTE(review): many statements below are incomplete in this copy of the file
//(missing operands around ":=" and inside "when(...)"), and the block comment opened
//before the commented-out HeterogeneousBag queues is never closed.
class RequestIngest(dataWidth: Seq[Int], addrWidth: Seq[Int], inputBufferLen: Int) extends Module{
val busCount = dataWidth.length
//We only need enough counter values to account for the worst case scenario when memory is stalled and each buffer is filled one at a time
//Adding an extra 1 is probably overly conservative as the extra one may be reassigned but is not at the front of the queue. If it was, then a queue is not
//Taking the base 2 log and raising 2 to that power ensures that the overflow semantics are what is expected. This is important for the priority difference
val counterSize = BigInt(2).pow(log2Up(inputBufferLen*busCount)).toInt
val io = IO(new RequestIngestIO(dataWidth, addrWidth, counterSize, inputBufferLen))
//Source of the timestamps stored with each queued request
val counter = Counter(counterSize)
val prevTimestamp = Reg(init = UInt(0, width=log2Up(counterSize)))
val parsers = Seq.tabulate(busCount)((i) => Module(new RequestParser(dataWidth(i), addrWidth(i))).io)
//val queues = Vec.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIOType(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen)))
val queues = Seq.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIOType(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen)).io)
/*val queues = HeterogeneousBag({
case (dw, aw) => Module(new Queue(new TimestampedRequestIOType(dw, aw, log2Up(counterSize)), inputBufferLen))
//Vec.tabulate(busCount)((i) => Module(new Queue(new TimestampedRequestIO(dataWidth(i), addrWidth(i), log2Up(counterSize)), inputBufferLen)).io)
val arbiter = Module(new PriorityArbiter(new ApBusReq(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0), busCount, log2Up(counterSize)))
val incrCounter = Wire(Bool(false))
if (dataWidth.length > 0) {
incrCounter := io.reqsWrite.reduce(_||_)
//+1 is because we need to be able to represent 0 and BusCount not BusCount-1
val requestsUInt = => Mux(x, UInt(1, width=busCount), UInt(0, width=busCount+1)))
if (dataWidth.length > 0) {
io.newRequests := requestsUInt.reduce(_+_)
//Increment counter when any request comes in
for(i <- 0 until busCount){
parsers(i).offsetAddr := io.offsetAddrs(i)
parsers(i).loadOffset := io.loadOffsets(i)
parsers(i).reqIn := io.reqsIn(i)
//parsers(i).reqIn <> io.reqsIn(i)
queues(i).enq.bits.req := parsers(i).reqOut
queues(i).enq.bits.timestamp := counter.value //put in the timestamp!
io.reqsFullN(i) := queues(i).enq.ready
queues(i).enq.valid := io.reqsWrite(i)
queues(i).deq.ready := := queues(i).deq.valid := queues(i).deq.bits.req
//Priority is oldest first. It is defined as the difference from the last serviced timestamp := queues(i).deq.bits.timestamp - prevTimestamp //This works even with overlfow so long as counterSize is a power of 2
when( && io.reqOut.ready){
prevTimestamp := prevTimestamp +
//io.reqOut :=
io.reqOut.valid := := io.reqOut.ready
io.reqOut.bits :=
io.selectedBus :=
//I/O of RequestIssuer: the arbitrated request and its source bus on the input side,
//the conflict-check and tag-queue lines, and the flattened RoCC memory request lines
//on the output side.
//  dataWidth/addrWidth: widths of the incoming request
//  maxReqWidth:         bit width of the access/request width fields
//  numBus:              number of buses, sets the width of the bus index fields
//  rocc*Width:          widths of the corresponding RoCC request fields
class RequestIssuerIO(dataWidth: Int, addrWidth:Int, maxReqWidth:Int, numBus:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int) extends Bundle{
//Incoming request from arbiter
//val reqIn = Decoupled(new ApBusReq(dataWidth, addrWidth)).flip
val reqIn = Flipped(Decoupled(new ApBusReq(dataWidth, addrWidth)))
//The bus that the request came from
val reqBus = UInt(INPUT, width=log2Up(numBus))
//Lines for table address check and for updating the table
val accessWidth = UInt(OUTPUT, width=maxReqWidth)
val accessRead = Bool(INPUT)
val conflict = Bool(INPUT)
//val tagQueueIO = Decoupled(UInt(OUTPUT, width=roccTagWidth)).flip
val tagQueueIO = Flipped(Decoupled(Output(UInt(roccTagWidth.W))))
val busNum = UInt(OUTPUT, width=log2Up(numBus))
//RoCC Lines
val roCCReqAddr = UInt(OUTPUT, width = roccAddrWidth) // coreMaxAddrBits)
val roCCReqTag = UInt(OUTPUT, width = roccTagWidth) //coreDCacheReqTagBits)
val roCCReqCmd = UInt(OUTPUT, width = roccCmdWidth) //M_SZ)
val roCCReqTyp = UInt(OUTPUT, width = roccTypWidth) //
val roCCReqData = UInt(OUTPUT, width = roccDataWidth) //coreDataBits)
val roCCReqValid = Bool(OUTPUT)
val roCCReqRdy = Bool(INPUT)
val reqWidth = UInt(OUTPUT, width = maxReqWidth) //Pass to table to specify width of request
//High in the cycle a request is actually issued
val reqSent = Bool(OUTPUT)
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new RequestIssuerIO(dataWidth, addrWidth, maxReqWidth, numBus, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth).asInstanceOf[this.type]
//maxReqBytes = 8 (64 bit) in our case
//Turns the arbitrated request into a RoCC memory request. A request is issued only
//when it is valid, the RoCC side is ready, no conflict is reported and a tag is
//available; that same condition pops the request and the tag.
//  busDataWidths: data width of each bus in bits, used to derive the byte width of
//                 the request from io.reqBus
//NOTE(review): the argument of the Vec(...) that builds busByteWidth is incomplete
//in this copy of the file.
//NOTE(review): maxReqWidth = log2Up(maxReqBytes) is 3 bits for maxReqBytes = 8, which
//cannot hold the byte width 8 that roCCReqTyp is compared against - confirm whether
//reqWidth/accessWidth are meant to truncate.
class RequestIssuer(dataWidth: Int, addrWidth:Int, maxReqBytes:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int, busDataWidths:Seq[Int]) extends Module{
val maxReqWidth = log2Up(maxReqBytes)
val numBus = busDataWidths.length
val io = IO(new RequestIssuerIO(dataWidth, addrWidth, maxReqWidth, numBus, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth))
if (busDataWidths.length > 0) {
val busByteWidth = Vec( => UInt(x)/UInt(8)))
//Byte width of the bus the current request came from
val currentByteWidth = busByteWidth(io.reqBus)
io.busNum := io.reqBus
io.roCCReqAddr := io.reqIn.bits.address
io.roCCReqData := io.reqIn.bits.dataout
io.roCCReqTag := io.tagQueueIO.bits
io.roCCReqCmd := Mux(io.reqIn.bits.din, M_XWR, M_XRD) //Set the transaction type (Write / Read)
//Encodes 8/4/2 bytes as log2 of the byte count; any other width falls back to the 1-byte code
io.roCCReqTyp := Mux(currentByteWidth===UInt(8), log2Ceil(8).U, Mux(currentByteWidth===UInt(4), log2Ceil(4).U, Mux(currentByteWidth===UInt(2), log2Ceil(2).U, log2Ceil(1).U))) //Set transaction width
io.reqWidth := currentByteWidth
io.accessWidth := currentByteWidth
//Valid does not depend on roCCReqRdy; the ready term only appears in memTransactSent
io.roCCReqValid := io.reqIn.valid && !io.conflict && io.tagQueueIO.valid
val memTransactSent = io.reqIn.valid && io.roCCReqRdy && !io.conflict && io.tagQueueIO.valid
io.reqIn.ready := memTransactSent
io.tagQueueIO.ready := memTransactSent //We used a tag when we issued a request
io.reqSent := memTransactSent
//I/O of RoutingTable, in three groups: req* describes a request being recorded,
//check* describes an access to test for conflicts (result on conflict), and resp*
//identifies a returning transaction (its bus is reported on respBus).
//  tagWidth:    bit width of the request/response tags
//  numBus:      number of buses, sets the width of the bus index fields
//  addrWidth:   bit width of the addresses
//  maxReqWidth: bit width of the request width fields
class RoutingTableIO(tagWidth:Int, numBus:Int, addrWidth: Int, maxReqWidth: Int) extends Bundle{
val reqValid = Bool(INPUT)
val reqTag = UInt(INPUT, width = tagWidth)
val reqWrite = Bool(INPUT) //If the transaction is a write
val reqAddr = UInt(INPUT, width = addrWidth)
val reqBus = UInt(INPUT, log2Up(numBus))
val reqWidth = UInt(INPUT, width = maxReqWidth)
val checkAddr = UInt(INPUT, width = addrWidth)
val checkWidth = UInt(INPUT, width = maxReqWidth)
val checkRead = Bool(INPUT)
val conflict = Bool(OUTPUT)
val respTag = UInt(INPUT, width = tagWidth)
//Response-valid strobe (the name is spelled "respVaid" in the interface)
val respVaid = Bool(INPUT)
val respBus = UInt(OUTPUT, width = log2Up(numBus))
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new RoutingTableIO(tagWidth, numBus, addrWidth, maxReqWidth).asInstanceOf[this.type]
//maxReqBytes = 8 (64 bit) in our case
//Scoreboard of outstanding memory requests, indexed by request tag.
//For every tag it records whether a request is outstanding, whether it is a write,
//its address, its width in bytes and the bus that issued it. The table is used to
//  - report the issuing bus of a returning transaction (io.respBus), and
//  - flag a conflict between a candidate access (io.check*) and outstanding requests.
//  tagWidth:    bit width of the tags
//  numTags:     number of table entries
//  numBus:      number of buses
//  addrWidth:   bit width of the addresses (nothing is generated when it is 0)
//  maxReqBytes: largest request size in bytes
//An entry is allocated only when io.reqValid is high and released only when
//io.respVaid is high; without that gating the table would be rewritten every cycle
//from whatever happens to be on the tag lines.
//NOTE(review): maxReqWidth = log2Up(maxReqBytes) is 3 bits for maxReqBytes = 8 and
//cannot represent a width of 8 bytes - confirm the intended width encoding.
class RoutingTable(tagWidth:Int, numTags:Int, numBus:Int, addrWidth: Int, maxReqBytes: Int) extends Module{
  val maxReqWidth = log2Up(maxReqBytes)
  //val numTags = BigInt(2).pow(tagWidth).toInt
  val io = IO(new RoutingTableIO(tagWidth, numBus, addrWidth, maxReqWidth))
  if (addrWidth > 0) {
    val v = Reg(Vec.fill(numTags)(Bool(false))) //valid (outstanding memory request)
    val write = Reg(Vec.fill(numTags)(Bool(false))) //outstanding request is a write
    val addr = Reg(Vec.fill(numTags)(UInt(0, width=addrWidth)))
    val width = Reg(Vec.fill(numTags)(UInt(0, width=maxReqWidth)))
    val bus = Reg(Vec.fill(numTags)(UInt(0, width=log2Up(numBus))))
    //write Req into table, but only when a request is actually being recorded
    when(io.reqValid){
      v(io.reqTag) := Bool(true) //Set table entry as valid
      write(io.reqTag) := io.reqWrite //Set write flag
      addr(io.reqTag) := io.reqAddr //Set addr
      width(io.reqTag) := io.reqWidth //Set transaction width (in bytes)
      bus(io.reqTag) := io.reqBus //Set the requesting bus
    }
    //clear returned transaction. The tag queue prevents accidentally clearing a tag that has not yet returned
    //The clear comes after the allocation so that it wins if both name the same tag in one cycle
    when(io.respVaid){
      v(io.respTag) := Bool(false) //This entry is no longer valid
    }
    //Return the bus that requested the returning transaction
    io.respBus := bus(io.respTag)
    //Reads can execute if there is no outstanding write to the address (v==false || write==false)
    // Reads stall when v==true and write==true
    //Writes can only occur if there are no outstanding ops (v==false)
    // Writes stall when v==true
    //Two byte ranges overlap unless one ends at or before the start of the other
    val addrsConflicting = Vec.tabulate(numTags)((i) => !((io.checkAddr + io.checkWidth <= addr(i)) || (addr(i) + width(i) <= io.checkAddr)))
    //There is a conflict if: the address is conflicting, the entry is valid, and if(reading, there is an outstanding write)
    val conflicting = Vec.tabulate(numTags)((i) => addrsConflicting(i) && v(i) && Mux(io.checkRead, write(i), Bool(true)))
    io.conflict := conflicting.reduce(_||_)
  }
}
//Top-level I/O of the memory controller: per-bus ap_bus request and response ports
//on one side, the flattened RoCC memory request/response lines on the other, plus a
//busy status line.
//  dataWidth/addrWidth: per-bus widths; the number of buses is dataWidth.length
//  rocc*Width:          widths of the corresponding RoCC fields
//NOTE(review): the arguments of the HeterogeneousBag calls are incomplete in this
//copy of the file.
class MemControllerIO(dataWidth:Seq[Int], addrWidth:Seq[Int], roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int) extends Bundle{
//----ap_bus requests----
val reqsIn = HeterogeneousBag( {
case (dw, aw) => new ApBusReq(dw, aw)
//Decoupled signals for requests
val reqsFullN = Vec(dataWidth.length, Bool(OUTPUT))
//val reqsWrite = Vec(dataWidth.length, Bool(INPUT)).flip
val reqsWrite = Vec(dataWidth.length, Input(Bool()))
//Offset address lines
//val offsetAddrs = Vec.tabulate(dataWidth.length)((i) => UInt(INPUT, width=addrWidth(i)))
val offsetAddrs = HeterogeneousBag( => UInt(INPUT, width = aw)))
//val loadOffsets = Vec(dataWidth.length, Bool(INPUT)).flip
val loadOffsets = Vec(dataWidth.length,Input(Bool()))
//----status line----
val memBusy = Bool(OUTPUT)
//----ap_bus response----
//val rspOut = Vec.tabulate(dataWidth.length)((i) => new ApBusRsp(dataWidth(i)).flip)
val rspOut = HeterogeneousBag( => new ApBusRsp(dw))).flip
val rsp_empty_n = Vec(dataWidth.length, Output(Bool())) //This is the same as valid
val rsp_read = Vec(dataWidth.length, Input(Bool())) //This is the same as ready
//----RoCC Mem Req----
val roCCReqAddr = UInt(OUTPUT, width = roccAddrWidth) // coreMaxAddrBits)
val roCCReqTag = UInt(OUTPUT, width = roccTagWidth) //coreDCacheReqTagBits)
val roCCReqCmd = UInt(OUTPUT, width = roccCmdWidth) //M_SZ)
val roCCReqTyp = UInt(OUTPUT, width = roccTypWidth) //MT_SZ)
val roCCReqData = UInt(OUTPUT, width = roccDataWidth) //
val roCCReqValid = Bool(OUTPUT)
val roCCReqRdy = Bool(INPUT)
//----RoCC Mem Resp----
//val roCCRespAddr = UInt(INPUT, width = roccAddrWidth) // coreMaxAddrBits)
val roCCRspTag = UInt(INPUT, width = roccTagWidth) //coreDCacheReqTagBits)
val roCCRspCmd = UInt(INPUT, width = roccCmdWidth) //M_SZ)
val roCCRspData = UInt(INPUT, width = roccDataWidth) //MT_SZ)
//val roCCRespTyp = UInt(INPUT, width = roccTypWidth) //MT_SZ)
val roCCRspValid = Bool(INPUT)
//Rebuilds the bundle with the same constructor parameters
override def cloneType: this.type = new MemControllerIO(dataWidth, addrWidth, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth).asInstanceOf[this.type]
//Memory controller that ties together the request ingest, the request issuer, the
//routing table (scoreboard), a queue of free tags, per-bus response queues and a
//valid demultiplexer. Nothing is generated when there are no buses.
//  dataWidth/addrWidth: per-bus widths; the number of buses is dataWidth.length
//  reqBufferLen:        depth of each per-bus request queue
//  rspBufferLen:        depth of each per-bus response queue
//  maxReqBytes:         largest request size in bytes
//  rocc*Width:          widths of the corresponding RoCC fields
//  numTags, tagOffset:  the tag queue is initialised with the tags
//                       tagOffset until tagOffset+numTags
//NOTE(review): most of the wiring between the sub-modules is incomplete in this copy
//of the file (statements reduced to bare ":=" and comment lines that have absorbed
//code); restore it before relying on the description above.
class MemController(dataWidth:Seq[Int], addrWidth:Seq[Int], reqBufferLen:Int, rspBufferLen:Int, maxReqBytes:Int, roccAddrWidth:Int, roccDataWidth:Int, roccTagWidth:Int, roccCmdWidth:Int, roccTypWidth:Int, numTags:Int, tagOffset:Int ) extends Module{
val io = IO(new MemControllerIO(dataWidth, addrWidth, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth))
val numBus = dataWidth.length
if (numBus > 0){
val reqIngest = Module(new RequestIngest(dataWidth, addrWidth, reqBufferLen))
val reqIssuer = Module(new RequestIssuer(if (dataWidth.length > 0) dataWidth.max else 0, if (addrWidth.length > 0) addrWidth.max else 0, maxReqBytes, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth, dataWidth))
val scoreboard = Module(new RoutingTable(roccTagWidth, numTags, numBus, if (addrWidth.length > 0) addrWidth.max else 0, maxReqBytes))
//Tags may have an offset if this is not the only accelerator in the system
val tags = (tagOffset until (tagOffset+numTags))
val tagUInts = => UInt(x, width=roccTagWidth).asUInt)
//The tag queue starts out holding every tag; flow=true lets a tag pass straight through while the queue is empty
val tagQueue = Module(new RegisterQueue(gen=UInt(width=roccTagWidth), entries=numTags, initVals=tagUInts, flow=true))
val rspQueues = Seq.tabulate(numBus)((i) => Module(new Queue(UInt(width=dataWidth(i)), rspBufferLen)).io)
val validDemux = Module(new ValidDemux(numBus))
val currentRequestNum = Reg(init=UInt(0, width=(reqBufferLen*numBus+1+numTags)))
//Logic for number of outstanding requests
currentRequestNum := currentRequestNum + - Mux(io.roCCRspValid, UInt(1), UInt(0))
//Busy while any request is still outstanding
io.memBusy := currentRequestNum =/= UInt(0)
//==== Ingest Logic ====
//Hook up ap_bus request lines to ingest logic
// <> io.reqsIn := io.reqsIn
io.reqsFullN := := io.reqsWrite
// := io.offsetAddrs
io.offsetAddrs := := io.loadOffsets
//val newRequests
//val reqOut
//val selectedBus
//====Req Issuer ====
//Incoming request from arbiter
// <> := := :=
//The bus that the request came from :=
//Lines for table address check and for updating the table
//val accessWidth
//val containsAddr
// <> := := :=
//val busNum
//val reqWidth //Pass to table to specify width of request
//RoCC Lines
io.roCCReqAddr :=
io.roCCReqTag :=
io.roCCReqCmd :=
io.roCCReqTyp :=
io.roCCReqValid := := io.roCCReqRdy
io.roCCReqData :=
//====Scoreboard==== := //Do not commit into the table unles there is a fire := := ( === M_XWR) //If the transaction is a write := := := := := := := := io.roCCRspTag := io.roCCRspValid
//====TagQueue==== := io.roCCRspTag := io.roCCRspValid
//====Demux===== := io.roCCRspValid && io.roCCRspCmd===M_XRD //Only return to the bus if this is a response to a read request :=
//val validOut = Vec.fill(fanout)(Bool(OUTPUT))
for(i <- 0 until numBus){
rspQueues(i).enq.bits := io.roCCRspData((dataWidth(i)-1),0) //Pass the data to all output queues (slicing to appropriate width), only give one the valid signal
rspQueues(i).enq.valid :=
io.rspOut(i).datain := rspQueues(i).deq.bits
io.rsp_empty_n(i) := rspQueues(i).deq.valid
rspQueues(i).deq.ready := io.rsp_read(i)

use warnings;
use strict;
use Cwd;
use File::Copy;
# Root directory of the checkout, taken from the RDIR environment variable.
my $rdir = $ENV{'RDIR'};
# NOTE(review): $rdir is printed before it is checked, so an unset RDIR triggers an
# "uninitialized value" warning under "use warnings".
print $rdir;
# Complain when RDIR is unset or empty.
# NOTE(review): the name of the file to source and the rest of this branch are not
# present in this copy of the script.
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
# generate_accel(\@accel_tuples)
#
# For every accelerator tuple, creates <bm_path>/src/main/c, copies the example
# sources and helper scripts into it, rewrites FUNC= in the Makefile and runs the
# generation scripts for either the RoCC or the non-RoCC (TL) flavour.
#
# Each tuple is an array reference laid out as
#   [0] program name          [1] function name      [2] benchmark path
#   [3] true for RoCC         [4] RoCC index or TL address
#   [5] optional prefix (defaults to a single space)
#
# Dies if the chdir into the benchmark source directory fails; the exit status of
# the system() calls is not checked.
# NOTE(review): the script file names are missing from the "cp .../scripts/" and
# "perl ..." command strings in this copy of the file.
# NOTE(review): tuple values are interpolated into shell commands unquoted; confirm
# they never contain spaces or shell metacharacters.
sub generate_accel{
my @accel_tuples= @{$_[0]};
foreach my $accel_tuple_ref (@accel_tuples) {
my @accel_tuple = @{$accel_tuple_ref};
my $pgm = $accel_tuple[0];
my $func = $accel_tuple[1];
my $bm_path = $accel_tuple[2];
my $bm_path_c = $bm_path.'/src/main/c/';
my $is_rocc = $accel_tuple[3];
my $idx_addr = $accel_tuple[4];
# The prefix is optional: it is only read when the tuple has a sixth element
my $prefix=" ";
my $num_args = scalar @accel_tuple;
if ($num_args > 5) {
$prefix = $accel_tuple[5];
print("Pgm: ".$pgm."\n");
print("Func: ".$func."\n");
print("Path: ".$bm_path."\n");
print("Is RoCC not TL?: ".$is_rocc."\n");
print("RoCC Idx or TL Addr: ".$idx_addr."\n");
print("Prefix: ".$prefix."\n");
# Export the program and function names to the environment of the child processes
$ENV{'PGM'} = $pgm;
$ENV{'FUNC'} = $func;
my $PGM = $pgm;
my $FUNC = $func;
my $RDIR = $rdir;
# Create the C source directory and make it the working directory
system("mkdir -p $bm_path/src/main/c");
chdir("$bm_path/src/main/c/") or die $!;
system("cp $RDIR/tools/centrifuge/examples/${PGM}/* $bm_path_c");
system("cp $RDIR/tools/centrifuge/scripts/ $bm_path_c");
#system("cp $RDIR/hls/sw/time.h $bm_path/src/main/c/");
#system("cp $RDIR/hls/sw/rocc.h $bm_path/src/main/c/");
# Specialize the Makefile for this function
system("sed -i 's/^FUNC=.*/FUNC=$func/g' $bm_path_c/Makefile");
my $dir = getcwd;
system("perl ${PGM} ${FUNC} $prefix");
# RoCC flavour
if ($is_rocc) {
system("cp $RDIR/tools/centrifuge/scripts/ $bm_path_c");
system("cp $RDIR/tools/centrifuge/scripts/ $bm_path_c");
system("perl ${PGM} ${FUNC} $prefix");
system("perl ${PGM} ${FUNC} $idx_addr $prefix");
#system("make clean");
#system("make CUSTOM_INST=1");
# Non-RoCC (TL) flavour
} else {
system("cp $RDIR/tools/centrifuge/scripts/ $bm_path_c");
system("cp $RDIR/tools/centrifuge/scripts/ $bm_path_c");
system("perl ${PGM} ${FUNC} $idx_addr $prefix");
system("perl ${PGM} ${FUNC} $idx_addr $prefix");
#system("make clean");
#system("make CUSTOM_DRIVER=1");
# Example with RoCC and TL accel
#my @input = (["vadd", "vadd", "$rdir/sim/target-rtl/firechip/hls_vadd_vadd/src/main/c", 1, "0", "rocc0_"], ["vadd_tl", "vadd", "$rdir/sim/target-rtl/firechip/hls_vadd_tl_vadd/src/main/c", 0, "0x20000", "tl0_"]);

use warnings;
use strict;
use Cwd;
use File::Copy;
# generate_build_sbt($soc_name, \%bm_path)
#
# Regenerates $RDIR/build.sbt: the current file is first copied to build.sbt.bk,
# then the new content is assembled from the build_sbt_template file, one project
# entry per benchmark in %bm_path (name => path), an entry for the SoC project and
# the example/firechip entries, and finally written out.
#
# Dies if the backup copy fails or the template cannot be opened.
# NOTE(review): the open() of build.sbt for writing has no error check.
# NOTE(review): the multi-line template strings below have lost their terminating
# quote lines in this copy of the file.
sub generate_build_sbt {
my $rdir = $ENV{'RDIR'};
print $rdir;
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
# back up the build sbt file
copy("$rdir/build.sbt","$rdir/build.sbt.bk") or die "Copy failed: $!";
open SBT, ">$rdir/build.sbt";
# hash of all hls bm and its path
my $soc_name = $_[0];
my %bm_path = %{$_[1]};
my $build_sbt_template = "$rdir/tools/centrifuge/scripts/build_sbt_template";
open my $fh, '<', $build_sbt_template or die "error opening $build_sbt_template $!";
# Read the whole template file in one go
my $build_sbt = do { local $/; <$fh> };
# print list of hls accels
my $dep_template='lazy val BM = (project in file("PATH"))
.dependsOn(rocketchip, testchipip, midasTargetUtils, icenet)
keys %bm_path;
while(my($bm, $path) = each %bm_path) {
my $dep = $dep_template;
$dep =~ s/BM/$bm/;
$dep =~ s/PATH/$path/;
$build_sbt = $build_sbt."\n".$dep;
my @bm = (keys %bm_path);
my $bm_size = @bm;
my $bms = '';
if ($bm_size > 0) {
$bms = ", ".join(", ", @bm);
my $soc_template = '
lazy val SOC_NAME = conditionalDependsOn(project in file("generators/SOC_NAME"))
.dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilitiesBMS)
my $soc = $soc_template;
$soc =~ s/SOC_NAME/$soc_name/g;
$soc =~ s/BMS/$bms/;
$build_sbt = $build_sbt.$soc;
my $firechip_template = '
lazy val example = conditionalDependsOn(project in file("generators/example"))
.dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, sha3, SOC_NAME)
lazy val firechip = (project in file("generators/firechip"))
.dependsOn(example, icenet, testchipip, tracegen, midasTargetUtils, midas, firesimLib % "test->test;compile->compile")
testGrouping in Test := isolateAllTests( (definedTests in Test).value )
my $firechip_dep = $firechip_template;
$firechip_dep =~ s/SOC_NAME/$soc_name/g;
$build_sbt = $build_sbt.$firechip_dep;
print SBT $build_sbt;
close SBT;

use warnings;
use strict;
use Cwd;
use File::Copy;
# generate_config(\@rocc_func_names, \@tll2_func_names [, $postfix])
#
# Writes $RDIR/generators/example/src/main/scala/HLSConfig.scala. The emitted
# Scala text is assembled from:
#   - a fixed package/import header, plus one import per RoCC function
#     (hls_<func>.HLS<func>Control) and one per TLL2 function (hls_<func>._),
#   - class WithHLSRoCCExample, whose BuildRoCC sequence gets one entry per
#     RoCC function (entry $i uses OpcodeSet.custom$i) followed by a
#     TranslatorExample entry on OpcodeSet.custom3,
#   - class HLSRocketConfig and class WithHLSTop,
#   - class TopWithHLS / TopWithHLSModule, with one HasPeripheryHLS<func>AXI /
#     HasPeripheryHLS<func>AXIImp mix-in per TLL2 function.
#
# Arguments:
#   $_[0]  reference to the list of RoCC function names
#   $_[1]  reference to the list of TLL2 (TileLink) function names
#   $_[2]  optional postfix; stored in $postfix but not used in the lines
#          visible here
#
# NOTE(review): closing braces and the quotes ending the multi-line strings are
# not visible in this listing, and nothing stops execution after the
# "Please source!" message -- confirm against the complete file.
# NOTE(review): `$i ne 0` and `scalar @rocc_func_names ne 0` compare numbers
# with the string operator; `!=` is presumably intended.
# NOTE(review): a fourth RoCC function would be given OpcodeSet.custom3, the
# same opcode set used for TranslatorExample -- confirm callers never pass four.
sub generate_config {
my $rdir = $ENV{'RDIR'};
print $rdir;
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
my @rocc_func_names = @{$_[0]};
my @tll2_func_names = @{$_[1]};
my $postfix = "";
my $num_args = scalar @_;
if ($num_args > 2) {
$postfix= $_[2];
my $rocc = "";
#if (@rocc_func_names > 0) {
$rocc .= "
class WithHLSRoCCExample extends Config((site, here, up) => {
case BuildRoCC => Seq(
for( my $i = 0; $i < @rocc_func_names; $i = $i + 1 ){
if ($i ne 0) { $rocc.=",
$rocc .="
(p: Parameters) => {
val hls_$rocc_func_names[$i] = LazyModule(new HLS$rocc_func_names[$i]Control(OpcodeSet.custom$i)(p))
if (scalar @rocc_func_names ne 0) { $rocc.=",
$rocc .= "
(p: Parameters) => {
val translator = LazyModule(new TranslatorExample(OpcodeSet.custom3)(p))
#if (@rocc_func_names > 0) {
$rocc .= "\n})\n";
open CONFIG, ">$rdir/generators/example/src/main/scala/HLSConfig.scala" or die "$!\n";
my $config="package example
import chisel3._
import freechips.rocketchip.diplomacy.{LazyModule, ValName}
import freechips.rocketchip.config.{Parameters, Config}
import testchipip.{WithBlockDevice, BlockDeviceKey, BlockDeviceConfig}
import freechips.rocketchip.tile._
import freechips.rocketchip.subsystem._
import freechips.rocketchip.system.DefaultConfig
import freechips.rocketchip.rocket._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.devices.tilelink._
import freechips.rocketchip._
import testchipip._
import sifive.blocks.devices.uart.{PeripheryUARTKey,UARTParams}
import sifive.blocks.devices.uart._
import ConfigValName._
foreach my $func_name (@rocc_func_names) {
$config .= "import hls_$func_name.HLS$func_name"."Control\n";
foreach my $func_name (@tll2_func_names) {
$config .= "import hls_$func_name._\n";
$config .= $rocc;
$config .="
class HLSRocketConfig extends Config(
new WithHLSTop ++
new WithBootROM ++
new freechips.rocketchip.subsystem.WithInclusiveCache ++
new WithHLSRoCCExample ++
new freechips.rocketchip.subsystem.WithNBigCores(1) ++
new freechips.rocketchip.system.BaseConfig)
$config .="
class WithHLSTop extends Config((site, here, up) => {
case BuildTop => (clock: Clock, reset: Bool, p: Parameters) =>
Module(LazyModule(new TopWithHLS()(p)).module)
class TopWithHLS(implicit p: Parameters) extends Top ";
foreach my $func_name (@tll2_func_names) {
$config .= "\n with HasPeripheryHLS$func_name"."AXI";
$config .=' {
override lazy val module = new TopWithHLSModule(this)
class TopWithHLSModule(l: TopWithHLS) extends TopModule(l)
foreach my $func_name (@tll2_func_names) {
$config .= " with HasPeripheryHLS$func_name"."AXIImp\n";
print CONFIG $config;
close CONFIG;

use strict;
use warnings;
use Cwd;
use File::Copy;
# Top-level driver script.
#
# Command line (as read below): <config>.json [postfix]
#   $ARGV[0]  JSON accelerator description; the SoC name is derived from it
#   $ARGV[1]  optional postfix forwarded to generate_config
# Requires the RDIR environment variable.
#
# Flow: parse the JSON file, build one tuple per RoCC and per TLL2 accelerator,
# collect the HLS project name => path map, then regenerate build.sbt and the
# HLSConfig Scala file.
#
# NOTE(review): closing braces and any early exits after the "Please ..."
# messages are not visible in this listing -- confirm against the complete file.
my $dir = getcwd;
my $json_fn = $ARGV[0];
my $soc_name = $json_fn;
# NOTE(review): the pattern is unanchored and its dot is unescaped, so it
# removes the first "<any character>json" found anywhere in the name;
# s/\.json$// is presumably intended. It also runs before $json_fn is checked
# for definedness further down.
$soc_name =~ s/.json//;
my $rdir = $ENV{'RDIR'};
my $postfix="";
my $num_args = $#ARGV + 1;
if ($num_args > 1) {
$postfix= $ARGV[1];
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
if (not defined($json_fn)){
print("Please specify a json config file\!\n");
my $scripts_dir = $rdir.'/tools/centrifuge/scripts/';
# NOTE(review): the six require targets below have empty file names in this
# listing; presumably they load the helper scripts that define parse_json,
# generate_build_sbt, generate_config, etc. -- confirm.
require $scripts_dir.'';
require $scripts_dir.'';
require $scripts_dir.'';
require $scripts_dir.'';
require $scripts_dir.'';
require $scripts_dir.'';
# Parse Json file
my ($RoCC_ref, $TLL2_ref) = parse_json($json_fn);
my @RoCC_accels = @$RoCC_ref;
my @TLL2_accels = @$TLL2_ref;
# %hls_bm maps "hls_<func>" => project directory (passed to generate_build_sbt).
# NOTE(review): the key uses only the function name, so two accelerators that
# share a function name overwrite each other -- confirm this cannot happen.
my %hls_bm = ();
# Each tuple is [pgm, func, project path, is_rocc, index-or-address, prefix].
my @Accel_tuples = ();
my @RoCC_names = ();
my @TLL2_names = ();
my $idx = 0;
# RoCC accelerators: element 2 of the parsed entry, when present, overrides the
# default project path $RDIR/generators/<soc>/hls_<pgm>_<func>.
foreach my $RoCC_accel (@RoCC_accels){
my @arr = @{$RoCC_accel};
my $pgm = $arr[0];
my $func = $arr[1];
my $bm_path = "";
if (scalar @arr > 2) {
$bm_path = $arr[2];
} else {
$bm_path = $rdir."/generators/$soc_name/hls_$pgm"."_$func"; ;
my $prefix = "rocc".$idx."_";
# 4th tuple element (is_rocc, index 3) is set to 1
push(@Accel_tuples, [$pgm, $func, $bm_path, 1, $idx, $prefix]);
$hls_bm{"hls_$func"} = $bm_path;
push(@RoCC_names, $func);
$idx += 1;
$idx = 0;
# TLL2 accelerators: element 2 is the base address; element 3, when present,
# overrides the default project path. is_rocc is 0 and the address takes the
# place of the RoCC index in the tuple.
foreach my $TLL2_accel (@TLL2_accels){
my @arr = @{$TLL2_accel};
my $pgm = $arr[0];
my $func = $arr[1];
my $addr = $arr[2];
my $bm_path = "";
if (scalar @arr > 3) {
$bm_path = $arr[3];
} else {
$bm_path = $rdir."/generators/$soc_name/hls_$pgm"."_$func"; ;
my $prefix = "tl".$idx."_";
push(@Accel_tuples, [$pgm, $func, $bm_path, 0, $addr, $prefix]);
$hls_bm{"hls_$func"} = $bm_path;
push(@TLL2_names, $func);
$idx += 1;
# Generate the verilog and chisel code
# Generate build.sbt under firesim/sim
generate_build_sbt($soc_name, \%hls_bm);
# Generate HLSConfig file for RoCC Accelerators
generate_config(\@RoCC_names, \@TLL2_names, $postfix);
# F1
#compile_xsim_libs($postfix, "clean", 0);
#compile_replace_rtl($postfix, "clean", 0);
#print_xsim_cmd($postfix, 0);
# Ax machines
#copy_verilog(\%hls_bm, "$rdir/sim/generated-src/f1/FireSimHLS-HLSFireSimRocketChipConfig-FireSimConfig/FPGATop.v");
# print_xsim_cmd($postfix, $with_nic)
#
# Prints (does not run) the shell commands for the XSim flow: compile, remove
# the simulation folder, run the driver, and run a test binary.
#
#   $postfix   appended to HLSFireSimRocketChipConfig to form TARGET_CONFIG
#   $with_nic  true selects DESIGN=FireSimHLS, false selects FireSimHLSNoNIC
#
# The printed text is unchanged from the original statements; the `if` and the
# sub itself are now properly closed and an undefined $postfix is treated as "".
sub print_xsim_cmd{
    my ($postfix, $with_nic) = @_;
    $postfix = "" unless defined $postfix;

    # The NoNIC design variant is the default.
    my $nic = $with_nic ? "" : "NoNIC";

    # Common make invocation shared by the compile / driver / test commands.
    my $make = "make DESIGN=FireSimHLS$nic TARGET_CONFIG=HLSFireSimRocketChipConfig$postfix PLATFORM_CONFIG=FireSimConfig";

    print("Source Full Env:\n source\n");
    print("XSim Compile:\n".'cd $RDIR/sim '."&& $make xsim\n");
    print("Remove Sim Folder:\n".'rm -rf cl_'."FireSimHLS$nic-HLSFireSimRocketChipConfig$postfix-FireSimConfig/verif/sim\n");
    print("XSim Run Driver:\n".'cd $RDIR/sim '."&& $make xsim-dut\n");
    # ${PGM} / ${FUNC} are left literal for the user to fill in.
    print("XSim Run Test:\n".'cd $RDIR/sim '."&& $make run-xsim SIM_BINARY=".'$RDIR/sim/target-rtl/firechip/hls_${PGM}_${FUNC}/src/main/c/${PGM}.riscv');
    #print("LD_LIBRARY_PATH=output/f1/FireSimHLS$nic-HLSFireSimRocketChipConfig$postfix-FireSimConfig/ output/f1/FireSimHLS$nic-HLSFireSimRocketChipConfig$postfix-FireSimConfig/FireSimHLS$nic-f1 ".'+mm_readLatency=10 +mm_writeLatency=10 +mm_readMaxReqs=4 +mm_writeMaxReqs=4 +netburst=8 +slotid=0 $RDIR/sim/target-rtl/firechip/hls_${PGM}_${FUNC}/src/main/c/${PGM}.riscv');
}
# compile_xsim_libs($postfix, $clean, $with_nic)
#
# Runs `make ... $clean xsim` for the FireSimHLS design in the current
# directory.
#
#   $postfix   appended to HLSFireSimRocketChipConfig to form TARGET_CONFIG
#   $clean     extra make target placed before `xsim` (e.g. "clean")
#   $with_nic  true selects DESIGN=FireSimHLS, false selects FireSimHLSNoNIC
#
# Returns the status from system(), as the original last statement did. A
# non-zero status is now reported with a warning instead of being silently
# ignored; the `if` and the sub are properly closed.
sub compile_xsim_libs{
    my ($postfix, $clean, $with_nic) = @_;

    # The NoNIC design variant is the default.
    my $nic = $with_nic ? "" : "NoNIC";

    my $cmd = "make DESIGN=FireSimHLS$nic TARGET_CONFIG=HLSFireSimRocketChipConfig$postfix PLATFORM_CONFIG=FireSimConfig $clean xsim";
    my $status = system($cmd);
    warn "compile_xsim_libs: command failed (status $status): $cmd\n" if $status != 0;
    return $status;
}
# compile_replace_rtl($postfix, $clean, $with_nic)
#
# Runs `make ... $clean replace-rtl` for the FireSimHLS design in the current
# directory.
#
#   $postfix   appended to HLSFireSimRocketChipConfig to form TARGET_CONFIG
#   $clean     extra make target placed before `replace-rtl` (e.g. "clean")
#   $with_nic  true selects DESIGN=FireSimHLS, false selects FireSimHLSNoNIC
#
# Returns the status from system(), as the original last statement did. A
# non-zero status is now reported with a warning instead of being silently
# ignored; the `if` and the sub are properly closed.
sub compile_replace_rtl{
    my ($postfix, $clean, $with_nic) = @_;

    # The NoNIC design variant is the default.
    my $nic = $with_nic ? "" : "NoNIC";

    my $cmd = "make DESIGN=FireSimHLS$nic TARGET_CONFIG=HLSFireSimRocketChipConfig$postfix PLATFORM_CONFIG=FireSimConfig $clean replace-rtl";
    my $status = system($cmd);
    warn "compile_replace_rtl: command failed (status $status): $cmd\n" if $status != 0;
    return $status;
}
# compile_vcs($clean)
#
# Runs `make $clean debug CONFIG=` in the current directory.
#
#   $clean  extra make target placed before `debug` (e.g. "clean")
#
# Returns the status from system(), as the original last statement did. A
# non-zero status is now reported with a warning instead of being silently
# ignored; the sub is properly closed.
# NOTE(review): CONFIG= is passed with an empty value, exactly as in the
# original command string -- confirm that is intended.
sub compile_vcs{
    my ($clean) = @_;

    my $cmd = "make $clean debug CONFIG=";
    my $status = system($cmd);
    warn "compile_vcs: command failed (status $status): $cmd\n" if $status != 0;
    return $status;
}
# copy_verilog(\%bm_path, $FPGATop_path)
#
# Appends every *.v file found in <project path>/src/main/verilog, for each
# project in %bm_path, to the file $FPGATop_path.
#
#   $_[0]  reference to a hash mapping HLS project name => project directory
#   $_[1]  path of the Verilog file to append to (created if missing)
#
# Differences from the original `cat ... >> file` shell call:
#   - no shell is involved, so paths containing spaces or shell
#     metacharacters are handled safely;
#   - projects are visited in sorted key order and files in sorted name
#     order, so the result is deterministic;
#   - failures are reported with warnings instead of being silently ignored.
#
# Returns the number of files appended.
sub copy_verilog{
    my %bm_path = %{$_[0]};
    my $FPGATop_path = $_[1];

    # Nothing to do: do not create the destination (the original ran no command).
    return 0 unless %bm_path;

    my $out;
    unless (open($out, '>>', $FPGATop_path)) {
        warn "copy_verilog: cannot open $FPGATop_path for appending: $!\n";
        return 0;
    }
    binmode($out);

    my $appended = 0;
    for my $bm (sort keys %bm_path) {
        my $vdir = "$bm_path{$bm}/src/main/verilog";

        my $dh;
        unless (opendir($dh, $vdir)) {
            warn "copy_verilog: cannot read $vdir: $!\n";
            next;
        }
        # Same selection as the shell glob *.v: regular, non-hidden files.
        my @sources = sort grep { /\.v\z/ && !/\A\./ && -f "$vdir/$_" } readdir($dh);
        closedir($dh);

        for my $name (@sources) {
            my $in;
            unless (open($in, '<', "$vdir/$name")) {
                warn "copy_verilog: cannot read $vdir/$name: $!\n";
                next;
            }
            binmode($in);
            my $text = do { local $/; <$in> };
            close($in);
            print {$out} $text if defined $text;
            $appended++;
        }
    }

    close($out) or warn "copy_verilog: error closing $FPGATop_path: $!\n";
    return $appended;
}

use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);
# Generates the bare-metal C wrapper header (bm_wrapper.h) for a RoCC-attached
# HLS accelerator.
#
# Inputs (command line): file_name, func_name, rocc_index, prefix(Optional)
#   file_name / func_name  identify the benchmark; ../verilog/<func_name>.v
#                          (relative to the current directory) is parsed
#   rocc_index             emitted later as the value of XCUSTOM_ACC
#   prefix                 when given, prepended to func_name; the wrapper
#                          function name is built from the unprefixed name
# Requires the RDIR environment variable.
#
# NOTE(review): this listing appears to have lost lines (closing braces, the
# loop that reads VERILOG line by line, any exit after "Please source!") --
# confirm against the complete file.
my $dir = getcwd;
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $rocc_index= $ARGV[2];
my $prefix = undef;
my $num_args = $#ARGV + 1;
if ($num_args > 3) {
$prefix = $ARGV[3];
my $rdir = $ENV{'RDIR'};
#print $rdir;
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name";
# The wrapper name is taken before the prefix is applied to $func_name.
my $wrapper_func_name = $func_name."_wrapper";
my $wrapper_header= "bm_wrapper.h";
if ($prefix) {
$func_name = $prefix.$func_name;
#############################PARSE Verilog##############################
my $verilog_file = "$dir/../verilog/$func_name".".v";
my $line = undef;
# Port names and bit widths, kept as parallel arrays (same index = same port).
my @verilog_input = ();
my @verilog_input_size = ();
my @verilog_output = ();
my @verilog_output_size = ();
print "Parsing ".$verilog_file."\n";
# parse the verilog file to get the info we need
if(!open VERILOG, "$verilog_file"){
print $!;
} else {
$line = $_;
# `input [msb:lsb] name;` records width msb - lsb + 1; `input name;` records width 1.
if($line =~ m/^\s*input\s+(.*)/){
my $input = $1;
#print "input:$input\n";
if($input =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
my $end = $1;
my $start = $2;
my $input_name = $3;
#print "here!"."$input_name\n";
push (@verilog_input, $input_name);
my $size = $end - $start + 1;
push(@verilog_input_size, $size);
}elsif ($input =~ m/\s*(.*)\s*;/){
my $input_name = $1;
#print "here!"."$input_name\n";
push (@verilog_input, $input_name);
push(@verilog_input_size, 1);
# Output declarations are handled the same way as inputs.
}elsif($line =~ m/^\s*output\s+(.*)/){
my $output = $1;
#print "output:$output\n";
if($output =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
my $end = $1;
my $start = $2;
my $output_name = $3;
#print "here!"."$output_name\n";
push(@verilog_output, $output_name);
my $size = $end - $start + 1;
push(@verilog_output_size, $size);
}elsif ($output =~ m/\s*(.*)\s*;/){
my $output_name = $1;
#print "here!"."$output_name\n";
push (@verilog_output, $output_name);
push(@verilog_output_size, 1);
my $in_str = join ' ', @verilog_input;
print $in_str."\n";
my $out_str = join ' ', @verilog_output;
print $out_str."\n";
# create the scala folder if it does not exist yet
my $scala_dir = "$dir/../scala";
mkdir $scala_dir unless (-d $scala_dir);
print "Generating BlackBox file ...\n";
# should be under scala folder
# Classify the parsed Verilog ports and build the parameter lists.
#   - $blackbox1 holds a Scala BlackBox header in which test_c is replaced by
#     $func_name.
#   - Inputs matching ap_clk / ap_rst set $ap_clk / $ap_rst.
#   - Inputs named <arg>_req_full_n, <arg>_rsp_empty_n or <arg>_datain are
#     counted per <arg> in %verilog_input_pointer; for the _datain port the
#     argument position is pushed to @pindices and <arg> is appended to
#     @verilog_input_pointer_arg (ordered list).
#   - Remaining inputs that do not match the ap_ test are recorded in
#     @verilog_input_scalar with their argument position in @sindices.
#   - The script dies when scalar + pointer arguments exceed 2, or when a
#     pointer argument does not have exactly 3 matching inputs.
#   - Any output matching ap_return sets $ap_return.
#   - Scalar widths come from @verilog_input_size; pointer data widths come from
#     the <arg>_dataout output, and the address width is fixed to "64".
#
# NOTE(review): the scalar test uses m/ap_(,*)/ -- `,*` looks like a typo for
# `.*`; as written it matches any name containing "ap_". Confirm.
# NOTE(review): `ne 3` / `eq 1` compare numbers with string operators.
# NOTE(review): closing braces and string terminators are not visible in this
# listing, so the exact nesting of the statements below cannot be confirmed.
my $blackbox1 = "
package hls_test_c
import Chisel._
import freechips.rocketchip.config.{Parameters, Field}
import freechips.rocketchip.tile._
import freechips.rocketchip.util._
import vivadoHLS._
class test_c() extends BlackBox() {
$blackbox1 =~ s/test_c/$func_name/g;
my $i = undef;
my $bb_body = "";
# now if the input name does not start with ap, we assume it is an arg
my $ap_return = 0;
my $ap_clk = 0;
my $ap_rst = 0;
my @verilog_input_scalar = ();
my %verilog_input_pointer = ();
my @verilog_input_pointer_arg = (); # An ordered list of args
my $arg_count = 0;
my @sindices = ();
my @pindices = ();
for( $i = 0; $i < @verilog_input; $i = $i + 1 ){
my $input_name = $verilog_input[$i];
my $input_size = $verilog_input_size[$i];
if ($input_name =~ m/ap_clk(.*)/){
$ap_clk = 1;
elsif ($input_name =~ m/ap_rst(.*)/){
$ap_rst = 1;
# If the input is a ap_bus port, the signals should match the following format
# There should be 3 different input signals
elsif($input_name =~ m/(\S+)_req_full_n/ or $input_name =~ m/(\S+)_rsp_empty_n/ or $input_name =~ m/(\S+)_datain/){
my $arg_name = $1;
if ($input_name =~ m/(\S+)_datain/) {
push(@pindices, $arg_count);
$arg_count = $arg_count + 1;
push(@verilog_input_pointer_arg, $arg_name);
if (defined $verilog_input_pointer{$arg_name}) {
$verilog_input_pointer{$arg_name} += 1;
} else {
$verilog_input_pointer{$arg_name} = 1;
elsif(!($input_name =~ m/ap_(,*)/)){
push (@verilog_input_scalar, $input_name);
push(@sindices, $arg_count);
$arg_count = $arg_count + 1;
print("Not func args: $input_name\n");
#foreach my $arg (keys %verilog_input_pointer) {
foreach my $arg (@verilog_input_pointer_arg) {
print("pointer_arg: $arg\n");
my $hash_count = keys %verilog_input_pointer;
print("hash_count: $hash_count\n");
if(@verilog_input_scalar + $hash_count> 2){
print "verilog_input_scalar: ";
my $in_str = join ' ', @verilog_input_scalar;
print $in_str."\n";
die "Only accept function with no more than 2 arguments!\n";
foreach my $arg (keys %verilog_input_pointer) {
if ($verilog_input_pointer{$arg} ne 3) {
die "The AP bus interfance did not generate expected number of inputs!\n";
for( $i = 0; $i < @verilog_output; $i = $i + 1 ){
my $output_name = $verilog_output[$i];
my $output_size = $verilog_output_size[$i];
if ($output_name =~ m/ap_return(.*)/){
$ap_return = 1;
$bb_body = $bb_body."\tio.".$output_name.".setName(\"".$output_name."\")\n";
if ($ap_clk eq 1){
$bb_body = $bb_body."addClock(Driver\.implicitClock)\n".'renameClock("clk", "ap_clk")'."\n";
if ($ap_rst eq 1){
$bb_body = $bb_body.'renameReset("ap_rst")'."\n";
my $bb_def = "class HLS$func_name"."Blackbox() extends Module {\n";
# Scalar IO Parameter
my @sdata_widths = ();
#my @sindices = ();
#my $sidx = 0;
foreach my $arg (@verilog_input_scalar) {
my $sdata_idx = first { $verilog_input[$_] eq $arg} 0..$#verilog_input;
my $sdata_width = $verilog_input_size[$sdata_idx];
push(@sdata_widths, $sdata_width);
#push(@sindices, $sidx);
#$sidx += 1;
my $sindices_str = join ',',@sindices;
my $sdata_widths_str = join ',',@sdata_widths;
print "scalar data_widths: $sdata_widths_str\n";
$bb_def .= "\tval scalar_io_dataWidths = List($sdata_widths_str)\n";
$bb_def .= "\tval scalar_io_argLoc = List($sindices_str) //Lists the argument number of the scalar_io\n";
# Pointer IO Parameter
my @addr_widths = ();
my @data_widths = ();
#my @indices = ();
my $idx = 0;
foreach my $arg (sort keys %verilog_input_pointer) {
my $addr_signal = $arg."_address";
my $data_signal = $arg."_dataout";
my $addr_idx = first { $verilog_output[$_] eq $addr_signal } 0..$#verilog_output;
my $data_idx = first { $verilog_output[$_] eq $data_signal } 0..$#verilog_output;
#my $addr_width = $verilog_output_size[$addr_idx];
my $addr_width = "64";
my $data_width = $verilog_output_size[$data_idx];
push(@addr_widths, $addr_width);
push(@data_widths, $data_width);
#push(@indices, $idx);
$idx += 1;
#my $indices_str = join ',',@indices;
my $pindices_str = join ',',@pindices;
my $addr_widths_str = join ',',@addr_widths;
print "addr_widths: $addr_widths_str\n";
my $data_widths_str = join ',',@data_widths;
print "data_widths: $data_widths_str\n";
foreach my $arg (@verilog_input_pointer_arg) {
print("pointer_arg: $arg\n");
# Emit the C wrapper text and write it to $wrapper_header.
#   - The signature is `<return type> <wrapper name>(uint64_t <arg>, ...)`, with
#     scalar arguments listed first and pointer arguments after them.
#   - XCUSTOM_ACC is defined as $rocc_index.
#   - One ROCC_INSTRUCTION_* macro is chosen by whether ap_return was seen and
#     by the argument count: _D / _DS / _DSS with a return value, _S / _SS
#     without (nothing is emitted for zero arguments and no return value).
#   - ROCC_BARRIER() and `return ret_val;` lines are appended.
#
# NOTE(review): $return_type is assigned "void " and then "uint64_t "; the
# condition that presumably separates the two is not visible in this listing.
# NOTE(review): the result of `open FILE` is not checked and FILE is not
# closed in the visible lines.
my $wrapper ='
#include "rocc.h"
my $return_type = "void ";
$return_type = "uint64_t ";
my $total_args = @verilog_input_scalar + $hash_count;
$wrapper .= "$return_type $wrapper_func_name(";
my @args = ();
foreach my $arg (@verilog_input_scalar) {
push(@args, $arg);
foreach my $arg (@verilog_input_pointer_arg) {
push(@args, $arg);
my $arg_str = join ', ', @args;
my $i = 0;
foreach my $arg (@args) {
if ($i != 0){
$wrapper .=", "
$wrapper .="uint64_t $arg";
$wrapper .= ") {
$wrapper .= " uint64_t ret_val;\n";
$wrapper .="
#define XCUSTOM_ACC ";
$wrapper .= $rocc_index."\n";
if ($ap_return){
if ($total_args == 0) {
$wrapper.=" ROCC_INSTRUCTION_D(XCUSTOM_ACC, ret_val, 0);\n";
} elsif ($total_args == 1) {
$wrapper.=" ROCC_INSTRUCTION_DS(XCUSTOM_ACC, ret_val, $arg_str, 0);\n";
} else {
$wrapper.=" ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, ret_val, $arg_str, 0);\n";
} else{
if ($total_args == 0) {
} elsif ($total_args == 1) {
$wrapper.=" ROCC_INSTRUCTION_S(XCUSTOM_ACC, $arg_str, 0);\n";
} else {
$wrapper.=" ROCC_INSTRUCTION_SS(XCUSTOM_ACC, $arg_str, 0);\n";
$wrapper .= " ROCC_BARRIER();\n";
$wrapper.=" #endif\n";
$wrapper .= " return ret_val;\n";
open FILE, "> $wrapper_header";
print FILE $wrapper;

use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);
use Tie::IxHash;
# Generates the bare-metal C wrapper header (bm_wrapper.h) for a memory-mapped
# HLS accelerator, using the register map documented in
# ../verilog/<func_name>_control_s_axi.v.
#
# Inputs (command line): file_name, func_name, func_base_addr, prefix(Optional)
#   func_base_addr  emitted later as the value of ACCEL_BASE
#   prefix          when given, prepended to func_name; the wrapper function
#                   name is built from the unprefixed name
# Requires the RDIR environment variable.
#
# NOTE(review): this listing appears to have lost lines (closing braces, the
# loop that reads VERILOG line by line, any exit after "Please source!") --
# confirm against the complete file.
my $dir = getcwd;
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $func_base_addr = $ARGV[2];
my $prefix = undef;
my $num_args = $#ARGV + 1;
if ($num_args > 3) {
$prefix = $ARGV[3];
my $rdir = $ENV{'RDIR'};
#print $rdir;
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name";
# The wrapper name is taken before the prefix is applied to $func_name.
my $wrapper_func_name = $func_name."_wrapper";
my $wrapper_header= "bm_wrapper.h";
if ($prefix) {
$func_name = $prefix.$func_name;
my $bm_inc_path = $rdir."/hls/sw/bm/";
#############################PARSE Verilog##############################
# %var_dict maps a signal name to the list of register offsets documented for
# it; it is tied to Tie::IxHash so keys keep their insertion order.
my %var_dict;
tie %var_dict, "Tie::IxHash";
my $verilog_file = "$dir/../verilog/$func_name"."_control_s_axi.v";
print "Parsing ".$verilog_file."\n";
# parse the verilog file to get the info we need
if(!open VERILOG, "$verilog_file"){
print $!;
} else {
# $start is cleared at the "Parameter" banner and set at the "Address Info"
# banner. NOTE(review): no test of $start is visible in this listing --
# presumably it gates the address matching; confirm.
my $start = 0;
my $line = undef;
$line = $_;
if($line =~ m/------------------------Parameter----------------------/){
$start = 0;
# A line such as "0x10 : Data signal of a" adds offset 0x10 to signal a's list.
if($line =~ m/(0x\S+) : Data signal of (\S+)/){
my $base_addr = $1;
my $var = $2;
#print("$base_addr : $var\n");
if (exists $var_dict{$var}) {
push (@{$var_dict{$var}}, $base_addr);
} else {
my @addr = ();
push (@addr, $base_addr);
$var_dict{$var} = \@addr;
if($line =~ m/------------------------Address Info------------------/){
$start= 1;
#############################GENERATE Software Bare-metal Wrappers##############################
# We want ordered hash so we didn't add this piece of code into a func
#sub generate_bm_wrapper {
# my %var_dict=%{$_[0]};
# tie %var_dict, "Tie::IxHash";
# my $func_base_addr = $_[1];
#
# Builds the wrapper text in $wrapper and writes it to $wrapper_header:
#   1. #include of mmio.h from $bm_inc_path, then #defines for ACCEL_BASE
#      ($func_base_addr), AP_DONE_MASK, ACCEL_INT and one
#      ACCEL_<var>_<idx> per register offset recorded in %var_dict.
#   2. The signature: the return type is uint32_t, or uint64_t when ap_return
#      has two offsets; `void` is used when there is no ap_return. Each argument
#      is uint32_t, or uint64_t when it has two offsets.
#   3. The body: write 0 to ACCEL_BASE + ACCEL_INT, write every argument 32 bits
#      at a time (offset index 1 gets the value shifted right by 32), write 1
#      to ACCEL_BASE, poll ACCEL_BASE against AP_DONE_MASK, then read the
#      return value when there is one.
#
# NOTE(review): the loops test `$var eq "ap_return"` but the statement that
# presumably skips that entry is not visible in this listing -- confirm.
# NOTE(review): when reading the return value, offset index 1 is combined as
# `reg_read32(...) >> 32`; shifting the 32-bit read right by 32 cannot place it
# in the upper half of a 64-bit result -- a left shift of a 64-bit cast looks
# intended. Confirm against the generated C.
# NOTE(review): the result of `open FILE` is not checked and FILE is not
# closed in the visible lines.
foreach my $var (keys %var_dict) {
print($var.": ");
my @addr = @{$var_dict{$var}};
foreach my $base_addr(@addr) {
my $wrapper = '#include "'.$bm_inc_path.'/mmio.h"'."\n";
#$wrapper .= '#include "'.$bm_inc_path.'/time.h"'."\n";
$wrapper .= '#define ACCEL_BASE '.$func_base_addr."\n";
$wrapper .= "#define AP_DONE_MASK 0b10\n";
$wrapper .= "#define ACCEL_INT 0x4\n";
foreach my $var (keys %var_dict) {
my @addr = @{$var_dict{$var}};
my $idx = 0;
foreach my $base_addr(@addr) {
$wrapper .="#define "."ACCEL_$var"."_$idx"." $base_addr\n";
$idx +=1;
my $ap_return = 0;
my $ap_return_type = "uint32_t";
if (exists $var_dict{"ap_return"}) {
my $size=@{$var_dict{"ap_return"}};
if ($size == 2){
$ap_return_type = "uint64_t";
$ap_return = 1;
if ($ap_return){
$wrapper .= $ap_return_type." $wrapper_func_name(";
} else {
$wrapper .="void $wrapper_func_name(";
my @arglist=();
foreach my $var (keys %var_dict) {
if ($var eq "ap_return") {
my $var_type = "uint32_t";
my $size=@{$var_dict{$var}};
if ($size == 2){
$var_type = "uint64_t";
push(@arglist, "$var_type $var");
my $args = join ', ', @arglist;
$wrapper.= $args.") {";
$wrapper.= '
// Disable Interrupt
reg_write32(ACCEL_BASE + ACCEL_INT, 0x0);
foreach my $var (keys %var_dict) {
if ($var eq "ap_return") {
my @addr = @{$var_dict{$var}};
my $idx = 0;
foreach my $base_addr(@addr) {
my $shift = "";
if ($idx == 1){
$shift = " >> 32";
}elsif($idx > 1){
die "Index exceeds limit!\n";
$wrapper .=" reg_write32(ACCEL_BASE + ACCEL_$var"."_$idx, (uint32_t) ($var$shift));\n";
$idx +=1;
$wrapper .='
// Write to ap_start to start the execution
reg_write32(ACCEL_BASE, 0x1);
// Done?
int done = 0;
while (!done){
done = reg_read32(ACCEL_BASE) & AP_DONE_MASK;
# If there a return value
if ($ap_return){
my @addr = @{$var_dict{"ap_return"}};
$wrapper .= "
$ap_return_type ret_val = 0;\n";
my $idx = 0;
foreach my $base_addr(@addr) {
my $shift = "";
if ($idx == 1){
$shift = " >> 32";
}elsif($idx > 1){
die "Index exceeds limit!\n";
$wrapper .=" ret_val = (reg_read32(ACCEL_BASE + ACCEL_ap_return"."_$idx)$shift) | ret_val;\n";
$idx +=1;
$wrapper .= " return ret_val;\n";
$wrapper .="}\n";
open FILE, "> $wrapper_header";
print FILE $wrapper;
#generate_bm_wrapper(\%var_dict, $func_base_addr);

use strict;
use warnings;
use JSON qw( decode_json );
use Cwd;
use File::Copy;
# Take in 1 arg which is the json file path
# Return two arrays of arrays
# parse_json($json_fn)
#
# Reads the JSON accelerator description at $json_fn and returns two array
# references:
#   1. RoCC accelerators: one [pgm, func] pair for each of RoCC.custom0 ..
#      RoCC.custom3 that has non-empty 'pgm' and 'func' values, in slot order.
#   2. TLL2 accelerators: one [pgm, func, addr] triple for each element of the
#      TLL2 array that has non-empty 'pgm', 'func' and 'addr' values, in array
#      order.
# Every accepted accelerator is also echoed to STDOUT.
#
# Dies when the file cannot be opened, is not valid JSON, or its top-level
# value is not an object.
#
# Compared with the original statements:
#   - all blocks are properly closed;
#   - lookups no longer autovivify 'RoCC' / 'customN' in the decoded data;
#   - missing, null or non-scalar fields are treated like empty strings
#     (entry skipped) instead of being pushed as undef with warnings;
#   - a 'RoCC' value that is not an object, or a 'TLL2' value that is not an
#     array, is reported with a warning and ignored instead of crashing.
sub parse_json {
    my $json_fn = $_[0];
    open my $fh, '<', $json_fn or die "error opening $json_fn: $!";
    my $json = do { local $/; <$fh> };
    close $fh;
    my $decoded = decode_json($json);
    ref($decoded) eq 'HASH'
        or die "error parsing $json_fn: top-level JSON value is not an object\n";

    # Fetch a plain scalar field; anything missing, null or nested yields "".
    my $field = sub {
        my ($entry, $key) = @_;
        return "" unless ref($entry) eq 'HASH';
        my $value = $entry->{$key};
        return "" if !defined($value) or ref($value);
        return $value;
    };

    my @RoCC_accels = ();
    print("\nRoCC Accels: \n");
    my $rocc = {};
    if (exists $decoded->{'RoCC'}) {
        if (ref($decoded->{'RoCC'}) eq 'HASH') {
            $rocc = $decoded->{'RoCC'};
        } else {
            warn "parse_json: 'RoCC' in $json_fn is not an object; ignoring it\n";
        }
    }
    # Only the four RoCC custom opcode slots are considered.
    for my $i (0 .. 3) {
        my $entry = $rocc->{"custom$i"};
        my $pgm  = $field->($entry, 'pgm');
        my $func = $field->($entry, 'func');
        next if $pgm eq "" or $func eq "";
        print("\tpgm: $pgm\t func: $func\n");
        push(@RoCC_accels, [$pgm, $func]);
    }

    print("TLL2 Accels: \n");
    my @TLL2_accels = ();
    my $tll2 = [];
    if (exists $decoded->{'TLL2'}) {
        if (ref($decoded->{'TLL2'}) eq 'ARRAY') {
            $tll2 = $decoded->{'TLL2'};
        } else {
            warn "parse_json: 'TLL2' in $json_fn is not an array; ignoring it\n";
        }
    }
    foreach my $accel (@$tll2) {
        my $pgm  = $field->($accel, 'pgm');
        my $func = $field->($accel, 'func');
        my $addr = $field->($accel, 'addr');
        next if $pgm eq "" or $func eq "" or $addr eq "";
        print("\tpgm: $pgm\t func: $func\t addr: $addr\n");
        push(@TLL2_accels, [$pgm, $func, $addr]);
    }

    return (\@RoCC_accels, \@TLL2_accels);
}

use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);
my $dir = getcwd;
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $rdir = $ENV{'RDIR'};
my $prefix = undef;
my $num_args = $#ARGV + 1;
if ($num_args > 2) {
$prefix = $ARGV[2];
if ($prefix) {
$func_name = $prefix.$func_name;
#print $rdir;
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
my $verilog_file = "$dir/../verilog/$func_name".".v";
my $line = undef;
my @verilog_input = ();
my @verilog_input_size = ();
my @verilog_output = ();
my @verilog_output_size = ();
print "Parsing ".$verilog_file."\n";
# parse the verilog file to get the info we need
if(!open VERILOG, "$verilog_file"){
print $!;
} else {
$line = $_;
if($line =~ m/^\s*input\s+(.*)/){
my $input = $1;
#print "input:$input\n";
if($input =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
my $end = $1;
my $start = $2;
my $input_name = $3;
#print "here!"."$input_name\n";
push (@verilog_input, $input_name);
my $size = $end - $start + 1;
push(@verilog_input_size, $size);
}elsif ($input =~ m/\s*(.*)\s*;/){
my $input_name = $1;
#print "here!"."$input_name\n";
push (@verilog_input, $input_name);
push(@verilog_input_size, 1);
}elsif($line =~ m/^\s*output\s+(.*)/){
my $output = $1;
#print "output:$output\n";
if($output =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
my $end = $1;
my $start = $2;
my $output_name = $3;
#print "here!"."$output_name\n";
push(@verilog_output, $output_name);
my $size = $end - $start + 1;
push(@verilog_output_size, $size);
}elsif ($output =~ m/\s*(.*)\s*;/){
my $output_name = $1;
#print "here!"."$output_name\n";
push (@verilog_output, $output_name);
push(@verilog_output_size, 1);
my $in_str = join ' ', @verilog_input;
print $in_str."\n";
my $out_str = join ' ', @verilog_output;
print $out_str."\n";
#creat scala folder
my $scala_dir = "$dir/../scala";
mkdir $scala_dir unless (-d $scala_dir);
print "Generating BlackBox file ...\n";
# should be under scala folder
open BB, ">$scala_dir/$func_name"."_blackbox.scala";
my $blackbox1 = "
package hls_test_c
import Chisel._
import chisel3.experimental.dontTouch
import freechips.rocketchip.config.{Parameters, Field}
import freechips.rocketchip.tile._
import freechips.rocketchip.util._
import vivadoHLS._
class test_c() extends BlackBox() {
$blackbox1 =~ s/test_c/$func_name/g;
print BB $blackbox1;
print BB "\tval io = new Bundle {\n";
my $i = undef;
my $bb_body = "";
# now if the input name does not start with ap, we assume it is an arg
my $ap_return = 0;
my $ap_clk = 0;
my $ap_rst = 0;
my @verilog_input_scalar = ();
my %verilog_input_pointer = ();
my @verilog_input_pointer_arg = (); # An ordered list of args
my $arg_count = 0;
my @sindices = ();
my @pindices = ();
for( $i = 0; $i < @verilog_input; $i = $i + 1 ){
my $input_name = $verilog_input[$i];
my $input_size = $verilog_input_size[$i];
if ($input_name =~ m/ap_clk(.*)/){
$ap_clk = 1;
elsif ($input_name =~ m/ap_rst(.*)/){
$ap_rst = 1;
# If the input is a ap_bus port, the signals should match the following format
# There should be 3 different input signals
elsif($input_name =~ m/(\S+)_req_full_n/ or $input_name =~ m/(\S+)_rsp_empty_n/ or $input_name =~ m/(\S+)_datain/){
my $arg_name = $1;
if ($input_name =~ m/(\S+)_datain/) {
push(@pindices, $arg_count);
$arg_count = $arg_count + 1;
push(@verilog_input_pointer_arg, $arg_name);
if (defined $verilog_input_pointer{$arg_name}) {
$verilog_input_pointer{$arg_name} += 1;
} else {
$verilog_input_pointer{$arg_name} = 1;
elsif(!($input_name =~ m/ap_(,*)/)){
push (@verilog_input_scalar, $input_name);
push(@sindices, $arg_count);
$arg_count = $arg_count + 1;
print("Not func args: $input_name\n");
print BB "\t\tval $input_name = ";
if ($input_name =~ m/ap_clk(.*)/){
print BB "Clock\(INPUT\)\n";
if ($input_size == 1){
print BB "Bool\(INPUT\)\n";
print BB "Bits\(INPUT, width = $input_size\)\n";
if($input_name ne "ap_clk" && $input_name ne "ap_rst"){
$bb_body = $bb_body."\tio.".$input_name.".setName(\"".$input_name."\")\n";
#foreach my $arg (keys %verilog_input_pointer) {
foreach my $arg (@verilog_input_pointer_arg) {
print("pointer_arg: $arg\n");
my $hash_count = keys %verilog_input_pointer;
print("hash_count: $hash_count\n");
if(@verilog_input_scalar + $hash_count> 2){
print "verilog_input_scalar: ";
my $in_str = join ' ', @verilog_input_scalar;
print $in_str."\n";
die "Only accept function with no more than 2 arguments!\n";
foreach my $arg (keys %verilog_input_pointer) {
if ($verilog_input_pointer{$arg} ne 3) {
die "The AP bus interfance did not generate expected number of inputs!\n";
for( $i = 0; $i < @verilog_output; $i = $i + 1 ){
my $output_name = $verilog_output[$i];
my $output_size = $verilog_output_size[$i];
if ($output_name =~ m/ap_return(.*)/){
$ap_return = 1;
print BB "\t\tval $output_name = ";
if ($output_size == 1){
print BB "Bool(OUTPUT)\n";
print BB "Bits(OUTPUT, width = $output_size)\n";
$bb_body = $bb_body."\tio.".$output_name.".setName(\"".$output_name."\")\n";
if ($ap_clk eq 1){
$bb_body = $bb_body."addClock(Driver\.implicitClock)\n".'renameClock("clk", "ap_clk")'."\n";
if ($ap_rst eq 1){
$bb_body = $bb_body.'renameReset("ap_rst")'."\n";
print BB "\t}\n";
#print BB "$bb_body\n";
#print BB "moduleName = "."\"$func_name\"\n";
print BB "}\n";
my $bb_def = "class HLS$func_name"."Blackbox() extends Module {\n";
# Scalar IO Parameter
my @sdata_widths = ();
#my @sindices = ();
#my $sidx = 0;
foreach my $arg (@verilog_input_scalar) {
my $sdata_idx = first { $verilog_input[$_] eq $arg} 0..$#verilog_input;
my $sdata_width = $verilog_input_size[$sdata_idx];
push(@sdata_widths, $sdata_width);
#push(@sindices, $sidx);
#$sidx += 1;
my $sindices_str = join ',',@sindices;
my $sdata_widths_str = join ',',@sdata_widths;
print "scalar data_widths: $sdata_widths_str\n";
$bb_def .= "\tval scalar_io_dataWidths = List($sdata_widths_str)\n";
$bb_def .= "\tval scalar_io_argLoc = List($sindices_str) //Lists the argument number of the scalar_io\n";
# Pointer IO Parameter
my @addr_widths = ();
my @data_widths = ();
#my @indices = ();
my $idx = 0;
foreach my $arg (sort keys %verilog_input_pointer) {
my $addr_signal = $arg."_address";
my $data_signal = $arg."_dataout";
my $addr_idx = first { $verilog_output[$_] eq $addr_signal } 0..$#verilog_output;
my $data_idx = first { $verilog_output[$_] eq $data_signal } 0..$#verilog_output;
#my $addr_width = $verilog_output_size[$addr_idx];
my $addr_width = "64";
my $data_width = $verilog_output_size[$data_idx];
push(@addr_widths, $addr_width);
push(@data_widths, $data_width);
#push(@indices, $idx);
$idx += 1;
#my $indices_str = join ',',@indices;
my $pindices_str = join ',',@pindices;
my $addr_widths_str = join ',',@addr_widths;
print "addr_widths: $addr_widths_str\n";
my $data_widths_str = join ',',@data_widths;
print "data_widths: $data_widths_str\n";
$bb_def .= "\tval ap_bus_addrWidths = List(".$addr_widths_str.")\n";
$bb_def .= "\tval ap_bus_dataWidths = List(".$data_widths_str.")\n";
#$bb_def .= "\tval ap_bus_argLoc = List(".$indices_str.")\n";
$bb_def .= "\tval ap_bus_argLoc = List(".$pindices_str.")\n";
my $ret_width = 1;
if ($ap_return eq 1){
my $ret_idx = first { $verilog_output[$_] eq 'ap_return'} 0..$#verilog_output;
$ret_width = $verilog_output_size[$ret_idx];
$bb_def .= "\tval io = new Bundle {
\tval ap = new ApCtrlIO(dataWidth = $ret_width)
\tval ap_bus = HeterogeneousBag( {
case (aw, dw) => new ApBusIO(dw, aw)
if (@verilog_input_scalar > 0){
$bb_def .="\tval scalar_io = HeterogeneousBag( => UInt(INPUT, width = w)))";
$bb_def .="
\tval bb = Module(new $func_name())
\ := io.ap.start
\tio.ap.done :=
\tio.ap.idle :=
\tio.ap.ready :=
if ($ap_return eq 1) {
$bb_def .= "\tio.ap.rtn :=\n";
if ($ap_rst eq 1) {
$bb_def .= "\ := reset\n";
if ($ap_clk eq 1) {
$bb_def .= "\ := clock\n";
$idx = 0;
#foreach my $arg (keys %verilog_input_pointer) {
foreach my $arg (@verilog_input_pointer_arg) {
$bb_def.="\tio.ap_bus($idx).req.din :=$arg"."_req_din
\$arg"."_req_full_n := io.ap_bus($idx).req_full_n
\tio.ap_bus($idx).req_write :=$arg"."_req_write
\$arg"."_rsp_empty_n := io.ap_bus($idx).rsp_empty_n
\tio.ap_bus($idx).rsp_read :=$arg"."_rsp_read
\tio.ap_bus($idx).req.address :=$arg"."_address
\$arg"."_datain := io.ap_bus($idx).rsp.datain
\tio.ap_bus($idx).req.dataout :=$arg"."_dataout
\tio.ap_bus($idx).req.size :=$arg"."_size
$idx += 1;
$idx = 0;
foreach my $arg (@verilog_input_scalar) {
$bb_def .="\$arg := io.scalar_io($idx)\n";
$idx += 1;
$bb_def .= "}";
print BB $bb_def;
close BB;
print "Copying Vivado HLS Interface file ...\n";
copy("$rdir/tools/centrifuge/scripts/chisel_rocc_aux/ap_bus.scala", "$scala_dir/") or die "Copy failed: $!";
print "Copying ROCC Memory Controller file ...\n";
copy("$rdir/tools/centrifuge/scripts/chisel_rocc_aux/memControllerComponents.scala", "$scala_dir/") or die "Copy failed: $!";
print "Copying Controller Utilities file ...\n";
copy("$rdir/tools/centrifuge/scripts/chisel_rocc_aux/controlUtils.scala", "$scala_dir/") or die "Copy failed: $!";
print "Generating Control file ...\n";
open CT, ">$scala_dir/$func_name"."_accel.scala";
my $control1 = '
package hls_test_c
import Chisel._
import chisel3.experimental.dontTouch
import freechips.rocketchip.config.{Parameters, Field}
import freechips.rocketchip.tile._
import freechips.rocketchip.config._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.rocket._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._
import freechips.rocketchip.system._
import vivadoHLS._
import memControl._
import hls_test_c._
class HLStest_cControl(opcodes: OpcodeSet)(implicit p: Parameters) extends LazyRoCC(opcodes) {
override lazy val module = new HLStest_cControlModule(this)
class HLStest_cControlModule(outer: HLStest_cControl)(implicit p: Parameters) extends LazyRoCCModuleImp(outer)
with HasCoreParameters {
$control1 =~ s/test_c/$func_name/g;
print CT $control1;
#TODO modify accelerator arg!
my $control2 = '
val result = Reg(init=Bits(0, width=xLen))
val respValid = Reg(init=Bool(false))
val rdy = Reg(init=Bool(true))
val busy = Reg(init=Bool(false))
val bufferedCmd = Reg(init=Wire( new RoCCCommand()(p)))
val cmd = Queue(io.cmd)
val funct = bufferedCmd.inst.funct
val rs1 = bufferedCmd.rs1
val rs2 = bufferedCmd.rs2
val rdTag = bufferedCmd.inst.rd
val doAdd = funct === UInt(0)
val rs1_unbuffered = cmd.bits.rs1
val rs2_unbuffered = cmd.bits.rs2
val idle :: working :: Nil = Enum(UInt(),2)
val state = Reg(init=idle)
bufferedCmd.inst.funct := 0.asUInt(7.W)
bufferedCmd.inst.rs1 := 0.asUInt(5.W)
bufferedCmd.inst.rs2 := 0.asUInt(5.W)
bufferedCmd.inst.rd := 0.asUInt(5.W)
bufferedCmd.inst.opcode := 0.asUInt(5.W)
bufferedCmd.rs1 := 0.asUInt(64.W)
bufferedCmd.rs2 := 0.asUInt(64.W)
// Assign Outputs to Appropriate registers
io.resp.valid := respValid && bufferedCmd.inst.xd
//need to set rd to the value in the request. Otherwise bad things happen
//in this case, processor stalls
io.resp.bits.rd := rdTag := result
io.busy := busy
cmd.ready := rdy
//===== Begin Accelerator =====
val accel = Module(new HLStest_cBlackbox())
//Acclerator Registers (we buffer inputs to accelerator)
val ap_start = Reg(init=Bool(false))
//Assign Inputs to Accelerator := ap_start
#my $rs1 = $verilog_input_scalar[0];
#my $rs2 = $verilog_input_scalar[1];
for( $i = 0; $i < @verilog_input_scalar; $i = $i + 1 ){
my $number = $i + 1;
$control2 = $control2."$i) := rs$number\n";
if ($ap_return eq 1){
$control2 = $control2."val ap_return =\n";
$control2 = $control2."val ap_return = UInt(4)\n";
$control2 = $control2.'//Accelerator Outputs
val ap_done =
val ap_idle =
val ap_ready =
//===== End Accelerator =====
//===== Begin Mem Controller =====
//The following are specific to the accelerator. They set the address and data widths of the ap_bus interfaces
val dataWidth = accel.ap_bus_dataWidths
val addrWidth = accel.ap_bus_addrWidths
val reqBufferLen = 4
val rspBufferLen = 4
val maxReqBytes = xLen/8
val roccAddrWidth = coreMaxAddrBits
val roccDataWidth = coreDataBits
val roccTagWidth = coreDCacheReqTagBits
val roccCmdWidth = M_SZ
val roccTypWidth = log2Ceil(coreDataBytes.log2 + 1)
//val numTags = p(RoccMaxTaggedMemXacts)
val numTags = 16
val tagOffset = 0 //Used if multiple accelerators to avoid tag collisions
//Instantiate Controller
val memControl = Module(new MemController(dataWidth, addrWidth, reqBufferLen, rspBufferLen, maxReqBytes, roccAddrWidth, roccDataWidth, roccTagWidth, roccCmdWidth, roccTypWidth, numTags, tagOffset))
if( > 0){
//We have memory bus interfaces on the accelerator, create a memory controller
//Hook up controller
for(i <- 0 until{
// <> := := := := := :=
io.mem.req.bits.addr :=
io.mem.req.bits.tag :=
io.mem.req.bits.cmd :=
io.mem.req.bits.size :=
// If the address is not a mulitple of 8 byte which the coreDataBits width,
// We have to shift the N-bit data to the right place in a 64-bit word
val shift = ( & UInt( log2Up(coreDataBits) - 1 )) << UInt(3) := << shift(7,0)
io.mem.req.valid := := io.mem.req.ready
//io.mem.req.bits.phys := Bool(true)
//val roCCRespAddr = UInt(INPUT, width = roccAddrWidth) // coreMaxAddrBits) := io.mem.resp.bits.tag := io.mem.resp.bits.cmd :=
//val roCCRespTyp := io.mem.resp.valid
//===== End Mem Controller =====
# The sequence of arg 1 and 2 depends on the sequence they show up in the verilog file
# TODO think about a better way to add this
$control2 .= '//===== Begin Argument Handling =====
//TODO: currently only works for 2 argument calls. Generalize
val cArgs = List(rs1, rs2)
val cArgsUnbuffered = List(rs1_unbuffered, rs2_unbuffered)
//Argument numbers are specified in the blackbox
if (@verilog_input_scalar > 0){
$control2 .= '//Scalar values
for(i <- 0 until{ := cArgs(accel.scalar_io_argLoc(i))
$control2 .= '//ap_bus offsets
for(i <- 0 until{
//ap_bus uses the unbuffered input because it is buffered on the first cycle := cArgsUnbuffered(accel.ap_bus_argLoc(i))
//===== End Argument Handling =====
$control2 .='
if( > 0){
//Will run ap_start after offsets loaded
for(i <- 0 until{ := (state === idle) &&
//===== Begin Controller State Machine Logic =====
is (idle){
//Waiting for command
//We have a valid, unserviced command. This code takes ready low so
//we should not accedently cause an infinite loop
bufferedCmd := cmd.bits //Accelerator takes from bufferedCmd directly
busy := Bool(true)
rdy := Bool(false)
//Load the offsets
/*if( > 0){
//Will run ap_start after offsets loaded
for(i <- 0 until{ := Bool(true)
ap_start := Bool(true) //Set next state
state := working
//Note: Based on timing diagram in Vivado HLS user guide (pg 157), read occurs
//AFTER the 1st cycle. There will be a 1 cycle delay before input read as
//ap_start will be seen on next cycle. Idealy, ap_start would be raised 1 cycle
//earlier (ie. not using a register) or it would read the input immediatly
//when ap_start is raised (I assume this is due to an internal state machine).
//However, this would ruin the sequential nature of the state machine. It is
//possible to save a cycle by assigning ap_start as cmd.valid && state===idle
//&& !returned which would be asyncronous and probably trigger 1 cycle earlier.
//There would be more stringent timing requirements in this case though as the
//result would need to propogate before the next posEdge of the clk.
when(respValid && io.resp.ready){
//The processor has read the response. There is no more data for it
//Drive resp.valid low to avoid stalling processor
respValid := Bool(false)
is (working){
//Stop Loading offsets
/*if( > 0){
//Will run ap_start after offsets loaded
for(i <- 0 until{ := Bool(false)
//Waiting for accelerator to finish
//All of the conditionals below can occure simultaniously
//and should be kept as seperart when statements
//The accelerator has completed operation (user guidepg 156) and has
//has optionally generated a result (not not all accelerators will
//generated a result. This is technically not the same as ap_idle
//which signals when the accelerator is no longer busy. It is actually
//ap_ready actually determines when the accelerator is ready to accept
//more inputs. This is important for accelerators that do not operate
//in a syncronous mode. This is not true for the types of accelerators
//we are creating.
result := ap_return
respValid := Bool(true)
//The accelerator has read the inputs and is ready to accept new ones.
//According to the timing diagram,
//ap_start should be deasserted for the next posedge.
ap_start := Bool(false)
if( == 0){
//if the operation was completed (result valid), and the accelerator is ready
//the accerator is ready for the next operation and the controller is
//returned to the idle state to wait for a new command. the ready line
//is pulled high to advertise that the accelerator is ready.
//from the manual, it appears that ap_done is always asserted when the
//accelerator is finished. if this is true, using ap_done && ap_ready
//should save one cycle over using ap_idle as the trigger. this is because,
//according to the timing diagram in the user manual (pg
rdy := Bool(true) // ready to accept new commands
busy := Bool(false) // operation complete, no longer busy
state := idle
//note: this code could possibly be placed in the ap_done action to save
//one wasted cycle. it is not clear from the user guide (pg 157), ap_idle
//is asserted one cycle after ap_done. if this arrangment has problems,
//transitioning on ap_idle should work but will result in an unnessicary
//extra cycle.
when(ap_idle && !{
//if the operation was completed (result valid), and the accelerator is ready
//the accerator is ready for the next operation and the controller is
//returned to the idle state to wait for a new command. the ready line
//is pulled high to advertise that the accelerator is ready.
//from the manual, it appears that ap_done is always asserted when the
//accelerator is finished. if this is true, using ap_done && ap_ready
//should save one cycle over using ap_idle as the trigger. this is because,
//according to the timing diagram in the user manual (pg
rdy := Bool(true) // ready to accept new commands
busy := Bool(false) // operation complete, no longer busy
state := idle
//note: this code could possibly be placed in the ap_done action to save
//one wasted cycle. it is not clear from the user guide (pg 157), ap_idle
//is asserted one cycle after ap_done. if this arrangment has problems,
//transitioning on ap_idle should work but will result in an unnessicary
//extra cycle.
when(respValid && io.resp.ready){
//The processor has read the response. There is no more data for it
//Drive resp.valid low to avoid stalling processor
respValid := Bool(false)
// ===== End Controller State Machine Logic =====
// ===== Tie off these lines =====
io.interrupt := Bool(false)
// Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)
if( == 0){
// No connected memory bus lines on accelerator
// We will not be doing any memory ops in this accelerator
io.mem.req.valid := Bool(false)
io.mem.req.bits.addr := UInt(0)
io.mem.req.bits.tag := UInt(0)
io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
io.mem.req.bits.size := log2Ceil(8).U
io.mem.req.bits.signed := Bool(false) := UInt(0) // not performing any stores
//io.mem.invalidate_lr := Bool(false)
//If enable physical addr, make sure to use pmp instr to set the right permission on addr range
io.mem.req.bits.phys := Bool(false)
$control2 .= "}\n";
# TODO no clock and reset signal
$control2 =~ s/test_c/$func_name/g;
print CT $control2;

use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);
# Positional command-line inputs: file_name, func_name, func_base_addr, prefix (optional).
# The RDIR environment variable is read as well.
my $dir = getcwd;
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $func_base_addr = $ARGV[2];
my $rdir = $ENV{'RDIR'};
my $prefix = undef;
# Shared loop index used by the C-style for loops further down.
my $i = undef;
# Number of command-line arguments actually supplied.
my $num_args = $#ARGV + 1;
# A fourth argument, when present, is taken as the prefix.
if ($num_args > 3) {
$prefix = $ARGV[3];
#my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name";
# A true-valued prefix is prepended to the function name; every later use of $func_name sees the prefixed name.
if ($prefix) {
$func_name = $prefix.$func_name;
#print $rdir;
# Print a reminder when RDIR is unset or empty.
if ((not defined($rdir)) or $rdir eq '') {
print("Please source!\n");
# my $build_sbt = '
# organization := "edu.berkeley.cs"
# version := "1.0"
# name := "hls_test_c"';
# $build_sbt=~ s/test_c/$func_name/g;
# my $build_sbt_path= "$bm_path/"."build.sbt";
# open BUILD, ">$build_sbt_path";
# print BUILD $build_sbt;
# close BUILD;
# Scan the Verilog for this function and collect its C_* parameters and its input/output ports.
# The file is expected at ../verilog/<func_name>.v relative to the working directory.
my $verilog_file = "$dir/../verilog/$func_name".".v";
my $line = undef;
# Parallel arrays: parameter names and their values, in match order.
my @verilog_param = ();
my @param_val = ();
# Parallel arrays: input port names and their widths.
my @verilog_input = ();
my @verilog_input_size = ();
# Parallel arrays: output port names and their widths.
my @verilog_output = ();
my @verilog_output_size = ();
#my $m_axi_data_width = undef;
#my $s_axi_data_width = undef;
# Parallel arrays: one entry per C_M_AXI_<bus>_DATA_WIDTH parameter (lower-cased bus name, width value).
my @bus_names=();
my @m_axi_data_widths = ();
# Value of the C_S_AXI_DATA_WIDTH parameter; stays undef if that parameter is never matched.
my $s_axi_data_width = undef;
print "Parsing ".$verilog_file."\n";
# Parse the Verilog file to get the info we need.
# NOTE(review): when open fails only $! is printed; nothing here stops the script — confirm that is intended.
if(!open VERILOG, "$verilog_file"){
print $!;
} else {
# presumably $_ holds the current line read from VERILOG — confirm against the enclosing read loop
$line = $_;
# Match AXI4 parameter: "parameter C_<NAME> = <value>;"
if($line =~ m/parameter\s+(C_\S+) =\s+(.*);/){
my $param = $1;
my $val = $2;
# Appending the empty string leaves the value unchanged.
$param .="";
# Master AXI data width: the bus name is the text between C_M_AXI_ and _DATA_WIDTH, lower-cased.
if($param =~ m/C_M_AXI_(\S+)_DATA_WIDTH/){
my $bus_name = lc $1;
#$m_axi_data_width = $val;
push(@bus_names, $bus_name);
push(@m_axi_data_widths, $val);
# Slave AXI data width: exact parameter name match.
if ($param eq "C_S_AXI_DATA_WIDTH") {
$s_axi_data_width = $val;
push (@verilog_param, $param);
push (@param_val, $val);
} elsif($line =~ m/^\s*input\s+(.*)/){
# Input port declaration: $input is the rest of the line after the "input" keyword.
my $input = $1;
#print "input:$input\n";
# Vector port: "[<end>:<start>] <name>;"
if($input =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
my $end = $1;
my $start = $2;
my $input_name = $3;
#print "here!"."$input_name\n";
push (@verilog_input, $input_name);
# NOTE(review): the width stays 0 when neither pattern below matches $end — confirm that case cannot occur.
my $size = 0;
# Numeric upper bound: width is end - start + 1, stored as a string.
if ($end =~ m/^\d+$/){
$size = $end - $start + 1;
$size = "".$size;
} elsif($end =~m/(\S+) - 1/) {
# Symbolic upper bound of the form "<expr> - 1": the width is recorded as <expr>.
$size = $1;
push(@verilog_input_size, $size);
}elsif ($input =~ m/\s*(.*)\s*;/){
# Port without a range: recorded with width "1".
my $input_name = $1;
#print "here!"."$input_name\n";
push (@verilog_input, $input_name);
push(@verilog_input_size, "1");
}elsif($line =~ m/^\s*output\s+(.*)/){
# Output port declaration: handled the same way as inputs, into the output arrays.
my $output = $1;
#print "output:$output\n";
# Vector port: "[<end>:<start>] <name>;"
if($output =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
my $end = $1;
my $start = $2;
my $output_name = $3;
#print "here!"."$output_name\n";
push(@verilog_output, $output_name);
my $size = 0;
# Numeric upper bound: width is end - start + 1, stored as a string.
if ($end =~ m/^\d+$/){
$size = $end - $start + 1;
$size = "".$size;
} elsif($end =~m/(\S+) - 1/) {
# Symbolic upper bound of the form "<expr> - 1": the width is recorded as <expr>.
$size = $1;
push(@verilog_output_size, $size);
}elsif ($output =~ m/\s*(.*)\s*;/){
# Port without a range: recorded with width "1".
my $output_name = $1;
#print "here!"."$output_name\n";
push (@verilog_output, $output_name);
push(@verilog_output_size, "1");
# Report what was collected, one space-separated list per category.
print("Parameters: ");
my $param_str = join ' ', @verilog_param;
print $param_str."\n";
print("Inputs: ");
my $in_str = join ' ', @verilog_input;
print $in_str."\n";
print("Outputs: ");
my $out_str = join ' ', @verilog_output;
print $out_str."\n";
# Write <scala_dir>/<func_name>_blackbox.scala: a BlackBox class declaring the parsed
# parameters as vals and one io field per parsed Verilog port.
# Create the scala folder (../scala relative to the working directory) if it does not exist.
my $scala_dir = "$dir/../scala";
mkdir $scala_dir unless (-d $scala_dir);
# No master AXI width parameter was found: record a placeholder bus "gmem_dummy" with width 32.
if(@m_axi_data_widths < 1){
push(@bus_names, "gmem_dummy");
push(@m_axi_data_widths, 32);
# NOTE(review): $s_axi_data_width is interpolated and substituted further down; confirm it is
# given a value when C_S_AXI_DATA_WIDTH was not found.
if(not defined($s_axi_data_width)) {
print "Generating BlackBox file ...\n";
# Echo the bus widths that will be used.
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
print "m_axi_data_width_ $bus_names[$i]= $m_axi_data_widths[$i]\n";
print "s_axi_data_width = $s_axi_data_width\n";
# The blackbox file goes under the scala folder; ">" opens it for writing.
# NOTE(review): the result of open is not checked.
open BB, ">$scala_dir/$func_name"."_blackbox.scala";
my $blackbox1 = "
package hls_test_c
import Chisel._
import freechips.rocketchip.config.{Parameters, Field}
import freechips.rocketchip.tile._
import freechips.rocketchip.util._
class test_c() extends BlackBox() {
$blackbox1 =~ s/test_c/$func_name/g;
# Emit each parsed parameter as "val <name> = <value>".
for( $i = 0; $i < @verilog_param; $i = $i + 1 ){
$blackbox1 .= "val $verilog_param[$i] = $param_val[$i]\n";
print BB $blackbox1;
print BB "\tval io = new Bundle {\n";
# NOTE(review): $bb_body is not referenced again in this script.
my $bb_body = "";
# now if the input name does not start with ap, we assume it is an arg
# Flags recording which ap_* control ports were seen; consulted when the control file is generated.
my $ap_return = 0;
my $ap_clk = 0;
my $ap_rst = 0;
my $ap_rst_n = 0;
# Names of all ports beginning with m_axi or s_axi (inputs first, then outputs).
my @verilog_axi_io = ();
# Inputs: set the control-port flags, collect AXI port names, and emit one io field per port.
for( $i = 0; $i < @verilog_input; $i = $i + 1 ){
my $input_name = $verilog_input[$i];
my $input_size = $verilog_input_size[$i];
if ($input_name =~ m/^ap_clk$/){
$ap_clk = 1;
elsif ($input_name =~ m/^ap_rst$/){
$ap_rst = 1;
elsif ($input_name =~ m/^ap_rst_n$/){
$ap_rst_n = 1;
elsif($input_name =~ m/^(m_axi|s_axi)\S+$/){
push (@verilog_axi_io, $input_name);
print BB "\t\tval $input_name = ";
# Names containing ap_clk are declared as Clock(INPUT); the Bits(INPUT, width = ...) form carries the parsed width.
if ($input_name =~ m/ap_clk(.*)/){
print BB "Clock\(INPUT\)\n";
print BB "Bits\(INPUT, width = $input_size\)\n";
# Outputs: note whether an ap_return port exists, collect AXI port names, and emit Bits(OUTPUT, ...) fields.
for( $i = 0; $i < @verilog_output; $i = $i + 1 ){
my $output_name = $verilog_output[$i];
my $output_size = $verilog_output_size[$i];
if ($output_name =~ m/ap_return(.*)/){
$ap_return = 1;
elsif($output_name =~ m/^(m_axi|s_axi)\S+$/){
push (@verilog_axi_io, $output_name);
print BB "\t\tval $output_name = ";
print BB "Bits(OUTPUT, width = $output_size)\n";
# Close the io Bundle and the class, then the file.
print BB "\t}\n";
print BB "}\n";
close BB;
# Write the first part of <scala_dir>/<func_name>_accel.scala from the $control1 template:
# a LazyModule class (HLStest_cAXI) with one AXI4MasterNode per entry in @bus_names and an
# AXI4 slave node, followed by the start of its LazyModuleImp class (HLStest_cAXIModule).
# The placeholders "test_c" and "s_axi_data_width" in the template are substituted before writing.
# NOTE(review): the result of open is not checked.
print "Generating Control file ...\n";
open CT, ">$scala_dir/$func_name"."_accel.scala";
#TODO Fix AXI4 params
my $control1 = '
package hls_test_c
import chisel3._
import chisel3.util._
import freechips.rocketchip.config.{Field, Parameters}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.amba.axi4._
import freechips.rocketchip.util._
import freechips.rocketchip.subsystem._
class HLStest_cAXI (address: BigInt = 0x20000, beatBytes: Int = 8) (implicit p: Parameters) extends LazyModule {
val numInFlight = 8
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
$control1 .="
val node_$bus_names[$i] = AXI4MasterNode(Seq(AXI4MasterPortParameters(
masters = Seq(AXI4MasterParameters(
name = \"axil_hub_mem_out_$i\",
id = IdRange(0, numInFlight),
aligned = true,
maxFlight = Some(8)
userBits = 0
$control1 .='
val slave_node = AXI4SlaveNode(Seq(AXI4SlavePortParameters(
slaves = Seq(AXI4SlaveParameters(
address = List(AddressSet(address,0x4000-1)),
regionType = RegionType.UNCACHED,
supportsWrite = TransferSizes(1, beatBytes),
supportsRead = TransferSizes(1, beatBytes),
interleavedId = Some(0)
beatBytes = beatBytes
lazy val module = new HLStest_cAXIModule(this)
class HLStest_cAXIModule(outer: HLStest_cAXI) extends LazyModuleImp(outer) {
//val (out, edge) = outer.node.out(0)
val (slave_in, slave_edge) =
val bId = Reg(UInt(32.W))
val rId = Reg(UInt(32.W))
val bb = Module(new test_c())
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
$control1 .="
val (out_$bus_names[$i], edge_$bus_names[$i]) = outer.node_$bus_names[$i].out(0)";
$control1 .= "\n";
# Replace the s_axi_data_width placeholder with the parsed value.
$control1 =~ s/s_axi_data_width/$s_axi_data_width/g;
# Append clock/reset hookup lines only for the control ports seen while scanning the inputs.
if ($ap_clk eq 1){
$control1 .= "\ := clock\n";
if ($ap_rst eq 1){
$control1 .= "\ := reset\n";
# For ap_rst_n the appended line uses the negated reset.
if ($ap_rst_n eq 1){
$control1 .= "\ := !reset.toBool() \n";
# Replace the test_c placeholder with the (possibly prefixed) function name, then write this part out.
$control1 =~ s/test_c/$func_name/g;
print CT $control1;
#TODO modify accelerator arg!
my $control2 = '
# TODO Add support for multiple AXI buses
# AXI Inputs Signals
# First pass over the collected AXI port names: for each name whose suffix matches one of the
# patterns below, append one ":=" line to $control2 whose right-hand side is a field of
# out_<bus> (master ports) or slave_in (slave ports). <bus> is the text captured between the
# m_axi_/s_axi_ prefix and the channel suffix; $type is the lower-cased channel (aw, w, ar, r, b).
# Names matching none of the patterns add nothing.
for( $i = 0; $i < @verilog_axi_io; $i = $i + 1 ){
# NOTE(review): $number is not referenced in this loop.
my $number = $i + 1;
if ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|W|AR)READY$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := out_$bus_name.$type.ready\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)VALID$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := out_$bus_name.$type.valid\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R)DATA$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := out_$bus_name.$\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R)LAST$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := out_$bus_name.$type.bits.last\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)ID$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := out_$bus_name.$\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)RESP$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := out_$bus_name.$type.bits.resp\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(AW|W|AR)VALID$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := slave_in.$type.valid\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(AW|AR)ADDR$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := slave_in.$type.bits.addr\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(W)DATA$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := slave_in.$\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(W)STRB$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := slave_in.$type.bits.strb\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R|B)READY$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\$verilog_axi_io[$i] := slave_in.$type.ready\n";
# Second pass over the same list, opposite direction: for each matching name, append a line
# whose left-hand side is a field of out_<bus> or slave_in and whose right-hand side ends with
# the port name.
for( $i = 0; $i < @verilog_axi_io; $i = $i + 1 ){
# NOTE(review): $number is not referenced in this loop either.
my $number = $i + 1;
if ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|W|AR)VALID$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.valid :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)READY$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.ready :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)ADDR$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.addr :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)ID$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$ :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)LEN$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.len :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)SIZE$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.size :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)BURST$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.burst :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)LOCK$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.lock :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)CACHE$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.cache :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)PROT$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.prot :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)QOS$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.qos :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)REGION$/){
my $bus_name = $1;
my $type = lc $2;
# The REGION line is emitted with a leading "//", i.e. commented out in the generated Scala.
$control2 .= "\t//out_$bus_name.$type.bits.region :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(W)DATA$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$ :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(W)STRB$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.strb :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(W)LAST$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tout_$bus_name.$type.bits.last :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(AW|W|AR)READY$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tslave_in.$type.ready :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R|B)VALID$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tslave_in.$type.valid :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R)DATA$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tslave_in.$ :=$verilog_axi_io[$i]\n";
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R|B)RESP$/){
my $bus_name = $1;
my $type = lc $2;
$control2 .= "\tslave_in.$type.bits.resp :=$verilog_axi_io[$i]\n";
# An ap_return declaration is appended only when the Verilog had an ap_return output.
if ($ap_return eq 1){
$control2 = $control2."\tval ap_return =\n";
# Append the rest of the generated file to $control2 and write it to CT: the tail of the module
# body, then the HasPeripheryHLStest_cAXI trait (instantiates the accelerator LazyModule and
# attaches one sbus.fromPort block per entry in @m_axi_data_widths plus the slave port), then
# the HasPeripheryHLStest_cAXIImp trait. The placeholders test_c, base_addr and
# s_axi_data_width in the accumulated text are substituted just before printing.
$control2 .= "
// For AXI4lite, these two signals are always True
slave_in.r.bits.last := true.B
bId :=
rId :=
} := rId := bId
# TODO Fix the width here
$control2 .='
trait HasPeripheryHLStest_cAXI { this: BaseSubsystem =>
private val address = BigInt(base_addr)
private val axi_m_portName = "HLS-Accelerator-test_c-master"
private val axilite_s_portName = "HLS-Accelerator-test_c-slave"
//val accel_s_axi_width = s_axi_data_width
//val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, sbus.beatBytes))
val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, s_axi_data_width >> 3))
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
$control2 .="
sbus.fromPort(Some(axi_m_portName)) {
(TLWidthWidget($m_axi_data_widths[$i]>> 3 )
:= AXI4ToTL()
:= AXI4UserYanker()
:= AXI4Fragmenter()
:= AXI4IdIndexer(1))
}:=* hls_test_c_accel.node_$bus_names[$i]
$control2 .='
hls_test_c_accel.slave_node :=* sbus.toFixedWidthPort(Some(axilite_s_portName)) {
:= AXI4UserYanker()
//:= AXI4IdIndexer(params.idBits)
//:= AXI4Deinterleaver(sbus.blockBytes) // Assume there is no iterleaved requests, iterleaveId = Some(0)
:= TLToAXI4()
:= TLBuffer()
//:= TLWidthWidget(s_axi_data_width >> 3)
// Compared to TLWidthWidget, TLFragmenter saves the id info?
:= TLFragmenter(s_axi_data_width >> 3, 64, true))
trait HasPeripheryHLStest_cAXIImp extends LazyModuleImp {
val outer: HasPeripheryHLStest_cAXI
$control2 =~ s/test_c/$func_name/g;
# base_addr becomes the func_base_addr command-line argument; s_axi_data_width becomes the parsed width.
$control2 =~ s/base_addr/$func_base_addr/g;
$control2 =~ s/s_axi_data_width/$s_axi_data_width/g;
# Write the accumulated text to the control file handle.
print CT $control2;

use warnings;
use strict;
use Cwd;
use File::Copy;
# Positional command-line inputs: file_name, func_name, prefix (optional).
# This script writes run_hls.tcl in the working directory, runs vivado_hls on it, and copies the
# generated Verilog from <file_name>_prj/solution1/syn/verilog/ to ../verilog/.
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $prefix = undef;
# Number of command-line arguments actually supplied.
my $num_args = $#ARGV + 1;
# A third argument, when present, is taken as the prefix.
if ($num_args > 2) {
$prefix = $ARGV[2];
#############################GENERATE HLS##############################
# Generate directive file based on LLVM emitted output
# If the variable is of pointer type then an ap_bus interface is generated
# NOTE(review): $directive_tcl_insn is not referenced again in this script.
my $directive_tcl_insn = 'set_directive_interface -mode ap_bus "test_c_func" test_var
my $prefix_tcl = "";
# With a prefix, build a "config_rtl -prefix <prefix>" Tcl line.
if ($prefix) {
$prefix_tcl = "config_rtl -prefix ".$prefix."\n";
my $hls_pgm = undef;
# Use <file_name>.cpp (with the -std=c++0x cflags) when that file exists, otherwise <file_name>.c.
if (-f $file_name.".cpp"){
$hls_pgm = $file_name.'.cpp -cflags "-std=c++0x" ';
} else {
$hls_pgm = $file_name.".c";
# should change to add all .c files
my $hls_tcl = 'open_project -reset test_c_prj
set_top test_c_func
add_files hls_pgm
open_solution -reset "solution1"
set_part {xcvu9p-flgb2104-2-i}
config_compile -ignore_long_run_time
create_clock -period 10 -name default
#source "./test_c_prj/solution1/directives.tcl"
#config_interface -clock_enable
config_interface -m_axi_addr64
#export_design -format ip_catalog
my $dir = getcwd;
# The Tcl script is written to run_hls.tcl in the working directory.
# NOTE(review): the result of open is not checked.
open HLS, ">$dir/run_hls.tcl";
# Replace the function name and file name placeholders.
# test_c_func is substituted before test_c because test_c is a prefix of test_c_func.
$hls_tcl =~ s/test_c_func/$func_name/g;
$hls_tcl =~ s/test_c/$file_name/g;
$hls_tcl =~ s/hls_pgm/$hls_pgm/g;
# run vivado hls
print HLS $hls_tcl;
system("vivado_hls -f run_hls.tcl");
# Source directory (Vivado output) and destination directory for the Verilog files.
my $vivado_dir = "$dir/$file_name"."_prj/solution1/syn/verilog/";
my $verilog_dir = "$dir/../verilog/";
mkdir $verilog_dir unless (-d $verilog_dir);
# Remove whatever files are already in the destination directory.
unlink glob "$verilog_dir/*";
opendir(DIR, $vivado_dir) or die "Can't opendir $vivado_dir: $! \n";
my @files=readdir(DIR);
foreach my $v_file (@files){
# Open and replace one line
# Escape the slashes in the Vivado directory so it can be used inside an s/// replacement.
my $vivado_dir_escape = $vivado_dir;
$vivado_dir_escape =~ s/\//\\\//g;
# presumably rewrites relative $readmemh("./...") paths to point into the Vivado output directory — confirm the escaping
# NOTE(review): both in-place rewrites below target "*" rather than $v_file and appear to run
# once per directory entry — confirm that is intended.
my $perl_cmd = "perl -p -i -e 's/\$readmemh\\\(\\\"\\\.\/\$readmemh(\\\"$vivado_dir_escape/g' *";
print $perl_cmd;
system ($perl_cmd);
# Second in-place rewrite: replaces every 'bx with 1'b0.
$perl_cmd = "perl -p -i -e \"s/'bx/1'b0/g\" *";
system ($perl_cmd);
print $perl_cmd;
print "$v_file\n";
# Copy only plain files (readdir also returns directory entries).
if (-f "$vivado_dir/$v_file") {
copy("$vivado_dir/$v_file", $verilog_dir) or die "File cannot be copied! $v_file $verilog_dir\n";
#die $!;