Emit multi-cycle clock constraints on target domains

This commit is contained in:
David Biancolin 2021-09-14 03:09:29 +00:00
parent a8c864b302
commit 1f015341c2
9 changed files with 103 additions and 34 deletions

View File

@ -241,7 +241,7 @@ class TargetBoxIO(config: SimWrapperConfig) extends ChannelizedWrapperIO(config)
}
val clockElement: (String, DecoupledIO[Data]) = chAnnos.collectFirst({
case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_), _, _, Some(sinks)) =>
case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_, _), _, _, Some(sinks)) =>
sinks.head.ref.stripSuffix("_bits") -> Flipped(Decoupled(regenClockType(sinks)))
}).get
@ -262,7 +262,7 @@ class SimWrapperChannels(config: SimWrapperConfig) extends ChannelizedWrapperIO(
def regenClockType(refTargets: Seq[ReferenceTarget]): Vec[Bool] = Vec(refTargets.size, Bool())
val clockElement: (String, DecoupledIO[Vec[Bool]]) = chAnnos.collectFirst({
case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_), _, _, Some(sinks)) =>
case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_,_), _, _, Some(sinks)) =>
sinks.head.ref.stripSuffix("_bits") -> Flipped(Decoupled(regenClockType(sinks)))
}).get
@ -436,7 +436,7 @@ class SimWrapper(val config: SimWrapperConfig)(implicit val p: Parameters) exten
channelGroups foreach { case (name, annos) => genPipeChannel(annos, name) }
// Generate clock channels
val clockChannels = chAnnos.collect({case ch @ FAMEChannelConnectionAnnotation(_, fame.TargetClockChannel(_),_,_,_) => ch })
val clockChannels = chAnnos.collect({case ch @ FAMEChannelConnectionAnnotation(_, fame.TargetClockChannel(_,_),_,_,_) => ch })
require(clockChannels.size == 1)
genClockChannel(clockChannels.head)
}

View File

@ -26,7 +26,7 @@ object ChannelClockInfoAnalysis extends Transform {
def outputForm = LowForm
def analyze(state: CircuitState): Map[String, RationalClock] = {
val clockChannels = state.annotations.collect {
case FAMEChannelConnectionAnnotation(_,TargetClockChannel(clocks),_,_,Some(clockRTs)) =>
case FAMEChannelConnectionAnnotation(_,TargetClockChannel(clocks,_),_,_,Some(clockRTs)) =>
clockRTs zip clocks
}
require(clockChannels.size == 1,

View File

@ -191,7 +191,7 @@ private[passes] object TriggerWiring extends firrtl.Transform {
// Step 6) Synchronize and aggregate local counts into global counts in the base clock domain
val refClockRT = wiredState.annotations.collectFirst({
case FAMEChannelConnectionAnnotation(_,TargetClockChannel(_),_,_,Some(clock :: _)) => clock
case FAMEChannelConnectionAnnotation(_,TargetClockChannel(_,_),_,_,Some(clock :: _)) => clock
}).get
// We only need to use a single register to synchronize a signal in GG, we use two here

View File

@ -152,8 +152,16 @@ case object DecoupledReverseChannel extends FAMEChannelInfo
/**
* Indicates that a channel connection carries target clocks
*
* @param clockInfo The user-specified metadata, including a name and a ratio
* relative to the base clock
*
* @param perClockMFMR Specifies the minimum number of host cycles
* between clock edges. This is a property of the clock token schedule, and
* permits relaxing timing constraints on clock domains with minimum FMRs (MFMR) > 1.
*
*/
case class TargetClockChannel(clockInfo: Seq[RationalClock]) extends FAMEChannelInfo
case class TargetClockChannel(clockInfo: Seq[RationalClock], perClockMFMR: Seq[Int]) extends FAMEChannelInfo
/**
* Indicates that a channel connection is the forward (valid) half of

View File

@ -15,6 +15,8 @@ import scala.collection.mutable
import mutable.{LinkedHashSet, LinkedHashMap}
import midas.passes._
import midas.targetutils.xdc.{XDCFiles, XDCAnnotation}
import midas.widgets.{RationalClock}
/**************
PRECONDITIONS:
@ -70,10 +72,20 @@ trait FAME1DataChannel extends FAME1Channel with HasModelPort {
}
}
case class FAME1ClockChannel(name: String, ports: Seq[Port]) extends FAME1Channel with InputChannel with HasModelPort
/**
 * Mixin for channels that carry target clocks.
 *
 * Implementors supply the clock descriptions and a per-clock MFMR value; the
 * two sequences are paired positionally to build a lookup table.
 */
trait ClockChannel {
  /** Descriptions of the clocks carried by this channel. */
  def clockInfo: Seq[RationalClock]

  /** One MFMR value per clock, in the same order as `clockInfo`. */
  def clockMFMRs: Seq[Int]

  /** MFMR keyed by clock description; kept lazy so implementors may define the members above as vals. */
  lazy val clockMFMRMap: Map[RationalClock, Int] = {
    val pairedEntries = clockInfo zip clockMFMRs
    pairedEntries.toMap
  }
}
case class VirtualClockChannel(targetClock: Port) extends FAME1Channel with InputChannel {
case class FAME1ClockChannel(name: String, ports: Seq[Port], clockInfo: Seq[RationalClock], clockMFMRs: Seq[Int])
extends FAME1Channel with InputChannel with HasModelPort with ClockChannel
case class VirtualClockChannel(targetClock: Port) extends FAME1Channel with InputChannel with ClockChannel {
val name = "VirtualClockChannel"
val clockInfo = Seq(RationalClock("NonHubClock", 1,1))
val clockMFMRs = Seq(1)
val ports = Seq(targetClock)
val isValid: Expression = UIntLiteral(1)
def setReady(advanceCycle: Expression): Statement = EmptyStmt
@ -107,7 +119,7 @@ case class FAME1OutputChannel(
// - Use to initialize isFired for all channels (with negation)
// - Finishing is gated with clock channel valid
object FAMEModuleTransformer {
def apply(m: Module, analysis: FAMEChannelAnalysis): Module = {
def apply(m: Module, analysis: FAMEChannelAnalysis, addedAnnos: mutable.ArrayBuffer[Annotation]): Module = {
// Step 0: Bookkeeping for port structure conventions
implicit val ns = Namespace(m)
val mTarget = ModuleTarget(analysis.circuit.main, m.name)
@ -132,10 +144,11 @@ object FAMEModuleTransformer {
}
val clockChannel = analysis.modelInputChannelPortMap(mTarget).find(isClockChannel) match {
case Some((name, (None, ports))) => FAME1ClockChannel(name, ports)
val (currentModuleIsHub, clockChannel) = analysis.modelInputChannelPortMap(mTarget).find(isClockChannel) match {
case Some((name, (None, ports))) =>
(true, FAME1ClockChannel(name, ports, analysis.targetClockChInfo.clockInfo, analysis.targetClockChInfo.perClockMFMR))
case Some(_) => ??? // Clock channel cannot have an associated clock domain
case None => VirtualClockChannel(clocks.head) // Virtual clock channel for single-clock models
case None => (false, VirtualClockChannel(clocks.head)) // Virtual clock channel for single-clock models
}
/*
@ -169,7 +182,7 @@ object FAMEModuleTransformer {
clockBuffer: SignalInfo)
// Multi-clock management step 4: Generate clock buffers for all target clocks
val clockMetadata: Seq[TargetClockMetadata] = clockChannel.ports.map { en =>
val clockMetadata: Seq[TargetClockMetadata] = clockChannel.ports.zip(clockChannel.clockInfo).map { case (en, info) =>
val enableReg = hostFlagReg(s"${en.name}_enabled")
val buf = WDefInstance(ns.newName(s"${en.name}_buffer"), DefineAbstractClockGate.blackbox.name)
val clockFlag = DoPrim(PrimOps.AsUInt, Seq(clockChannel.replacePortRef(WRef(en))), Nil, BoolType)
@ -178,6 +191,44 @@ object FAMEModuleTransformer {
Connect(NoInfo, WSubField(WRef(buf), "I"), WRef(hostClock)),
Connect(NoInfo, WSubField(WRef(buf), "CE"),
And.reduce(Seq(WRef(enableReg), WRef(finishing), nReset)))))
// Add multicycle annotations
val clockName = info.name
val clockMFMR = clockChannel.clockMFMRMap(info)
val bufferOutputRT = mTarget.ref(buf.name).field("O")
if (currentModuleIsHub) {
// Leverage the MFMR hint provided by the clock bridge to relax the setup constraints on
// intra-domain paths. Do this only for the hub, since it is the only multiclock model.
//
// Using this multicycle setup constraint is conservative, since all
// inter-clock paths must still close timing at a single host cycle of
// delay. If we instead defined these generated clocks by specifying them
// as _divisions_ of the host_clock, the timer may give more margin to a
// path spanning two slower clock domains, when in reality they must meet
// a single host-cycle constraint. For a counterexample, consider three
// clocks with periods of 2, 3, 4. This produces the following clock token schedule:
//
// token : 0 1 2 3 4 5
// ====================
// clk 2 : 1 1 0 1 1 1
// clk 3 : 1 0 1 0 1 0
// clk 4 : 1 0 0 1 0 1
// t_time : 0 2 3 4 6 8
//
// While the latter two clocks have MFMRs of 2, they sometimes fire in
// back-to-back host-cycles (tokens 2->3).
//
// Note: "host_clock" is defined statically in the shell XDC.
val xdcAnno = XDCAnnotation(
XDCFiles.Implementation,
s"""|create_generated_clock -name ${clockName} -source [get_pins -of [get_clocks host_clock]] [get_pins {}] -divide_by 1
|set_multicycle_path $clockMFMR -setup -from [get_clocks $clockName] -to [get_clocks $clockName]
|set_multicycle_path 1 -hold -from [get_clocks $clockName] -to [get_clocks $clockName]
|""".stripMargin,
bufferOutputRT)
addedAnnos += xdcAnno
}
TargetClockMetadata(
en,
WRef(enableReg),
@ -405,10 +456,12 @@ class FAMETransform extends Transform {
// TODO: pick a value that does not collide
implicit val triggerName = "finishing"
val addedAnnos = mutable.ArrayBuffer[Annotation]()
val toTransform = analysis.transformedModules
val transformedModules = c.modules.map {
case m: Module if (m.name == c.main) => transformTop(m, analysis)
case m: Module if (toTransform.contains(ModuleTarget(c.main, m.name))) => FAMEModuleTransformer(m, analysis)
case m: Module if (toTransform.contains(ModuleTarget(c.main, m.name))) => FAMEModuleTransformer(m, analysis, addedAnnos)
case m => m // TODO (Albert): revisit this; currently, not transforming nested modules
}
@ -418,6 +471,6 @@ class FAMETransform extends Transform {
}
val newCircuit = c.copy(modules = transformedModules)
CircuitState(newCircuit, outputForm, filteredAnnos, Some(hostDecouplingRenames(analysis)))
CircuitState(newCircuit, outputForm, filteredAnnos ++ addedAnnos, Some(hostDecouplingRenames(analysis)))
}
}

View File

@ -240,6 +240,10 @@ private[fame] class FAMEChannelAnalysis(val state: CircuitState, val fameType: F
val hostClock = state.annotations.collect({ case FAMEHostClock(rt) => rt }).head
val hostReset = state.annotations.collect({ case FAMEHostReset(rt) => rt }).head
lazy val targetClockChInfo = state.annotations.collectFirst({
case FAMEChannelConnectionAnnotation(_,chInfo: TargetClockChannel,_,_,_) => chInfo
}).get
private def irPortFromGlobalTarget(mt: ModuleTarget)(rt: ReferenceTarget): Option[Port] = {
val modelPort = topConnects(rt).pathlessTarget

View File

@ -109,7 +109,7 @@ object FindDefaultClocks extends Transform {
// Find an arbitrary clock channel sink
val refClock = state.annotations.collectFirst {
case FAMEChannelConnectionAnnotation(_, TargetClockChannel(_), None, _, Some(sinks)) => sinks.head
case FAMEChannelConnectionAnnotation(_, TargetClockChannel(_,_), None, _, Some(sinks)) => sinks.head
}
// Get the wrapper top port reference it points to

View File

@ -39,6 +39,22 @@ sealed trait ClockBridgeConsts {
val clockChannelName = "clocks"
}
/**
 * Expresses the period of every requested clock as an integer multiple of a
 * common virtual fast clock, whose period is the GCD of all requested periods.
 *
 * Each element of `phaseRelationships` is a (period divisor, period multiplier)
 * pair. The result has one entry per input pair, in the same order.
 */
object FindScaledPeriodGCD {
  def apply(phaseRelationships: Seq[(Int, Int)]): Seq[BigInt] = {
    // Scaling every period by the product of all divisors makes each one integral.
    val commonScale = phaseRelationships.map { case (divisor, _) => BigInt(divisor) }.product
    val integralPeriods = phaseRelationships.map { case (divisor, multiplier) =>
      BigInt(multiplier) * commonScale / BigInt(divisor)
    }
    // Dividing out the shared factor leaves the smallest integer representation.
    val sharedFactor = integralPeriods.reduce(_ gcd _)
    integralPeriods.map(_ / sharedFactor)
  }
}
/**
* A custom bridge annotation for the Clock Bridge. Unique so that we can
* trivially match against it in bridge extraction.
@ -80,12 +96,16 @@ class RationalClockBridge(val allClocks: Seq[RationalClock]) extends BlackBox wi
val clocks = Output(Vec(allClocks.size, Clock()))
})
val scaledPeriods = FindScaledPeriodGCD(allClocks.map { c => (c.multiplier, c.divisor) })
val minPeriod = scaledPeriods.min
val clockMFMRs = scaledPeriods.map { period => ((period + (minPeriod - 1)) / minPeriod).toInt }
// Generate the bridge annotation
annotate(new ChiselAnnotation { def toFirrtl = ClockBridgeAnnotation(outer.toTarget, allClocks) })
annotate(new ChiselAnnotation { def toFirrtl =
FAMEChannelConnectionAnnotation(
clockChannelName,
channelInfo = TargetClockChannel(allClocks),
channelInfo = TargetClockChannel(allClocks, clockMFMRs),
clock = None, // Clock channels do not have a reference clock
sinks = Some(io.clocks.map(_.toTarget)),
sources = None
@ -161,22 +181,6 @@ class ClockBridgeModule(clockInfo: Seq[RationalClock])(implicit p: Parameters)
genCRFile()
}
/**
 * Computes, for each requested clock, its period measured in ticks of a
 * virtual high-frequency clock whose period is the GCD of all requested
 * clock periods.
 *
 * Input pairs are (period divisor, period multiplier); the output preserves
 * the input ordering.
 */
object FindScaledPeriodGCD {
  def apply(phaseRelationships: Seq[(Int, Int)]): Seq[BigInt] = {
    val (divisors, _) = phaseRelationships.unzip
    // Common scale factor that clears every divisor.
    val scale = divisors.foldLeft(BigInt(1)) { (acc, d) => acc * d }
    val scaled = for ((divisor, multiplier) <- phaseRelationships) yield (multiplier * scale) / divisor
    // Reduce to lowest terms by removing the GCD of all scaled periods.
    val gcd = scaled.reduce { (x, y) => x.gcd(y) }
    for (period <- scaled) yield period / gcd
  }
}
/**
* Generates an infinite clock token stream based on rational relationship of each clock.
* To improve simulator FMR, this module always produces non-zero clock tokens

View File

@ -28,7 +28,7 @@ class FAMEAnnotationSerialization extends AnyFlatSpec {
}
"ClockChannel FCCAs" should "serialize and deserialize correctly" in {
val clockInfo = TargetClockChannel(Seq(RationalClock("test",1,2)))
val clockInfo = TargetClockChannel(Seq(RationalClock("test",1,2)), Seq(1))
val anno = baseFCCA.copy(channelInfo = clockInfo)
val deserAnno = serializeAndDeserialize(anno)
assert(anno == deserAnno)