Emit multi-cycle clock constraints on target domains

This commit is contained in:
David Biancolin 2021-09-14 03:09:29 +00:00
parent a8c864b302
commit 1f015341c2
9 changed files with 103 additions and 34 deletions

View File

@ -241,7 +241,7 @@ class TargetBoxIO(config: SimWrapperConfig) extends ChannelizedWrapperIO(config)
} }
val clockElement: (String, DecoupledIO[Data]) = chAnnos.collectFirst({ val clockElement: (String, DecoupledIO[Data]) = chAnnos.collectFirst({
case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_), _, _, Some(sinks)) => case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_, _), _, _, Some(sinks)) =>
sinks.head.ref.stripSuffix("_bits") -> Flipped(Decoupled(regenClockType(sinks))) sinks.head.ref.stripSuffix("_bits") -> Flipped(Decoupled(regenClockType(sinks)))
}).get }).get
@ -262,7 +262,7 @@ class SimWrapperChannels(config: SimWrapperConfig) extends ChannelizedWrapperIO(
def regenClockType(refTargets: Seq[ReferenceTarget]): Vec[Bool] = Vec(refTargets.size, Bool()) def regenClockType(refTargets: Seq[ReferenceTarget]): Vec[Bool] = Vec(refTargets.size, Bool())
val clockElement: (String, DecoupledIO[Vec[Bool]]) = chAnnos.collectFirst({ val clockElement: (String, DecoupledIO[Vec[Bool]]) = chAnnos.collectFirst({
case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_), _, _, Some(sinks)) => case ch @ FAMEChannelConnectionAnnotation(globalName, fame.TargetClockChannel(_,_), _, _, Some(sinks)) =>
sinks.head.ref.stripSuffix("_bits") -> Flipped(Decoupled(regenClockType(sinks))) sinks.head.ref.stripSuffix("_bits") -> Flipped(Decoupled(regenClockType(sinks)))
}).get }).get
@ -436,7 +436,7 @@ class SimWrapper(val config: SimWrapperConfig)(implicit val p: Parameters) exten
channelGroups foreach { case (name, annos) => genPipeChannel(annos, name) } channelGroups foreach { case (name, annos) => genPipeChannel(annos, name) }
// Generate clock channels // Generate clock channels
val clockChannels = chAnnos.collect({case ch @ FAMEChannelConnectionAnnotation(_, fame.TargetClockChannel(_),_,_,_) => ch }) val clockChannels = chAnnos.collect({case ch @ FAMEChannelConnectionAnnotation(_, fame.TargetClockChannel(_,_),_,_,_) => ch })
require(clockChannels.size == 1) require(clockChannels.size == 1)
genClockChannel(clockChannels.head) genClockChannel(clockChannels.head)
} }

View File

@ -26,7 +26,7 @@ object ChannelClockInfoAnalysis extends Transform {
def outputForm = LowForm def outputForm = LowForm
def analyze(state: CircuitState): Map[String, RationalClock] = { def analyze(state: CircuitState): Map[String, RationalClock] = {
val clockChannels = state.annotations.collect { val clockChannels = state.annotations.collect {
case FAMEChannelConnectionAnnotation(_,TargetClockChannel(clocks),_,_,Some(clockRTs)) => case FAMEChannelConnectionAnnotation(_,TargetClockChannel(clocks,_),_,_,Some(clockRTs)) =>
clockRTs zip clocks clockRTs zip clocks
} }
require(clockChannels.size == 1, require(clockChannels.size == 1,

View File

@ -191,7 +191,7 @@ private[passes] object TriggerWiring extends firrtl.Transform {
// Step 6) Synchronize and aggregate local counts into global counts in the base clock domain // Step 6) Synchronize and aggregate local counts into global counts in the base clock domain
val refClockRT = wiredState.annotations.collectFirst({ val refClockRT = wiredState.annotations.collectFirst({
case FAMEChannelConnectionAnnotation(_,TargetClockChannel(_),_,_,Some(clock :: _)) => clock case FAMEChannelConnectionAnnotation(_,TargetClockChannel(_,_),_,_,Some(clock :: _)) => clock
}).get }).get
// We only need to use a single register to synchronize a signal in GG, we use two here // We only need to use a single register to synchronize a signal in GG, we use two here

View File

@ -152,8 +152,16 @@ case object DecoupledReverseChannel extends FAMEChannelInfo
/** /**
* Indicates that a channel connection carries target clocks * Indicates that a channel connection carries target clocks
*
* @param clockInfo The user-specified metadata, including a name and a ratio
* relative to the base clock
*
* @param perClockMFMR Specifies the minimum number of host cycles
* between clock edges. This is a property of the clock token schedule, and
* permits relaxing timing constraints on clock domains with minimum FMRs (MFMR) > 1.
*
*/ */
case class TargetClockChannel(clockInfo: Seq[RationalClock]) extends FAMEChannelInfo case class TargetClockChannel(clockInfo: Seq[RationalClock], perClockMFMR: Seq[Int]) extends FAMEChannelInfo
/** /**
* Indicates that a channel connection is the forward (valid) half of * Indicates that a channel connection is the forward (valid) half of

View File

@ -15,6 +15,8 @@ import scala.collection.mutable
import mutable.{LinkedHashSet, LinkedHashMap} import mutable.{LinkedHashSet, LinkedHashMap}
import midas.passes._ import midas.passes._
import midas.targetutils.xdc.{XDCFiles, XDCAnnotation}
import midas.widgets.{RationalClock}
/************** /**************
PRECONDITIONS: PRECONDITIONS:
@ -70,10 +72,20 @@ trait FAME1DataChannel extends FAME1Channel with HasModelPort {
} }
} }
case class FAME1ClockChannel(name: String, ports: Seq[Port]) extends FAME1Channel with InputChannel with HasModelPort trait ClockChannel {
def clockInfo: Seq[RationalClock]
def clockMFMRs: Seq[Int]
lazy val clockMFMRMap: Map[RationalClock, Int] = clockInfo.zip(clockMFMRs).toMap
}
case class VirtualClockChannel(targetClock: Port) extends FAME1Channel with InputChannel {
case class FAME1ClockChannel(name: String, ports: Seq[Port], clockInfo: Seq[RationalClock], clockMFMRs: Seq[Int])
extends FAME1Channel with InputChannel with HasModelPort with ClockChannel
case class VirtualClockChannel(targetClock: Port) extends FAME1Channel with InputChannel with ClockChannel {
val name = "VirtualClockChannel" val name = "VirtualClockChannel"
val clockInfo = Seq(RationalClock("NonHubClock", 1,1))
val clockMFMRs = Seq(1)
val ports = Seq(targetClock) val ports = Seq(targetClock)
val isValid: Expression = UIntLiteral(1) val isValid: Expression = UIntLiteral(1)
def setReady(advanceCycle: Expression): Statement = EmptyStmt def setReady(advanceCycle: Expression): Statement = EmptyStmt
@ -107,7 +119,7 @@ case class FAME1OutputChannel(
// - Use to initialize isFired for all channels (with negation) // - Use to initialize isFired for all channels (with negation)
// - Finishing is gated with clock channel valid // - Finishing is gated with clock channel valid
object FAMEModuleTransformer { object FAMEModuleTransformer {
def apply(m: Module, analysis: FAMEChannelAnalysis): Module = { def apply(m: Module, analysis: FAMEChannelAnalysis, addedAnnos: mutable.ArrayBuffer[Annotation]): Module = {
// Step 0: Bookkeeping for port structure conventions // Step 0: Bookkeeping for port structure conventions
implicit val ns = Namespace(m) implicit val ns = Namespace(m)
val mTarget = ModuleTarget(analysis.circuit.main, m.name) val mTarget = ModuleTarget(analysis.circuit.main, m.name)
@ -132,10 +144,11 @@ object FAMEModuleTransformer {
} }
val clockChannel = analysis.modelInputChannelPortMap(mTarget).find(isClockChannel) match { val (currentModuleIsHub, clockChannel) = analysis.modelInputChannelPortMap(mTarget).find(isClockChannel) match {
case Some((name, (None, ports))) => FAME1ClockChannel(name, ports) case Some((name, (None, ports))) =>
(true, FAME1ClockChannel(name, ports, analysis.targetClockChInfo.clockInfo, analysis.targetClockChInfo.perClockMFMR))
case Some(_) => ??? // Clock channel cannot have an associated clock domain case Some(_) => ??? // Clock channel cannot have an associated clock domain
case None => VirtualClockChannel(clocks.head) // Virtual clock channel for single-clock models case None => (false, VirtualClockChannel(clocks.head)) // Virtual clock channel for single-clock models
} }
/* /*
@ -169,7 +182,7 @@ object FAMEModuleTransformer {
clockBuffer: SignalInfo) clockBuffer: SignalInfo)
// Multi-clock management step 4: Generate clock buffers for all target clocks // Multi-clock management step 4: Generate clock buffers for all target clocks
val clockMetadata: Seq[TargetClockMetadata] = clockChannel.ports.map { en => val clockMetadata: Seq[TargetClockMetadata] = clockChannel.ports.zip(clockChannel.clockInfo).map { case (en, info) =>
val enableReg = hostFlagReg(s"${en.name}_enabled") val enableReg = hostFlagReg(s"${en.name}_enabled")
val buf = WDefInstance(ns.newName(s"${en.name}_buffer"), DefineAbstractClockGate.blackbox.name) val buf = WDefInstance(ns.newName(s"${en.name}_buffer"), DefineAbstractClockGate.blackbox.name)
val clockFlag = DoPrim(PrimOps.AsUInt, Seq(clockChannel.replacePortRef(WRef(en))), Nil, BoolType) val clockFlag = DoPrim(PrimOps.AsUInt, Seq(clockChannel.replacePortRef(WRef(en))), Nil, BoolType)
@ -178,6 +191,44 @@ object FAMEModuleTransformer {
Connect(NoInfo, WSubField(WRef(buf), "I"), WRef(hostClock)), Connect(NoInfo, WSubField(WRef(buf), "I"), WRef(hostClock)),
Connect(NoInfo, WSubField(WRef(buf), "CE"), Connect(NoInfo, WSubField(WRef(buf), "CE"),
And.reduce(Seq(WRef(enableReg), WRef(finishing), nReset))))) And.reduce(Seq(WRef(enableReg), WRef(finishing), nReset)))))
// Add multicycle annotations
val clockName = info.name
val clockMFMR = clockChannel.clockMFMRMap(info)
val bufferOutputRT = mTarget.ref(buf.name).field("O")
if (currentModuleIsHub) {
// Leverage the MFMR hint provided by the clock bridge to relax the setup constraints on
// intra-domain paths. Do this only for the hub, since it is the only multiclock model.
//
// Using this multicycle setup constraint is conservative, since all
// inter-clock paths must still close timing at a single host cycle of
// delay. If we instead defined these generated clocks by specifying them
// as _divisions_ of the host_clock, the timer may give more margin to a
// path spanning two slower clock domains, when in reality they must meet
// a single host-cycle constraint. For a counterexample, consider three
// clocks with periods of 2, 3, 4. This produces the following clock token schedule:
//
// token : 0 1 2 3 4 5
// ====================
// clk 2 : 1 1 0 1 1 1
// clk 3 : 1 0 1 0 1 0
// clk 4 : 1 0 0 1 0 1
// t_time : 0 2 3 4 6 8
//
// While the latter two clocks have MFMRs of 2, they sometimes fire in
// back-to-back host-cycles (tokens 2->3).
//
// Note: "host_clock" is defined statically in the shell XDC.
val xdcAnno = XDCAnnotation(
XDCFiles.Implementation,
s"""|create_generated_clock -name ${clockName} -source [get_pins -of [get_clocks host_clock]] [get_pins {}] -divide_by 1
|set_multicycle_path $clockMFMR -setup -from [get_clocks $clockName] -to [get_clocks $clockName]
|set_multicycle_path 1 -hold -from [get_clocks $clockName] -to [get_clocks $clockName]
|""".stripMargin,
bufferOutputRT)
addedAnnos += xdcAnno
}
TargetClockMetadata( TargetClockMetadata(
en, en,
WRef(enableReg), WRef(enableReg),
@ -405,10 +456,12 @@ class FAMETransform extends Transform {
// TODO: pick a value that does not collide // TODO: pick a value that does not collide
implicit val triggerName = "finishing" implicit val triggerName = "finishing"
val addedAnnos = mutable.ArrayBuffer[Annotation]()
val toTransform = analysis.transformedModules val toTransform = analysis.transformedModules
val transformedModules = c.modules.map { val transformedModules = c.modules.map {
case m: Module if (m.name == c.main) => transformTop(m, analysis) case m: Module if (m.name == c.main) => transformTop(m, analysis)
case m: Module if (toTransform.contains(ModuleTarget(c.main, m.name))) => FAMEModuleTransformer(m, analysis) case m: Module if (toTransform.contains(ModuleTarget(c.main, m.name))) => FAMEModuleTransformer(m, analysis, addedAnnos)
case m => m // TODO (Albert): revisit this; currently, not transforming nested modules case m => m // TODO (Albert): revisit this; currently, not transforming nested modules
} }
@ -418,6 +471,6 @@ class FAMETransform extends Transform {
} }
val newCircuit = c.copy(modules = transformedModules) val newCircuit = c.copy(modules = transformedModules)
CircuitState(newCircuit, outputForm, filteredAnnos, Some(hostDecouplingRenames(analysis))) CircuitState(newCircuit, outputForm, filteredAnnos ++ addedAnnos, Some(hostDecouplingRenames(analysis)))
} }
} }

View File

@ -240,6 +240,10 @@ private[fame] class FAMEChannelAnalysis(val state: CircuitState, val fameType: F
val hostClock = state.annotations.collect({ case FAMEHostClock(rt) => rt }).head val hostClock = state.annotations.collect({ case FAMEHostClock(rt) => rt }).head
val hostReset = state.annotations.collect({ case FAMEHostReset(rt) => rt }).head val hostReset = state.annotations.collect({ case FAMEHostReset(rt) => rt }).head
lazy val targetClockChInfo = state.annotations.collectFirst({
case FAMEChannelConnectionAnnotation(_,chInfo: TargetClockChannel,_,_,_) => chInfo
}).get
private def irPortFromGlobalTarget(mt: ModuleTarget)(rt: ReferenceTarget): Option[Port] = { private def irPortFromGlobalTarget(mt: ModuleTarget)(rt: ReferenceTarget): Option[Port] = {
val modelPort = topConnects(rt).pathlessTarget val modelPort = topConnects(rt).pathlessTarget

View File

@ -109,7 +109,7 @@ object FindDefaultClocks extends Transform {
// Find an arbitrary clock channel sink // Find an arbitrary clock channel sink
val refClock = state.annotations.collectFirst { val refClock = state.annotations.collectFirst {
case FAMEChannelConnectionAnnotation(_, TargetClockChannel(_), None, _, Some(sinks)) => sinks.head case FAMEChannelConnectionAnnotation(_, TargetClockChannel(_,_), None, _, Some(sinks)) => sinks.head
} }
// Get the wrapper top port reference it points to // Get the wrapper top port reference it points to

View File

@ -39,6 +39,22 @@ sealed trait ClockBridgeConsts {
val clockChannelName = "clocks" val clockChannelName = "clocks"
} }
/**
  * Finds a virtual fast-clock whose period is the GCD of the periods of all requested
  * clocks, and returns the period of each requested clock as an integer multiple of that
  * high-frequency virtual clock.
  *
  * @param phaseRelationships One (period divisor, period multiplier) pair per requested
  *        clock; each clock's period is proportional to multiplier / divisor.
  * @return The period of each clock, in input order, expressed as an integer multiple of
  *         the virtual fast-clock period. Empty when no clocks are requested.
  */
object FindScaledPeriodGCD {
  def apply(phaseRelationships: Seq[(Int, Int)]): Seq[BigInt] = {
    // Scale every period by the product of all divisors so each one becomes an exact integer.
    val productOfDivisors = phaseRelationships.map(_._1).foldLeft(BigInt(1))(_ * _)
    val scaledPeriods = phaseRelationships.map { case (divisor, multiplier) =>
      multiplier * productOfDivisors / divisor
    }
    // reduceOption (rather than reduce) so an empty request yields an empty result
    // instead of throwing UnsupportedOperationException.
    scaledPeriods.reduceOption(_ gcd _).fold(Seq.empty[BigInt]) { gcdOfScaledPeriods =>
      scaledPeriods.map(_ / gcdOfScaledPeriods)
    }
  }
}
/** /**
* A custom bridge annotation for the Clock Bridge. Unique so that we can * A custom bridge annotation for the Clock Bridge. Unique so that we can
* trivially match against it in bridge extraction. * trivially match against it in bridge extraction.
@ -80,12 +96,16 @@ class RationalClockBridge(val allClocks: Seq[RationalClock]) extends BlackBox wi
val clocks = Output(Vec(allClocks.size, Clock())) val clocks = Output(Vec(allClocks.size, Clock()))
}) })
val scaledPeriods = FindScaledPeriodGCD(allClocks.map { c => (c.multiplier, c.divisor) })
val minPeriod = scaledPeriods.min
val clockMFMRs = scaledPeriods.map { period => ((period + (minPeriod - 1)) / minPeriod).toInt }
// Generate the bridge annotation // Generate the bridge annotation
annotate(new ChiselAnnotation { def toFirrtl = ClockBridgeAnnotation(outer.toTarget, allClocks) }) annotate(new ChiselAnnotation { def toFirrtl = ClockBridgeAnnotation(outer.toTarget, allClocks) })
annotate(new ChiselAnnotation { def toFirrtl = annotate(new ChiselAnnotation { def toFirrtl =
FAMEChannelConnectionAnnotation( FAMEChannelConnectionAnnotation(
clockChannelName, clockChannelName,
channelInfo = TargetClockChannel(allClocks), channelInfo = TargetClockChannel(allClocks, clockMFMRs),
clock = None, // Clock channels do not have a reference clock clock = None, // Clock channels do not have a reference clock
sinks = Some(io.clocks.map(_.toTarget)), sinks = Some(io.clocks.map(_.toTarget)),
sources = None sources = None
@ -161,22 +181,6 @@ class ClockBridgeModule(clockInfo: Seq[RationalClock])(implicit p: Parameters)
genCRFile() genCRFile()
} }
/**
  * Finds a virtual fast-clock whose period is the GCD of the periods of all requested
  * clocks, and returns the period of each requested clock as an integer multiple of that
  * high-frequency virtual clock.
  *
  * @param phaseRelationships One (period divisor, period multiplier) pair per requested
  *        clock; each clock's period is proportional to multiplier / divisor.
  * @return The period of each clock, in input order, expressed as an integer multiple of
  *         the virtual fast-clock period. Empty when no clocks are requested.
  */
object FindScaledPeriodGCD {
  def apply(phaseRelationships: Seq[(Int, Int)]): Seq[BigInt] = {
    // Scale every period by the product of all divisors so each one becomes an exact integer.
    val productOfDivisors = phaseRelationships.map(_._1).foldLeft(BigInt(1))(_ * _)
    val scaledPeriods = phaseRelationships.map { case (divisor, multiplier) =>
      multiplier * productOfDivisors / divisor
    }
    // reduceOption (rather than reduce) so an empty request yields an empty result
    // instead of throwing UnsupportedOperationException.
    scaledPeriods.reduceOption(_ gcd _).fold(Seq.empty[BigInt]) { gcdOfScaledPeriods =>
      scaledPeriods.map(_ / gcdOfScaledPeriods)
    }
  }
}
/** /**
* Generates an infinite clock token stream based on rational relationship of each clock. * Generates an infinite clock token stream based on rational relationship of each clock.
* To improve simulator FMR, this module always produces non-zero clock tokens * To improve simulator FMR, this module always produces non-zero clock tokens

View File

@ -28,7 +28,7 @@ class FAMEAnnotationSerialization extends AnyFlatSpec {
} }
"ClockChannel FCCAs" should "serialize and deserialize correctly" in { "ClockChannel FCCAs" should "serialize and deserialize correctly" in {
val clockInfo = TargetClockChannel(Seq(RationalClock("test",1,2))) val clockInfo = TargetClockChannel(Seq(RationalClock("test",1,2)), Seq(1))
val anno = baseFCCA.copy(channelInfo = clockInfo) val anno = baseFCCA.copy(channelInfo = clockInfo)
val deserAnno = serializeAndDeserialize(anno) val deserAnno = serializeAndDeserialize(anno)
assert(anno == deserAnno) assert(anno == deserAnno)