From ce293ee4c54a8c6bec4482d0211dfbea91f7a192 Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Wed, 23 Sep 2020 18:23:43 -0500 Subject: [PATCH] [QoREstimation] design parameters data structure; initial impl of longer path search --- include/Dialect/HLSCpp/Attributes.td | 15 --- lib/Transforms/PragmaDSE.cpp | 2 + lib/Transforms/QoREstimation.cpp | 178 +++++++++++++++++++-------- 3 files changed, 130 insertions(+), 65 deletions(-) diff --git a/include/Dialect/HLSCpp/Attributes.td b/include/Dialect/HLSCpp/Attributes.td index 5bb4607..720ff1c 100644 --- a/include/Dialect/HLSCpp/Attributes.td +++ b/include/Dialect/HLSCpp/Attributes.td @@ -50,19 +50,4 @@ def LegalPartitionType : AttrConstraint {} -//===----------------------------------------------------------------------===// -// Pragma bind_op Constraints -//===----------------------------------------------------------------------===// - -def LegalOpImpl : AttrConstraint().getValue() == \"dsp\"">, - CPred<"$_self.cast().getValue() == \"fabric\"">, - CPred<"$_self.cast().getValue() == \"meddsp\"">, - CPred<"$_self.cast().getValue() == \"fulldsp\"">, - CPred<"$_self.cast().getValue() == \"maxdsp\"">, - CPred<"$_self.cast().getValue() == \"primitivedsp\""> -]>>; - -def OpImplAttr : Confined {} - #endif // SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD diff --git a/lib/Transforms/PragmaDSE.cpp b/lib/Transforms/PragmaDSE.cpp index 0d4e706..c2c4467 100644 --- a/lib/Transforms/PragmaDSE.cpp +++ b/lib/Transforms/PragmaDSE.cpp @@ -6,8 +6,10 @@ #include "Transforms/INIReader.h" #include "Transforms/Passes.h" +using namespace std; using namespace mlir; using namespace scalehls; +using namespace hlscpp; namespace { struct PragmaDSE : public PragmaDSEBase { diff --git a/lib/Transforms/QoREstimation.cpp b/lib/Transforms/QoREstimation.cpp index 9128e8b..16dacfc 100644 --- a/lib/Transforms/QoREstimation.cpp +++ b/lib/Transforms/QoREstimation.cpp @@ -6,66 +6,153 @@ #include "Transforms/INIReader.h" #include "Transforms/Passes.h" +using namespace 
std; using namespace mlir; using namespace scalehls; +using namespace hlscpp; + +/// This enum lists all possible parameter kinds for "processes" (function, +/// for/parallel loop, and if). +enum class ProcParam { + // Pragma configurations. + EnablePipeline, + InitialInterval, + UnrollFactor, + + // Performance parameters. + LoopBound, + IterLatency, + Latency, + + // Resource parameters. + LUT, + DSP, + BRAM +}; + +/// This enum lists all possible parameter kinds for memories (memref, +/// tensor, and vector). +enum class MemParam { + // Pragma configurations. + StorageType, + StorageImpl, + PartitionType, + PartitionFactor, + InterfaceMode, + + // Performance parameters. + ReadNum, + WriteNum, + ReadPorts, + WritePorts, + DepdcyLatency, + DepdcyDistance, + + // Resource parameters. + LUT, + BRAM +}; -/* namespace { class QoREstimator { public: - explicit QoREstimator(std::string toolConfigPath, std::string opLatencyPath) { - INIReader toolConfig(toolConfigPath); - if (toolConfig.ParseError()) - llvm::outs() << "error: Tool configuration file parse fail.\n"; + explicit QoREstimator(std::string targetSpecPath, std::string opLatencyPath); - INIReader opLatency(opLatencyPath); - if (opLatency.ParseError()) - llvm::outs() << "error: Op latency file parse fail.\n"; - - auto freq = toolConfig.Get("config", "frequency", "200MHz"); - auto latency = opLatency.GetInteger(freq, "op", 0); - llvm::outs() << latency << "\n"; + /// Get parameters. + unsigned getMemParam(Value *mem, MemParam kind) { + return memParams[mem][(unsigned)kind]; + } + unsigned getProcParam(Operation *proc, ProcParam kind) { + return procParams[proc][(unsigned)kind]; } - void estimateLoop(AffineForOp loop); + /// These methods can extract static parameters and pragma configurations (if + /// applicable) of the input CDFG, and update them in procParams or memParams. 
+ void analyzePragma(ModuleOp module); + void analyzeModule(ModuleOp module); + + /// These methods can estimate the performance and resource utilization of a + /// specific MLIR structure, and update them in procParams or memParams. + void estimateAffineFor(AffineForOp affineFor); + void estimateAffineParallel(AffineParallelOp affineParallel); + void estimateAffineIf(AffineIfOp affineIf); void estimateFunc(FuncOp func); void estimateModule(ModuleOp module); private: + DenseMap> procParams; + DenseMap> memParams; + + // Set parameters. Each setter takes the same kind enum as its getter. + void setMemParam(Value *mem, MemParam kind, unsigned param) { + memParams[mem][(unsigned)kind] = param; + } + void setProcParam(Operation *proc, ProcParam kind, unsigned param) { + procParams[proc][(unsigned)kind] = param; + } }; } // namespace +/// Estimator constructor. +QoREstimator::QoREstimator(std::string targetSpecPath, + std::string opLatencyPath) { + INIReader targetSpec(targetSpecPath); + if (targetSpec.ParseError()) + llvm::outs() << "error: target spec file parse fail, please refer to " + "--help option and pass in correct file path\n"; + + INIReader opLatency(opLatencyPath); + if (opLatency.ParseError()) + llvm::outs() << "error: Op latency file parse fail, please refer to " + "--help option and pass in correct file path\n"; + + auto freq = targetSpec.Get("config", "frequency", "200MHz"); + auto latency = opLatency.GetInteger(freq, "op", 0); + llvm::outs() << latency << "\n"; +} + +/// This method will search the longest path in a DAG block using an ASAP (As +/// Soon As Possible) manner. Loop, function, if, and other operation owning +/// regions will be considered as a whole. +unsigned searchLongestPath(Block &block) { + DenseMap valueReadyTime; + unsigned blockReadyTime = 0; + for (auto &op : block) { + + // Calculate ready time of all predecessors. 
+ unsigned allPredsReadyTime = 0; + for (auto operand : op.getOperands()) { + if (operand.getKind() == Value::Kind::BlockArgument) + continue; + else if (operand.getParentBlock() != &block) + continue; + else + allPredsReadyTime = max(allPredsReadyTime, valueReadyTime[operand]); + } + + // Calculate ready time of the current operation. + unsigned opReadyTime = allPredsReadyTime + 1; + for (auto result : op.getResults()) + valueReadyTime[result] = opReadyTime; + + // Update block ready time. + blockReadyTime = max(blockReadyTime, opReadyTime); + } + return blockReadyTime; +} + /// For now, estimation for unrolled loops are following the analytical model /// of COMBA, which is suspected to be wrong. Meanwhile, we assume the absence /// of function call in the loop body. -void QoREstimator::estimateLoop(AffineForOp loop) { - auto &body = loop.getLoopBody(); +void QoREstimator::estimateAffineFor(AffineForOp affineFor) { + auto &body = affineFor.getLoopBody(); if (body.getBlocks().size() != 1) - loop.emitError("has zero or more than one basic blocks."); + affineFor.emitError("has zero or more than one basic blocks."); - auto paramOp = dyn_cast(body.front().front()); - if (!paramOp) { - loop.emitError("doesn't have parameter operations as front."); - return; - } - - // TODO: a simple AEAP scheduling. - unsigned iterLatency = paramOp.getNonprocLatency(); for (auto &op : body.front()) { - if (auto subLoop = dyn_cast(op)) { - estimateLoop(subLoop); - auto subParamOp = - dyn_cast(subLoop.getLoopBody().front().front()); - iterLatency += subParamOp.getLatency(); - } + if (auto subAffineFor = dyn_cast(op)) + estimateAffineFor(subAffineFor); } - - unsigned latency = iterLatency; - // When loop is not completely unrolled. 
- if (paramOp.getLoopBound() > 1) - latency = iterLatency * paramOp.getLoopBound() * paramOp.getUnrollFactor(); - auto builder = Builder(paramOp.getContext()); - paramOp.setAttr("latency", builder.getUI32IntegerAttr(latency)); } /// For now, function pipelining and task-level dataflow optimizations are not @@ -74,25 +161,18 @@ void QoREstimator::estimateFunc(FuncOp func) { if (func.getBlocks().size() != 1) func.emitError("has zero or more than one basic blocks."); - auto paramOp = dyn_cast(func.front().front()); - if (!paramOp) { - func.emitError("doesn't have parameter operations as front."); - return; - } - // Recursively estimate latency of sub-elements, including functions and // loops. These sub-elements will be considered as a normal node in the CDFG // for function latency estimzation. for (auto &op : func.front()) { if (auto subFunc = dyn_cast(op)) estimateFunc(subFunc); - else if (auto subLoop = dyn_cast(op)) - estimateLoop(subLoop); + else if (auto subAffineFor = dyn_cast(op)) + estimateAffineFor(subAffineFor); } // Estimate function latency. - for (auto &op : func.front()) { - } + llvm::outs() << searchLongestPath(func.front()) << "\n"; } void QoREstimator::estimateModule(ModuleOp module) { @@ -104,12 +184,10 @@ void QoREstimator::estimateModule(ModuleOp module) { } } -*/ - namespace { struct QoREstimation : public QoREstimationBase { void runOnOperation() override { - // QoREstimator(toolConfig, opLatency).estimateModule(getOperation()); + QoREstimator(targetSpec, opLatency).estimateModule(getOperation()); } }; } // namespace