From 21968283d19b75ccc9a3ebd0aa16a8c47c4d11f6 Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Thu, 24 Sep 2020 00:30:03 -0500 Subject: [PATCH] [QoREstimation] split out QoREstimation.h, StaticParam.h, and Visitor.h; refine QoREstimation code structure with new created classes; a relative completed searchLongestPath implementation --- include/Transforms/QoREstimation.h | 78 ++++++++ include/Transforms/StaticParam.h | 94 ++++++++++ include/Visitor.h | 187 +++++++++++++++++++ lib/EmitHLSCpp/EmitHLSCpp.cpp | 176 +---------------- lib/Transforms/PragmaDSE.cpp | 1 + lib/Transforms/QoREstimation.cpp | 270 ++++++++++++++------------- test/Dialect/HLSCpp/test_pragma.mlir | 2 +- 7 files changed, 505 insertions(+), 303 deletions(-) create mode 100644 include/Transforms/QoREstimation.h create mode 100644 include/Transforms/StaticParam.h create mode 100644 include/Visitor.h diff --git a/include/Transforms/QoREstimation.h b/include/Transforms/QoREstimation.h new file mode 100644 index 0000000..b9e0d29 --- /dev/null +++ b/include/Transforms/QoREstimation.h @@ -0,0 +1,78 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +//===----------------------------------------------------------------------===// + +#ifndef SCALEHLS_TRANSFORMS_QORESTIMATION_H +#define SCALEHLS_TRANSFORMS_QORESTIMATION_H + +#include "StaticParam.h" +#include "Visitor.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Pass/Pass.h" +#include "llvm/ADT/TypeSwitch.h" + +namespace mlir { +namespace scalehls { + +//===----------------------------------------------------------------------===// +// HLSCppAnalyzer Class Declaration +//===----------------------------------------------------------------------===// + +class HLSCppAnalyzer : public HLSCppVisitorBase { +public: + explicit HLSCppAnalyzer(ProcParam &procParam, MemParam &memParam) + : procParam(procParam), memParam(memParam) {} + + ProcParam &procParam; + MemParam &memParam; + + bool 
visitUnhandledOp(Operation *op) { return true; } + + using HLSCppVisitorBase::visitOp; + bool visitOp(AffineForOp op); + bool visitOp(AffineParallelOp op); + bool visitOp(AffineIfOp op); + + void analyzeOperation(Operation *op); + void analyzeFunc(FuncOp func); + void analyzeBlock(Block &block); + void analyzeModule(ModuleOp module); +}; + +//===----------------------------------------------------------------------===// +// QoREstimator Class Declaration +//===----------------------------------------------------------------------===// + +class QoREstimator : public HLSCppVisitorBase { +public: + explicit QoREstimator(ProcParam &procParam, MemParam &memParam, + std::string targetSpecPath, std::string opLatencyPath); + + ProcParam &procParam; + MemParam &memParam; + + bool visitUnhandledOp(Operation *op) { return true; } + + using HLSCppVisitorBase::visitOp; + /// These methods can estimate the performance and resource utilization of a + /// specific MLIR structure, and update them in procParams or memroyParams. + bool visitOp(AffineForOp op); + bool visitOp(AffineParallelOp op); + bool visitOp(AffineIfOp op); + + /// These methods are used for searching longest path in a DAG. + void updateValueTimeStamp(Operation *currentOp, unsigned opTimeStamp, + DenseMap &valueTimeStampMap); + unsigned searchLongestPath(Block &block); + + /// MLIR component estimators. 
+ void estimateOperation(Operation *op); + void estimateFunc(FuncOp func); + void estimateBlock(Block &block); + void estimateModule(ModuleOp module); +}; + +} // namespace scalehls +} // namespace mlir + +#endif // SCALEHLS_TRANSFORMS_QORESTIMATION_H diff --git a/include/Transforms/StaticParam.h b/include/Transforms/StaticParam.h new file mode 100644 index 0000000..2172e98 --- /dev/null +++ b/include/Transforms/StaticParam.h @@ -0,0 +1,94 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +//===----------------------------------------------------------------------===// + +#ifndef SCALEHLS_TRANSFORMS_STATICPARAM_H +#define SCALEHLS_TRANSFORMS_STATICPARAM_H + +#include "mlir/IR/Operation.h" +#include "mlir/IR/Value.h" + +namespace mlir { +namespace scalehls { + +//===----------------------------------------------------------------------===// +// ParamBase class +//===----------------------------------------------------------------------===// + +template class ParamBase { +public: + void init(KeyType key) { + for (unsigned i = 0, e = (unsigned)ParamKind::KindNum; i < e; ++i) + Params[key].push_back(0); + } + + unsigned get(KeyType key, ParamKind kind) { + return Params[key][(unsigned)kind]; + } + + void set(KeyType key, ParamKind kind, unsigned param) { + Params[key][(unsigned)kind] = param; + } + +private: + DenseMap> Params; +}; + +//===----------------------------------------------------------------------===// +// ProcParam and MemParam classes +//===----------------------------------------------------------------------===// + +enum class ProcParamKind { + // Process-related pragam configurations. + EnablePipeline, + InitialInterval, + UnrollFactor, + + // Performance parameters. + LoopBound, + IterLatency, + Latency, + + // Resource parameters. + LUT, + BRAM, + DSP, + + KindNum = DSP + 1 +}; + +enum class MemParamKind { + // Pragma configurations. 
+ StorageType, + StorageImpl, + PartitionType, + PartitionFactor, + InterfaceMode, + + // Performance parameters. + ReadNum, + WriteNum, + ReadPorts, + WritePorts, + DepdcyLatency, + DepdcyDistance, + + // Resource parameters. + LUT, + BRAM, + + KindNum = BRAM + 1 +}; + +/// This class includes all possible parameters kind for "processes" (function, +/// for/parallel loop, and if). +class ProcParam : public ParamBase {}; + +/// This class includes all possible parameters kind for memories (memref, +/// tensor, and vector). +class MemParam : public ParamBase {}; + +} // namespace scalehls +} // namespace mlir + +#endif // SCALEHLS_TRANSFORMS_STATICPARAM_H diff --git a/include/Visitor.h b/include/Visitor.h new file mode 100644 index 0000000..ae639a5 --- /dev/null +++ b/include/Visitor.h @@ -0,0 +1,187 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +//===----------------------------------------------------------------------===// + +#ifndef SCALEHLS_VISITOR_H +#define SCALEHLS_VISITOR_H + +#include "Dialect/HLSCpp/HLSCpp.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" + +namespace mlir { +namespace scalehls { + +using namespace hlscpp; + +/// This class is a visitor for SSACFG operation nodes. +template +class HLSCppVisitorBase { +public: + ResultType dispatchVisitor(Operation *op, ExtraArgs... args) { + auto *thisCast = static_cast(this); + return TypeSwitch(op) + .template Case< + // Affine statements. + AffineForOp, AffineIfOp, AffineParallelOp, AffineApplyOp, + AffineMaxOp, AffineMinOp, AffineLoadOp, AffineStoreOp, + AffineYieldOp, AffineVectorLoadOp, AffineVectorStoreOp, + AffineDmaStartOp, AffineDmaWaitOp, + // Memref-related statements. + AllocOp, AllocaOp, LoadOp, StoreOp, DeallocOp, DmaStartOp, + DmaWaitOp, AtomicRMWOp, GenericAtomicRMWOp, AtomicYieldOp, + MemRefCastOp, ViewOp, SubViewOp, + // Tensor-related statements. 
+ TensorLoadOp, TensorStoreOp, ExtractElementOp, TensorFromElementsOp, + SplatOp, TensorCastOp, DimOp, RankOp, + // Unary expressions. + AbsFOp, CeilFOp, NegFOp, CosOp, SinOp, TanhOp, SqrtOp, RsqrtOp, + ExpOp, Exp2Op, LogOp, Log2Op, Log10Op, + // Float binary expressions. + CmpFOp, AddFOp, SubFOp, MulFOp, DivFOp, RemFOp, + // Integer binary expressions. + CmpIOp, AddIOp, SubIOp, MulIOp, SignedDivIOp, SignedRemIOp, + UnsignedDivIOp, UnsignedRemIOp, XOrOp, AndOp, OrOp, ShiftLeftOp, + SignedShiftRightOp, UnsignedShiftRightOp, + // Complex expressions. + AddCFOp, SubCFOp, ImOp, ReOp, CreateComplexOp, + // Special operations. + SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp, + SignExtendIOp, IndexCastOp, CallOp, ReturnOp, AssignOp, EndOp, + // Pragma operations. + ApplyPragmasOp, PragmaPipelineOp, PragmaUnrollOp, + PragmaArrayPartitionOp>([&](auto opNode) -> ResultType { + return thisCast->visitOp(opNode, args...); + }) + .Default([&](auto opNode) -> ResultType { + return thisCast->visitInvalidOp(op, args...); + }); + } + + /// This callback is invoked on any invalid operations. + ResultType visitInvalidOp(Operation *op, ExtraArgs... args) { + op->emitOpError("is unsupported operation."); + abort(); + } + + /// This callback is invoked on any operations that are not handled by the + /// concrete visitor. + ResultType visitUnhandledOp(Operation *op, ExtraArgs... args) { + return ResultType(); + } + +#define HANDLE(OPTYPE) \ + ResultType visitOp(OPTYPE op, ExtraArgs... args) { \ + return static_cast(this)->visitUnhandledOp(op, args...); \ + } + + // Affine statements. + HANDLE(AffineForOp); + HANDLE(AffineIfOp); + HANDLE(AffineParallelOp); + HANDLE(AffineApplyOp); + HANDLE(AffineMaxOp); + HANDLE(AffineMinOp); + HANDLE(AffineLoadOp); + HANDLE(AffineStoreOp); + HANDLE(AffineYieldOp); + HANDLE(AffineVectorLoadOp); + HANDLE(AffineVectorStoreOp); + HANDLE(AffineDmaStartOp); + HANDLE(AffineDmaWaitOp); + + // Memref-related statements. 
+ HANDLE(AllocOp); + HANDLE(AllocaOp); + HANDLE(LoadOp); + HANDLE(StoreOp); + HANDLE(DeallocOp); + HANDLE(DmaStartOp); + HANDLE(DmaWaitOp); + HANDLE(AtomicRMWOp); + HANDLE(GenericAtomicRMWOp); + HANDLE(AtomicYieldOp); + HANDLE(MemRefCastOp); + HANDLE(ViewOp); + HANDLE(SubViewOp); + + // Tensor-related statements. + HANDLE(TensorLoadOp); + HANDLE(TensorStoreOp); + HANDLE(ExtractElementOp); + HANDLE(TensorFromElementsOp); + HANDLE(SplatOp); + HANDLE(TensorCastOp); + HANDLE(DimOp); + HANDLE(RankOp); + + // Unary expressions. + HANDLE(AbsFOp); + HANDLE(CeilFOp); + HANDLE(NegFOp); + HANDLE(CosOp); + HANDLE(SinOp); + HANDLE(TanhOp); + HANDLE(SqrtOp); + HANDLE(RsqrtOp); + HANDLE(ExpOp); + HANDLE(Exp2Op); + HANDLE(LogOp); + HANDLE(Log2Op); + HANDLE(Log10Op); + + // Float binary expressions. + HANDLE(CmpFOp); + HANDLE(AddFOp); + HANDLE(SubFOp); + HANDLE(MulFOp); + HANDLE(DivFOp); + HANDLE(RemFOp); + + // Integer binary expressions. + HANDLE(CmpIOp); + HANDLE(AddIOp); + HANDLE(SubIOp); + HANDLE(MulIOp); + HANDLE(SignedDivIOp); + HANDLE(SignedRemIOp); + HANDLE(UnsignedDivIOp); + HANDLE(UnsignedRemIOp); + HANDLE(XOrOp); + HANDLE(AndOp); + HANDLE(OrOp); + HANDLE(ShiftLeftOp); + HANDLE(SignedShiftRightOp); + HANDLE(UnsignedShiftRightOp); + + // Complex expressions. + HANDLE(AddCFOp); + HANDLE(SubCFOp); + HANDLE(ImOp); + HANDLE(ReOp); + HANDLE(CreateComplexOp); + + // Special operations. + HANDLE(SelectOp); + HANDLE(ConstantOp); + HANDLE(CopySignOp); + HANDLE(TruncateIOp); + HANDLE(ZeroExtendIOp); + HANDLE(SignExtendIOp); + HANDLE(IndexCastOp); + HANDLE(CallOp); + HANDLE(ReturnOp); + HANDLE(AssignOp); + HANDLE(EndOp); + + // Pragma operations. 
+ HANDLE(ApplyPragmasOp); + HANDLE(PragmaPipelineOp); + HANDLE(PragmaUnrollOp); + HANDLE(PragmaArrayPartitionOp); +#undef HANDLE +}; +} // namespace scalehls +} // namespace mlir + +#endif // SCALEHLS_VISITOR_H \ No newline at end of file diff --git a/lib/EmitHLSCpp/EmitHLSCpp.cpp b/lib/EmitHLSCpp/EmitHLSCpp.cpp index 70f192c..e4d887f 100644 --- a/lib/EmitHLSCpp/EmitHLSCpp.cpp +++ b/lib/EmitHLSCpp/EmitHLSCpp.cpp @@ -4,6 +4,7 @@ #include "EmitHLSCpp.h" #include "Dialect/HLSCpp/HLSCpp.h" +#include "Visitor.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/SCF/SCF.h" @@ -26,9 +27,6 @@ using namespace hlscpp; //===----------------------------------------------------------------------===// // Some Base Classes -// -// These classes should be factored out, and can be inherited by emitters -// targeting various backends (e.g., Xilinx Vivado HLS, Intel FPGAs, etc.). //===----------------------------------------------------------------------===// namespace { @@ -130,176 +128,6 @@ SmallString<8> HLSCppEmitterBase::getName(Value val) { return state.nameTable[val]; } -namespace { -/// This class is a visitor for SSACFG operation nodes. -template -class HLSCppVisitorBase { -public: - ResultType dispatchVisitor(Operation *op, ExtraArgs... args) { - auto *thisCast = static_cast(this); - return TypeSwitch(op) - .template Case< - // Affine statements. - AffineForOp, AffineIfOp, AffineParallelOp, AffineApplyOp, - AffineMaxOp, AffineMinOp, AffineLoadOp, AffineStoreOp, - AffineYieldOp, AffineVectorLoadOp, AffineVectorStoreOp, - AffineDmaStartOp, AffineDmaWaitOp, - // Memref-related statements. - AllocOp, AllocaOp, LoadOp, StoreOp, DeallocOp, DmaStartOp, - DmaWaitOp, AtomicRMWOp, GenericAtomicRMWOp, AtomicYieldOp, - MemRefCastOp, ViewOp, SubViewOp, - // Tensor-related statements. - TensorLoadOp, TensorStoreOp, ExtractElementOp, TensorFromElementsOp, - SplatOp, TensorCastOp, DimOp, RankOp, - // Unary expressions. 
- AbsFOp, CeilFOp, NegFOp, CosOp, SinOp, TanhOp, SqrtOp, RsqrtOp, - ExpOp, Exp2Op, LogOp, Log2Op, Log10Op, - // Float binary expressions. - CmpFOp, AddFOp, SubFOp, MulFOp, DivFOp, RemFOp, - // Integer binary expressions. - CmpIOp, AddIOp, SubIOp, MulIOp, SignedDivIOp, SignedRemIOp, - UnsignedDivIOp, UnsignedRemIOp, XOrOp, AndOp, OrOp, ShiftLeftOp, - SignedShiftRightOp, UnsignedShiftRightOp, - // Complex expressions. - AddCFOp, SubCFOp, ImOp, ReOp, CreateComplexOp, - // Special operations. - SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp, - SignExtendIOp, IndexCastOp, CallOp, ReturnOp, AssignOp, EndOp, - // Pragma operations. - ApplyPragmasOp, PragmaPipelineOp, PragmaUnrollOp, - PragmaArrayPartitionOp>([&](auto opNode) -> ResultType { - return thisCast->visitOp(opNode, args...); - }) - .Default([&](auto opNode) -> ResultType { - return thisCast->visitInvalidOp(op, args...); - }); - } - - /// This callback is invoked on any invalid operations. - ResultType visitInvalidOp(Operation *op, ExtraArgs... args) { - op->emitOpError("is unsupported operation."); - abort(); - } - - /// This callback is invoked on any operations that are not handled by the - /// concrete visitor. - ResultType visitUnhandledOp(Operation *op, ExtraArgs... args) { - return ResultType(); - } - -#define HANDLE(OPTYPE) \ - ResultType visitOp(OPTYPE op, ExtraArgs... args) { \ - return static_cast(this)->visitUnhandledOp(op, args...); \ - } - - // Affine statements. - HANDLE(AffineForOp); - HANDLE(AffineIfOp); - HANDLE(AffineParallelOp); - HANDLE(AffineApplyOp); - HANDLE(AffineMaxOp); - HANDLE(AffineMinOp); - HANDLE(AffineLoadOp); - HANDLE(AffineStoreOp); - HANDLE(AffineYieldOp); - HANDLE(AffineVectorLoadOp); - HANDLE(AffineVectorStoreOp); - HANDLE(AffineDmaStartOp); - HANDLE(AffineDmaWaitOp); - - // Memref-related statements. 
- HANDLE(AllocOp); - HANDLE(AllocaOp); - HANDLE(LoadOp); - HANDLE(StoreOp); - HANDLE(DeallocOp); - HANDLE(DmaStartOp); - HANDLE(DmaWaitOp); - HANDLE(AtomicRMWOp); - HANDLE(GenericAtomicRMWOp); - HANDLE(AtomicYieldOp); - HANDLE(MemRefCastOp); - HANDLE(ViewOp); - HANDLE(SubViewOp); - - // Tensor-related statements. - HANDLE(TensorLoadOp); - HANDLE(TensorStoreOp); - HANDLE(ExtractElementOp); - HANDLE(TensorFromElementsOp); - HANDLE(SplatOp); - HANDLE(TensorCastOp); - HANDLE(DimOp); - HANDLE(RankOp); - - // Unary expressions. - HANDLE(AbsFOp); - HANDLE(CeilFOp); - HANDLE(NegFOp); - HANDLE(CosOp); - HANDLE(SinOp); - HANDLE(TanhOp); - HANDLE(SqrtOp); - HANDLE(RsqrtOp); - HANDLE(ExpOp); - HANDLE(Exp2Op); - HANDLE(LogOp); - HANDLE(Log2Op); - HANDLE(Log10Op); - - // Float binary expressions. - HANDLE(CmpFOp); - HANDLE(AddFOp); - HANDLE(SubFOp); - HANDLE(MulFOp); - HANDLE(DivFOp); - HANDLE(RemFOp); - - // Integer binary expressions. - HANDLE(CmpIOp); - HANDLE(AddIOp); - HANDLE(SubIOp); - HANDLE(MulIOp); - HANDLE(SignedDivIOp); - HANDLE(SignedRemIOp); - HANDLE(UnsignedDivIOp); - HANDLE(UnsignedRemIOp); - HANDLE(XOrOp); - HANDLE(AndOp); - HANDLE(OrOp); - HANDLE(ShiftLeftOp); - HANDLE(SignedShiftRightOp); - HANDLE(UnsignedShiftRightOp); - - // Complex expressions. - HANDLE(AddCFOp); - HANDLE(SubCFOp); - HANDLE(ImOp); - HANDLE(ReOp); - HANDLE(CreateComplexOp); - - // Special operations. - HANDLE(SelectOp); - HANDLE(ConstantOp); - HANDLE(CopySignOp); - HANDLE(TruncateIOp); - HANDLE(ZeroExtendIOp); - HANDLE(SignExtendIOp); - HANDLE(IndexCastOp); - HANDLE(CallOp); - HANDLE(ReturnOp); - HANDLE(AssignOp); - HANDLE(EndOp); - - // Pragma operations. 
- HANDLE(ApplyPragmasOp); - HANDLE(PragmaPipelineOp); - HANDLE(PragmaUnrollOp); - HANDLE(PragmaArrayPartitionOp); -#undef HANDLE -}; -} // namespace - //===----------------------------------------------------------------------===// // ModuleEmitter Class Declaration //===----------------------------------------------------------------------===// @@ -655,7 +483,7 @@ private: } // namespace //===----------------------------------------------------------------------===// -// ModuleEmitter Class Implementation +// ModuleEmitter Class Definition //===----------------------------------------------------------------------===// /// Affine statement emitters. diff --git a/lib/Transforms/PragmaDSE.cpp b/lib/Transforms/PragmaDSE.cpp index c2c4467..c350138 100644 --- a/lib/Transforms/PragmaDSE.cpp +++ b/lib/Transforms/PragmaDSE.cpp @@ -5,6 +5,7 @@ #include "Dialect/HLSCpp/HLSCpp.h" #include "Transforms/INIReader.h" #include "Transforms/Passes.h" +#include "Transforms/QoREstimation.h" using namespace std; using namespace mlir; diff --git a/lib/Transforms/QoREstimation.cpp b/lib/Transforms/QoREstimation.cpp index 16dacfc..61ddaf4 100644 --- a/lib/Transforms/QoREstimation.cpp +++ b/lib/Transforms/QoREstimation.cpp @@ -2,100 +2,61 @@ // //===----------------------------------------------------------------------===// +#include "Transforms/QoREstimation.h" #include "Dialect/HLSCpp/HLSCpp.h" #include "Transforms/INIReader.h" #include "Transforms/Passes.h" +#include "Visitor.h" using namespace std; using namespace mlir; using namespace scalehls; using namespace hlscpp; -/// This class includes all possible parameters kind for "processes" (function, -/// for/parallel loop, and if). -enum class ProcParam { - // Pragam configurations. 
- EnablePipeline, - InitialInterval, - UnrollFactor, +//===----------------------------------------------------------------------===// +// HLSCppAnalyzer Class Definition +//===----------------------------------------------------------------------===// - // Performance parameters. - LoopBound, - IterLatency, - Latency, +bool HLSCppAnalyzer::visitOp(AffineForOp op) { return true; } - // Resource parameters. - LUT, - DSP, - BRAM -}; +bool HLSCppAnalyzer::visitOp(AffineParallelOp op) { return true; } -/// This class includes all possible parameters kind for memories (memref, -/// tensor, and vector). -enum class MemParam { - // Pragma configurations. - StorageType, - StorageImpl, - PartitionType, - PartitionFactor, - InterfaceMode, +bool HLSCppAnalyzer::visitOp(AffineIfOp op) { return true; } - // Performance parameters. - ReadNum, - WriteNum, - ReadPorts, - WritePorts, - DepdcyLatency, - DepdcyDistance, +/// This method will update all parameters except IterLatency, Latency, LUT, +/// BRAM, and DSP through static analysis. +void HLSCppAnalyzer::analyzeOperation(Operation *op) { + if (dispatchVisitor(op)) + return; - // Resource parameters. - LUT, - BRAM -}; + op->emitError("can't be correctly analyzed."); +} -namespace { -class QoREstimator { -public: - explicit QoREstimator(std::string targetSpecPath, std::string opLatencyPath); +void HLSCppAnalyzer::analyzeFunc(FuncOp func) { procParam.init(func); } - /// Get parameters. - unsigned getMemParam(Value *mem, MemParam kind) { - return memParams[mem][(unsigned)kind]; - } - unsigned getProcParam(Operation *proc, ProcParam kind) { - return procParams[proc][(unsigned)kind]; +void HLSCppAnalyzer::analyzeBlock(Block &block) { + for (auto &op : block) + analyzeOperation(&op); +} + +/// This method is a wrapper for recursively calling operation analyzer. 
+void HLSCppAnalyzer::analyzeModule(ModuleOp module) { + for (auto &op : module) { + if (auto func = dyn_cast(op)) { + analyzeFunc(func); + } else if (!isa(op)) + op.emitError("is unsupported operation."); } +} - /// These methods can extract static parameters and pragma configurations (if - /// applicable) of the input CDFG, and update them in procParams or memParams. - void analyzePragma(ModuleOp module); - void analyzeModule(ModuleOp module); - - /// These methods can estimate the performance and resource utilization of a - /// specific MLIR structure, and update them in procParams or memroyParams. - void estimateAffineFor(AffineForOp affineFor); - void estimateAffineParallel(AffineParallelOp affineParallel); - void estimateAffineIf(AffineIfOp affineIf); - void estimateFunc(FuncOp func); - void estimateModule(ModuleOp module); - -private: - DenseMap> procParams; - DenseMap> memParams; - - // Set parameters. - void setMemParam(Value *mem, unsigned kind, unsigned param) { - memParams[mem][(unsigned)kind] = param; - } - void setProcParam(Operation *proc, MemParam kind, unsigned param) { - procParams[proc][(unsigned)kind] = param; - } -}; -} // namespace +//===----------------------------------------------------------------------===// +// QoREstimator Class Definition +//===----------------------------------------------------------------------===// /// Estimator constructor. 
-QoREstimator::QoREstimator(std::string targetSpecPath, - std::string opLatencyPath) { +QoREstimator::QoREstimator(ProcParam &procParam, MemParam &memParam, + string targetSpecPath, string opLatencyPath) + : procParam(procParam), memParam(memParam) { INIReader targetSpec(targetSpecPath); if (targetSpec.ParseError()) llvm::outs() << "error: target spec file parse fail, please refer to " @@ -111,83 +72,136 @@ QoREstimator::QoREstimator(std::string targetSpecPath, llvm::outs() << latency << "\n"; } -/// This method will search the longest path in a DAG block using a ASAP (As -/// Soon As Possible) manner. Loop, function, if, and other operation owning -/// regions will be considered as a whole. -unsigned searchLongestPath(Block &block) { - DenseMap valueReadyTime; - unsigned blockReadyTime = 0; - for (auto &op : block) { - - // Calculate ready time of all predecessors. - unsigned allPredsReadyTime = 0; - for (auto operand : op.getOperands()) { - if (operand.getKind() == Value::Kind::BlockArgument) - continue; - else if (operand.getParentBlock() != &block) - continue; - else - allPredsReadyTime = max(allPredsReadyTime, valueReadyTime[operand]); - } - - // Calculate ready time of the current operation. - unsigned opReadyTime = allPredsReadyTime + 1; - for (auto result : op.getResults()) - valueReadyTime[result] = opReadyTime; - - // Update block ready time. - blockReadyTime = max(blockReadyTime, opReadyTime); - } - return blockReadyTime; -} - /// For now, estimation for unrolled loops are following the analytical model /// of COMBA, which is suspected to be wrong. Meanwhile, we assume the absence /// of function call in the loop body. -void QoREstimator::estimateAffineFor(AffineForOp affineFor) { - auto &body = affineFor.getLoopBody(); +/// +/// This method will update ProcParam::IterLatency and ProcParam::Latency of the +/// current affine for loop. 
+bool QoREstimator::visitOp(AffineForOp op) { + auto &body = op.getLoopBody(); if (body.getBlocks().size() != 1) - affineFor.emitError("has zero or more than one basic blocks."); - - for (auto &op : body.front()) { - if (auto subAffineFor = dyn_cast(op)) - estimateAffineFor(subAffineFor); - } -} - -/// For now, function pipelining and task-level dataflow optimizations are not -/// considered for simplicity. -void QoREstimator::estimateFunc(FuncOp func) { - if (func.getBlocks().size() != 1) - func.emitError("has zero or more than one basic blocks."); + op.emitError("has zero or more than one basic blocks."); // Recursively estimate latency of sub-elements, including functions and // loops. These sub-elements will be considered as a normal node in the CDFG // for function latency estimzation. - for (auto &op : func.front()) { - if (auto subFunc = dyn_cast(op)) - estimateFunc(subFunc); - else if (auto subAffineFor = dyn_cast(op)) - estimateAffineFor(subAffineFor); + for (auto &op : body.front()) { + estimateOperation(&op); } + // Estimate iteration latency. + unsigned iterLatency = searchLongestPath(body.front()); + procParam.set(op, ProcParamKind::IterLatency, iterLatency); + + // Estimate affine for loop latency. + unsigned latency = iterLatency; + if (procParam.get(op, ProcParamKind::LoopBound) != 1) + latency *= procParam.get(op, ProcParamKind::LoopBound) * + procParam.get(op, ProcParamKind::UnrollFactor); + procParam.set(op, ProcParamKind::Latency, latency); +} + +bool QoREstimator::visitOp(AffineParallelOp op) { return true; } + +bool QoREstimator::visitOp(AffineIfOp op) { return true; } + +/// This method recursively update the time stamp of all values (1) directly +/// generated as result by the current operation or (2) generated by any +/// operations insided of the region held by the current operation. 
+void QoREstimator::updateValueTimeStamp( + Operation *currentOp, unsigned opTimeStamp, + DenseMap &valueTimeStampMap) { + for (auto result : currentOp->getResults()) + valueTimeStampMap[result] = opTimeStamp; + for (auto &region : currentOp->getRegions()) { + for (auto &op : region.front()) + updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap); + } +} + +/// This method will search the longest path in a DAG block using a ASAP (As +/// Soon As Possible) manner. Loop, function, if, and other operation owning +/// regions will be considered as a whole. +unsigned QoREstimator::searchLongestPath(Block &block) { + DenseMap valueTimeStampMap; + unsigned blockTimeStamp = 0; + + for (auto &op : block) { + unsigned opTimeStamp = 0; + + // Add the latest ready time among all predecessors. + for (auto operand : op.getOperands()) + opTimeStamp = max(opTimeStamp, valueTimeStampMap[operand]); + + // Add latency of the current operation. + if (auto subAffineFor = dyn_cast(op)) + opTimeStamp += procParam.get(subAffineFor, ProcParamKind::Latency); + else + opTimeStamp += 1; + blockTimeStamp = max(blockTimeStamp, opTimeStamp); + + // Update ready time of each value generated by the current operation. + updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap); + } + return blockTimeStamp; +} + +void QoREstimator::estimateOperation(Operation *op) { + if (dispatchVisitor(op)) + return; + + op->emitError("can't be correctly estimated."); +} + +/// For now, function pipelining and task-level dataflow optimizations are not +/// considered for simplicity. Meanwhile, we assume the absence of function call +/// in the loop body. +/// +/// This method will update ProcParam::Latency of the current function. +void QoREstimator::estimateFunc(FuncOp func) { + if (func.getBlocks().size() != 1) + func.emitError("has zero or more than one basic blocks."); + + estimateBlock(func.front()); + // Estimate function latency. 
- llvm::outs() << searchLongestPath(func.front()) << "\n"; + unsigned latency = searchLongestPath(func.front()); + procParam.set(func, ProcParamKind::Latency, latency); +} + +void QoREstimator::estimateBlock(Block &block) { + for (auto &op : block) + estimateOperation(&op); } void QoREstimator::estimateModule(ModuleOp module) { for (auto &op : module) { - if (auto func = dyn_cast(op)) + if (auto func = dyn_cast(op)) { estimateFunc(func); - else if (!isa(op)) + } else if (!isa(op)) op.emitError("is unsupported operation."); } } +//===----------------------------------------------------------------------===// +// Entry of scalehls-opt +//===----------------------------------------------------------------------===// + namespace { struct QoREstimation : public QoREstimationBase { void runOnOperation() override { - QoREstimator(targetSpec, opLatency).estimateModule(getOperation()); + ProcParam procParam; + MemParam memParam; + + // Extract all static parameters and current pragma configurations. + HLSCppAnalyzer analyzer(procParam, memParam); + analyzer.analyzeModule(getOperation()); + + // Estimate performance and resource utilization. + QoREstimator estimator(analyzer.procParam, analyzer.memParam, targetSpec, + opLatency); + estimator.estimateModule(getOperation()); } }; } // namespace diff --git a/test/Dialect/HLSCpp/test_pragma.mlir b/test/Dialect/HLSCpp/test_pragma.mlir index 1b1af62..7325b55 100644 --- a/test/Dialect/HLSCpp/test_pragma.mlir +++ b/test/Dialect/HLSCpp/test_pragma.mlir @@ -1,4 +1,4 @@ -// RUN: scalehls-opt -pragma-dse %s | FileCheck %s +// RUN: scalehls-opt -qor-estimation -pragma-dse %s | FileCheck %s // CHECK-LABEL: func @test_pragma() func @test_pragma() {