From 117a1bd0f42bb786be37635d2114f6150899379c Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Fri, 18 Dec 2020 23:42:41 -0600 Subject: [PATCH] [HLSCpp] eliminate PragmaOps, update ArrayOp definition; [Analysis] refactor Utils; [StoreForward] start of this pass --- include/Analysis/Passes.h | 2 +- include/Analysis/Utils.h | 73 ++++---- include/Conversion/Passes.h | 4 +- include/Dialect/HLSCpp/HLSCpp.td | 1 - include/Dialect/HLSCpp/PragmaOps.td | 81 --------- include/Dialect/HLSCpp/StructureOps.td | 6 + include/Dialect/HLSCpp/Visitor.h | 14 +- include/Dialect/HLSKernel/HLSKernel.h | 2 + include/Dialect/HLSKernel/Interfaces.td | 8 + include/Transforms/Passes.h | 19 ++- include/Transforms/Passes.td | 10 ++ lib/Analysis/QoREstimation.cpp | 141 ++++------------ lib/Analysis/Utils.cpp | 109 ++++++++++++ lib/EmitHLSCpp/EmitHLSCpp.cpp | 84 ---------- lib/Transforms/ArrayPartition.cpp | 4 +- lib/Transforms/StoreForward.cpp | 212 ++++++++++++++++++++++++ 16 files changed, 424 insertions(+), 346 deletions(-) delete mode 100644 include/Dialect/HLSCpp/PragmaOps.td create mode 100644 lib/Analysis/Utils.cpp create mode 100644 lib/Transforms/StoreForward.cpp diff --git a/include/Analysis/Passes.h b/include/Analysis/Passes.h index e9feff3..77be3e7 100644 --- a/include/Analysis/Passes.h +++ b/include/Analysis/Passes.h @@ -15,7 +15,7 @@ class Pass; namespace mlir { namespace scalehls { -std::unique_ptr createQoREstimationPass(); +std::unique_ptr createQoREstimationPass(); void registerAnalysisPasses(); diff --git a/include/Analysis/Utils.h b/include/Analysis/Utils.h index 00a204c..babd800 100644 --- a/include/Analysis/Utils.h +++ b/include/Analysis/Utils.h @@ -19,7 +19,20 @@ class HLSCppAnalysisBase { public: explicit HLSCppAnalysisBase(OpBuilder builder) : builder(builder) {} - OpBuilder builder; + /// Get partition information methods. + StringRef getPartitionType(hlscpp::ArrayOp op, unsigned dim) { + if (auto attr = op.partition_type()[dim].cast()) + return attr.getValue(); + else + return ""; + } + + unsigned getPartitionFactor(hlscpp::ArrayOp op, unsigned dim) { + if (auto attr = op.partition_factor()[dim].cast()) + return attr.getUInt(); + else + return 0; + } /// Get attribute value methods. int32_t getIntAttrValue(Operation *op, StringRef name) { @@ -50,21 +63,6 @@ public: return ""; } - /// Get partition information methods. - StringRef getPartitionType(hlscpp::ArrayOp op, unsigned dim) { - if (auto attr = op.partition_type()[dim].cast()) - return attr.getValue(); - else - return ""; - } - - unsigned getPartitionFactor(hlscpp::ArrayOp op, unsigned dim) { - if (auto attr = op.partition_factor()[dim].cast()) - return attr.getUInt(); - else - return 0; - } - /// Set attribute value methods. void setAttrValue(Operation *op, StringRef name, int32_t value) { op->setAttr(name, builder.getI32IntegerAttr(value)); @@ -82,46 +80,31 @@ public: op->setAttr(name, builder.getStringAttr(value)); } - /// Set schedule attribute methods. - void setScheduleValue(Operation *op, unsigned begin, unsigned end) { - setAttrValue(op, "schedule_begin", begin); - setAttrValue(op, "schedule_end", end); - } + OpBuilder builder; }; //===----------------------------------------------------------------------===// -// Common Used Type Declarations +// Helper methods //===----------------------------------------------------------------------===// -// Profiled latency map. 
-using LatencyMap = llvm::StringMap; - -// For storing all memory access operations (including AffineLoadOp and -// AffineStoreOp) indexed by the array instance (ArrayOp). +// For storing all affine memory access operations (including AffineLoadOp and +// AffineStoreOp) indexed by the array (ArrayOp). using LoadStores = SmallVector; using LoadStoresMap = DenseMap; -// For storing all dependent operations indexed by the source operation. -using Depends = SmallVector; -using DependsMap = DenseMap; +// Check if the lhsOp and rhsOp is at the same scheduling level. In this check, +// AffineIfOp is transparent. +bool checkSameLevel(Operation *lhsOp, Operation *rhsOp); -// Indicate the unoccupied memory ports number. -struct PortInfo { - PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0) - : rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {} +// Get the pointer of the scrOp's parent loop, which should locate at the same +// level with dstOp's any parent loop. +Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp); - unsigned rdPort; - unsigned wrPort; - unsigned rdwrPort; -}; +/// Get the definition ArrayOp given any memory access operation. +hlscpp::ArrayOp getArrayOp(Operation *op); -// For storing ports number of all partitions indexed by the array instance -// (ArrayOp). -using Ports = SmallVector; -using PortsMap = DenseMap; - -// For storing PortsMap indexed by the scheduling level. -using PortsMapDict = DenseMap; +/// Collect all load and store operations in the block. +void getLoadStoresMap(Block &block, LoadStoresMap &map); } // namespace scalehls } // namespace mlir diff --git a/include/Conversion/Passes.h b/include/Conversion/Passes.h index db07c91..4103ee0 100644 --- a/include/Conversion/Passes.h +++ b/include/Conversion/Passes.h @@ -15,8 +15,8 @@ class Pass; namespace mlir { namespace scalehls { -std::unique_ptr createConvertToHLSCppPass(); -std::unique_ptr createHLSKernelToAffinePass(); +std::unique_ptr createConvertToHLSCppPass(); +std::unique_ptr createHLSKernelToAffinePass(); void registerConversionPasses(); diff --git a/include/Dialect/HLSCpp/HLSCpp.td b/include/Dialect/HLSCpp/HLSCpp.td index 6bdd6f1..c9eabb6 100644 --- a/include/Dialect/HLSCpp/HLSCpp.td +++ b/include/Dialect/HLSCpp/HLSCpp.td @@ -26,7 +26,6 @@ class HLSCppOp traits = []> : include "Interfaces.td" include "Attributes.td" -include "PragmaOps.td" include "StructureOps.td" #endif // SCALEHLS_DIALECT_HLSCPP_HLSCPP_TD diff --git a/include/Dialect/HLSCpp/PragmaOps.td b/include/Dialect/HLSCpp/PragmaOps.td deleted file mode 100644 index 968384e..0000000 --- a/include/Dialect/HLSCpp/PragmaOps.td +++ /dev/null @@ -1,81 +0,0 @@ -//===-------------------------------------------------------*- tablegen -*-===// -// Deprecated. Will be removed somehow in someday. -//===----------------------------------------------------------------------===// - -#ifndef SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD -#define SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD - -def ArrayPragmaOp : HLSCppOp<"array_pragma", [PragmaOpInterface]> { - let summary = "Apply array pragmas"; - let description = [{ - This hlscpp.func_pragma operation represent pragmas for arrays, such as - array partition, interface, and bind storage pragma. - }]; - - let arguments = (ins - // Targeted array. - Type : $variable, - - // Interface-related attributes. - DefaultValuedAttr : $interface, - DefaultValuedAttr : $interface_mode, - DefaultValuedAttr : $interface_depth, - - // BindStorage-related attributes. 
- DefaultValuedAttr : $storage, - DefaultValuedAttr : $storage_type, - DefaultValuedAttr : $storage_impl, - - // ArrayPartition-related attributes. - DefaultValuedAttr : $partition, - DefaultValuedAttr : $partition_type, - DefaultValuedAttr : $partition_factor - ); - - let assemblyFormat = [{`(` $variable `)` attr-dict `:` type($variable)}]; - let extraClassDeclaration = [{}]; -} - -def LoopPragmaOp : HLSCppOp<"loop_pragma", [ - PragmaOpInterface, - HasParent<"AffineForOp"> -]> { - let summary = "Apply loop pragmas"; - let description = [{ - This hlscpp.loop_pragma operation represent pragmas for loops, such as - pipeline, and unroll pragma. - }]; - - let arguments = (ins - // Pipeline-related attributes. - DefaultValuedAttr : $pipeline, - DefaultValuedAttr : $pipeline_II, - - // Loop-related attributes. - DefaultValuedAttr : $flatten, - DefaultValuedAttr : $unroll - ); - - let assemblyFormat = [{attr-dict}]; - let extraClassDeclaration = [{}]; -} - -def FuncPragmaOp : HLSCppOp<"func_pragma", [ - PragmaOpInterface, - HasParent<"FuncOp"> -]> { - let summary = "Apply function pragmas"; - let description = [{ - This hlscpp.func_pragma operation represent pragmas for functions, such as - pipeline, and dataflow pragma. - }]; - - let arguments = (ins - DefaultValuedAttr : $dataflow - ); - - let assemblyFormat = [{attr-dict}]; - let extraClassDeclaration = [{}]; -} - -#endif // SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD diff --git a/include/Dialect/HLSCpp/StructureOps.td b/include/Dialect/HLSCpp/StructureOps.td index 2d255d4..83ed55c 100644 --- a/include/Dialect/HLSCpp/StructureOps.td +++ b/include/Dialect/HLSCpp/StructureOps.td @@ -47,6 +47,12 @@ def ArrayOp : HLSCppOp<"array", [SameOperandsAndResultType]> { ); let results = (outs Type : $output); + + let extraClassDeclaration = [{ + ShapedType getShapedType() { + return getType().cast(); + } + }]; } def EndOp : HLSCppOp<"end", [Terminator]> { diff --git a/include/Dialect/HLSCpp/Visitor.h b/include/Dialect/HLSCpp/Visitor.h index 55359c4..f4b92e5 100644 --- a/include/Dialect/HLSCpp/Visitor.h +++ b/include/Dialect/HLSCpp/Visitor.h @@ -54,12 +54,9 @@ public: SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp, SignExtendIOp, IndexCastOp, CallOp, ReturnOp, // Structure operations. - AssignOp, ArrayOp, EndOp, - // Pragma operations. - LoopPragmaOp, FuncPragmaOp, ArrayPragmaOp>( - [&](auto opNode) -> ResultType { - return thisCast->visitOp(opNode, args...); - }) + AssignOp, ArrayOp, EndOp>([&](auto opNode) -> ResultType { + return thisCast->visitOp(opNode, args...); + }) .Default([&](auto opNode) -> ResultType { return thisCast->visitInvalidOp(op, args...); }); @@ -191,11 +188,6 @@ public: HANDLE(AssignOp); HANDLE(ArrayOp); HANDLE(EndOp); - - // Pragma operations. 
- HANDLE(LoopPragmaOp); - HANDLE(FuncPragmaOp); - HANDLE(ArrayPragmaOp); #undef HANDLE }; } // namespace scalehls diff --git a/include/Dialect/HLSKernel/HLSKernel.h b/include/Dialect/HLSKernel/HLSKernel.h index a6c76f4..c303a86 100644 --- a/include/Dialect/HLSKernel/HLSKernel.h +++ b/include/Dialect/HLSKernel/HLSKernel.h @@ -8,6 +8,8 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Dialect.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/StandardTypes.h" namespace mlir { namespace scalehls { diff --git a/include/Dialect/HLSKernel/Interfaces.td b/include/Dialect/HLSKernel/Interfaces.td index a38db3f..29e2a36 100644 --- a/include/Dialect/HLSKernel/Interfaces.td +++ b/include/Dialect/HLSKernel/Interfaces.td @@ -11,6 +11,14 @@ def HLSKernelOpInterface : OpInterface<"HLSKernelOpInterface"> { let description = [{ This interface indicates the operation is an HLS kernel. }]; + + let methods = [ + InterfaceMethod< + "Return the shaped type of the i-th operand", + "ShapedType", "getOperandShapedType", (ins "unsigned" : $i), + [{ return $_op.getOperation()->getOperand(i).getType().template cast(); }] + > + ]; } #endif // SCALEHLS_DIALECT_HLSKERNEL_INTERFACES_TD diff --git a/include/Transforms/Passes.h b/include/Transforms/Passes.h index 8def5b8..af962f4 100644 --- a/include/Transforms/Passes.h +++ b/include/Transforms/Passes.h @@ -16,21 +16,22 @@ namespace mlir { namespace scalehls { /// Pragma optimization passes. -std::unique_ptr createPragmaDSEPass(); -std::unique_ptr createLoopPipeliningPass(); -std::unique_ptr createArrayPartitionPass(); +std::unique_ptr createPragmaDSEPass(); +std::unique_ptr createLoopPipeliningPass(); +std::unique_ptr createArrayPartitionPass(); /// Loop optimization passes. -std::unique_ptr createAffineLoopPerfectionPass(); -std::unique_ptr createPartialAffineLoopTilePass(); -std::unique_ptr createRemoveVarLoopBoundPass(); +std::unique_ptr createAffineLoopPerfectionPass(); +std::unique_ptr createPartialAffineLoopTilePass(); +std::unique_ptr createRemoveVarLoopBoundPass(); /// Dataflow optimization passes. -std::unique_ptr createSplitFunctionPass(); -std::unique_ptr createLegalizeDataflowPass(); +std::unique_ptr createSplitFunctionPass(); +std::unique_ptr createLegalizeDataflowPass(); /// Bufferization passes. -std::unique_ptr createHLSKernelBufferizePass(); +std::unique_ptr createHLSKernelBufferizePass(); +std::unique_ptr createStoreForwardPass(); void registerTransformsPasses(); diff --git a/include/Transforms/Passes.td b/include/Transforms/Passes.td index 5678f0b..506fbc0 100644 --- a/include/Transforms/Passes.td +++ b/include/Transforms/Passes.td @@ -136,4 +136,14 @@ def HLSKernelBufferize : Pass<"hlskernel-bufferize", "FuncOp"> { let constructor = "mlir::scalehls::createHLSKernelBufferizePass()"; } +def StoreForward : Pass<"store-forward", "FuncOp"> { + let summary = "Forward store to load, including conditional stores"; + let description = [{ + This store-forward pass is similar to memref-dataflow-opt, but support to + forward stores in if statements. 
+ }]; + + let constructor = "mlir::scalehls::createStoreForwardPass()"; +} + #endif // SCALEHLS_TRANSFORMS_PASSES_TD diff --git a/lib/Analysis/QoREstimation.cpp b/lib/Analysis/QoREstimation.cpp index d9d7cbe..6dbe1f9 100644 --- a/lib/Analysis/QoREstimation.cpp +++ b/lib/Analysis/QoREstimation.cpp @@ -18,8 +18,10 @@ using namespace mlir; using namespace scalehls; using namespace hlscpp; +using LatencyMap = llvm::StringMap; + //===----------------------------------------------------------------------===// -// HLSCppEstimator Class Delaration +// HLSCppEstimator Class //===----------------------------------------------------------------------===// namespace { @@ -33,7 +35,34 @@ public: getFuncMemRefDepends(); } + // Indicate the unoccupied memory ports number. + struct PortInfo { + PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0) + : rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {} + + unsigned rdPort; + unsigned wrPort; + unsigned rdwrPort; + }; + + // For storing ports number of all partitions indexed by the array (ArrayOp). + using Ports = SmallVector; + using PortsMap = DenseMap; + + // For storing PortsMap indexed by the scheduling level. + using PortsMapDict = DenseMap; + + // For storing all dependent operations indexed by the source operation. + using Depends = SmallVector; + using DependsMap = DenseMap; + void getFuncMemRefDepends(); + + void setScheduleValue(Operation *op, unsigned begin, unsigned end) { + setAttrValue(op, "schedule_begin", begin); + setAttrValue(op, "schedule_end", end); + } + using HLSCppVisitorBase::visitOp; Optional visitUnhandledOp(Operation *op, unsigned begin) { // Default latency of any unhandled operation is 1. @@ -83,114 +112,6 @@ public: }; } // namespace -//===----------------------------------------------------------------------===// -// Helper methods -//===----------------------------------------------------------------------===// - -// Check if the lhsOp and rhsOp is at the same scheduling level. In this check, -// AffineIfOp is transparent. -static bool checkSameLevel(Operation *lhsOp, Operation *rhsOp) { - // If lhsOp and rhsOp are already at the same level, return true. - if (lhsOp->getBlock() == rhsOp->getBlock()) - return true; - - // Helper to get all surrounding AffineIfOps. - auto getSurroundIfs = - ([&](Operation *op, SmallVector &nests) { - nests.push_back(op); - auto currentOp = op; - while (true) { - if (auto parentOp = currentOp->getParentOfType()) { - nests.push_back(parentOp); - currentOp = parentOp; - } else - break; - } - }); - - SmallVector lhsNests; - SmallVector rhsNests; - - getSurroundIfs(lhsOp, lhsNests); - getSurroundIfs(rhsOp, rhsNests); - - // If any parent of lhsOp and any parent of rhsOp are at the same level, - // return true. - for (auto lhs : lhsNests) - for (auto rhs : rhsNests) - if (lhs->getBlock() == rhs->getBlock()) - return true; - - return false; -} - -// Get the pointer of the scrOp's parent loop, which should locate at the same -// level with dstOp's any parent loop. -static Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp) { - // If srcOp and dstOp are already at the same level, return the srcOp. - if (checkSameLevel(srcOp, dstOp)) - return dstOp; - - // Helper to get all surrouding AffineForOps. AffineIfOps are skipped. 
- auto getSurroundFors = - ([&](Operation *op, SmallVector &nests) { - nests.push_back(op); - auto currentOp = op; - while (true) { - if (auto parentOp = currentOp->getParentOfType()) { - nests.push_back(parentOp); - currentOp = parentOp; - } else if (auto parentOp = currentOp->getParentOfType()) - currentOp = parentOp; - else - break; - } - }); - - SmallVector srcNests; - SmallVector dstNests; - - getSurroundFors(srcOp, srcNests); - getSurroundFors(dstOp, dstNests); - - // If any parent of srcOp (or itself) and any parent of dstOp (or itself) are - // at the same level, return the pointer. - for (auto src : srcNests) - for (auto dst : dstNests) - if (checkSameLevel(src, dst)) - return dst; - - return nullptr; -} - -/// Get the definition ArrayOp given any memory access operation. -static ArrayOp getArrayOp(Operation *op) { - auto defOp = MemRefAccess(op).memref.getDefiningOp(); - assert(defOp && "MemRef is block argument"); - - auto arrayOp = dyn_cast(defOp); - assert(arrayOp && "MemRef is not defined by ArrayOp"); - - return arrayOp; -} - -/// Collect all load and store operations in the block. -static void getLoadStoresMap(Block &block, LoadStoresMap &map) { - for (auto &op : block) { - if (isa(op)) - map[getArrayOp(&op)].push_back(&op); - else if (op.getNumRegions()) { - for (auto ®ion : op.getRegions()) - for (auto &block : region) - getLoadStoresMap(block, map); - } - } -} - -//===----------------------------------------------------------------------===// -// HLSCppEstimator Class Definition -//===----------------------------------------------------------------------===// - /// Collect all dependencies detected in the function. void HLSCppEstimator::getFuncMemRefDepends() { // TODO: This can be simplified by traversing each ArrayOp in the function. @@ -258,7 +179,7 @@ int32_t HLSCppEstimator::getPartitionIndex(Operation *op) { if (type == "cyclic") idxExpr = expr % builder.getAffineConstantExpr(factor); else if (type == "block") { - auto size = arrayOp.getType().cast().getShape()[dim]; + auto size = arrayOp.getShapedType().getShape()[dim]; idxExpr = expr.floorDiv( builder.getAffineConstantExpr((size + factor - 1) / factor)); } diff --git a/lib/Analysis/Utils.cpp b/lib/Analysis/Utils.cpp new file mode 100644 index 0000000..bb55823 --- /dev/null +++ b/lib/Analysis/Utils.cpp @@ -0,0 +1,109 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +//===----------------------------------------------------------------------===// + +#include "Analysis/Utils.h" +#include "mlir/Analysis/AffineAnalysis.h" + +using namespace mlir; +using namespace scalehls; + +// Check if the lhsOp and rhsOp is at the same scheduling level. In this check, +// AffineIfOp is transparent. +bool scalehls::checkSameLevel(Operation *lhsOp, Operation *rhsOp) { + // If lhsOp and rhsOp are already at the same level, return true. + if (lhsOp->getBlock() == rhsOp->getBlock()) + return true; + + // Helper to get all surrounding AffineIfOps. + auto getSurroundIfs = + ([&](Operation *op, SmallVector &nests) { + nests.push_back(op); + auto currentOp = op; + while (true) { + if (auto parentOp = currentOp->getParentOfType()) { + nests.push_back(parentOp); + currentOp = parentOp; + } else + break; + } + }); + + SmallVector lhsNests; + SmallVector rhsNests; + + getSurroundIfs(lhsOp, lhsNests); + getSurroundIfs(rhsOp, rhsNests); + + // If any parent of lhsOp and any parent of rhsOp are at the same level, + // return true. 
+ for (auto lhs : lhsNests) + for (auto rhs : rhsNests) + if (lhs->getBlock() == rhs->getBlock()) + return true; + + return false; +} + +// Get the pointer of the scrOp's parent loop, which should locate at the same +// level with dstOp's any parent loop. +Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) { + // If srcOp and dstOp are already at the same level, return the srcOp. + if (checkSameLevel(srcOp, dstOp)) + return dstOp; + + // Helper to get all surrouding AffineForOps. AffineIfOps are skipped. + auto getSurroundFors = + ([&](Operation *op, SmallVector &nests) { + nests.push_back(op); + auto currentOp = op; + while (true) { + if (auto parentOp = currentOp->getParentOfType()) { + nests.push_back(parentOp); + currentOp = parentOp; + } else if (auto parentOp = currentOp->getParentOfType()) + currentOp = parentOp; + else + break; + } + }); + + SmallVector srcNests; + SmallVector dstNests; + + getSurroundFors(srcOp, srcNests); + getSurroundFors(dstOp, dstNests); + + // If any parent of srcOp (or itself) and any parent of dstOp (or itself) are + // at the same level, return the pointer. + for (auto src : srcNests) + for (auto dst : dstNests) + if (checkSameLevel(src, dst)) + return dst; + + return nullptr; +} + +/// Get the definition ArrayOp given any memory access operation. +hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) { + auto defOp = MemRefAccess(op).memref.getDefiningOp(); + assert(defOp && "MemRef is block argument"); + + auto arrayOp = dyn_cast(defOp); + assert(arrayOp && "MemRef is not defined by ArrayOp"); + + return arrayOp; +} + +/// Collect all load and store operations in the block. +void scalehls::getLoadStoresMap(Block &block, LoadStoresMap &map) { + for (auto &op : block) { + if (isa(op)) + map[getArrayOp(&op)].push_back(&op); + else if (op.getNumRegions()) { + for (auto ®ion : op.getRegions()) + for (auto &block : region) + getLoadStoresMap(block, map); + } + } +} diff --git a/lib/EmitHLSCpp/EmitHLSCpp.cpp b/lib/EmitHLSCpp/EmitHLSCpp.cpp index 9096f95..6044b68 100644 --- a/lib/EmitHLSCpp/EmitHLSCpp.cpp +++ b/lib/EmitHLSCpp/EmitHLSCpp.cpp @@ -201,11 +201,6 @@ public: void emitAssign(AssignOp *op); void emitArray(ArrayOp *op); - /// Pragma operation emitters. - void emitLoopPragma(LoopPragmaOp *op); - void emitFuncPragma(FuncPragmaOp *op); - void emitArrayPragma(ArrayPragmaOp *op); - /// Top-level MLIR module emitter. void emitModule(ModuleOp module); @@ -501,11 +496,6 @@ public: bool visitOp(ArrayOp op) { return emitter.emitArray(&op), true; } bool visitOp(EndOp op) { return true; } - /// Pragma operations. - bool visitOp(LoopPragmaOp op) { return emitter.emitLoopPragma(&op), true; } - bool visitOp(FuncPragmaOp op) { return emitter.emitFuncPragma(&op), true; } - bool visitOp(ArrayPragmaOp op) { return emitter.emitArrayPragma(&op), true; } - private: ModuleEmitter &emitter; }; @@ -1287,80 +1277,6 @@ void ModuleEmitter::emitArray(ArrayOp *op) { os << "\n"; } -/// Pragma operation emitters. (deprecated) -void ModuleEmitter::emitLoopPragma(LoopPragmaOp *op) { - indent(); - os << "#pragma HLS pipeline"; - if (op->pipeline()) - os << " II=" << op->pipeline_II(); - else - os << " off\n"; - - if (op->unroll()) { - indent(); - os << "#pragma HLS unroll\n"; - } - - // An empty line. - os << "\n"; -} - -void ModuleEmitter::emitFuncPragma(FuncPragmaOp *op) { - if (op->dataflow()) { - indent(); - os << "#pragma HLS dataflow\n"; - - // An empty line. 
- os << "\n"; - } -} - -void ModuleEmitter::emitArrayPragma(ArrayPragmaOp *op) { - if (op->interface()) { - - // Emit interface pragma. - indent(); - os << "#pragma HLS interface"; - os << " " << op->interface_mode(); - os << " port="; - emitValue(op->getOperand()); - if (op->interface_mode() == "m_axi") { - os << " depth=" << op->interface_depth(); - os << " offset=slave\n"; - } else - os << " storage_type=" << op->storage_type() << "\n"; - } else { - - // Emit bind_storage pragma. - indent(); - os << "#pragma HLS bind_storage"; - os << " variable="; - emitValue(op->getOperand()); - os << " type=" << op->storage_type(); - os << " impl=" << op->storage_impl() << "\n"; - } - - auto type = op->getOperand().getType().cast(); - if (op->partition() && type.hasStaticShape()) { - - // Emit array_partition pragma(s). - for (unsigned dim = 0; dim < type.getRank(); ++dim) { - indent(); - os << "#pragma HLS array_partition"; - os << " variable="; - emitValue(op->getOperand()); - auto partitionType = - op->partition_type()[dim].cast().getValue(); - os << " " << partitionType; - if (partitionType != "complete") - os << " factor=" - << op->partition_factor()[dim].cast().getUInt(); - os << " dim=" << dim + 1 << "\n"; - } - } - os << "\n"; -} - /// C++ component emitters. void ModuleEmitter::emitValue(Value val, unsigned rank, bool isPtr) { assert(!(rank && isPtr) && "should be either an array or a pointer."); diff --git a/lib/Transforms/ArrayPartition.cpp b/lib/Transforms/ArrayPartition.cpp index 025aa3c..e16586e 100644 --- a/lib/Transforms/ArrayPartition.cpp +++ b/lib/Transforms/ArrayPartition.cpp @@ -35,7 +35,7 @@ template static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) { for (auto pair : map) { auto arrayOp = cast(pair.first); - auto arrayType = arrayOp.getType().cast(); + auto arrayShape = arrayOp.getShapedType().getShape(); auto arrayAccesses = pair.second; // Walk through each dimension of the targeted array. @@ -43,7 +43,7 @@ static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) { SmallVector partitionType; unsigned partitionNum = 1; - for (size_t dim = 0, e = arrayType.getShape().size(); dim < e; ++dim) { + for (size_t dim = 0, e = arrayShape.size(); dim < e; ++dim) { // Collect all array access indices of the current dimension. 
       SmallVector indices;
       for (auto accessOp : arrayAccesses) {
diff --git a/lib/Transforms/StoreForward.cpp b/lib/Transforms/StoreForward.cpp
new file mode 100644
index 0000000..c2f686d
--- /dev/null
+++ b/lib/Transforms/StoreForward.cpp
@@ -0,0 +1,212 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Analysis/Utils.h"
+#include "Transforms/Passes.h"
+#include "mlir/Analysis/AffineAnalysis.h"
+#include "mlir/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Dominance.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+
+using namespace mlir;
+using namespace scalehls;
+
+namespace {
+// The store to load forwarding relies on three conditions:
+//
+// 1) they need to have mathematically equivalent affine access functions
+// (checked after full composition of load/store operands); this implies that
+// they access the same single memref element for all iterations of the common
+// surrounding loop,
+//
+// 2) the store op should dominate the load op,
+//
+// 3) among all op's that satisfy both (1) and (2), the one that postdominates
+// all store op's that have a dependence into the load, is provably the last
+// writer to the particular memref location being loaded at the load op, and
+// its store value can be forwarded to the load. Note that the only dependences
+// that are to be considered are those that are satisfied at the block* of the
+// innermost common surrounding loop of the <store, load> being considered.
+//
+// (* A dependence being satisfied at a block: a dependence that is satisfied by
+// virtue of the destination operation appearing textually / lexically after
+// the source operation within the body of a 'affine.for' operation; thus, a
+// dependence is always either satisfied by a loop or by a block).
+//
+// The above conditions are simple to check, sufficient, and powerful for most
+// cases in practice - they are sufficient, but not necessary --- since they
+// don't reason about loops that are guaranteed to execute at least once or
+// multiple sources to forward from.
+//
+// TODO: more forwarding can be done when support for
+// loop/conditional live-out SSA values is available.
+// TODO: do general dead store elimination for memref's. This pass
+// currently eliminates stores only if no other loads/uses (other than
+// dealloc) remain.
+//
+struct StoreForward : public StoreForwardBase<StoreForward> {
+  void runOnOperation() override;
+
+  void forwardStoreToLoad(AffineReadOpInterface loadOp);
+
+  // A list of memref's that are potentially dead / could be eliminated.
+  SmallPtrSet<Value, 4> memrefsToErase;
+  // Load op's whose results were replaced by those forwarded from stores.
+  SmallVector<Operation *, 8> loadOpsToErase;
+
+  DominanceInfo *domInfo = nullptr;
+  PostDominanceInfo *postDomInfo = nullptr;
+};
+
+} // end anonymous namespace
+
+/// Creates a pass to perform optimizations relying on memref dataflow such as
+/// store to load forwarding, elimination of dead stores, and dead allocs.
+std::unique_ptr<Pass> scalehls::createStoreForwardPass() {
+  return std::make_unique<StoreForward>();
+}
+
+// This is a straightforward implementation not optimized for speed. Optimize
+// if needed.
+void StoreForward::forwardStoreToLoad(AffineReadOpInterface loadOp) {
+  // First pass over the use list to get the minimum number of surrounding
+  // loops common between the load op and the store op, with min taken across
+  // all store ops.
+  SmallVector<Operation *, 8> storeOps;
+  unsigned minSurroundingLoops = getNestingDepth(loadOp);
+  for (auto *user : loadOp.getMemRef().getUsers()) {
+    auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
+    if (!storeOp)
+      continue;
+    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
+    minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
+    storeOps.push_back(storeOp);
+  }
+
+  // The list of store op candidates for forwarding that satisfy conditions
+  // (1) and (2) above - they will be filtered later when checking (3).
+  SmallVector<Operation *, 8> fwdingCandidates;
+
+  // Store ops that have a dependence into the load (even if they aren't
+  // forwarding candidates). Each forwarding candidate will be checked for a
+  // post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
+  SmallVector<Operation *, 8> depSrcStores;
+
+  for (auto *storeOp : storeOps) {
+    MemRefAccess srcAccess(storeOp);
+    MemRefAccess destAccess(loadOp);
+    // Find stores that may be reaching the load.
+    FlatAffineConstraints dependenceConstraints;
+    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
+    unsigned d;
+    // Dependences at loop depth <= minSurroundingLoops do NOT matter.
+    for (d = nsLoops + 1; d > minSurroundingLoops; d--) {
+      DependenceResult result = checkMemrefAccessDependence(
+          srcAccess, destAccess, d, &dependenceConstraints,
+          /*dependenceComponents=*/nullptr);
+      if (hasDependence(result))
+        break;
+    }
+    if (d == minSurroundingLoops)
+      continue;
+
+    // Stores that *may* be reaching the load.
+    depSrcStores.push_back(storeOp);
+
+    // 1. Check if the store and the load have mathematically equivalent
+    // affine access functions; this implies that they statically refer to the
+    // same single memref element. As an example this filters out cases like:
+    //     store %A[%i0 + 1]
+    //     load %A[%i0]
+    //     store %A[%M]
+    //     load %A[%N]
+    // Use the AffineValueMap difference based memref access equality checking.
+    if (srcAccess != destAccess)
+      continue;
+
+    // 2. The store has to dominate the load op to be candidate.
+    if (!domInfo->dominates(storeOp, loadOp)) {
+      llvm::outs() << *loadOp.getOperation() << "\n";
+      llvm::outs() << *storeOp << "\n";
+      llvm::outs() << "does not dominate\n";
+      continue;
+    }
+
+    // We now have a candidate for forwarding.
+    fwdingCandidates.push_back(storeOp);
+  }
+
+  // 3. Of all the store op's that meet the above criteria, the store that
+  // postdominates all 'depSrcStores' (if one exists) is the unique store
+  // providing the value to the load, i.e., provably the last writer to that
+  // memref loc.
+  // Note: this can be implemented in a cleaner way with postdominator tree
+  // traversals. Consider this for the future if needed.
+  Operation *lastWriteStoreOp = nullptr;
+  for (auto *storeOp : fwdingCandidates) {
+    if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
+          return postDomInfo->postDominates(storeOp, depStore);
+        })) {
+      lastWriteStoreOp = storeOp;
+      break;
+    }
+  }
+  if (!lastWriteStoreOp)
+    return;
+
+  // Perform the actual store to load forwarding.
+  Value storeVal =
+      cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
+  loadOp.getValue().replaceAllUsesWith(storeVal);
+  // Record the memref for a later sweep to optimize away.
+  memrefsToErase.insert(loadOp.getMemRef());
+  // Record this to erase later.
+  loadOpsToErase.push_back(loadOp);
+}
+
+void StoreForward::runOnOperation() {
+  // Only supports single block functions at the moment.
+  FuncOp f = getOperation();
+  if (!llvm::hasSingleElement(f)) {
+    markAllAnalysesPreserved();
+    return;
+  }
+
+  domInfo = &getAnalysis<DominanceInfo>();
+  postDomInfo = &getAnalysis<PostDominanceInfo>();
+
+  loadOpsToErase.clear();
+  memrefsToErase.clear();
+
+  // Walk all load's and perform store to load forwarding.
+  f.walk([&](AffineReadOpInterface loadOp) { forwardStoreToLoad(loadOp); });
+
+  // Erase all load op's whose results were replaced with store fwd'ed ones.
+  for (auto *loadOp : loadOpsToErase)
+    loadOp->erase();
+
+  // Check if the store fwd'ed memrefs are now left with only stores and can
+  // thus be completely deleted. Note: the canonicalize pass should be able
+  // to do this as well, but we'll do it here since we collected these anyway.
+  for (auto memref : memrefsToErase) {
+    // If the memref hasn't been alloc'ed in this function, skip.
+    Operation *defOp = memref.getDefiningOp();
+    if (!defOp || !isa<AllocOp>(defOp))
+      // TODO: if the memref was returned by a 'call' operation, we
+      // could still erase it if the call had no side-effects.
+      continue;
+    if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
+          return !isa<AffineWriteOpInterface, DeallocOp>(ownerOp);
+        }))
+      continue;
+
+    // Erase all stores, the dealloc, and the alloc on the memref.
+    for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
+      user->erase();
+    defOp->erase();
+  }
+}
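
The new pass is registered under the "store-forward" argument (Passes.td) and exposed through createStoreForwardPass() (include/Transforms/Passes.h). Below is a minimal sketch of wiring it into a pass pipeline, not part of the patch: the driver function is hypothetical, and the exact header paths for FuncOp/ModuleOp depend on the LLVM revision the project pins.

// Sketch only: run the new store-forward pass on every function in a module.
#include "Transforms/Passes.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"
#include "mlir/Pass/PassManager.h"

static mlir::LogicalResult runStoreForward(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  // The pass is function-scoped (Pass<"store-forward", "FuncOp">), so nest it
  // under FuncOp rather than adding it at the module level.
  pm.addNestedPass<mlir::FuncOp>(mlir::scalehls::createStoreForwardPass());
  return pm.run(module);
}

On the command line this corresponds to passing -store-forward to the project's opt-style tool.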
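
For reference, a sketch of how the relocated Analysis/Utils.h helpers compose. dumpArrayAccessCounts is a hypothetical helper written for illustration; keying the map by the defining operation and casting it back to ArrayOp mirrors the existing usage in ArrayPartition.cpp.

// Sketch only: count affine loads/stores per hlscpp.array in a function.
#include "Analysis/Utils.h"
#include "llvm/Support/raw_ostream.h"

using namespace mlir;
using namespace scalehls;

static void dumpArrayAccessCounts(FuncOp func) {
  LoadStoresMap map;
  // getLoadStoresMap() recurses into nested regions, so passing the entry
  // block is enough to cover the whole function body.
  getLoadStoresMap(func.front(), map);

  for (auto &pair : map) {
    // The map is keyed by the defining ArrayOp of each accessed memref.
    auto arrayOp = cast<hlscpp::ArrayOp>(pair.first);
    llvm::outs() << "array of rank " << arrayOp.getShapedType().getRank()
                 << " has " << pair.second.size() << " affine accesses\n";
  }
}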
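
Lastly, a scalar restatement of the partition-index arithmetic that HLSCppEstimator::getPartitionIndex builds as affine expressions (cyclic: expr % factor; block: expr floordiv ceil(size/factor)). The standalone function and its parameters are illustrative only and not part of the estimator.

// Sketch only: which physical partition a constant 1-D index maps to.
#include <cassert>
#include <cstdint>

// For a single dimension of size `size` partitioned with `factor`:
// cyclic partitioning interleaves elements round-robin across partitions,
// while block partitioning splits the dimension into contiguous chunks.
static int64_t getPartitionIndex1D(bool isCyclic, int64_t index,
                                   int64_t factor, int64_t size) {
  assert(factor > 0 && index >= 0 && index < size);
  if (isCyclic)
    return index % factor;                 // expr % factor
  int64_t chunkSize = (size + factor - 1) / factor;
  return index / chunkSize;                // expr floordiv ceil(size / factor)
}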