From 07d77f7193d34c3204e36d897f522bc384dd5d3c Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Thu, 7 Jan 2021 23:29:49 -0600 Subject: [PATCH] [ArrayPartition] support multi-loops array partition; factor out applyArrayPartition() method (#20) --- include/Dialect/HLSCpp/Attributes.td | 22 --- include/Dialect/HLSCpp/HLSCpp.h | 4 +- include/Dialect/HLSCpp/StructureOps.td | 37 ----- include/Transforms/Passes.h | 2 + lib/Analysis/QoREstimation.cpp | 2 +- lib/Analysis/Utils.cpp | 2 +- lib/EmitHLSCpp/EmitHLSCpp.cpp | 2 +- lib/Transforms/ArrayPartition.cpp | 206 ++++++++++++++----------- 8 files changed, 122 insertions(+), 155 deletions(-) diff --git a/include/Dialect/HLSCpp/Attributes.td b/include/Dialect/HLSCpp/Attributes.td index 745dec5..4a1e82d 100644 --- a/include/Dialect/HLSCpp/Attributes.td +++ b/include/Dialect/HLSCpp/Attributes.td @@ -5,28 +5,6 @@ #ifndef SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD #define SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD -//===----------------------------------------------------------------------===// -// Customized ui32 Attributes -//===----------------------------------------------------------------------===// - -def PositiveI64Attr : Confined {} -def PositiveI64ArrayAttr : TypedArrayAttrBase {} - -//===----------------------------------------------------------------------===// -// Pragma array_partition Attributes -//===----------------------------------------------------------------------===// - -def PartitionTypeAttr : StrEnumAttr<"PartitionType", "", [ - StrEnumAttrCase<"cyclic", 0>, - StrEnumAttrCase<"block", 1>, - StrEnumAttrCase<"complete", 2>, - StrEnumAttrCase<"none", 3> -]> { - let cppNamespace = "::mlir::scalehls::hlscpp"; -} - -def PartitionTypeArrayAttr : TypedArrayAttrBase {} - //===----------------------------------------------------------------------===// // Pragma Interface Attributes (for array ports) //===----------------------------------------------------------------------===// diff --git a/include/Dialect/HLSCpp/HLSCpp.h 
b/include/Dialect/HLSCpp/HLSCpp.h index e67ff63..d474966 100644 --- a/include/Dialect/HLSCpp/HLSCpp.h +++ b/include/Dialect/HLSCpp/HLSCpp.h @@ -25,9 +25,11 @@ enum class MemoryKind { // URAM_S2P = 4, // URAM_T2P = 5, - DRAM = 3, + DRAM = 3 }; +enum class PartitionKind { CYCLIC = 0, BLOCK = 1, NONE = 2 }; + } // namespace hlscpp } // namespace scalehls } // namespace mlir diff --git a/include/Dialect/HLSCpp/StructureOps.td b/include/Dialect/HLSCpp/StructureOps.td index fda7a47..30ed58f 100644 --- a/include/Dialect/HLSCpp/StructureOps.td +++ b/include/Dialect/HLSCpp/StructureOps.td @@ -18,43 +18,6 @@ def AssignOp : HLSCppOp<"assign", [SameOperandsAndResultType]> { let results = (outs AnyType : $output); } -// Deprecated. Will be removed in the future. -def ArrayOp : HLSCppOp<"array", [SameOperandsAndResultType]> { - let summary = "A C++ array instance"; - let description = [{ - This hlscpp.array operation represent an array in C++. All shaped type value - (e.g., memref, tensor, and vector) should be passed through this operation - after declared by an allocation (e.g., Alloc, etc.) operation or in the - signature of a function. This will help the compiler to easily manage the - attributs and statistics of arrays. - }]; - - let arguments = (ins Type : $input, - - // Interface-related attributes. - DefaultValuedAttr : $interface, - DefaultValuedAttr : $interface_mode, - - // BindStorage-related attributes. - DefaultValuedAttr : $storage, - DefaultValuedAttr : $storage_type, - - // ArrayPartition-related attributes. 
- DefaultValuedAttr : $partition, - DefaultValuedAttr : $partition_num, - DefaultValuedAttr : $partition_type, - DefaultValuedAttr : $partition_factor - ); - - let results = (outs Type : $output); - - let extraClassDeclaration = [{ - ShapedType getShapedType() { - return getType().cast(); - } - }]; -} - def EndOp : HLSCppOp<"end", [Terminator]> { let summary = "Mark the end of a HLSCpp region"; let description = [{ diff --git a/include/Transforms/Passes.h b/include/Transforms/Passes.h index ccae218..60c6463 100644 --- a/include/Transforms/Passes.h +++ b/include/Transforms/Passes.h @@ -31,6 +31,8 @@ bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder); /// fully unrolled. bool applyLoopPipelining(AffineForOp loop, OpBuilder &builder); +bool applyArrayPartition(FuncOp func, OpBuilder &builder); + //===----------------------------------------------------------------------===// // Optimization Pass Entries //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/QoREstimation.cpp b/lib/Analysis/QoREstimation.cpp index 6718faa..1e8f5d6 100644 --- a/lib/Analysis/QoREstimation.cpp +++ b/lib/Analysis/QoREstimation.cpp @@ -241,7 +241,7 @@ int64_t HLSCppEstimator::getPartitionIndex(Operation *op) { int64_t partitionIdx = 0; int64_t accumFactor = 1; - for (auto dim = 0; dim < memrefType.getRank(); ++dim) { + for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) { auto idxExpr = composeMap.getResult(dim); if (auto constExpr = idxExpr.dyn_cast()) diff --git a/lib/Analysis/Utils.cpp b/lib/Analysis/Utils.cpp index 67534d4..d8eed49 100644 --- a/lib/Analysis/Utils.cpp +++ b/lib/Analysis/Utils.cpp @@ -177,7 +177,7 @@ int64_t scalehls::getPartitionFactors(MemRefType memrefType, auto layoutMap = getLayoutMap(memrefType, memrefType.getContext()); int64_t accumFactor = 1; - for (unsigned dim = 0; dim < memrefType.getRank(); ++dim) { + for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) { int64_t factor = 1; if 
(!layoutMap.isEmpty()) { diff --git a/lib/EmitHLSCpp/EmitHLSCpp.cpp b/lib/EmitHLSCpp/EmitHLSCpp.cpp index 7afc4ab..a2d39d4 100644 --- a/lib/EmitHLSCpp/EmitHLSCpp.cpp +++ b/lib/EmitHLSCpp/EmitHLSCpp.cpp @@ -1397,7 +1397,7 @@ void ModuleEmitter::emitArrayPragmas(Value memref) { SmallVector factors; getPartitionFactors(type, &factors); - for (unsigned dim = 0; dim < type.getRank(); ++dim) { + for (int64_t dim = 0; dim < type.getRank(); ++dim) { if (factors[dim] != 1) { emitPragmaFlag = true; diff --git a/lib/Transforms/ArrayPartition.cpp b/lib/Transforms/ArrayPartition.cpp index 31cd0e0..dfda41c 100644 --- a/lib/Transforms/ArrayPartition.cpp +++ b/lib/Transforms/ArrayPartition.cpp @@ -4,7 +4,9 @@ #include "Analysis/Utils.h" #include "Transforms/Passes.h" +#include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/IR/AffineValueMap.h" using namespace std; using namespace mlir; @@ -13,89 +15,135 @@ using namespace hlscpp; namespace { struct ArrayPartition : public ArrayPartitionBase { - void runOnOperation() override; + void runOnOperation() override { + auto func = getOperation(); + auto builder = OpBuilder(func); + + applyArrayPartition(func, builder); + } }; } // namespace -static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) { - SmallVector nestedLoops; - root.walk([&](mlir::AffineForOp loop) { - if (auto attr = loop.getAttrOfType("pipeline")) { +bool scalehls::applyArrayPartition(FuncOp func, OpBuilder &builder) { + // Only memory accesses in pipelined loops will be executed in parallel. 
+ SmallVector pipelinedLoops; + func.walk([&](AffineForOp loop) { + if (auto attr = loop.getAttrOfType("pipeline")) if (attr.getValue()) - nestedLoops.push_back(loop); - } + pipelinedLoops.push_back(loop); }); - if (nestedLoops.empty()) - return nullptr; - else - return nestedLoops.back(); -} -template -static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) { - for (auto pair : map) { + // Storing the partition information of each memref. + using PartitionInfo = std::pair; + DenseMap> partitionsMap; + + // Traverse all pipelined loops. + for (auto loop : pipelinedLoops) { + MemAccessesMap accessesMap; + getMemAccessesMap(loop.getLoopBody().front(), accessesMap); + + for (auto pair : accessesMap) { + auto memref = pair.first; + auto memrefType = memref.getType().cast(); + auto loadStores = pair.second; + auto &partitions = partitionsMap[memref]; + + // If the current partitionsMap is empty, initialize it with no partition + // and factor of 1. + if (partitions.empty()) { + for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) + partitions.push_back(PartitionInfo(PartitionKind::NONE, 1)); + } + + // Find the best partition solution for each dimension of the memref. + for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) { + // Collect all array access indices of the current dimension. + SmallVector indices; + for (auto accessOp : loadStores) { + // Get memory access map. + AffineValueMap accessMap; + MemRefAccess(accessOp).getAccessMap(&accessMap); + + // Get index expression. + auto index = accessMap.getResult(dim); + + // Only add unique index. + if (std::find(indices.begin(), indices.end(), index) == indices.end()) + indices.push_back(index); + } + auto accessNum = indices.size(); + + // Find the max array access distance in the current block. + unsigned maxDistance = 0; + + for (unsigned i = 0; i < accessNum; ++i) { + for (unsigned j = i + 1; j < accessNum; ++j) { + // TODO: this expression can't be simplified in some cases. 
+ auto expr = indices[j] - indices[i]; + + if (auto constDistance = expr.dyn_cast()) { + unsigned distance = abs(constDistance.getValue()); + maxDistance = max(maxDistance, distance); + } + } + } + + // Determine array partition strategy. + // TODO: take storage type into consideration. + maxDistance += 1; + if (maxDistance == 1) { + // This means all accesses have the same index, and this dimension + // should not be partitioned. + continue; + + } else if (accessNum >= maxDistance) { + // This means some elements are accessed more than once or exactly + // once, and successive elements are accessed. In most cases, applying + // "cyclic" partition should be the best solution. + unsigned factor = maxDistance; + if (factor > partitions[dim].second) + partitions[dim] = PartitionInfo(PartitionKind::CYCLIC, factor); + + } else { + // This means discrete elements are accessed. Typically, "block" + // partition will be most beneficial for this occasion. + unsigned factor = accessNum; + if (factor > partitions[dim].second) + partitions[dim] = PartitionInfo(PartitionKind::BLOCK, factor); + } + } + } + } + + // Construct and set new type to each partitioned MemRefType. + for (auto pair : partitionsMap) { auto memref = pair.first; auto memrefType = memref.getType().cast(); - auto loadStores = pair.second; + auto partitions = pair.second; - // Walk through each dimension of the targeted array. + // Walk through each dimension of the current memory. SmallVector partitionIndices; SmallVector addressIndices; - for (unsigned dim = 0; dim < memrefType.getRank(); ++dim) { - // Collect all array access indices of the current dimension. - SmallVector indices; - for (auto accessOp : loadStores) { - auto concreteOp = cast(accessOp); - auto index = concreteOp.getAffineMap().getResult(dim); - // Only add unique index. 
- if (std::find(indices.begin(), indices.end(), index) == indices.end()) - indices.push_back(index); - } - auto accessNum = indices.size(); - - // Find the max array access distance in the current block. - unsigned maxDistance = 0; - - for (unsigned i = 0; i < accessNum; ++i) { - for (unsigned j = i + 1; j < accessNum; ++j) { - // TODO: this expression can't be simplified. - auto expr = indices[j] - indices[i]; - - if (auto constDistance = expr.dyn_cast()) { - unsigned distance = abs(constDistance.getValue()); - maxDistance = max(maxDistance, distance); - } - } - } - - // Determine array partition strategy. - maxDistance += 1; - if (maxDistance == 1) { - // This means all accesses have the same index, and this dimension - // should not be partitioned. - partitionIndices.push_back(builder.getAffineConstantExpr(0)); - addressIndices.push_back(builder.getAffineDimExpr(dim)); - - } else if (accessNum >= maxDistance) { - // This means some elements are accessed more than once or exactly - // once, and successive elements are accessed. In most cases, - // apply "cyclic" partition should be the best solution. - unsigned factor = maxDistance; + for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) { + auto partition = partitions[dim]; + auto kind = partition.first; + auto factor = partition.second; + if (kind == PartitionKind::CYCLIC) { partitionIndices.push_back(builder.getAffineDimExpr(dim) % factor); addressIndices.push_back( builder.getAffineDimExpr(dim).floorDiv(factor)); - } else { - // This means discrete elements are accessed. Typically, "block" - // partition will be most benefit for this occasion. 
- unsigned factor = accessNum; - + } else if (kind == PartitionKind::BLOCK) { auto blockFactor = (memrefType.getShape()[dim] + factor - 1) / factor; partitionIndices.push_back( builder.getAffineDimExpr(dim).floorDiv(blockFactor)); addressIndices.push_back(builder.getAffineDimExpr(dim) % blockFactor); + + } else { + partitionIndices.push_back(builder.getAffineConstantExpr(0)); + addressIndices.push_back(builder.getAffineDimExpr(dim)); } } @@ -112,41 +160,15 @@ static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) { // Set new type. memref.setType(newType); } -} - -void ArrayPartition::runOnOperation() { - auto func = getOperation(); - auto builder = OpBuilder(func); - - // Apply array partition. - for (auto forOp : func.getOps()) { - // TODO: support imperfect loop. - if (auto outermost = getPipelineLoop(forOp)) { - // Collect memory access information. - MemAccessesMap loadMap; - outermost.walk([&](mlir::AffineLoadOp loadOp) { - loadMap[loadOp.getMemRef()].push_back(loadOp); - }); - - MemAccessesMap storeMap; - outermost.walk([&](mlir::AffineStoreOp storeOp) { - storeMap[storeOp.getMemRef()].push_back(storeOp); - }); - - // Apply array partition pragma. - // TODO: how to decide which to pick? - applyArrayPartition(loadMap, builder); - applyArrayPartition(storeMap, builder); - - // TODO: how to handle the case when different sub-functions have - // different array partition strategy selected? - } - } // Align function type with entry block argument types. auto resultTypes = func.front().getTerminator()->getOperandTypes(); auto inputTypes = func.front().getArgumentTypes(); func.setType(builder.getFunctionType(inputTypes, resultTypes)); + + // TODO: how to handle the case when different sub-functions have different + // array partition strategy selected? + return true; } std::unique_ptr scalehls::createArrayPartitionPass() {