From 98060b98fe53586a2fd5e41a2c997d7077f51eed Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Fri, 25 Dec 2020 23:40:33 -0600 Subject: [PATCH] [LegalizeDataflow] support to legalize loop and other operations simultaneously, support insert-copy and min-gran option for controlling whether CopyOps are inserted and the minimum granularity of dataflow --- include/Analysis/Utils.h | 11 -- include/Transforms/Passes.td | 9 +- lib/Analysis/QoREstimation.cpp | 26 +++- lib/Analysis/Utils.cpp | 50 +------ lib/Transforms/LegalizeDataflow.cpp | 221 +++++++++++++++++----------- 5 files changed, 163 insertions(+), 154 deletions(-) diff --git a/include/Analysis/Utils.h b/include/Analysis/Utils.h index 3602161..fba5629 100644 --- a/include/Analysis/Utils.h +++ b/include/Analysis/Utils.h @@ -92,10 +92,6 @@ void getMemAccessesMap(Block &block, MemAccessesMap &map, Optional> checkSameLevel(Operation *lhsOp, Operation *rhsOp); -// Get the innermost surrounding operation, either an AffineForOp or a FuncOp. -// In this method, AffineIfOp is transparent as well. -Operation *getSurroundingOp(Operation *op); - // Get the pointer of the scrOp's parent loop, which should locate at the same // level with dstOp's any parent loop. Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp); @@ -105,13 +101,6 @@ hlscpp::ArrayOp getArrayOp(Value memref); hlscpp::ArrayOp getArrayOp(Operation *op); -// For storing the intermediate memory and successor loops indexed by the -// predecessor loop. 
-using Successors = SmallVector, 2>; -using SuccessorsMap = DenseMap; - -void getSuccessorsMap(Block &block, SuccessorsMap &map); - } // namespace scalehls } // namespace mlir diff --git a/include/Transforms/Passes.td b/include/Transforms/Passes.td index 94b9e3a..ad23bcf 100644 --- a/include/Transforms/Passes.td +++ b/include/Transforms/Passes.td @@ -79,7 +79,7 @@ def PartialAffineLoopTile : Pass<"partial-affine-loop-tile", "FuncOp"> { let options = [ Option<"tileLevel", "tile-level", "unsigned", /*default=*/"1", - "Positive number: the level of loops to be tiles">, + "Positive number: the level of loops to be tiled">, Option<"tileSize", "tile-size", "unsigned", /*default=*/"2", "Positive number: the size of tiling"> ]; @@ -121,6 +121,13 @@ def LegalizeDataflow : Pass<"legalize-dataflow", "FuncOp"> { }]; let constructor = "mlir::scalehls::createLegalizeDataflowPass()"; + + let options = [ + Option<"insertCopy", "insert-copy", "bool", /*default=*/"true", + "Whether insert copy to break bypass paths">, + Option<"minGran", "min-gran", "unsigned", /*default=*/"1", + "Positive number: the minimum granularity of dataflow"> + ]; } //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/QoREstimation.cpp b/lib/Analysis/QoREstimation.cpp index a6a9df2..14d1cbe 100644 --- a/lib/Analysis/QoREstimation.cpp +++ b/lib/Analysis/QoREstimation.cpp @@ -483,8 +483,8 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) { int64_t distance = 0; // Calculate the distance of this dependency. - for (auto it = depComps.rbegin(); it < depComps.rend(); ++it) { - auto dep = *it; + for (auto i = depComps.rbegin(); i < depComps.rend(); ++i) { + auto dep = *i; auto tripCount = getIntAttrValue(dep.op, "trip_count"); if (dep.lb) @@ -714,8 +714,8 @@ HLSCppEstimator::estimateBlock(Block &block, int64_t begin) { auto blockEnd = begin; // Reversely walk through all operations in the block. 
- for (auto it = block.rbegin(), e = block.rend(); it != e; ++it) { - auto op = &*it; + for (auto i = block.rbegin(), e = block.rend(); i != e; ++i) { + auto op = &*i; auto opBegin = begin; auto opEnd = begin; @@ -739,7 +739,7 @@ HLSCppEstimator::estimateBlock(Block &block, int64_t begin) { return Optional>(); // Update the block schedule end and begin. - if (it == block.rbegin()) + if (i == block.rbegin()) blockBegin = opBegin; else blockBegin = min(blockBegin, opBegin); @@ -749,6 +749,22 @@ HLSCppEstimator::estimateBlock(Block &block, int64_t begin) { return std::pair(blockBegin, blockEnd); } +// Get the innermost surrounding operation, either an AffineForOp or a FuncOp. +// In this method, AffineIfOp is transparent as well. +static Operation *getSurroundingOp(Operation *op) { + auto currentOp = op; + while (true) { + if (auto parentIfOp = currentOp->getParentOfType()) + currentOp = parentIfOp; + else if (auto parentForOp = currentOp->getParentOfType()) + return parentForOp; + else if (auto parentFuncOp = currentOp->getParentOfType()) + return parentFuncOp; + else + return nullptr; + } +} + void HLSCppEstimator::reverseSchedule() { func.walk([&](Operation *op) { // Get schedule level. diff --git a/lib/Analysis/Utils.cpp b/lib/Analysis/Utils.cpp index af80802..f969fc6 100644 --- a/lib/Analysis/Utils.cpp +++ b/lib/Analysis/Utils.cpp @@ -69,23 +69,7 @@ scalehls::checkSameLevel(Operation *lhsOp, Operation *rhsOp) { return Optional>(); } -// Get the innermost surrounding operation, either an AffineForOp or a FuncOp. -// In this method, AffineIfOp is transparent as well. 
-Operation *scalehls::getSurroundingOp(Operation *op) { - auto currentOp = op; - while (true) { - if (auto parentIfOp = currentOp->getParentOfType()) - currentOp = parentIfOp; - else if (auto parentForOp = currentOp->getParentOfType()) - return parentForOp; - else if (auto parentFuncOp = currentOp->getParentOfType()) - return parentFuncOp; - else - return nullptr; - } -} - -// Get the pointer of the scrOp's parent loop, which should locate at the same +// Get the pointer of the srcOp's parent loop, which should locate at the same // level with dstOp's any parent loop. Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) { // If srcOp and dstOp are already at the same level, return the srcOp. @@ -140,35 +124,3 @@ hlscpp::ArrayOp scalehls::getArrayOp(Value memref) { hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) { return getArrayOp(MemRefAccess(op).memref); } - -void scalehls::getSuccessorsMap(Block &block, SuccessorsMap &map) { - DenseMap> memsMap; - DenseMap> loopsMap; - - for (auto loop : block.getOps()) - loop.walk([&](Operation *op) { - if (auto affineStore = dyn_cast(op)) { - memsMap[loop].insert(affineStore.getMemRef()); - - } else if (auto store = dyn_cast(op)) { - memsMap[loop].insert(store.getMemRef()); - - } else if (auto affineLoad = dyn_cast(op)) { - loopsMap[affineLoad.getMemRef()].insert(loop); - - } else if (auto load = dyn_cast(op)) { - loopsMap[load.getMemRef()].insert(loop); - } - }); - - for (auto loop : block.getOps()) - for (auto mem : memsMap[loop]) - for (auto successor : loopsMap[mem]) { - // If the successor loop not only loads from the memory, but also store - // to the memory, it will not be considered as a legal successor. 
- if (successor == loop || memsMap[successor].count(mem)) - continue; - - map[loop].push_back(std::pair(mem, successor)); - } -} diff --git a/lib/Transforms/LegalizeDataflow.cpp b/lib/Transforms/LegalizeDataflow.cpp index 0298b6f..c2befbe 100644 --- a/lib/Transforms/LegalizeDataflow.cpp +++ b/lib/Transforms/LegalizeDataflow.cpp @@ -2,7 +2,6 @@ // //===----------------------------------------------------------------------===// -#include "Analysis/Utils.h" #include "Dialect/HLSKernel/HLSKernel.h" #include "Transforms/Passes.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" @@ -17,140 +16,161 @@ struct LegalizeDataflow : public LegalizeDataflowBase { }; } // namespace -void LegalizeDataflow::runOnOperation() { - auto func = getOperation(); - auto builder = OpBuilder(func); +static bool isDataflowOp(Operation *op) { + return !isa(op); +} - //===--------------------------------------------------------------------===// - // HLSKernel Handler - //===--------------------------------------------------------------------===// +// For storing the intermediate memory and successor loops indexed by the +// predecessor loop. +using Successors = SmallVector, 2>; +using SuccessorsMap = DenseMap; - // Handle HLSKernel operations. Note that HLSKernel operations must have not - // been bufferized at this point. - for (auto kernelOp : func.front().getOps()) { - auto op = kernelOp.getOperation(); +static void getSuccessorsMap(Block &block, SuccessorsMap &map) { + DenseMap> memsMap; + DenseMap> loopsMap; - // Walk through all operands to establish an ASAP dataflow schedule. 
- int64_t dataflowLevel = 0; - for (auto operand : op->getOperands()) { - if (operand.getKind() == Value::Kind::BlockArgument) - continue; - else { - auto predOp = operand.getDefiningOp(); - if (auto attr = predOp->getAttrOfType("dataflow_level")) - dataflowLevel = max(dataflowLevel, attr.getInt()); - else - op->emitError( - "HLSKernelOp has unexpected successor, legalization failed"); + for (auto loop : block.getOps()) + loop.walk([&](Operation *op) { + if (auto affineStore = dyn_cast(op)) { + memsMap[loop].insert(affineStore.getMemRef()); + + } else if (auto store = dyn_cast(op)) { + memsMap[loop].insert(store.getMemRef()); + + } else if (auto affineLoad = dyn_cast(op)) { + loopsMap[affineLoad.getMemRef()].insert(loop); + + } else if (auto load = dyn_cast(op)) { + loopsMap[load.getMemRef()].insert(loop); } - } + }); - // Set an attribute for indicating the scheduled dataflow level. - op->setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(), - dataflowLevel + 1)); - } + // Find successors of all operations. Since this is a dataflow analysis, this + // traverse will not enter any control flow operations. + for (auto &op : block.getOperations()) { + // Loops need to be separately handled. + if (auto loop = dyn_cast(op)) { + for (auto mem : memsMap[loop]) { + for (auto successor : loopsMap[mem]) { + // If the successor loop not only loads from the memory, but also + // stores to the memory, it is not considered a legal successor. + if (successor == loop || memsMap[successor].count(mem)) + continue; - // Eliminate bypass paths between non-successive dataflow levels. Dummy - // nodes will be inserted into the bypass paths. 
- for (auto kernelOp : func.front().getOps()) { - auto op = kernelOp.getOperation(); - auto dataflowLevel = - op->getAttrOfType("dataflow_level").getInt(); + map[loop].push_back(std::pair(mem, successor)); + } + } + } else if (isDataflowOp(&op)) { + for (auto result : op.getResults()) { + for (auto successor : result.getUsers()) { + // If the intermediate result is not shaped type, or the successor is + // not a dataflow operation, it is not considered a legal successor. + if (!result.getType().isa() || !isDataflowOp(successor)) + continue; - auto result = op->getResult(0); - for (auto &use : result.getUses()) { - if (auto attr = - use.getOwner()->getAttrOfType("dataflow_level")) { - if (attr.getInt() != dataflowLevel + 1) { - // Insert a dummy CopyOp if required. - builder.setInsertionPointAfter(op); - auto copyOp = builder.create( - op->getLoc(), result.getType(), result); - copyOp.setAttr( - "dataflow_level", - builder.getIntegerAttr(builder.getI64Type(), dataflowLevel + 1)); - - // Replace the operand with the result of CopyOp. - use.getOwner()->setOperand(use.getOperandNumber(), - copyOp.getResult(0)); + map[&op].push_back(std::pair(result, successor)); } } } } +} - //===--------------------------------------------------------------------===// - // AffineForLoop Handler - //===--------------------------------------------------------------------===// +void LegalizeDataflow::runOnOperation() { + auto func = getOperation(); + auto builder = OpBuilder(func); - // Handle loops. Note that this assume all operations have been bufferized at - // this point. Therefore, HLSKernel ops and loops will never have dependencies - // with each other in this pass. - // TODO: analyze live ins. 
SuccessorsMap successorsMap; getSuccessorsMap(func.front(), successorsMap); - for (auto it = func.front().rbegin(); it != func.front().rend(); ++it) { - if (auto loop = dyn_cast(*it)) { + llvm::SmallDenseMap dataflowToMerge; + + // Walk through all dataflow operations in reverse order to establish an + // ALAP schedule. + for (auto i = func.front().rbegin(); i != func.front().rend(); ++i) { + auto op = &*i; + if (isDataflowOp(op)) { int64_t dataflowLevel = 0; - // Walk through all successor loops. - for (auto pair : successorsMap[loop]) { + // Walk through all successor ops. + for (auto pair : successorsMap[op]) { auto successor = pair.second; if (auto attr = successor->getAttrOfType("dataflow_level")) dataflowLevel = max(dataflowLevel, attr.getInt()); else { - loop.emitError("loop has unexpected successor, legalization failed"); + op->emitError("has unexpected successor, legalization failed"); return; } } // Set an attribute for indicating the scheduled dataflow level. - loop.setAttr( - "dataflow_level", - builder.getIntegerAttr(builder.getI64Type(), dataflowLevel + 1)); + op->setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(), + dataflowLevel + 1)); - // Eliminate bypass paths. - for (auto pair : successorsMap[loop]) { - auto mem = pair.first; + // Eliminate bypass paths if detected. + for (auto pair : successorsMap[op]) { + auto value = pair.first; auto successor = pair.second; auto successorDataflowLevel = successor->getAttrOfType("dataflow_level").getInt(); - // Insert CopyOps if required. - SmallVector mems; - mems.push_back(mem); - builder.setInsertionPoint(successor); + // Bypass path does not exist. + if (dataflowLevel == successorDataflowLevel) + continue; - for (auto i = dataflowLevel; i > successorDataflowLevel; --i) { - // Create CopyOp. 
- auto newMem = builder.create( - loop.getLoc(), mem.getType().cast()); - auto copyOp = builder.create(loop.getLoc(), - mems.back(), newMem); + // If insert-copy is set, insert CopyOp to the bypass path. Otherwise, + // record all the bypass paths in dataflowToMerge. + if (insertCopy) { + // Insert CopyOps if required. + SmallVector values; + values.push_back(value); - // Set CopyOp dataflow level. - copyOp.setAttr("dataflow_level", - builder.getIntegerAttr(builder.getI64Type(), i)); + builder.setInsertionPoint(successor); + for (auto i = dataflowLevel; i > successorDataflowLevel; --i) { + // Create CopyOp. + Value newValue; + Operation *copyOp; + if (auto valueType = value.getType().dyn_cast()) { + newValue = builder.create(op->getLoc(), valueType); + copyOp = builder.create(op->getLoc(), + values.back(), newValue); + } else { + copyOp = builder.create( + op->getLoc(), value.getType(), values.back()); + newValue = copyOp->getResult(0); + } - // Chain created CopyOps. - if (i == successorDataflowLevel + 1) - mem.replaceUsesWithIf(newMem, [&](mlir::OpOperand &use) { - return successor->isProperAncestor(use.getOwner()); - }); + // Set CopyOp dataflow level. + copyOp->setAttr("dataflow_level", + builder.getIntegerAttr(builder.getI64Type(), i)); + + // Chain created CopyOps. + if (i == successorDataflowLevel + 1) + value.replaceUsesWithIf(newValue, [&](mlir::OpOperand &use) { + return successor->isAncestor(use.getOwner()); + }); + else + values.push_back(newValue); + } + } else { + // Always retain the longest merge path. + if (auto dst = dataflowToMerge.lookup(successorDataflowLevel)) + dataflowToMerge[successorDataflowLevel] = max(dst, dataflowLevel); else - mems.push_back(newMem); + dataflowToMerge[successorDataflowLevel] = dataflowLevel; } } } } - // Reorder operations that are legalized, including HLSKernel ops or loops. + // Collect all operations in each dataflow level. 
DenseMap> dataflowOps; func.walk([&](Operation *dataflowOp) { if (auto attr = dataflowOp->getAttrOfType("dataflow_level")) dataflowOps[attr.getInt()].push_back(dataflowOp); }); + // Reorder operations that are legalized. for (auto pair : dataflowOps) { auto ops = pair.second; auto lastOp = ops.back(); @@ -161,6 +181,31 @@ void LegalizeDataflow::runOnOperation() { } } + // Merge dataflow levels according to the bypasses and minimum granularity. + if (minGran != 1 || !insertCopy) { + unsigned newLevel = 1; + unsigned toMerge = minGran; + for (unsigned i = 1, e = dataflowOps.size(); i <= e; ++i) { + // If the current level is the start point of a bypass, refresh toMerge. + // Otherwise, decrease toMerge by 1. + if (auto dst = dataflowToMerge.lookup(i)) + toMerge = dst - i; + else + toMerge--; + + // Annotate all ops in the current level to the new level. + for (auto op : dataflowOps[i]) + op->setAttr("dataflow_level", + builder.getIntegerAttr(builder.getI64Type(), newLevel)); + + // Update toMerge and newLevel if required. + if (toMerge == 0) { + toMerge = minGran; + newLevel++; + } + } + } + // Set dataflow attribute. func.setAttr("dataflow", builder.getBoolAttr(true)); }