[LegalizeDataflow] support to legalize loop and other operations simultaneously, support insert-copy and min-gran option for controlling whether CopyOps are inserted and the minimum granularity of dataflow

This commit is contained in:
Hanchen Ye 2020-12-25 23:40:33 -06:00
parent 8310ef8670
commit 98060b98fe
5 changed files with 163 additions and 154 deletions

View File

@ -92,10 +92,6 @@ void getMemAccessesMap(Block &block, MemAccessesMap &map,
Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
Operation *rhsOp);
// Get the innermost surrounding operation, either an AffineForOp or a FuncOp.
// In this method, AffineIfOp is transparent as well.
Operation *getSurroundingOp(Operation *op);
// Get the pointer of the scrOp's parent loop, which should locate at the same
// level with dstOp's any parent loop.
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
@ -105,13 +101,6 @@ hlscpp::ArrayOp getArrayOp(Value memref);
hlscpp::ArrayOp getArrayOp(Operation *op);
// For storing the intermediate memory and successor loops indexed by the
// predecessor loop.
using Successors = SmallVector<std::pair<Value, Operation *>, 2>;
using SuccessorsMap = DenseMap<Operation *, Successors>;
void getSuccessorsMap(Block &block, SuccessorsMap &map);
} // namespace scalehls
} // namespace mlir

View File

@ -79,7 +79,7 @@ def PartialAffineLoopTile : Pass<"partial-affine-loop-tile", "FuncOp"> {
let options = [
Option<"tileLevel", "tile-level", "unsigned", /*default=*/"1",
"Positive number: the level of loops to be tiles">,
"Positive number: the level of loops to be tiled">,
Option<"tileSize", "tile-size", "unsigned", /*default=*/"2",
"Positive number: the size of tiling">
];
@ -121,6 +121,13 @@ def LegalizeDataflow : Pass<"legalize-dataflow", "FuncOp"> {
}];
let constructor = "mlir::scalehls::createLegalizeDataflowPass()";
let options = [
Option<"insertCopy", "insert-copy", "bool", /*default=*/"true",
"Whether insert copy to break bypass paths">,
Option<"minGran", "min-gran", "unsigned", /*default=*/"1",
"Positive number: the minimum granularity of dataflow">
];
}
//===----------------------------------------------------------------------===//

View File

@ -483,8 +483,8 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
int64_t distance = 0;
// Calculate the distance of this dependency.
for (auto it = depComps.rbegin(); it < depComps.rend(); ++it) {
auto dep = *it;
for (auto i = depComps.rbegin(); i < depComps.rend(); ++i) {
auto dep = *i;
auto tripCount = getIntAttrValue(dep.op, "trip_count");
if (dep.lb)
@ -714,8 +714,8 @@ HLSCppEstimator::estimateBlock(Block &block, int64_t begin) {
auto blockEnd = begin;
// Reversely walk through all operations in the block.
for (auto it = block.rbegin(), e = block.rend(); it != e; ++it) {
auto op = &*it;
for (auto i = block.rbegin(), e = block.rend(); i != e; ++i) {
auto op = &*i;
auto opBegin = begin;
auto opEnd = begin;
@ -739,7 +739,7 @@ HLSCppEstimator::estimateBlock(Block &block, int64_t begin) {
return Optional<std::pair<int64_t, int64_t>>();
// Update the block schedule end and begin.
if (it == block.rbegin())
if (i == block.rbegin())
blockBegin = opBegin;
else
blockBegin = min(blockBegin, opBegin);
@ -749,6 +749,22 @@ HLSCppEstimator::estimateBlock(Block &block, int64_t begin) {
return std::pair<int64_t, int64_t>(blockBegin, blockEnd);
}
// Get the innermost surrounding operation, either an AffineForOp or a FuncOp.
// In this method, AffineIfOp is transparent as well.
static Operation *getSurroundingOp(Operation *op) {
auto currentOp = op;
while (true) {
if (auto parentIfOp = currentOp->getParentOfType<AffineIfOp>())
currentOp = parentIfOp;
else if (auto parentForOp = currentOp->getParentOfType<AffineForOp>())
return parentForOp;
else if (auto parentFuncOp = currentOp->getParentOfType<FuncOp>())
return parentFuncOp;
else
return nullptr;
}
}
void HLSCppEstimator::reverseSchedule() {
func.walk([&](Operation *op) {
// Get schedule level.

View File

@ -69,23 +69,7 @@ scalehls::checkSameLevel(Operation *lhsOp, Operation *rhsOp) {
return Optional<std::pair<Operation *, Operation *>>();
}
// Get the innermost surrounding operation, either an AffineForOp or a FuncOp.
// In this method, AffineIfOp is transparent as well.
Operation *scalehls::getSurroundingOp(Operation *op) {
auto currentOp = op;
while (true) {
if (auto parentIfOp = currentOp->getParentOfType<AffineIfOp>())
currentOp = parentIfOp;
else if (auto parentForOp = currentOp->getParentOfType<AffineForOp>())
return parentForOp;
else if (auto parentFuncOp = currentOp->getParentOfType<FuncOp>())
return parentFuncOp;
else
return nullptr;
}
}
// Get the pointer of the scrOp's parent loop, which should locate at the same
// Get the pointer of the scrOp's parent loop, which should locat at the same
// level with dstOp's any parent loop.
Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
// If srcOp and dstOp are already at the same level, return the srcOp.
@ -140,35 +124,3 @@ hlscpp::ArrayOp scalehls::getArrayOp(Value memref) {
hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
return getArrayOp(MemRefAccess(op).memref);
}
void scalehls::getSuccessorsMap(Block &block, SuccessorsMap &map) {
DenseMap<Operation *, SmallPtrSet<Value, 2>> memsMap;
DenseMap<Value, SmallPtrSet<Operation *, 2>> loopsMap;
for (auto loop : block.getOps<AffineForOp>())
loop.walk([&](Operation *op) {
if (auto affineStore = dyn_cast<AffineStoreOp>(op)) {
memsMap[loop].insert(affineStore.getMemRef());
} else if (auto store = dyn_cast<StoreOp>(op)) {
memsMap[loop].insert(store.getMemRef());
} else if (auto affineLoad = dyn_cast<AffineLoadOp>(op)) {
loopsMap[affineLoad.getMemRef()].insert(loop);
} else if (auto load = dyn_cast<LoadOp>(op)) {
loopsMap[load.getMemRef()].insert(loop);
}
});
for (auto loop : block.getOps<AffineForOp>())
for (auto mem : memsMap[loop])
for (auto successor : loopsMap[mem]) {
// If the successor loop not only loads from the memory, but also store
// to the memory, it will not be considered as a legal successor.
if (successor == loop || memsMap[successor].count(mem))
continue;
map[loop].push_back(std::pair<Value, Operation *>(mem, successor));
}
}

View File

@ -2,7 +2,6 @@
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Dialect/HLSKernel/HLSKernel.h"
#include "Transforms/Passes.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
@ -17,140 +16,161 @@ struct LegalizeDataflow : public LegalizeDataflowBase<LegalizeDataflow> {
};
} // namespace
void LegalizeDataflow::runOnOperation() {
auto func = getOperation();
auto builder = OpBuilder(func);
static bool isDataflowOp(Operation *op) {
return !isa<AllocOp, AllocaOp, ConstantOp, TensorLoadOp, TensorToMemrefOp,
ReturnOp>(op);
}
//===--------------------------------------------------------------------===//
// HLSKernel Handler
//===--------------------------------------------------------------------===//
// For storing the intermediate memory and successor loops indexed by the
// predecessor loop.
using Successors = SmallVector<std::pair<Value, Operation *>, 2>;
using SuccessorsMap = DenseMap<Operation *, Successors>;
// Handle HLSKernel operations. Note that HLSKernel operations must have not
// been bufferized at this point.
for (auto kernelOp : func.front().getOps<hlskernel::HLSKernelOpInterface>()) {
auto op = kernelOp.getOperation();
static void getSuccessorsMap(Block &block, SuccessorsMap &map) {
DenseMap<Operation *, SmallPtrSet<Value, 2>> memsMap;
DenseMap<Value, SmallPtrSet<Operation *, 2>> loopsMap;
// Walk through all operands to establish an ASAP dataflow schedule.
int64_t dataflowLevel = 0;
for (auto operand : op->getOperands()) {
if (operand.getKind() == Value::Kind::BlockArgument)
continue;
else {
auto predOp = operand.getDefiningOp();
if (auto attr = predOp->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowLevel = max(dataflowLevel, attr.getInt());
else
op->emitError(
"HLSKernelOp has unexpected successor, legalization failed");
for (auto loop : block.getOps<AffineForOp>())
loop.walk([&](Operation *op) {
if (auto affineStore = dyn_cast<AffineStoreOp>(op)) {
memsMap[loop].insert(affineStore.getMemRef());
} else if (auto store = dyn_cast<StoreOp>(op)) {
memsMap[loop].insert(store.getMemRef());
} else if (auto affineLoad = dyn_cast<AffineLoadOp>(op)) {
loopsMap[affineLoad.getMemRef()].insert(loop);
} else if (auto load = dyn_cast<LoadOp>(op)) {
loopsMap[load.getMemRef()].insert(loop);
}
}
});
// Set an attribute for indicating the scheduled dataflow level.
op->setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(),
dataflowLevel + 1));
}
// Find successors of all operations. Since this is a dataflow analysis, this
// traverse will not enter any control flow operations.
for (auto &op : block.getOperations()) {
// Loops need to be separately handled.
if (auto loop = dyn_cast<AffineForOp>(op)) {
for (auto mem : memsMap[loop]) {
for (auto successor : loopsMap[mem]) {
// If the successor loop not only loads from the memory, but also
// store to the memory, it is considered as a legal successor.
if (successor == loop || memsMap[successor].count(mem))
continue;
// Eliminate bypass paths between non-successive dataflow levels. Dummy
// nodes will be inserted into the bypass paths.
for (auto kernelOp : func.front().getOps<hlskernel::HLSKernelOpInterface>()) {
auto op = kernelOp.getOperation();
auto dataflowLevel =
op->getAttrOfType<IntegerAttr>("dataflow_level").getInt();
map[loop].push_back(std::pair<Value, Operation *>(mem, successor));
}
}
} else if (isDataflowOp(&op)) {
for (auto result : op.getResults()) {
for (auto successor : result.getUsers()) {
// If the intermediate result is not shaped type, or the successor is
// not a dataflow operation, it is considered as a legal successor.
if (!result.getType().isa<ShapedType>() || !isDataflowOp(successor))
continue;
auto result = op->getResult(0);
for (auto &use : result.getUses()) {
if (auto attr =
use.getOwner()->getAttrOfType<IntegerAttr>("dataflow_level")) {
if (attr.getInt() != dataflowLevel + 1) {
// Insert a dummy CopyOp if required.
builder.setInsertionPointAfter(op);
auto copyOp = builder.create<hlskernel::CopyOp>(
op->getLoc(), result.getType(), result);
copyOp.setAttr(
"dataflow_level",
builder.getIntegerAttr(builder.getI64Type(), dataflowLevel + 1));
// Replace the operand with the result of CopyOp.
use.getOwner()->setOperand(use.getOperandNumber(),
copyOp.getResult(0));
map[&op].push_back(std::pair<Value, Operation *>(result, successor));
}
}
}
}
}
//===--------------------------------------------------------------------===//
// AffineForLoop Handler
//===--------------------------------------------------------------------===//
void LegalizeDataflow::runOnOperation() {
auto func = getOperation();
auto builder = OpBuilder(func);
// Handle loops. Note that this assume all operations have been bufferized at
// this point. Therefore, HLSKernel ops and loops will never have dependencies
// with each other in this pass.
// TODO: analyze live ins.
SuccessorsMap successorsMap;
getSuccessorsMap(func.front(), successorsMap);
for (auto it = func.front().rbegin(); it != func.front().rend(); ++it) {
if (auto loop = dyn_cast<mlir::AffineForOp>(*it)) {
llvm::SmallDenseMap<int64_t, int64_t, 16> dataflowToMerge;
// Walk through all dataflow operations in a reversed order for establishing a
// ALAP scheduling.
for (auto i = func.front().rbegin(); i != func.front().rend(); ++i) {
auto op = &*i;
if (isDataflowOp(op)) {
int64_t dataflowLevel = 0;
// Walk through all successor loops.
for (auto pair : successorsMap[loop]) {
// Walk through all successor ops.
for (auto pair : successorsMap[op]) {
auto successor = pair.second;
if (auto attr = successor->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowLevel = max(dataflowLevel, attr.getInt());
else {
loop.emitError("loop has unexpected successor, legalization failed");
op->emitError("has unexpected successor, legalization failed");
return;
}
}
// Set an attribute for indicating the scheduled dataflow level.
loop.setAttr(
"dataflow_level",
builder.getIntegerAttr(builder.getI64Type(), dataflowLevel + 1));
op->setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(),
dataflowLevel + 1));
// Eliminate bypass paths.
for (auto pair : successorsMap[loop]) {
auto mem = pair.first;
// Eliminate bypass paths if detected.
for (auto pair : successorsMap[op]) {
auto value = pair.first;
auto successor = pair.second;
auto successorDataflowLevel =
successor->getAttrOfType<IntegerAttr>("dataflow_level").getInt();
// Insert CopyOps if required.
SmallVector<Value, 4> mems;
mems.push_back(mem);
builder.setInsertionPoint(successor);
// Bypass path does not exist.
if (dataflowLevel == successorDataflowLevel)
continue;
for (auto i = dataflowLevel; i > successorDataflowLevel; --i) {
// Create CopyOp.
auto newMem = builder.create<mlir::AllocOp>(
loop.getLoc(), mem.getType().cast<MemRefType>());
auto copyOp = builder.create<linalg::CopyOp>(loop.getLoc(),
mems.back(), newMem);
// If insert-copy is set, insert CopyOp to the bypass path. Otherwise,
// record all the bypass paths in dataflowToMerge.
if (insertCopy) {
// Insert CopyOps if required.
SmallVector<Value, 4> values;
values.push_back(value);
// Set CopyOp dataflow level.
copyOp.setAttr("dataflow_level",
builder.getIntegerAttr(builder.getI64Type(), i));
builder.setInsertionPoint(successor);
for (auto i = dataflowLevel; i > successorDataflowLevel; --i) {
// Create CopyOp.
Value newValue;
Operation *copyOp;
if (auto valueType = value.getType().dyn_cast<MemRefType>()) {
newValue = builder.create<mlir::AllocOp>(op->getLoc(), valueType);
copyOp = builder.create<linalg::CopyOp>(op->getLoc(),
values.back(), newValue);
} else {
copyOp = builder.create<hlskernel::CopyOp>(
op->getLoc(), value.getType(), values.back());
newValue = copyOp->getResult(0);
}
// Chain created CopyOps.
if (i == successorDataflowLevel + 1)
mem.replaceUsesWithIf(newMem, [&](mlir::OpOperand &use) {
return successor->isProperAncestor(use.getOwner());
});
// Set CopyOp dataflow level.
copyOp->setAttr("dataflow_level",
builder.getIntegerAttr(builder.getI64Type(), i));
// Chain created CopyOps.
if (i == successorDataflowLevel + 1)
value.replaceUsesWithIf(newValue, [&](mlir::OpOperand &use) {
return successor->isAncestor(use.getOwner());
});
else
values.push_back(newValue);
}
} else {
// Always retain the longest merge path.
if (auto dst = dataflowToMerge.lookup(successorDataflowLevel))
dataflowToMerge[successorDataflowLevel] = max(dst, dataflowLevel);
else
mems.push_back(newMem);
dataflowToMerge[successorDataflowLevel] = dataflowLevel;
}
}
}
}
// Reorder operations that are legalized, including HLSKernel ops or loops.
// Collect all operations in each dataflow level.
DenseMap<int64_t, SmallVector<Operation *, 2>> dataflowOps;
func.walk([&](Operation *dataflowOp) {
if (auto attr = dataflowOp->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(dataflowOp);
});
// Reorder operations that are legalized.
for (auto pair : dataflowOps) {
auto ops = pair.second;
auto lastOp = ops.back();
@ -161,6 +181,31 @@ void LegalizeDataflow::runOnOperation() {
}
}
// Merge dataflow levels according to the bypasses and minimum granularity.
if (minGran != 1 || !insertCopy) {
unsigned newLevel = 1;
unsigned toMerge = minGran;
for (unsigned i = 1, e = dataflowOps.size(); i <= e; ++i) {
// If the current level is the start point of a bypass, refresh toMerge.
// Otherwise, decrease toMerge by 1.
if (auto dst = dataflowToMerge.lookup(i))
toMerge = dst - i;
else
toMerge--;
// Annotate all ops in the current level to the new level.
for (auto op : dataflowOps[i])
op->setAttr("dataflow_level",
builder.getIntegerAttr(builder.getI64Type(), newLevel));
// Update toMerge and newLevel if required.
if (toMerge == 0) {
toMerge = minGran;
newLevel++;
}
}
}
// Set dataflow attribute.
func.setAttr("dataflow", builder.getBoolAttr(true));
}