[LegalizeDataflow] support loop-based dataflow legalization; [SplitFunction] include live-in analysis

Hanchen Ye 2020-12-24 14:18:14 -06:00
parent f02955c284
commit fef0cdc3fe
4 changed files with 189 additions and 43 deletions

View File

@ -92,6 +92,12 @@ public:
using MemAccesses = SmallVector<Operation *, 16>;
using MemAccessesMap = DenseMap<Value, MemAccesses>;
/// Collect all load and store operations in the block. The collected operations
/// in the MemAccessesMap are ordered, which means an operation will never
/// dominate another operation in front of it.
void getMemAccessesMap(Block &block, MemAccessesMap &map,
bool includeCalls = false);
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
// AffineIfOp is transparent.
Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
@ -110,11 +116,17 @@ hlscpp::ArrayOp getArrayOp(Value memref);
hlscpp::ArrayOp getArrayOp(Operation *op);
// For storing all accessed memrefs indexed by an operation (e.g. AffineForOp).
using MemRefs = SmallVector<Value, 4>;
using MemRefsMap = DenseMap<Operation *, MemRefs>;
/// With the generated MemRefsMap, given a specific loop, we can easily find all
/// memories which are consumed by the loop.
void getLoopLoadMemsMap(Block &block, MemRefsMap &map);
/// With the generated MemAccessesMap, given a specific memory, we can easily
/// find the loops which store data to the memory.
void getLoopMemStoresMap(Block &block, MemAccessesMap &map);
} // namespace scalehls
} // namespace mlir

View File

@ -8,6 +8,28 @@
using namespace mlir;
using namespace scalehls;
/// Collect all load and store operations in the block.
void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
bool includeCalls) {
for (auto &op : block) {
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
map[MemRefAccess(&op).memref].push_back(&op);
else if (includeCalls && isa<CallOp>(op)) {
// All CallOps accessing the memory will be pushed back to the map.
for (auto operand : op.getOperands())
if (operand.getType().isa<MemRefType>())
map[operand].push_back(&op);
} else if (op.getNumRegions()) {
// Recursively collect memory access operations in each block.
for (auto &region : op.getRegions())
for (auto &block : region)
getMemAccessesMap(block, map);
}
}
}
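
For reference, a minimal usage sketch of the relocated helper; the driver function, the FuncOp argument, and the diagnostic output are hypothetical and not part of this patch:

    #include "Analysis/Utils.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace mlir;
    using namespace scalehls;

    // Hypothetical driver: collect every memory access in the entry block of
    // `func`, with CallOps that take a memref operand included as accesses.
    static void dumpMemAccessCounts(FuncOp func) {
      MemAccessesMap map;
      getMemAccessesMap(func.front(), map, /*includeCalls=*/true);
      for (auto &pair : map)
        // Each vector is ordered: an operation never dominates an operation
        // stored before it, so producers always appear before their consumers.
        llvm::errs() << "memref with " << pair.second.size()
                     << " ordered accesses\n";
    }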
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
// AffineIfOp is transparent.
Optional<std::pair<Operation *, Operation *>>
@ -118,26 +140,41 @@ hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
return getArrayOp(MemRefAccess(op).memref);
}
/// With the generated MemRefsMap, given a specific loop, we can easily find all
/// memories which are consumed by the loop.
void scalehls::getLoopLoadMemsMap(Block &block, MemRefsMap &map) {
for (auto loop : block.getOps<AffineForOp>()) {
loop.walk([&](Operation *op) {
if (auto affineLoad = dyn_cast<AffineLoadOp>(op)) {
auto &mems = map[loop];
if (std::find(mems.begin(), mems.end(), affineLoad.getMemRef()) ==
mems.end())
mems.push_back(affineLoad.getMemRef());
} else if (auto load = dyn_cast<LoadOp>(op)) {
auto &mems = map[loop];
if (std::find(mems.begin(), mems.end(), load.getMemRef()) == mems.end())
mems.push_back(load.getMemRef());
}
});
}
}
/// With the generated MemAccessesMap, given a specific memory, we can easily
/// find the loops which store data to the memory.
void scalehls::getLoopMemStoresMap(Block &block, MemAccessesMap &map) {
for (auto loop : block.getOps<AffineForOp>()) {
loop.walk([&](Operation *op) {
if (auto affineStore = dyn_cast<AffineStoreOp>(op)) {
auto &loops = map[affineStore.getMemRef()];
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
loops.push_back(loop);
} else if (auto store = dyn_cast<StoreOp>(op)) {
auto &loops = map[store.getMemRef()];
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
loops.push_back(loop);
}
});
}
}
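
As a usage sketch of the two new helpers together (the wrapper below is hypothetical; only the map types and builders come from this patch), the intended query pattern is: for each loop, look up the memories it loads, then look up the loops that store to them. This is the pattern LegalizeDataflow relies on.

    #include "Analysis/Utils.h"

    using namespace mlir;
    using namespace scalehls;

    // Hypothetical sketch: emit a remark on every loop that produces data
    // consumed by another top-level loop in `block`.
    static void markProducerLoops(Block &block) {
      MemRefsMap loadMemsMap;
      MemAccessesMap memStoresMap;
      getLoopLoadMemsMap(block, loadMemsMap);
      getLoopMemStoresMap(block, memStoresMap);

      for (auto loop : block.getOps<AffineForOp>())
        for (auto mem : loadMemsMap[loop])
          for (auto producer : memStoresMap[mem]) {
            if (producer == loop)
              continue;
            producer->emitRemark("produces data consumed by another loop");
          }
    }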

View File

@ -2,8 +2,10 @@
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Dialect/HLSKernel/HLSKernel.h"
#include "Transforms/Passes.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
using namespace std;
using namespace mlir;
@ -19,8 +21,12 @@ void LegalizeDataflow::runOnOperation() {
auto func = getOperation();
auto builder = OpBuilder(func);
// TODO: support non-CNNOps.
//===--------------------------------------------------------------------===//
// HLSKernel Handler
//===--------------------------------------------------------------------===//
// Handle HLSKernel operations. Note that HLSKernel operations must not have
// been bufferized at this point.
for (auto kernelOp : func.front().getOps<hlskernel::HLSKernelOpInterface>()) {
auto op = kernelOp.getOperation();
@ -34,7 +40,8 @@ void LegalizeDataflow::runOnOperation() {
if (auto attr = predOp->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowLevel = max(dataflowLevel, attr.getInt());
else
op->emitError("has unexpected dominator");
op->emitError(
"HLSKernelOp has unexpected predecessor, legalization failed");
}
}
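
A compact worked example of the ASAP rule applied above; the levels are illustrative only:

    #include <algorithm>
    #include <cstdint>
    #include <initializer_list>

    // An op whose predecessors are tagged with dataflow_level 1 and 3 is
    // placed one level after the latest of them: max(1, 3) + 1 == 4.
    static int64_t asapLevel(std::initializer_list<int64_t> predLevels) {
      int64_t dataflowLevel = 0;
      for (int64_t predLevel : predLevels)
        dataflowLevel = std::max(dataflowLevel, predLevel);
      return dataflowLevel + 1; // asapLevel({1, 3}) == 4
    }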
@ -71,20 +78,96 @@ void LegalizeDataflow::runOnOperation() {
}
}
//===--------------------------------------------------------------------===//
// AffineForLoop Handler
//===--------------------------------------------------------------------===//
// Handle loops. Note that this assumes all operations have been bufferized at
// this point. Therefore, HLSKernel ops and loops will never have dependencies
// on each other in this pass.
// TODO: analyze live ins.
MemRefsMap loadMemsMap;
MemAccessesMap memStoresMap;
getLoopLoadMemsMap(func.front(), loadMemsMap);
getLoopMemStoresMap(func.front(), memStoresMap);
for (auto loop : func.front().getOps<mlir::AffineForOp>()) {
int64_t dataflowLevel = 0;
for (auto mem : loadMemsMap[loop]) {
for (auto predLoop : memStoresMap[mem]) {
if (predLoop == loop)
continue;
// Establish an ASAP dataflow schedule.
if (auto attr = predLoop->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowLevel = max(dataflowLevel, attr.getInt());
else
loop.emitError(
"loop has unexpected predecessor, legalization failed");
}
}
// Set an attribute for indicating the scheduled dataflow level.
loop.setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(),
dataflowLevel + 1));
// Eliminate bypass paths.
for (auto mem : loadMemsMap[loop]) {
for (auto predLoop : memStoresMap[mem]) {
if (predLoop == loop)
continue;
auto predDataflowLevel =
predLoop->getAttrOfType<IntegerAttr>("dataflow_level").getInt();
// Insert dummy CopyOps if required.
SmallVector<Operation *, 4> dummyOps;
dummyOps.push_back(loop);
for (auto i = dataflowLevel; i > predDataflowLevel; --i) {
// Create CopyOp.
builder.setInsertionPoint(dummyOps.back());
auto interMem = builder.create<mlir::AllocOp>(
loop.getLoc(), mem.getType().cast<MemRefType>());
auto dummyOp =
builder.create<linalg::CopyOp>(loop.getLoc(), mem, interMem);
dummyOp.setAttr("dataflow_level",
builder.getIntegerAttr(builder.getI64Type(), i));
// Chain created CopyOps.
if (i == dataflowLevel) {
loop.walk([&](Operation *op) {
if (auto affineLoad = dyn_cast<mlir::AffineLoadOp>(op)) {
if (affineLoad.getMemRef() == mem)
affineLoad.setMemRef(interMem);
} else if (auto load = dyn_cast<mlir::LoadOp>(op)) {
if (load.getMemRef() == mem)
load.setMemRef(interMem);
}
});
} else
dummyOps.back()->setOperand(0, interMem);
dummyOps.push_back(dummyOp);
}
}
}
}
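
To make the inserted copy chain concrete, an illustrative trace; the level values and buffer names (%mem, %buf2, %buf3) are hypothetical:

    // Suppose loop A stores to %mem and carries dataflow_level 1, while loop B
    // loads %mem and ends up at dataflow_level 4 (so dataflowLevel == 3 and
    // predDataflowLevel == 1 in the code above). Two dummy CopyOps are created:
    //
    //   level 1: loop A writes %mem
    //   level 2: linalg.copy %mem  -> %buf2   (tagged dataflow_level = 2)
    //   level 3: linalg.copy %buf2 -> %buf3   (tagged dataflow_level = 3)
    //   level 4: loop B, whose loads of %mem are redirected to %buf3
    //
    // Every producer/consumer edge now spans exactly one dataflow level, which
    // removes the bypass path from level 1 directly to level 4.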
// Reorder operations that are legalized, including HLSKernel ops or loops.
DenseMap<int64_t, SmallVector<Operation *, 2>> dataflowOps;
func.walk([&](hlskernel::HLSKernelOpInterface kernelOp) {
if (auto attr = kernelOp.getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(kernelOp.getOperation());
func.walk([&](Operation *dataflowOp) {
if (auto attr = dataflowOp->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(dataflowOp);
});
for (auto pair : dataflowOps) {
auto ops = pair.second;
auto lastOp = ops.back();
for (auto it = ops.begin(); it < std::prev(ops.end()); ++it) {
auto op = *it;
op->moveBefore(lastOp);
}
}
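
A small illustrative example of the reordering effect; the op names and levels are hypothetical:

    // Suppose the block currently holds, in order,
    //   %a (dataflow_level 1), %x (level 2), %b (level 1), %y (level 2).
    // For level 1, ops.back() is %b, so %a is moved directly before %b; for
    // level 2, %x is moved directly before %y. The block then reads
    //   %a, %b, %x, %y
    // i.e. each dataflow level becomes a contiguous group anchored at the
    // position of its last operation.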

View File

@ -2,8 +2,10 @@
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Dialect/HLSKernel/HLSKernel.h"
#include "Transforms/Passes.h"
#include "mlir/Analysis/Liveness.h"
using namespace std;
using namespace mlir;
@ -24,10 +26,12 @@ void SplitFunction::runOnOperation() {
funcs.push_back(func);
for (auto top : funcs) {
Liveness liveness(top);
DenseMap<int64_t, SmallVector<Operation *, 2>> dataflowOps;
top.walk([&](hlskernel::HLSKernelOpInterface kernelOp) {
if (auto attr = kernelOp.getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(kernelOp.getOperation());
top.walk([&](Operation *op) {
if (auto attr = op->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(op);
});
for (auto pair : dataflowOps) {
@ -44,8 +48,15 @@ void SplitFunction::runOnOperation() {
unsigned opIndex = 0;
for (auto op : ops) {
SmallVector<Value, 8> candidateInputs(op->getOperands());
if (auto loop = dyn_cast<mlir::AffineForOp>(op)) {
auto liveIns = liveness.getLiveIn(&loop.getLoopBody().front());
for (auto liveIn : liveIns)
if (!isForInductionVar(liveIn))
candidateInputs.push_back(liveIn);
}
// Add input types and values.
for (auto operand : candidateInputs) {
// Record the index of the operand.
auto operandFound =
std::find(inputValues.begin(), inputValues.end(), operand);
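
Why the live-in handling above is needed, sketched on a hypothetical loop (the names %alpha, %buffer, %i are illustrative, not from this patch):

    // %alpha = ...                       // defined above the loop
    // affine.for %i = 0 to 64 {
    //   %v = affine.load %buffer[%i]     // %buffer also defined above the loop
    //   ... uses of %v and %alpha ...
    // }
    //
    // op->getOperands() on the AffineForOp only yields its bound operands, so
    // %alpha and %buffer would be missed. Liveness::getLiveIn on the body block
    // recovers them, and the isForInductionVar check drops the loop's own
    // induction variable, leaving the values that must become arguments of the
    // split sub-function.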
@ -58,7 +69,6 @@ void SplitFunction::runOnOperation() {
inputValues.push_back(operand);
}
}
// Add output types and values.
for (auto result : op->getResults()) {
@ -68,6 +78,7 @@ void SplitFunction::runOnOperation() {
outputValues.push_back(result);
}
}
opIndex++;
}
// Create a new function for the current dataflow level.
@ -93,8 +104,11 @@ void SplitFunction::runOnOperation() {
for (auto op : ops) {
op->moveBefore(returnOp);
// Connect operands to the arguments of the newly created function.
for (unsigned i = 0, e = inputValues.size(); i < e; ++i)
inputValues[i].replaceUsesWithIf(
entry->getArgument(i), [&](mlir::OpOperand &use) {
return getSameLevelDstOp(returnOp, use.getOwner());
});
opIndex += 1;
}
}
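
The intent of the new rewiring, as far as it can be read from this hunk (getSameLevelDstOp is a project utility; the value name below is hypothetical):

    // Suppose %buffer was recorded as inputValues[2]. After the grouped ops
    // are moved in front of returnOp:
    //   - uses of %buffer owned by ops inside the newly created function are
    //     rewritten to entry->getArgument(2);
    //   - uses of %buffer that remain in the original function are meant to be
    //     left untouched, with getSameLevelDstOp(returnOp, use.getOwner())
    //     acting as the filter that selects only users at the same level as
    //     the new function's terminator.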