[LegalizeDataflow] support loop-based dataflow legalization; [SplitFunction] include live-in analysis
This commit is contained in:
parent
f02955c284
commit
fef0cdc3fe
|
@ -92,6 +92,12 @@ public:
|
|||
using MemAccesses = SmallVector<Operation *, 16>;
|
||||
using MemAccessesMap = DenseMap<Value, MemAccesses>;
|
||||
|
||||
/// Collect all load and store operations in the block. The collected operations
|
||||
/// in the MemAccessesMap are ordered, which means an operation will never
|
||||
/// dominate another operation in front of it.
|
||||
void getMemAccessesMap(Block &block, MemAccessesMap &map,
|
||||
bool includeCalls = false);
|
||||
|
||||
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
|
||||
// AffineIfOp is transparent.
|
||||
Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
|
||||
|
@ -110,11 +116,17 @@ hlscpp::ArrayOp getArrayOp(Value memref);
|
|||
|
||||
hlscpp::ArrayOp getArrayOp(Operation *op);
|
||||
|
||||
/// Collect all load and store operations in the block. The collected operations
|
||||
/// in the MemAccessesMap are ordered, which means an operation will never
|
||||
/// dominate another operation in front of it.
|
||||
void getMemAccessesMap(Block &block, MemAccessesMap &map,
|
||||
bool includeCalls = false);
|
||||
// For storing all accessed memrefs indexed by an operation (e.g. AffineForOp).
|
||||
using MemRefs = SmallVector<Value, 4>;
|
||||
using MemRefsMap = DenseMap<Operation *, MemRefs>;
|
||||
|
||||
/// With the generated MemRefsMap, given a specific loop, we can easily find all
|
||||
/// memories which are consumed by the loop.
|
||||
void getLoopLoadMemsMap(Block &block, MemRefsMap &map);
|
||||
|
||||
/// With the generated MemAccessesMap, given a specific memory, we can easily
|
||||
/// find the loops which produce data to the memory.
|
||||
void getLoopMemStoresMap(Block &block, MemAccessesMap &map);
|
||||
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
|
|
@ -8,6 +8,28 @@
|
|||
using namespace mlir;
|
||||
using namespace scalehls;
|
||||
|
||||
/// Collect all load and store operations in the block.
|
||||
void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
|
||||
bool includeCalls) {
|
||||
for (auto &op : block) {
|
||||
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
|
||||
map[MemRefAccess(&op).memref].push_back(&op);
|
||||
|
||||
else if (includeCalls && isa<CallOp>(op)) {
|
||||
// All CallOps accessing the memory will be pushed back to the map.
|
||||
for (auto operand : op.getOperands())
|
||||
if (operand.getType().isa<MemRefType>())
|
||||
map[operand].push_back(&op);
|
||||
|
||||
} else if (op.getNumRegions()) {
|
||||
// Recursively collect memory access operations in each block.
|
||||
for (auto ®ion : op.getRegions())
|
||||
for (auto &block : region)
|
||||
getMemAccessesMap(block, map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
|
||||
// AffineIfOp is transparent.
|
||||
Optional<std::pair<Operation *, Operation *>>
|
||||
|
@ -118,26 +140,41 @@ hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
|
|||
return getArrayOp(MemRefAccess(op).memref);
|
||||
}
|
||||
|
||||
/// Collect all load and store operations in the block.
|
||||
void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
|
||||
bool includeCalls) {
|
||||
for (auto &op : block) {
|
||||
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
|
||||
map[MemRefAccess(&op).memref].push_back(&op);
|
||||
/// With the generated MemRefsMap, given a specific loop, we can easily find all
|
||||
/// memories which are consumed by the loop.
|
||||
void scalehls::getLoopLoadMemsMap(Block &block, MemRefsMap &map) {
|
||||
for (auto loop : block.getOps<AffineForOp>()) {
|
||||
loop.walk([&](Operation *op) {
|
||||
if (auto affineLoad = dyn_cast<AffineLoadOp>(op)) {
|
||||
auto &mems = map[loop];
|
||||
if (std::find(mems.begin(), mems.end(), affineLoad.getMemRef()) ==
|
||||
mems.end())
|
||||
mems.push_back(affineLoad.getMemRef());
|
||||
|
||||
else if (includeCalls && isa<CallOp>(op)) {
|
||||
// All CallOps accessing the memory will be pushed back to the map.
|
||||
for (auto operand : op.getOperands())
|
||||
if (operand.getType().isa<MemRefType>()) {
|
||||
map[operand].push_back(&op);
|
||||
break;
|
||||
}
|
||||
|
||||
} else if (op.getNumRegions()) {
|
||||
// Recursively collect memory access operations in each block.
|
||||
for (auto &region : op.getRegions())
|
||||
for (auto &block : region)
|
||||
getMemAccessesMap(block, map);
|
||||
}
|
||||
} else if (auto load = dyn_cast<LoadOp>(op)) {
|
||||
auto &mems = map[loop];
|
||||
if (std::find(mems.begin(), mems.end(), load.getMemRef()) == mems.end())
|
||||
mems.push_back(load.getMemRef());
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// With the generated MemAccessesMap, given a specific memory, we can easily
|
||||
/// find the loops which produce data to the memory.
|
||||
void scalehls::getLoopMemStoresMap(Block &block, MemAccessesMap &map) {
|
||||
for (auto loop : block.getOps<AffineForOp>()) {
|
||||
loop.walk([&](Operation *op) {
|
||||
if (auto affineStore = dyn_cast<AffineStoreOp>(op)) {
|
||||
auto &loops = map[affineStore.getMemRef()];
|
||||
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
|
||||
loops.push_back(loop);
|
||||
|
||||
} else if (auto store = dyn_cast<StoreOp>(op)) {
|
||||
auto &loops = map[store.getMemRef()];
|
||||
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
|
||||
loops.push_back(loop);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Analysis/Utils.h"
|
||||
#include "Dialect/HLSKernel/HLSKernel.h"
|
||||
#include "Transforms/Passes.h"
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mlir;
|
||||
|
@ -19,8 +21,12 @@ void LegalizeDataflow::runOnOperation() {
|
|||
auto func = getOperation();
|
||||
auto builder = OpBuilder(func);
|
||||
|
||||
// TODO: support non-HLSKernel operations, such as loops.
|
||||
// TODO: support non-CNNOps.
|
||||
//===--------------------------------------------------------------------===//
|
||||
// HLSKernel Handler
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
// Handle HLSKernel operations. Note that HLSKernel operations must not have
|
||||
// been bufferized at this point.
|
||||
for (auto kernelOp : func.front().getOps<hlskernel::HLSKernelOpInterface>()) {
|
||||
auto op = kernelOp.getOperation();
|
||||
|
||||
|
@ -34,7 +40,8 @@ void LegalizeDataflow::runOnOperation() {
|
|||
if (auto attr = predOp->getAttrOfType<IntegerAttr>("dataflow_level"))
|
||||
dataflowLevel = max(dataflowLevel, attr.getInt());
|
||||
else
|
||||
op->emitError("has unexpected dominator");
|
||||
op->emitError(
|
||||
"HLSKernelOp has unexpected predecessor, legalization failed");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -71,20 +78,96 @@ void LegalizeDataflow::runOnOperation() {
|
|||
}
|
||||
}
|
||||
|
||||
// Reorder operations that are legalized.
|
||||
//===--------------------------------------------------------------------===//
|
||||
// AffineForLoop Handler
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
// Handle loops. Note that this assumes all operations have been bufferized at
|
||||
// this point. Therefore, HLSKernel ops and loops will never have dependencies
|
||||
// with each other in this pass.
|
||||
// TODO: analyze live ins.
|
||||
MemRefsMap loadMemsMap;
|
||||
MemAccessesMap memStoresMap;
|
||||
getLoopLoadMemsMap(func.front(), loadMemsMap);
|
||||
getLoopMemStoresMap(func.front(), memStoresMap);
|
||||
|
||||
for (auto loop : func.front().getOps<mlir::AffineForOp>()) {
|
||||
int64_t dataflowLevel = 0;
|
||||
for (auto mem : loadMemsMap[loop]) {
|
||||
for (auto predLoop : memStoresMap[mem]) {
|
||||
if (predLoop == loop)
|
||||
continue;
|
||||
|
||||
// Establish an ASAP dataflow schedule.
|
||||
if (auto attr = predLoop->getAttrOfType<IntegerAttr>("dataflow_level"))
|
||||
dataflowLevel = max(dataflowLevel, attr.getInt());
|
||||
else
|
||||
loop.emitError(
|
||||
"loop has unexpected predecessor, legalization failed");
|
||||
}
|
||||
}
|
||||
|
||||
// Set an attribute for indicating the scheduled dataflow level.
|
||||
loop.setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(),
|
||||
dataflowLevel + 1));
|
||||
|
||||
// Eliminate bypass paths.
|
||||
for (auto mem : loadMemsMap[loop]) {
|
||||
for (auto predLoop : memStoresMap[mem]) {
|
||||
if (predLoop == loop)
|
||||
continue;
|
||||
|
||||
auto predDataflowLevel =
|
||||
predLoop->getAttrOfType<IntegerAttr>("dataflow_level").getInt();
|
||||
|
||||
// Insert dummy CopyOps if required.
|
||||
SmallVector<Operation *, 4> dummyOps;
|
||||
dummyOps.push_back(loop);
|
||||
for (auto i = dataflowLevel; i > predDataflowLevel; --i) {
|
||||
// Create CopyOp.
|
||||
builder.setInsertionPoint(dummyOps.back());
|
||||
auto interMem = builder.create<mlir::AllocOp>(
|
||||
loop.getLoc(), mem.getType().cast<MemRefType>());
|
||||
auto dummyOp =
|
||||
builder.create<linalg::CopyOp>(loop.getLoc(), mem, interMem);
|
||||
dummyOp.setAttr("dataflow_level",
|
||||
builder.getIntegerAttr(builder.getI64Type(), i));
|
||||
|
||||
// Chain created CopyOps.
|
||||
if (i == dataflowLevel) {
|
||||
loop.walk([&](Operation *op) {
|
||||
if (auto affineLoad = dyn_cast<mlir::AffineLoadOp>(op)) {
|
||||
if (affineLoad.getMemRef() == mem)
|
||||
affineLoad.setMemRef(interMem);
|
||||
|
||||
} else if (auto load = dyn_cast<mlir::LoadOp>(op)) {
|
||||
if (load.getMemRef() == mem)
|
||||
load.setMemRef(interMem);
|
||||
}
|
||||
});
|
||||
} else
|
||||
dummyOps.back()->setOperand(0, interMem);
|
||||
|
||||
dummyOps.push_back(dummyOp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reorder operations that are legalized, including HLSKernel ops or loops.
|
||||
DenseMap<int64_t, SmallVector<Operation *, 2>> dataflowOps;
|
||||
func.walk([&](hlskernel::HLSKernelOpInterface kernelOp) {
|
||||
if (auto attr = kernelOp.getAttrOfType<IntegerAttr>("dataflow_level"))
|
||||
dataflowOps[attr.getInt()].push_back(kernelOp.getOperation());
|
||||
func.walk([&](Operation *dataflowOp) {
|
||||
if (auto attr = dataflowOp->getAttrOfType<IntegerAttr>("dataflow_level"))
|
||||
dataflowOps[attr.getInt()].push_back(dataflowOp);
|
||||
});
|
||||
|
||||
for (auto pair : dataflowOps) {
|
||||
auto ops = pair.second;
|
||||
auto firstOp = ops.front();
|
||||
auto lastOp = ops.back();
|
||||
|
||||
for (auto op : llvm::drop_begin(ops, 1)) {
|
||||
op->moveBefore(firstOp);
|
||||
firstOp = op;
|
||||
for (auto it = ops.begin(); it < std::prev(ops.end()); ++it) {
|
||||
auto op = *it;
|
||||
op->moveBefore(lastOp);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Analysis/Utils.h"
|
||||
#include "Dialect/HLSKernel/HLSKernel.h"
|
||||
#include "Transforms/Passes.h"
|
||||
#include "mlir/Analysis/Liveness.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mlir;
|
||||
|
@ -24,10 +26,12 @@ void SplitFunction::runOnOperation() {
|
|||
funcs.push_back(func);
|
||||
|
||||
for (auto top : funcs) {
|
||||
Liveness liveness(top);
|
||||
|
||||
DenseMap<int64_t, SmallVector<Operation *, 2>> dataflowOps;
|
||||
top.walk([&](hlskernel::HLSKernelOpInterface kernelOp) {
|
||||
if (auto attr = kernelOp.getAttrOfType<IntegerAttr>("dataflow_level"))
|
||||
dataflowOps[attr.getInt()].push_back(kernelOp.getOperation());
|
||||
top.walk([&](Operation *op) {
|
||||
if (auto attr = op->getAttrOfType<IntegerAttr>("dataflow_level"))
|
||||
dataflowOps[attr.getInt()].push_back(op);
|
||||
});
|
||||
|
||||
for (auto pair : dataflowOps) {
|
||||
|
@ -44,8 +48,15 @@ void SplitFunction::runOnOperation() {
|
|||
|
||||
unsigned opIndex = 0;
|
||||
for (auto op : ops) {
|
||||
SmallVector<Value, 8> candidateInputs(op->getOperands());
|
||||
if (auto loop = dyn_cast<mlir::AffineForOp>(op)) {
|
||||
auto liveIns = liveness.getLiveIn(&loop.getLoopBody().front());
|
||||
for (auto liveIn : liveIns)
|
||||
if (!isForInductionVar(liveIn))
|
||||
candidateInputs.push_back(liveIn);
|
||||
}
|
||||
// Add input types and values.
|
||||
for (auto operand : op->getOperands()) {
|
||||
for (auto operand : candidateInputs) {
|
||||
// Record the index of the operand.
|
||||
auto operandFound =
|
||||
std::find(inputValues.begin(), inputValues.end(), operand);
|
||||
|
@ -58,7 +69,6 @@ void SplitFunction::runOnOperation() {
|
|||
inputValues.push_back(operand);
|
||||
}
|
||||
}
|
||||
opIndex += 1;
|
||||
|
||||
// Add output types and values.
|
||||
for (auto result : op->getResults()) {
|
||||
|
@ -68,6 +78,7 @@ void SplitFunction::runOnOperation() {
|
|||
outputValues.push_back(result);
|
||||
}
|
||||
}
|
||||
opIndex++;
|
||||
}
|
||||
|
||||
// Create a new function for the current dataflow level.
|
||||
|
@ -93,8 +104,11 @@ void SplitFunction::runOnOperation() {
|
|||
for (auto op : ops) {
|
||||
op->moveBefore(returnOp);
|
||||
// Connect operands to the arguments of the new created function.
|
||||
for (unsigned i = 0, e = op->getNumOperands(); i < e; ++i)
|
||||
op->setOperand(i, entry->getArgument(inputMap[opIndex][i]));
|
||||
for (unsigned i = 0, e = inputValues.size(); i < e; ++i)
|
||||
inputValues[i].replaceUsesWithIf(
|
||||
entry->getArgument(i), [&](mlir::OpOperand &use) {
|
||||
return getSameLevelDstOp(returnOp, use.getOwner());
|
||||
});
|
||||
opIndex += 1;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue