[AffineLoopPerfection] factor out applyAffineLoopPerfection() method (#20)
This commit is contained in:
parent
94d6d57dda
commit
5b2af8e248
|
@ -90,6 +90,10 @@ AffineMap getLayoutMap(MemRefType memrefType, MLIRContext *context);
|
||||||
int64_t getPartitionFactors(MemRefType memrefType,
|
int64_t getPartitionFactors(MemRefType memrefType,
|
||||||
SmallVector<int64_t, 4> *factors = nullptr);
|
SmallVector<int64_t, 4> *factors = nullptr);
|
||||||
|
|
||||||
|
/// This is a method for finding the number of child loops which are immediately
|
||||||
|
/// contained by the input operation.
|
||||||
|
unsigned getChildLoopNum(Operation *op);
|
||||||
|
|
||||||
} // namespace scalehls
|
} // namespace scalehls
|
||||||
} // namespace mlir
|
} // namespace mlir
|
||||||
|
|
||||||
|
|
|
@ -16,9 +16,21 @@ class Pass;
|
||||||
namespace mlir {
|
namespace mlir {
|
||||||
namespace scalehls {
|
namespace scalehls {
|
||||||
|
|
||||||
/// Optimization APIs.
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Optimization APIs
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
/// Apply loop perfection to all outer loops of the input loop until the outer
|
||||||
|
/// operation is no longer a loop, or contains more than one child loop.
|
||||||
|
bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
|
||||||
|
|
||||||
|
/// Apply remove variable bound to all inner loops of the input loop.
|
||||||
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
|
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Optimization Pass Entries
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
/// Pragma optimization passes.
|
/// Pragma optimization passes.
|
||||||
std::unique_ptr<Pass> createLoopPipeliningPass();
|
std::unique_ptr<Pass> createLoopPipeliningPass();
|
||||||
std::unique_ptr<Pass> createArrayPartitionPass();
|
std::unique_ptr<Pass> createArrayPartitionPass();
|
||||||
|
|
|
@ -199,3 +199,16 @@ int64_t scalehls::getPartitionFactors(MemRefType memrefType,
|
||||||
|
|
||||||
return accumFactor;
|
return accumFactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This is a method for finding the number of child loops which are immediately
|
||||||
|
/// contained by the input operation.
|
||||||
|
unsigned scalehls::getChildLoopNum(Operation *op) {
|
||||||
|
unsigned childNum = 0;
|
||||||
|
for (auto &region : op->getRegions())
|
||||||
|
for (auto &block : region)
|
||||||
|
for (auto &op : block)
|
||||||
|
if (isa<AffineForOp>(op))
|
||||||
|
++childNum;
|
||||||
|
|
||||||
|
return childNum;
|
||||||
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "Analysis/Utils.h"
|
||||||
#include "Transforms/Passes.h"
|
#include "Transforms/Passes.h"
|
||||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||||
#include "mlir/IR/IntegerSet.h"
|
#include "mlir/IR/IntegerSet.h"
|
||||||
|
@ -13,177 +14,191 @@ using namespace scalehls;
|
||||||
namespace {
|
namespace {
|
||||||
struct AffineLoopPerfection
|
struct AffineLoopPerfection
|
||||||
: public AffineLoopPerfectionBase<AffineLoopPerfection> {
|
: public AffineLoopPerfectionBase<AffineLoopPerfection> {
|
||||||
void runOnOperation() override;
|
void runOnOperation() override {
|
||||||
|
auto func = getOperation();
|
||||||
|
auto builder = OpBuilder(func);
|
||||||
|
|
||||||
|
// Walk through all loops.
|
||||||
|
for (auto forOp : func.getOps<AffineForOp>()) {
|
||||||
|
// Collect all loops that: (1) is the innermost loop (contains zero child
|
||||||
|
// loop nest); or (2) contains more than one child loop nest.
|
||||||
|
SmallVector<AffineForOp, 4> targetLoops;
|
||||||
|
forOp.walk([&](AffineForOp loop) {
|
||||||
|
if (getChildLoopNum(loop) != 1)
|
||||||
|
targetLoops.push_back(loop);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Apply loop perfection to each target loop.
|
||||||
|
for (auto loop : targetLoops)
|
||||||
|
applyAffineLoopPerfection(loop, builder);
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void AffineLoopPerfection::runOnOperation() {
|
/// Apply loop perfection to all outer loops of the input loop until the outer
|
||||||
auto func = getOperation();
|
/// operation is no longer a loop, or contains more than one child loop.
|
||||||
auto builder = OpBuilder(func);
|
bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
||||||
|
OpBuilder &builder) {
|
||||||
|
SmallVector<AffineForOp, 4> loops;
|
||||||
|
loops.push_back(innermostLoop);
|
||||||
|
|
||||||
// Walk through all functions and loops.
|
while (true) {
|
||||||
for (auto forOp : func.getOps<mlir::AffineForOp>()) {
|
// Get the parent loop of the child loop.
|
||||||
// Walk through all inner loops.
|
auto childLoop = loops.back();
|
||||||
SmallVector<mlir::AffineForOp, 4> loops;
|
auto loop = dyn_cast<AffineForOp>(childLoop.getParentOp());
|
||||||
forOp.walk([&](mlir::AffineForOp loop) {
|
|
||||||
if (!loops.empty()) {
|
// Break the procedure if the parent operation is no longer a loop.
|
||||||
// Make sure the current loop is a sequential nested loop.
|
if (!loop)
|
||||||
// TODO: support parallel loops perfection? This tends to be much
|
break;
|
||||||
// complicated than a pure sequential loop stack, but seems possible.
|
|
||||||
if (loop != loops.back().getParentOp()) {
|
// Break if the parent loop contains more than one child loop.
|
||||||
forOp.emitError("contains parallel inner loops, not supported");
|
// TODO: how to handle this case? It seems possible.
|
||||||
return;
|
if (getChildLoopNum(loop) != 1)
|
||||||
}
|
break;
|
||||||
auto innermostLoop = loops.front();
|
|
||||||
|
// Collect all operations before the child loop.
|
||||||
// Collect all operations before the inner loop.
|
SmallVector<Operation *, 4> frontOps;
|
||||||
SmallVector<Operation *, 4> frontOps;
|
for (auto &op : loop.getBody()->getOperations()) {
|
||||||
for (auto &op : loop.getBody()->getOperations()) {
|
if (&op != childLoop)
|
||||||
if (&op != loops.back().getOperation())
|
frontOps.push_back(&op);
|
||||||
frontOps.push_back(&op);
|
else
|
||||||
else
|
break;
|
||||||
break;
|
}
|
||||||
}
|
|
||||||
|
// All operations before the child loop should be moved to the innermost
|
||||||
// All operations before the inner loop should be moved to the
|
// loop, they are collected in frontOps.
|
||||||
// innermost loop, they are collected in frontOps.
|
if (!frontOps.empty()) {
|
||||||
if (!frontOps.empty()) {
|
// TODO: for now, we assume all users are inside of the current loop. This
|
||||||
// TODO: for now, we assume all users are inside of the current loop.
|
// is important because if any user is located at inner loops, it is
|
||||||
// This is important because if any user is located at inner loops, it
|
// required to create a memref for holding the result.
|
||||||
// is required to create a memref for holding the result.
|
for (auto op : frontOps)
|
||||||
for (auto op : frontOps)
|
for (auto user : op->getUsers())
|
||||||
for (auto user : op->getUsers())
|
if (user->getParentOp() != loop)
|
||||||
if (user->getParentOp() != loop)
|
return true;
|
||||||
return;
|
|
||||||
|
// Create AffineIf in the front of the innermost loop.
|
||||||
// Create AffineIf in the front of the innermost loop.
|
SmallVector<AffineExpr, 4> ifExprs;
|
||||||
SmallVector<AffineExpr, 4> ifExprs;
|
SmallVector<bool, 4> ifEqFlags;
|
||||||
SmallVector<bool, 4> ifEqFlags;
|
SmallVector<Value, 4> ifOperands;
|
||||||
SmallVector<Value, 4> ifOperands;
|
unsigned dim = 0;
|
||||||
unsigned dim = 0;
|
for (auto innerLoop : loops) {
|
||||||
for (auto innerLoop : loops) {
|
// Create all components required by constructing if operation.
|
||||||
// Create all components required by constructing if operation.
|
if (innerLoop.hasConstantLowerBound()) {
|
||||||
if (innerLoop.hasConstantLowerBound()) {
|
ifExprs.push_back(builder.getAffineDimExpr(dim++) -
|
||||||
ifExprs.push_back(
|
innerLoop.getConstantLowerBound());
|
||||||
getAffineDimExpr(dim++, func.getContext()) -
|
ifOperands.push_back(innerLoop.getInductionVar());
|
||||||
getAffineConstantExpr(innerLoop.getConstantLowerBound(),
|
} else {
|
||||||
func.getContext()));
|
// Non-constant case requires to integrate the bound affine expression
|
||||||
ifOperands.push_back(innerLoop.getInductionVar());
|
// and operands into the condition integer set.
|
||||||
} else {
|
auto lowerExpr = innerLoop.getLowerBoundMap().getResult(0);
|
||||||
// Non-constant case requires to integrate the bound affine
|
auto lowerOperands = innerLoop.getLowerBoundOperands();
|
||||||
// expression and operands into the condition integer set.
|
SmallVector<AffineExpr, 4> newDims;
|
||||||
auto lowerExpr = innerLoop.getLowerBoundMap().getResult(0);
|
for (unsigned i = 0, e = lowerOperands.size(); i < e; ++i)
|
||||||
auto lowerOperands = innerLoop.getLowerBoundOperands();
|
newDims.push_back(builder.getAffineDimExpr(i + dim + 1));
|
||||||
SmallVector<AffineExpr, 4> newDims;
|
lowerExpr = lowerExpr.replaceDimsAndSymbols(newDims, {});
|
||||||
for (unsigned i = 0, e = lowerOperands.size(); i < e; ++i)
|
|
||||||
newDims.push_back(
|
ifExprs.push_back(builder.getAffineDimExpr(dim++) - lowerExpr);
|
||||||
getAffineDimExpr(i + dim + 1, func.getContext()));
|
ifOperands.push_back(innerLoop.getInductionVar());
|
||||||
lowerExpr = lowerExpr.replaceDimsAndSymbols(newDims, {});
|
ifOperands.append(lowerOperands.begin(), lowerOperands.end());
|
||||||
|
dim += lowerOperands.size();
|
||||||
ifExprs.push_back(getAffineDimExpr(dim++, func.getContext()) -
|
|
||||||
lowerExpr);
|
|
||||||
ifOperands.push_back(innerLoop.getInductionVar());
|
|
||||||
ifOperands.append(lowerOperands.begin(), lowerOperands.end());
|
|
||||||
dim += lowerOperands.size();
|
|
||||||
}
|
|
||||||
ifEqFlags.push_back(true);
|
|
||||||
}
|
|
||||||
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
|
|
||||||
|
|
||||||
// Set builder insertion point and create AffineIf operation.
|
|
||||||
builder.setInsertionPointToStart(innermostLoop.getBody());
|
|
||||||
auto ifOp = builder.create<mlir::AffineIfOp>(
|
|
||||||
func.getLoc(), ifCondition, ifOperands,
|
|
||||||
/*withElseRegion=*/false);
|
|
||||||
|
|
||||||
// Move all operations in frontOps into the innermost loop. Note
|
|
||||||
// that if the operation has result, it will always be executed.
|
|
||||||
// However, if the operation doesn't have result (e.g. AffineStore
|
|
||||||
// operation), it will be putted into the generated AffineIf
|
|
||||||
// operation and conditionally executed.
|
|
||||||
for (auto op : frontOps) {
|
|
||||||
if (op->getNumResults())
|
|
||||||
op->moveBefore(ifOp);
|
|
||||||
else
|
|
||||||
op->moveBefore(ifOp.getThenBlock()->getTerminator());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect all operations after the inner loop.
|
|
||||||
SmallVector<Operation *, 4> backOps;
|
|
||||||
auto &opList = loop.getBody()->getOperations();
|
|
||||||
for (auto opIt = opList.rbegin(); opIt != opList.rend(); ++opIt) {
|
|
||||||
auto &op = *opIt;
|
|
||||||
if (!isa<mlir::AffineYieldOp>(op)) {
|
|
||||||
if (&op != loops.back().getOperation())
|
|
||||||
backOps.push_back(&op);
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All operations after the inner loop should be moved to the
|
|
||||||
// innermost loop, they are collected in backOps.
|
|
||||||
if (!backOps.empty()) {
|
|
||||||
// Create AffineIf in the back of the innermost loop (before the
|
|
||||||
// terminator).
|
|
||||||
SmallVector<AffineExpr, 4> ifExprs;
|
|
||||||
SmallVector<bool, 4> ifEqFlags;
|
|
||||||
SmallVector<Value, 4> ifOperands;
|
|
||||||
unsigned dim = 0;
|
|
||||||
for (auto innerLoop : loops) {
|
|
||||||
// Create all components required by constructing if operation.
|
|
||||||
if (innerLoop.hasConstantUpperBound()) {
|
|
||||||
ifExprs.push_back(
|
|
||||||
getAffineConstantExpr(innerLoop.getConstantUpperBound() - 1,
|
|
||||||
func.getContext()) -
|
|
||||||
getAffineDimExpr(dim++, func.getContext()));
|
|
||||||
ifOperands.push_back(innerLoop.getInductionVar());
|
|
||||||
} else {
|
|
||||||
// Non-constant case requires to integrate the bound affine
|
|
||||||
// expression and operands into the condition integer set.
|
|
||||||
auto upperExpr = innerLoop.getUpperBoundMap().getResult(0);
|
|
||||||
auto upperOperands = innerLoop.getUpperBoundOperands();
|
|
||||||
SmallVector<AffineExpr, 4> newDims;
|
|
||||||
for (unsigned i = 0, e = upperOperands.size(); i < e; ++i)
|
|
||||||
newDims.push_back(
|
|
||||||
getAffineDimExpr(i + dim + 1, func.getContext()));
|
|
||||||
upperExpr = upperExpr.replaceDimsAndSymbols(newDims, {});
|
|
||||||
|
|
||||||
ifExprs.push_back(upperExpr -
|
|
||||||
getAffineConstantExpr(1, func.getContext()) -
|
|
||||||
getAffineDimExpr(dim++, func.getContext()));
|
|
||||||
ifOperands.push_back(innerLoop.getInductionVar());
|
|
||||||
ifOperands.append(upperOperands.begin(), upperOperands.end());
|
|
||||||
dim += upperOperands.size();
|
|
||||||
}
|
|
||||||
ifEqFlags.push_back(true);
|
|
||||||
}
|
|
||||||
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
|
|
||||||
|
|
||||||
// Set builder insertion point and create AffineIf operation.
|
|
||||||
builder.setInsertionPoint(innermostLoop.getBody()->getTerminator());
|
|
||||||
auto ifOp = builder.create<mlir::AffineIfOp>(
|
|
||||||
func.getLoc(), ifCondition, ifOperands,
|
|
||||||
/*withElseRegion=*/false);
|
|
||||||
|
|
||||||
// Move all operations in backOps into the innermost loop. Note
|
|
||||||
// that if the operation has result, it will always be executed.
|
|
||||||
// However, if the operation doesn't have result (e.g. AffineStore
|
|
||||||
// operation), it will be putted into the generated AffineIf
|
|
||||||
// operation and conditionally executed.
|
|
||||||
for (auto opIt = backOps.rbegin(); opIt < backOps.rend(); ++opIt) {
|
|
||||||
auto op = *opIt;
|
|
||||||
if (op->getNumResults())
|
|
||||||
op->moveBefore(ifOp);
|
|
||||||
else
|
|
||||||
op->moveBefore(ifOp.getThenBlock()->getTerminator());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
ifEqFlags.push_back(true);
|
||||||
}
|
}
|
||||||
loops.push_back(loop);
|
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
|
||||||
});
|
|
||||||
|
// Set builder insertion point and create AffineIf operation.
|
||||||
|
builder.setInsertionPointToStart(innermostLoop.getBody());
|
||||||
|
auto ifOp =
|
||||||
|
builder.create<AffineIfOp>(loop.getLoc(), ifCondition, ifOperands,
|
||||||
|
/*withElseRegion=*/false);
|
||||||
|
|
||||||
|
// Move all operations in frontOps into the innermost loop. Note that if
|
||||||
|
// the operation has result, it will always be executed. However, if the
|
||||||
|
// operation doesn't have result (e.g. AffineStore operation), it will be
|
||||||
|
// put into the generated AffineIf operation and conditionally
|
||||||
|
// executed.
|
||||||
|
for (auto op : frontOps) {
|
||||||
|
if (op->getNumResults())
|
||||||
|
op->moveBefore(ifOp);
|
||||||
|
else
|
||||||
|
op->moveBefore(ifOp.getThenBlock()->getTerminator());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect all operations after the inner loop.
|
||||||
|
SmallVector<Operation *, 4> backOps;
|
||||||
|
auto &opList = loop.getBody()->getOperations();
|
||||||
|
for (auto opIt = opList.rbegin(); opIt != opList.rend(); ++opIt) {
|
||||||
|
auto &op = *opIt;
|
||||||
|
if (!isa<AffineYieldOp>(op)) {
|
||||||
|
if (&op != childLoop.getOperation())
|
||||||
|
backOps.push_back(&op);
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// All operations after the inner loop should be moved to the
|
||||||
|
// innermost loop, they are collected in backOps.
|
||||||
|
if (!backOps.empty()) {
|
||||||
|
// Create AffineIf in the back of the innermost loop (before the
|
||||||
|
// terminator).
|
||||||
|
SmallVector<AffineExpr, 4> ifExprs;
|
||||||
|
SmallVector<bool, 4> ifEqFlags;
|
||||||
|
SmallVector<Value, 4> ifOperands;
|
||||||
|
unsigned dim = 0;
|
||||||
|
for (auto innerLoop : loops) {
|
||||||
|
// Create all components required by constructing if operation.
|
||||||
|
if (innerLoop.hasConstantUpperBound()) {
|
||||||
|
ifExprs.push_back(innerLoop.getConstantUpperBound() - 1 -
|
||||||
|
builder.getAffineDimExpr(dim++));
|
||||||
|
ifOperands.push_back(innerLoop.getInductionVar());
|
||||||
|
} else {
|
||||||
|
// Non-constant case requires to integrate the bound affine expression
|
||||||
|
// and operands into the condition integer set.
|
||||||
|
auto upperExpr = innerLoop.getUpperBoundMap().getResult(0);
|
||||||
|
auto upperOperands = innerLoop.getUpperBoundOperands();
|
||||||
|
SmallVector<AffineExpr, 4> newDims;
|
||||||
|
for (unsigned i = 0, e = upperOperands.size(); i < e; ++i)
|
||||||
|
newDims.push_back(builder.getAffineDimExpr(i + dim + 1));
|
||||||
|
upperExpr = upperExpr.replaceDimsAndSymbols(newDims, {});
|
||||||
|
|
||||||
|
ifExprs.push_back(upperExpr - 1 - builder.getAffineDimExpr(dim++));
|
||||||
|
ifOperands.push_back(innerLoop.getInductionVar());
|
||||||
|
ifOperands.append(upperOperands.begin(), upperOperands.end());
|
||||||
|
dim += upperOperands.size();
|
||||||
|
}
|
||||||
|
ifEqFlags.push_back(true);
|
||||||
|
}
|
||||||
|
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
|
||||||
|
|
||||||
|
// Set builder insertion point and create AffineIf operation.
|
||||||
|
builder.setInsertionPoint(innermostLoop.getBody()->getTerminator());
|
||||||
|
auto ifOp =
|
||||||
|
builder.create<AffineIfOp>(loop.getLoc(), ifCondition, ifOperands,
|
||||||
|
/*withElseRegion=*/false);
|
||||||
|
|
||||||
|
// Move all operations in backOps into the innermost loop. Note that if
|
||||||
|
// the operation has result, it will always be executed. However, if the
|
||||||
|
// operation doesn't have result (e.g. AffineStore operation), it will be
|
||||||
|
// put into the generated AffineIf operation and conditionally
|
||||||
|
// executed.
|
||||||
|
for (auto opIt = backOps.rbegin(); opIt < backOps.rend(); ++opIt) {
|
||||||
|
auto op = *opIt;
|
||||||
|
if (op->getNumResults())
|
||||||
|
op->moveBefore(ifOp);
|
||||||
|
else
|
||||||
|
op->moveBefore(ifOp.getThenBlock()->getTerminator());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Push back the current loop as the new child loop.
|
||||||
|
loops.push_back(loop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For now, this method always succeeds.
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<mlir::Pass> scalehls::createAffineLoopPerfectionPass() {
|
std::unique_ptr<mlir::Pass> scalehls::createAffineLoopPerfectionPass() {
|
||||||
|
|
|
@ -18,13 +18,14 @@ struct RemoveVariableBound
|
||||||
auto func = getOperation();
|
auto func = getOperation();
|
||||||
auto builder = OpBuilder(func);
|
auto builder = OpBuilder(func);
|
||||||
|
|
||||||
// Walk through all functions and loops.
|
// Walk through all loops.
|
||||||
for (auto loop : func.getOps<AffineForOp>())
|
for (auto loop : func.getOps<AffineForOp>())
|
||||||
applyRemoveVariableBound(loop, builder);
|
applyRemoveVariableBound(loop, builder);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
/// Apply remove variable bound to all inner loops of the input loop.
|
||||||
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
||||||
SmallVector<AffineForOp, 4> nestedLoops;
|
SmallVector<AffineForOp, 4> nestedLoops;
|
||||||
getPerfectlyNestedLoops(nestedLoops, loop);
|
getPerfectlyNestedLoops(nestedLoops, loop);
|
||||||
|
|
Loading…
Reference in New Issue