[AffineLoopPerfection] factor out applyAffineLoopPerfection() method (#20)

This commit is contained in:
Hanchen Ye 2021-01-07 18:38:11 -06:00
parent 94d6d57dda
commit 5b2af8e248
5 changed files with 211 additions and 166 deletions

View File

@ -90,6 +90,10 @@ AffineMap getLayoutMap(MemRefType memrefType, MLIRContext *context);
int64_t getPartitionFactors(MemRefType memrefType,
SmallVector<int64_t, 4> *factors = nullptr);
/// This method finds the number of child loops which are immediately contained
/// by the input operation.
unsigned getChildLoopNum(Operation *op);
} // namespace scalehls
} // namespace mlir

View File

@ -16,9 +16,21 @@ class Pass;
namespace mlir {
namespace scalehls {
/// Optimization APIs.
//===----------------------------------------------------------------------===//
// Optimization APIs
//===----------------------------------------------------------------------===//
/// Apply loop perfection to all outer loops of the input loop until the outer
/// operation is no longer a loop, or contains more than one child loop.
bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
/// Apply remove variable bound to all inner loops of the input loop.
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
//===----------------------------------------------------------------------===//
// Optimization Pass Entries
//===----------------------------------------------------------------------===//
/// Pragma optimization passes.
std::unique_ptr<Pass> createLoopPipeliningPass();
std::unique_ptr<Pass> createArrayPartitionPass();

View File

@ -199,3 +199,16 @@ int64_t scalehls::getPartitionFactors(MemRefType memrefType,
return accumFactor;
}
/// Count the AffineForOp operations that are immediate children of the input
/// operation, i.e. that sit directly inside a block of one of its regions.
/// Loops nested any deeper than that are not counted.
unsigned scalehls::getChildLoopNum(Operation *op) {
  unsigned loopCount = 0;
  for (auto &region : op->getRegions()) {
    for (auto &block : region) {
      for (auto &childOp : block) {
        // Only direct children that are affine loops contribute.
        if (!isa<AffineForOp>(childOp))
          continue;
        ++loopCount;
      }
    }
  }
  return loopCount;
}

View File

@ -2,6 +2,7 @@
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Transforms/Passes.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/IntegerSet.h"
@ -13,177 +14,191 @@ using namespace scalehls;
namespace {
struct AffineLoopPerfection
: public AffineLoopPerfectionBase<AffineLoopPerfection> {
void runOnOperation() override;
void runOnOperation() override {
auto func = getOperation();
auto builder = OpBuilder(func);
// Walk through all loops.
for (auto forOp : func.getOps<AffineForOp>()) {
// Collect all loops that: (1) is the innermost loop (contains zero child
// loop nest); or (2) contains more than one child loop nest.
SmallVector<AffineForOp, 4> targetLoops;
forOp.walk([&](AffineForOp loop) {
if (getChildLoopNum(loop) != 1)
targetLoops.push_back(loop);
});
// Apply loop perfection to each target loop.
for (auto loop : targetLoops)
applyAffineLoopPerfection(loop, builder);
}
}
};
} // namespace
void AffineLoopPerfection::runOnOperation() {
auto func = getOperation();
auto builder = OpBuilder(func);
/// Apply loop perfection to all outer loops of the input loop until the outer
/// operation is no longer a loop, or contains more than one child loop.
bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
OpBuilder &builder) {
SmallVector<AffineForOp, 4> loops;
loops.push_back(innermostLoop);
// Walk through all functions and loops.
for (auto forOp : func.getOps<mlir::AffineForOp>()) {
// Walk through all inner loops.
SmallVector<mlir::AffineForOp, 4> loops;
forOp.walk([&](mlir::AffineForOp loop) {
if (!loops.empty()) {
// Make sure the current loop is a sequential nested loop.
// TODO: support parallel loops perfection? This tends to be much more
// complicated than a pure sequential loop stack, but seems possible.
if (loop != loops.back().getParentOp()) {
forOp.emitError("contains parallel inner loops, not supported");
return;
}
auto innermostLoop = loops.front();
// Collect all operations before the inner loop.
SmallVector<Operation *, 4> frontOps;
for (auto &op : loop.getBody()->getOperations()) {
if (&op != loops.back().getOperation())
frontOps.push_back(&op);
else
break;
}
// All operations before the inner loop should be moved to the
// innermost loop, they are collected in frontOps.
if (!frontOps.empty()) {
// TODO: for now, we assume all users are inside of the current loop.
// This is important because if any user is located at inner loops, it
// is required to create a memref for holding the result.
for (auto op : frontOps)
for (auto user : op->getUsers())
if (user->getParentOp() != loop)
return;
// Create AffineIf in the front of the innermost loop.
SmallVector<AffineExpr, 4> ifExprs;
SmallVector<bool, 4> ifEqFlags;
SmallVector<Value, 4> ifOperands;
unsigned dim = 0;
for (auto innerLoop : loops) {
// Create all components required by constructing if operation.
if (innerLoop.hasConstantLowerBound()) {
ifExprs.push_back(
getAffineDimExpr(dim++, func.getContext()) -
getAffineConstantExpr(innerLoop.getConstantLowerBound(),
func.getContext()));
ifOperands.push_back(innerLoop.getInductionVar());
} else {
// Non-constant case requires to integrate the bound affine
// expression and operands into the condition integer set.
auto lowerExpr = innerLoop.getLowerBoundMap().getResult(0);
auto lowerOperands = innerLoop.getLowerBoundOperands();
SmallVector<AffineExpr, 4> newDims;
for (unsigned i = 0, e = lowerOperands.size(); i < e; ++i)
newDims.push_back(
getAffineDimExpr(i + dim + 1, func.getContext()));
lowerExpr = lowerExpr.replaceDimsAndSymbols(newDims, {});
ifExprs.push_back(getAffineDimExpr(dim++, func.getContext()) -
lowerExpr);
ifOperands.push_back(innerLoop.getInductionVar());
ifOperands.append(lowerOperands.begin(), lowerOperands.end());
dim += lowerOperands.size();
}
ifEqFlags.push_back(true);
}
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPointToStart(innermostLoop.getBody());
auto ifOp = builder.create<mlir::AffineIfOp>(
func.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in frontOps into the innermost loop. Note
// that if the operation has a result, it will always be executed.
// However, if the operation doesn't have a result (e.g. AffineStore
// operation), it will be put into the generated AffineIf
// operation and conditionally executed.
for (auto op : frontOps) {
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
}
// Collect all operations after the inner loop.
SmallVector<Operation *, 4> backOps;
auto &opList = loop.getBody()->getOperations();
for (auto opIt = opList.rbegin(); opIt != opList.rend(); ++opIt) {
auto &op = *opIt;
if (!isa<mlir::AffineYieldOp>(op)) {
if (&op != loops.back().getOperation())
backOps.push_back(&op);
else
break;
}
}
// All operations after the inner loop should be moved to the
// innermost loop, they are collected in backOps.
if (!backOps.empty()) {
// Create AffineIf in the back of the innermost loop (before the
// terminator).
SmallVector<AffineExpr, 4> ifExprs;
SmallVector<bool, 4> ifEqFlags;
SmallVector<Value, 4> ifOperands;
unsigned dim = 0;
for (auto innerLoop : loops) {
// Create all components required by constructing if operation.
if (innerLoop.hasConstantUpperBound()) {
ifExprs.push_back(
getAffineConstantExpr(innerLoop.getConstantUpperBound() - 1,
func.getContext()) -
getAffineDimExpr(dim++, func.getContext()));
ifOperands.push_back(innerLoop.getInductionVar());
} else {
// Non-constant case requires to integrate the bound affine
// expression and operands into the condition integer set.
auto upperExpr = innerLoop.getUpperBoundMap().getResult(0);
auto upperOperands = innerLoop.getUpperBoundOperands();
SmallVector<AffineExpr, 4> newDims;
for (unsigned i = 0, e = upperOperands.size(); i < e; ++i)
newDims.push_back(
getAffineDimExpr(i + dim + 1, func.getContext()));
upperExpr = upperExpr.replaceDimsAndSymbols(newDims, {});
ifExprs.push_back(upperExpr -
getAffineConstantExpr(1, func.getContext()) -
getAffineDimExpr(dim++, func.getContext()));
ifOperands.push_back(innerLoop.getInductionVar());
ifOperands.append(upperOperands.begin(), upperOperands.end());
dim += upperOperands.size();
}
ifEqFlags.push_back(true);
}
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPoint(innermostLoop.getBody()->getTerminator());
auto ifOp = builder.create<mlir::AffineIfOp>(
func.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in backOps into the innermost loop. Note
// that if the operation has a result, it will always be executed.
// However, if the operation doesn't have a result (e.g. AffineStore
// operation), it will be put into the generated AffineIf
// operation and conditionally executed.
for (auto opIt = backOps.rbegin(); opIt < backOps.rend(); ++opIt) {
auto op = *opIt;
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
while (true) {
// Get the parent loop of the child loop.
auto childLoop = loops.back();
auto loop = dyn_cast<AffineForOp>(childLoop.getParentOp());
// Break the procedure if the parent operation is no longer a loop.
if (!loop)
break;
// Break if the parent loop contains more than one child loop.
// TODO: how to handle this case? It seems possible.
if (getChildLoopNum(loop) != 1)
break;
// Collect all operations before the child loop.
SmallVector<Operation *, 4> frontOps;
for (auto &op : loop.getBody()->getOperations()) {
if (&op != childLoop)
frontOps.push_back(&op);
else
break;
}
// All operations before the child loop should be moved to the innermost
// loop, they are collected in frontOps.
if (!frontOps.empty()) {
// TODO: for now, we assume all users are inside of the current loop. This
// is important because if any user is located in an inner loop, a memref
// must be created to hold the result.
for (auto op : frontOps)
for (auto user : op->getUsers())
if (user->getParentOp() != loop)
return true;
// Create AffineIf in the front of the innermost loop.
SmallVector<AffineExpr, 4> ifExprs;
SmallVector<bool, 4> ifEqFlags;
SmallVector<Value, 4> ifOperands;
unsigned dim = 0;
for (auto innerLoop : loops) {
// Create all components required by constructing if operation.
if (innerLoop.hasConstantLowerBound()) {
ifExprs.push_back(builder.getAffineDimExpr(dim++) -
innerLoop.getConstantLowerBound());
ifOperands.push_back(innerLoop.getInductionVar());
} else {
// Non-constant case requires to integrate the bound affine expression
// and operands into the condition integer set.
auto lowerExpr = innerLoop.getLowerBoundMap().getResult(0);
auto lowerOperands = innerLoop.getLowerBoundOperands();
SmallVector<AffineExpr, 4> newDims;
for (unsigned i = 0, e = lowerOperands.size(); i < e; ++i)
newDims.push_back(builder.getAffineDimExpr(i + dim + 1));
lowerExpr = lowerExpr.replaceDimsAndSymbols(newDims, {});
ifExprs.push_back(builder.getAffineDimExpr(dim++) - lowerExpr);
ifOperands.push_back(innerLoop.getInductionVar());
ifOperands.append(lowerOperands.begin(), lowerOperands.end());
dim += lowerOperands.size();
}
ifEqFlags.push_back(true);
}
loops.push_back(loop);
});
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPointToStart(innermostLoop.getBody());
auto ifOp =
builder.create<AffineIfOp>(loop.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in frontOps into the innermost loop. Note that if
// the operation has a result, it will always be executed. However, if the
// operation doesn't have a result (e.g. AffineStore operation), it will be
// put into the generated AffineIf operation and conditionally
// executed.
for (auto op : frontOps) {
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
}
// Collect all operations after the inner loop.
SmallVector<Operation *, 4> backOps;
auto &opList = loop.getBody()->getOperations();
for (auto opIt = opList.rbegin(); opIt != opList.rend(); ++opIt) {
auto &op = *opIt;
if (!isa<AffineYieldOp>(op)) {
if (&op != childLoop.getOperation())
backOps.push_back(&op);
else
break;
}
}
// All operations after the inner loop should be moved to the
// innermost loop, they are collected in backOps.
if (!backOps.empty()) {
// Create AffineIf in the back of the innermost loop (before the
// terminator).
SmallVector<AffineExpr, 4> ifExprs;
SmallVector<bool, 4> ifEqFlags;
SmallVector<Value, 4> ifOperands;
unsigned dim = 0;
for (auto innerLoop : loops) {
// Create all components required by constructing if operation.
if (innerLoop.hasConstantUpperBound()) {
ifExprs.push_back(innerLoop.getConstantUpperBound() - 1 -
builder.getAffineDimExpr(dim++));
ifOperands.push_back(innerLoop.getInductionVar());
} else {
// Non-constant case requires to integrate the bound affine expression
// and operands into the condition integer set.
auto upperExpr = innerLoop.getUpperBoundMap().getResult(0);
auto upperOperands = innerLoop.getUpperBoundOperands();
SmallVector<AffineExpr, 4> newDims;
for (unsigned i = 0, e = upperOperands.size(); i < e; ++i)
newDims.push_back(builder.getAffineDimExpr(i + dim + 1));
upperExpr = upperExpr.replaceDimsAndSymbols(newDims, {});
ifExprs.push_back(upperExpr - 1 - builder.getAffineDimExpr(dim++));
ifOperands.push_back(innerLoop.getInductionVar());
ifOperands.append(upperOperands.begin(), upperOperands.end());
dim += upperOperands.size();
}
ifEqFlags.push_back(true);
}
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPoint(innermostLoop.getBody()->getTerminator());
auto ifOp =
builder.create<AffineIfOp>(loop.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in backOps into the innermost loop. Note that if
// the operation has a result, it will always be executed. However, if the
// operation doesn't have a result (e.g. AffineStore operation), it will be
// put into the generated AffineIf operation and conditionally
// executed.
for (auto opIt = backOps.rbegin(); opIt < backOps.rend(); ++opIt) {
auto op = *opIt;
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
}
// Push back the current loop as the new child loop.
loops.push_back(loop);
}
// For now, this method always succeeds.
return true;
}
std::unique_ptr<mlir::Pass> scalehls::createAffineLoopPerfectionPass() {

View File

@ -18,13 +18,14 @@ struct RemoveVariableBound
auto func = getOperation();
auto builder = OpBuilder(func);
// Walk through all functions and loops.
// Walk through all loops.
for (auto loop : func.getOps<AffineForOp>())
applyRemoveVariableBound(loop, builder);
}
};
} // namespace
/// Apply remove variable bound to all inner loops of the input loop.
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
SmallVector<AffineForOp, 4> nestedLoops;
getPerfectlyNestedLoops(nestedLoops, loop);