[AffineLoopPerfection] factor out applyAffineLoopPerfection() method (#20)

This commit is contained in:
Hanchen Ye 2021-01-07 18:38:11 -06:00
parent 94d6d57dda
commit 5b2af8e248
5 changed files with 211 additions and 166 deletions

View File

@ -90,6 +90,10 @@ AffineMap getLayoutMap(MemRefType memrefType, MLIRContext *context);
int64_t getPartitionFactors(MemRefType memrefType, int64_t getPartitionFactors(MemRefType memrefType,
SmallVector<int64_t, 4> *factors = nullptr); SmallVector<int64_t, 4> *factors = nullptr);
/// Return the number of child loops that are immediately contained by the
/// input operation.
unsigned getChildLoopNum(Operation *op);
} // namespace scalehls } // namespace scalehls
} // namespace mlir } // namespace mlir

View File

@ -16,9 +16,21 @@ class Pass;
namespace mlir { namespace mlir {
namespace scalehls { namespace scalehls {
/// Optimization APIs. //===----------------------------------------------------------------------===//
// Optimization APIs
//===----------------------------------------------------------------------===//
/// Apply loop perfection to all outer loops of the input loop until the outer
/// operation is no longer a loop, or contains more than one child loop.
bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
/// Apply remove variable bound to all inner loops of the input loop.
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder); bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
//===----------------------------------------------------------------------===//
// Optimization Pass Entries
//===----------------------------------------------------------------------===//
/// Pragma optimization passes. /// Pragma optimization passes.
std::unique_ptr<Pass> createLoopPipeliningPass(); std::unique_ptr<Pass> createLoopPipeliningPass();
std::unique_ptr<Pass> createArrayPartitionPass(); std::unique_ptr<Pass> createArrayPartitionPass();

View File

@ -199,3 +199,16 @@ int64_t scalehls::getPartitionFactors(MemRefType memrefType,
return accumFactor; return accumFactor;
} }
/// Return the number of AffineForOp operations that are immediately contained
/// by the input operation, i.e. that sit directly in a block of one of its
/// regions. Loops nested deeper inside a child operation are not counted,
/// because the traversal does not recurse into the visited operations.
unsigned scalehls::getChildLoopNum(Operation *op) {
  unsigned childNum = 0;
  for (auto &region : op->getRegions())
    for (auto &block : region)
      // Use a distinct name so the loop variable does not shadow the `op`
      // parameter.
      for (auto &childOp : block)
        if (isa<AffineForOp>(childOp))
          ++childNum;
  return childNum;
}

View File

@ -2,6 +2,7 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Transforms/Passes.h" #include "Transforms/Passes.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/IntegerSet.h" #include "mlir/IR/IntegerSet.h"
@ -13,177 +14,191 @@ using namespace scalehls;
namespace { namespace {
struct AffineLoopPerfection struct AffineLoopPerfection
: public AffineLoopPerfectionBase<AffineLoopPerfection> { : public AffineLoopPerfectionBase<AffineLoopPerfection> {
void runOnOperation() override; void runOnOperation() override {
auto func = getOperation();
auto builder = OpBuilder(func);
// Walk through all loops.
for (auto forOp : func.getOps<AffineForOp>()) {
// Collect all loops that either: (1) are innermost (contain no child
// loop nest); or (2) contain more than one child loop nest.
SmallVector<AffineForOp, 4> targetLoops;
forOp.walk([&](AffineForOp loop) {
if (getChildLoopNum(loop) != 1)
targetLoops.push_back(loop);
});
// Apply loop perfection to each target loop.
for (auto loop : targetLoops)
applyAffineLoopPerfection(loop, builder);
}
}
}; };
} // namespace } // namespace
void AffineLoopPerfection::runOnOperation() { /// Apply loop perfection to all outer loops of the input loop until the outer
auto func = getOperation(); /// operation is no longer a loop, or contains more than one child loop.
auto builder = OpBuilder(func); bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
OpBuilder &builder) {
SmallVector<AffineForOp, 4> loops;
loops.push_back(innermostLoop);
// Walk through all functions and loops. while (true) {
for (auto forOp : func.getOps<mlir::AffineForOp>()) { // Get the parent loop of the child loop.
// Walk through all inner loops. auto childLoop = loops.back();
SmallVector<mlir::AffineForOp, 4> loops; auto loop = dyn_cast<AffineForOp>(childLoop.getParentOp());
forOp.walk([&](mlir::AffineForOp loop) {
if (!loops.empty()) { // Break the procedure if the parent operation is no longer a loop.
// Make sure the current loop is a sequential nested loop. if (!loop)
// TODO: support parallel loops perfection? This tends to be much break;
// complicated than a pure sequential loop stack, but seems possible.
if (loop != loops.back().getParentOp()) { // Break if the parent loop contains more than one child loop.
forOp.emitError("contains parallel inner loops, not supported"); // TODO: how to handle this case? It seems possible.
return; if (getChildLoopNum(loop) != 1)
} break;
auto innermostLoop = loops.front();
// Collect all operations before the child loop.
// Collect all operations before the inner loop. SmallVector<Operation *, 4> frontOps;
SmallVector<Operation *, 4> frontOps; for (auto &op : loop.getBody()->getOperations()) {
for (auto &op : loop.getBody()->getOperations()) { if (&op != childLoop)
if (&op != loops.back().getOperation()) frontOps.push_back(&op);
frontOps.push_back(&op); else
else break;
break; }
}
// All operations before the child loop should be moved to the innermost
// All operations before the inner loop should be moved to the // loop, they are collected in frontOps.
// innermost loop, they are collected in frontOps. if (!frontOps.empty()) {
if (!frontOps.empty()) { // TODO: for now, we assume all users are inside of the current loop. This
// TODO: for now, we assume all users are inside of the current loop. // is important because if any user is located at inner loops, it is
// This is important because if any user is located at inner loops, it // required to create a memref for holding the result.
// is required to create a memref for holding the result. for (auto op : frontOps)
for (auto op : frontOps) for (auto user : op->getUsers())
for (auto user : op->getUsers()) if (user->getParentOp() != loop)
if (user->getParentOp() != loop) return true;
return;
// Create AffineIf in the front of the innermost loop.
// Create AffineIf in the front of the innermost loop. SmallVector<AffineExpr, 4> ifExprs;
SmallVector<AffineExpr, 4> ifExprs; SmallVector<bool, 4> ifEqFlags;
SmallVector<bool, 4> ifEqFlags; SmallVector<Value, 4> ifOperands;
SmallVector<Value, 4> ifOperands; unsigned dim = 0;
unsigned dim = 0; for (auto innerLoop : loops) {
for (auto innerLoop : loops) { // Create all components required by constructing if operation.
// Create all components required by constructing if operation. if (innerLoop.hasConstantLowerBound()) {
if (innerLoop.hasConstantLowerBound()) { ifExprs.push_back(builder.getAffineDimExpr(dim++) -
ifExprs.push_back( innerLoop.getConstantLowerBound());
getAffineDimExpr(dim++, func.getContext()) - ifOperands.push_back(innerLoop.getInductionVar());
getAffineConstantExpr(innerLoop.getConstantLowerBound(), } else {
func.getContext())); // Non-constant case requires to integrate the bound affine expression
ifOperands.push_back(innerLoop.getInductionVar()); // and operands into the condition integer set.
} else { auto lowerExpr = innerLoop.getLowerBoundMap().getResult(0);
// Non-constant case requires to integrate the bound affine auto lowerOperands = innerLoop.getLowerBoundOperands();
// expression and operands into the condition integer set. SmallVector<AffineExpr, 4> newDims;
auto lowerExpr = innerLoop.getLowerBoundMap().getResult(0); for (unsigned i = 0, e = lowerOperands.size(); i < e; ++i)
auto lowerOperands = innerLoop.getLowerBoundOperands(); newDims.push_back(builder.getAffineDimExpr(i + dim + 1));
SmallVector<AffineExpr, 4> newDims; lowerExpr = lowerExpr.replaceDimsAndSymbols(newDims, {});
for (unsigned i = 0, e = lowerOperands.size(); i < e; ++i)
newDims.push_back( ifExprs.push_back(builder.getAffineDimExpr(dim++) - lowerExpr);
getAffineDimExpr(i + dim + 1, func.getContext())); ifOperands.push_back(innerLoop.getInductionVar());
lowerExpr = lowerExpr.replaceDimsAndSymbols(newDims, {}); ifOperands.append(lowerOperands.begin(), lowerOperands.end());
dim += lowerOperands.size();
ifExprs.push_back(getAffineDimExpr(dim++, func.getContext()) -
lowerExpr);
ifOperands.push_back(innerLoop.getInductionVar());
ifOperands.append(lowerOperands.begin(), lowerOperands.end());
dim += lowerOperands.size();
}
ifEqFlags.push_back(true);
}
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPointToStart(innermostLoop.getBody());
auto ifOp = builder.create<mlir::AffineIfOp>(
func.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in frontOps into the innermost loop. Note
// that if the operation has result, it will always be executed.
// However, if the operation doesn't have result (e.g. AffineStore
// operation), it will be putted into the generated AffineIf
// operation and conditionally executed.
for (auto op : frontOps) {
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
}
// Collect all operations after the inner loop.
SmallVector<Operation *, 4> backOps;
auto &opList = loop.getBody()->getOperations();
for (auto opIt = opList.rbegin(); opIt != opList.rend(); ++opIt) {
auto &op = *opIt;
if (!isa<mlir::AffineYieldOp>(op)) {
if (&op != loops.back().getOperation())
backOps.push_back(&op);
else
break;
}
}
// All operations after the inner loop should be moved to the
// innermost loop, they are collected in backOps.
if (!backOps.empty()) {
// Create AffineIf in the back of the innermost loop (before the
// terminator).
SmallVector<AffineExpr, 4> ifExprs;
SmallVector<bool, 4> ifEqFlags;
SmallVector<Value, 4> ifOperands;
unsigned dim = 0;
for (auto innerLoop : loops) {
// Create all components required by constructing if operation.
if (innerLoop.hasConstantUpperBound()) {
ifExprs.push_back(
getAffineConstantExpr(innerLoop.getConstantUpperBound() - 1,
func.getContext()) -
getAffineDimExpr(dim++, func.getContext()));
ifOperands.push_back(innerLoop.getInductionVar());
} else {
// Non-constant case requires to integrate the bound affine
// expression and operands into the condition integer set.
auto upperExpr = innerLoop.getUpperBoundMap().getResult(0);
auto upperOperands = innerLoop.getUpperBoundOperands();
SmallVector<AffineExpr, 4> newDims;
for (unsigned i = 0, e = upperOperands.size(); i < e; ++i)
newDims.push_back(
getAffineDimExpr(i + dim + 1, func.getContext()));
upperExpr = upperExpr.replaceDimsAndSymbols(newDims, {});
ifExprs.push_back(upperExpr -
getAffineConstantExpr(1, func.getContext()) -
getAffineDimExpr(dim++, func.getContext()));
ifOperands.push_back(innerLoop.getInductionVar());
ifOperands.append(upperOperands.begin(), upperOperands.end());
dim += upperOperands.size();
}
ifEqFlags.push_back(true);
}
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPoint(innermostLoop.getBody()->getTerminator());
auto ifOp = builder.create<mlir::AffineIfOp>(
func.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in backOps into the innermost loop. Note
// that if the operation has result, it will always be executed.
// However, if the operation doesn't have result (e.g. AffineStore
// operation), it will be putted into the generated AffineIf
// operation and conditionally executed.
for (auto opIt = backOps.rbegin(); opIt < backOps.rend(); ++opIt) {
auto op = *opIt;
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
} }
ifEqFlags.push_back(true);
} }
loops.push_back(loop); auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
});
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPointToStart(innermostLoop.getBody());
auto ifOp =
builder.create<AffineIfOp>(loop.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in frontOps into the innermost loop. Note that if
// the operation has a result, it will always be executed. However, if the
// operation doesn't have a result (e.g. an AffineStore operation), it will
// be put into the generated AffineIf operation and conditionally
// executed.
for (auto op : frontOps) {
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
}
// Collect all operations after the inner loop.
SmallVector<Operation *, 4> backOps;
auto &opList = loop.getBody()->getOperations();
for (auto opIt = opList.rbegin(); opIt != opList.rend(); ++opIt) {
auto &op = *opIt;
if (!isa<AffineYieldOp>(op)) {
if (&op != childLoop.getOperation())
backOps.push_back(&op);
else
break;
}
}
// All operations after the inner loop should be moved to the
// innermost loop, they are collected in backOps.
if (!backOps.empty()) {
// Create AffineIf in the back of the innermost loop (before the
// terminator).
SmallVector<AffineExpr, 4> ifExprs;
SmallVector<bool, 4> ifEqFlags;
SmallVector<Value, 4> ifOperands;
unsigned dim = 0;
for (auto innerLoop : loops) {
// Create all components required by constructing if operation.
if (innerLoop.hasConstantUpperBound()) {
ifExprs.push_back(innerLoop.getConstantUpperBound() - 1 -
builder.getAffineDimExpr(dim++));
ifOperands.push_back(innerLoop.getInductionVar());
} else {
// Non-constant case requires to integrate the bound affine expression
// and operands into the condition integer set.
auto upperExpr = innerLoop.getUpperBoundMap().getResult(0);
auto upperOperands = innerLoop.getUpperBoundOperands();
SmallVector<AffineExpr, 4> newDims;
for (unsigned i = 0, e = upperOperands.size(); i < e; ++i)
newDims.push_back(builder.getAffineDimExpr(i + dim + 1));
upperExpr = upperExpr.replaceDimsAndSymbols(newDims, {});
ifExprs.push_back(upperExpr - 1 - builder.getAffineDimExpr(dim++));
ifOperands.push_back(innerLoop.getInductionVar());
ifOperands.append(upperOperands.begin(), upperOperands.end());
dim += upperOperands.size();
}
ifEqFlags.push_back(true);
}
auto ifCondition = IntegerSet::get(dim, 0, ifExprs, ifEqFlags);
// Set builder insertion point and create AffineIf operation.
builder.setInsertionPoint(innermostLoop.getBody()->getTerminator());
auto ifOp =
builder.create<AffineIfOp>(loop.getLoc(), ifCondition, ifOperands,
/*withElseRegion=*/false);
// Move all operations in backOps into the innermost loop. Note that if
// the operation has a result, it will always be executed. However, if the
// operation doesn't have a result (e.g. an AffineStore operation), it will
// be put into the generated AffineIf operation and conditionally
// executed.
for (auto opIt = backOps.rbegin(); opIt < backOps.rend(); ++opIt) {
auto op = *opIt;
if (op->getNumResults())
op->moveBefore(ifOp);
else
op->moveBefore(ifOp.getThenBlock()->getTerminator());
}
}
// Push back the current loop as the new child loop.
loops.push_back(loop);
} }
// For now, this method always succeeds.
return true;
} }
std::unique_ptr<mlir::Pass> scalehls::createAffineLoopPerfectionPass() { std::unique_ptr<mlir::Pass> scalehls::createAffineLoopPerfectionPass() {

View File

@ -18,13 +18,14 @@ struct RemoveVariableBound
auto func = getOperation(); auto func = getOperation();
auto builder = OpBuilder(func); auto builder = OpBuilder(func);
// Walk through all functions and loops. // Walk through all loops.
for (auto loop : func.getOps<AffineForOp>()) for (auto loop : func.getOps<AffineForOp>())
applyRemoveVariableBound(loop, builder); applyRemoveVariableBound(loop, builder);
} }
}; };
} // namespace } // namespace
/// Apply remove variable bound to all inner loops of the input loop.
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) { bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
SmallVector<AffineForOp, 4> nestedLoops; SmallVector<AffineForOp, 4> nestedLoops;
getPerfectlyNestedLoops(nestedLoops, loop); getPerfectlyNestedLoops(nestedLoops, loop);