[PartialAffineLoopTiling] factor out applyPartialAffineLoopTiling() method, reimplement the tiling strategy

This commit is contained in:
Hanchen Ye 2021-01-20 16:45:11 -06:00
parent 10944e1367
commit e0c3b2ad8e
11 changed files with 103 additions and 81 deletions

View File

@ -61,8 +61,8 @@ public:
// Helper methods // Helper methods
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
using AffineLoopBand = SmallVector<AffineForOp, 4>; using AffineLoopBand = SmallVector<AffineForOp, 6>;
using AffineLoopBands = SmallVector<AffineLoopBand, 4>; using AffineLoopBands = SmallVector<AffineLoopBand, 6>;
// For storing all affine memory access operations (including CallOp, // For storing all affine memory access operations (including CallOp,
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref. // AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
@ -98,6 +98,11 @@ unsigned getChildLoopNum(Operation *op);
AffineForOp getLoopBandFromRoot(AffineForOp forOp, AffineLoopBand &band); AffineForOp getLoopBandFromRoot(AffineForOp forOp, AffineLoopBand &band);
AffineForOp getLoopBandFromLeaf(AffineForOp forOp, AffineLoopBand &band); AffineForOp getLoopBandFromLeaf(AffineForOp forOp, AffineLoopBand &band);
/// Collect all loop bands in the given block. If allowHavingChilds is false,
/// only innermost loop bands will be collected.
void getLoopBands(Block &block, AffineLoopBands &bands,
bool allowHavingChilds = false);
} // namespace scalehls } // namespace scalehls
} // namespace mlir } // namespace mlir

View File

@ -23,7 +23,7 @@ namespace scalehls {
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
bool applyLegalizeDataflow(FuncOp func, OpBuilder &builder, int64_t minGran, bool applyLegalizeDataflow(FuncOp func, OpBuilder &builder, int64_t minGran,
bool insertCopy); bool insertCopy = true);
bool applySplitFunction(FuncOp func, OpBuilder &builder); bool applySplitFunction(FuncOp func, OpBuilder &builder);
@ -34,7 +34,11 @@ bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
/// Apply remove variable bound to all inner loops of the input loop. /// Apply remove variable bound to all inner loops of the input loop.
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder); bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
bool applyAffineLoopOrderOpt(AffineLoopBand band, OpBuilder &builder); bool applyAffineLoopOrderOpt(AffineLoopBand band);
bool applyPartialAffineLoopTiling(AffineLoopBand band, OpBuilder &builder,
unsigned tileSize,
bool applyPipelining = true);
/// Apply loop pipelining to the input loop, all inner loops are automatically /// Apply loop pipelining to the input loop, all inner loops are automatically
/// fully unrolled. /// fully unrolled.

View File

@ -105,8 +105,6 @@ def PartialAffineLoopTile : Pass<"partial-affine-loop-tile", "FuncOp"> {
let constructor = "mlir::scalehls::createPartialAffineLoopTilePass()"; let constructor = "mlir::scalehls::createPartialAffineLoopTilePass()";
let options = [ let options = [
Option<"tileLevel", "tile-level", "unsigned", /*default=*/"1",
"Positive number: the level of loops to be tiled">,
Option<"tileSize", "tile-size", "unsigned", /*default=*/"2", Option<"tileSize", "tile-size", "unsigned", /*default=*/"2",
"Positive number: the size of tiling"> "Positive number: the size of tiling">
]; ];

View File

@ -196,4 +196,19 @@ AffineForOp scalehls::getLoopBandFromLeaf(AffineForOp forOp,
band.append(reverseBand.rbegin(), reverseBand.rend()); band.append(reverseBand.rbegin(), reverseBand.rend());
return band.front(); return band.front();
} }
/// Walk every AffineForOp nested in `block` and append one loop band to
/// `bands` for each loop that qualifies as a band leaf:
///   - loops that contain no child loop are always selected;
///   - loops that contain more than one child loop are selected only when
///     `allowHavingChilds` is true.
/// Loops with exactly one child loop are never selected on their own. The band
/// of a selected loop is built by getLoopBandFromLeaf() (presumably by
/// climbing outwards from the selected loop -- see that helper for the exact
/// rule). `bands` is only appended to; existing entries are left untouched.
void scalehls::getLoopBands(Block &block, AffineLoopBands &bands,
                            bool allowHavingChilds) {
  block.walk([&](AffineForOp forOp) {
    auto numChildLoops = getChildLoopNum(forOp);
    bool isInnermost = numChildLoops == 0;
    bool hasMultipleChilds = numChildLoops > 1;

    // Skip loops that are neither innermost nor an accepted branching point.
    if (!isInnermost && !(hasMultipleChilds && allowHavingChilds))
      return;

    AffineLoopBand loopBand;
    getLoopBandFromLeaf(forOp, loopBand);
    bands.push_back(loopBand);
  });
}

View File

@ -17,27 +17,19 @@ namespace {
struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> { struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
void runOnOperation() override { void runOnOperation() override {
auto func = getOperation(); auto func = getOperation();
auto builder = OpBuilder(func);
// Collect all target loop bands. // Collect all target loop bands.
AffineLoopBands targetBands; AffineLoopBands targetBands;
func.walk([&](AffineForOp loop) { getLoopBands(func.front(), targetBands);
if (getChildLoopNum(loop) == 0) {
AffineLoopBand band;
getLoopBandFromLeaf(loop, band);
targetBands.push_back(band);
}
});
// Apply loop order optimization to each loop band. // Apply loop order optimization to each loop band.
for (auto band : targetBands) for (auto band : targetBands)
applyAffineLoopOrderOpt(band, builder); applyAffineLoopOrderOpt(band);
} }
}; };
} // namespace } // namespace
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band, bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band) {
OpBuilder &builder) {
auto &loopBlock = band.back().getLoopBody().front(); auto &loopBlock = band.back().getLoopBody().front();
auto bandDepth = band.size(); auto bandDepth = band.size();

View File

@ -35,6 +35,8 @@ struct AffineLoopPerfection
/// Apply loop perfection to all outer loops of the input loop until the outer /// Apply loop perfection to all outer loops of the input loop until the outer
/// operation is no longer a loop, or contains more than one child loop. /// operation is no longer a loop, or contains more than one child loop.
/// TODO: pass in an AffineLoopBand rather than an AffineForOp to simplify the
/// internal implementation.
bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop, bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
OpBuilder &builder) { OpBuilder &builder) {
SmallVector<AffineForOp, 4> loops; SmallVector<AffineForOp, 4> loops;
@ -72,7 +74,7 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
for (auto op : frontOps) for (auto op : frontOps)
for (auto user : op->getUsers()) for (auto user : op->getUsers())
if (user->getParentOp() != loop) if (user->getParentOp() != loop)
return true; return false;
// Create AffineIf in the front of the innermost loop. // Create AffineIf in the front of the innermost loop.
SmallVector<AffineExpr, 4> ifExprs; SmallVector<AffineExpr, 4> ifExprs;
@ -193,8 +195,6 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
// Push back the current loop as the new child loop. // Push back the current loop as the new child loop.
loops.push_back(loop); loops.push_back(loop);
} }
// For now, this method will always success.
return true; return true;
} }

View File

@ -34,19 +34,25 @@ struct FuncPipelining : public FuncPipeliningBase<FuncPipelining> {
/// Apply function pipelining to the input function, all contained loops are /// Apply function pipelining to the input function, all contained loops are
/// automatically fully unrolled. /// automatically fully unrolled.
bool scalehls::applyFuncPipelining(FuncOp func, OpBuilder &builder) { bool scalehls::applyFuncPipelining(FuncOp func, OpBuilder &builder) {
bool hasFullyUnrolled = false; // TODO: the termination condition needs to be updated. This will try at most 8
while (hasFullyUnrolled == false) { // iterations.
hasFullyUnrolled = true; for (auto i = 0; i < 8; ++i) {
bool hasFullyUnrolled = true;
func.walk([&](AffineForOp loop) { func.walk([&](AffineForOp loop) {
if (failed(loopUnrollFull(loop))) if (failed(loopUnrollFull(loop)))
hasFullyUnrolled = false; hasFullyUnrolled = false;
}); });
if (hasFullyUnrolled)
break;
if (i == 7)
return false;
} }
func->setAttr("pipeline", builder.getBoolAttr(true)); func->setAttr("pipeline", builder.getBoolAttr(true));
func->setAttr("dataflow", builder.getBoolAttr(false)); func->setAttr("dataflow", builder.getBoolAttr(false));
// For now, this method will always success.
return true; return true;
} }

View File

@ -59,15 +59,21 @@ struct LoopPipelining : public LoopPipeliningBase<LoopPipelining> {
bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) { bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
targetLoop->setAttr("pipeline", builder.getBoolAttr(true)); targetLoop->setAttr("pipeline", builder.getBoolAttr(true));
// All inner loops of the pipelined loop are automatically unrolled. // All inner loops of the pipelined loop are automatically unrolled. This will
bool hasFullyUnrolled = false; // try at most 8 iterations.
while (hasFullyUnrolled == false) { for (auto i = 0; i < 8; ++i) {
hasFullyUnrolled = true; bool hasFullyUnrolled = true;
targetLoop.walk([&](AffineForOp loop) { targetLoop.walk([&](AffineForOp loop) {
if (loop != targetLoop) if (loop != targetLoop)
if (failed(loopUnrollFull(loop))) if (failed(loopUnrollFull(loop)))
hasFullyUnrolled = false; hasFullyUnrolled = false;
}); });
if (hasFullyUnrolled)
break;
if (i == 7)
return false;
} }
// All outer loops that perfect nest the pipelined loop can be flattened. // All outer loops that perfect nest the pipelined loop can be flattened.
@ -88,7 +94,6 @@ bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
break; break;
} }
// For now, this method will always success.
return true; return true;
} }

View File

@ -175,20 +175,14 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
// TODO: apply different optimizations to different leaf LNs. // TODO: apply different optimizations to different leaf LNs.
AffineLoopBands targetBands; AffineLoopBands targetBands;
func.walk([&](AffineForOp loop) { getLoopBands(func.front(), targetBands);
if (getChildLoopNum(loop) == 0) {
AffineLoopBand band;
getLoopBandFromLeaf(loop, band);
targetBands.push_back(band);
}
});
// Loop perfection, remove variable bound, and loop order optimization are // Loop perfection, remove variable bound, and loop order optimization are
// always applied for the convenience of polyhedral optimizations. // always applied for the convenience of polyhedral optimizations.
for (auto band : targetBands) { for (auto band : targetBands) {
applyAffineLoopPerfection(band.back(), builder); applyAffineLoopPerfection(band.back(), builder);
applyRemoveVariableBound(band.front(), builder); applyRemoveVariableBound(band.front(), builder);
applyAffineLoopOrderOpt(band, builder); applyAffineLoopOrderOpt(band);
} }
// TODO: automatic tiling and pipelining. // TODO: automatic tiling and pipelining.

View File

@ -4,6 +4,7 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Transforms/LoopUtils.h" #include "mlir/Transforms/LoopUtils.h"
#include "scalehls/Transforms/Passes.h" #include "scalehls/Transforms/Passes.h"
@ -13,53 +14,56 @@ using namespace scalehls;
namespace { namespace {
struct PartialAffineLoopTile struct PartialAffineLoopTile
: public PartialAffineLoopTileBase<PartialAffineLoopTile> { : public PartialAffineLoopTileBase<PartialAffineLoopTile> {
void runOnOperation() override; void runOnOperation() override {
auto func = getOperation();
auto builder = OpBuilder(func);
std::vector<SmallVector<AffineForOp, 6>> bands;
getTileableBands(func, &bands);
for (auto band : bands)
applyPartialAffineLoopTiling(band, builder, tileSize);
}
}; };
} // namespace } // namespace
void PartialAffineLoopTile::runOnOperation() { bool scalehls::applyPartialAffineLoopTiling(AffineLoopBand band,
// Walk through all functions and loops. OpBuilder &builder,
auto func = getOperation(); unsigned tileSize,
bool applyPipelining) {
if (!isPerfectlyNested(band))
return false;
// Bands of loops to tile. // Calculate the tiling size of each loop in the band.
std::vector<SmallVector<AffineForOp, 6>> bands; SmallVector<unsigned, 8> sizes;
getTileableBands(func, &bands); auto remainTileSize = tileSize;
// Tile each band. for (auto loop : band) {
for (auto &band : bands) { if (auto tripCount = getConstantTripCount(loop)) {
// Truncate band and only keep first tileLevel loops. auto constTripCount = tripCount.getValue();
size_t realTileLevel = band.size();
if (realTileLevel > tileLevel) {
band.resize(tileLevel);
realTileLevel = tileLevel;
}
// Set up tile sizes; fill missing tile sizes at the end with default tile if (remainTileSize > constTripCount) {
// size or tileSize if one was provided. sizes.push_back(constTripCount);
SmallVector<unsigned, 6> tileSizes; remainTileSize = (remainTileSize + constTripCount - 1) / constTripCount;
tileSizes.assign(band.size(), tileSize); } else {
sizes.push_back(remainTileSize);
SmallVector<AffineForOp, 6> tiledNest; remainTileSize = 1;
if (failed(tilePerfectlyNested(band, tileSizes, &tiledNest))) }
return signalPassFailure(); } else
return false;
// Permute loop order to move the tiled loop to the innermost of the
// perfect nested loop.
SmallVector<AffineForOp, 4> nestedLoops;
getPerfectlyNestedLoops(nestedLoops, tiledNest.front());
SmallVector<unsigned, 4> permMap;
for (size_t i = 0, e = nestedLoops.size(); i < e; ++i) {
if (i < realTileLevel)
permMap.push_back(i);
else if (i < 2 * realTileLevel)
permMap.push_back(e + i - 2 * realTileLevel);
else
permMap.push_back(i - realTileLevel);
}
if (isValidLoopInterchangePermutation(nestedLoops, permMap))
permuteLoops(nestedLoops, permMap);
} }
AffineLoopBand tiledBand;
if (failed(tilePerfectlyNested(band, sizes, &tiledBand)))
return false;
// Pipelining the tiled loop band if required.
if (applyPipelining) {
auto targetLoop = tiledBand[band.size() - 1];
return applyLoopPipelining(targetLoop, builder);
}
return true;
} }
std::unique_ptr<Pass> scalehls::createPartialAffineLoopTilePass() { std::unique_ptr<Pass> scalehls::createPartialAffineLoopTilePass() {

View File

@ -123,11 +123,10 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
// Set constant variable bound. // Set constant variable bound.
auto maximum = bound.getValue().second; auto maximum = bound.getValue().second;
loop.setConstantUpperBound(maximum); loop.setConstantUpperBound(maximum);
} } else
return false;
} }
} }
// For now, this method will always success.
return true; return true;
} }