[PartialAffineLoopTiling] factor out applyPartialAffineLoopTiling() method, reimplement the tiling strategy

This commit is contained in:
Hanchen Ye 2021-01-20 16:45:11 -06:00
parent 10944e1367
commit e0c3b2ad8e
11 changed files with 103 additions and 81 deletions

View File

@ -61,8 +61,8 @@ public:
// Helper methods
//===----------------------------------------------------------------------===//
using AffineLoopBand = SmallVector<AffineForOp, 4>;
using AffineLoopBands = SmallVector<AffineLoopBand, 4>;
using AffineLoopBand = SmallVector<AffineForOp, 6>;
using AffineLoopBands = SmallVector<AffineLoopBand, 6>;
// For storing all affine memory access operations (including CallOp,
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
@ -98,6 +98,11 @@ unsigned getChildLoopNum(Operation *op);
AffineForOp getLoopBandFromRoot(AffineForOp forOp, AffineLoopBand &band);
AffineForOp getLoopBandFromLeaf(AffineForOp forOp, AffineLoopBand &band);
/// Collect all loop bands in the given block. If allowHavingChilds is false,
/// only innermost loop bands will be collected; otherwise, loops containing
/// more than one child loop are also used as the leaf of a band.
void getLoopBands(Block &block, AffineLoopBands &bands,
bool allowHavingChilds = false);
} // namespace scalehls
} // namespace mlir

View File

@ -23,7 +23,7 @@ namespace scalehls {
//===----------------------------------------------------------------------===//
bool applyLegalizeDataflow(FuncOp func, OpBuilder &builder, int64_t minGran,
bool insertCopy);
bool insertCopy = true);
bool applySplitFunction(FuncOp func, OpBuilder &builder);
@ -34,7 +34,11 @@ bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
/// Apply remove variable bound to all inner loops of the input loop.
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
bool applyAffineLoopOrderOpt(AffineLoopBand band, OpBuilder &builder);
bool applyAffineLoopOrderOpt(AffineLoopBand band);
bool applyPartialAffineLoopTiling(AffineLoopBand band, OpBuilder &builder,
unsigned tileSize,
bool applyPipelining = true);
/// Apply loop pipelining to the input loop, all inner loops are automatically
/// fully unrolled.

View File

@ -105,8 +105,6 @@ def PartialAffineLoopTile : Pass<"partial-affine-loop-tile", "FuncOp"> {
let constructor = "mlir::scalehls::createPartialAffineLoopTilePass()";
let options = [
Option<"tileLevel", "tile-level", "unsigned", /*default=*/"1",
"Positive number: the level of loops to be tiled">,
Option<"tileSize", "tile-size", "unsigned", /*default=*/"2",
"Positive number: the size of tiling">
];

View File

@ -196,4 +196,19 @@ AffineForOp scalehls::getLoopBandFromLeaf(AffineForOp forOp,
band.append(reverseBand.rbegin(), reverseBand.rend());
return band.front();
}
}
/// Walk every AffineForOp nested in `block` and append one loop band to
/// `bands` for each loop that qualifies as the leaf of a band. A loop
/// qualifies when it has no child loop; when `allowHavingChilds` is true, a
/// loop with more than one child loop qualifies as well. Each band is built by
/// getLoopBandFromLeaf() starting from the qualifying loop.
void scalehls::getLoopBands(Block &block, AffineLoopBands &bands,
                            bool allowHavingChilds) {
  block.walk([&](AffineForOp forOp) {
    auto numChildLoops = getChildLoopNum(forOp);

    // Loops with exactly one child loop are never a band leaf; loops with
    // several child loops are only accepted when the caller allows it.
    bool isInnermost = numChildLoops == 0;
    bool isMultiChildLeaf = numChildLoops > 1 && allowHavingChilds;
    if (!isInnermost && !isMultiChildLeaf)
      return;

    AffineLoopBand loopBand;
    getLoopBandFromLeaf(forOp, loopBand);
    bands.push_back(loopBand);
  });
}

View File

@ -17,27 +17,19 @@ namespace {
struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
void runOnOperation() override {
auto func = getOperation();
auto builder = OpBuilder(func);
// Collect all target loop bands.
AffineLoopBands targetBands;
func.walk([&](AffineForOp loop) {
if (getChildLoopNum(loop) == 0) {
AffineLoopBand band;
getLoopBandFromLeaf(loop, band);
targetBands.push_back(band);
}
});
getLoopBands(func.front(), targetBands);
// Apply loop order optimization to each loop band.
for (auto band : targetBands)
applyAffineLoopOrderOpt(band, builder);
applyAffineLoopOrderOpt(band);
}
};
} // namespace
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band,
OpBuilder &builder) {
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band) {
auto &loopBlock = band.back().getLoopBody().front();
auto bandDepth = band.size();

View File

@ -35,6 +35,8 @@ struct AffineLoopPerfection
/// Apply loop perfection to all outer loops of the input loop until the outer
/// operation is no longer a loop, or contains more than one child loop.
/// TODO: pass in an AffineLoopBand rather than an AffineForOp to simplify the
/// internal implementation.
bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
OpBuilder &builder) {
SmallVector<AffineForOp, 4> loops;
@ -72,7 +74,7 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
for (auto op : frontOps)
for (auto user : op->getUsers())
if (user->getParentOp() != loop)
return true;
return false;
// Create AffineIf in the front of the innermost loop.
SmallVector<AffineExpr, 4> ifExprs;
@ -193,8 +195,6 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
// Push back the current loop as the new child loop.
loops.push_back(loop);
}
// For now, this method will always success.
return true;
}

View File

@ -34,19 +34,25 @@ struct FuncPipelining : public FuncPipeliningBase<FuncPipelining> {
/// Apply function pipelining to the input function, all contained loops are
/// automatically fully unrolled.
bool scalehls::applyFuncPipelining(FuncOp func, OpBuilder &builder) {
bool hasFullyUnrolled = false;
while (hasFullyUnrolled == false) {
hasFullyUnrolled = true;
// TODO: the termination condition needs to be updated. This will try at most
// 8 iterations.
for (auto i = 0; i < 8; ++i) {
bool hasFullyUnrolled = true;
func.walk([&](AffineForOp loop) {
if (failed(loopUnrollFull(loop)))
hasFullyUnrolled = false;
});
if (hasFullyUnrolled)
break;
if (i == 7)
return false;
}
func->setAttr("pipeline", builder.getBoolAttr(true));
func->setAttr("dataflow", builder.getBoolAttr(false));
// For now, this method will always success.
return true;
}

View File

@ -59,15 +59,21 @@ struct LoopPipelining : public LoopPipeliningBase<LoopPipelining> {
bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
targetLoop->setAttr("pipeline", builder.getBoolAttr(true));
// All inner loops of the pipelined loop are automatically unrolled.
bool hasFullyUnrolled = false;
while (hasFullyUnrolled == false) {
hasFullyUnrolled = true;
// All inner loops of the pipelined loop are automatically unrolled. This will
// try at most 8 iterations.
for (auto i = 0; i < 8; ++i) {
bool hasFullyUnrolled = true;
targetLoop.walk([&](AffineForOp loop) {
if (loop != targetLoop)
if (failed(loopUnrollFull(loop)))
hasFullyUnrolled = false;
});
if (hasFullyUnrolled)
break;
if (i == 7)
return false;
}
// All outer loops that perfect nest the pipelined loop can be flattened.
@ -88,7 +94,6 @@ bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
break;
}
// For now, this method will always success.
return true;
}

View File

@ -175,20 +175,14 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
// TODO: apply different optimizations to different leaf LNs.
AffineLoopBands targetBands;
func.walk([&](AffineForOp loop) {
if (getChildLoopNum(loop) == 0) {
AffineLoopBand band;
getLoopBandFromLeaf(loop, band);
targetBands.push_back(band);
}
});
getLoopBands(func.front(), targetBands);
// Loop perfection, remove variable bound, and loop order optimization are
// always applied for the convenience of polyhedral optimizations.
for (auto band : targetBands) {
applyAffineLoopPerfection(band.back(), builder);
applyRemoveVariableBound(band.front(), builder);
applyAffineLoopOrderOpt(band, builder);
applyAffineLoopOrderOpt(band);
}
// TODO: automatic tiling and pipelining.

View File

@ -4,6 +4,7 @@
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Transforms/LoopUtils.h"
#include "scalehls/Transforms/Passes.h"
@ -13,53 +14,56 @@ using namespace scalehls;
namespace {
struct PartialAffineLoopTile
: public PartialAffineLoopTileBase<PartialAffineLoopTile> {
void runOnOperation() override;
void runOnOperation() override {
auto func = getOperation();
auto builder = OpBuilder(func);
std::vector<SmallVector<AffineForOp, 6>> bands;
getTileableBands(func, &bands);
for (auto band : bands)
applyPartialAffineLoopTiling(band, builder, tileSize);
}
};
} // namespace
void PartialAffineLoopTile::runOnOperation() {
// Walk through all functions and loops.
auto func = getOperation();
bool scalehls::applyPartialAffineLoopTiling(AffineLoopBand band,
OpBuilder &builder,
unsigned tileSize,
bool applyPipelining) {
if (!isPerfectlyNested(band))
return false;
// Bands of loops to tile.
std::vector<SmallVector<AffineForOp, 6>> bands;
getTileableBands(func, &bands);
// Calculate the tiling size of each loop in the band.
SmallVector<unsigned, 8> sizes;
auto remainTileSize = tileSize;
// Tile each band.
for (auto &band : bands) {
// Truncate band and only keep first tileLevel loops.
size_t realTileLevel = band.size();
if (realTileLevel > tileLevel) {
band.resize(tileLevel);
realTileLevel = tileLevel;
}
for (auto loop : band) {
if (auto tripCount = getConstantTripCount(loop)) {
auto constTripCount = tripCount.getValue();
// Set up tile sizes; fill missing tile sizes at the end with default tile
// size or tileSize if one was provided.
SmallVector<unsigned, 6> tileSizes;
tileSizes.assign(band.size(), tileSize);
SmallVector<AffineForOp, 6> tiledNest;
if (failed(tilePerfectlyNested(band, tileSizes, &tiledNest)))
return signalPassFailure();
// Permute loop order to move the tiled loop to the innermost of the
// perfect nested loop.
SmallVector<AffineForOp, 4> nestedLoops;
getPerfectlyNestedLoops(nestedLoops, tiledNest.front());
SmallVector<unsigned, 4> permMap;
for (size_t i = 0, e = nestedLoops.size(); i < e; ++i) {
if (i < realTileLevel)
permMap.push_back(i);
else if (i < 2 * realTileLevel)
permMap.push_back(e + i - 2 * realTileLevel);
else
permMap.push_back(i - realTileLevel);
}
if (isValidLoopInterchangePermutation(nestedLoops, permMap))
permuteLoops(nestedLoops, permMap);
if (remainTileSize > constTripCount) {
sizes.push_back(constTripCount);
remainTileSize = (remainTileSize + constTripCount - 1) / constTripCount;
} else {
sizes.push_back(remainTileSize);
remainTileSize = 1;
}
} else
return false;
}
AffineLoopBand tiledBand;
if (failed(tilePerfectlyNested(band, sizes, &tiledBand)))
return false;
// Pipelining the tiled loop band if required.
if (applyPipelining) {
auto targetLoop = tiledBand[band.size() - 1];
return applyLoopPipelining(targetLoop, builder);
}
return true;
}
std::unique_ptr<Pass> scalehls::createPartialAffineLoopTilePass() {

View File

@ -123,11 +123,10 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
// Set constant variable bound.
auto maximum = bound.getValue().second;
loop.setConstantUpperBound(maximum);
}
} else
return false;
}
}
// For now, this method will always success.
return true;
}