[PartialAffineLoopTiling] factor out applyPartialAffineLoopTiling() method, reimplement the tiling strategy
This commit is contained in:
parent
10944e1367
commit
e0c3b2ad8e
|
@ -61,8 +61,8 @@ public:
|
|||
// Helper methods
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
using AffineLoopBand = SmallVector<AffineForOp, 4>;
|
||||
using AffineLoopBands = SmallVector<AffineLoopBand, 4>;
|
||||
using AffineLoopBand = SmallVector<AffineForOp, 6>;
|
||||
using AffineLoopBands = SmallVector<AffineLoopBand, 6>;
|
||||
|
||||
// For storing all affine memory access operations (including CallOp,
|
||||
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
|
||||
|
@ -98,6 +98,11 @@ unsigned getChildLoopNum(Operation *op);
|
|||
AffineForOp getLoopBandFromRoot(AffineForOp forOp, AffineLoopBand &band);
|
||||
AffineForOp getLoopBandFromLeaf(AffineForOp forOp, AffineLoopBand &band);
|
||||
|
||||
/// Collect all loop bands in the given block. If allowHavingChilds is false,
|
||||
/// only innermost loop bands will be collected.
|
||||
void getLoopBands(Block &block, AffineLoopBands &bands,
|
||||
bool allowHavingChilds = false);
|
||||
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace scalehls {
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool applyLegalizeDataflow(FuncOp func, OpBuilder &builder, int64_t minGran,
|
||||
bool insertCopy);
|
||||
bool insertCopy = true);
|
||||
|
||||
bool applySplitFunction(FuncOp func, OpBuilder &builder);
|
||||
|
||||
|
@ -34,7 +34,11 @@ bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
|
|||
/// Apply remove variable bound to all inner loops of the input loop.
|
||||
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
|
||||
|
||||
bool applyAffineLoopOrderOpt(AffineLoopBand band, OpBuilder &builder);
|
||||
bool applyAffineLoopOrderOpt(AffineLoopBand band);
|
||||
|
||||
bool applyPartialAffineLoopTiling(AffineLoopBand band, OpBuilder &builder,
|
||||
unsigned tileSize,
|
||||
bool applyPipelining = true);
|
||||
|
||||
/// Apply loop pipelining to the input loop, all inner loops are automatically
|
||||
/// fully unrolled.
|
||||
|
|
|
@ -105,8 +105,6 @@ def PartialAffineLoopTile : Pass<"partial-affine-loop-tile", "FuncOp"> {
|
|||
let constructor = "mlir::scalehls::createPartialAffineLoopTilePass()";
|
||||
|
||||
let options = [
|
||||
Option<"tileLevel", "tile-level", "unsigned", /*default=*/"1",
|
||||
"Positive number: the level of loops to be tiled">,
|
||||
Option<"tileSize", "tile-size", "unsigned", /*default=*/"2",
|
||||
"Positive number: the size of tiling">
|
||||
];
|
||||
|
|
|
@ -196,4 +196,19 @@ AffineForOp scalehls::getLoopBandFromLeaf(AffineForOp forOp,
|
|||
|
||||
band.append(reverseBand.rbegin(), reverseBand.rend());
|
||||
return band.front();
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk `block` and append one loop band to `bands` for each qualifying
/// AffineForOp. A loop with no child loops always qualifies; a loop with more
|
||||
/// than one child loop qualifies only when allowHavingChilds is true, so when
/// it is false only innermost loop bands are collected. A loop with exactly
/// one child loop never qualifies.
|
||||
void scalehls::getLoopBands(Block &block, AffineLoopBands &bands,
|
||||
bool allowHavingChilds) {
|
||||
block.walk([&](AffineForOp loop) {
|
||||
// Number of child loops of the visited loop, as reported by
// getChildLoopNum() (defined elsewhere in the project).
auto childNum = getChildLoopNum(loop);
|
||||

||||
// Only these loops are used as the leaf from which a band is built.
if (childNum == 0 || (childNum > 1 && allowHavingChilds)) {
|
||||
AffineLoopBand band;
|
||||
// Fill `band` starting from this loop; the returned loop is not needed here.
getLoopBandFromLeaf(loop, band);
|
||||
bands.push_back(band);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -17,27 +17,19 @@ namespace {
|
|||
struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
|
||||
void runOnOperation() override {
|
||||
auto func = getOperation();
|
||||
auto builder = OpBuilder(func);
|
||||
|
||||
// Collect all target loop bands.
|
||||
AffineLoopBands targetBands;
|
||||
func.walk([&](AffineForOp loop) {
|
||||
if (getChildLoopNum(loop) == 0) {
|
||||
AffineLoopBand band;
|
||||
getLoopBandFromLeaf(loop, band);
|
||||
targetBands.push_back(band);
|
||||
}
|
||||
});
|
||||
getLoopBands(func.front(), targetBands);
|
||||
|
||||
// Apply loop order optimization to each loop band.
|
||||
for (auto band : targetBands)
|
||||
applyAffineLoopOrderOpt(band, builder);
|
||||
applyAffineLoopOrderOpt(band);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band,
|
||||
OpBuilder &builder) {
|
||||
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band) {
|
||||
auto &loopBlock = band.back().getLoopBody().front();
|
||||
auto bandDepth = band.size();
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ struct AffineLoopPerfection
|
|||
|
||||
/// Apply loop perfection to all outer loops of the input loop until the outer
|
||||
/// operation is no longer a loop, or contains more than one child loop.
|
||||
/// TODO: pass in an AffineLoopBand rather than an AffineForOp to simplify the
|
||||
/// internal implementation.
|
||||
bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
||||
OpBuilder &builder) {
|
||||
SmallVector<AffineForOp, 4> loops;
|
||||
|
@ -72,7 +74,7 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
|||
for (auto op : frontOps)
|
||||
for (auto user : op->getUsers())
|
||||
if (user->getParentOp() != loop)
|
||||
return true;
|
||||
return false;
|
||||
|
||||
// Create AffineIf in the front of the innermost loop.
|
||||
SmallVector<AffineExpr, 4> ifExprs;
|
||||
|
@ -193,8 +195,6 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
|||
// Push back the current loop as the new child loop.
|
||||
loops.push_back(loop);
|
||||
}
|
||||
|
||||
// For now, this method will always succeed.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -34,19 +34,25 @@ struct FuncPipelining : public FuncPipeliningBase<FuncPipelining> {
|
|||
/// Apply function pipelining to the input function, all contained loops are
|
||||
/// automatically fully unrolled.
|
||||
bool scalehls::applyFuncPipelining(FuncOp func, OpBuilder &builder) {
|
||||
bool hasFullyUnrolled = false;
|
||||
while (hasFullyUnrolled == false) {
|
||||
hasFullyUnrolled = true;
|
||||
// TODO: the termination condition needs to be updated. This will try at most 8
|
||||
// iterations.
|
||||
for (auto i = 0; i < 8; ++i) {
|
||||
bool hasFullyUnrolled = true;
|
||||
func.walk([&](AffineForOp loop) {
|
||||
if (failed(loopUnrollFull(loop)))
|
||||
hasFullyUnrolled = false;
|
||||
});
|
||||
|
||||
if (hasFullyUnrolled)
|
||||
break;
|
||||
|
||||
if (i == 7)
|
||||
return false;
|
||||
}
|
||||
|
||||
func->setAttr("pipeline", builder.getBoolAttr(true));
|
||||
func->setAttr("dataflow", builder.getBoolAttr(false));
|
||||
|
||||
// For now, this method will always succeed.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -59,15 +59,21 @@ struct LoopPipelining : public LoopPipeliningBase<LoopPipelining> {
|
|||
bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
|
||||
targetLoop->setAttr("pipeline", builder.getBoolAttr(true));
|
||||
|
||||
// All inner loops of the pipelined loop are automatically unrolled.
|
||||
bool hasFullyUnrolled = false;
|
||||
while (hasFullyUnrolled == false) {
|
||||
hasFullyUnrolled = true;
|
||||
// All inner loops of the pipelined loop are automatically unrolled. This will
|
||||
// try at most 8 iterations.
|
||||
for (auto i = 0; i < 8; ++i) {
|
||||
bool hasFullyUnrolled = true;
|
||||
targetLoop.walk([&](AffineForOp loop) {
|
||||
if (loop != targetLoop)
|
||||
if (failed(loopUnrollFull(loop)))
|
||||
hasFullyUnrolled = false;
|
||||
});
|
||||
|
||||
if (hasFullyUnrolled)
|
||||
break;
|
||||
|
||||
if (i == 7)
|
||||
return false;
|
||||
}
|
||||
|
||||
// All outer loops that perfectly nest the pipelined loop can be flattened.
|
||||
|
@ -88,7 +94,6 @@ bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
|
|||
break;
|
||||
}
|
||||
|
||||
// For now, this method will always succeed.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -175,20 +175,14 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
|
|||
// TODO: apply different optimizations to different leaf LNs.
|
||||
|
||||
AffineLoopBands targetBands;
|
||||
func.walk([&](AffineForOp loop) {
|
||||
if (getChildLoopNum(loop) == 0) {
|
||||
AffineLoopBand band;
|
||||
getLoopBandFromLeaf(loop, band);
|
||||
targetBands.push_back(band);
|
||||
}
|
||||
});
|
||||
getLoopBands(func.front(), targetBands);
|
||||
|
||||
// Loop perfection, remove variable bound, and loop order optimization are
|
||||
// always applied for the convenience of polyhedral optimizations.
|
||||
for (auto band : targetBands) {
|
||||
applyAffineLoopPerfection(band.back(), builder);
|
||||
applyRemoveVariableBound(band.front(), builder);
|
||||
applyAffineLoopOrderOpt(band, builder);
|
||||
applyAffineLoopOrderOpt(band);
|
||||
}
|
||||
|
||||
// TODO: automatic tiling and pipelining.
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Analysis/LoopAnalysis.h"
|
||||
#include "mlir/Transforms/LoopUtils.h"
|
||||
#include "scalehls/Transforms/Passes.h"
|
||||
|
||||
|
@ -13,53 +14,56 @@ using namespace scalehls;
|
|||
namespace {
|
||||
struct PartialAffineLoopTile
|
||||
: public PartialAffineLoopTileBase<PartialAffineLoopTile> {
|
||||
void runOnOperation() override;
|
||||
void runOnOperation() override {
|
||||
auto func = getOperation();
|
||||
auto builder = OpBuilder(func);
|
||||
|
||||
std::vector<SmallVector<AffineForOp, 6>> bands;
|
||||
getTileableBands(func, &bands);
|
||||
|
||||
for (auto band : bands)
|
||||
applyPartialAffineLoopTiling(band, builder, tileSize);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void PartialAffineLoopTile::runOnOperation() {
|
||||
// Walk through all functions and loops.
|
||||
auto func = getOperation();
|
||||
bool scalehls::applyPartialAffineLoopTiling(AffineLoopBand band,
|
||||
OpBuilder &builder,
|
||||
unsigned tileSize,
|
||||
bool applyPipelining) {
|
||||
if (!isPerfectlyNested(band))
|
||||
return false;
|
||||
|
||||
// Bands of loops to tile.
|
||||
std::vector<SmallVector<AffineForOp, 6>> bands;
|
||||
getTileableBands(func, &bands);
|
||||
// Calculate the tiling size of each loop in the band.
|
||||
SmallVector<unsigned, 8> sizes;
|
||||
auto remainTileSize = tileSize;
|
||||
|
||||
// Tile each band.
|
||||
for (auto &band : bands) {
|
||||
// Truncate band and only keep first tileLevel loops.
|
||||
size_t realTileLevel = band.size();
|
||||
if (realTileLevel > tileLevel) {
|
||||
band.resize(tileLevel);
|
||||
realTileLevel = tileLevel;
|
||||
}
|
||||
for (auto loop : band) {
|
||||
if (auto tripCount = getConstantTripCount(loop)) {
|
||||
auto constTripCount = tripCount.getValue();
|
||||
|
||||
// Set up tile sizes; fill missing tile sizes at the end with default tile
|
||||
// size or tileSize if one was provided.
|
||||
SmallVector<unsigned, 6> tileSizes;
|
||||
tileSizes.assign(band.size(), tileSize);
|
||||
|
||||
SmallVector<AffineForOp, 6> tiledNest;
|
||||
if (failed(tilePerfectlyNested(band, tileSizes, &tiledNest)))
|
||||
return signalPassFailure();
|
||||
|
||||
// Permute loop order to move the tiled loop to the innermost of the
|
||||
// perfect nested loop.
|
||||
SmallVector<AffineForOp, 4> nestedLoops;
|
||||
getPerfectlyNestedLoops(nestedLoops, tiledNest.front());
|
||||
|
||||
SmallVector<unsigned, 4> permMap;
|
||||
for (size_t i = 0, e = nestedLoops.size(); i < e; ++i) {
|
||||
if (i < realTileLevel)
|
||||
permMap.push_back(i);
|
||||
else if (i < 2 * realTileLevel)
|
||||
permMap.push_back(e + i - 2 * realTileLevel);
|
||||
else
|
||||
permMap.push_back(i - realTileLevel);
|
||||
}
|
||||
if (isValidLoopInterchangePermutation(nestedLoops, permMap))
|
||||
permuteLoops(nestedLoops, permMap);
|
||||
if (remainTileSize > constTripCount) {
|
||||
sizes.push_back(constTripCount);
|
||||
remainTileSize = (remainTileSize + constTripCount - 1) / constTripCount;
|
||||
} else {
|
||||
sizes.push_back(remainTileSize);
|
||||
remainTileSize = 1;
|
||||
}
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
AffineLoopBand tiledBand;
|
||||
if (failed(tilePerfectlyNested(band, sizes, &tiledBand)))
|
||||
return false;
|
||||
|
||||
// Pipelining the tiled loop band if required.
|
||||
if (applyPipelining) {
|
||||
auto targetLoop = tiledBand[band.size() - 1];
|
||||
return applyLoopPipelining(targetLoop, builder);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<Pass> scalehls::createPartialAffineLoopTilePass() {
|
||||
|
|
|
@ -123,11 +123,10 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
|||
// Set constant variable bound.
|
||||
auto maximum = bound.getValue().second;
|
||||
loop.setConstantUpperBound(maximum);
|
||||
}
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// For now, this method will always succeed.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue