[PartialAffineLoopTiling] factor out applyPartialAffineLoopTiling() method, reimplement the tiling strategy
This commit is contained in:
parent
10944e1367
commit
e0c3b2ad8e
|
@ -61,8 +61,8 @@ public:
|
||||||
// Helper methods
|
// Helper methods
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
using AffineLoopBand = SmallVector<AffineForOp, 4>;
|
using AffineLoopBand = SmallVector<AffineForOp, 6>;
|
||||||
using AffineLoopBands = SmallVector<AffineLoopBand, 4>;
|
using AffineLoopBands = SmallVector<AffineLoopBand, 6>;
|
||||||
|
|
||||||
// For storing all affine memory access operations (including CallOp,
|
// For storing all affine memory access operations (including CallOp,
|
||||||
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
|
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
|
||||||
|
@ -98,6 +98,11 @@ unsigned getChildLoopNum(Operation *op);
|
||||||
AffineForOp getLoopBandFromRoot(AffineForOp forOp, AffineLoopBand &band);
|
AffineForOp getLoopBandFromRoot(AffineForOp forOp, AffineLoopBand &band);
|
||||||
AffineForOp getLoopBandFromLeaf(AffineForOp forOp, AffineLoopBand &band);
|
AffineForOp getLoopBandFromLeaf(AffineForOp forOp, AffineLoopBand &band);
|
||||||
|
|
||||||
|
/// Collect all loop bands in the block. If allowHavingChilds is false,
|
||||||
|
/// only innermost loop bands will be collected.
|
||||||
|
void getLoopBands(Block &block, AffineLoopBands &bands,
|
||||||
|
bool allowHavingChilds = false);
|
||||||
|
|
||||||
} // namespace scalehls
|
} // namespace scalehls
|
||||||
} // namespace mlir
|
} // namespace mlir
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ namespace scalehls {
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
bool applyLegalizeDataflow(FuncOp func, OpBuilder &builder, int64_t minGran,
|
bool applyLegalizeDataflow(FuncOp func, OpBuilder &builder, int64_t minGran,
|
||||||
bool insertCopy);
|
bool insertCopy = true);
|
||||||
|
|
||||||
bool applySplitFunction(FuncOp func, OpBuilder &builder);
|
bool applySplitFunction(FuncOp func, OpBuilder &builder);
|
||||||
|
|
||||||
|
@ -34,7 +34,11 @@ bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
|
||||||
/// Apply remove variable bound to all inner loops of the input loop.
|
/// Apply remove variable bound to all inner loops of the input loop.
|
||||||
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
|
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
|
||||||
|
|
||||||
bool applyAffineLoopOrderOpt(AffineLoopBand band, OpBuilder &builder);
|
bool applyAffineLoopOrderOpt(AffineLoopBand band);
|
||||||
|
|
||||||
|
bool applyPartialAffineLoopTiling(AffineLoopBand band, OpBuilder &builder,
|
||||||
|
unsigned tileSize,
|
||||||
|
bool applyPipelining = true);
|
||||||
|
|
||||||
/// Apply loop pipelining to the input loop, all inner loops are automatically
|
/// Apply loop pipelining to the input loop, all inner loops are automatically
|
||||||
/// fully unrolled.
|
/// fully unrolled.
|
||||||
|
|
|
@ -105,8 +105,6 @@ def PartialAffineLoopTile : Pass<"partial-affine-loop-tile", "FuncOp"> {
|
||||||
let constructor = "mlir::scalehls::createPartialAffineLoopTilePass()";
|
let constructor = "mlir::scalehls::createPartialAffineLoopTilePass()";
|
||||||
|
|
||||||
let options = [
|
let options = [
|
||||||
Option<"tileLevel", "tile-level", "unsigned", /*default=*/"1",
|
|
||||||
"Positive number: the level of loops to be tiled">,
|
|
||||||
Option<"tileSize", "tile-size", "unsigned", /*default=*/"2",
|
Option<"tileSize", "tile-size", "unsigned", /*default=*/"2",
|
||||||
"Positive number: the size of tiling">
|
"Positive number: the size of tiling">
|
||||||
];
|
];
|
||||||
|
|
|
@ -196,4 +196,19 @@ AffineForOp scalehls::getLoopBandFromLeaf(AffineForOp forOp,
|
||||||
|
|
||||||
band.append(reverseBand.rbegin(), reverseBand.rend());
|
band.append(reverseBand.rbegin(), reverseBand.rend());
|
||||||
return band.front();
|
return band.front();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collect all loop bands in the block. If allowHavingChilds is false,
|
||||||
|
/// only innermost loop bands will be collected.
|
||||||
|
void scalehls::getLoopBands(Block &block, AffineLoopBands &bands,
|
||||||
|
bool allowHavingChilds) {
|
||||||
|
block.walk([&](AffineForOp loop) {
|
||||||
|
auto childNum = getChildLoopNum(loop);
|
||||||
|
|
||||||
|
if (childNum == 0 || (childNum > 1 && allowHavingChilds)) {
|
||||||
|
AffineLoopBand band;
|
||||||
|
getLoopBandFromLeaf(loop, band);
|
||||||
|
bands.push_back(band);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
|
@ -17,27 +17,19 @@ namespace {
|
||||||
struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
|
struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
|
||||||
void runOnOperation() override {
|
void runOnOperation() override {
|
||||||
auto func = getOperation();
|
auto func = getOperation();
|
||||||
auto builder = OpBuilder(func);
|
|
||||||
|
|
||||||
// Collect all target loop bands.
|
// Collect all target loop bands.
|
||||||
AffineLoopBands targetBands;
|
AffineLoopBands targetBands;
|
||||||
func.walk([&](AffineForOp loop) {
|
getLoopBands(func.front(), targetBands);
|
||||||
if (getChildLoopNum(loop) == 0) {
|
|
||||||
AffineLoopBand band;
|
|
||||||
getLoopBandFromLeaf(loop, band);
|
|
||||||
targetBands.push_back(band);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Apply loop order optimization to each loop band.
|
// Apply loop order optimization to each loop band.
|
||||||
for (auto band : targetBands)
|
for (auto band : targetBands)
|
||||||
applyAffineLoopOrderOpt(band, builder);
|
applyAffineLoopOrderOpt(band);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band,
|
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band) {
|
||||||
OpBuilder &builder) {
|
|
||||||
auto &loopBlock = band.back().getLoopBody().front();
|
auto &loopBlock = band.back().getLoopBody().front();
|
||||||
auto bandDepth = band.size();
|
auto bandDepth = band.size();
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,8 @@ struct AffineLoopPerfection
|
||||||
|
|
||||||
/// Apply loop perfection to all outer loops of the input loop until the outer
|
/// Apply loop perfection to all outer loops of the input loop until the outer
|
||||||
/// operation is no longer a loop, or contains more than one child loop.
|
/// operation is no longer a loop, or contains more than one child loop.
|
||||||
|
/// TODO: pass in an AffineLoopBand rather than an AffineForOp to simplify the
|
||||||
|
/// internal implementation.
|
||||||
bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
||||||
OpBuilder &builder) {
|
OpBuilder &builder) {
|
||||||
SmallVector<AffineForOp, 4> loops;
|
SmallVector<AffineForOp, 4> loops;
|
||||||
|
@ -72,7 +74,7 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
||||||
for (auto op : frontOps)
|
for (auto op : frontOps)
|
||||||
for (auto user : op->getUsers())
|
for (auto user : op->getUsers())
|
||||||
if (user->getParentOp() != loop)
|
if (user->getParentOp() != loop)
|
||||||
return true;
|
return false;
|
||||||
|
|
||||||
// Create AffineIf in the front of the innermost loop.
|
// Create AffineIf in the front of the innermost loop.
|
||||||
SmallVector<AffineExpr, 4> ifExprs;
|
SmallVector<AffineExpr, 4> ifExprs;
|
||||||
|
@ -193,8 +195,6 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop,
|
||||||
// Push back the current loop as the new child loop.
|
// Push back the current loop as the new child loop.
|
||||||
loops.push_back(loop);
|
loops.push_back(loop);
|
||||||
}
|
}
|
||||||
|
|
||||||
// For now, this method will always success.
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,19 +34,25 @@ struct FuncPipelining : public FuncPipeliningBase<FuncPipelining> {
|
||||||
/// Apply function pipelining to the input function, all contained loops are
|
/// Apply function pipelining to the input function, all contained loops are
|
||||||
/// automatically fully unrolled.
|
/// automatically fully unrolled.
|
||||||
bool scalehls::applyFuncPipelining(FuncOp func, OpBuilder &builder) {
|
bool scalehls::applyFuncPipelining(FuncOp func, OpBuilder &builder) {
|
||||||
bool hasFullyUnrolled = false;
|
// TODO: the termination condition needs to be updated. This will try at most 8
|
||||||
while (hasFullyUnrolled == false) {
|
// iterations.
|
||||||
hasFullyUnrolled = true;
|
for (auto i = 0; i < 8; ++i) {
|
||||||
|
bool hasFullyUnrolled = true;
|
||||||
func.walk([&](AffineForOp loop) {
|
func.walk([&](AffineForOp loop) {
|
||||||
if (failed(loopUnrollFull(loop)))
|
if (failed(loopUnrollFull(loop)))
|
||||||
hasFullyUnrolled = false;
|
hasFullyUnrolled = false;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (hasFullyUnrolled)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (i == 7)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
func->setAttr("pipeline", builder.getBoolAttr(true));
|
func->setAttr("pipeline", builder.getBoolAttr(true));
|
||||||
func->setAttr("dataflow", builder.getBoolAttr(false));
|
func->setAttr("dataflow", builder.getBoolAttr(false));
|
||||||
|
|
||||||
// For now, this method will always success.
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,15 +59,21 @@ struct LoopPipelining : public LoopPipeliningBase<LoopPipelining> {
|
||||||
bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
|
bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
|
||||||
targetLoop->setAttr("pipeline", builder.getBoolAttr(true));
|
targetLoop->setAttr("pipeline", builder.getBoolAttr(true));
|
||||||
|
|
||||||
// All inner loops of the pipelined loop are automatically unrolled.
|
// All inner loops of the pipelined loop are automatically unrolled. This will
|
||||||
bool hasFullyUnrolled = false;
|
// try at most 8 iterations.
|
||||||
while (hasFullyUnrolled == false) {
|
for (auto i = 0; i < 8; ++i) {
|
||||||
hasFullyUnrolled = true;
|
bool hasFullyUnrolled = true;
|
||||||
targetLoop.walk([&](AffineForOp loop) {
|
targetLoop.walk([&](AffineForOp loop) {
|
||||||
if (loop != targetLoop)
|
if (loop != targetLoop)
|
||||||
if (failed(loopUnrollFull(loop)))
|
if (failed(loopUnrollFull(loop)))
|
||||||
hasFullyUnrolled = false;
|
hasFullyUnrolled = false;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (hasFullyUnrolled)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (i == 7)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// All outer loops that perfectly nest the pipelined loop can be flattened.
|
// All outer loops that perfectly nest the pipelined loop can be flattened.
|
||||||
|
@ -88,7 +94,6 @@ bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For now, this method will always success.
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -175,20 +175,14 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
|
||||||
// TODO: apply different optimizations to different leaf LNs.
|
// TODO: apply different optimizations to different leaf LNs.
|
||||||
|
|
||||||
AffineLoopBands targetBands;
|
AffineLoopBands targetBands;
|
||||||
func.walk([&](AffineForOp loop) {
|
getLoopBands(func.front(), targetBands);
|
||||||
if (getChildLoopNum(loop) == 0) {
|
|
||||||
AffineLoopBand band;
|
|
||||||
getLoopBandFromLeaf(loop, band);
|
|
||||||
targetBands.push_back(band);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Loop perfection, remove variable bound, and loop order optimization are
|
// Loop perfection, remove variable bound, and loop order optimization are
|
||||||
// always applied for the convenience of polyhedral optimizations.
|
// always applied for the convenience of polyhedral optimizations.
|
||||||
for (auto band : targetBands) {
|
for (auto band : targetBands) {
|
||||||
applyAffineLoopPerfection(band.back(), builder);
|
applyAffineLoopPerfection(band.back(), builder);
|
||||||
applyRemoveVariableBound(band.front(), builder);
|
applyRemoveVariableBound(band.front(), builder);
|
||||||
applyAffineLoopOrderOpt(band, builder);
|
applyAffineLoopOrderOpt(band);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: automatic tiling and pipelining.
|
// TODO: automatic tiling and pipelining.
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "mlir/Analysis/LoopAnalysis.h"
|
||||||
#include "mlir/Transforms/LoopUtils.h"
|
#include "mlir/Transforms/LoopUtils.h"
|
||||||
#include "scalehls/Transforms/Passes.h"
|
#include "scalehls/Transforms/Passes.h"
|
||||||
|
|
||||||
|
@ -13,53 +14,56 @@ using namespace scalehls;
|
||||||
namespace {
|
namespace {
|
||||||
struct PartialAffineLoopTile
|
struct PartialAffineLoopTile
|
||||||
: public PartialAffineLoopTileBase<PartialAffineLoopTile> {
|
: public PartialAffineLoopTileBase<PartialAffineLoopTile> {
|
||||||
void runOnOperation() override;
|
void runOnOperation() override {
|
||||||
|
auto func = getOperation();
|
||||||
|
auto builder = OpBuilder(func);
|
||||||
|
|
||||||
|
std::vector<SmallVector<AffineForOp, 6>> bands;
|
||||||
|
getTileableBands(func, &bands);
|
||||||
|
|
||||||
|
for (auto band : bands)
|
||||||
|
applyPartialAffineLoopTiling(band, builder, tileSize);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void PartialAffineLoopTile::runOnOperation() {
|
bool scalehls::applyPartialAffineLoopTiling(AffineLoopBand band,
|
||||||
// Walk through all functions and loops.
|
OpBuilder &builder,
|
||||||
auto func = getOperation();
|
unsigned tileSize,
|
||||||
|
bool applyPipelining) {
|
||||||
|
if (!isPerfectlyNested(band))
|
||||||
|
return false;
|
||||||
|
|
||||||
// Bands of loops to tile.
|
// Calculate the tiling size of each loop in the band.
|
||||||
std::vector<SmallVector<AffineForOp, 6>> bands;
|
SmallVector<unsigned, 8> sizes;
|
||||||
getTileableBands(func, &bands);
|
auto remainTileSize = tileSize;
|
||||||
|
|
||||||
// Tile each band.
|
for (auto loop : band) {
|
||||||
for (auto &band : bands) {
|
if (auto tripCount = getConstantTripCount(loop)) {
|
||||||
// Truncate band and only keep first tileLevel loops.
|
auto constTripCount = tripCount.getValue();
|
||||||
size_t realTileLevel = band.size();
|
|
||||||
if (realTileLevel > tileLevel) {
|
|
||||||
band.resize(tileLevel);
|
|
||||||
realTileLevel = tileLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set up tile sizes; fill missing tile sizes at the end with default tile
|
if (remainTileSize > constTripCount) {
|
||||||
// size or tileSize if one was provided.
|
sizes.push_back(constTripCount);
|
||||||
SmallVector<unsigned, 6> tileSizes;
|
remainTileSize = (remainTileSize + constTripCount - 1) / constTripCount;
|
||||||
tileSizes.assign(band.size(), tileSize);
|
} else {
|
||||||
|
sizes.push_back(remainTileSize);
|
||||||
SmallVector<AffineForOp, 6> tiledNest;
|
remainTileSize = 1;
|
||||||
if (failed(tilePerfectlyNested(band, tileSizes, &tiledNest)))
|
}
|
||||||
return signalPassFailure();
|
} else
|
||||||
|
return false;
|
||||||
// Permute loop order to move the tiled loop to the innermost of the
|
|
||||||
// perfect nested loop.
|
|
||||||
SmallVector<AffineForOp, 4> nestedLoops;
|
|
||||||
getPerfectlyNestedLoops(nestedLoops, tiledNest.front());
|
|
||||||
|
|
||||||
SmallVector<unsigned, 4> permMap;
|
|
||||||
for (size_t i = 0, e = nestedLoops.size(); i < e; ++i) {
|
|
||||||
if (i < realTileLevel)
|
|
||||||
permMap.push_back(i);
|
|
||||||
else if (i < 2 * realTileLevel)
|
|
||||||
permMap.push_back(e + i - 2 * realTileLevel);
|
|
||||||
else
|
|
||||||
permMap.push_back(i - realTileLevel);
|
|
||||||
}
|
|
||||||
if (isValidLoopInterchangePermutation(nestedLoops, permMap))
|
|
||||||
permuteLoops(nestedLoops, permMap);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AffineLoopBand tiledBand;
|
||||||
|
if (failed(tilePerfectlyNested(band, sizes, &tiledBand)))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Pipeline the tiled loop band if required.
|
||||||
|
if (applyPipelining) {
|
||||||
|
auto targetLoop = tiledBand[band.size() - 1];
|
||||||
|
return applyLoopPipelining(targetLoop, builder);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Pass> scalehls::createPartialAffineLoopTilePass() {
|
std::unique_ptr<Pass> scalehls::createPartialAffineLoopTilePass() {
|
||||||
|
|
|
@ -123,11 +123,10 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
||||||
// Set constant variable bound.
|
// Set constant variable bound.
|
||||||
auto maximum = bound.getValue().second;
|
auto maximum = bound.getValue().second;
|
||||||
loop.setConstantUpperBound(maximum);
|
loop.setConstantUpperBound(maximum);
|
||||||
}
|
} else
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// For now, this method will always success.
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue