hanchenye-scalehls/lib/Transforms/Loop/AffineLoopOrderOpt.cpp

213 lines
7.1 KiB
C++

//===----------------------------------------------------------------------===//
//
// Copyright 2020-2021 The ScaleHLS Authors.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "scalehls/Transforms/Passes.h"
#include "scalehls/Transforms/Utils.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "scalehls"
using namespace mlir;
using namespace scalehls;
/// TODO: support to pass in permutation map.
/// Optimize loop order. Loops associated with memory access dependencies are
/// moved to an as outer as possible location of the input loop band. If
/// "reverse" is true, as inner as possible.
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand &band,
ArrayRef<unsigned> permMap,
bool reverse) {
LLVM_DEBUG(llvm::dbgs() << "Loop order opt ";);
assert(!band.empty() && "no loops provided");
if (!isPerfectlyNested(band))
return false;
auto &loopBlock = *band.back().getBody();
auto bandDepth = band.size();
if (!permMap.empty() && isValidLoopInterchangePermutation(band, permMap)) {
auto newRoot = band[permuteLoops(band, permMap)];
band.clear();
getLoopBandFromOutermost(newRoot, band);
band.resize(bandDepth);
return true;
}
// Collect all load and store operations for each memory in the loop block,
// and calculate the number of common surrouding loops for later uses.
MemAccessesMap loadStoresMap;
getMemAccessesMap(loopBlock, loadStoresMap);
auto commonLoopDepth = getNumCommonSurroundingLoops(
*loopBlock.begin(), *std::next(loopBlock.begin()));
// A map of dependency distances indexed by the loop in the band.
SmallVector<AffineForOp, 8> targetLoops;
llvm::SmallDenseMap<Operation *, unsigned, 8> distanceMap;
// Only the loops in the loop band will be checked.
unsigned startDepth = commonLoopDepth - bandDepth + 1;
for (unsigned depth = startDepth; depth < commonLoopDepth + 1; ++depth) {
auto loop = band[depth - startDepth];
unsigned minDistance = UINT_MAX;
// Traverse all memories in the loop block and find all dependencies
// associated to each memory.
for (auto pair : loadStoresMap) {
auto loadStores = pair.second;
int64_t dstIndex = 1;
for (auto dstOp : loadStores) {
for (auto srcOp : llvm::drop_begin(loadStores, dstIndex)) {
MemRefAccess dstAccess(dstOp);
MemRefAccess srcAccess(srcOp);
FlatAffineValueConstraints depConstrs;
SmallVector<DependenceComponent, 2> depComps;
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, depth, &depConstrs, &depComps);
if (hasDependence(result)) {
auto depComp = depComps[depth - 1];
assert(loop == depComp.op && "unexpected dependency");
// Only positive distance will be recorded.
if (depComp.ub.getValue() > 0) {
unsigned distance = std::max(depComp.lb.getValue(), (int64_t)1);
minDistance = std::min(minDistance, distance);
}
}
}
++dstIndex;
}
}
// Collect all candidate loops into an ordered vector. Loop with the
// smallest distance will appear in the front.
if (minDistance < UINT_MAX) {
distanceMap[loop] = minDistance;
for (auto it = targetLoops.begin(); it <= targetLoops.end(); ++it)
if (it == targetLoops.end()) {
targetLoops.push_back(loop);
break;
} else if (minDistance < distanceMap[*it]) {
targetLoops.insert(it, loop);
break;
}
}
}
distanceMap.clear();
// Permute the target loops one by one.
// TODO: a more comprehensive permution strategy search.
for (auto loop : targetLoops) {
unsigned targetLoopLoc =
std::find(band.begin(), band.end(), loop) - band.begin();
if (!reverse)
// Permute the target loop to an as outer as possible location.
for (unsigned dstLoc = 0; dstLoc < targetLoopLoc; ++dstLoc) {
SmallVector<unsigned, 4> permMap;
// Construct permutation map.
for (unsigned loc = 0; loc < bandDepth; ++loc) {
if (loc < dstLoc)
permMap.push_back(loc);
else if (loc < targetLoopLoc)
permMap.push_back(loc + 1);
else if (loc == targetLoopLoc)
permMap.push_back(dstLoc);
else
permMap.push_back(loc);
}
// Check the validation of the current permutation.
if (isValidLoopInterchangePermutation(band, permMap)) {
LLVM_DEBUG(llvm::dbgs() << "(";);
LLVM_DEBUG(for (unsigned i = 0, e = permMap.size(); i < e; ++i) {
llvm::dbgs() << permMap[i];
if (i != e - 1)
llvm::dbgs() << ",";
});
LLVM_DEBUG(llvm::dbgs() << ") ";);
auto newRoot = band[permuteLoops(band, permMap)];
band.clear();
getLoopBandFromOutermost(newRoot, band);
band.resize(bandDepth);
break;
}
}
else
// Permute the target loop to an as inner as possible location.
for (unsigned dstLoc = targetLoopLoc + 1; dstLoc < bandDepth; ++dstLoc) {
SmallVector<unsigned, 4> permMap;
// Construct permutation map.
for (unsigned loc = 0; loc < bandDepth; ++loc) {
if (loc < targetLoopLoc)
permMap.push_back(loc);
else if (loc == targetLoopLoc)
permMap.push_back(dstLoc);
else if (loc <= dstLoc)
permMap.push_back(loc - 1);
else
permMap.push_back(loc);
}
// Check the validation of the current permutation.
if (isValidLoopInterchangePermutation(band, permMap)) {
auto newRoot = band[permuteLoops(band, permMap)];
band.clear();
getLoopBandFromOutermost(newRoot, band);
band.resize(bandDepth);
break;
}
}
}
LLVM_DEBUG(llvm::dbgs() << "\n";);
return true;
}
namespace {
struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
void runOnOperation() override {
// Collect all target loop bands.
AffineLoopBands targetBands;
getLoopBands(getOperation().front(), targetBands);
// Apply loop order optimization to each loop band.
for (auto &band : targetBands) {
AffineLoopBand tileBand;
AffineLoopBand pointBand;
// If the current loop band can be split into a tile band and point band,
// apply to these two partial bands separately. Otherwise, apply to the
// whole loop band.
if (getTileAndPointLoopBand(band, tileBand, pointBand)) {
if (!tileBand.empty())
applyAffineLoopOrderOpt(tileBand);
if (!pointBand.empty())
applyAffineLoopOrderOpt(pointBand);
} else
applyAffineLoopOrderOpt(band);
}
}
};
} // namespace
std::unique_ptr<Pass> scalehls::createAffineLoopOrderOptPass() {
return std::make_unique<AffineLoopOrderOpt>();
}