[QoREstimation] support a temporary approach to estimate variable trip count

This commit is contained in:
Hanchen Ye 2021-01-25 13:55:07 -06:00
parent 3b78917ea9
commit 76af224a46
5 changed files with 81 additions and 59 deletions

View File

@ -101,6 +101,8 @@ Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
// level with dstOp's any parent loop.
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
// Compute the (minimum, maximum) constant values that the given affine bound
// can evaluate to. An empty Optional is returned when the range cannot be
// determined: the bound map has more than one result, an operand is not an
// affine for induction variable, or the loop owning an operand does not have
// constant bounds.
Optional<std::pair<int64_t, int64_t>> getBoundOfAffineBound(AffineBound bound);
AffineMap getLayoutMap(MemRefType memrefType);
// Collect partition factors and overall partition number through analyzing the

View File

@ -490,12 +490,25 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
// Set an attribute indicating the trip count. For now, we assume all loops
// have static loop bound.
// TODO: how to handle variable trip count?
int64_t tripCount = 1;
if (auto optionalTripCount = getConstantTripCount(op)) {
if (auto optionalTripCount = getConstantTripCount(op))
tripCount = optionalTripCount.getValue();
setAttrValue(op, "trip_count", tripCount);
else {
// TODO: A temporary approach to estimate the trip count. For now, we take
// the average of the upper bound and lower bound of trip count as the
// estimated trip count.
auto lowerBound = getBoundOfAffineBound(op.getLowerBound());
auto upperBound = getBoundOfAffineBound(op.getUpperBound());
if (lowerBound && upperBound) {
auto lowerTripCount =
upperBound.getValue().second - lowerBound.getValue().first;
auto upperTripCount =
upperBound.getValue().first - lowerBound.getValue().second;
tripCount = (lowerTripCount + upperTripCount + 1) / 2;
}
}
setAttrValue(op, "trip_count", tripCount);
auto end = begin;
auto &loopBlock = op.getLoopBody().front();

View File

@ -109,6 +109,65 @@ Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
return nullptr;
}
/// Compute the (minimum, maximum) constant values that an affine loop bound
/// can evaluate to.
///
/// A bound whose map is a single constant yields that constant for both ends.
/// Otherwise every operand of the bound must be the induction variable of an
/// affine for loop with constant bounds; the single result expression is then
/// evaluated at every combination of each operand's extreme values and the
/// smallest and largest results are returned.
///
/// \param bound the affine bound to analyze.
/// \returns (min, max) of the bound, or an empty Optional when the map has
/// more than one result, an operand is not a for induction variable, the
/// owning loop does not have constant bounds, or the substituted expression
/// does not fold to a constant.
Optional<std::pair<int64_t, int64_t>>
scalehls::getBoundOfAffineBound(AffineBound bound) {
  using BoundRange = std::pair<int64_t, int64_t>;

  auto boundMap = bound.getMap();
  if (boundMap.isSingleConstant()) {
    auto constBound = boundMap.getSingleConstantResult();
    return BoundRange(constBound, constBound);
  }

  // For now, we can only handle affine bounds with exactly one result.
  if (boundMap.getNumResults() != 1)
    return Optional<BoundRange>();

  auto context = boundMap.getContext();
  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // The calculation is only possible if every operand of the affine bound
    // is an induction variable.
    if (!isForInductionVar(operand))
      return Optional<BoundRange>();

    // The calculation is only possible if the loop owning the induction
    // variable has constant bounds.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<BoundRange>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    // The upper bound is exclusive, so record the last value the induction
    // variable reaches when stepping from lb (presumably assumes ub > lb).
    lbs.push_back(lb);
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // TODO: maybe a more efficient algorithm.
  auto operandNum = bound.getNumOperands();

  // All 2^operandNum corner combinations are enumerated below. Refuse operand
  // counts for which that count does not fit the original unsigned loop
  // counter (such an enumeration would not be tractable anyway).
  if (operandNum >= 32)
    return Optional<BoundRange>();

  SmallVector<int64_t, 16> results;
  for (uint64_t i = 0, e = uint64_t(1) << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) extreme of the
      // operand at `pos`. The parentheses are required: `i >> pos % 2 == 0`
      // parses as `(i >> (pos % 2)) == 0`.
      if (((i >> pos) & 1) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }

    // Operands are substituted positionally as dims; no symbol replacements
    // are supplied.
    auto newExpr = boundMap.getResult(0).replaceDimsAndSymbols(replacements, {});
    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<BoundRange>();
  }

  // `results` holds at least one entry here because the loop above runs at
  // least once (2^0 == 1).
  auto minmax = std::minmax_element(results.begin(), results.end());
  return BoundRange(*minmax.first, *minmax.second);
}
AffineMap scalehls::getLayoutMap(MemRefType memrefType) {
// Check whether the memref has layout map.
auto memrefMaps = memrefType.getAffineMaps();

View File

@ -96,7 +96,7 @@ void HLSCppOptimizer::applyLoopTilingStrategy(
applyPatternsAndFoldGreedily(targetFunc, patterns);
// Apply general optimizations and array partition.
// applyMergeAffineIf(targetFunc);
applyMergeAffineIf(targetFunc);
applyAffineStoreForward(targetFunc, builder);
applySimplifyMemrefAccess(targetFunc);
applyArrayPartition(targetFunc, builder);
@ -290,7 +290,7 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
for (auto &band : targetBands) {
applyAffineLoopPerfection(band.back(), builder);
applyAffineLoopOrderOpt(band);
// applyRemoveVariableBound(band.front(), builder);
applyRemoveVariableBound(band.front(), builder);
}
// Estimate the current latency.

View File

@ -6,6 +6,7 @@
#include "mlir/IR/IntegerSet.h"
#include "mlir/Transforms/LoopUtils.h"
#include "scalehls/Analysis/Utils.h"
#include "scalehls/Transforms/Passes.h"
using namespace mlir;
@ -25,58 +26,6 @@ struct RemoveVariableBound
};
} // namespace
/// Compute the (minimum, maximum) constant values that an affine loop bound
/// can evaluate to.
///
/// Every operand of the bound must be the induction variable of an affine for
/// loop with constant bounds; the single result expression is evaluated at
/// every combination of each operand's extreme values and the smallest and
/// largest results are returned.
///
/// \param bound the affine bound to analyze.
/// \param context the MLIR context used to create the constant expressions.
/// \returns (min, max) of the bound, or an empty Optional when the map has
/// more than one result, an operand is not a for induction variable, the
/// owning loop does not have constant bounds, or the substituted expression
/// does not fold to a constant.
static Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
  using BoundRange = std::pair<int64_t, int64_t>;

  // For now, we can only handle affine bounds with exactly one result.
  if (bound.getMap().getNumResults() != 1)
    return Optional<BoundRange>();

  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // The calculation is only possible if every operand of the affine bound
    // is an induction variable.
    if (!isForInductionVar(operand))
      return Optional<BoundRange>();

    // The calculation is only possible if the loop owning the induction
    // variable has constant bounds.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<BoundRange>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    // The upper bound is exclusive, so record the last value the induction
    // variable reaches when stepping from lb (presumably assumes ub > lb).
    lbs.push_back(lb);
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // TODO: maybe a more efficient algorithm.
  auto operandNum = bound.getNumOperands();

  // All 2^operandNum corner combinations are enumerated below. Refuse operand
  // counts for which that count does not fit the original unsigned loop
  // counter (such an enumeration would not be tractable anyway).
  if (operandNum >= 32)
    return Optional<BoundRange>();

  SmallVector<int64_t, 16> results;
  for (uint64_t i = 0, e = uint64_t(1) << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) extreme of the
      // operand at `pos`. The parentheses are required: `i >> pos % 2 == 0`
      // parses as `(i >> (pos % 2)) == 0`.
      if (((i >> pos) & 1) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }

    // Operands are substituted positionally as dims; no symbol replacements
    // are supplied.
    auto newExpr =
        bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<BoundRange>();
  }

  // `results` holds at least one entry here because the loop above runs at
  // least once (2^0 == 1).
  auto minmax = std::minmax_element(results.begin(), results.end());
  return BoundRange(*minmax.first, *minmax.second);
}
/// Apply remove variable bound to all inner loops of the input loop.
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
SmallVector<AffineForOp, 4> nestedLoops;
@ -96,8 +45,7 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
if (!loop.hasConstantUpperBound()) {
// TODO: support variable upper bound with more than one result in the
// getBoundOfAffineBound() method.
if (auto bound = getBoundOfAffineBound(loop.getUpperBound(),
builder.getContext())) {
if (auto bound = getBoundOfAffineBound(loop.getUpperBound())) {
// Collect all components for creating AffineIf operation.
auto upperMap = loop.getUpperBoundMap();
auto ifExpr = upperMap.getResult(0) -