[QoREstimation] support a temporary approach to estimate variable trip count

This commit is contained in:
Hanchen Ye 2021-01-25 13:55:07 -06:00
parent 3b78917ea9
commit 76af224a46
5 changed files with 81 additions and 59 deletions

View File

@ -101,6 +101,8 @@ Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
// level with dstOp's any parent loop. // level with dstOp's any parent loop.
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp); Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
Optional<std::pair<int64_t, int64_t>> getBoundOfAffineBound(AffineBound bound);
AffineMap getLayoutMap(MemRefType memrefType); AffineMap getLayoutMap(MemRefType memrefType);
// Collect partition factors and overall partition number through analyzing the // Collect partition factors and overall partition number through analyzing the

View File

@ -490,12 +490,25 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) { bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
// Set an attribute indicating the trip count. For now, we assume all loops // Set an attribute indicating the trip count. For now, we assume all loops
// have static loop bound. // have static loop bound.
// TODO: how to handle variable trip count?
int64_t tripCount = 1; int64_t tripCount = 1;
if (auto optionalTripCount = getConstantTripCount(op)) { if (auto optionalTripCount = getConstantTripCount(op))
tripCount = optionalTripCount.getValue(); tripCount = optionalTripCount.getValue();
setAttrValue(op, "trip_count", tripCount); else {
// TODO: A temporary approach to estimate the trip count. For now, we take
// the average of the upper bound and lower bound of trip count as the
// estimated trip count.
auto lowerBound = getBoundOfAffineBound(op.getLowerBound());
auto upperBound = getBoundOfAffineBound(op.getUpperBound());
if (lowerBound && upperBound) {
auto lowerTripCount =
upperBound.getValue().second - lowerBound.getValue().first;
auto upperTripCount =
upperBound.getValue().first - lowerBound.getValue().second;
tripCount = (lowerTripCount + upperTripCount + 1) / 2;
}
} }
setAttrValue(op, "trip_count", tripCount);
auto end = begin; auto end = begin;
auto &loopBlock = op.getLoopBody().front(); auto &loopBlock = op.getLoopBody().front();

View File

@ -109,6 +109,65 @@ Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
return nullptr; return nullptr;
} }
/// Estimate the range of values an affine bound can take.
///
/// Returns a (min, max) pair, or an empty Optional when the range cannot be
/// computed. A single-constant bound map yields (c, c). Otherwise the bound
/// map must have exactly one result and every operand must be the induction
/// variable of an affine.for with constant bounds. The result expression is
/// then evaluated at every combination of each induction variable's first and
/// last value, and the smallest and largest results are returned.
///
/// NOTE(review): evaluating only the corner combinations presumably assumes
/// the expression is monotonic in each operand -- confirm for maps that use
/// mod/floordiv/ceildiv.
Optional<std::pair<int64_t, int64_t>>
scalehls::getBoundOfAffineBound(AffineBound bound) {
  auto boundMap = bound.getMap();
  if (boundMap.isSingleConstant()) {
    auto constBound = boundMap.getSingleConstantResult();
    return std::pair<int64_t, int64_t>(constBound, constBound);
  }

  // For now, we can only handle one result affine bound.
  if (boundMap.getNumResults() != 1)
    return Optional<std::pair<int64_t, int64_t>>();

  auto context = boundMap.getContext();
  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // Only if the affine bound operands are induction variable, the calculation
    // is possible.
    if (!isForInductionVar(operand))
      return Optional<std::pair<int64_t, int64_t>>();

    // Only if the owner for op of the induction variable has constant bound,
    // the calculation is possible.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<std::pair<int64_t, int64_t>>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    lbs.push_back(lb);
    // Last value the induction variable takes: the largest lb + k * step that
    // is still below the exclusive upper bound (assumes a non-empty loop).
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // The enumeration below visits 2^operandNum corner points, and that count
  // must be representable in an unsigned shift.
  unsigned operandNum = bound.getNumOperands();
  if (operandNum >= 32)
    return Optional<std::pair<int64_t, int64_t>>();

  // TODO: maybe a more efficient algorithm.
  SmallVector<int64_t, 16> results;
  for (unsigned i = 0, e = 1u << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) value of the
      // operand at `pos`. The parentheses are required: `%` binds tighter
      // than `>>`, so `i >> pos % 2 == 0` would test `(i >> (pos % 2)) == 0`.
      if (((i >> pos) & 1u) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }
    auto newExpr = boundMap.getResult(0).replaceDimsAndSymbols(replacements, {});

    // After substitution the expression must fold to a constant; otherwise
    // the bound cannot be evaluated.
    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<std::pair<int64_t, int64_t>>();
  }

  auto minmax = std::minmax_element(results.begin(), results.end());
  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
AffineMap scalehls::getLayoutMap(MemRefType memrefType) { AffineMap scalehls::getLayoutMap(MemRefType memrefType) {
// Check whether the memref has layout map. // Check whether the memref has layout map.
auto memrefMaps = memrefType.getAffineMaps(); auto memrefMaps = memrefType.getAffineMaps();

View File

@ -96,7 +96,7 @@ void HLSCppOptimizer::applyLoopTilingStrategy(
applyPatternsAndFoldGreedily(targetFunc, patterns); applyPatternsAndFoldGreedily(targetFunc, patterns);
// Apply general optimizations and array partition. // Apply general optimizations and array partition.
// applyMergeAffineIf(targetFunc); applyMergeAffineIf(targetFunc);
applyAffineStoreForward(targetFunc, builder); applyAffineStoreForward(targetFunc, builder);
applySimplifyMemrefAccess(targetFunc); applySimplifyMemrefAccess(targetFunc);
applyArrayPartition(targetFunc, builder); applyArrayPartition(targetFunc, builder);
@ -290,7 +290,7 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
for (auto &band : targetBands) { for (auto &band : targetBands) {
applyAffineLoopPerfection(band.back(), builder); applyAffineLoopPerfection(band.back(), builder);
applyAffineLoopOrderOpt(band); applyAffineLoopOrderOpt(band);
// applyRemoveVariableBound(band.front(), builder); applyRemoveVariableBound(band.front(), builder);
} }
// Estimate the current latency. // Estimate the current latency.

View File

@ -6,6 +6,7 @@
#include "mlir/IR/IntegerSet.h" #include "mlir/IR/IntegerSet.h"
#include "mlir/Transforms/LoopUtils.h" #include "mlir/Transforms/LoopUtils.h"
#include "scalehls/Analysis/Utils.h"
#include "scalehls/Transforms/Passes.h" #include "scalehls/Transforms/Passes.h"
using namespace mlir; using namespace mlir;
@ -25,58 +26,6 @@ struct RemoveVariableBound
}; };
} // namespace } // namespace
/// Estimate the range of values a single-result affine bound can take.
///
/// Returns a (min, max) pair, or an empty Optional when the range cannot be
/// computed. The bound map must have exactly one result and every operand must
/// be the induction variable of an affine.for with constant bounds. The result
/// expression is evaluated at every combination of each induction variable's
/// first and last value, and the smallest and largest results are returned.
/// `context` is used to build the constant expressions substituted for the
/// operands.
static Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
  // For now, we can only handle one result affine bound.
  if (bound.getMap().getNumResults() != 1)
    return Optional<std::pair<int64_t, int64_t>>();

  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // Only if the affine bound operands are induction variable, the calculation
    // is possible.
    if (!isForInductionVar(operand))
      return Optional<std::pair<int64_t, int64_t>>();

    // Only if the owner for op of the induction variable has constant bound,
    // the calculation is possible.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<std::pair<int64_t, int64_t>>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    lbs.push_back(lb);
    // Last value the induction variable takes: the largest lb + k * step that
    // is still below the exclusive upper bound (assumes a non-empty loop).
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // The enumeration below visits 2^operandNum corner points, and that count
  // must be representable in an unsigned shift.
  unsigned operandNum = bound.getNumOperands();
  if (operandNum >= 32)
    return Optional<std::pair<int64_t, int64_t>>();

  // TODO: maybe a more efficient algorithm.
  SmallVector<int64_t, 16> results;
  for (unsigned i = 0, e = 1u << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) value of the
      // operand at `pos`. The parentheses are required: `%` binds tighter
      // than `>>`, so `i >> pos % 2 == 0` would test `(i >> (pos % 2)) == 0`.
      if (((i >> pos) & 1u) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }
    auto newExpr =
        bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});

    // After substitution the expression must fold to a constant; otherwise
    // the bound cannot be evaluated.
    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<std::pair<int64_t, int64_t>>();
  }

  auto minmax = std::minmax_element(results.begin(), results.end());
  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
/// Apply remove variable bound to all inner loops of the input loop. /// Apply remove variable bound to all inner loops of the input loop.
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) { bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
SmallVector<AffineForOp, 4> nestedLoops; SmallVector<AffineForOp, 4> nestedLoops;
@ -96,8 +45,7 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
if (!loop.hasConstantUpperBound()) { if (!loop.hasConstantUpperBound()) {
// TODO: support variable upper bound with more than one result in the // TODO: support variable upper bound with more than one result in the
// getBoundOfAffineBound() method. // getBoundOfAffineBound() method.
if (auto bound = getBoundOfAffineBound(loop.getUpperBound(), if (auto bound = getBoundOfAffineBound(loop.getUpperBound())) {
builder.getContext())) {
// Collect all components for creating AffineIf operation. // Collect all components for creating AffineIf operation.
auto upperMap = loop.getUpperBoundMap(); auto upperMap = loop.getUpperBoundMap();
auto ifExpr = upperMap.getResult(0) - auto ifExpr = upperMap.getResult(0) -