[QoREstimation] support a temporary approach to estimate variable trip count
This commit is contained in:
parent
3b78917ea9
commit
76af224a46
|
@ -101,6 +101,8 @@ Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
|
|||
// level with dstOp's any parent loop.
|
||||
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
|
||||
|
||||
Optional<std::pair<int64_t, int64_t>> getBoundOfAffineBound(AffineBound bound);
|
||||
|
||||
AffineMap getLayoutMap(MemRefType memrefType);
|
||||
|
||||
// Collect partition factors and overall partition number through analyzing the
|
||||
|
|
|
@ -490,12 +490,25 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
|
|||
bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
|
||||
// Set an attribute indicating the trip count. For now, we assume all loops
|
||||
// have static loop bound.
|
||||
// TODO: how to handle variable trip count?
|
||||
int64_t tripCount = 1;
|
||||
if (auto optionalTripCount = getConstantTripCount(op)) {
|
||||
if (auto optionalTripCount = getConstantTripCount(op))
|
||||
tripCount = optionalTripCount.getValue();
|
||||
setAttrValue(op, "trip_count", tripCount);
|
||||
else {
|
||||
// TODO: A temporary approach to estimate the trip count. For now, we take
|
||||
// the average of the upper bound and lower bound of trip count as the
|
||||
// estimated trip count.
|
||||
auto lowerBound = getBoundOfAffineBound(op.getLowerBound());
|
||||
auto upperBound = getBoundOfAffineBound(op.getUpperBound());
|
||||
|
||||
if (lowerBound && upperBound) {
|
||||
auto lowerTripCount =
|
||||
upperBound.getValue().second - lowerBound.getValue().first;
|
||||
auto upperTripCount =
|
||||
upperBound.getValue().first - lowerBound.getValue().second;
|
||||
tripCount = (lowerTripCount + upperTripCount + 1) / 2;
|
||||
}
|
||||
}
|
||||
setAttrValue(op, "trip_count", tripCount);
|
||||
|
||||
auto end = begin;
|
||||
auto &loopBlock = op.getLoopBody().front();
|
||||
|
|
|
@ -109,6 +109,65 @@ Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
/// Estimate the range of values an affine bound can evaluate to.
///
/// Returns a (min, max) pair on success, or an empty Optional when the bound
/// cannot be analyzed. A single-constant bound map yields (c, c). Otherwise the
/// bound map must have exactly one result, every operand must be the induction
/// variable of an affine.for with constant bounds, and the result expression
/// must fold to a constant once all operands are replaced by constants.
///
/// The range is computed by evaluating the result expression at every corner
/// of the operand space, i.e. each operand set to either the first or the last
/// value its loop takes. NOTE(review): corner evaluation is exact for purely
/// linear expressions; for expressions containing mod/floordiv the true
/// extremes may lie strictly inside the range -- confirm this is acceptable
/// for callers.
Optional<std::pair<int64_t, int64_t>>
scalehls::getBoundOfAffineBound(AffineBound bound) {
  auto boundMap = bound.getMap();
  if (boundMap.isSingleConstant()) {
    auto constBound = boundMap.getSingleConstantResult();
    return std::pair<int64_t, int64_t>(constBound, constBound);
  }

  // For now, we can only handle one result affine bound.
  if (boundMap.getNumResults() != 1)
    return Optional<std::pair<int64_t, int64_t>>();

  auto context = boundMap.getContext();
  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // Only if the affine bound operands are induction variable, the calculation
    // is possible.
    if (!isForInductionVar(operand))
      return Optional<std::pair<int64_t, int64_t>>();

    // Only if the owner for op of the induction variable has constant bound,
    // the calculation is possible.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<std::pair<int64_t, int64_t>>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    // The loop upper bound is exclusive, so the last value the induction
    // variable takes is the largest lb + k * step that is still below ub.
    // NOTE(review): this presumes ub > lb (a loop with at least one
    // iteration) -- confirm for zero-trip loops.
    lbs.push_back(lb);
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // TODO: maybe a more efficient algorithm.
  auto operandNum = bound.getNumOperands();

  // The corners are enumerated with an unsigned bit mask. Bail out when the
  // mask cannot represent all operands (the enumeration would be impractical
  // anyway) instead of overflowing the shift below.
  if (operandNum >= 32)
    return Optional<std::pair<int64_t, int64_t>>();

  SmallVector<int64_t, 16> results;
  for (unsigned i = 0, e = 1u << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) extreme of operand
      // `pos`. The parentheses are required: `i >> pos % 2 == 0` would parse
      // as `(i >> (pos % 2)) == 0`.
      if (((i >> pos) & 1u) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }
    auto newExpr = boundMap.getResult(0).replaceDimsAndSymbols(replacements, {});

    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<std::pair<int64_t, int64_t>>();
  }

  // `results` holds at least one entry here because the loop above runs at
  // least once (2^0 == 1) and every iteration either appends or returns.
  auto minmax = std::minmax_element(results.begin(), results.end());
  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
|
||||
|
||||
AffineMap scalehls::getLayoutMap(MemRefType memrefType) {
|
||||
// Check whether the memref has layout map.
|
||||
auto memrefMaps = memrefType.getAffineMaps();
|
||||
|
|
|
@ -96,7 +96,7 @@ void HLSCppOptimizer::applyLoopTilingStrategy(
|
|||
applyPatternsAndFoldGreedily(targetFunc, patterns);
|
||||
|
||||
// Apply general optimizations and array partition.
|
||||
// applyMergeAffineIf(targetFunc);
|
||||
applyMergeAffineIf(targetFunc);
|
||||
applyAffineStoreForward(targetFunc, builder);
|
||||
applySimplifyMemrefAccess(targetFunc);
|
||||
applyArrayPartition(targetFunc, builder);
|
||||
|
@ -290,7 +290,7 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
|
|||
for (auto &band : targetBands) {
|
||||
applyAffineLoopPerfection(band.back(), builder);
|
||||
applyAffineLoopOrderOpt(band);
|
||||
// applyRemoveVariableBound(band.front(), builder);
|
||||
applyRemoveVariableBound(band.front(), builder);
|
||||
}
|
||||
|
||||
// Estimate the current latency.
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "mlir/IR/IntegerSet.h"
|
||||
#include "mlir/Transforms/LoopUtils.h"
|
||||
#include "scalehls/Analysis/Utils.h"
|
||||
#include "scalehls/Transforms/Passes.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
@ -25,58 +26,6 @@ struct RemoveVariableBound
|
|||
};
|
||||
} // namespace
|
||||
|
||||
/// Estimate the range of values a single-result affine bound can evaluate to.
///
/// Returns a (min, max) pair on success, or an empty Optional when the bound
/// map has more than one result, when an operand is not the induction variable
/// of an affine.for with constant bounds, or when the result expression does
/// not fold to a constant after substituting constants for all operands.
/// `context` is used to build the constant affine expressions.
///
/// The range is computed by evaluating the result expression at every corner
/// of the operand space, i.e. each operand set to either the first or the last
/// value its loop takes. NOTE(review): corner evaluation is exact for purely
/// linear expressions; with mod/floordiv the true extremes may differ --
/// confirm this is acceptable for callers.
static Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
  // For now, we can only handle one result affine bound.
  if (bound.getMap().getNumResults() != 1)
    return Optional<std::pair<int64_t, int64_t>>();

  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // Only if the affine bound operands are induction variable, the calculation
    // is possible.
    if (!isForInductionVar(operand))
      return Optional<std::pair<int64_t, int64_t>>();

    // Only if the owner for op of the induction variable has constant bound,
    // the calculation is possible.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<std::pair<int64_t, int64_t>>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    // The loop upper bound is exclusive, so the last value the induction
    // variable takes is the largest lb + k * step that is still below ub.
    // NOTE(review): this presumes ub > lb (a loop with at least one
    // iteration) -- confirm for zero-trip loops.
    lbs.push_back(lb);
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // TODO: maybe a more efficient algorithm.
  auto operandNum = bound.getNumOperands();

  // The corners are enumerated with an unsigned bit mask. Bail out when the
  // mask cannot represent all operands (the enumeration would be impractical
  // anyway) instead of overflowing the shift below.
  if (operandNum >= 32)
    return Optional<std::pair<int64_t, int64_t>>();

  SmallVector<int64_t, 16> results;
  for (unsigned i = 0, e = 1u << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) extreme of operand
      // `pos`. The parentheses are required: `i >> pos % 2 == 0` would parse
      // as `(i >> (pos % 2)) == 0`.
      if (((i >> pos) & 1u) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }
    auto newExpr =
        bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});

    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<std::pair<int64_t, int64_t>>();
  }

  // `results` holds at least one entry here because the loop above runs at
  // least once (2^0 == 1) and every iteration either appends or returns.
  auto minmax = std::minmax_element(results.begin(), results.end());
  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
|
||||
|
||||
/// Apply remove variable bound to all inner loops of the input loop.
|
||||
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
||||
SmallVector<AffineForOp, 4> nestedLoops;
|
||||
|
@ -96,8 +45,7 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
|||
if (!loop.hasConstantUpperBound()) {
|
||||
// TODO: support variable upper bound with more than one result in the
|
||||
// getBoundOfAffineBound() method.
|
||||
if (auto bound = getBoundOfAffineBound(loop.getUpperBound(),
|
||||
builder.getContext())) {
|
||||
if (auto bound = getBoundOfAffineBound(loop.getUpperBound())) {
|
||||
// Collect all components for creating AffineIf operation.
|
||||
auto upperMap = loop.getUpperBoundMap();
|
||||
auto ifExpr = upperMap.getResult(0) -
|
||||
|
|
Loading…
Reference in New Issue