[QoREstimation] support a temporary approach to estimate variable trip count
This commit is contained in:
parent
3b78917ea9
commit
76af224a46
|
@ -101,6 +101,8 @@ Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
|
||||||
// level with dstOp's any parent loop.
|
// level with dstOp's any parent loop.
|
||||||
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
|
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
|
||||||
|
|
||||||
|
Optional<std::pair<int64_t, int64_t>> getBoundOfAffineBound(AffineBound bound);
|
||||||
|
|
||||||
AffineMap getLayoutMap(MemRefType memrefType);
|
AffineMap getLayoutMap(MemRefType memrefType);
|
||||||
|
|
||||||
// Collect partition factors and overall partition number through analyzing the
|
// Collect partition factors and overall partition number through analyzing the
|
||||||
|
|
|
@ -490,12 +490,25 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
|
||||||
bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
|
bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
|
||||||
// Set an attribute indicating the trip count. For now, we assume all loops
|
// Set an attribute indicating the trip count. For now, we assume all loops
|
||||||
// have static loop bound.
|
// have static loop bound.
|
||||||
// TODO: how to handle variable trip count?
|
|
||||||
int64_t tripCount = 1;
|
int64_t tripCount = 1;
|
||||||
if (auto optionalTripCount = getConstantTripCount(op)) {
|
if (auto optionalTripCount = getConstantTripCount(op))
|
||||||
tripCount = optionalTripCount.getValue();
|
tripCount = optionalTripCount.getValue();
|
||||||
setAttrValue(op, "trip_count", tripCount);
|
else {
|
||||||
|
// TODO: A temporary approach to estimate the trip count. For now, we take
|
||||||
|
// the average of the upper bound and lower bound of trip count as the
|
||||||
|
// estimated trip count.
|
||||||
|
auto lowerBound = getBoundOfAffineBound(op.getLowerBound());
|
||||||
|
auto upperBound = getBoundOfAffineBound(op.getUpperBound());
|
||||||
|
|
||||||
|
if (lowerBound && upperBound) {
|
||||||
|
auto lowerTripCount =
|
||||||
|
upperBound.getValue().second - lowerBound.getValue().first;
|
||||||
|
auto upperTripCount =
|
||||||
|
upperBound.getValue().first - lowerBound.getValue().second;
|
||||||
|
tripCount = (lowerTripCount + upperTripCount + 1) / 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
setAttrValue(op, "trip_count", tripCount);
|
||||||
|
|
||||||
auto end = begin;
|
auto end = begin;
|
||||||
auto &loopBlock = op.getLoopBody().front();
|
auto &loopBlock = op.getLoopBody().front();
|
||||||
|
|
|
@ -109,6 +109,65 @@ Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Estimate the constant range (min, max) that an affine loop bound can take.
///
/// If the bound map is a single constant, that constant is returned as both
/// the min and the max. Otherwise the bound must have exactly one result and
/// every operand must be the induction variable of an affine for loop that has
/// constant bounds. The result expression is then evaluated with each operand
/// set to either the first or the last value of its induction variable, for
/// every such combination, and the smallest and largest evaluated values are
/// returned.
///
/// Returns an empty Optional when any of the above requirements does not hold,
/// or when the substituted expression does not fold to a constant.
Optional<std::pair<int64_t, int64_t>>
scalehls::getBoundOfAffineBound(AffineBound bound) {
  using BoundRange = std::pair<int64_t, int64_t>;

  auto boundMap = bound.getMap();
  if (boundMap.isSingleConstant()) {
    auto constBound = boundMap.getSingleConstantResult();
    return BoundRange(constBound, constBound);
  }

  // For now, we can only handle one result affine bound.
  if (boundMap.getNumResults() != 1)
    return Optional<BoundRange>();

  auto context = boundMap.getContext();
  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // The calculation is only possible if every operand of the affine bound
    // is a loop induction variable.
    if (!isForInductionVar(operand))
      return Optional<BoundRange>();

    // The calculation is only possible if the loop owning the induction
    // variable has constant bounds.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<BoundRange>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    // The upper bound is exclusive, so record the largest value of the form
    // lb + k * step that does not exceed ub - 1.
    lbs.push_back(lb);
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // TODO: maybe a more efficient algorithm.
  auto operandNum = bound.getNumOperands();

  // The enumeration below visits 2^operandNum combinations using an unsigned
  // counter; refuse operand counts whose combination count cannot be
  // represented rather than shifting by the full bit width.
  if (operandNum >= 32)
    return Optional<BoundRange>();

  SmallVector<int64_t, 16> results;
  for (unsigned i = 0, e = 1u << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) extreme of operand
      // `pos`. The parentheses are required: `i >> pos % 2 == 0` would parse
      // as `(i >> (pos % 2)) == 0` and skip some combinations.
      if (((i >> pos) & 1u) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }
    auto newExpr = boundMap.getResult(0).replaceDimsAndSymbols(replacements, {});

    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<BoundRange>();
  }

  // `results` is never empty here: the loop above runs at least once and each
  // iteration either appends a value or returns.
  auto minmax = std::minmax_element(results.begin(), results.end());
  return BoundRange(*minmax.first, *minmax.second);
}
|
||||||
|
|
||||||
AffineMap scalehls::getLayoutMap(MemRefType memrefType) {
|
AffineMap scalehls::getLayoutMap(MemRefType memrefType) {
|
||||||
// Check whether the memref has layout map.
|
// Check whether the memref has layout map.
|
||||||
auto memrefMaps = memrefType.getAffineMaps();
|
auto memrefMaps = memrefType.getAffineMaps();
|
||||||
|
|
|
@ -96,7 +96,7 @@ void HLSCppOptimizer::applyLoopTilingStrategy(
|
||||||
applyPatternsAndFoldGreedily(targetFunc, patterns);
|
applyPatternsAndFoldGreedily(targetFunc, patterns);
|
||||||
|
|
||||||
// Apply general optimizations and array partition.
|
// Apply general optimizations and array partition.
|
||||||
// applyMergeAffineIf(targetFunc);
|
applyMergeAffineIf(targetFunc);
|
||||||
applyAffineStoreForward(targetFunc, builder);
|
applyAffineStoreForward(targetFunc, builder);
|
||||||
applySimplifyMemrefAccess(targetFunc);
|
applySimplifyMemrefAccess(targetFunc);
|
||||||
applyArrayPartition(targetFunc, builder);
|
applyArrayPartition(targetFunc, builder);
|
||||||
|
@ -290,7 +290,7 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
|
||||||
for (auto &band : targetBands) {
|
for (auto &band : targetBands) {
|
||||||
applyAffineLoopPerfection(band.back(), builder);
|
applyAffineLoopPerfection(band.back(), builder);
|
||||||
applyAffineLoopOrderOpt(band);
|
applyAffineLoopOrderOpt(band);
|
||||||
// applyRemoveVariableBound(band.front(), builder);
|
applyRemoveVariableBound(band.front(), builder);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Estimate the current latency.
|
// Estimate the current latency.
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include "mlir/IR/IntegerSet.h"
|
#include "mlir/IR/IntegerSet.h"
|
||||||
#include "mlir/Transforms/LoopUtils.h"
|
#include "mlir/Transforms/LoopUtils.h"
|
||||||
|
#include "scalehls/Analysis/Utils.h"
|
||||||
#include "scalehls/Transforms/Passes.h"
|
#include "scalehls/Transforms/Passes.h"
|
||||||
|
|
||||||
using namespace mlir;
|
using namespace mlir;
|
||||||
|
@ -25,58 +26,6 @@ struct RemoveVariableBound
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
/// Estimate the constant range (min, max) that a single-result affine loop
/// bound can take.
///
/// Every operand of the bound must be the induction variable of an affine for
/// loop that has constant bounds. The result expression is evaluated with each
/// operand set to either the first or the last value of its induction
/// variable, for every such combination, and the smallest and largest
/// evaluated values are returned. `context` is used to build the constant
/// expressions that are substituted for the operands.
///
/// Returns an empty Optional when the bound has more than one result, when an
/// operand does not meet the requirements above, or when the substituted
/// expression does not fold to a constant.
static Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
  using BoundRange = std::pair<int64_t, int64_t>;

  // For now, we can only handle one result affine bound.
  if (bound.getMap().getNumResults() != 1)
    return Optional<BoundRange>();

  SmallVector<int64_t, 4> lbs;
  SmallVector<int64_t, 4> ubs;
  for (auto operand : bound.getOperands()) {
    // The calculation is only possible if every operand of the affine bound
    // is a loop induction variable.
    if (!isForInductionVar(operand))
      return Optional<BoundRange>();

    // The calculation is only possible if the loop owning the induction
    // variable has constant bounds.
    auto ifOp = getForInductionVarOwner(operand);
    if (!ifOp.hasConstantBounds())
      return Optional<BoundRange>();

    auto lb = ifOp.getConstantLowerBound();
    auto ub = ifOp.getConstantUpperBound();
    auto step = ifOp.getStep();

    // The upper bound is exclusive, so record the largest value of the form
    // lb + k * step that does not exceed ub - 1.
    lbs.push_back(lb);
    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
  }

  // TODO: maybe a more efficient algorithm.
  auto operandNum = bound.getNumOperands();

  // The enumeration below visits 2^operandNum combinations using an unsigned
  // counter; refuse operand counts whose combination count cannot be
  // represented rather than shifting by the full bit width.
  if (operandNum >= 32)
    return Optional<BoundRange>();

  SmallVector<int64_t, 16> results;
  for (unsigned i = 0, e = 1u << operandNum; i < e; ++i) {
    SmallVector<AffineExpr, 4> replacements;
    for (unsigned pos = 0; pos < operandNum; ++pos) {
      // Bit `pos` of `i` selects the lower (0) or upper (1) extreme of operand
      // `pos`. The parentheses are required: `i >> pos % 2 == 0` would parse
      // as `(i >> (pos % 2)) == 0` and skip some combinations.
      if (((i >> pos) & 1u) == 0)
        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
      else
        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
    }
    auto newExpr =
        bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});

    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
      results.push_back(constExpr.getValue());
    else
      return Optional<BoundRange>();
  }

  // `results` is never empty here: the loop above runs at least once and each
  // iteration either appends a value or returns.
  auto minmax = std::minmax_element(results.begin(), results.end());
  return BoundRange(*minmax.first, *minmax.second);
}
|
|
||||||
|
|
||||||
/// Apply remove variable bound to all inner loops of the input loop.
|
/// Apply remove variable bound to all inner loops of the input loop.
|
||||||
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
||||||
SmallVector<AffineForOp, 4> nestedLoops;
|
SmallVector<AffineForOp, 4> nestedLoops;
|
||||||
|
@ -96,8 +45,7 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
|
||||||
if (!loop.hasConstantUpperBound()) {
|
if (!loop.hasConstantUpperBound()) {
|
||||||
// TODO: support variable upper bound with more than one result in the
|
// TODO: support variable upper bound with more than one result in the
|
||||||
// getBoundOfAffineBound() method.
|
// getBoundOfAffineBound() method.
|
||||||
if (auto bound = getBoundOfAffineBound(loop.getUpperBound(),
|
if (auto bound = getBoundOfAffineBound(loop.getUpperBound())) {
|
||||||
builder.getContext())) {
|
|
||||||
// Collect all components for creating AffineIf operation.
|
// Collect all components for creating AffineIf operation.
|
||||||
auto upperMap = loop.getUpperBoundMap();
|
auto upperMap = loop.getUpperBoundMap();
|
||||||
auto ifExpr = upperMap.getResult(0) -
|
auto ifExpr = upperMap.getResult(0) -
|
||||||
|
|
Loading…
Reference in New Issue