[QoREstimation] support a temporary approach to estimate variable trip count

2021-01-25 13:55:07 -06:00 · 2021-01-25 13:55:07 -06:00 · 76af224a46
parent 3b78917ea9
commit 76af224a46
5 changed files with 81 additions and 59 deletions
--- a/include/scalehls/Analysis/Utils.h
+++ b/include/scalehls/Analysis/Utils.h
@ -101,6 +101,8 @@ Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
 // level with dstOp's any parent loop.
 Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);

+Optional<std::pair<int64_t, int64_t>> getBoundOfAffineBound(AffineBound bound);
+
 AffineMap getLayoutMap(MemRefType memrefType);

 // Collect partition factors and overall partition number through analyzing the
--- a/lib/Analysis/QoREstimation.cpp
+++ b/lib/Analysis/QoREstimation.cpp
@ -490,12 +490,25 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
 bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
  // Set an attribute indicating the trip count. For now, we assume all loops
  // have static loop bound.
-  // TODO: how to handle variable trip count?
  int64_t tripCount = 1;
-  if (auto optionalTripCount = getConstantTripCount(op)) {
+  if (auto optionalTripCount = getConstantTripCount(op))
    tripCount = optionalTripCount.getValue();
-    setAttrValue(op, "trip_count", tripCount);
+  else {
+    // TODO: A temporary approach to estimate the trip count. For now, we take
+    // the average of the upper bound and lower bound of trip count as the
+    // estimated trip count.
+    auto lowerBound = getBoundOfAffineBound(op.getLowerBound());
+    auto upperBound = getBoundOfAffineBound(op.getUpperBound());
+
+    if (lowerBound && upperBound) {
+      auto lowerTripCount =
+          upperBound.getValue().second - lowerBound.getValue().first;
+      auto upperTripCount =
+          upperBound.getValue().first - lowerBound.getValue().second;
+      tripCount = (lowerTripCount + upperTripCount + 1) / 2;
+    }
  }
+  setAttrValue(op, "trip_count", tripCount);

  auto end = begin;
  auto &loopBlock = op.getLoopBody().front();
--- a/lib/Analysis/Utils.cpp
+++ b/lib/Analysis/Utils.cpp
@ -109,6 +109,65 @@ Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
  return nullptr;
 }

+// Return the (min, max) values an affine loop bound can take; a constant bound
+// yields (c, c). Otherwise the map must have one result and all operands must
+// be induction variables of constant-bound loops, else the result is empty.
+Optional<std::pair<int64_t, int64_t>>
+scalehls::getBoundOfAffineBound(AffineBound bound) {
+  auto boundMap = bound.getMap();
+  if (boundMap.isSingleConstant()) {
+    auto constBound = boundMap.getSingleConstantResult();
+    return std::pair<int64_t, int64_t>(constBound, constBound);
+  }
+
+  // For now, we can only handle one result affine bound.
+  if (boundMap.getNumResults() != 1)
+    return Optional<std::pair<int64_t, int64_t>>();
+
+  auto context = boundMap.getContext();
+  SmallVector<int64_t, 4> lbs;
+  SmallVector<int64_t, 4> ubs;
+  for (auto operand : bound.getOperands()) {
+    // Operands must be induction variables of loops with constant bounds.
+    if (!isForInductionVar(operand))
+      return Optional<std::pair<int64_t, int64_t>>();
+    auto ifOp = getForInductionVarOwner(operand);
+    if (!ifOp.hasConstantBounds())
+      return Optional<std::pair<int64_t, int64_t>>();
+    auto lb = ifOp.getConstantLowerBound();
+    auto ub = ifOp.getConstantUpperBound();
+    auto step = ifOp.getStep();
+
+    lbs.push_back(lb);
+    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
+  }
+
+  // Evaluate the bound at every lb/ub combination of the operands: bit "pos"
+  // of "i" selects the upper (1) or lower (0) extreme of operand "pos".
+  // TODO: maybe a more efficient algorithm.
+  auto operandNum = bound.getNumOperands();
+  SmallVector<int64_t, 16> results;
+  for (unsigned i = 0, e = 1u << operandNum; i < e; ++i) {
+    SmallVector<AffineExpr, 4> replacements;
+    for (unsigned pos = 0; pos < operandNum; ++pos) {
+      if ((i >> pos) % 2 == 0)
+        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
+      else
+        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
+    }
+    auto newExpr =
+        boundMap.getResult(0).replaceDimsAndSymbols(replacements, {});
+
+    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
+      results.push_back(constExpr.getValue());
+    else
+      return Optional<std::pair<int64_t, int64_t>>();
+  }
+
+  auto minmax = std::minmax_element(results.begin(), results.end());
+  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
+}
+
 AffineMap scalehls::getLayoutMap(MemRefType memrefType) {
  // Check whether the memref has layout map.
  auto memrefMaps = memrefType.getAffineMaps();
--- a/lib/Transforms/MultipleLevelDSE.cpp
+++ b/lib/Transforms/MultipleLevelDSE.cpp
@ -96,7 +96,7 @@ void HLSCppOptimizer::applyLoopTilingStrategy(
  applyPatternsAndFoldGreedily(targetFunc, patterns);

  // Apply general optimizations and array partition.
-  // applyMergeAffineIf(targetFunc);
+  applyMergeAffineIf(targetFunc);
  applyAffineStoreForward(targetFunc, builder);
  applySimplifyMemrefAccess(targetFunc);
  applyArrayPartition(targetFunc, builder);
@ -290,7 +290,7 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
  for (auto &band : targetBands) {
    applyAffineLoopPerfection(band.back(), builder);
    applyAffineLoopOrderOpt(band);
-    // applyRemoveVariableBound(band.front(), builder);
+    applyRemoveVariableBound(band.front(), builder);
  }

  // Estimate the current latency.
--- a/lib/Transforms/RemoveVariableBound.cpp
+++ b/lib/Transforms/RemoveVariableBound.cpp
@ -6,6 +6,7 @@

 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/LoopUtils.h"
+#include "scalehls/Analysis/Utils.h"
 #include "scalehls/Transforms/Passes.h"

 using namespace mlir;
@ -25,58 +26,6 @@ struct RemoveVariableBound
 };
 } // namespace

-static Optional<std::pair<int64_t, int64_t>>
-getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
-  // For now, we can only handle one result affine bound.
-  if (bound.getMap().getNumResults() != 1)
-    return Optional<std::pair<int64_t, int64_t>>();
-
-  SmallVector<int64_t, 4> lbs;
-  SmallVector<int64_t, 4> ubs;
-  for (auto operand : bound.getOperands()) {
-    // Only if the affine bound operands are induction variable, the calculation
-    // is possible.
-    if (!isForInductionVar(operand))
-      return Optional<std::pair<int64_t, int64_t>>();
-
-    // Only if the owner for op of the induction variable has constant bound,
-    // the calculation is possible.
-    auto ifOp = getForInductionVarOwner(operand);
-    if (!ifOp.hasConstantBounds())
-      return Optional<std::pair<int64_t, int64_t>>();
-
-    auto lb = ifOp.getConstantLowerBound();
-    auto ub = ifOp.getConstantUpperBound();
-    auto step = ifOp.getStep();
-
-    lbs.push_back(lb);
-    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
-  }
-
-  // TODO: maybe a more efficient algorithm.
-  auto operandNum = bound.getNumOperands();
-  SmallVector<int64_t, 16> results;
-  for (unsigned i = 0, e = pow(2, operandNum); i < e; ++i) {
-    SmallVector<AffineExpr, 4> replacements;
-    for (unsigned pos = 0; pos < operandNum; ++pos) {
-      if (i >> pos % 2 == 0)
-        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
-      else
-        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
-    }
-    auto newExpr =
-        bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
-
-    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
-      results.push_back(constExpr.getValue());
-    else
-      return Optional<std::pair<int64_t, int64_t>>();
-  }
-
-  auto minmax = std::minmax_element(results.begin(), results.end());
-  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
-}
-
 /// Apply remove variable bound to all inner loops of the input loop.
 bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
  SmallVector<AffineForOp, 4> nestedLoops;
@ -96,8 +45,7 @@ bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
    if (!loop.hasConstantUpperBound()) {
      // TODO: support variable upper bound with more than one result in the
      // getBoundOfAffineBound() method.
-      if (auto bound = getBoundOfAffineBound(loop.getUpperBound(),
-                                             builder.getContext())) {
+      if (auto bound = getBoundOfAffineBound(loop.getUpperBound())) {
        // Collect all components for creating AffineIf operation.
        auto upperMap = loop.getUpperBoundMap();
        auto ifExpr = upperMap.getResult(0) -