[QoREstimation] factor out QoREstimation.h (#20); [MultipleLevelDSE] start of auto dse

2021-01-08 02:20:11 -06:00 · 2021-01-08 02:20:11 -06:00 · ba3ca07833
parent 9f31dd663d
commit ba3ca07833
10 changed files with 240 additions and 206 deletions
--- a/include/Analysis/QoREstimation.h
+++ b/include/Analysis/QoREstimation.h
@ -0,0 +1,124 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCALEHLS_ANALYSIS_QORESTIMATION_H
+#define SCALEHLS_ANALYSIS_QORESTIMATION_H
+
+#include "Analysis/Utils.h"
+#include "Dialect/HLSCpp/Visitor.h"
+#include "INIReader.h"
+
+namespace mlir {
+namespace scalehls {
+
+using LatencyMap = llvm::StringMap<int64_t>;
+void getLatencyMap(INIReader spec, LatencyMap &latencyMap);
+
+class HLSCppEstimator // Visitor that tags ops with schedule_begin/schedule_end.
+    : public HLSCppVisitorBase<HLSCppEstimator, bool, int64_t>,
+      public HLSCppAnalysisBase {
+public:
+  explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
+      : HLSCppAnalysisBase(OpBuilder(func)), func(func),
+        latencyMap(latencyMap) {
+    getFuncDependencies(); // Dependencies are collected once, at construction.
+  }
+
+  // Depends lists operations; DependsMap indexes such lists by the dependency
+  using Depends = SmallVector<Operation *, 16>; // source operation.
+  using DependsMap = DenseMap<Operation *, Depends>;
+
+  // Number of unoccupied memory ports of each kind; every count defaults to 0.
+  struct PortInfo {
+    unsigned rdPort;
+    unsigned wrPort;
+    unsigned rdwrPort;
+
+    PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
+        : rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
+  };
+
+  // Ports holds one PortInfo per partition; PortsMap is keyed by the memref.
+  using Ports = SmallVector<PortInfo, 16>;
+  using PortsMap = DenseMap<Value, Ports>;
+  // PortsMapDict holds one PortsMap per scheduling level (the int64_t key).
+  using PortsMapDict = DenseMap<int64_t, PortsMap>;
+
+  // For storing the DSP resource utilization indexed by the schedule level.
+  using ResourceMap = DenseMap<int64_t, int64_t>;
+
+  /// Collect all dependencies detected in the function (defined out of line).
+  void getFuncDependencies();
+
+  void setScheduleValue(Operation *op, int64_t begin, int64_t end) {
+    setAttrValue(op, "schedule_begin", begin);
+    setAttrValue(op, "schedule_end", end);
+  }
+
+  using HLSCppVisitorBase::visitOp; // Keep the base-class overloads visible.
+  bool visitUnhandledOp(Operation *op, int64_t begin) {
+    // Default latency of any unhandled operation is 0, hence end == begin.
+    setScheduleValue(op, begin, begin);
+    return true;
+  }
+
+  /// Load/store handlers: affine accesses are estimated out of line, plain
+  int64_t getPartitionIndex(Operation *op); // LoadOp/StoreOp get fixed spans.
+  void estimateLoadStore(Operation *op, int64_t begin);
+  bool visitOp(AffineLoadOp op, int64_t begin) {
+    return estimateLoadStore(op, begin), true; // Comma operator: always true.
+  }
+  bool visitOp(AffineStoreOp op, int64_t begin) {
+    return estimateLoadStore(op, begin), true; // Comma operator: always true.
+  }
+  bool visitOp(LoadOp op, int64_t begin) {
+    setScheduleValue(op, begin, begin + 2);
+    return true;
+  }
+  bool visitOp(StoreOp op, int64_t begin) {
+    setScheduleValue(op, begin, begin + 1);
+    return true;
+  }
+
+  /// AffineForOp related methods (all defined out of line).
+  // Disabled declaration: unsigned getOpMinII(AffineForOp forOp);
+  int64_t getResMinII(MemAccessesMap &map);
+  int64_t getDepMinII(AffineForOp forOp, MemAccessesMap &map);
+  bool visitOp(AffineForOp op, int64_t begin);
+
+  /// Other operation handlers (defined out of line).
+  bool visitOp(AffineIfOp op, int64_t begin);
+  bool visitOp(CallOp op, int64_t begin);
+
+  /// Handlers for ops whose end is begin + latencyMap[KEYNAME] + 1.
+#define HANDLE(OPTYPE, KEYNAME)                                                \
+  bool visitOp(OPTYPE op, int64_t begin) {                                     \
+    setScheduleValue(op, begin, begin + latencyMap[KEYNAME] + 1);              \
+    return true;                                                               \
+  }
+  HANDLE(AddFOp, "fadd");
+  HANDLE(MulFOp, "fmul");
+  HANDLE(DivFOp, "fdiv");
+  HANDLE(CmpFOp, "fcmp");
+#undef HANDLE
+
+  /// Block scheduler and estimator (all defined out of line).
+  int64_t getResourceMap(Block &block, ResourceMap &addFMap,
+                         ResourceMap &mulFMap);
+  int64_t estimateResource(Block &block);
+  Optional<std::pair<int64_t, int64_t>> estimateBlock(Block &block,
+                                                      int64_t begin);
+  void reverseSchedule();
+  void estimateFunc();
+
+  FuncOp &func; // Stored by reference: the caller's FuncOp must outlive this.
+  DependsMap dependsMap;
+  PortsMapDict portsMapDict;
+  LatencyMap &latencyMap; // Stored by reference; read by the HANDLE visitors.
+};
+
+} // namespace scalehls
+} // namespace mlir
+
+#endif // SCALEHLS_ANALYSIS_QORESTIMATION_H
--- a/include/Analysis/Utils.h
+++ b/include/Analysis/Utils.h
@ -71,9 +71,6 @@ using MemAccessesMap = DenseMap<Value, MemAccesses>;
 void getMemAccessesMap(Block &block, MemAccessesMap &map,
                       bool includeCalls = false);

-Optional<std::pair<int64_t, int64_t>>
-getBoundOfAffineBound(AffineBound bound, MLIRContext *context);
-
 // Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
 // AffineIfOp is transparent.
 Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
--- a/include/Transforms/Passes.h
+++ b/include/Transforms/Passes.h
@ -44,16 +44,16 @@ bool applySimplifyMemrefAccess(FuncOp func);
 /// Pragma optimization passes.
 std::unique_ptr<Pass> createLoopPipeliningPass();
 std::unique_ptr<Pass> createArrayPartitionPass();
-std::unique_ptr<Pass> createPragmaDSEPass();
+std::unique_ptr<Pass> createMultipleLevelDSEPass();

 /// Loop optimization passes.
 std::unique_ptr<Pass> createAffineLoopPerfectionPass();
-std::unique_ptr<Pass> createPartialAffineLoopTilePass();
 std::unique_ptr<Pass> createRemoveVariableBoundPass();
+std::unique_ptr<Pass> createPartialAffineLoopTilePass();

 /// Dataflow optimization passes.
-std::unique_ptr<Pass> createSplitFunctionPass();
 std::unique_ptr<Pass> createLegalizeDataflowPass();
+std::unique_ptr<Pass> createSplitFunctionPass();

 /// Bufferization passes.
 std::unique_ptr<Pass> createHLSKernelBufferizePass();
--- a/include/Transforms/Passes.td
+++ b/include/Transforms/Passes.td
@ -37,17 +37,23 @@ def LoopPipelining : Pass<"loop-pipelining", "FuncOp"> {
  ];
 }

-def PragmaDSE : Pass<"pragma-dse", "ModuleOp"> {
-  let summary = "Optimize pragma configurations";
+def MultipleLevelDSE : Pass<"multiple-level-dse", "ModuleOp"> {
+  let summary = "Optimize HLS design at multiple abstraction levels";
  let description = [{
-    This pragma-dse pass will automatically tune HLS pragma insertion and
-    configuration for performance and area optimization. By calling methods
+    This multiple-level-dse pass will automatically conduct the design space
+    exploration (DSE) across multiple abstraction levels. By calling methods
    provided by qor-estimation, this pass is able to rapidly obtain the QoR
-    estimation of the current design point, and feed it back to the design space
-    exploration engine for an efficient convergence.
+    estimation of the current design point, and feed it back to the DSE engine
+    for an efficient optimization convergence.
  }];

-  let constructor = "mlir::scalehls::createPragmaDSEPass()";
+  let constructor = "mlir::scalehls::createMultipleLevelDSEPass()";
+
+  let options = [
+    Option<"targetSpec", "target-spec", "std::string",
+           /*default=*/"\"../config/target-spec.ini\"", 
+           "File path: target backend specifications and configurations">
+  ];
 }

 //===----------------------------------------------------------------------===//
--- a/lib/Analysis/QoREstimation.cpp
+++ b/lib/Analysis/QoREstimation.cpp
@ -2,10 +2,8 @@
 //
 //===----------------------------------------------------------------------===//

+#include "Analysis/QoREstimation.h"
 #include "Analysis/Passes.h"
-#include "Analysis/Utils.h"
-#include "Dialect/HLSCpp/Visitor.h"
-#include "INIReader.h"
 #include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/LoopAnalysis.h"
@ -18,117 +16,10 @@ using namespace mlir;
 using namespace scalehls;
 using namespace hlscpp;

-using LatencyMap = llvm::StringMap<int64_t>;
-
 //===----------------------------------------------------------------------===//
-// HLSCppEstimator Class
+// Initialization Methods
 //===----------------------------------------------------------------------===//

-namespace {
-class HLSCppEstimator
-    : public HLSCppVisitorBase<HLSCppEstimator, bool, int64_t>,
-      public HLSCppAnalysisBase {
-public:
-  explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
-      : HLSCppAnalysisBase(OpBuilder(func)), func(func),
-        latencyMap(latencyMap) {
-    getFuncDependencies();
-  }
-
-  // For storing all dependencies indexed by the dependency source operation.
-  using Depends = SmallVector<Operation *, 16>;
-  using DependsMap = DenseMap<Operation *, Depends>;
-
-  // Indicate the unoccupied memory ports number.
-  struct PortInfo {
-    unsigned rdPort;
-    unsigned wrPort;
-    unsigned rdwrPort;
-
-    PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
-        : rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
-  };
-
-  // For storing ports number of all partitions indexed by the memref.
-  using Ports = SmallVector<PortInfo, 16>;
-  using PortsMap = DenseMap<Value, Ports>;
-  // For storing PortsMap indexed by the scheduling level.
-  using PortsMapDict = DenseMap<int64_t, PortsMap>;
-
-  // For storing the DSP resource utilization indexed by the schedule level.
-  using ResourceMap = DenseMap<int64_t, int64_t>;
-
-  /// Collect all dependencies detected in the function.
-  void getFuncDependencies();
-
-  void setScheduleValue(Operation *op, int64_t begin, int64_t end) {
-    setAttrValue(op, "schedule_begin", begin);
-    setAttrValue(op, "schedule_end", end);
-  }
-
-  using HLSCppVisitorBase::visitOp;
-  bool visitUnhandledOp(Operation *op, int64_t begin) {
-    // Default latency of any unhandled operation is 0.
-    setScheduleValue(op, begin, begin);
-    return true;
-  }
-
-  /// LoadOp and StoreOp related methods.
-  int64_t getPartitionIndex(Operation *op);
-  void estimateLoadStore(Operation *op, int64_t begin);
-  bool visitOp(AffineLoadOp op, int64_t begin) {
-    return estimateLoadStore(op, begin), true;
-  }
-  bool visitOp(AffineStoreOp op, int64_t begin) {
-    return estimateLoadStore(op, begin), true;
-  }
-  bool visitOp(LoadOp op, int64_t begin) {
-    setScheduleValue(op, begin, begin + 2);
-    return true;
-  }
-  bool visitOp(StoreOp op, int64_t begin) {
-    setScheduleValue(op, begin, begin + 1);
-    return true;
-  }
-
-  /// AffineForOp related methods.
-  // unsigned getOpMinII(AffineForOp forOp);
-  int64_t getResMinII(MemAccessesMap &map);
-  int64_t getDepMinII(AffineForOp forOp, MemAccessesMap &map);
-  bool visitOp(AffineForOp op, int64_t begin);
-
-  /// Other operation handlers.
-  bool visitOp(AffineIfOp op, int64_t begin);
-  bool visitOp(CallOp op, int64_t begin);
-
-  /// Handle operations with profiled latency.
-#define HANDLE(OPTYPE, KEYNAME)                                                \
-  bool visitOp(OPTYPE op, int64_t begin) {                                     \
-    setScheduleValue(op, begin, begin + latencyMap[KEYNAME] + 1);              \
-    return true;                                                               \
-  }
-  HANDLE(AddFOp, "fadd");
-  HANDLE(MulFOp, "fmul");
-  HANDLE(DivFOp, "fdiv");
-  HANDLE(CmpFOp, "fcmp");
-#undef HANDLE
-
-  /// Block scheduler and estimator.
-  int64_t getResourceMap(Block &block, ResourceMap &addFMap,
-                         ResourceMap &mulFMap);
-  int64_t estimateResource(Block &block);
-  Optional<std::pair<int64_t, int64_t>> estimateBlock(Block &block,
-                                                      int64_t begin);
-  void reverseSchedule();
-  void estimateFunc();
-
-  FuncOp &func;
-  DependsMap dependsMap;
-  PortsMapDict portsMapDict;
-  LatencyMap &latencyMap;
-};
-} // namespace
-
 /// Collect all dependencies detected in the function.
 void HLSCppEstimator::getFuncDependencies() {
  MemAccessesMap map;
@ -812,8 +703,9 @@ void HLSCppEstimator::estimateFunc() {
 // Entry of scalehls-opt
 //===----------------------------------------------------------------------===//

-static void getLatencyMap(INIReader &spec, std::string freq,
-                          LatencyMap &latencyMap) {
+void scalehls::getLatencyMap(INIReader spec, LatencyMap &latencyMap) {
+  auto freq = spec.Get("specification", "frequency", "100MHz");
+
  latencyMap["fadd"] = spec.GetInteger(freq, "fadd", 4);
  latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
  latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
@ -826,14 +718,12 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
    // Read configuration file.
    INIReader spec(targetSpec);
    if (spec.ParseError())
-      emitError(getOperation().getLoc(),
-                "error: target spec file parse fail, please refer to "
-                "--help option and pass in correct file path\n");
+      emitError(getOperation().getLoc(), "error: target spec file parse fail, "
+                                         "please pass in correct file path\n");

    // Collect profiling latency data.
-    auto freq = spec.Get("specification", "frequency", "100MHz");
    LatencyMap latencyMap;
-    getLatencyMap(spec, freq, latencyMap);
+    getLatencyMap(spec, latencyMap);

    // Estimate performance and resource utilization.
    for (auto func : getOperation().getOps<FuncOp>())
--- a/lib/Analysis/Utils.cpp
+++ b/lib/Analysis/Utils.cpp
@ -31,58 +31,6 @@ void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
  }
 }

-Optional<std::pair<int64_t, int64_t>>
-scalehls::getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
-  // For now, we can only handle one result affine bound.
-  if (bound.getMap().getNumResults() != 1)
-    return Optional<std::pair<int64_t, int64_t>>();
-
-  SmallVector<int64_t, 4> lbs;
-  SmallVector<int64_t, 4> ubs;
-  for (auto operand : bound.getOperands()) {
-    // Only if the affine bound operands are induction variable, the calculation
-    // is possible.
-    if (!isForInductionVar(operand))
-      return Optional<std::pair<int64_t, int64_t>>();
-
-    // Only if the owner for op of the induction variable has constant bound,
-    // the calculation is possible.
-    auto ifOp = getForInductionVarOwner(operand);
-    if (!ifOp.hasConstantBounds())
-      return Optional<std::pair<int64_t, int64_t>>();
-
-    auto lb = ifOp.getConstantLowerBound();
-    auto ub = ifOp.getConstantUpperBound();
-    auto step = ifOp.getStep();
-
-    lbs.push_back(lb);
-    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
-  }
-
-  // TODO: maybe a more efficient algorithm.
-  auto operandNum = bound.getNumOperands();
-  SmallVector<int64_t, 16> results;
-  for (unsigned i = 0, e = pow(2, operandNum); i < e; ++i) {
-    SmallVector<AffineExpr, 4> replacements;
-    for (unsigned pos = 0; pos < operandNum; ++pos) {
-      if (i >> pos % 2 == 0)
-        replacements.push_back(getAffineConstantExpr(lbs[pos], context));
-      else
-        replacements.push_back(getAffineConstantExpr(ubs[pos], context));
-    }
-    auto newExpr =
-        bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
-
-    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
-      results.push_back(constExpr.getValue());
-    else
-      return Optional<std::pair<int64_t, int64_t>>();
-  }
-
-  auto minmax = std::minmax_element(results.begin(), results.end());
-  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
-}
-
 // Check if the lhsOp and rhsOp is at the same scheduling level. In this
 // check, AffineIfOp is transparent.
 Optional<std::pair<Operation *, Operation *>>
--- a/lib/Transforms/MultipleLevelDSE.cpp
+++ b/lib/Transforms/MultipleLevelDSE.cpp
@ -0,0 +1,34 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Analysis/QoREstimation.h"
+#include "Dialect/HLSCpp/HLSCpp.h"
+#include "Transforms/Passes.h"
+
+using namespace std;
+using namespace mlir;
+using namespace scalehls;
+using namespace hlscpp;
+
+namespace {
+struct MultipleLevelDSE : public MultipleLevelDSEBase<MultipleLevelDSE> {
+  void runOnOperation() override;
+};
+} // namespace
+
+/// Pass entry point: load the target specification named by the `targetSpec`
+/// option and collect the profiled latency data from it.
+void MultipleLevelDSE::runOnOperation() {
+  // Parse the specification file. A failure reported by ParseError() is
+  // emitted as an error diagnostic at the location of the operation this pass
+  // runs on; execution then continues with whatever `spec` provides.
+  INIReader spec(targetSpec);
+  const auto parseStatus = spec.ParseError();
+  if (parseStatus) {
+    auto loc = getOperation().getLoc();
+    emitError(loc, "error: target spec file parse fail, "
+                   "please pass in correct file path\n");
+  }
+
+  // Build the latency table from the specification.
+  LatencyMap latencyMap;
+  getLatencyMap(spec, latencyMap);
+}
+
+/// Factory for the multiple-level DSE pass; returns a newly created instance.
+std::unique_ptr<mlir::Pass> scalehls::createMultipleLevelDSEPass() {
+  std::unique_ptr<mlir::Pass> pass = std::make_unique<MultipleLevelDSE>();
+  return pass;
+}
--- a/lib/Transforms/PragmaDSE.cpp
+++ b/lib/Transforms/PragmaDSE.cpp
@ -1,22 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-//===----------------------------------------------------------------------===//
-
-#include "Analysis/Utils.h"
-#include "Dialect/HLSCpp/HLSCpp.h"
-#include "Transforms/Passes.h"
-
-using namespace std;
-using namespace mlir;
-using namespace scalehls;
-using namespace hlscpp;
-
-namespace {
-struct PragmaDSE : public PragmaDSEBase<PragmaDSE> {
-  void runOnOperation() override {}
-};
-} // namespace
-
-std::unique_ptr<mlir::Pass> scalehls::createPragmaDSEPass() {
-  return std::make_unique<PragmaDSE>();
-}
--- a/lib/Transforms/RemoveVariableBound.cpp
+++ b/lib/Transforms/RemoveVariableBound.cpp
@ -2,7 +2,6 @@
 //
 //===----------------------------------------------------------------------===//

-#include "Analysis/Utils.h"
 #include "Transforms/Passes.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/LoopUtils.h"
@ -25,6 +24,58 @@ struct RemoveVariableBound
 };
 } // namespace

+/// Compute the constant [min, max] range that an affine bound can evaluate to.
+///
+/// The bound is evaluated at every combination of the extreme values (first
+/// and last value reached) of its operands, and the smallest and largest
+/// results are returned as a (min, max) pair.
+///
+/// \param bound   the affine bound to analyze.
+/// \param context the MLIR context used to build constant affine expressions.
+/// \returns the (min, max) pair, or an empty Optional when the bound has more
+///          than one result, an operand is not the induction variable of a
+///          loop with constant bounds, there are too many operands to
+///          enumerate, or the substituted expression is not a constant.
+static Optional<std::pair<int64_t, int64_t>>
+getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
+  using BoundRange = Optional<std::pair<int64_t, int64_t>>;
+
+  // For now, we can only handle one result affine bound.
+  if (bound.getMap().getNumResults() != 1)
+    return BoundRange();
+
+  // The enumeration below visits 2^operandNum corners using a 64-bit counter;
+  // reject operand counts for which the shift would be undefined.
+  const unsigned operandNum = bound.getNumOperands();
+  if (operandNum >= 64)
+    return BoundRange();
+
+  SmallVector<int64_t, 4> lbs;
+  SmallVector<int64_t, 4> ubs;
+  for (auto operand : bound.getOperands()) {
+    // Only if the affine bound operands are induction variable, the calculation
+    // is possible.
+    if (!isForInductionVar(operand))
+      return BoundRange();
+
+    // Only if the owner for op of the induction variable has constant bound,
+    // the calculation is possible.
+    auto forOp = getForInductionVarOwner(operand);
+    if (!forOp.hasConstantBounds())
+      return BoundRange();
+
+    auto lb = forOp.getConstantLowerBound();
+    auto ub = forOp.getConstantUpperBound();
+    auto step = forOp.getStep();
+
+    // The upper bound is exclusive, so step back to the last value the
+    // induction variable actually takes.
+    lbs.push_back(lb);
+    ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
+  }
+
+  // TODO: maybe a more efficient algorithm.
+  // NOTE(review): all operands are substituted as dims and no symbol
+  // replacements are passed; confirm the maps reaching here have no symbols.
+  SmallVector<int64_t, 16> results;
+  const uint64_t cornerNum = uint64_t(1) << operandNum;
+  for (uint64_t i = 0; i < cornerNum; ++i) {
+    SmallVector<AffineExpr, 4> replacements;
+    for (unsigned pos = 0; pos < operandNum; ++pos) {
+      // Bit `pos` of `i` selects the lower (0) or upper (1) extreme of operand
+      // `pos`. The parentheses matter: `i >> pos % 2 == 0` would parse as
+      // `(i >> (pos % 2)) == 0` and leave some corners unvisited.
+      const bool useLower = ((i >> pos) & 1) == 0;
+      replacements.push_back(
+          getAffineConstantExpr(useLower ? lbs[pos] : ubs[pos], context));
+    }
+    auto newExpr =
+        bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
+
+    if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
+      results.push_back(constExpr.getValue());
+    else
+      return BoundRange();
+  }
+
+  // `results` is never empty here: even with zero operands one corner is
+  // evaluated, so dereferencing the iterators is safe.
+  auto minmax = std::minmax_element(results.begin(), results.end());
+  return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
+}
+
 /// Apply remove variable bound to all inner loops of the input loop.
 bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
  SmallVector<AffineForOp, 4> nestedLoops;
--- a/test/Transforms/test_multiple_level_dse.mlir
+++ b/test/Transforms/test_multiple_level_dse.mlir
@ -0,0 +1,6 @@
+// RUN: scalehls-opt -multiple-level-dse %s | FileCheck %s
+
+// CHECK-LABEL: func @test_for
+func @test_for() {
+  return
+}