[QoREstimation] factor out QoREstimation.h (#20); [MultipleLevelDSE] start of auto dse

Hanchen Ye 2021-01-08 02:20:11 -06:00
parent 9f31dd663d
commit ba3ca07833
10 changed files with 240 additions and 206 deletions


@ -0,0 +1,124 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#ifndef SCALEHLS_ANALYSIS_QORESTIMATION_H
#define SCALEHLS_ANALYSIS_QORESTIMATION_H
#include "Analysis/Utils.h"
#include "Dialect/HLSCpp/Visitor.h"
#include "INIReader.h"
namespace mlir {
namespace scalehls {
using LatencyMap = llvm::StringMap<int64_t>;
void getLatencyMap(INIReader spec, LatencyMap &latencyMap);
class HLSCppEstimator
: public HLSCppVisitorBase<HLSCppEstimator, bool, int64_t>,
public HLSCppAnalysisBase {
public:
explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
: HLSCppAnalysisBase(OpBuilder(func)), func(func),
latencyMap(latencyMap) {
getFuncDependencies();
}
// For storing all dependencies indexed by the dependency source operation.
using Depends = SmallVector<Operation *, 16>;
using DependsMap = DenseMap<Operation *, Depends>;
// Indicate the unoccupied memory ports number.
struct PortInfo {
unsigned rdPort;
unsigned wrPort;
unsigned rdwrPort;
PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
: rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
};
// For storing ports number of all partitions indexed by the memref.
using Ports = SmallVector<PortInfo, 16>;
using PortsMap = DenseMap<Value, Ports>;
// For storing PortsMap indexed by the scheduling level.
using PortsMapDict = DenseMap<int64_t, PortsMap>;
// For storing the DSP resource utilization indexed by the schedule level.
using ResourceMap = DenseMap<int64_t, int64_t>;
/// Collect all dependencies detected in the function.
void getFuncDependencies();
void setScheduleValue(Operation *op, int64_t begin, int64_t end) {
setAttrValue(op, "schedule_begin", begin);
setAttrValue(op, "schedule_end", end);
}
using HLSCppVisitorBase::visitOp;
bool visitUnhandledOp(Operation *op, int64_t begin) {
// Default latency of any unhandled operation is 0.
setScheduleValue(op, begin, begin);
return true;
}
/// LoadOp and StoreOp related methods.
int64_t getPartitionIndex(Operation *op);
void estimateLoadStore(Operation *op, int64_t begin);
bool visitOp(AffineLoadOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(AffineStoreOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(LoadOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 2);
return true;
}
bool visitOp(StoreOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 1);
return true;
}
/// AffineForOp related methods.
// unsigned getOpMinII(AffineForOp forOp);
int64_t getResMinII(MemAccessesMap &map);
int64_t getDepMinII(AffineForOp forOp, MemAccessesMap &map);
bool visitOp(AffineForOp op, int64_t begin);
/// Other operation handlers.
bool visitOp(AffineIfOp op, int64_t begin);
bool visitOp(CallOp op, int64_t begin);
/// Handle operations with profiled latency.
#define HANDLE(OPTYPE, KEYNAME) \
bool visitOp(OPTYPE op, int64_t begin) { \
setScheduleValue(op, begin, begin + latencyMap[KEYNAME] + 1); \
return true; \
}
HANDLE(AddFOp, "fadd");
HANDLE(MulFOp, "fmul");
HANDLE(DivFOp, "fdiv");
HANDLE(CmpFOp, "fcmp");
#undef HANDLE
/// Block scheduler and estimator.
int64_t getResourceMap(Block &block, ResourceMap &addFMap,
ResourceMap &mulFMap);
int64_t estimateResource(Block &block);
Optional<std::pair<int64_t, int64_t>> estimateBlock(Block &block,
int64_t begin);
void reverseSchedule();
void estimateFunc();
FuncOp &func;
DependsMap dependsMap;
PortsMapDict portsMapDict;
LatencyMap &latencyMap;
};
} // namespace scalehls
} // namespace mlir
#endif // SCALEHLS_ANALYSIS_QORESTIMATION_H
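
Taken together, the factored-out header exposes everything a pass needs to score a function: getLatencyMap fills the per-operation latencies from a target spec, and HLSCppEstimator walks the function and annotates schedule_begin/schedule_end attributes. A rough sketch of driving it, mirroring the QoREstimation pass body shown later in this commit (the helper name and the explicit estimateFunc() call are assumptions based on the declared interface, not code from the commit):

#include "Analysis/QoREstimation.h"
#include "INIReader.h"

using namespace mlir;
using namespace scalehls;

// Hypothetical helper: estimate every function in a module against a spec file.
static void estimateModule(ModuleOp module, StringRef specPath) {
  INIReader spec(specPath.str());
  if (spec.ParseError())
    return; // Bail out on a bad spec file; the real pass emits a diagnostic.

  // Profiled per-operation latencies, keyed as "fadd", "fmul", "fdiv", "fcmp".
  LatencyMap latencyMap;
  getLatencyMap(spec, latencyMap);

  // The estimator annotates each visited operation with schedule_begin and
  // schedule_end attributes while scheduling the function.
  for (auto func : module.getOps<FuncOp>())
    HLSCppEstimator(func, latencyMap).estimateFunc();
}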


@ -71,9 +71,6 @@ using MemAccessesMap = DenseMap<Value, MemAccesses>;
 void getMemAccessesMap(Block &block, MemAccessesMap &map,
                        bool includeCalls = false);

-Optional<std::pair<int64_t, int64_t>>
-getBoundOfAffineBound(AffineBound bound, MLIRContext *context);

 // Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
 // AffineIfOp is transparent.
 Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,


@ -44,16 +44,16 @@ bool applySimplifyMemrefAccess(FuncOp func);
 /// Pragma optimization passes.
 std::unique_ptr<Pass> createLoopPipeliningPass();
 std::unique_ptr<Pass> createArrayPartitionPass();
-std::unique_ptr<Pass> createPragmaDSEPass();
+std::unique_ptr<Pass> createMultipleLevelDSEPass();

 /// Loop optimization passes.
 std::unique_ptr<Pass> createAffineLoopPerfectionPass();
-std::unique_ptr<Pass> createPartialAffineLoopTilePass();
 std::unique_ptr<Pass> createRemoveVariableBoundPass();
+std::unique_ptr<Pass> createPartialAffineLoopTilePass();

 /// Dataflow optimization passes.
-std::unique_ptr<Pass> createSplitFunctionPass();
 std::unique_ptr<Pass> createLegalizeDataflowPass();
+std::unique_ptr<Pass> createSplitFunctionPass();

 /// Bufferization passes.
 std::unique_ptr<Pass> createHLSKernelBufferizePass();


@ -37,17 +37,23 @@ def LoopPipelining : Pass<"loop-pipelining", "FuncOp"> {
   ];
 }

-def PragmaDSE : Pass<"pragma-dse", "ModuleOp"> {
-  let summary = "Optimize pragma configurations";
+def MultipleLevelDSE : Pass<"multiple-level-dse", "ModuleOp"> {
+  let summary = "Optimize HLS design at multiple abstraction level";
   let description = [{
-    This pragma-dse pass will automatically tune HLS pragma insertion and
-    configuration for performance and area optimization. By calling methods
+    This multiple-level-dse pass will automatically conduct the design space
+    exploration (DSE) across multiple abstraction levels. By calling methods
     provided by qor-estimation, this pass is able to rapidly obtain the QoR
-    estimation of the current design point, and feed it back to the design space
-    exploration engine for an efficient convergence.
+    estimation of the current design point, and feed it back to the DSE engine
+    for an efficient optimization convergence.
   }];
-  let constructor = "mlir::scalehls::createPragmaDSEPass()";
+  let constructor = "mlir::scalehls::createMultipleLevelDSEPass()";
+
+  let options = [
+    Option<"targetSpec", "target-spec", "std::string",
+           /*default=*/"\"../config/target-spec.ini\"",
+           "File path: target backend specifications and configurations">
+  ];
 }

 //===----------------------------------------------------------------------===//
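
For illustration only, assuming the usual MLIR syntax for passing pass options through scalehls-opt (the input file name here is hypothetical), the new target-spec option would be supplied as:

scalehls-opt -multiple-level-dse="target-spec=../config/target-spec.ini" input.mlir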


@ -2,10 +2,8 @@
 //
 //===----------------------------------------------------------------------===//

+#include "Analysis/QoREstimation.h"
 #include "Analysis/Passes.h"
-#include "Analysis/Utils.h"
-#include "Dialect/HLSCpp/Visitor.h"
-#include "INIReader.h"
 #include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/LoopAnalysis.h"
@ -18,117 +16,10 @@ using namespace mlir;
 using namespace scalehls;
 using namespace hlscpp;

-using LatencyMap = llvm::StringMap<int64_t>;

 //===----------------------------------------------------------------------===//
-// HLSCppEstimator Class
+// Initialization Methods
 //===----------------------------------------------------------------------===//
namespace {
class HLSCppEstimator
: public HLSCppVisitorBase<HLSCppEstimator, bool, int64_t>,
public HLSCppAnalysisBase {
public:
explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
: HLSCppAnalysisBase(OpBuilder(func)), func(func),
latencyMap(latencyMap) {
getFuncDependencies();
}
// For storing all dependencies indexed by the dependency source operation.
using Depends = SmallVector<Operation *, 16>;
using DependsMap = DenseMap<Operation *, Depends>;
// Indicate the unoccupied memory ports number.
struct PortInfo {
unsigned rdPort;
unsigned wrPort;
unsigned rdwrPort;
PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
: rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
};
// For storing ports number of all partitions indexed by the memref.
using Ports = SmallVector<PortInfo, 16>;
using PortsMap = DenseMap<Value, Ports>;
// For storing PortsMap indexed by the scheduling level.
using PortsMapDict = DenseMap<int64_t, PortsMap>;
// For storing the DSP resource utilization indexed by the schedule level.
using ResourceMap = DenseMap<int64_t, int64_t>;
/// Collect all dependencies detected in the function.
void getFuncDependencies();
void setScheduleValue(Operation *op, int64_t begin, int64_t end) {
setAttrValue(op, "schedule_begin", begin);
setAttrValue(op, "schedule_end", end);
}
using HLSCppVisitorBase::visitOp;
bool visitUnhandledOp(Operation *op, int64_t begin) {
// Default latency of any unhandled operation is 0.
setScheduleValue(op, begin, begin);
return true;
}
/// LoadOp and StoreOp related methods.
int64_t getPartitionIndex(Operation *op);
void estimateLoadStore(Operation *op, int64_t begin);
bool visitOp(AffineLoadOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(AffineStoreOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(LoadOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 2);
return true;
}
bool visitOp(StoreOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 1);
return true;
}
/// AffineForOp related methods.
// unsigned getOpMinII(AffineForOp forOp);
int64_t getResMinII(MemAccessesMap &map);
int64_t getDepMinII(AffineForOp forOp, MemAccessesMap &map);
bool visitOp(AffineForOp op, int64_t begin);
/// Other operation handlers.
bool visitOp(AffineIfOp op, int64_t begin);
bool visitOp(CallOp op, int64_t begin);
/// Handle operations with profiled latency.
#define HANDLE(OPTYPE, KEYNAME) \
bool visitOp(OPTYPE op, int64_t begin) { \
setScheduleValue(op, begin, begin + latencyMap[KEYNAME] + 1); \
return true; \
}
HANDLE(AddFOp, "fadd");
HANDLE(MulFOp, "fmul");
HANDLE(DivFOp, "fdiv");
HANDLE(CmpFOp, "fcmp");
#undef HANDLE
/// Block scheduler and estimator.
int64_t getResourceMap(Block &block, ResourceMap &addFMap,
ResourceMap &mulFMap);
int64_t estimateResource(Block &block);
Optional<std::pair<int64_t, int64_t>> estimateBlock(Block &block,
int64_t begin);
void reverseSchedule();
void estimateFunc();
FuncOp &func;
DependsMap dependsMap;
PortsMapDict portsMapDict;
LatencyMap &latencyMap;
};
} // namespace
 /// Collect all dependencies detected in the function.
 void HLSCppEstimator::getFuncDependencies() {
   MemAccessesMap map;
@ -812,8 +703,9 @@ void HLSCppEstimator::estimateFunc() {
 // Entry of scalehls-opt
 //===----------------------------------------------------------------------===//

-static void getLatencyMap(INIReader &spec, std::string freq,
-                          LatencyMap &latencyMap) {
+void scalehls::getLatencyMap(INIReader spec, LatencyMap &latencyMap) {
+  auto freq = spec.Get("specification", "frequency", "100MHz");
+
   latencyMap["fadd"] = spec.GetInteger(freq, "fadd", 4);
   latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
   latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
@ -826,14 +718,12 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
     // Read configuration file.
     INIReader spec(targetSpec);
     if (spec.ParseError())
-      emitError(getOperation().getLoc(),
-                "error: target spec file parse fail, please refer to "
-                "--help option and pass in correct file path\n");
+      emitError(getOperation().getLoc(), "error: target spec file parse fail, "
+                                         "please pass in correct file path\n");

     // Collect profiling latency data.
-    auto freq = spec.Get("specification", "frequency", "100MHz");
     LatencyMap latencyMap;
-    getLatencyMap(spec, freq, latencyMap);
+    getLatencyMap(spec, latencyMap);

     // Estimate performance and resource utilization.
     for (auto func : getOperation().getOps<FuncOp>())


@ -31,58 +31,6 @@ void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
   }
 }
Optional<std::pair<int64_t, int64_t>>
scalehls::getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
// For now, we can only handle one result affine bound.
if (bound.getMap().getNumResults() != 1)
return Optional<std::pair<int64_t, int64_t>>();
SmallVector<int64_t, 4> lbs;
SmallVector<int64_t, 4> ubs;
for (auto operand : bound.getOperands()) {
// Only if the affine bound operands are induction variable, the calculation
// is possible.
if (!isForInductionVar(operand))
return Optional<std::pair<int64_t, int64_t>>();
// Only if the owner for op of the induction variable has constant bound,
// the calculation is possible.
auto ifOp = getForInductionVarOwner(operand);
if (!ifOp.hasConstantBounds())
return Optional<std::pair<int64_t, int64_t>>();
auto lb = ifOp.getConstantLowerBound();
auto ub = ifOp.getConstantUpperBound();
auto step = ifOp.getStep();
lbs.push_back(lb);
ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
}
// TODO: maybe a more efficient algorithm.
auto operandNum = bound.getNumOperands();
SmallVector<int64_t, 16> results;
for (unsigned i = 0, e = pow(2, operandNum); i < e; ++i) {
SmallVector<AffineExpr, 4> replacements;
for (unsigned pos = 0; pos < operandNum; ++pos) {
if (i >> pos % 2 == 0)
replacements.push_back(getAffineConstantExpr(lbs[pos], context));
else
replacements.push_back(getAffineConstantExpr(ubs[pos], context));
}
auto newExpr =
bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
results.push_back(constExpr.getValue());
else
return Optional<std::pair<int64_t, int64_t>>();
}
auto minmax = std::minmax_element(results.begin(), results.end());
return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
 // Check if the lhsOp and rhsOp is at the same scheduling level. In this
 // check, AffineIfOp is transparent.
 Optional<std::pair<Operation *, Operation *>>


@ -0,0 +1,34 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#include "Analysis/QoREstimation.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "Transforms/Passes.h"
using namespace std;
using namespace mlir;
using namespace scalehls;
using namespace hlscpp;
namespace {
struct MultipleLevelDSE : public MultipleLevelDSEBase<MultipleLevelDSE> {
void runOnOperation() override;
};
} // namespace
void MultipleLevelDSE::runOnOperation() {
// Read configuration file.
INIReader spec(targetSpec);
if (spec.ParseError())
emitError(getOperation().getLoc(), "error: target spec file parse fail, "
"please pass in correct file path\n");
// Collect profiling latency data.
LatencyMap latencyMap;
getLatencyMap(spec, latencyMap);
}
std::unique_ptr<mlir::Pass> scalehls::createMultipleLevelDSEPass() {
return std::make_unique<MultipleLevelDSE>();
}


@ -1,22 +0,0 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "Transforms/Passes.h"
using namespace std;
using namespace mlir;
using namespace scalehls;
using namespace hlscpp;
namespace {
struct PragmaDSE : public PragmaDSEBase<PragmaDSE> {
void runOnOperation() override {}
};
} // namespace
std::unique_ptr<mlir::Pass> scalehls::createPragmaDSEPass() {
return std::make_unique<PragmaDSE>();
}


@ -2,7 +2,6 @@
 //
 //===----------------------------------------------------------------------===//

-#include "Analysis/Utils.h"
 #include "Transforms/Passes.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/LoopUtils.h"
@ -25,6 +24,58 @@ struct RemoveVariableBound
 };
 } // namespace
static Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
// For now, we can only handle one result affine bound.
if (bound.getMap().getNumResults() != 1)
return Optional<std::pair<int64_t, int64_t>>();
SmallVector<int64_t, 4> lbs;
SmallVector<int64_t, 4> ubs;
for (auto operand : bound.getOperands()) {
// Only if the affine bound operands are induction variable, the calculation
// is possible.
if (!isForInductionVar(operand))
return Optional<std::pair<int64_t, int64_t>>();
// Only if the owner for op of the induction variable has constant bound,
// the calculation is possible.
auto ifOp = getForInductionVarOwner(operand);
if (!ifOp.hasConstantBounds())
return Optional<std::pair<int64_t, int64_t>>();
auto lb = ifOp.getConstantLowerBound();
auto ub = ifOp.getConstantUpperBound();
auto step = ifOp.getStep();
lbs.push_back(lb);
ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
}
// TODO: maybe a more efficient algorithm.
auto operandNum = bound.getNumOperands();
SmallVector<int64_t, 16> results;
for (unsigned i = 0, e = pow(2, operandNum); i < e; ++i) {
SmallVector<AffineExpr, 4> replacements;
for (unsigned pos = 0; pos < operandNum; ++pos) {
if (i >> pos % 2 == 0)
replacements.push_back(getAffineConstantExpr(lbs[pos], context));
else
replacements.push_back(getAffineConstantExpr(ubs[pos], context));
}
auto newExpr =
bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
results.push_back(constExpr.getValue());
else
return Optional<std::pair<int64_t, int64_t>>();
}
auto minmax = std::minmax_element(results.begin(), results.end());
return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
 /// Apply remove variable bound to all inner loops of the input loop.
 bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
   SmallVector<AffineForOp, 4> nestedLoops;
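
The moved getBoundOfAffineBound helper evaluates a single-result affine bound at every corner of the surrounding iteration space: each operand must be an induction variable with constant bounds, the upper value used is the last value the variable actually reaches (ub - 1 - (ub - 1 - lb) % step), and the expression is evaluated at all 2^n lower/upper combinations to get its min and max. A self-contained sketch of the same corner-enumeration idea on a plain linear expression (the function name and example values are illustrative, not from the commit):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <utility>
#include <vector>

// Evaluate `expr` at every corner of the boxes [lb, ub] and return {min, max}.
static std::pair<int64_t, int64_t>
getCornerBounds(const std::vector<std::pair<int64_t, int64_t>> &ranges,
                const std::function<int64_t(const std::vector<int64_t> &)> &expr) {
  std::vector<int64_t> results;
  unsigned n = ranges.size();
  for (unsigned i = 0, e = 1u << n; i < e; ++i) {
    std::vector<int64_t> point;
    // Bit `pos` of `i` selects the lower or upper value of operand `pos`.
    for (unsigned pos = 0; pos < n; ++pos)
      point.push_back((i >> pos) & 1 ? ranges[pos].second : ranges[pos].first);
    results.push_back(expr(point));
  }
  auto minmax = std::minmax_element(results.begin(), results.end());
  return {*minmax.first, *minmax.second};
}

int main() {
  // Example: d0 in [0, 15], d1 in [0, 3], expression d0 + d1.
  auto bounds = getCornerBounds(
      {{0, 15}, {0, 3}},
      [](const std::vector<int64_t> &p) { return p[0] + p[1]; });
  printf("min=%ld max=%ld\n", (long)bounds.first, (long)bounds.second);
  return 0;
}

With lower bounds 0 and adjusted upper bounds 15 and 3, the four corners evaluate to 0, 3, 15, and 18, so the helper reports (0, 18).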


@ -0,0 +1,6 @@
// RUN: scalehls-opt -multiple-level-dse %s | FileCheck %s
// CHECK-LABEL: func @test_for
func @test_for() {
return
}