[QoREstimation] support function call estimation (known issue: CallOps inside loops are not comprehensively considered); refine estimation for multiple loops and select op (#5); fix related bugs

2020-12-21 19:02:39 -06:00 · 2020-12-21 19:02:39 -06:00 · 85c47e98e3
parent b0bf044c9a
commit 85c47e98e3
9 changed files with 174 additions and 120 deletions
--- a/README.md
+++ b/README.md
@ -36,23 +36,30 @@ After the installation and test successfully completed, you should be able to pl
 $ export PATH=$SCALEHLS_DIR/build/bin:$PATH
 $ cd $SCALEHLS_DIR

-$ # Benchmark generation, dataflow-level optimization, and bufferization.
+$ # Benchmark generation, dataflow-level optimization, HLSKernel lowering and bufferization.
 $ benchmark-gen -type "cnn" -config "config/cnn-config.ini" -number 1 \
    | scalehls-opt -legalize-dataflow -split-function \
    -hlskernel-bufferize -hlskernel-to-affine -func-bufferize -canonicalize

-$ # HLSKernel lowering, loop-level and pragma-level optimizations, and performance estimation.
+$ # Loop and pragma-level optimizations, performance estimation, and HLS C++ code generation.
 $ scalehls-opt test/Conversion/HLSKernelToAffine/test_gemm.mlir -hlskernel-to-affine \
-    -affine-loop-perfection -remove-var-loop-bound -partial-affine-loop-tile="tile-level=1 tile-size=4" \
+    -affine-loop-perfection -remove-var-loop-bound -affine-loop-normalize \
+    -partial-affine-loop-tile="tile-level=1 tile-size=4" \
    -convert-to-hlscpp="top-function=test_gemm" -loop-pipelining="pipeline-level=1" \
    -store-op-forward -simplify-memref-access -array-partition -cse -canonicalize \
-    -qor-estimation="target-spec=config/target-spec.ini"
+    -qor-estimation="target-spec=config/target-spec.ini" \
+    | scalehls-translate -emit-hlscpp

-$ # HLS C++ code generation.
-$ scalehls-opt test/Conversion/HLSKernelToAffine/test_gemm.mlir -hlskernel-to-affine \
+$ # Put them together.
+$ benchmark-gen -type "cnn" -config "config/cnn-config.ini" -number 1 \
+    | scalehls-opt -legalize-dataflow -split-function \
+    -hlskernel-bufferize -hlskernel-to-affine -func-bufferize \
+    -affine-loop-perfection -affine-loop-normalize \
+    -convert-to-hlscpp="top-function=auto_gen_cnn" \
+    -store-op-forward -simplify-memref-access -cse -canonicalize \
+    -qor-estimation="target-spec=config/target-spec.ini" \
    | scalehls-translate -emit-hlscpp
 ```
-You can go through `benchmark-gen`, `scalehls-opt`, and `scalehls-translate` to try the whole flow. We also provide some computation kernel level test cases located at `test/Conversion/HLSKernelToAffine/` for experimenting the ScaleHLS passes and tools.

 ## Ablation study
 If Vivado HLS (2019.1 tested) is installed on your machine, running the following script will report the HLS results for some benchmarks (around 8 hours on AMD Ryzen7 3800X for all 33 tests).
--- a/config/target-spec.ini
+++ b/config/target-spec.ini
@ -6,10 +6,8 @@ fadd=4
 fmul=3
 fdiv=15
 fcmp=1
-fselect=0

 fadd_delay=7.25
 fmul_delay=5.7
 fdiv_delay=6.07
 fcmp_delay=6.4
-fselect_delay=0.69
--- a/include/Analysis/Passes.td
+++ b/include/Analysis/Passes.td
@ -20,9 +20,7 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
  let options = [
    Option<"targetSpec", "target-spec", "std::string",
           /*default=*/"\"../config/target-spec.ini\"", 
-           "File path: target backend specifications and configurations">,
-    Option<"topFunction", "top-function", "std::string", /*default=*/"", 
-           "The top function for HLS synthesis">
+           "File path: target backend specifications and configurations">
  ];
 }

--- a/include/Analysis/Utils.h
+++ b/include/Analysis/Utils.h
@ -87,10 +87,10 @@ public:
 // Helper methods
 //===----------------------------------------------------------------------===//

-// For storing all affine memory access operations (including AffineLoadOp and
-// AffineStoreOp) indexed by the corresponding memref.
-using LoadStores = SmallVector<Operation *, 16>;
-using LoadStoresMap = DenseMap<Value, LoadStores>;
+// For storing all affine memory access operations (including CallOp,
+// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
+using MemAccesses = SmallVector<Operation *, 16>;
+using MemAccessesMap = DenseMap<Value, MemAccesses>;

 // Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
 // AffineIfOp is transparent.
@ -110,8 +110,11 @@ hlscpp::ArrayOp getArrayOp(Value memref);

 hlscpp::ArrayOp getArrayOp(Operation *op);

-/// Collect all load and store operations in the block.
-void getLoadStoresMap(Block &block, LoadStoresMap &map);
+/// Collect all load and store operations in the block; when includeCalls is
+/// set, CallOps directly in the block that take memrefs are collected too. The
+/// operations of each memref are ordered: none dominates one in front of it.
+void getMemAccessesMap(Block &block, MemAccessesMap &map,
+                       bool includeCalls = false);

 } // namespace scalehls
 } // namespace mlir
--- a/lib/Analysis/QoREstimation.cpp
+++ b/lib/Analysis/QoREstimation.cpp
@ -32,7 +32,7 @@ public:
  explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
      : HLSCppAnalysisBase(OpBuilder(func)), func(func),
        latencyMap(latencyMap) {
-    getFuncMemRefDepends();
+    getFuncDependencies();
  }

  // Indicate the unoccupied memory ports number.
@ -56,7 +56,8 @@ public:
  using Depends = SmallVector<Operation *, 16>;
  using DependsMap = DenseMap<Operation *, Depends>;

-  void getFuncMemRefDepends();
+  /// Collect all dependencies detected in the function.
+  void getFuncDependencies();

  void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
    setAttrValue(op, "schedule_begin", begin);
@ -65,11 +66,12 @@ public:

  using HLSCppVisitorBase::visitOp;
  Optional<unsigned> visitUnhandledOp(Operation *op, unsigned begin) {
-    // Default latency of any unhandled operation is 1.
-    setScheduleValue(op, begin, begin + 1);
-    return begin + 1;
+    // Default latency of any unhandled operation is 0.
+    setScheduleValue(op, begin, begin);
+    return begin;
  }

+  /// LoadOp and StoreOp related methods.
  int32_t getPartitionIndex(Operation *op);
  unsigned getLoadStoreSchedule(Operation *op, unsigned begin);
  Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin) {
@ -79,15 +81,15 @@ public:
    return getLoadStoreSchedule(op, begin);
  }

+  /// AffineForOp related methods.
  // unsigned getOpMinII(AffineForOp forOp);
-  unsigned getResMinII(LoadStoresMap &map);
-  unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map);
+  unsigned getResMinII(MemAccessesMap &map);
+  unsigned getDepMinII(AffineForOp forOp, MemAccessesMap &map);
  Optional<unsigned> visitOp(AffineForOp op, unsigned begin);

+  /// Other operation handlers.
  Optional<unsigned> visitOp(AffineIfOp op, unsigned begin);
-  Optional<unsigned> visitOp(ReturnOp op, unsigned begin);
-  Optional<unsigned> visitOp(AffineYieldOp op, unsigned begin);
-  Optional<unsigned> visitOp(ArrayOp op, unsigned begin);
+  Optional<unsigned> visitOp(CallOp op, unsigned begin);

  /// Handle operations with profiled latency.
 #define HANDLE(OPTYPE, KEYNAME)                                                \
@ -100,10 +102,11 @@ public:
  HANDLE(MulFOp, "fmul");
  HANDLE(DivFOp, "fdiv");
  HANDLE(CmpFOp, "fcmp");
-  HANDLE(SelectOp, "fselect");
 #undef HANDLE

-  Optional<unsigned> estimateBlock(Block &block, unsigned begin);
+  /// Block scheduler and estimator.
+  Optional<std::pair<unsigned, unsigned>> estimateBlock(Block &block,
+                                                        unsigned begin);
  void reverseSchedule();
  void estimateFunc();

@ -115,21 +118,27 @@ public:
 } // namespace

 /// Collect all dependencies detected in the function.
-void HLSCppEstimator::getFuncMemRefDepends() {
+void HLSCppEstimator::getFuncDependencies() {
  // TODO: This can be simplified by traversing each ArrayOp in the function.
-  LoadStoresMap loadStoresMap;
-  getLoadStoresMap(func.front(), loadStoresMap);
+  MemAccessesMap map;
+  getMemAccessesMap(func.front(), map, /*includeCalls=*/true);

-  // Walk through all ArrayOp - LoadOp/StoreOp pairs.
-  for (auto &pair : loadStoresMap) {
-    auto loadStores = pair.second;
+  // Walk through all ArrayOp - LoadOp/StoreOp pairs, and find all memory
+  // related dependencies.
+  for (auto &pair : map) {
+    auto memAccesses = pair.second;

    // Walk through each pair of source and destination. Note that for intra
    // iteration dependencies, srcOp is always before dstOp.
    unsigned srcIndex = 1;
-    for (auto srcOp : loadStores) {
+    for (auto srcOp : memAccesses) {
+      for (auto dstOp : llvm::drop_begin(memAccesses, srcIndex)) {
+        if (isa<mlir::CallOp>(srcOp) || isa<mlir::CallOp>(dstOp)) {
+          // TODO: for now, all dstOps are considered to have dependencies to
+          // the srcOp if either the dstOp or srcOp is a CallOp.
+          dependsMap[srcOp].push_back(dstOp);
+        } else {
          MemRefAccess srcAccess(srcOp);
-      for (auto dstOp : llvm::drop_begin(loadStores, srcIndex)) {
          MemRefAccess dstAccess(dstOp);

          bool dependFlag = false;
@ -149,9 +158,24 @@ void HLSCppEstimator::getFuncMemRefDepends() {
          if (dependFlag)
            dependsMap[srcOp].push_back(dstOp);
        }
+      }
      srcIndex++;
    }
  }
+
+  // Walk through all loops in the function and establish dependencies. The
+  // rationale is that in Vivado HLS, a loop is always dominated by another
+  // loop before it, even if no actual dependencies exist between them.
+  SmallVector<Operation *, 16> loops;
+  func.walk([&](AffineForOp loop) { loops.push_back(loop); });
+
+  unsigned loopIndex = 1;
+  for (auto srcLoop : loops) {
+    for (auto dstLoop : llvm::drop_begin(loops, loopIndex))
+      if (checkSameLevel(srcLoop, dstLoop))
+        dependsMap[srcLoop].push_back(dstLoop);
+    loopIndex++;
+  }
 }

 //===----------------------------------------------------------------------===//
@ -235,12 +259,6 @@ int32_t HLSCppEstimator::getPartitionIndex(Operation *op) {

 /// Schedule load/store operation honoring the memory ports number limitation.
 unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
-  // Check dependencies of the operation and update schedule level.
-  for (auto dstOp : dependsMap[op]) {
-    auto sameLevelDstOp = getSameLevelDstOp(op, dstOp);
-    begin = max(getUIntAttrValue(sameLevelDstOp, "schedule_end"), begin);
-  }
-
  // Calculate partition index.
  auto partitionIdx = getPartitionIndex(op);
  setAttrValue(op, "partition_index", partitionIdx);
@ -348,7 +366,7 @@ unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
 // }

 /// Calculate the minimum resource II.
-unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) {
+unsigned HLSCppEstimator::getResMinII(MemAccessesMap &map) {
  unsigned II = 1;

  for (auto &pair : map) {
@ -414,7 +432,7 @@ unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) {
 }

 /// Calculate the minimum dependency II.
-unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoresMap &map) {
+unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
  unsigned II = 1;

  // Collect start and end level of the pipeline.
@ -496,21 +514,16 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {

  // Collect load and store operations in the loop block for solving possible
  // dependencies.
-  LoadStoresMap map;
-  getLoadStoresMap(loopBlock, map);
-
-  // Check dependencies of all load/store operations and update schedule level.
-  for (auto pair : map)
-    for (auto srcOp : pair.second)
-      for (auto dstOp : dependsMap[srcOp]) {
-        auto sameLevelDstOp = getSameLevelDstOp(srcOp, dstOp);
-        begin = max(getUIntAttrValue(sameLevelDstOp, "schedule_end"), begin);
-      }
+  // TODO: include CallOps, how? Maybe we need to somehow analyze the memory
+  // access behavior of the CallOp.
+  MemAccessesMap map;
+  getMemAccessesMap(loopBlock, map);

  // Estimate the loop block.
-  if (auto schedule = estimateBlock(loopBlock, begin))
-    end = max(end, schedule.getValue());
-  else
+  if (auto schedule = estimateBlock(loopBlock, begin)) {
+    end = max(end, schedule.getValue().second);
+    begin = max(begin, schedule.getValue().first);
+  } else
    return Optional<unsigned>();

  // If the current loop is annotated as pipeline, extra dependency and
@ -582,7 +595,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {

  // Estimate then block.
  if (auto schedule = estimateBlock(*thenBlock, begin))
-    end = max(end, schedule.getValue());
+    end = max(end, schedule.getValue().second);
  else
    return Optional<unsigned>();

@ -591,7 +604,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
    auto elseBlock = op.getElseBlock();

    if (auto schedule = estimateBlock(*elseBlock, begin))
-      end = max(end, schedule.getValue());
+      end = max(end, schedule.getValue().second);
    else
      return Optional<unsigned>();
  }
@ -602,23 +615,20 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
  return end;
 }

-Optional<unsigned> HLSCppEstimator::visitOp(ReturnOp op, unsigned begin) {
-  setScheduleValue(op, begin, begin);
-  return begin;
-}
+Optional<unsigned> HLSCppEstimator::visitOp(mlir::CallOp op, unsigned begin) {
+  auto callee = SymbolTable::lookupSymbolIn(func.getParentOp(), op.getCallee());
+  auto subFunc = dyn_cast<FuncOp>(callee);
+  assert(subFunc && "callable is not a function operation");

-Optional<unsigned> HLSCppEstimator::visitOp(AffineYieldOp op, unsigned begin) {
-  setScheduleValue(op, begin, begin);
-  return begin;
-}
+  HLSCppEstimator estimator(subFunc, latencyMap);
+  estimator.estimateFunc();

-Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
-  for (auto user : op.getResult().getUsers()) {
-    auto sameLevelDstOp = getSameLevelDstOp(op, user);
-    begin = max(getUIntAttrValue(sameLevelDstOp, "schedule_end"), begin);
-  }
-  setScheduleValue(op, begin, begin);
-  return begin;
+  // We assume entering and leaving the subfunction take 2 extra clock cycles.
+  if (auto subLatency = getUIntAttrValue(subFunc, "latency")) {
+    setScheduleValue(op, begin, begin + subLatency + 2);
+    return begin + subLatency + 1;
+  } else
+    return Optional<unsigned>();
 }

 //===----------------------------------------------------------------------===//
@ -626,9 +636,11 @@ Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
 //===----------------------------------------------------------------------===//

 /// Estimate the latency of a block with ALAP scheduling strategy, return the
-/// end level of schedule.
-Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,
-                                                  unsigned begin) {
+/// (begin, end) schedule levels as a pair. The returned begin may differ from
+/// the input begin (typically happens in AffineForOps).
+Optional<std::pair<unsigned, unsigned>>
+HLSCppEstimator::estimateBlock(Block &block, unsigned begin) {
+  unsigned blockBegin = begin;
  unsigned blockEnd = begin;

  // Reversely walk through all operations in the block.
@ -639,19 +651,32 @@ Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,

    // Fine the latest arrived successor relying on the current operation.
    for (auto result : op->getResults())
-      for (auto user : result.getUsers())
-        opBegin = max(opBegin, getUIntAttrValue(user, "schedule_end"));
+      for (auto user : result.getUsers()) {
+        auto sameLevelUser = getSameLevelDstOp(op, user);
+        opBegin = max(opBegin, getUIntAttrValue(sameLevelUser, "schedule_end"));
+      }
+
+    // Check dependencies of the operation and update schedule level.
+    for (auto dstOp : dependsMap[op]) {
+      auto sameLevelDstOp = getSameLevelDstOp(op, dstOp);
+      opBegin = max(opBegin, getUIntAttrValue(sameLevelDstOp, "schedule_end"));
+    }

    // Estimate the current operation.
    if (auto scheduleEnd = dispatchVisitor(op, opBegin))
      opEnd = max(opEnd, scheduleEnd.getValue());
    else
-      return Optional<unsigned>();
+      return Optional<std::pair<unsigned, unsigned>>();
+
+    // Update the block schedule end and begin.
+    if (it == block.rbegin())
+      blockBegin = opBegin;
+    else
+      blockBegin = min(blockBegin, opBegin);

-    // Update the block schedule end.
    blockEnd = max(blockEnd, opEnd);
  }
-  return blockEnd;
+  return std::pair<unsigned, unsigned>(blockBegin, blockEnd);
 }

 void HLSCppEstimator::reverseSchedule() {
@ -663,13 +688,16 @@ void HLSCppEstimator::reverseSchedule() {
    // Reverse schedule level.
    if (auto surOp = getSurroundingOp(op)) {
      if (isa<mlir::AffineForOp>(surOp)) {
+        auto surOpBegin = getUIntAttrValue(surOp, "schedule_begin");
+
        if (getBoolAttrValue(surOp, "flatten")) {
          // Handle flattened surrounding loops.
-          setScheduleValue(op, 0, end - begin);
+          setScheduleValue(op, surOpBegin, surOpBegin + end - begin);
        } else {
          // Handle normal cases.
          auto iterLatency = getUIntAttrValue(surOp, "iter_latency");
-          setScheduleValue(op, iterLatency - end, iterLatency - begin);
+          setScheduleValue(op, surOpBegin + iterLatency - end,
+                           surOpBegin + iterLatency - begin);
        }
      } else if (isa<FuncOp>(surOp)) {
        auto latency = getUIntAttrValue(surOp, "latency");
@ -682,11 +710,13 @@ void HLSCppEstimator::reverseSchedule() {
 void HLSCppEstimator::estimateFunc() {
  // Recursively estimate blocks in the function.
  if (auto schedule = estimateBlock(func.front(), 0)) {
-    auto latency = schedule.getValue();
+    auto latency = schedule.getValue().second;
    setAttrValue(func, "latency", latency);

    // Scheduled levels of all operations are reversed in this method, because
-    // we have done the ALAP scheduling in a reverse order.
+    // we have done the ALAP scheduling in a reverse order. Note that after the
+    // reverse, the annotated scheduling level of each operation is a relative
+    // level of the nearest surrounding AffineForOp or FuncOp.
    reverseSchedule();
  } else {
    // Scheduling failed due to early error.
@ -706,7 +736,6 @@ static void getLatencyMap(INIReader &spec, std::string freq,
  latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
  latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
  latencyMap["fcmp"] = spec.GetInteger(freq, "fcmp", 1);
-  latencyMap["fselect"] = spec.GetInteger(freq, "fselect", 0);
 }

 namespace {
@ -725,10 +754,17 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
    getLatencyMap(spec, freq, latencyMap);

    // Estimate performance and resource utilization.
-    for (auto func : getOperation().getOps<FuncOp>()) {
+    for (auto func : getOperation().getOps<FuncOp>())
+      if (auto topFunction = func.getAttrOfType<BoolAttr>("top_function"))
+        if (topFunction.getValue()) {
+          // Estimate the top function. Any other functions called by the top
+          // function are estimated as part of estimating the top function
+          // itself.
          HLSCppEstimator estimator(func, latencyMap);
          estimator.estimateFunc();
        }
+
+    // TODO: Somehow print the estimation report?
  }
 };
 } // namespace
--- a/lib/Analysis/Utils.cpp
+++ b/lib/Analysis/Utils.cpp
@ -119,14 +119,25 @@ hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
 }

 /// Collect all load and store operations in the block.
-void scalehls::getLoadStoresMap(Block &block, LoadStoresMap &map) {
+void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
+                                 bool includeCalls) {
  for (auto &op : block) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      map[MemRefAccess(&op).memref].push_back(&op);
-    else if (op.getNumRegions()) {
+
+    else if (includeCalls && isa<CallOp>(op)) {
+      // A CallOp taking memref operands is recorded once, under its first one.
+      for (auto operand : op.getOperands())
+        if (operand.getType().isa<MemRefType>()) {
+          map[operand].push_back(&op);
+          break;
+        }
+
+    } else if (op.getNumRegions()) {
+      // Recursively collect memory access operations in each block.
      for (auto &region : op.getRegions())
        for (auto &block : region)
-          getLoadStoresMap(block, map);
+          getMemAccessesMap(block, map);
    }
  }
 }
--- a/lib/EmitHLSCpp/EmitHLSCpp.cpp
+++ b/lib/EmitHLSCpp/EmitHLSCpp.cpp
@ -1445,6 +1445,7 @@ void ModuleEmitter::emitFunction(FuncOp func) {
    emitError(func, "has zero or more than one basic blocks.");

  if (auto top = func.getAttrOfType<BoolAttr>("top_function"))
+    if (top.getValue())
      os << "/// This is top function.\n";

  if (auto latency = func.getAttrOfType<IntegerAttr>("latency"))
--- a/lib/Transforms/ArrayPartition.cpp
+++ b/lib/Transforms/ArrayPartition.cpp
@ -32,7 +32,7 @@ static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) {
 }

 template <typename OpType>
-static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) {
+static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
  for (auto pair : map) {
    auto arrayOp = getArrayOp(pair.first);
    auto arrayShape = arrayOp.getShapedType().getShape();
@ -118,12 +118,12 @@ void ArrayPartition::runOnOperation() {
    // TODO: support imperfect loop.
    if (auto outermost = getPipelineLoop(forOp)) {
      // Collect memory access information.
-      LoadStoresMap loadMap;
+      MemAccessesMap loadMap;
      outermost.walk([&](mlir::AffineLoadOp loadOp) {
        loadMap[loadOp.getMemRef()].push_back(loadOp);
      });

-      LoadStoresMap storeMap;
+      MemAccessesMap storeMap;
      outermost.walk([&](mlir::AffineStoreOp storeOp) {
        storeMap[storeOp.getMemRef()].push_back(storeOp);
      });
--- a/lib/Transforms/SimplifyMemRefAccess.cpp
+++ b/lib/Transforms/SimplifyMemRefAccess.cpp
@ -24,8 +24,8 @@ void SimplifyMemRefAccess::runOnOperation() {
  auto func = getOperation();

  // Collect all load and store operations in the function block.
-  LoadStoresMap map;
-  getLoadStoresMap(func.front(), map);
+  MemAccessesMap map;
+  getMemAccessesMap(func.front(), map);

  for (auto pair : map) {
    auto loadStores = pair.second;