[LegalizeDataflow] support loop-based dataflow legalization; [SplitFunction] include live-in analysis

Hanchen Ye 2020-12-24 14:18:14 -06:00
parent f02955c284
commit fef0cdc3fe
4 changed files with 189 additions and 43 deletions

View File

@ -92,6 +92,12 @@ public:
using MemAccesses = SmallVector<Operation *, 16>;
using MemAccessesMap = DenseMap<Value, MemAccesses>;
/// Collect all load and store operations in the block. The collected operations
/// in the MemAccessesMap are ordered, which means an operation will never
/// dominate another operation in front of it.
void getMemAccessesMap(Block &block, MemAccessesMap &map,
bool includeCalls = false);
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
// AffineIfOp is transparent.
Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
@ -110,11 +116,17 @@ hlscpp::ArrayOp getArrayOp(Value memref);
hlscpp::ArrayOp getArrayOp(Operation *op);
// For storing all accessed memrefs indexed by an operation (e.g. AffineForOp).
using MemRefs = SmallVector<Value, 4>;
using MemRefsMap = DenseMap<Operation *, MemRefs>;
/// With the generated MemRefsMap, given a specific loop, we can easily find all
/// memories which are consumed by the loop.
void getLoopLoadMemsMap(Block &block, MemRefsMap &map);
/// With the generated MemAccessesMap, given a specific memory, we can easily
/// find the loops which store data to the memory.
void getLoopMemStoresMap(Block &block, MemAccessesMap &map);
} // namespace scalehls
} // namespace mlir

View File

@ -8,6 +8,28 @@
using namespace mlir;
using namespace scalehls;
/// Collect all load and store operations in the block.
void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
bool includeCalls) {
for (auto &op : block) {
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
map[MemRefAccess(&op).memref].push_back(&op);
else if (includeCalls && isa<CallOp>(op)) {
// All CallOps accessing the memory will be pushed back to the map.
for (auto operand : op.getOperands())
if (operand.getType().isa<MemRefType>())
map[operand].push_back(&op);
} else if (op.getNumRegions()) {
// Recursively collect memory access operations in each block.
for (auto &region : op.getRegions())
for (auto &block : region)
getMemAccessesMap(block, map);
}
}
}
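
For reference, a minimal usage sketch of the relocated helper; the driver function, the FuncOp argument, and the diagnostic output are hypothetical and not part of this patch:

    #include "Analysis/Utils.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace mlir;
    using namespace scalehls;

    // Hypothetical driver: collect every memory access in the entry block of
    // `func`, with CallOps that take a memref operand included as accesses.
    static void dumpMemAccessCounts(FuncOp func) {
      MemAccessesMap map;
      getMemAccessesMap(func.front(), map, /*includeCalls=*/true);
      for (auto &pair : map)
        // Each vector is ordered: an operation never dominates an operation
        // stored before it, so producers always appear before their consumers.
        llvm::errs() << "memref with " << pair.second.size()
                     << " ordered accesses\n";
    }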
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
// AffineIfOp is transparent.
Optional<std::pair<Operation *, Operation *>>
@ -118,26 +140,41 @@ hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
return getArrayOp(MemRefAccess(op).memref);
}
/// With the generated MemRefsMap, given a specific loop, we can easily find all
/// memories which are consumed by the loop.
void scalehls::getLoopLoadMemsMap(Block &block, MemRefsMap &map) {
for (auto loop : block.getOps<AffineForOp>()) {
loop.walk([&](Operation *op) {
if (auto affineLoad = dyn_cast<AffineLoadOp>(op)) {
auto &mems = map[loop];
if (std::find(mems.begin(), mems.end(), affineLoad.getMemRef()) ==
mems.end())
mems.push_back(affineLoad.getMemRef());
} else if (auto load = dyn_cast<LoadOp>(op)) {
auto &mems = map[loop];
if (std::find(mems.begin(), mems.end(), load.getMemRef()) == mems.end())
mems.push_back(load.getMemRef());
}
});
}
}
/// With the generated MemAccessesMap, given a specific memory, we can easily
/// find the loops which store data to the memory.
void scalehls::getLoopMemStoresMap(Block &block, MemAccessesMap &map) {
for (auto loop : block.getOps<AffineForOp>()) {
loop.walk([&](Operation *op) {
if (auto affineStore = dyn_cast<AffineStoreOp>(op)) {
auto &loops = map[affineStore.getMemRef()];
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
loops.push_back(loop);
} else if (auto store = dyn_cast<StoreOp>(op)) {
auto &loops = map[store.getMemRef()];
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
loops.push_back(loop);
}
});
}
}
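
As a usage sketch of the two new helpers together (the wrapper below is hypothetical; only the map types and builders come from this patch), the intended query pattern is: for each loop, look up the memories it loads, then look up the loops that store to them. This is the pattern LegalizeDataflow relies on.

    #include "Analysis/Utils.h"

    using namespace mlir;
    using namespace scalehls;

    // Hypothetical sketch: emit a remark on every loop that produces data
    // consumed by another top-level loop in `block`.
    static void markProducerLoops(Block &block) {
      MemRefsMap loadMemsMap;
      MemAccessesMap memStoresMap;
      getLoopLoadMemsMap(block, loadMemsMap);
      getLoopMemStoresMap(block, memStoresMap);

      for (auto loop : block.getOps<AffineForOp>())
        for (auto mem : loadMemsMap[loop])
          for (auto producer : memStoresMap[mem]) {
            if (producer == loop)
              continue;
            producer->emitRemark("produces data consumed by another loop");
          }
    }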

View File

@ -2,8 +2,10 @@
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Dialect/HLSKernel/HLSKernel.h"
#include "Transforms/Passes.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
using namespace std;
using namespace mlir;
@ -19,8 +21,12 @@ void LegalizeDataflow::runOnOperation() {
auto func = getOperation();
auto builder = OpBuilder(func);
// TODO: support non-CNNOps.
//===--------------------------------------------------------------------===//
// HLSKernel Handler
//===--------------------------------------------------------------------===//
// Handle HLSKernel operations. Note that HLSKernel operations must not have
// been bufferized at this point.
for (auto kernelOp : func.front().getOps<hlskernel::HLSKernelOpInterface>()) {
auto op = kernelOp.getOperation();
@ -34,7 +40,8 @@ void LegalizeDataflow::runOnOperation() {
if (auto attr = predOp->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowLevel = max(dataflowLevel, attr.getInt());
else
op->emitError("has unexpected dominator");
op->emitError(
"HLSKernelOp has unexpected predecessor, legalization failed");
}
}
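
A compact worked example of the ASAP rule applied above; the levels are illustrative only:

    #include <algorithm>
    #include <cstdint>
    #include <initializer_list>

    // An op whose predecessors are tagged with dataflow_level 1 and 3 is
    // placed one level after the latest of them: max(1, 3) + 1 == 4.
    static int64_t asapLevel(std::initializer_list<int64_t> predLevels) {
      int64_t dataflowLevel = 0;
      for (int64_t predLevel : predLevels)
        dataflowLevel = std::max(dataflowLevel, predLevel);
      return dataflowLevel + 1; // asapLevel({1, 3}) == 4
    }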
@ -71,20 +78,96 @@ void LegalizeDataflow::runOnOperation() {
}
}
//===--------------------------------------------------------------------===//
// AffineForLoop Handler
//===--------------------------------------------------------------------===//
// Handle loops. Note that this assumes all operations have been bufferized at
// this point. Therefore, HLSKernel ops and loops will never have dependencies
// on each other in this pass.
// TODO: analyze live ins.
MemRefsMap loadMemsMap;
MemAccessesMap memStoresMap;
getLoopLoadMemsMap(func.front(), loadMemsMap);
getLoopMemStoresMap(func.front(), memStoresMap);
for (auto loop : func.front().getOps<mlir::AffineForOp>()) {
int64_t dataflowLevel = 0;
for (auto mem : loadMemsMap[loop]) {
for (auto predLoop : memStoresMap[mem]) {
if (predLoop == loop)
continue;
// Establish an ASAP dataflow schedule.
if (auto attr = predLoop->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowLevel = max(dataflowLevel, attr.getInt());
else
loop.emitError(
"loop has unexpected predecessor, legalization failed");
}
}
// Set an attribute for indicating the scheduled dataflow level.
loop.setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(),
dataflowLevel + 1));
// Eliminate bypass paths.
for (auto mem : loadMemsMap[loop]) {
for (auto predLoop : memStoresMap[mem]) {
if (predLoop == loop)
continue;
auto predDataflowLevel =
predLoop->getAttrOfType<IntegerAttr>("dataflow_level").getInt();
// Insert dummy CopyOps if required.
SmallVector<Operation *, 4> dummyOps;
dummyOps.push_back(loop);
for (auto i = dataflowLevel; i > predDataflowLevel; --i) {
// Create CopyOp.
builder.setInsertionPoint(dummyOps.back());
auto interMem = builder.create<mlir::AllocOp>(
loop.getLoc(), mem.getType().cast<MemRefType>());
auto dummyOp =
builder.create<linalg::CopyOp>(loop.getLoc(), mem, interMem);
dummyOp.setAttr("dataflow_level",
builder.getIntegerAttr(builder.getI64Type(), i));
// Chain created CopyOps.
if (i == dataflowLevel) {
loop.walk([&](Operation *op) {
if (auto affineLoad = dyn_cast<mlir::AffineLoadOp>(op)) {
if (affineLoad.getMemRef() == mem)
affineLoad.setMemRef(interMem);
} else if (auto load = dyn_cast<mlir::LoadOp>(op)) {
if (load.getMemRef() == mem)
load.setMemRef(interMem);
}
});
} else
dummyOps.back()->setOperand(0, interMem);
dummyOps.push_back(dummyOp);
}
}
}
}
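
To make the inserted copy chain concrete, an illustrative trace; the level values and buffer names (%mem, %buf2, %buf3) are hypothetical:

    // Suppose loop A stores to %mem and carries dataflow_level 1, while loop B
    // loads %mem and ends up at dataflow_level 4 (so dataflowLevel == 3 and
    // predDataflowLevel == 1 in the code above). Two dummy CopyOps are created:
    //
    //   level 1: loop A writes %mem
    //   level 2: linalg.copy %mem  -> %buf2   (tagged dataflow_level = 2)
    //   level 3: linalg.copy %buf2 -> %buf3   (tagged dataflow_level = 3)
    //   level 4: loop B, whose loads of %mem are redirected to %buf3
    //
    // Every producer/consumer edge now spans exactly one dataflow level, which
    // removes the bypass path from level 1 directly to level 4.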
// Reorder operations that are legalized, including HLSKernel ops or loops.
DenseMap<int64_t, SmallVector<Operation *, 2>> dataflowOps;
func.walk([&](hlskernel::HLSKernelOpInterface kernelOp) {
if (auto attr = kernelOp.getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(kernelOp.getOperation());
func.walk([&](Operation *dataflowOp) {
if (auto attr = dataflowOp->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(dataflowOp);
});
for (auto pair : dataflowOps) {
auto ops = pair.second;
auto lastOp = ops.back();
for (auto it = ops.begin(); it < std::prev(ops.end()); ++it) {
auto op = *it;
op->moveBefore(lastOp);
}
}
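
A small illustrative example of the reordering effect; the op names and levels are hypothetical:

    // Suppose the block currently holds, in order,
    //   %a (dataflow_level 1), %x (level 2), %b (level 1), %y (level 2).
    // For level 1, ops.back() is %b, so %a is moved directly before %b; for
    // level 2, %x is moved directly before %y. The block then reads
    //   %a, %b, %x, %y
    // i.e. each dataflow level becomes a contiguous group anchored at the
    // position of its last operation.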

View File

@ -2,8 +2,10 @@
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Dialect/HLSKernel/HLSKernel.h"
#include "Transforms/Passes.h"
#include "mlir/Analysis/Liveness.h"
using namespace std;
using namespace mlir;
@ -24,10 +26,12 @@ void SplitFunction::runOnOperation() {
funcs.push_back(func);
for (auto top : funcs) {
Liveness liveness(top);
DenseMap<int64_t, SmallVector<Operation *, 2>> dataflowOps;
top.walk([&](hlskernel::HLSKernelOpInterface kernelOp) {
if (auto attr = kernelOp.getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(kernelOp.getOperation());
top.walk([&](Operation *op) {
if (auto attr = op->getAttrOfType<IntegerAttr>("dataflow_level"))
dataflowOps[attr.getInt()].push_back(op);
});
for (auto pair : dataflowOps) {
@ -44,8 +48,15 @@ void SplitFunction::runOnOperation() {
unsigned opIndex = 0;
for (auto op : ops) {
SmallVector<Value, 8> candidateInputs(op->getOperands());
if (auto loop = dyn_cast<mlir::AffineForOp>(op)) {
auto liveIns = liveness.getLiveIn(&loop.getLoopBody().front());
for (auto liveIn : liveIns)
if (!isForInductionVar(liveIn))
candidateInputs.push_back(liveIn);
}
// Add input types and values.
for (auto operand : candidateInputs) {
// Record the index of the operand.
auto operandFound =
std::find(inputValues.begin(), inputValues.end(), operand);
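
Why the live-in handling above is needed, sketched on a hypothetical loop (the names %alpha, %buffer, %i are illustrative, not from this patch):

    // %alpha = ...                       // defined above the loop
    // affine.for %i = 0 to 64 {
    //   %v = affine.load %buffer[%i]     // %buffer also defined above the loop
    //   ... uses of %v and %alpha ...
    // }
    //
    // op->getOperands() on the AffineForOp only yields its bound operands, so
    // %alpha and %buffer would be missed. Liveness::getLiveIn on the body block
    // recovers them, and the isForInductionVar check drops the loop's own
    // induction variable, leaving the values that must become arguments of the
    // split sub-function.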
@ -58,7 +69,6 @@ void SplitFunction::runOnOperation() {
inputValues.push_back(operand);
}
}
// Add output types and values.
for (auto result : op->getResults()) {
@ -68,6 +78,7 @@ void SplitFunction::runOnOperation() {
outputValues.push_back(result);
}
}
opIndex++;
}
// Create a new function for the current dataflow level.
@ -93,8 +104,11 @@ void SplitFunction::runOnOperation() {
for (auto op : ops) {
op->moveBefore(returnOp);
// Connect operands to the arguments of the newly created function.
for (unsigned i = 0, e = inputValues.size(); i < e; ++i)
inputValues[i].replaceUsesWithIf(
entry->getArgument(i), [&](mlir::OpOperand &use) {
return getSameLevelDstOp(returnOp, use.getOwner());
});
opIndex += 1;
}
}
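
The intent of the new rewiring, as far as it can be read from this hunk (getSameLevelDstOp is a project utility; the value name below is hypothetical):

    // Suppose %buffer was recorded as inputValues[2]. After the grouped ops
    // are moved in front of returnOp:
    //   - uses of %buffer owned by ops inside the newly created function are
    //     rewritten to entry->getArgument(2);
    //   - uses of %buffer that remain in the original function are meant to be
    //     left untouched, with getSameLevelDstOp(returnOp, use.getOwner())
    //     acting as the filter that selects only users at the same level as
    //     the new function's terminator.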