[Scheduling] Overhaul simplex-based modulo scheduler. (#4426)

2022-12-18 21:58:24 +13:00 · 2022-12-18 21:58:24 +13:00 · 9a12e82d61
parent fba130cea2
commit 9a12e82d61
2 changed files with 70 additions and 67 deletions
--- a/lib/Scheduling/SimplexSchedulers.cpp
+++ b/lib/Scheduling/SimplexSchedulers.cpp
@ -261,7 +261,6 @@ protected:
  enum { OBJ_LATENCY = 0, OBJ_AXAP /* i.e. either ASAP or ALAP */ };
  bool fillObjectiveRow(SmallVector<int> &row, unsigned obj) override;
  void updateMargins();
-  void incrementII();
  void scheduleOperation(Operation *n);
  unsigned computeResMinII();

@ -564,8 +563,8 @@ LogicalResult SimplexSchedulerBase::solveTableau() {

    // If we did not find a pivot column, then the entire row contained only
    // positive entries, and the problem is in principle infeasible. However, if
-    // the entry in the `parameterTColumn` is positive, we can make the LP
-    // feasible again by increasing the II.
+    // the entry in the `parameterTColumn` is positive, we can try to make the
+    // LP feasible again by increasing the II.
    int entry1Col = tableau[*pivotRow][parameter1Column];
    int entryTCol = tableau[*pivotRow][parameterTColumn];
    if (entryTCol > 0) {
@ -574,11 +573,12 @@ LogicalResult SimplexSchedulerBase::solveTableau() {
      // would not have been a valid pivot row), and without the negation, the
      // new II would be negative.
      assert(entry1Col < 0);
-      parameterT = (-entry1Col - 1) / entryTCol + 1;
-
-      LLVM_DEBUG(dbgs() << "Increased II to " << parameterT << '\n');
-
-      continue;
+      int newParameterT = (-entry1Col - 1) / entryTCol + 1;
+      if (newParameterT > parameterT) {
+        parameterT = newParameterT;
+        LLVM_DEBUG(dbgs() << "Increased II to " << parameterT << '\n');
+        continue;
+      }
    }

    // Otherwise, the linear program is infeasible.
@ -685,9 +685,8 @@ LogicalResult SimplexSchedulerBase::scheduleAt(unsigned startTimeVariable,
    return failure();
  }

-  // Translate S by the other parameter(s). For acyclic problems, this means
-  // setting `factor1` to `timeStep`. For cyclic problems, we perform a modulo
-  // decomposition: S = `factor1` + `factorT` * T, with `factor1` < T.
+  // Translate S by the other parameter(s). This means setting `factor1` to
+  // `timeStep`.
  //
  // This translation does not change the values of the parametric constants,
  // hence we do not need to solve the tableau again.
@ -695,13 +694,17 @@ LogicalResult SimplexSchedulerBase::scheduleAt(unsigned startTimeVariable,
  // Note: I added a negation of the factors here, which is not mentioned in the
  // paper's text, but apparently used in the example. Without it, the intended
  // effect, i.e. making the S-column all-zero again, is not achieved.
-  if (parameterT == 0)
-    translate(parameterSColumn, /* factor1= */ -timeStep, /* factorS= */ 1,
-              /* factorT= */ 0);
-  else
-    translate(parameterSColumn, /* factor1= */ -(timeStep % parameterT),
-              /* factorS= */ 1,
-              /* factorT= */ -(timeStep / parameterT));
+  //
+  // Note 2: For cyclic problems, the paper suggested to perform a modulo
+  // decomposition: S = `factor1` + `factorT` * T, with `factor1` < T.
+  // However, this makes the value baked into the tableau dependent on
+  // `parameterT`, and it is unclear to me how to update it correctly when
+  // changing the II. I found it much more robust to fix the operations to
+  // absolute time steps, and manually shift them by the appropriate amount
+  // whenever the II is incremented (cf. adding `phiJ`, `phiN` in the modulo
+  // scheduler's `scheduleOperation` method).
+  translate(parameterSColumn, /* factor1= */ -timeStep, /* factorS= */ 1,
+            /* factorT= */ 0);

  return success();
 }
@ -1007,46 +1010,33 @@ void ModuloSimplexScheduler::updateMargins() {
  }
 }

-void ModuloSimplexScheduler::incrementII() {
-  // Account for the shift in the frozen start times that will be caused by
-  // increasing `parameterT`: Assuming decompositions of
-  //   t = phi * II + tau  and  t' = phi * (II + 1) + tau,
-  // so the required shift is
-  //   t' - t = phi = floordiv(t / II)
-  for (auto &kv : frozenVariables) {
-    unsigned &frozenTime = kv.getSecond();
-    frozenTime += frozenTime / parameterT;
-  }
-
-  // Increment the parameter.
-  ++parameterT;
-}
-
 void ModuloSimplexScheduler::scheduleOperation(Operation *n) {
+  auto oprN = *prob.getLinkedOperatorType(n);
  unsigned stvN = startTimeVariables[n];

-  // Get current state of the LP, and determine range of alternative times
-  // guaranteed to be feasible.
+  // Get current state of the LP. We'll try to schedule at its current time step
+  // in the partial solution, and the II-1 following time steps. Scheduling the
+  // op to a later time step may increase the overall latency, however, as long
+  // as the solution is still feasible, we prefer that over incrementing the II
+  // to resolve resource conflicts.
  unsigned stN = getStartTime(stvN);
-  unsigned lbN = (unsigned)std::max<int>(asapTimes[stvN], stN - parameterT + 1);
-  unsigned ubN = (unsigned)std::min<int>(alapTimes[stvN], lbN + parameterT - 1);
+  unsigned ubN = stN + parameterT - 1;

-  LLVM_DEBUG(dbgs() << "Attempting to schedule at t=" << stN << ", or in ["
-                    << lbN << ", " << ubN << "]: " << *n << '\n');
+  LLVM_DEBUG(dbgs() << "Attempting to schedule in [" << stN << ", " << ubN
+                    << "]: " << *n << '\n');

-  SmallVector<unsigned> candTimes;
-  candTimes.push_back(stN);
-  for (unsigned ct = lbN; ct <= ubN; ++ct)
-    if (ct != stN)
-      candTimes.push_back(ct);
-
-  for (unsigned ct : candTimes)
+  for (unsigned ct = stN; ct <= ubN; ++ct)
    if (succeeded(mrt.enter(n, ct))) {
-      auto fixedN = scheduleAt(stvN, stN);
-      assert(succeeded(fixedN));
-      (void)fixedN;
-      LLVM_DEBUG(dbgs() << "Success at t=" << stN << " " << *n << '\n');
-      return;
+      auto fixedN = scheduleAt(stvN, ct);
+      if (succeeded(fixedN)) {
+        LLVM_DEBUG(dbgs() << "Success at t=" << ct << " " << *n << '\n');
+        return;
+      }
+      // Problem became infeasible with `n` at `ct`, roll back the MRT
+      // assignment. Also, no later time can be feasible, so stop the search
+      // here.
+      mrt.release(n);
+      break;
    }

  // As a last resort, increase II to make room for the op. De Dinechin's
@ -1071,30 +1061,43 @@ void ModuloSimplexScheduler::scheduleOperation(Operation *n) {
  // We're going to revisit the current partial schedule.
  SmallVector<Operation *> moved;
  for (Operation *j : scheduled) {
+    auto oprJ = *prob.getLinkedOperatorType(j);
    unsigned stvJ = startTimeVariables[j];
    unsigned stJ = getStartTime(stvJ);
    unsigned phiJ = stJ / parameterT;
    unsigned tauJ = stJ % parameterT;
    unsigned deltaJ = 0;

-    // To actually resolve the resource conflicts, we move operations that are
-    // "preceded" (cf. de Dinechin's ≺ relation) one slot to the right.
-    if (tauN < tauJ || (tauN == tauJ && phiN > phiJ) ||
-        (tauN == tauJ && phiN == phiJ && stvN < stvJ)) {
-      // TODO: Replace the last condition with a proper graph analysis.
+    if (oprN == oprJ) {
+      // To actually resolve the resource conflicts, we will move operations
+      // that are "preceded" (cf. de Dinechin's ≺ relation) one slot to the
+      // right.
+      if (tauN < tauJ || (tauN == tauJ && phiN > phiJ) ||
+          (tauN == tauJ && phiN == phiJ && stvN < stvJ)) {
+        // TODO: Replace the last condition with a proper graph analysis.

-      deltaJ = 1;
-      moved.push_back(j);
-      if (tauN == tauJ)
-        deltaN = 0;
+        deltaJ = 1;
+        moved.push_back(j);
+        if (tauN == tauJ)
+          deltaN = 0;
+      }
    }

-    // Apply the move to the tableau.
-    moveBy(stvJ, deltaJ);
+    // Move operation.
+    //
+    // In order to keep the op in its current MRT slot `tauJ` after incrementing
+    // the II, we add `phiJ`:
+    //   stJ + phiJ = (phiJ * parameterT + tauJ) + phiJ
+    //              = phiJ * (parameterT + 1) + tauJ
+    //
+    // Shifting an additional `deltaJ` time steps then moves the op to a
+    // different MRT slot, in order to make room for the operation that caused
+    // the resource conflict.
+    moveBy(stvJ, phiJ + deltaJ);
  }

-  // Finally, increment the II.
-  incrementII();
+  // Finally, increment the II and solve to apply the moves.
+  ++parameterT;
  auto solved = solveTableau();
  assert(succeeded(solved));
  (void)solved;
@ -1108,8 +1111,8 @@ void ModuloSimplexScheduler::scheduleOperation(Operation *n) {
    (void)enteredM;
  }

-  // Finally, schedule the operation. Adding `phiN` accounts for the implicit
-  // shift caused by incrementing the II; cf. `incrementII()`.
+  // Finally, schedule the operation. Again, adding `phiN` accounts for the
+  // implicit shift caused by incrementing the II.
  auto fixedN = scheduleAt(stvN, stN + phiN + deltaN);
  auto enteredN = mrt.enter(n, tauN + deltaN);
  assert(succeeded(fixedN) && succeeded(enteredN));
--- a/test/Scheduling/modulo-problems.mlir
+++ b/test/Scheduling/modulo-problems.mlir
@ -20,7 +20,7 @@ func.func @canis14_fig2() attributes {
 }

 // SIMPLEX-LABEL: minII_feasible
-// SIMPLEX-SAME: simplexInitiationInterval = 4
+// SIMPLEX-SAME: simplexInitiationInterval = 3
 func.func @minII_feasible() attributes {
  problemInitiationInterval = 3,
  auxdeps = [ [6,1,5], [5,2,3], [6,7] ],