fix bugs related to dependency analysis; [AffineLoopOrderOpt] add initial impl of applyAffineLoopOrderOpt method

This commit is contained in:
Hanchen Ye 2021-01-19 15:55:36 -06:00
parent 7c1130452e
commit d3d13e0bd0
8 changed files with 116 additions and 52 deletions

View File

@ -7,8 +7,7 @@
#ifndef SCALEHLS_ANALYSIS_UTILS_H #ifndef SCALEHLS_ANALYSIS_UTILS_H
#define SCALEHLS_ANALYSIS_UTILS_H #define SCALEHLS_ANALYSIS_UTILS_H
#include "mlir/IR/Builders.h" #include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Operation.h"
namespace mlir { namespace mlir {
namespace scalehls { namespace scalehls {
@ -62,6 +61,9 @@ public:
// Helper methods // Helper methods
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
using AffineLoopBand = SmallVector<AffineForOp, 4>;
using AffineLoopBands = SmallVector<AffineLoopBand, 4>;
// For storing all affine memory access operations (including CallOp, // For storing all affine memory access operations (including CallOp,
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref. // AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
using MemAccesses = SmallVector<Operation *, 16>; using MemAccesses = SmallVector<Operation *, 16>;

View File

@ -7,8 +7,8 @@
#ifndef SCALEHLS_TRANSFORMS_PASSES_H #ifndef SCALEHLS_TRANSFORMS_PASSES_H
#define SCALEHLS_TRANSFORMS_PASSES_H #define SCALEHLS_TRANSFORMS_PASSES_H
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Pass/Pass.h" #include "mlir/Pass/Pass.h"
#include "scalehls/Analysis/Utils.h"
#include <memory> #include <memory>
namespace mlir { namespace mlir {
@ -34,7 +34,7 @@ bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
/// Apply remove variable bound to all inner loops of the input loop. /// Apply remove variable bound to all inner loops of the input loop.
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder); bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
bool applyAffineLoopOrderOpt(AffineForOp loop, OpBuilder &builder); bool applyAffineLoopOrderOpt(AffineLoopBand band, OpBuilder &builder);
/// Apply loop pipelining to the input loop, all inner loops are automatically /// Apply loop pipelining to the input loop, all inner loops are automatically
/// fully unrolled. /// fully unrolled.

View File

@ -426,18 +426,26 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
if (srcMuxSize > 2 || dstMuxSize > 2) if (srcMuxSize > 2 || dstMuxSize > 2)
distance = 1; distance = 1;
else { else {
SmallVector<int64_t, 2> accumTripCounts; SmallVector<int64_t, 8> accumTrips;
accumTripCounts.push_back(1); accumTrips.push_back(1);
// Calculate the distance of this dependency. // Calculate the distance of this dependency.
for (auto i = depComps.rbegin(); i < depComps.rend(); ++i) { for (auto i = depComps.rbegin(); i < depComps.rend(); ++i) {
auto dep = *i; auto dep = *i;
auto tripCount = getIntAttrValue(dep.op, "trip_count"); auto tripCount = getIntAttrValue(dep.op, "trip_count");
auto ub = dep.ub.getValue();
auto lb = dep.lb.getValue();
if (dep.lb) // If ub is non-negative, calculate the minimum non-negative
distance += accumTripCounts.back() * dep.lb.getValue(); // distance. Otherwise, set distance to negative and break.
if (ub >= 0)
distance += accumTrips.back() * max(lb, (int64_t)0);
else {
distance = -1;
break;
}
accumTripCounts.push_back(accumTripCounts.back() * tripCount); accumTrips.push_back(accumTrips.back() * tripCount);
} }
} }
@ -481,8 +489,8 @@ bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
// If the current loop is annotated as pipelined loop, extra dependency and // If the current loop is annotated as pipelined loop, extra dependency and
// resource aware II analysis will be executed. // resource aware II analysis will be executed.
if (getBoolAttrValue(op, "pipeline")) { if (getBoolAttrValue(op, "pipeline")) {
// Collect load and store operations in the loop block for solving possible // Collect load and store operations in the loop block for solving
// carried dependencies. // possible carried dependencies.
// TODO: include CallOps, how? It seems dependencies always exist for all // TODO: include CallOps, how? It seems dependencies always exist for all
// CallOps no matter their access patterns. // CallOps no matter their access patterns.
MemAccessesMap map; MemAccessesMap map;
@ -510,9 +518,9 @@ bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
return true; return true;
} }
// If the current loop is annotated as flatten, it will be flattened into the // If the current loop is annotated as flatten, it will be flattened into
// child pipelined loop. This will increase the flattened loop trip count // the child pipelined loop. This will increase the flattened loop trip
// without changing the iteration latency. // count without changing the iteration latency.
if (getBoolAttrValue(op, "flatten")) { if (getBoolAttrValue(op, "flatten")) {
auto child = dyn_cast<AffineForOp>(op.getLoopBody().front().front()); auto child = dyn_cast<AffineForOp>(op.getLoopBody().front().front());
assert(child && "the first containing operation is not a loop"); assert(child && "the first containing operation is not a loop");
@ -575,8 +583,8 @@ bool HLSCppEstimator::visitOp(AffineIfOp op, int64_t begin) {
return false; return false;
} }
// In our assumption, AffineIfOp is completely transparent. Therefore, we set // In our assumption, AffineIfOp is completely transparent. Therefore, we
// a dummy schedule begin here. // set a dummy schedule begin here.
setScheduleValue(op, end, end); setScheduleValue(op, end, end);
return true; return true;
} }
@ -659,9 +667,9 @@ HLSCppEstimator::Resource HLSCppEstimator::estimateResource(Block &block,
totalFmul /= (latencyMap["fmul"] + 1); totalFmul /= (latencyMap["fmul"] + 1);
// We assume the loop resource utilization cannot be shared. Therefore, the // We assume the loop resource utilization cannot be shared. Therefore, the
// overall resource utilization is loops' plus other operations'. According to // overall resource utilization is loops' plus other operations'. According
// profiling, floating-point add and multiply will consume 2 and 3 DSP units, // to profiling, floating-point add and multiply will consume 2 and 3 DSP
// respectively. // units, respectively.
auto dsp = loopDSPNum + maxFadd * 2 + maxFmul * 3; auto dsp = loopDSPNum + maxFadd * 2 + maxFmul * 3;
// If the block is pipelined (interval is positive), the minimum resource // If the block is pipelined (interval is positive), the minimum resource
@ -787,14 +795,14 @@ void HLSCppEstimator::estimateFunc() {
} }
// Scheduled levels of all operations are reversed in this method, because // Scheduled levels of all operations are reversed in this method, because
// we have done the ALAP scheduling in a reverse order. Note that after the // we have done the ALAP scheduling in a reverse order. Note that after
// reverse, the annotated scheduling level of each operation is a relative // the reverse, the annotated scheduling level of each operation is a
// level of the nearest surrounding AffineForOp or FuncOp. // relative level of the nearest surrounding AffineForOp or FuncOp.
reverseSchedule(); reverseSchedule();
} else { } else {
// Scheduling failed due to early error. // Scheduling failed due to early error.
// TODO: further refinement and try the best to avoid failing, e.g. support // TODO: further refinement and try the best to avoid failing, e.g.
// variable loop bound. // support variable loop bound.
setAttrValue(func, "latency", std::string("unknown")); setAttrValue(func, "latency", std::string("unknown"));
} }
@ -856,9 +864,9 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
for (auto func : module.getOps<FuncOp>()) for (auto func : module.getOps<FuncOp>())
if (auto topFunction = func->getAttrOfType<BoolAttr>("top_function")) if (auto topFunction = func->getAttrOfType<BoolAttr>("top_function"))
if (topFunction.getValue()) { if (topFunction.getValue()) {
// Estimate the top function. If any other functions are called by the // Estimate the top function. If any other functions are called by
// top function, they will be estimated in the procedure of estimating // the top function, they will be estimated in the procedure of
// the top function. // estimating the top function.
HLSCppEstimator estimator(func, latencyMap); HLSCppEstimator estimator(func, latencyMap);
estimator.estimateFunc(); estimator.estimateFunc();
} }

View File

@ -6,7 +6,6 @@
#include "scalehls/Analysis/Utils.h" #include "scalehls/Analysis/Utils.h"
#include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
using namespace mlir; using namespace mlir;
using namespace scalehls; using namespace scalehls;

View File

@ -4,6 +4,9 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "scalehls/Analysis/Utils.h"
#include "scalehls/Transforms/Passes.h" #include "scalehls/Transforms/Passes.h"
using namespace mlir; using namespace mlir;
@ -15,7 +18,52 @@ struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
}; };
} // namespace } // namespace
// Initial implementation of loop order optimization: it only analyzes the
// memory dependences inside the body of the last loop of `band` (presumably
// the innermost loop of the nest -- TODO confirm against the band builders).
//
// Only band.back() (its body block) and band.size() are read; `builder` is
// currently unused. For every memory recorded by getMemAccessesMap(), each
// pair of accesses is tested with checkMemrefAccessDependence() at a range of
// loop depths. The results are not consumed yet (the only consumer is the
// commented-out debug dump below), so nothing in this body rewrites the loops
// and the function always returns true.
bool scalehls::applyAffineLoopOrderOpt(AffineForOp loop, OpBuilder &builder) { bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band,
OpBuilder &builder) {
auto &loopBlock = band.back().getLoopBody().front();
auto depth = band.size();
// Collect all load and store operations for each memory in the loop block,
// and calculate the number of common surrounding loops for later use.
MemAccessesMap map;
getMemAccessesMap(loopBlock, map);
// NOTE(review): this dereferences the first and the second operation of the
// block, which assumes the block holds at least two operations -- confirm,
// otherwise std::next(loopBlock.begin()) is the end iterator.
auto commonLoopDepth = getNumCommonSurroundingLoops(
*loopBlock.begin(), *std::next(loopBlock.begin()));
// Traverse all memories in the loop block.
for (auto pair : map) {
auto loadStores = pair.second;
// Find all dependencies associated to the current memory.
// dstIndex is kept one ahead of the position of dstOp, so that drop_begin
// skips dstOp and everything before it: srcOp only ranges over the accesses
// listed after dstOp, and each pair is checked once.
int64_t dstIndex = 1;
for (auto dstOp : loadStores) {
for (auto srcOp : llvm::drop_begin(loadStores, dstIndex)) {
MemRefAccess dstAccess(dstOp);
MemRefAccess srcAccess(srcOp);
FlatAffineConstraints depConstrs;
SmallVector<DependenceComponent, 2> depComps;
// Check the dependence at every depth from (commonLoopDepth - depth + 1)
// up to and including (commonLoopDepth + 1).
// NOTE(review): the start depth is computed with unsigned operands;
// presumably depth <= commonLoopDepth + 1 always holds -- confirm,
// otherwise the subtraction wraps around.
for (unsigned loopDepth = commonLoopDepth - depth + 1;
loopDepth <= commonLoopDepth + 1; ++loopDepth) {
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, loopDepth, &depConstrs, &depComps,
/*allowRAR=*/false);
// Placeholder: a found dependence is not acted upon yet; only the
// disabled debug dump below would report it.
if (hasDependence(result)) {
// llvm::outs() << "\n----------\n";
// llvm::outs() << *srcOp << " -> " << *dstOp << "\n";
// llvm::outs() << "depth: " << loopDepth << ", distance: ";
// for (auto dep : depComps)
// llvm::outs() << "(" << dep.lb.getValue() << ","
// << dep.ub.getValue() << "), ";
// llvm::outs() << "\n";
}
}
}
dstIndex++;
}
}
return true; return true;
} }

View File

@ -15,9 +15,6 @@ using namespace scalehls;
// Helper methods // Helper methods
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
using AffineLoopBand = SmallVector<AffineForOp, 4>;
using AffineLoopBands = SmallVector<AffineLoopBand, 4>;
static AffineForOp getLoopBandFromRoot(AffineForOp forOp, static AffineForOp getLoopBandFromRoot(AffineForOp forOp,
AffineLoopBand &band) { AffineLoopBand &band) {
auto currentLoop = forOp; auto currentLoop = forOp;
@ -218,13 +215,16 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
AffineLoopBand band; AffineLoopBand band;
getLoopBandFromLeaf(loop, band); getLoopBandFromLeaf(loop, band);
targetBands.push_back(band); targetBands.push_back(band);
// Loop perfection and remove variable bound are always applied for the
// convenience of polyhedral optimizations.
applyAffineLoopPerfection(band.back(), builder);
applyRemoveVariableBound(band.front(), builder);
} }
}); });
// Loop perfection, remove variable bound, and loop order optimization are
// always applied for the convenience of polyhedral optimizations.
for (auto band : targetBands) {
applyAffineLoopPerfection(band.back(), builder);
applyRemoveVariableBound(band.front(), builder);
applyAffineLoopOrderOpt(band, builder);
}
} }
namespace { namespace {

View File

@ -40,11 +40,13 @@ bool scalehls::applySimplifyMemrefAccess(FuncOp func) {
auto secondAccess = MemRefAccess(secondOp); auto secondAccess = MemRefAccess(secondOp);
auto secondIsRead = isa<AffineReadOpInterface>(secondOp); auto secondIsRead = isa<AffineReadOpInterface>(secondOp);
auto sameLevelOps = checkSameLevel(firstOp, secondOp); // Check whether the two operations statically have the same access.
if (firstAccess == secondAccess) {
// Check whether the two operations statically have the same access // If the two operations are at different loop levels, break.
// element while at the same level. // TODO: memory access operation hoisting?
if ((firstAccess == secondAccess) && sameLevelOps) { auto sameLevelOps = checkSameLevel(firstOp, secondOp);
if (!sameLevelOps)
break;
// If the second operation's access direction is different from the // If the second operation's access direction is different from the
// first operation's, the first operation is known to be not redundant. // first operation's, the first operation is known to be not redundant.
@ -80,9 +82,10 @@ bool scalehls::applySimplifyMemrefAccess(FuncOp func) {
} }
} }
} else { } else {
// Find possible dependencies. // Find possible dependencies. If a dependency appears, the first
// operation can no longer be simplified.
unsigned nsLoops = getNumCommonSurroundingLoops(*firstOp, *secondOp); unsigned nsLoops = getNumCommonSurroundingLoops(*firstOp, *secondOp);
bool dependencyFlag = false; bool foundDependence = false;
for (unsigned depth = 1; depth <= nsLoops + 1; ++depth) { for (unsigned depth = 1; depth <= nsLoops + 1; ++depth) {
FlatAffineConstraints dependenceConstraints; FlatAffineConstraints dependenceConstraints;
@ -94,19 +97,23 @@ bool scalehls::applySimplifyMemrefAccess(FuncOp func) {
// Only zero distance dependencies are considered here. // Only zero distance dependencies are considered here.
if (hasDependence(result)) { if (hasDependence(result)) {
int64_t distance = 0; bool hasZeroDistance = true;
for (auto dep : dependenceComponents)
if (dep.lb)
distance += std::abs(dep.lb.getValue());
if (distance == 0) { for (auto dep : dependenceComponents)
dependencyFlag = true; if (dep.lb.getValue() > 0 || dep.ub.getValue() < 0) {
hasZeroDistance = false;
break;
}
if (hasZeroDistance) {
foundDependence = true;
break; break;
} }
} }
} }
if (dependencyFlag) // If any dependence is found, break.
if (foundDependence)
break; break;
} }
} }

View File

@ -1,6 +1,6 @@
// RUN: scalehls-opt -qor-estimation="target-spec=../../config/target-spec.ini" %s | FileCheck %s // RUN: scalehls-opt -qor-estimation="target-spec=../../config/target-spec.ini" %s | FileCheck %s
// CHECK-LABEL: func @test_qor_estimation // CHECK-LABEL: func @qor_estimation
func @qor_estimation() { func @qor_estimation() {
return return
} }