fix bugs related to dependency analysis; [AffineLoopOrderOpt] add initial impl of applyAffineLoopOrderOpt method
This commit is contained in:
parent
7c1130452e
commit
d3d13e0bd0
|
@ -7,8 +7,7 @@
|
|||
#ifndef SCALEHLS_ANALYSIS_UTILS_H
|
||||
#define SCALEHLS_ANALYSIS_UTILS_H
|
||||
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace scalehls {
|
||||
|
@ -62,6 +61,9 @@ public:
|
|||
// Helper methods
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
using AffineLoopBand = SmallVector<AffineForOp, 4>;
|
||||
using AffineLoopBands = SmallVector<AffineLoopBand, 4>;
|
||||
|
||||
// For storing all affine memory access operations (including CallOp,
|
||||
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
|
||||
using MemAccesses = SmallVector<Operation *, 16>;
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
#ifndef SCALEHLS_TRANSFORMS_PASSES_H
|
||||
#define SCALEHLS_TRANSFORMS_PASSES_H
|
||||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "scalehls/Analysis/Utils.h"
|
||||
#include <memory>
|
||||
|
||||
namespace mlir {
|
||||
|
@ -34,7 +34,7 @@ bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder);
|
|||
/// Apply remove variable bound to all inner loops of the input loop.
|
||||
bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
|
||||
|
||||
bool applyAffineLoopOrderOpt(AffineForOp loop, OpBuilder &builder);
|
||||
bool applyAffineLoopOrderOpt(AffineLoopBand band, OpBuilder &builder);
|
||||
|
||||
/// Apply loop pipelining to the input loop, all inner loops are automatically
|
||||
/// fully unrolled.
|
||||
|
|
|
@ -426,18 +426,26 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
|
|||
if (srcMuxSize > 2 || dstMuxSize > 2)
|
||||
distance = 1;
|
||||
else {
|
||||
SmallVector<int64_t, 2> accumTripCounts;
|
||||
accumTripCounts.push_back(1);
|
||||
SmallVector<int64_t, 8> accumTrips;
|
||||
accumTrips.push_back(1);
|
||||
|
||||
// Calculate the distance of this dependency.
|
||||
for (auto i = depComps.rbegin(); i < depComps.rend(); ++i) {
|
||||
auto dep = *i;
|
||||
auto tripCount = getIntAttrValue(dep.op, "trip_count");
|
||||
auto ub = dep.ub.getValue();
|
||||
auto lb = dep.lb.getValue();
|
||||
|
||||
if (dep.lb)
|
||||
distance += accumTripCounts.back() * dep.lb.getValue();
|
||||
// If ub is non-negative, calculate the minimum positive
|
||||
// distance. Otherwise, set distance to negative and break.
|
||||
if (ub >= 0)
|
||||
distance += accumTrips.back() * max(lb, (int64_t)0);
|
||||
else {
|
||||
distance = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
accumTripCounts.push_back(accumTripCounts.back() * tripCount);
|
||||
accumTrips.push_back(accumTrips.back() * tripCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -481,8 +489,8 @@ bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
|
|||
// If the current loop is annotated as pipelined loop, extra dependency and
|
||||
// resource aware II analysis will be executed.
|
||||
if (getBoolAttrValue(op, "pipeline")) {
|
||||
// Collect load and store operations in the loop block for solving possible
|
||||
// carried dependencies.
|
||||
// Collect load and store operations in the loop block for solving
|
||||
// possible carried dependencies.
|
||||
// TODO: include CallOps, how? It seems dependencies always exist for all
|
||||
// CallOps no matter their access pattern.
|
||||
MemAccessesMap map;
|
||||
|
@ -510,9 +518,9 @@ bool HLSCppEstimator::visitOp(AffineForOp op, int64_t begin) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// If the current loop is annotated as flatten, it will be flattened into the
|
||||
// child pipelined loop. This will increase the flattened loop trip count
|
||||
// without changing the iteration latency.
|
||||
// If the current loop is annotated as flatten, it will be flattened into
|
||||
// the child pipelined loop. This will increase the flattened loop trip
|
||||
// count without changing the iteration latency.
|
||||
if (getBoolAttrValue(op, "flatten")) {
|
||||
auto child = dyn_cast<AffineForOp>(op.getLoopBody().front().front());
|
||||
assert(child && "the first containing operation is not a loop");
|
||||
|
@ -575,8 +583,8 @@ bool HLSCppEstimator::visitOp(AffineIfOp op, int64_t begin) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// In our assumption, AffineIfOp is completely transparent. Therefore, we set
|
||||
// a dummy schedule begin here.
|
||||
// In our assumption, AffineIfOp is completely transparent. Therefore, we
|
||||
// set a dummy schedule begin here.
|
||||
setScheduleValue(op, end, end);
|
||||
return true;
|
||||
}
|
||||
|
@ -659,9 +667,9 @@ HLSCppEstimator::Resource HLSCppEstimator::estimateResource(Block &block,
|
|||
totalFmul /= (latencyMap["fmul"] + 1);
|
||||
|
||||
// We assume the loop resource utilization cannot be shared. Therefore, the
|
||||
// overall resource utilization is loops' plus other operations'. According to
|
||||
// profiling, floating-point add and multiply will consume 2 and 3 DSP units,
|
||||
// respectively.
|
||||
// overall resource utilization is loops' plus other operations'. According
|
||||
// to profiling, floating-point add and multiply will consume 2 and 3 DSP
|
||||
// units, respectively.
|
||||
auto dsp = loopDSPNum + maxFadd * 2 + maxFmul * 3;
|
||||
|
||||
// If the block is pipelined (interval is positive), the minimum resource
|
||||
|
@ -787,14 +795,14 @@ void HLSCppEstimator::estimateFunc() {
|
|||
}
|
||||
|
||||
// Scheduled levels of all operations are reversed in this method, because
|
||||
// we have done the ALAP scheduling in a reverse order. Note that after the
|
||||
// reverse, the annotated scheduling level of each operation is a relative
|
||||
// level of the nearest surrounding AffineForOp or FuncOp.
|
||||
// we have done the ALAP scheduling in a reverse order. Note that after
|
||||
// the reverse, the annotated scheduling level of each operation is a
|
||||
// relative level of the nearest surrounding AffineForOp or FuncOp.
|
||||
reverseSchedule();
|
||||
} else {
|
||||
// Scheduling failed due to early error.
|
||||
// TODO: further refinement and try the best to avoid failing, e.g. support
|
||||
// variable loop bound.
|
||||
// TODO: further refinement and try the best to avoid failing, e.g.
|
||||
// support variable loop bound.
|
||||
setAttrValue(func, "latency", std::string("unknown"));
|
||||
}
|
||||
|
||||
|
@ -856,9 +864,9 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
|||
for (auto func : module.getOps<FuncOp>())
|
||||
if (auto topFunction = func->getAttrOfType<BoolAttr>("top_function"))
|
||||
if (topFunction.getValue()) {
|
||||
// Estimate the top function. If any other functions are called by the
|
||||
// top function, it will be estimated in the procedure of estimating
|
||||
// the top function.
|
||||
// Estimate the top function. If any other functions are called by
|
||||
// the top function, it will be estimated in the procedure of
|
||||
// estimating the top function.
|
||||
HLSCppEstimator estimator(func, latencyMap);
|
||||
estimator.estimateFunc();
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
|
||||
#include "scalehls/Analysis/Utils.h"
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace scalehls;
|
||||
|
|
|
@ -4,6 +4,9 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
#include "mlir/Analysis/Utils.h"
|
||||
#include "scalehls/Analysis/Utils.h"
|
||||
#include "scalehls/Transforms/Passes.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
@ -15,7 +18,52 @@ struct AffineLoopOrderOpt : public AffineLoopOrderOptBase<AffineLoopOrderOpt> {
|
|||
};
|
||||
} // namespace
|
||||
|
||||
bool scalehls::applyAffineLoopOrderOpt(AffineForOp loop, OpBuilder &builder) {
|
||||
bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand band,
|
||||
OpBuilder &builder) {
|
||||
auto &loopBlock = band.back().getLoopBody().front();
|
||||
auto depth = band.size();
|
||||
|
||||
// Collect all load and store operations for each memory in the loop block,
|
||||
// and calculate the number of common surrounding loops for later use.
|
||||
MemAccessesMap map;
|
||||
getMemAccessesMap(loopBlock, map);
|
||||
auto commonLoopDepth = getNumCommonSurroundingLoops(
|
||||
*loopBlock.begin(), *std::next(loopBlock.begin()));
|
||||
|
||||
// Traverse all memories in the loop block.
|
||||
for (auto pair : map) {
|
||||
auto loadStores = pair.second;
|
||||
|
||||
// Find all dependencies associated to the current memory.
|
||||
int64_t dstIndex = 1;
|
||||
for (auto dstOp : loadStores) {
|
||||
for (auto srcOp : llvm::drop_begin(loadStores, dstIndex)) {
|
||||
MemRefAccess dstAccess(dstOp);
|
||||
MemRefAccess srcAccess(srcOp);
|
||||
|
||||
FlatAffineConstraints depConstrs;
|
||||
SmallVector<DependenceComponent, 2> depComps;
|
||||
|
||||
for (unsigned loopDepth = commonLoopDepth - depth + 1;
|
||||
loopDepth <= commonLoopDepth + 1; ++loopDepth) {
|
||||
DependenceResult result = checkMemrefAccessDependence(
|
||||
srcAccess, dstAccess, loopDepth, &depConstrs, &depComps,
|
||||
/*allowRAR=*/false);
|
||||
|
||||
if (hasDependence(result)) {
|
||||
// llvm::outs() << "\n----------\n";
|
||||
// llvm::outs() << *srcOp << " -> " << *dstOp << "\n";
|
||||
// llvm::outs() << "depth: " << loopDepth << ", distance: ";
|
||||
// for (auto dep : depComps)
|
||||
// llvm::outs() << "(" << dep.lb.getValue() << ","
|
||||
// << dep.ub.getValue() << "), ";
|
||||
// llvm::outs() << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
dstIndex++;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -15,9 +15,6 @@ using namespace scalehls;
|
|||
// Helper methods
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
using AffineLoopBand = SmallVector<AffineForOp, 4>;
|
||||
using AffineLoopBands = SmallVector<AffineLoopBand, 4>;
|
||||
|
||||
static AffineForOp getLoopBandFromRoot(AffineForOp forOp,
|
||||
AffineLoopBand &band) {
|
||||
auto currentLoop = forOp;
|
||||
|
@ -218,13 +215,16 @@ void HLSCppOptimizer::applyMultipleLevelDSE() {
|
|||
AffineLoopBand band;
|
||||
getLoopBandFromLeaf(loop, band);
|
||||
targetBands.push_back(band);
|
||||
|
||||
// Loop perfection and remove variable bound are always applied for the
|
||||
// convenience of polyhedral optimizations.
|
||||
applyAffineLoopPerfection(band.back(), builder);
|
||||
applyRemoveVariableBound(band.front(), builder);
|
||||
}
|
||||
});
|
||||
|
||||
// Loop perfection, remove variable bound, and loop order optimization are
|
||||
// always applied for the convenience of polyhedral optimizations.
|
||||
for (auto band : targetBands) {
|
||||
applyAffineLoopPerfection(band.back(), builder);
|
||||
applyRemoveVariableBound(band.front(), builder);
|
||||
applyAffineLoopOrderOpt(band, builder);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
|
|
@ -40,11 +40,13 @@ bool scalehls::applySimplifyMemrefAccess(FuncOp func) {
|
|||
auto secondAccess = MemRefAccess(secondOp);
|
||||
auto secondIsRead = isa<AffineReadOpInterface>(secondOp);
|
||||
|
||||
// Check whether the two operations statically have the same access.
|
||||
if (firstAccess == secondAccess) {
|
||||
// If the two operations are at different loop levels, break.
|
||||
// TODO: memory access operation hoisting?
|
||||
auto sameLevelOps = checkSameLevel(firstOp, secondOp);
|
||||
|
||||
// Check whether the two operations statically have the same access
|
||||
// element while at the same level.
|
||||
if ((firstAccess == secondAccess) && sameLevelOps) {
|
||||
if (!sameLevelOps)
|
||||
break;
|
||||
|
||||
// If the second operation's access direction is different from the
|
||||
// first operation, the first operation is known not redundant.
|
||||
|
@ -80,9 +82,10 @@ bool scalehls::applySimplifyMemrefAccess(FuncOp func) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// Find possible dependencies.
|
||||
// Find possible dependencies. If a dependency appears, the first operation
|
||||
// can no longer be simplified.
|
||||
unsigned nsLoops = getNumCommonSurroundingLoops(*firstOp, *secondOp);
|
||||
bool dependencyFlag = false;
|
||||
bool foundDependence = false;
|
||||
|
||||
for (unsigned depth = 1; depth <= nsLoops + 1; ++depth) {
|
||||
FlatAffineConstraints dependenceConstraints;
|
||||
|
@ -94,19 +97,23 @@ bool scalehls::applySimplifyMemrefAccess(FuncOp func) {
|
|||
|
||||
// Only zero distance dependencies are considered here.
|
||||
if (hasDependence(result)) {
|
||||
int64_t distance = 0;
|
||||
for (auto dep : dependenceComponents)
|
||||
if (dep.lb)
|
||||
distance += std::abs(dep.lb.getValue());
|
||||
bool hasZeroDistance = true;
|
||||
|
||||
if (distance == 0) {
|
||||
dependencyFlag = true;
|
||||
for (auto dep : dependenceComponents)
|
||||
if (dep.lb.getValue() > 0 || dep.ub.getValue() < 0) {
|
||||
hasZeroDistance = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (hasZeroDistance) {
|
||||
foundDependence = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dependencyFlag)
|
||||
// If any dependence is found, break.
|
||||
if (foundDependence)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// RUN: scalehls-opt -qor-estimation="target-spec=../../config/target-spec.ini" %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func @test_qor_estimation
|
||||
// CHECK-LABEL: func @qor_estimation
|
||||
func @qor_estimation() {
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue