[QoREstimation] support dep-aware II calculation (#4)

This commit is contained in:
Hanchen Ye 2020-12-14 20:48:59 -06:00
parent 2307b0141f
commit 93b5a2641e
3 changed files with 104 additions and 59 deletions

View File

@ -8,6 +8,7 @@
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@ -36,31 +37,16 @@ HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath)
llvm::outs() << latency << "\n";
}
/// Return the memref accessed by an affine load or store operation, or a
/// null Value if `op` implements neither access interface.
static Value getMemRef(Operation *op) {
  Value memref = nullptr;
  if (auto readOp = dyn_cast<mlir::AffineReadOpInterface>(op))
    memref = readOp.getMemRef();
  else if (auto writeOp = dyn_cast<mlir::AffineWriteOpInterface>(op))
    memref = writeOp.getMemRef();
  return memref;
}
/// Return the affine access map of an affine load or store operation, or a
/// default-constructed (empty) AffineMap if `op` implements neither access
/// interface.
static AffineMap getAffineMap(Operation *op) {
  AffineMap accessMap;
  if (auto readOp = dyn_cast<mlir::AffineReadOpInterface>(op))
    accessMap = readOp.getAffineMap();
  else if (auto writeOp = dyn_cast<mlir::AffineWriteOpInterface>(op))
    accessMap = writeOp.getAffineMap();
  return accessMap;
}
/// Collect memory access information of the block.
void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) {
// Walk through all load/store operations in the current block.
block.walk([&](Operation *op) {
if (auto memRef = getMemRef(op)) {
auto map = getAffineMap(op);
auto arrayOp = cast<ArrayOp>(getMemRef(op).getDefiningOp());
if (isa<mlir::AffineReadOpInterface, mlir::AffineWriteOpInterface>(op)) {
auto memAccess = MemRefAccess(op);
auto arrayOp = cast<ArrayOp>(memAccess.memref.getDefiningOp());
AffineValueMap accessMap;
memAccess.getAccessMap(&accessMap);
dict[arrayOp].push_back(op);
@ -69,7 +55,7 @@ void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) {
int32_t partitionIdx = 0;
unsigned accumFactor = 1;
unsigned dim = 0;
for (auto expr : map.getResults()) {
for (auto expr : accessMap.getAffineMap().getResults()) {
auto idxExpr = getConstExpr(0);
unsigned factor = 1;
if (arrayOp.partition()) {
@ -94,7 +80,7 @@ void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) {
}
accumFactor *= factor;
dim += 1;
dim++;
}
// Set partition index attribute.
@ -107,7 +93,8 @@ void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) {
/// limitation. This method will be called by getBlockSchedule method.
unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin,
MemPortDicts &dicts) {
auto arrayOp = cast<ArrayOp>(getMemRef(op).getDefiningOp());
auto memAccess = MemRefAccess(op);
auto arrayOp = cast<ArrayOp>(memAccess.memref.getDefiningOp());
auto partitionIdx = getIntAttrValue(op, "partition_index");
auto partitionNum = getUIntAttrValue(arrayOp, "partition_num");
@ -212,7 +199,7 @@ unsigned HLSCppEstimator::getBlockSchedule(Block &block) {
// Handle load/store operations, ensure the current schedule meets memory
// port limitation.
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
if (isa<mlir::AffineReadOpInterface, mlir::AffineWriteOpInterface>(op)) {
begin = getLoadStoreSchedule(&op, begin, dicts);
end = begin + 1;
}
@ -237,22 +224,6 @@ unsigned HLSCppEstimator::getBlockSchedule(Block &block) {
return blockEnd;
}
/// Return the position of `value` among the affine map operands of a load or
/// store operation, or -1 when `value` is not an operand (or `op` is neither
/// an AffineLoadOp nor an AffineStoreOp).
static int32_t getDimId(Operation *op, Value value) {
  // Shared lookup over the map operands of either op kind; the two original
  // branches were identical except for the concrete op type.
  auto findOperand = [&](auto mapOp) -> int32_t {
    auto operands = mapOp.getMapOperands();
    auto it = std::find(operands.begin(), operands.end(), value);
    if (it == operands.end())
      return -1;
    return it.getIndex();
  };
  if (auto loadOp = dyn_cast<AffineLoadOp>(op))
    return findOperand(loadOp);
  if (auto storeOp = dyn_cast<AffineStoreOp>(op))
    return findOperand(storeOp);
  return -1;
}
/// Calculate the minimum resource II.
unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) {
unsigned II = 1;
@ -269,9 +240,9 @@ unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) {
writeNum.push_back(0);
}
auto LoadStore = pair.second;
auto loadStores = pair.second;
for (auto op : LoadStore) {
for (auto op : loadStores) {
// Calculate resource-aware minimal II.
auto partitionIdx = getIntAttrValue(op, "partition_index");
if (partitionIdx == -1) {
@ -294,9 +265,9 @@ unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) {
writeNum[p] += accessNum;
}
} else if (isa<AffineLoadOp>(op))
readNum[partitionIdx] += 1;
readNum[partitionIdx]++;
else if (isa<AffineStoreOp>(op))
writeNum[partitionIdx] += 1;
writeNum[partitionIdx]++;
}
unsigned minII = 1;
@ -320,7 +291,75 @@ unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) {
/// Calculate the minimum dependency II.
///
/// Computes the smallest initiation interval that satisfies every loop-carried
/// memory dependence among the load/store operations recorded in `dict`, for
/// the pipelined loop `forOp`. For each dependent access pair, II must be at
/// least ceil(delay / distance), where `delay` is the schedule gap between the
/// two accesses and `distance` is the dependence distance flattened across the
/// pipelined loop levels.
unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoreDict dict) {
// NOTE(review): this early return makes everything below unreachable — it
// appears to be leftover diff residue from the previous stub implementation
// being replaced in this commit; confirm it should be deleted.
return 0;
// II is at least 1 by definition.
unsigned II = 1;
// Collect start and end level of the pipeline.
// endLevel counts every enclosing AffineForOp; startLevel stops growing at
// the first enclosing loop that is not marked "flatten", so the
// [startLevel, endLevel] range covers exactly the loop levels pipelined
// together with forOp.
unsigned endLevel = 1;
unsigned startLevel = 1;
auto currentLoop = forOp;
while (true) {
if (auto outerLoop = dyn_cast<AffineForOp>(currentLoop.getParentOp())) {
currentLoop = outerLoop;
endLevel++;
if (!getBoolAttrValue(outerLoop, "flatten"))
startLevel++;
} else
break;
}
// Each dict entry groups the load/store ops that touch one array.
for (auto &pair : dict) {
auto loadStores = pair.second;
// Walk through each pair of source and destination, and each loop level
// that are pipelined.
for (auto loopDepth = startLevel; loopDepth <= endLevel; ++loopDepth) {
// dstIndex lets the inner loop start one past dstOp, so each unordered
// pair of accesses is checked exactly once.
unsigned dstIndex = 1;
for (auto dstOp : loadStores) {
MemRefAccess dstAccess(dstOp);
for (auto srcOp : llvm::drop_begin(loadStores, dstIndex)) {
MemRefAccess srcAccess(srcOp);
FlatAffineConstraints depConstrs;
SmallVector<DependenceComponent, 2> depComps;
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, loopDepth, &depConstrs, &depComps);
if (hasDependence(result)) {
SmallVector<unsigned, 2> flattenTripCounts;
flattenTripCounts.push_back(1);
unsigned distance = 0;
// Calculate the distance of this dependency.
// Iterate innermost-to-outermost, weighting each level's component by
// the product of inner trip counts; when a component has no bound,
// conservatively use the full trip count of that loop.
for (auto it = depComps.rbegin(); it < depComps.rend(); ++it) {
auto dep = *it;
auto tripCount = getUIntAttrValue(dep.op, "trip_count");
if (dep.ub)
distance += flattenTripCounts.back() * dep.ub.getValue();
else if (dep.lb)
distance += flattenTripCounts.back() * dep.lb.getValue();
else
distance += flattenTripCounts.back() * tripCount;
flattenTripCounts.push_back(flattenTripCounts.back() * tripCount);
}
// NOTE(review): unsigned subtraction — this wraps if srcOp is scheduled
// before dstOp; confirm the schedule ordering guarantee between the two.
unsigned delay = getUIntAttrValue(srcOp, "schedule_begin") -
getUIntAttrValue(dstOp, "schedule_begin");
if (distance != 0) {
// II must be large enough that the dependent access is ready:
// II >= ceil(delay / distance).
unsigned minII = ceil((float)delay / distance);
II = max(II, minII);
}
}
}
dstIndex++;
}
}
}
return II;
}
bool HLSCppEstimator::visitOp(AffineForOp op) {
@ -339,8 +378,7 @@ bool HLSCppEstimator::visitOp(AffineForOp op) {
setAttrValue(op, "iter_latency", iterLatency);
// Calculate initial interval.
auto II = getResMinII(op, dict);
// II = min(II, getDepMinII());
auto II = max(getResMinII(op, dict), getDepMinII(op, dict));
setAttrValue(op, "init_interval", II);
auto tripCount = getUIntAttrValue(op, "trip_count");
@ -427,8 +465,12 @@ void HLSCppEstimator::estimateFunc(FuncOp func) {
// Set an attribute indicating the trip count. For now, we assume all
// loops have static loop bound.
unsigned tripCount = getConstantTripCount(op).getValue();
setAttrValue(op, "trip_count", tripCount);
if (auto tripCount = getConstantTripCount(op))
setAttrValue(op, "trip_count", (unsigned)tripCount.getValue());
else {
setAttrValue(op, "trip_count", (unsigned)0);
op.emitError("has variable trip count");
}
// Set attributes indicating this loop can be flatten or not.
unsigned opNum = 0;
@ -437,9 +479,9 @@ void HLSCppEstimator::estimateFunc(FuncOp func) {
for (auto &bodyOp : body.front()) {
if (!isa<AffineYieldOp>(bodyOp))
opNum += 1;
opNum++;
if (isa<AffineForOp>(bodyOp)) {
forNum += 1;
forNum++;
innerFlatten = getBoolAttrValue(&bodyOp, "flatten");
}
}

View File

@ -27,7 +27,10 @@ static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) {
nestedLoops.push_back(loop);
}
});
return nestedLoops.back();
if (nestedLoops.empty())
return nullptr;
else
return nestedLoops.back();
}
template <typename OpType>

View File

@ -1,17 +1,17 @@
// RUN: scalehls-opt -qor-estimation %s | FileCheck %s
// RUN: scalehls-opt -loop-pipelining="pipeline-level=1" -array-partition -qor-estimation %s | FileCheck %s
// CHECK-LABEL: func @test_for
func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) attributes {dataflow = false} {
%array0 = "hlscpp.array"(%arg0) {interface=true, storage=false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
%array1 = "hlscpp.array"(%arg1) {interface=true, storage=false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
%array0 = "hlscpp.array"(%arg0) {interface=true, storage=false, partition=false, storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
%array1 = "hlscpp.array"(%arg1) {interface=true, storage=false, partition=false, storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
affine.for %i = 0 to 16 {
affine.for %j = 0 to 4 {
affine.for %k = 0 to 4 {
%0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex>
%1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex>
%2 = addi %0, %1 : index
affine.store %2, %array1[%i, %j, %k + 2] : memref<16x4x4xindex>
} {pipeline = true, unroll = false, flatten = false}
%2 = addi %1, %1 : index
affine.store %2, %array1[%i, %j+1, %k] : memref<16x4x4xindex>
} {pipeline = false, unroll = false, flatten = false}
} {pipeline = false, unroll = false, flatten = false}
} {pipeline = false, unroll = false, flatten = false}
return