From 93b5a2641e685d02a678cad4d393697a3275c79f Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Mon, 14 Dec 2020 20:48:59 -0600 Subject: [PATCH] [QoREstimation] support dep-aware II calculation (#4) --- lib/Analysis/QoREstimation.cpp | 146 ++++++++++++++-------- lib/Transforms/ArrayPartition.cpp | 5 +- test/Analysis/QoREstimation/test_for.mlir | 12 +- 3 files changed, 104 insertions(+), 59 deletions(-) diff --git a/lib/Analysis/QoREstimation.cpp b/lib/Analysis/QoREstimation.cpp index 9937f22..38d3785 100644 --- a/lib/Analysis/QoREstimation.cpp +++ b/lib/Analysis/QoREstimation.cpp @@ -8,6 +8,7 @@ #include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LoopAnalysis.h" +#include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -36,31 +37,16 @@ HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath) llvm::outs() << latency << "\n"; } -static Value getMemRef(Operation *op) { - if (auto loadOp = dyn_cast(op)) - return loadOp.getMemRef(); - else if (auto storeOp = dyn_cast(op)) - return storeOp.getMemRef(); - else - return nullptr; -} - -static AffineMap getAffineMap(Operation *op) { - if (auto loadOp = dyn_cast(op)) - return loadOp.getAffineMap(); - else if (auto storeOp = dyn_cast(op)) - return storeOp.getAffineMap(); - else - return AffineMap(); -} - /// Collect memory access information of the block. void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) { // Walk through all load/store operations in the current block. 
block.walk([&](Operation *op) { - if (auto memRef = getMemRef(op)) { - auto map = getAffineMap(op); - auto arrayOp = cast(getMemRef(op).getDefiningOp()); + if (isa(op)) { + auto memAccess = MemRefAccess(op); + auto arrayOp = cast(memAccess.memref.getDefiningOp()); + + AffineValueMap accessMap; + memAccess.getAccessMap(&accessMap); dict[arrayOp].push_back(op); @@ -69,7 +55,7 @@ void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) { int32_t partitionIdx = 0; unsigned accumFactor = 1; unsigned dim = 0; - for (auto expr : map.getResults()) { + for (auto expr : accessMap.getAffineMap().getResults()) { auto idxExpr = getConstExpr(0); unsigned factor = 1; if (arrayOp.partition()) { @@ -94,7 +80,7 @@ void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) { } accumFactor *= factor; - dim += 1; + dim++; } // Set partition index attribute. @@ -107,7 +93,8 @@ void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) { /// limitation. This method will be called by getBlockSchedule method. unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin, MemPortDicts &dicts) { - auto arrayOp = cast(getMemRef(op).getDefiningOp()); + auto memAccess = MemRefAccess(op); + auto arrayOp = cast(memAccess.memref.getDefiningOp()); auto partitionIdx = getIntAttrValue(op, "partition_index"); auto partitionNum = getUIntAttrValue(arrayOp, "partition_num"); @@ -212,7 +199,7 @@ unsigned HLSCppEstimator::getBlockSchedule(Block &block) { // Handle load/store operations, ensure the current schedule meets memory // port limitation. 
- if (isa(op)) { + if (isa(op)) { begin = getLoadStoreSchedule(&op, begin, dicts); end = begin + 1; } @@ -237,22 +224,6 @@ unsigned HLSCppEstimator::getBlockSchedule(Block &block) { return blockEnd; } -static int32_t getDimId(Operation *op, Value value) { - int32_t dimId = -1; - if (auto loadOp = dyn_cast(op)) { - auto operand = std::find(loadOp.getMapOperands().begin(), - loadOp.getMapOperands().end(), value); - if (operand != loadOp.getMapOperands().end()) - dimId = operand.getIndex(); - } else if (auto storeOp = dyn_cast(op)) { - auto operand = std::find(storeOp.getMapOperands().begin(), - storeOp.getMapOperands().end(), value); - if (operand != storeOp.getMapOperands().end()) - dimId = operand.getIndex(); - } - return dimId; -} - /// Calculate the minimum resource II. unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) { unsigned II = 1; @@ -269,9 +240,9 @@ unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) { writeNum.push_back(0); } - auto LoadStore = pair.second; + auto loadStores = pair.second; - for (auto op : LoadStore) { + for (auto op : loadStores) { // Calculate resource-aware minimal II. auto partitionIdx = getIntAttrValue(op, "partition_index"); if (partitionIdx == -1) { @@ -294,9 +265,9 @@ unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) { writeNum[p] += accessNum; } } else if (isa(op)) - readNum[partitionIdx] += 1; + readNum[partitionIdx]++; else if (isa(op)) - writeNum[partitionIdx] += 1; + writeNum[partitionIdx]++; } unsigned minII = 1; @@ -320,7 +291,75 @@ unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) { /// Calculate the minimum dependency II. unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoreDict dict) { - return 0; + unsigned II = 1; + + // Collect start and end level of the pipeline. 
+ unsigned endLevel = 1; + unsigned startLevel = 1; + auto currentLoop = forOp; + while (true) { + if (auto outerLoop = dyn_cast(currentLoop.getParentOp())) { + currentLoop = outerLoop; + endLevel++; + if (!getBoolAttrValue(outerLoop, "flatten")) + startLevel++; + } else + break; + } + + for (auto &pair : dict) { + auto loadStores = pair.second; + + // Walk through each pair of source and destination, and each loop level + // that is pipelined. + for (auto loopDepth = startLevel; loopDepth <= endLevel; ++loopDepth) { + unsigned dstIndex = 1; + for (auto dstOp : loadStores) { + MemRefAccess dstAccess(dstOp); + + for (auto srcOp : llvm::drop_begin(loadStores, dstIndex)) { + MemRefAccess srcAccess(srcOp); + + FlatAffineConstraints depConstrs; + SmallVector depComps; + + DependenceResult result = checkMemrefAccessDependence( + srcAccess, dstAccess, loopDepth, &depConstrs, &depComps); + + if (hasDependence(result)) { + SmallVector flattenTripCounts; + flattenTripCounts.push_back(1); + unsigned distance = 0; + + // Calculate the distance of this dependency. + for (auto it = depComps.rbegin(); it < depComps.rend(); ++it) { + auto dep = *it; + auto tripCount = getUIntAttrValue(dep.op, "trip_count"); + + if (dep.ub) + distance += flattenTripCounts.back() * dep.ub.getValue(); + else if (dep.lb) + distance += flattenTripCounts.back() * dep.lb.getValue(); + else + distance += flattenTripCounts.back() * tripCount; + + flattenTripCounts.push_back(flattenTripCounts.back() * tripCount); + } + + unsigned delay = getUIntAttrValue(srcOp, "schedule_begin") - + getUIntAttrValue(dstOp, "schedule_begin"); + + if (distance != 0) { + unsigned minII = ceil((float)delay / distance); + II = max(II, minII); + } + } + } + dstIndex++; + } + } + } + return II; } bool HLSCppEstimator::visitOp(AffineForOp op) { @@ -339,8 +378,7 @@ bool HLSCppEstimator::visitOp(AffineForOp op) { setAttrValue(op, "iter_latency", iterLatency); // Calculate initial interval. 
- auto II = getResMinII(op, dict); - // II = min(II, getDepMinII()); + auto II = max(getResMinII(op, dict), getDepMinII(op, dict)); setAttrValue(op, "init_interval", II); auto tripCount = getUIntAttrValue(op, "trip_count"); @@ -427,8 +465,12 @@ void HLSCppEstimator::estimateFunc(FuncOp func) { // Set an attribute indicating the trip count. For now, we assume all // loops have static loop bound. - unsigned tripCount = getConstantTripCount(op).getValue(); - setAttrValue(op, "trip_count", tripCount); + if (auto tripCount = getConstantTripCount(op)) + setAttrValue(op, "trip_count", (unsigned)tripCount.getValue()); + else { + setAttrValue(op, "trip_count", (unsigned)0); + op.emitError("has variable trip count"); + } // Set attributes indicating this loop can be flatten or not. unsigned opNum = 0; @@ -437,9 +479,9 @@ void HLSCppEstimator::estimateFunc(FuncOp func) { for (auto &bodyOp : body.front()) { if (!isa(bodyOp)) - opNum += 1; + opNum++; if (isa(bodyOp)) { - forNum += 1; + forNum++; innerFlatten = getBoolAttrValue(&bodyOp, "flatten"); } } diff --git a/lib/Transforms/ArrayPartition.cpp b/lib/Transforms/ArrayPartition.cpp index 6a496f7..6c285d0 100644 --- a/lib/Transforms/ArrayPartition.cpp +++ b/lib/Transforms/ArrayPartition.cpp @@ -27,7 +27,10 @@ static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) { nestedLoops.push_back(loop); } }); - return nestedLoops.back(); + if (nestedLoops.empty()) + return nullptr; + else + return nestedLoops.back(); } template diff --git a/test/Analysis/QoREstimation/test_for.mlir b/test/Analysis/QoREstimation/test_for.mlir index 7ac1996..2ec864a 100644 --- a/test/Analysis/QoREstimation/test_for.mlir +++ b/test/Analysis/QoREstimation/test_for.mlir @@ -1,17 +1,17 @@ -// RUN: scalehls-opt -qor-estimation %s | FileCheck %s +// RUN: scalehls-opt -loop-pipelining="pipeline-level=1" -array-partition -qor-estimation %s | FileCheck %s // CHECK-LABEL: func @test_for func @test_for(%arg0: memref<16x4x4xindex>, %arg1: 
memref<16x4x4xindex>) attributes {dataflow = false} { - %array0 = "hlscpp.array"(%arg0) {interface=true, storage=false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex> - %array1 = "hlscpp.array"(%arg1) {interface=true, storage=false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex> + %array0 = "hlscpp.array"(%arg0) {interface=true, storage=false, partition=false, storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex> + %array1 = "hlscpp.array"(%arg1) {interface=true, storage=false, partition=false, storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex> affine.for %i = 0 to 16 { affine.for %j = 0 to 4 { affine.for %k = 0 to 4 { %0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex> %1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex> - %2 = addi %0, %1 : index - affine.store %2, %array1[%i, %j, %k + 2] : memref<16x4x4xindex> - } {pipeline = true, unroll = false, flatten = false} + %2 = addi %1, %1 : index + affine.store %2, %array1[%i, %j+1, %k] : memref<16x4x4xindex> + } {pipeline = false, unroll = false, flatten = false} } {pipeline = false, unroll = false, flatten = false} } {pipeline = false, unroll = false, flatten = false} return