[QoREstimation] support profiling latency based estimation (#2)

This commit is contained in:
Hanchen Ye 2020-12-17 21:40:29 -06:00
parent 18e5f434bb
commit 5d854f3b74
3 changed files with 92 additions and 71 deletions

View File

@ -1,26 +1,15 @@
[spec] [specification]
frequency=200MHz frequency=100MHz
[200MHz] [100MHz]
op=2333 fadd=4.0
# define INT_ADD 0.5 fmul=3.0
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency. fdiv=15.0
# define IMULT 7.0 //actual 7.0 fcmp=1.0
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations. fselect=0.0
# define IDIV 36.0 //not chain
# define U_DIV 7.0 //actual 7.0 same with imul fadd_delay=7.25
# define UDIV 36.0 fmul_delay=5.7
# define FP_ADD 8.0 //not chain fdiv_delay=6.07
# define FP_MULT 5.0 //not chain fcmp_delay=6.4
# define FP_DIV 16.0 //not chain fselect_delay=0.69
# define SI_TO_FP 6.0
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
# define SHIFT 0.2
# define ALLOCA_LATENCY 1.0
# define GEP_LATENCY 1.0
# define CAST_LATENCY 0.4
# define PHI_LATENCY 1.5
# define ICMP_LATENCY 0.5
# define FCMP_LATENCY 8.0//0.5
# define SELECT_LATENCY 0.2
# define CALL_LATENCY 1.0

View File

@ -8,7 +8,6 @@
#include "Dialect/HLSCpp/Visitor.h" #include "Dialect/HLSCpp/Visitor.h"
#include "INIReader.h" #include "INIReader.h"
#include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Liveness.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Pass/Pass.h" #include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h" #include "mlir/Transforms/LoopUtils.h"
@ -88,25 +87,20 @@ public:
op->setAttr(name, builder.getStringAttr(value)); op->setAttr(name, builder.getStringAttr(value));
} }
/// Schedule attribute related methods. /// Set schedule attribute methods.
void setScheduleValue(Operation *op, unsigned begin, unsigned end) { void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
setAttrValue(op, "schedule_begin", begin); setAttrValue(op, "schedule_begin", begin);
setAttrValue(op, "schedule_end", end); setAttrValue(op, "schedule_end", end);
} }
unsigned getLatencyValue(Operation *op) {
if (auto latency = getUIntAttrValue(op, "latency"))
return latency;
else
return getUIntAttrValue(op, "schedule_end") -
getUIntAttrValue(op, "schedule_begin");
}
}; };
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// HLSCppEstimator Class Declaration // HLSCppEstimator Class Declaration
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Profiled latency map.
using LatencyMap = llvm::StringMap<unsigned>;
// For storing all memory access operations (including AffineLoadOp and // For storing all memory access operations (including AffineLoadOp and
// AffineStoreOp) indexed by the array instance (ArrayOp). // AffineStoreOp) indexed by the array instance (ArrayOp).
using LoadStores = SmallVector<Operation *, 16>; using LoadStores = SmallVector<Operation *, 16>;
@ -138,8 +132,8 @@ class HLSCppEstimator
: public HLSCppVisitorBase<HLSCppEstimator, Optional<unsigned>, unsigned>, : public HLSCppVisitorBase<HLSCppEstimator, Optional<unsigned>, unsigned>,
public HLSCppToolBase { public HLSCppToolBase {
public: public:
explicit HLSCppEstimator(FuncOp &func) explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
: HLSCppToolBase(OpBuilder(func)), func(func), liveness(Liveness(func)) { : HLSCppToolBase(OpBuilder(func)), func(func), latencyMap(latencyMap) {
getFuncMemRefDepends(); getFuncMemRefDepends();
} }
@ -156,21 +150,34 @@ public:
Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin); Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin);
Optional<unsigned> visitOp(AffineStoreOp op, unsigned begin); Optional<unsigned> visitOp(AffineStoreOp op, unsigned begin);
unsigned getResMinII(AffineForOp forOp, LoadStoresMap &map); unsigned getOpMinII(AffineForOp forOp);
unsigned getResMinII(LoadStoresMap &map);
unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map); unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map);
Optional<unsigned> visitOp(AffineForOp op, unsigned begin); Optional<unsigned> visitOp(AffineForOp op, unsigned begin);
Optional<unsigned> visitOp(AffineIfOp op, unsigned begin); Optional<unsigned> visitOp(AffineIfOp op, unsigned begin);
Optional<unsigned> visitOp(ArrayOp op, unsigned begin); Optional<unsigned> visitOp(ArrayOp op, unsigned begin);
Optional<std::pair<unsigned, unsigned>> estimateBlock(Block &block, #define HANDLE(OPTYPE, KEYNAME) \
unsigned begin); Optional<unsigned> visitOp(OPTYPE op, unsigned begin) { \
auto end = begin + latencyMap[KEYNAME] + 1; \
setScheduleValue(op, begin, end); \
return end; \
}
HANDLE(AddFOp, "fadd");
HANDLE(MulFOp, "fmul");
HANDLE(DivFOp, "fdiv");
HANDLE(CmpFOp, "fcmp");
HANDLE(SelectOp, "fselect");
#undef HANDLE
Optional<unsigned> estimateBlock(Block &block, unsigned begin);
void estimateFunc(); void estimateFunc();
FuncOp &func; FuncOp &func;
Liveness liveness;
DependsMap dependsMap; DependsMap dependsMap;
PortsMapDict portsMapDict; PortsMapDict portsMapDict;
LatencyMap &latencyMap;
}; };
} // namespace scalehls } // namespace scalehls

View File

@ -324,9 +324,16 @@ unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
begin++; begin++;
} }
// Memory load/store operation always consumes 1 clock cycle. // Memory load consumes 2 clock cycles, while other memory access including
setScheduleValue(op, begin, begin + 1); // store consumes 1 clock cycle.
return begin + 1; unsigned end = begin;
if (isa<AffineLoadOp>(op))
end += 2;
else
end++;
setScheduleValue(op, begin, end);
return end;
} }
Optional<unsigned> HLSCppEstimator::visitOp(AffineLoadOp op, unsigned begin) { Optional<unsigned> HLSCppEstimator::visitOp(AffineLoadOp op, unsigned begin) {
@ -341,8 +348,23 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineStoreOp op, unsigned begin) {
// AffineForOp Related Methods // AffineForOp Related Methods
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// Calculate the minimum II imposed by individual operations: the largest
/// per-operation latency among the operations visited by walking `forOp`.
/// The returned value is never smaller than 1.
unsigned HLSCppEstimator::getOpMinII(AffineForOp forOp) {
// Start from 1 so that the result is at least 1 even if no visited operation
// reports a larger latency.
unsigned II = 1;
forOp.walk([&](Operation *op) {
unsigned minII = 0;
// Prefer a non-zero "latency" attribute; otherwise fall back to the length
// of the scheduled interval of the operation.
// NOTE(review): a missing attribute presumably reads as 0 here -- confirm
// against getUIntAttrValue.
if (auto latency = getUIntAttrValue(op, "latency"))
minII = latency;
else
// NOTE(review): assuming getUIntAttrValue returns an unsigned value, this
// subtraction wraps around if "schedule_end" < "schedule_begin" -- confirm
// that this cannot happen for the visited operations.
minII = getUIntAttrValue(op, "schedule_end") -
getUIntAttrValue(op, "schedule_begin");
II = max(II, minII);
});
return II;
}
/// Calculate the minimum resource II. /// Calculate the minimum resource II.
unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoresMap &map) { unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) {
unsigned II = 1; unsigned II = 1;
for (auto &pair : map) { for (auto &pair : map) {
@ -454,17 +476,13 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoresMap &map) {
auto dep = *it; auto dep = *it;
auto tripCount = getUIntAttrValue(dep.op, "trip_count"); auto tripCount = getUIntAttrValue(dep.op, "trip_count");
if (dep.ub) if (dep.lb)
distance += flattenTripCounts.back() * dep.ub.getValue();
else if (dep.lb)
distance += flattenTripCounts.back() * dep.lb.getValue(); distance += flattenTripCounts.back() * dep.lb.getValue();
else
distance += flattenTripCounts.back() * tripCount;
flattenTripCounts.push_back(flattenTripCounts.back() * tripCount); flattenTripCounts.push_back(flattenTripCounts.back() * tripCount);
} }
unsigned delay = getUIntAttrValue(srcOp, "schedule_end") - unsigned delay = getUIntAttrValue(srcOp, "schedule_begin") -
getUIntAttrValue(dstOp, "schedule_end"); getUIntAttrValue(dstOp, "schedule_end");
if (distance > 0) { if (distance > 0) {
@ -505,10 +523,9 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
} }
// Estimate the loop block. // Estimate the loop block.
if (auto schedule = estimateBlock(loopBlock, begin)) { if (auto schedule = estimateBlock(loopBlock, begin))
begin = max(begin, schedule.getValue().first); end = max(end, schedule.getValue());
end = max(end, schedule.getValue().second); else
} else
return Optional<unsigned>(); return Optional<unsigned>();
// If the current loop is annotated as pipeline, extra dependency and // If the current loop is annotated as pipeline, extra dependency and
@ -519,7 +536,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
setAttrValue(op, "iter_latency", iterLatency); setAttrValue(op, "iter_latency", iterLatency);
// Calculate initial interval. // Calculate initial interval.
auto II = max(getResMinII(op, map), getDepMinII(op, map)); auto II = max({getOpMinII(op), getResMinII(map), getDepMinII(op, map)});
setAttrValue(op, "init_interval", II); setAttrValue(op, "init_interval", II);
auto tripCount = getUIntAttrValue(op, "trip_count"); auto tripCount = getUIntAttrValue(op, "trip_count");
@ -579,7 +596,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
// Estimate then block. // Estimate then block.
if (auto schedule = estimateBlock(*thenBlock, begin)) if (auto schedule = estimateBlock(*thenBlock, begin))
end = max(end, schedule.getValue().second); end = max(end, schedule.getValue());
else else
return Optional<unsigned>(); return Optional<unsigned>();
@ -588,7 +605,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
auto elseBlock = op.getElseBlock(); auto elseBlock = op.getElseBlock();
if (auto schedule = estimateBlock(*elseBlock, begin)) if (auto schedule = estimateBlock(*elseBlock, begin))
end = max(end, schedule.getValue().second); end = max(end, schedule.getValue());
else else
return Optional<unsigned>(); return Optional<unsigned>();
} }
@ -620,10 +637,10 @@ Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
// Block Scheduler and Estimator // Block Scheduler and Estimator
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// Estimate the latency of a block with ASAP scheduling strategy, return a pair /// Estimate the latency of a block with ASAP scheduling strategy, return the
/// of schedule begin and schedule end. /// end level of schedule.
Optional<std::pair<unsigned, unsigned>> Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,
HLSCppEstimator::estimateBlock(Block &block, unsigned begin) { unsigned begin) {
unsigned blockBegin = begin; unsigned blockBegin = begin;
unsigned blockEnd = begin; unsigned blockEnd = begin;
@ -642,20 +659,19 @@ HLSCppEstimator::estimateBlock(Block &block, unsigned begin) {
if (auto scheduleEnd = dispatchVisitor(&op, opBegin)) if (auto scheduleEnd = dispatchVisitor(&op, opBegin))
opEnd = max(opEnd, scheduleEnd.getValue()); opEnd = max(opEnd, scheduleEnd.getValue());
else else
return Optional<std::pair<unsigned, unsigned>>(); return Optional<unsigned>();
// Update the block schedule begin and end. // Update the block schedule begin and end.
blockBegin = min(blockBegin, opBegin); blockBegin = min(blockBegin, opBegin);
blockEnd = max(blockEnd, opEnd); blockEnd = max(blockEnd, opEnd);
} }
return std::pair<unsigned, unsigned>(blockBegin, blockEnd); return blockEnd;
} }
void HLSCppEstimator::estimateFunc() { void HLSCppEstimator::estimateFunc() {
// Recursively estimate blocks in the function. // Recursively estimate blocks in the function.
if (auto schedule = estimateBlock(func.front(), 0)) if (auto schedule = estimateBlock(func.front(), 0))
setAttrValue(func, "latency", setAttrValue(func, "latency", schedule.getValue());
schedule.getValue().second - schedule.getValue().first);
else else
setAttrValue(func, "latency", -1); setAttrValue(func, "latency", -1);
} }
@ -664,6 +680,15 @@ void HLSCppEstimator::estimateFunc() {
// Entry of scalehls-opt // Entry of scalehls-opt
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// Fill `latencyMap` with the profiled latency of each supported
/// floating-point operation, read from section `freq` of the target `spec`.
/// Each entry is stored under the same key name that is looked up in the spec
/// ("fadd", "fmul", "fdiv", "fcmp", "fselect"). The last argument of each
/// GetInteger call is the fallback used when the spec does not provide the key
/// (per the INIReader API -- see its documentation).
static void getLatencyMap(INIReader &spec, std::string freq,
LatencyMap &latencyMap) {
latencyMap["fadd"] = spec.GetInteger(freq, "fadd", 4);
latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
latencyMap["fcmp"] = spec.GetInteger(freq, "fcmp", 1);
latencyMap["fselect"] = spec.GetInteger(freq, "fselect", 0);
}
namespace { namespace {
struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> { struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
void runOnOperation() override { void runOnOperation() override {
@ -673,14 +698,14 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
llvm::outs() << "error: target spec file parse fail, please refer to " llvm::outs() << "error: target spec file parse fail, please refer to "
"--help option and pass in correct file path\n"; "--help option and pass in correct file path\n";
// TODO: Support estimator initiation from profiling data, constructing a // Collect profiling latency data.
// unique data structure for holding latency and resource information. auto freq = spec.Get("specification", "frequency", "100MHz");
auto freq = spec.Get("spec", "frequency", "200MHz"); LatencyMap latencyMap;
auto latency = spec.GetInteger(freq, "op", 0); getLatencyMap(spec, freq, latencyMap);
// Estimate performance and resource utilization. // Estimate performance and resource utilization.
for (auto func : getOperation().getOps<FuncOp>()) { for (auto func : getOperation().getOps<FuncOp>()) {
HLSCppEstimator estimator(func); HLSCppEstimator estimator(func, latencyMap);
estimator.estimateFunc(); estimator.estimateFunc();
} }
} }