From 5d854f3b74b2803afd34bde211ee85c4a2465303 Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Thu, 17 Dec 2020 21:40:29 -0600 Subject: [PATCH] [QoREstimation] support profiling latency based estimation (#2) --- config/target-spec.ini | 39 ++++++--------- include/Analysis/QoREstimation.h | 39 +++++++++------ lib/Analysis/QoREstimation.cpp | 85 +++++++++++++++++++++----------- 3 files changed, 92 insertions(+), 71 deletions(-) diff --git a/config/target-spec.ini b/config/target-spec.ini index 60d0b7b..0e29e17 100644 --- a/config/target-spec.ini +++ b/config/target-spec.ini @@ -1,26 +1,15 @@ -[spec] -frequency=200MHz +[specification] +frequency=100MHz -[200MHz] -op=2333 -# define INT_ADD 0.5 -# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency. -# define IMULT 7.0 //actual 7.0 -# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations. -# define IDIV 36.0 //not chain -# define U_DIV 7.0 //actual 7.0 same with imul -# define UDIV 36.0 -# define FP_ADD 8.0 //not chain -# define FP_MULT 5.0 //not chain -# define FP_DIV 16.0 //not chain -# define SI_TO_FP 6.0 -# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5. 
-# define SHIFT 0.2 -# define ALLOCA_LATENCY 1.0 -# define GEP_LATENCY 1.0 -# define CAST_LATENCY 0.4 -# define PHI_LATENCY 1.5 -# define ICMP_LATENCY 0.5 -# define FCMP_LATENCY 8.0//0.5 -# define SELECT_LATENCY 0.2 -# define CALL_LATENCY 1.0 +[100MHz] +fadd=4.0 +fmul=3.0 +fdiv=15.0 +fcmp=1.0 +fselect=0.0 + +fadd_delay=7.25 +fmul_delay=5.7 +fdiv_delay=6.07 +fcmp_delay=6.4 +fselect_delay=0.69 diff --git a/include/Analysis/QoREstimation.h b/include/Analysis/QoREstimation.h index ffa2e7f..dc062b0 100644 --- a/include/Analysis/QoREstimation.h +++ b/include/Analysis/QoREstimation.h @@ -8,7 +8,6 @@ #include "Dialect/HLSCpp/Visitor.h" #include "INIReader.h" #include "mlir/Analysis/AffineAnalysis.h" -#include "mlir/Analysis/Liveness.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/LoopUtils.h" @@ -88,25 +87,20 @@ public: op->setAttr(name, builder.getStringAttr(value)); } - /// Schedule attribute related methods. + /// Set schedule attribute methods. void setScheduleValue(Operation *op, unsigned begin, unsigned end) { setAttrValue(op, "schedule_begin", begin); setAttrValue(op, "schedule_end", end); } - - unsigned getLatencyValue(Operation *op) { - if (auto latency = getUIntAttrValue(op, "latency")) - return latency; - else - return getUIntAttrValue(op, "schedule_end") - - getUIntAttrValue(op, "schedule_begin"); - } }; //===----------------------------------------------------------------------===// // HLSCppEstimator Class Declaration //===----------------------------------------------------------------------===// +// Profiled latency map. +using LatencyMap = llvm::StringMap; + // For storing all memory access operations (including AffineLoadOp and // AffineStoreOp) indexed by the array instance (ArrayOp). 
using LoadStores = SmallVector; @@ -138,8 +132,8 @@ class HLSCppEstimator : public HLSCppVisitorBase, unsigned>, public HLSCppToolBase { public: - explicit HLSCppEstimator(FuncOp &func) - : HLSCppToolBase(OpBuilder(func)), func(func), liveness(Liveness(func)) { + explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap) + : HLSCppToolBase(OpBuilder(func)), func(func), latencyMap(latencyMap) { getFuncMemRefDepends(); } @@ -156,21 +150,34 @@ public: Optional visitOp(AffineLoadOp op, unsigned begin); Optional visitOp(AffineStoreOp op, unsigned begin); - unsigned getResMinII(AffineForOp forOp, LoadStoresMap &map); + unsigned getOpMinII(AffineForOp forOp); + unsigned getResMinII(LoadStoresMap &map); unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map); Optional visitOp(AffineForOp op, unsigned begin); Optional visitOp(AffineIfOp op, unsigned begin); Optional visitOp(ArrayOp op, unsigned begin); - Optional> estimateBlock(Block &block, - unsigned begin); +#define HANDLE(OPTYPE, KEYNAME) \ + Optional visitOp(OPTYPE op, unsigned begin) { \ + auto end = begin + latencyMap[KEYNAME] + 1; \ + setScheduleValue(op, begin, end); \ + return end; \ + } + HANDLE(AddFOp, "fadd"); + HANDLE(MulFOp, "fmul"); + HANDLE(DivFOp, "fdiv"); + HANDLE(CmpFOp, "fcmp"); + HANDLE(SelectOp, "fselect"); +#undef HANDLE + + Optional estimateBlock(Block &block, unsigned begin); void estimateFunc(); FuncOp &func; - Liveness liveness; DependsMap dependsMap; PortsMapDict portsMapDict; + LatencyMap &latencyMap; }; } // namespace scalehls diff --git a/lib/Analysis/QoREstimation.cpp b/lib/Analysis/QoREstimation.cpp index 0582762..5ca57fc 100644 --- a/lib/Analysis/QoREstimation.cpp +++ b/lib/Analysis/QoREstimation.cpp @@ -324,9 +324,16 @@ unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) { begin++; } - // Memory load/store operation always consumes 1 clock cycle. 
- setScheduleValue(op, begin, begin + 1); - return begin + 1; + // Memory load consumes 2 clock cycles, while other memory access including + // store consumes 1 clock cycle. + unsigned end = begin; + if (isa(op)) + end += 2; + else + end++; + + setScheduleValue(op, begin, end); + return end; } Optional HLSCppEstimator::visitOp(AffineLoadOp op, unsigned begin) { @@ -341,8 +348,23 @@ Optional HLSCppEstimator::visitOp(AffineStoreOp op, unsigned begin) { // AffineForOp Related Methods //===----------------------------------------------------------------------===// +unsigned HLSCppEstimator::getOpMinII(AffineForOp forOp) { + unsigned II = 1; + forOp.walk([&](Operation *op) { + unsigned minII = 0; + if (auto latency = getUIntAttrValue(op, "latency")) + minII = latency; + else + minII = getUIntAttrValue(op, "schedule_end") - + getUIntAttrValue(op, "schedule_begin"); + + II = max(II, minII); + }); + return II; +} + /// Calculate the minimum resource II. -unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoresMap &map) { +unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) { unsigned II = 1; for (auto &pair : map) { @@ -454,17 +476,13 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoresMap &map) { auto dep = *it; auto tripCount = getUIntAttrValue(dep.op, "trip_count"); - if (dep.ub) - distance += flattenTripCounts.back() * dep.ub.getValue(); - else if (dep.lb) + if (dep.lb) distance += flattenTripCounts.back() * dep.lb.getValue(); - else - distance += flattenTripCounts.back() * tripCount; flattenTripCounts.push_back(flattenTripCounts.back() * tripCount); } - unsigned delay = getUIntAttrValue(srcOp, "schedule_end") - + unsigned delay = getUIntAttrValue(srcOp, "schedule_begin") - getUIntAttrValue(dstOp, "schedule_end"); if (distance > 0) { @@ -505,10 +523,9 @@ Optional HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) { } // Estimate the loop block. 
- if (auto schedule = estimateBlock(loopBlock, begin)) { - begin = max(begin, schedule.getValue().first); - end = max(end, schedule.getValue().second); - } else + if (auto schedule = estimateBlock(loopBlock, begin)) + end = max(end, schedule.getValue()); + else return Optional(); // If the current loop is annotated as pipeline, extra dependency and @@ -519,7 +536,7 @@ Optional HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) { setAttrValue(op, "iter_latency", iterLatency); // Calculate initial interval. - auto II = max(getResMinII(op, map), getDepMinII(op, map)); + auto II = max({getOpMinII(op), getResMinII(map), getDepMinII(op, map)}); setAttrValue(op, "init_interval", II); auto tripCount = getUIntAttrValue(op, "trip_count"); @@ -579,7 +596,7 @@ Optional HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) { // Estimate then block. if (auto schedule = estimateBlock(*thenBlock, begin)) - end = max(end, schedule.getValue().second); + end = max(end, schedule.getValue()); else return Optional(); @@ -588,7 +605,7 @@ Optional HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) { auto elseBlock = op.getElseBlock(); if (auto schedule = estimateBlock(*elseBlock, begin)) - end = max(end, schedule.getValue().second); + end = max(end, schedule.getValue()); else return Optional(); } @@ -620,10 +637,10 @@ Optional HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) { // Block Scheduler and Estimator //===----------------------------------------------------------------------===// -/// Estimate the latency of a block with ASAP scheduling strategy, return a pair -/// of schedule begin and schedule end. -Optional> -HLSCppEstimator::estimateBlock(Block &block, unsigned begin) { +/// Estimate the latency of a block with ASAP scheduling strategy, return the +/// end level of schedule. 
+Optional HLSCppEstimator::estimateBlock(Block &block, + unsigned begin) { unsigned blockBegin = begin; unsigned blockEnd = begin; @@ -642,20 +659,19 @@ HLSCppEstimator::estimateBlock(Block &block, unsigned begin) { if (auto scheduleEnd = dispatchVisitor(&op, opBegin)) opEnd = max(opEnd, scheduleEnd.getValue()); else - return Optional>(); + return Optional(); // Update the block schedule begin and end. blockBegin = min(blockBegin, opBegin); blockEnd = max(blockEnd, opEnd); } - return std::pair(blockBegin, blockEnd); + return blockEnd; } void HLSCppEstimator::estimateFunc() { // Recursively estimate blocks in the function. if (auto schedule = estimateBlock(func.front(), 0)) - setAttrValue(func, "latency", - schedule.getValue().second - schedule.getValue().first); + setAttrValue(func, "latency", schedule.getValue()); else setAttrValue(func, "latency", -1); } @@ -664,6 +680,15 @@ void HLSCppEstimator::estimateFunc() { // Entry of scalehls-opt //===----------------------------------------------------------------------===// +static void getLatencyMap(INIReader &spec, std::string freq, + LatencyMap &latencyMap) { + latencyMap["fadd"] = spec.GetInteger(freq, "fadd", 4); + latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3); + latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15); + latencyMap["fcmp"] = spec.GetInteger(freq, "fcmp", 1); + latencyMap["fselect"] = spec.GetInteger(freq, "fselect", 0); +} + namespace { struct QoREstimation : public scalehls::QoREstimationBase { void runOnOperation() override { @@ -673,14 +698,14 @@ struct QoREstimation : public scalehls::QoREstimationBase { llvm::outs() << "error: target spec file parse fail, please refer to " "--help option and pass in correct file path\n"; - // TODO: Support estimator initiation from profiling data, constructing a - // unique data structure for holding latency and resource information. 
- auto freq = spec.Get("spec", "frequency", "200MHz"); - auto latency = spec.GetInteger(freq, "op", 0); + // Collect profiling latency data. + auto freq = spec.Get("specification", "frequency", "100MHz"); + LatencyMap latencyMap; + getLatencyMap(spec, freq, latencyMap); // Estimate performance and resource utilization. for (auto func : getOperation().getOps()) { - HLSCppEstimator estimator(func); + HLSCppEstimator estimator(func, latencyMap); estimator.estimateFunc(); } }