[QoREstimation] support profiling latency based estimation (#2)
This commit is contained in:
parent
18e5f434bb
commit
5d854f3b74
|
@ -1,26 +1,15 @@
|
|||
[spec]
|
||||
frequency=200MHz
|
||||
[specification]
|
||||
frequency=100MHz
|
||||
|
||||
[200MHz]
|
||||
op=2333
|
||||
# define INT_ADD 0.5
|
||||
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
|
||||
# define IMULT 7.0 //actual 7.0
|
||||
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
|
||||
# define IDIV 36.0 //not chain
|
||||
# define U_DIV 7.0 //actual 7.0 same with imul
|
||||
# define UDIV 36.0
|
||||
# define FP_ADD 8.0 //not chain
|
||||
# define FP_MULT 5.0 //not chain
|
||||
# define FP_DIV 16.0 //not chain
|
||||
# define SI_TO_FP 6.0
|
||||
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
|
||||
# define SHIFT 0.2
|
||||
# define ALLOCA_LATENCY 1.0
|
||||
# define GEP_LATENCY 1.0
|
||||
# define CAST_LATENCY 0.4
|
||||
# define PHI_LATENCY 1.5
|
||||
# define ICMP_LATENCY 0.5
|
||||
# define FCMP_LATENCY 8.0//0.5
|
||||
# define SELECT_LATENCY 0.2
|
||||
# define CALL_LATENCY 1.0
|
||||
[100MHz]
|
||||
fadd=4.0
|
||||
fmul=3.0
|
||||
fdiv=15.0
|
||||
fcmp=1.0
|
||||
fselect=0.0
|
||||
|
||||
fadd_delay=7.25
|
||||
fmul_delay=5.7
|
||||
fdiv_delay=6.07
|
||||
fcmp_delay=6.4
|
||||
fselect_delay=0.69
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include "Dialect/HLSCpp/Visitor.h"
|
||||
#include "INIReader.h"
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
#include "mlir/Analysis/Liveness.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "mlir/Transforms/LoopUtils.h"
|
||||
|
@ -88,25 +87,20 @@ public:
|
|||
op->setAttr(name, builder.getStringAttr(value));
|
||||
}
|
||||
|
||||
/// Schedule attribute related methods.
|
||||
/// Set schedule attribute methods.
|
||||
void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
|
||||
setAttrValue(op, "schedule_begin", begin);
|
||||
setAttrValue(op, "schedule_end", end);
|
||||
}
|
||||
|
||||
unsigned getLatencyValue(Operation *op) {
|
||||
if (auto latency = getUIntAttrValue(op, "latency"))
|
||||
return latency;
|
||||
else
|
||||
return getUIntAttrValue(op, "schedule_end") -
|
||||
getUIntAttrValue(op, "schedule_begin");
|
||||
}
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HLSCppEstimator Class Declaration
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Profiled latency map.
|
||||
using LatencyMap = llvm::StringMap<unsigned>;
|
||||
|
||||
// For storing all memory access operations (including AffineLoadOp and
|
||||
// AffineStoreOp) indexed by the array instance (ArrayOp).
|
||||
using LoadStores = SmallVector<Operation *, 16>;
|
||||
|
@ -138,8 +132,8 @@ class HLSCppEstimator
|
|||
: public HLSCppVisitorBase<HLSCppEstimator, Optional<unsigned>, unsigned>,
|
||||
public HLSCppToolBase {
|
||||
public:
|
||||
explicit HLSCppEstimator(FuncOp &func)
|
||||
: HLSCppToolBase(OpBuilder(func)), func(func), liveness(Liveness(func)) {
|
||||
explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
|
||||
: HLSCppToolBase(OpBuilder(func)), func(func), latencyMap(latencyMap) {
|
||||
getFuncMemRefDepends();
|
||||
}
|
||||
|
||||
|
@ -156,21 +150,34 @@ public:
|
|||
Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin);
|
||||
Optional<unsigned> visitOp(AffineStoreOp op, unsigned begin);
|
||||
|
||||
unsigned getResMinII(AffineForOp forOp, LoadStoresMap &map);
|
||||
unsigned getOpMinII(AffineForOp forOp);
|
||||
unsigned getResMinII(LoadStoresMap &map);
|
||||
unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map);
|
||||
Optional<unsigned> visitOp(AffineForOp op, unsigned begin);
|
||||
|
||||
Optional<unsigned> visitOp(AffineIfOp op, unsigned begin);
|
||||
Optional<unsigned> visitOp(ArrayOp op, unsigned begin);
|
||||
|
||||
Optional<std::pair<unsigned, unsigned>> estimateBlock(Block &block,
|
||||
unsigned begin);
|
||||
#define HANDLE(OPTYPE, KEYNAME) \
|
||||
Optional<unsigned> visitOp(OPTYPE op, unsigned begin) { \
|
||||
auto end = begin + latencyMap[KEYNAME] + 1; \
|
||||
setScheduleValue(op, begin, end); \
|
||||
return end; \
|
||||
}
|
||||
HANDLE(AddFOp, "fadd");
|
||||
HANDLE(MulFOp, "fmul");
|
||||
HANDLE(DivFOp, "fdiv");
|
||||
HANDLE(CmpFOp, "fcmp");
|
||||
HANDLE(SelectOp, "fselect");
|
||||
#undef HANDLE
|
||||
|
||||
Optional<unsigned> estimateBlock(Block &block, unsigned begin);
|
||||
void estimateFunc();
|
||||
|
||||
FuncOp &func;
|
||||
Liveness liveness;
|
||||
DependsMap dependsMap;
|
||||
PortsMapDict portsMapDict;
|
||||
LatencyMap &latencyMap;
|
||||
};
|
||||
|
||||
} // namespace scalehls
|
||||
|
|
|
@ -324,9 +324,16 @@ unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
|
|||
begin++;
|
||||
}
|
||||
|
||||
// Memory load/store operation always consumes 1 clock cycle.
|
||||
setScheduleValue(op, begin, begin + 1);
|
||||
return begin + 1;
|
||||
// Memory load consumes 2 clock cycles, while other memory access including
|
||||
// store consumes 1 clock cycle.
|
||||
unsigned end = begin;
|
||||
if (isa<AffineLoadOp>(op))
|
||||
end += 2;
|
||||
else
|
||||
end++;
|
||||
|
||||
setScheduleValue(op, begin, end);
|
||||
return end;
|
||||
}
|
||||
|
||||
Optional<unsigned> HLSCppEstimator::visitOp(AffineLoadOp op, unsigned begin) {
|
||||
|
@ -341,8 +348,23 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineStoreOp op, unsigned begin) {
|
|||
// AffineForOp Related Methods
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
unsigned HLSCppEstimator::getOpMinII(AffineForOp forOp) {
|
||||
unsigned II = 1;
|
||||
forOp.walk([&](Operation *op) {
|
||||
unsigned minII = 0;
|
||||
if (auto latency = getUIntAttrValue(op, "latency"))
|
||||
minII = latency;
|
||||
else
|
||||
minII = getUIntAttrValue(op, "schedule_end") -
|
||||
getUIntAttrValue(op, "schedule_begin");
|
||||
|
||||
II = max(II, minII);
|
||||
});
|
||||
return II;
|
||||
}
|
||||
|
||||
/// Calculate the minimum resource II.
|
||||
unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoresMap &map) {
|
||||
unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) {
|
||||
unsigned II = 1;
|
||||
|
||||
for (auto &pair : map) {
|
||||
|
@ -454,17 +476,13 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoresMap &map) {
|
|||
auto dep = *it;
|
||||
auto tripCount = getUIntAttrValue(dep.op, "trip_count");
|
||||
|
||||
if (dep.ub)
|
||||
distance += flattenTripCounts.back() * dep.ub.getValue();
|
||||
else if (dep.lb)
|
||||
if (dep.lb)
|
||||
distance += flattenTripCounts.back() * dep.lb.getValue();
|
||||
else
|
||||
distance += flattenTripCounts.back() * tripCount;
|
||||
|
||||
flattenTripCounts.push_back(flattenTripCounts.back() * tripCount);
|
||||
}
|
||||
|
||||
unsigned delay = getUIntAttrValue(srcOp, "schedule_end") -
|
||||
unsigned delay = getUIntAttrValue(srcOp, "schedule_begin") -
|
||||
getUIntAttrValue(dstOp, "schedule_end");
|
||||
|
||||
if (distance > 0) {
|
||||
|
@ -505,10 +523,9 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
|
|||
}
|
||||
|
||||
// Estimate the loop block.
|
||||
if (auto schedule = estimateBlock(loopBlock, begin)) {
|
||||
begin = max(begin, schedule.getValue().first);
|
||||
end = max(end, schedule.getValue().second);
|
||||
} else
|
||||
if (auto schedule = estimateBlock(loopBlock, begin))
|
||||
end = max(end, schedule.getValue());
|
||||
else
|
||||
return Optional<unsigned>();
|
||||
|
||||
// If the current loop is annotated as pipeline, extra dependency and
|
||||
|
@ -519,7 +536,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
|
|||
setAttrValue(op, "iter_latency", iterLatency);
|
||||
|
||||
// Calculate initiation interval.
|
||||
auto II = max(getResMinII(op, map), getDepMinII(op, map));
|
||||
auto II = max({getOpMinII(op), getResMinII(map), getDepMinII(op, map)});
|
||||
setAttrValue(op, "init_interval", II);
|
||||
|
||||
auto tripCount = getUIntAttrValue(op, "trip_count");
|
||||
|
@ -579,7 +596,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
|
|||
|
||||
// Estimate then block.
|
||||
if (auto schedule = estimateBlock(*thenBlock, begin))
|
||||
end = max(end, schedule.getValue().second);
|
||||
end = max(end, schedule.getValue());
|
||||
else
|
||||
return Optional<unsigned>();
|
||||
|
||||
|
@ -588,7 +605,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
|
|||
auto elseBlock = op.getElseBlock();
|
||||
|
||||
if (auto schedule = estimateBlock(*elseBlock, begin))
|
||||
end = max(end, schedule.getValue().second);
|
||||
end = max(end, schedule.getValue());
|
||||
else
|
||||
return Optional<unsigned>();
|
||||
}
|
||||
|
@ -620,10 +637,10 @@ Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
|
|||
// Block Scheduler and Estimator
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Estimate the latency of a block with ASAP scheduling strategy, return a pair
|
||||
/// of schedule begin and schedule end.
|
||||
Optional<std::pair<unsigned, unsigned>>
|
||||
HLSCppEstimator::estimateBlock(Block &block, unsigned begin) {
|
||||
/// Estimate the latency of a block with ASAP scheduling strategy, return the
|
||||
/// end level of schedule.
|
||||
Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,
|
||||
unsigned begin) {
|
||||
unsigned blockBegin = begin;
|
||||
unsigned blockEnd = begin;
|
||||
|
||||
|
@ -642,20 +659,19 @@ HLSCppEstimator::estimateBlock(Block &block, unsigned begin) {
|
|||
if (auto scheduleEnd = dispatchVisitor(&op, opBegin))
|
||||
opEnd = max(opEnd, scheduleEnd.getValue());
|
||||
else
|
||||
return Optional<std::pair<unsigned, unsigned>>();
|
||||
return Optional<unsigned>();
|
||||
|
||||
// Update the block schedule begin and end.
|
||||
blockBegin = min(blockBegin, opBegin);
|
||||
blockEnd = max(blockEnd, opEnd);
|
||||
}
|
||||
return std::pair<unsigned, unsigned>(blockBegin, blockEnd);
|
||||
return blockEnd;
|
||||
}
|
||||
|
||||
void HLSCppEstimator::estimateFunc() {
|
||||
// Recursively estimate blocks in the function.
|
||||
if (auto schedule = estimateBlock(func.front(), 0))
|
||||
setAttrValue(func, "latency",
|
||||
schedule.getValue().second - schedule.getValue().first);
|
||||
setAttrValue(func, "latency", schedule.getValue());
|
||||
else
|
||||
setAttrValue(func, "latency", -1);
|
||||
}
|
||||
|
@ -664,6 +680,15 @@ void HLSCppEstimator::estimateFunc() {
|
|||
// Entry of scalehls-opt
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
static void getLatencyMap(INIReader &spec, std::string freq,
|
||||
LatencyMap &latencyMap) {
|
||||
latencyMap["fadd"] = spec.GetInteger(freq, "fadd", 4);
|
||||
latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
|
||||
latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
|
||||
latencyMap["fcmp"] = spec.GetInteger(freq, "fcmp", 1);
|
||||
latencyMap["fselect"] = spec.GetInteger(freq, "fselect", 0);
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
||||
void runOnOperation() override {
|
||||
|
@ -673,14 +698,14 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
|||
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||
"--help option and pass in correct file path\n";
|
||||
|
||||
// TODO: Support estimator initiation from profiling data, constructing a
|
||||
// unique data structure for holding latency and resource information.
|
||||
auto freq = spec.Get("spec", "frequency", "200MHz");
|
||||
auto latency = spec.GetInteger(freq, "op", 0);
|
||||
// Collect profiling latency data.
|
||||
auto freq = spec.Get("specification", "frequency", "100MHz");
|
||||
LatencyMap latencyMap;
|
||||
getLatencyMap(spec, freq, latencyMap);
|
||||
|
||||
// Estimate performance and resource utilization.
|
||||
for (auto func : getOperation().getOps<FuncOp>()) {
|
||||
HLSCppEstimator estimator(func);
|
||||
HLSCppEstimator estimator(func, latencyMap);
|
||||
estimator.estimateFunc();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue