[QoREstimation] support profiling latency based estimation (#2)
This commit is contained in:
parent
18e5f434bb
commit
5d854f3b74
|
@ -1,26 +1,15 @@
|
||||||
[spec]
|
[specification]
|
||||||
frequency=200MHz
|
frequency=100MHz
|
||||||
|
|
||||||
[200MHz]
|
[100MHz]
|
||||||
op=2333
|
fadd=4.0
|
||||||
# define INT_ADD 0.5
|
fmul=3.0
|
||||||
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
|
fdiv=15.0
|
||||||
# define IMULT 7.0 //actual 7.0
|
fcmp=1.0
|
||||||
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
|
fselect=0.0
|
||||||
# define IDIV 36.0 //not chain
|
|
||||||
# define U_DIV 7.0 //actual 7.0 same with imul
|
fadd_delay=7.25
|
||||||
# define UDIV 36.0
|
fmul_delay=5.7
|
||||||
# define FP_ADD 8.0 //not chain
|
fdiv_delay=6.07
|
||||||
# define FP_MULT 5.0 //not chain
|
fcmp_delay=6.4
|
||||||
# define FP_DIV 16.0 //not chain
|
fselect_delay=0.69
|
||||||
# define SI_TO_FP 6.0
|
|
||||||
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
|
|
||||||
# define SHIFT 0.2
|
|
||||||
# define ALLOCA_LATENCY 1.0
|
|
||||||
# define GEP_LATENCY 1.0
|
|
||||||
# define CAST_LATENCY 0.4
|
|
||||||
# define PHI_LATENCY 1.5
|
|
||||||
# define ICMP_LATENCY 0.5
|
|
||||||
# define FCMP_LATENCY 8.0//0.5
|
|
||||||
# define SELECT_LATENCY 0.2
|
|
||||||
# define CALL_LATENCY 1.0
|
|
||||||
|
|
|
@ -8,7 +8,6 @@
|
||||||
#include "Dialect/HLSCpp/Visitor.h"
|
#include "Dialect/HLSCpp/Visitor.h"
|
||||||
#include "INIReader.h"
|
#include "INIReader.h"
|
||||||
#include "mlir/Analysis/AffineAnalysis.h"
|
#include "mlir/Analysis/AffineAnalysis.h"
|
||||||
#include "mlir/Analysis/Liveness.h"
|
|
||||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||||
#include "mlir/Pass/Pass.h"
|
#include "mlir/Pass/Pass.h"
|
||||||
#include "mlir/Transforms/LoopUtils.h"
|
#include "mlir/Transforms/LoopUtils.h"
|
||||||
|
@ -88,25 +87,20 @@ public:
|
||||||
op->setAttr(name, builder.getStringAttr(value));
|
op->setAttr(name, builder.getStringAttr(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Schedule attribute related methods.
|
/// Methods for setting schedule attributes.
|
||||||
void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
|
void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
|
||||||
setAttrValue(op, "schedule_begin", begin);
|
setAttrValue(op, "schedule_begin", begin);
|
||||||
setAttrValue(op, "schedule_end", end);
|
setAttrValue(op, "schedule_end", end);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getLatencyValue(Operation *op) {
|
|
||||||
if (auto latency = getUIntAttrValue(op, "latency"))
|
|
||||||
return latency;
|
|
||||||
else
|
|
||||||
return getUIntAttrValue(op, "schedule_end") -
|
|
||||||
getUIntAttrValue(op, "schedule_begin");
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// HLSCppEstimator Class Declaration
|
// HLSCppEstimator Class Declaration
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Profiled latency map.
|
||||||
|
using LatencyMap = llvm::StringMap<unsigned>;
|
||||||
|
|
||||||
// For storing all memory access operations (including AffineLoadOp and
|
// For storing all memory access operations (including AffineLoadOp and
|
||||||
// AffineStoreOp) indexed by the array instance (ArrayOp).
|
// AffineStoreOp) indexed by the array instance (ArrayOp).
|
||||||
using LoadStores = SmallVector<Operation *, 16>;
|
using LoadStores = SmallVector<Operation *, 16>;
|
||||||
|
@ -138,8 +132,8 @@ class HLSCppEstimator
|
||||||
: public HLSCppVisitorBase<HLSCppEstimator, Optional<unsigned>, unsigned>,
|
: public HLSCppVisitorBase<HLSCppEstimator, Optional<unsigned>, unsigned>,
|
||||||
public HLSCppToolBase {
|
public HLSCppToolBase {
|
||||||
public:
|
public:
|
||||||
explicit HLSCppEstimator(FuncOp &func)
|
explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
|
||||||
: HLSCppToolBase(OpBuilder(func)), func(func), liveness(Liveness(func)) {
|
: HLSCppToolBase(OpBuilder(func)), func(func), latencyMap(latencyMap) {
|
||||||
getFuncMemRefDepends();
|
getFuncMemRefDepends();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,21 +150,34 @@ public:
|
||||||
Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin);
|
Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin);
|
||||||
Optional<unsigned> visitOp(AffineStoreOp op, unsigned begin);
|
Optional<unsigned> visitOp(AffineStoreOp op, unsigned begin);
|
||||||
|
|
||||||
unsigned getResMinII(AffineForOp forOp, LoadStoresMap &map);
|
unsigned getOpMinII(AffineForOp forOp);
|
||||||
|
unsigned getResMinII(LoadStoresMap &map);
|
||||||
unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map);
|
unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map);
|
||||||
Optional<unsigned> visitOp(AffineForOp op, unsigned begin);
|
Optional<unsigned> visitOp(AffineForOp op, unsigned begin);
|
||||||
|
|
||||||
Optional<unsigned> visitOp(AffineIfOp op, unsigned begin);
|
Optional<unsigned> visitOp(AffineIfOp op, unsigned begin);
|
||||||
Optional<unsigned> visitOp(ArrayOp op, unsigned begin);
|
Optional<unsigned> visitOp(ArrayOp op, unsigned begin);
|
||||||
|
|
||||||
Optional<std::pair<unsigned, unsigned>> estimateBlock(Block &block,
|
#define HANDLE(OPTYPE, KEYNAME) \
|
||||||
unsigned begin);
|
Optional<unsigned> visitOp(OPTYPE op, unsigned begin) { \
|
||||||
|
auto end = begin + latencyMap[KEYNAME] + 1; \
|
||||||
|
setScheduleValue(op, begin, end); \
|
||||||
|
return end; \
|
||||||
|
}
|
||||||
|
HANDLE(AddFOp, "fadd");
|
||||||
|
HANDLE(MulFOp, "fmul");
|
||||||
|
HANDLE(DivFOp, "fdiv");
|
||||||
|
HANDLE(CmpFOp, "fcmp");
|
||||||
|
HANDLE(SelectOp, "fselect");
|
||||||
|
#undef HANDLE
|
||||||
|
|
||||||
|
Optional<unsigned> estimateBlock(Block &block, unsigned begin);
|
||||||
void estimateFunc();
|
void estimateFunc();
|
||||||
|
|
||||||
FuncOp &func;
|
FuncOp &func;
|
||||||
Liveness liveness;
|
|
||||||
DependsMap dependsMap;
|
DependsMap dependsMap;
|
||||||
PortsMapDict portsMapDict;
|
PortsMapDict portsMapDict;
|
||||||
|
LatencyMap &latencyMap;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace scalehls
|
} // namespace scalehls
|
||||||
|
|
|
@ -324,9 +324,16 @@ unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
|
||||||
begin++;
|
begin++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Memory load/store operation always consumes 1 clock cycle.
|
// Memory load consumes 2 clock cycles, while other memory access including
|
||||||
setScheduleValue(op, begin, begin + 1);
|
// store consumes 1 clock cycle.
|
||||||
return begin + 1;
|
unsigned end = begin;
|
||||||
|
if (isa<AffineLoadOp>(op))
|
||||||
|
end += 2;
|
||||||
|
else
|
||||||
|
end++;
|
||||||
|
|
||||||
|
setScheduleValue(op, begin, end);
|
||||||
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<unsigned> HLSCppEstimator::visitOp(AffineLoadOp op, unsigned begin) {
|
Optional<unsigned> HLSCppEstimator::visitOp(AffineLoadOp op, unsigned begin) {
|
||||||
|
@ -341,8 +348,23 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineStoreOp op, unsigned begin) {
|
||||||
// AffineForOp Related Methods
|
// AffineForOp Related Methods
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
unsigned HLSCppEstimator::getOpMinII(AffineForOp forOp) {
|
||||||
|
unsigned II = 1;
|
||||||
|
forOp.walk([&](Operation *op) {
|
||||||
|
unsigned minII = 0;
|
||||||
|
if (auto latency = getUIntAttrValue(op, "latency"))
|
||||||
|
minII = latency;
|
||||||
|
else
|
||||||
|
minII = getUIntAttrValue(op, "schedule_end") -
|
||||||
|
getUIntAttrValue(op, "schedule_begin");
|
||||||
|
|
||||||
|
II = max(II, minII);
|
||||||
|
});
|
||||||
|
return II;
|
||||||
|
}
|
||||||
|
|
||||||
/// Calculate the minimum resource II.
|
/// Calculate the minimum resource II.
|
||||||
unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoresMap &map) {
|
unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) {
|
||||||
unsigned II = 1;
|
unsigned II = 1;
|
||||||
|
|
||||||
for (auto &pair : map) {
|
for (auto &pair : map) {
|
||||||
|
@ -454,17 +476,13 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoresMap &map) {
|
||||||
auto dep = *it;
|
auto dep = *it;
|
||||||
auto tripCount = getUIntAttrValue(dep.op, "trip_count");
|
auto tripCount = getUIntAttrValue(dep.op, "trip_count");
|
||||||
|
|
||||||
if (dep.ub)
|
if (dep.lb)
|
||||||
distance += flattenTripCounts.back() * dep.ub.getValue();
|
|
||||||
else if (dep.lb)
|
|
||||||
distance += flattenTripCounts.back() * dep.lb.getValue();
|
distance += flattenTripCounts.back() * dep.lb.getValue();
|
||||||
else
|
|
||||||
distance += flattenTripCounts.back() * tripCount;
|
|
||||||
|
|
||||||
flattenTripCounts.push_back(flattenTripCounts.back() * tripCount);
|
flattenTripCounts.push_back(flattenTripCounts.back() * tripCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned delay = getUIntAttrValue(srcOp, "schedule_end") -
|
unsigned delay = getUIntAttrValue(srcOp, "schedule_begin") -
|
||||||
getUIntAttrValue(dstOp, "schedule_end");
|
getUIntAttrValue(dstOp, "schedule_end");
|
||||||
|
|
||||||
if (distance > 0) {
|
if (distance > 0) {
|
||||||
|
@ -505,10 +523,9 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Estimate the loop block.
|
// Estimate the loop block.
|
||||||
if (auto schedule = estimateBlock(loopBlock, begin)) {
|
if (auto schedule = estimateBlock(loopBlock, begin))
|
||||||
begin = max(begin, schedule.getValue().first);
|
end = max(end, schedule.getValue());
|
||||||
end = max(end, schedule.getValue().second);
|
else
|
||||||
} else
|
|
||||||
return Optional<unsigned>();
|
return Optional<unsigned>();
|
||||||
|
|
||||||
// If the current loop is annotated as pipeline, extra dependency and
|
// If the current loop is annotated as pipeline, extra dependency and
|
||||||
|
@ -519,7 +536,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
|
||||||
setAttrValue(op, "iter_latency", iterLatency);
|
setAttrValue(op, "iter_latency", iterLatency);
|
||||||
|
|
||||||
// Calculate initial interval.
|
// Calculate initial interval.
|
||||||
auto II = max(getResMinII(op, map), getDepMinII(op, map));
|
auto II = max({getOpMinII(op), getResMinII(map), getDepMinII(op, map)});
|
||||||
setAttrValue(op, "init_interval", II);
|
setAttrValue(op, "init_interval", II);
|
||||||
|
|
||||||
auto tripCount = getUIntAttrValue(op, "trip_count");
|
auto tripCount = getUIntAttrValue(op, "trip_count");
|
||||||
|
@ -579,7 +596,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
|
||||||
|
|
||||||
// Estimate then block.
|
// Estimate then block.
|
||||||
if (auto schedule = estimateBlock(*thenBlock, begin))
|
if (auto schedule = estimateBlock(*thenBlock, begin))
|
||||||
end = max(end, schedule.getValue().second);
|
end = max(end, schedule.getValue());
|
||||||
else
|
else
|
||||||
return Optional<unsigned>();
|
return Optional<unsigned>();
|
||||||
|
|
||||||
|
@ -588,7 +605,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
|
||||||
auto elseBlock = op.getElseBlock();
|
auto elseBlock = op.getElseBlock();
|
||||||
|
|
||||||
if (auto schedule = estimateBlock(*elseBlock, begin))
|
if (auto schedule = estimateBlock(*elseBlock, begin))
|
||||||
end = max(end, schedule.getValue().second);
|
end = max(end, schedule.getValue());
|
||||||
else
|
else
|
||||||
return Optional<unsigned>();
|
return Optional<unsigned>();
|
||||||
}
|
}
|
||||||
|
@ -620,10 +637,10 @@ Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
|
||||||
// Block Scheduler and Estimator
|
// Block Scheduler and Estimator
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
/// Estimate the latency of a block with ASAP scheduling strategy, return a pair
|
/// Estimate the latency of a block with ASAP scheduling strategy, return the
|
||||||
/// of schedule begin and schedule end.
|
/// end level of the schedule.
|
||||||
Optional<std::pair<unsigned, unsigned>>
|
Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,
|
||||||
HLSCppEstimator::estimateBlock(Block &block, unsigned begin) {
|
unsigned begin) {
|
||||||
unsigned blockBegin = begin;
|
unsigned blockBegin = begin;
|
||||||
unsigned blockEnd = begin;
|
unsigned blockEnd = begin;
|
||||||
|
|
||||||
|
@ -642,20 +659,19 @@ HLSCppEstimator::estimateBlock(Block &block, unsigned begin) {
|
||||||
if (auto scheduleEnd = dispatchVisitor(&op, opBegin))
|
if (auto scheduleEnd = dispatchVisitor(&op, opBegin))
|
||||||
opEnd = max(opEnd, scheduleEnd.getValue());
|
opEnd = max(opEnd, scheduleEnd.getValue());
|
||||||
else
|
else
|
||||||
return Optional<std::pair<unsigned, unsigned>>();
|
return Optional<unsigned>();
|
||||||
|
|
||||||
// Update the block schedule begin and end.
|
// Update the block schedule begin and end.
|
||||||
blockBegin = min(blockBegin, opBegin);
|
blockBegin = min(blockBegin, opBegin);
|
||||||
blockEnd = max(blockEnd, opEnd);
|
blockEnd = max(blockEnd, opEnd);
|
||||||
}
|
}
|
||||||
return std::pair<unsigned, unsigned>(blockBegin, blockEnd);
|
return blockEnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HLSCppEstimator::estimateFunc() {
|
void HLSCppEstimator::estimateFunc() {
|
||||||
// Recursively estimate blocks in the function.
|
// Recursively estimate blocks in the function.
|
||||||
if (auto schedule = estimateBlock(func.front(), 0))
|
if (auto schedule = estimateBlock(func.front(), 0))
|
||||||
setAttrValue(func, "latency",
|
setAttrValue(func, "latency", schedule.getValue());
|
||||||
schedule.getValue().second - schedule.getValue().first);
|
|
||||||
else
|
else
|
||||||
setAttrValue(func, "latency", -1);
|
setAttrValue(func, "latency", -1);
|
||||||
}
|
}
|
||||||
|
@ -664,6 +680,15 @@ void HLSCppEstimator::estimateFunc() {
|
||||||
// Entry of scalehls-opt
|
// Entry of scalehls-opt
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
static void getLatencyMap(INIReader &spec, std::string freq,
|
||||||
|
LatencyMap &latencyMap) {
|
||||||
|
latencyMap["fadd"] = spec.GetInteger(freq, "fadd", 4);
|
||||||
|
latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
|
||||||
|
latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
|
||||||
|
latencyMap["fcmp"] = spec.GetInteger(freq, "fcmp", 1);
|
||||||
|
latencyMap["fselect"] = spec.GetInteger(freq, "fselect", 0);
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
||||||
void runOnOperation() override {
|
void runOnOperation() override {
|
||||||
|
@ -673,14 +698,14 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
||||||
llvm::outs() << "error: target spec file parse fail, please refer to "
|
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||||
"--help option and pass in correct file path\n";
|
"--help option and pass in correct file path\n";
|
||||||
|
|
||||||
// TODO: Support estimator initiation from profiling data, constructing a
|
// Collect profiling latency data.
|
||||||
// unique data structure for holding latency and resource information.
|
auto freq = spec.Get("specification", "frequency", "100MHz");
|
||||||
auto freq = spec.Get("spec", "frequency", "200MHz");
|
LatencyMap latencyMap;
|
||||||
auto latency = spec.GetInteger(freq, "op", 0);
|
getLatencyMap(spec, freq, latencyMap);
|
||||||
|
|
||||||
// Estimate performance and resource utilization.
|
// Estimate performance and resource utilization.
|
||||||
for (auto func : getOperation().getOps<FuncOp>()) {
|
for (auto func : getOperation().getOps<FuncOp>()) {
|
||||||
HLSCppEstimator estimator(func);
|
HLSCppEstimator estimator(func, latencyMap);
|
||||||
estimator.estimateFunc();
|
estimator.estimateFunc();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue