[QoREstimation] design parameters data structure; initial impl of longer path search
This commit is contained in:
parent
9049f0cec8
commit
ce293ee4c5
|
@ -50,19 +50,4 @@ def LegalPartitionType : AttrConstraint<Or<[
|
||||||
|
|
||||||
def PartitionTypeAttr : Confined<StrAttr, [LegalPartitionType]> {}
|
def PartitionTypeAttr : Confined<StrAttr, [LegalPartitionType]> {}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Pragma bind_op Constraints
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
def LegalOpImpl : AttrConstraint<Or<[
|
|
||||||
CPred<"$_self.cast<StringAttr>().getValue() == \"dsp\"">,
|
|
||||||
CPred<"$_self.cast<StringAttr>().getValue() == \"fabric\"">,
|
|
||||||
CPred<"$_self.cast<StringAttr>().getValue() == \"meddsp\"">,
|
|
||||||
CPred<"$_self.cast<StringAttr>().getValue() == \"fulldsp\"">,
|
|
||||||
CPred<"$_self.cast<StringAttr>().getValue() == \"maxdsp\"">,
|
|
||||||
CPred<"$_self.cast<StringAttr>().getValue() == \"primitivedsp\"">
|
|
||||||
]>>;
|
|
||||||
|
|
||||||
def OpImplAttr : Confined<StrAttr, [LegalOpImpl]> {}
|
|
||||||
|
|
||||||
#endif // SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD
|
#endif // SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD
|
||||||
|
|
|
@ -6,8 +6,10 @@
|
||||||
#include "Transforms/INIReader.h"
|
#include "Transforms/INIReader.h"
|
||||||
#include "Transforms/Passes.h"
|
#include "Transforms/Passes.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
using namespace mlir;
|
using namespace mlir;
|
||||||
using namespace scalehls;
|
using namespace scalehls;
|
||||||
|
using namespace hlscpp;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct PragmaDSE : public PragmaDSEBase<PragmaDSE> {
|
struct PragmaDSE : public PragmaDSEBase<PragmaDSE> {
|
||||||
|
|
|
@ -6,66 +6,153 @@
|
||||||
#include "Transforms/INIReader.h"
|
#include "Transforms/INIReader.h"
|
||||||
#include "Transforms/Passes.h"
|
#include "Transforms/Passes.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
using namespace mlir;
|
using namespace mlir;
|
||||||
using namespace scalehls;
|
using namespace scalehls;
|
||||||
|
using namespace hlscpp;
|
||||||
|
|
||||||
|
/// This class includes all possible parameter kinds for "processes" (function,
|
||||||
|
/// for/parallel loop, and if).
|
||||||
|
enum class ProcParam {
|
||||||
|
// Pragma configurations.
|
||||||
|
EnablePipeline,
|
||||||
|
InitialInterval,
|
||||||
|
UnrollFactor,
|
||||||
|
|
||||||
|
// Performance parameters.
|
||||||
|
LoopBound,
|
||||||
|
IterLatency,
|
||||||
|
Latency,
|
||||||
|
|
||||||
|
// Resource parameters.
|
||||||
|
LUT,
|
||||||
|
DSP,
|
||||||
|
BRAM
|
||||||
|
};
|
||||||
|
|
||||||
|
/// This class includes all possible parameter kinds for memories (memref,
|
||||||
|
/// tensor, and vector).
|
||||||
|
enum class MemParam {
|
||||||
|
// Pragma configurations.
|
||||||
|
StorageType,
|
||||||
|
StorageImpl,
|
||||||
|
PartitionType,
|
||||||
|
PartitionFactor,
|
||||||
|
InterfaceMode,
|
||||||
|
|
||||||
|
// Performance parameters.
|
||||||
|
ReadNum,
|
||||||
|
WriteNum,
|
||||||
|
ReadPorts,
|
||||||
|
WritePorts,
|
||||||
|
DepdcyLatency,
|
||||||
|
DepdcyDistance,
|
||||||
|
|
||||||
|
// Resource parameters.
|
||||||
|
LUT,
|
||||||
|
BRAM
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
namespace {
|
namespace {
|
||||||
class QoREstimator {
|
class QoREstimator {
|
||||||
public:
|
public:
|
||||||
explicit QoREstimator(std::string toolConfigPath, std::string opLatencyPath) {
|
explicit QoREstimator(std::string targetSpecPath, std::string opLatencyPath);
|
||||||
INIReader toolConfig(toolConfigPath);
|
|
||||||
if (toolConfig.ParseError())
|
|
||||||
llvm::outs() << "error: Tool configuration file parse fail.\n";
|
|
||||||
|
|
||||||
INIReader opLatency(opLatencyPath);
|
/// Get parameters.
|
||||||
if (opLatency.ParseError())
|
unsigned getMemParam(Value *mem, MemParam kind) {
|
||||||
llvm::outs() << "error: Op latency file parse fail.\n";
|
return memParams[mem][(unsigned)kind];
|
||||||
|
}
|
||||||
auto freq = toolConfig.Get("config", "frequency", "200MHz");
|
unsigned getProcParam(Operation *proc, ProcParam kind) {
|
||||||
auto latency = opLatency.GetInteger(freq, "op", 0);
|
return procParams[proc][(unsigned)kind];
|
||||||
llvm::outs() << latency << "\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void estimateLoop(AffineForOp loop);
|
/// These methods can extract static parameters and pragma configurations (if
|
||||||
|
/// applicable) of the input CDFG, and update them in procParams or memParams.
|
||||||
|
void analyzePragma(ModuleOp module);
|
||||||
|
void analyzeModule(ModuleOp module);
|
||||||
|
|
||||||
|
/// These methods can estimate the performance and resource utilization of a
|
||||||
|
/// specific MLIR structure, and update them in procParams or memParams.
|
||||||
|
void estimateAffineFor(AffineForOp affineFor);
|
||||||
|
void estimateAffineParallel(AffineParallelOp affineParallel);
|
||||||
|
void estimateAffineIf(AffineIfOp affineIf);
|
||||||
void estimateFunc(FuncOp func);
|
void estimateFunc(FuncOp func);
|
||||||
void estimateModule(ModuleOp module);
|
void estimateModule(ModuleOp module);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
DenseMap<Operation *, SmallVector<unsigned, 9>> procParams;
|
||||||
|
DenseMap<Value *, SmallVector<unsigned, 13>> memParams;
|
||||||
|
|
||||||
|
// Set parameters.
|
||||||
|
void setMemParam(Value *mem, unsigned kind, unsigned param) {
|
||||||
|
memParams[mem][(unsigned)kind] = param;
|
||||||
|
}
|
||||||
|
void setProcParam(Operation *proc, MemParam kind, unsigned param) {
|
||||||
|
procParams[proc][(unsigned)kind] = param;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
/// Estimator constructor.
|
||||||
|
QoREstimator::QoREstimator(std::string targetSpecPath,
|
||||||
|
std::string opLatencyPath) {
|
||||||
|
INIReader targetSpec(targetSpecPath);
|
||||||
|
if (targetSpec.ParseError())
|
||||||
|
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||||
|
"--help option and pass in correct file path\n";
|
||||||
|
|
||||||
|
INIReader opLatency(opLatencyPath);
|
||||||
|
if (opLatency.ParseError())
|
||||||
|
llvm::outs() << "error: Op latency file parse fail, please refer to "
|
||||||
|
"--help option and pass in correct file path\n";
|
||||||
|
|
||||||
|
auto freq = targetSpec.Get("config", "frequency", "200MHz");
|
||||||
|
auto latency = opLatency.GetInteger(freq, "op", 0);
|
||||||
|
llvm::outs() << latency << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This method searches for the longest path in a DAG block in an ASAP (As
|
||||||
|
/// Soon As Possible) manner. Loops, functions, ifs, and other operations owning
|
||||||
|
/// regions are each considered as a whole.
|
||||||
|
unsigned searchLongestPath(Block &block) {
|
||||||
|
DenseMap<Value, unsigned> valueReadyTime;
|
||||||
|
unsigned blockReadyTime = 0;
|
||||||
|
for (auto &op : block) {
|
||||||
|
|
||||||
|
// Calculate ready time of all predecessors.
|
||||||
|
unsigned allPredsReadyTime = 0;
|
||||||
|
for (auto operand : op.getOperands()) {
|
||||||
|
if (operand.getKind() == Value::Kind::BlockArgument)
|
||||||
|
continue;
|
||||||
|
else if (operand.getParentBlock() != &block)
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
allPredsReadyTime = max(allPredsReadyTime, valueReadyTime[operand]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate ready time of the current operation.
|
||||||
|
unsigned opReadyTime = allPredsReadyTime + 1;
|
||||||
|
for (auto result : op.getResults())
|
||||||
|
valueReadyTime[result] = opReadyTime;
|
||||||
|
|
||||||
|
// Update block ready time.
|
||||||
|
blockReadyTime = max(blockReadyTime, opReadyTime);
|
||||||
|
}
|
||||||
|
return blockReadyTime;
|
||||||
|
}
|
||||||
|
|
||||||
/// For now, estimation for unrolled loops follows the analytical model
|
/// For now, estimation for unrolled loops follows the analytical model
|
||||||
/// of COMBA, which is suspected to be wrong. Meanwhile, we assume the absence
|
/// of COMBA, which is suspected to be wrong. Meanwhile, we assume the absence
|
||||||
/// of function calls in the loop body.
|
/// of function calls in the loop body.
|
||||||
void QoREstimator::estimateLoop(AffineForOp loop) {
|
void QoREstimator::estimateAffineFor(AffineForOp affineFor) {
|
||||||
auto &body = loop.getLoopBody();
|
auto &body = affineFor.getLoopBody();
|
||||||
if (body.getBlocks().size() != 1)
|
if (body.getBlocks().size() != 1)
|
||||||
loop.emitError("has zero or more than one basic blocks.");
|
affineFor.emitError("has zero or more than one basic blocks.");
|
||||||
|
|
||||||
auto paramOp = dyn_cast<hlscpp::LoopParamOp>(body.front().front());
|
|
||||||
if (!paramOp) {
|
|
||||||
loop.emitError("doesn't have parameter operations as front.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: a simple AEAP scheduling.
|
|
||||||
unsigned iterLatency = paramOp.getNonprocLatency();
|
|
||||||
for (auto &op : body.front()) {
|
for (auto &op : body.front()) {
|
||||||
if (auto subLoop = dyn_cast<mlir::AffineForOp>(op)) {
|
if (auto subAffineFor = dyn_cast<mlir::AffineForOp>(op))
|
||||||
estimateLoop(subLoop);
|
estimateAffineFor(subAffineFor);
|
||||||
auto subParamOp =
|
|
||||||
dyn_cast<hlscpp::LoopParamOp>(subLoop.getLoopBody().front().front());
|
|
||||||
iterLatency += subParamOp.getLatency();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned latency = iterLatency;
|
|
||||||
// When loop is not completely unrolled.
|
|
||||||
if (paramOp.getLoopBound() > 1)
|
|
||||||
latency = iterLatency * paramOp.getLoopBound() * paramOp.getUnrollFactor();
|
|
||||||
auto builder = Builder(paramOp.getContext());
|
|
||||||
paramOp.setAttr("latency", builder.getUI32IntegerAttr(latency));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// For now, function pipelining and task-level dataflow optimizations are not
|
/// For now, function pipelining and task-level dataflow optimizations are not
|
||||||
|
@ -74,25 +161,18 @@ void QoREstimator::estimateFunc(FuncOp func) {
|
||||||
if (func.getBlocks().size() != 1)
|
if (func.getBlocks().size() != 1)
|
||||||
func.emitError("has zero or more than one basic blocks.");
|
func.emitError("has zero or more than one basic blocks.");
|
||||||
|
|
||||||
auto paramOp = dyn_cast<FuncParamOp>(func.front().front());
|
|
||||||
if (!paramOp) {
|
|
||||||
func.emitError("doesn't have parameter operations as front.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursively estimate latency of sub-elements, including functions and
|
// Recursively estimate latency of sub-elements, including functions and
|
||||||
// loops. These sub-elements will be considered as a normal node in the CDFG
|
// loops. These sub-elements will be considered as a normal node in the CDFG
|
||||||
// for function latency estimation.
|
// for function latency estimation.
|
||||||
for (auto &op : func.front()) {
|
for (auto &op : func.front()) {
|
||||||
if (auto subFunc = dyn_cast<FuncOp>(op))
|
if (auto subFunc = dyn_cast<FuncOp>(op))
|
||||||
estimateFunc(subFunc);
|
estimateFunc(subFunc);
|
||||||
else if (auto subLoop = dyn_cast<AffineForOp>(op))
|
else if (auto subAffineFor = dyn_cast<AffineForOp>(op))
|
||||||
estimateLoop(subLoop);
|
estimateAffineFor(subAffineFor);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Estimate function latency.
|
// Estimate function latency.
|
||||||
for (auto &op : func.front()) {
|
llvm::outs() << searchLongestPath(func.front()) << "\n";
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void QoREstimator::estimateModule(ModuleOp module) {
|
void QoREstimator::estimateModule(ModuleOp module) {
|
||||||
|
@ -104,12 +184,10 @@ void QoREstimator::estimateModule(ModuleOp module) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct QoREstimation : public QoREstimationBase<QoREstimation> {
|
struct QoREstimation : public QoREstimationBase<QoREstimation> {
|
||||||
void runOnOperation() override {
|
void runOnOperation() override {
|
||||||
// QoREstimator(toolConfig, opLatency).estimateModule(getOperation());
|
QoREstimator(targetSpec, opLatency).estimateModule(getOperation());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
Loading…
Reference in New Issue