[QoREstimation] design parameter data structure; initial impl of longest path search

This commit is contained in:
Hanchen Ye 2020-09-23 18:23:43 -05:00
parent 9049f0cec8
commit ce293ee4c5
3 changed files with 130 additions and 65 deletions

View File

@@ -50,19 +50,4 @@ def LegalPartitionType : AttrConstraint<Or<[
def PartitionTypeAttr : Confined<StrAttr, [LegalPartitionType]> {}
//===----------------------------------------------------------------------===//
// Pragma bind_op Constraints
//===----------------------------------------------------------------------===//
def LegalOpImpl : AttrConstraint<Or<[
CPred<"$_self.cast<StringAttr>().getValue() == \"dsp\"">,
CPred<"$_self.cast<StringAttr>().getValue() == \"fabric\"">,
CPred<"$_self.cast<StringAttr>().getValue() == \"meddsp\"">,
CPred<"$_self.cast<StringAttr>().getValue() == \"fulldsp\"">,
CPred<"$_self.cast<StringAttr>().getValue() == \"maxdsp\"">,
CPred<"$_self.cast<StringAttr>().getValue() == \"primitivedsp\"">
]>>;
def OpImplAttr : Confined<StrAttr, [LegalOpImpl]> {}
#endif // SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD

View File

@@ -6,8 +6,10 @@
#include "Transforms/INIReader.h"
#include "Transforms/Passes.h"
using namespace std;
using namespace mlir;
using namespace scalehls;
using namespace hlscpp;
namespace {
struct PragmaDSE : public PragmaDSEBase<PragmaDSE> {

View File

@@ -6,66 +6,153 @@
#include "Transforms/INIReader.h"
#include "Transforms/Passes.h"
using namespace std;
using namespace mlir;
using namespace scalehls;
using namespace hlscpp;
/// This enum includes all possible parameter kinds for "processes" (function,
/// for/parallel loop, and if).
enum class ProcParam {
// Pragma configurations.
EnablePipeline,
InitialInterval,
UnrollFactor,
// Performance parameters.
LoopBound,
IterLatency,
Latency,
// Resource parameters.
LUT,
DSP,
BRAM
};
/// This enum includes all possible parameter kinds for memories (memref,
/// tensor, and vector).
enum class MemParam {
// Pragma configurations.
StorageType,
StorageImpl,
PartitionType,
PartitionFactor,
InterfaceMode,
// Performance parameters.
ReadNum,
WriteNum,
ReadPorts,
WritePorts,
DepdcyLatency,
DepdcyDistance,
// Resource parameters.
LUT,
BRAM
};
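As a rough sketch of the intended layout (illustrative only, not part of this commit), each enum kind simply names a slot in a fixed-size vector of unsigned values kept per operation or per memory; the 9 and 13 inline elements of the vectors declared further below match the number of ProcParam and MemParam kinds:
// Illustrative sketch only: how a ProcParam kind maps to a vector slot.
// `recordIterLatency` is a hypothetical helper, not part of this commit.
void recordIterLatency(
    llvm::DenseMap<mlir::Operation *, llvm::SmallVector<unsigned, 9>> &procParams,
    mlir::Operation *loop, unsigned latency) {
  auto &params = procParams[loop];
  if (params.size() < 9)
    params.resize(9, 0); // Slots must exist before they can be indexed.
  params[(unsigned)ProcParam::IterLatency] = latency;
}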
/*
namespace {
class QoREstimator {
public:
explicit QoREstimator(std::string toolConfigPath, std::string opLatencyPath) {
INIReader toolConfig(toolConfigPath);
if (toolConfig.ParseError())
llvm::outs() << "error: Tool configuration file parse fail.\n";
explicit QoREstimator(std::string targetSpecPath, std::string opLatencyPath);
INIReader opLatency(opLatencyPath);
if (opLatency.ParseError())
llvm::outs() << "error: Op latency file parse fail.\n";
auto freq = toolConfig.Get("config", "frequency", "200MHz");
auto latency = opLatency.GetInteger(freq, "op", 0);
llvm::outs() << latency << "\n";
/// Get parameters.
unsigned getMemParam(Value *mem, MemParam kind) {
return memParams[mem][(unsigned)kind];
}
unsigned getProcParam(Operation *proc, ProcParam kind) {
return procParams[proc][(unsigned)kind];
}
void estimateLoop(AffineForOp loop);
/// These methods extract static parameters and pragma configurations (if
/// applicable) from the input CDFG, and record them in procParams or memParams.
void analyzePragma(ModuleOp module);
void analyzeModule(ModuleOp module);
/// These methods estimate the performance and resource utilization of a
/// specific MLIR structure, and record the results in procParams or memParams.
void estimateAffineFor(AffineForOp affineFor);
void estimateAffineParallel(AffineParallelOp affineParallel);
void estimateAffineIf(AffineIfOp affineIf);
void estimateFunc(FuncOp func);
void estimateModule(ModuleOp module);
private:
DenseMap<Operation *, SmallVector<unsigned, 9>> procParams;
DenseMap<Value *, SmallVector<unsigned, 13>> memParams;
// Set parameters.
void setMemParam(Value *mem, MemParam kind, unsigned param) {
memParams[mem][(unsigned)kind] = param;
}
void setProcParam(Operation *proc, ProcParam kind, unsigned param) {
procParams[proc][(unsigned)kind] = param;
}
};
} // namespace
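A hypothetical caller of the public accessors might look like the following (sketch only; it assumes the loop's entry in procParams has already been populated by the estimation methods, since getProcParam indexes the stored vector directly):
// Hypothetical usage sketch, not part of this commit.
void reportLoopLatency(QoREstimator &estimator, mlir::AffineForOp loop) {
  // Assumes estimateAffineFor() has already filled this loop's parameters.
  auto latency = estimator.getProcParam(loop.getOperation(), ProcParam::Latency);
  llvm::outs() << "estimated loop latency: " << latency << " cycles\n";
}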
/// Estimator constructor.
QoREstimator::QoREstimator(std::string targetSpecPath,
std::string opLatencyPath) {
INIReader targetSpec(targetSpecPath);
if (targetSpec.ParseError())
llvm::outs() << "error: target spec file parse fail, please refer to "
"--help option and pass in correct file path\n";
INIReader opLatency(opLatencyPath);
if (opLatency.ParseError())
llvm::outs() << "error: Op latency file parse fail, please refer to "
"--help option and pass in correct file path\n";
auto freq = targetSpec.Get("config", "frequency", "200MHz");
auto latency = opLatency.GetInteger(freq, "op", 0);
llvm::outs() << latency << "\n";
}
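The constructor reads a "frequency" key from the [config] section of the target spec, then looks up op latencies in the op latency file under a section named after that frequency. A pair of INI files consistent with the Get/GetInteger calls above might look like this (file names and the single "op" entry are illustrative assumptions):
; target spec (hypothetical contents)
[config]
frequency = 200MHz

; op latency (hypothetical contents)
[200MHz]
op = 5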
/// This method searches for the longest path in a DAG block in an ASAP (As
/// Soon As Possible) manner. Loops, functions, ifs, and other operations
/// owning regions are considered as a whole.
unsigned searchLongestPath(Block &block) {
DenseMap<Value, unsigned> valueReadyTime;
unsigned blockReadyTime = 0;
for (auto &op : block) {
// Calculate ready time of all predecessors.
unsigned allPredsReadyTime = 0;
for (auto operand : op.getOperands()) {
if (operand.getKind() == Value::Kind::BlockArgument)
continue;
else if (operand.getParentBlock() != &block)
continue;
else
allPredsReadyTime = max(allPredsReadyTime, valueReadyTime[operand]);
}
// Calculate ready time of the current operation.
unsigned opReadyTime = allPredsReadyTime + 1;
for (auto result : op.getResults())
valueReadyTime[result] = opReadyTime;
// Update block ready time.
blockReadyTime = max(blockReadyTime, opReadyTime);
}
return blockReadyTime;
}
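A small worked example of the ASAP walk above (illustrative only): every operation gets unit delay, values defined outside the block or passed as block arguments are treated as ready at time 0, and the block's ready time is the longest def-use chain:
//   %0 = mulf %a, %b     // ready at 1 (%a, %b are block arguments)
//   %1 = addf %0, %c     // ready at 2 (waits on %0)
//   %2 = mulf %d, %e     // ready at 1 (independent of %0 and %1)
//   %3 = addf %1, %2     // ready at 3
// searchLongestPath() would return 3 for this block.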
/// For now, estimation for unrolled loops follows the analytical model of
/// COMBA, which is suspected to be wrong. Meanwhile, we assume the absence
/// of function calls in the loop body.
void QoREstimator::estimateLoop(AffineForOp loop) {
auto &body = loop.getLoopBody();
void QoREstimator::estimateAffineFor(AffineForOp affineFor) {
auto &body = affineFor.getLoopBody();
if (body.getBlocks().size() != 1)
loop.emitError("has zero or more than one basic blocks.");
affineFor.emitError("has zero or more than one basic block.");
auto paramOp = dyn_cast<hlscpp::LoopParamOp>(body.front().front());
if (!paramOp) {
loop.emitError("doesn't have a parameter operation at its front.");
return;
}
// TODO: a simple ASAP scheduling.
unsigned iterLatency = paramOp.getNonprocLatency();
for (auto &op : body.front()) {
if (auto subLoop = dyn_cast<mlir::AffineForOp>(op)) {
estimateLoop(subLoop);
auto subParamOp =
dyn_cast<hlscpp::LoopParamOp>(subLoop.getLoopBody().front().front());
iterLatency += subParamOp.getLatency();
}
if (auto subAffineFor = dyn_cast<mlir::AffineForOp>(op))
estimateAffineFor(subAffineFor);
}
unsigned latency = iterLatency;
// When the loop is not completely unrolled.
if (paramOp.getLoopBound() > 1)
latency = iterLatency * paramOp.getLoopBound() * paramOp.getUnrollFactor();
auto builder = Builder(paramOp.getContext());
paramOp.setAttr("latency", builder.getUI32IntegerAttr(latency));
}
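Plugging hypothetical numbers into the formula above (which follows the possibly inaccurate COMBA-style model noted in the comment):
//   iterLatency = 10, LoopBound = 8, UnrollFactor = 2
//   LoopBound > 1:  latency = 10 * 8 * 2 = 160 cycles
//   LoopBound <= 1: latency = iterLatency = 10 cycles (completely unrolled)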
/// For now, function pipelining and task-level dataflow optimizations are not
@@ -74,25 +161,18 @@ void QoREstimator::estimateFunc(FuncOp func) {
if (func.getBlocks().size() != 1)
func.emitError("has zero or more than one basic blocks.");
auto paramOp = dyn_cast<FuncParamOp>(func.front().front());
if (!paramOp) {
func.emitError("doesn't have parameter operations as front.");
return;
}
// Recursively estimate latency of sub-elements, including functions and
// loops. These sub-elements will be considered as normal nodes in the CDFG
// for function latency estimation.
for (auto &op : func.front()) {
if (auto subFunc = dyn_cast<FuncOp>(op))
estimateFunc(subFunc);
else if (auto subLoop = dyn_cast<AffineForOp>(op))
estimateLoop(subLoop);
else if (auto subAffineFor = dyn_cast<AffineForOp>(op))
estimateAffineFor(subAffineFor);
}
// Estimate function latency.
for (auto &op : func.front()) {
}
llvm::outs() << searchLongestPath(func.front()) << "\n";
}
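To illustrate the "normal node" treatment of sub-elements (sketch only): in the current searchLongestPath, a nested affine.for contributes a single unit-delay node to the function-level path, and its previously estimated "latency" attribute is not yet folded in:
//   %v = ... load ...      // ready at 1
//   affine.for ...         // one node, ready at 1 (no in-block operands)
//   ... store %v ...       // ready at 2 (waits on %v)
// searchLongestPath(func.front()) would return 2 for this body.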
void QoREstimator::estimateModule(ModuleOp module) {
@@ -104,12 +184,10 @@ void QoREstimator::estimateModule(ModuleOp module) {
}
}
*/
namespace {
struct QoREstimation : public QoREstimationBase<QoREstimation> {
void runOnOperation() override {
// QoREstimator(toolConfig, opLatency).estimateModule(getOperation());
QoREstimator(targetSpec, opLatency).estimateModule(getOperation());
}
};
} // namespace