[QoREstimation] split out QoREstimation.h, StaticParam.h, and Visitor.h; refine QoREstimation code structure with newly created classes; a relatively complete searchLongestPath implementation

This commit is contained in:
Hanchen Ye 2020-09-24 00:30:03 -05:00
parent ce293ee4c5
commit 21968283d1
7 changed files with 505 additions and 303 deletions

View File

@ -0,0 +1,78 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#ifndef SCALEHLS_TRANSFORMS_QORESTIMATION_H
#define SCALEHLS_TRANSFORMS_QORESTIMATION_H
#include "StaticParam.h"
#include "Visitor.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/TypeSwitch.h"
namespace mlir {
namespace scalehls {
//===----------------------------------------------------------------------===//
// HLSCppAnalyzer Class Declaration
//===----------------------------------------------------------------------===//
/// Pre-estimation analysis visitor: walks the IR and records static
/// parameters and pragma configurations into the shared ProcParam/MemParam
/// tables that QoREstimator consumes afterwards.
class HLSCppAnalyzer : public HLSCppVisitorBase<HLSCppAnalyzer, bool> {
public:
  explicit HLSCppAnalyzer(ProcParam &procParam, MemParam &memParam)
      : procParam(procParam), memParam(memParam) {}

  // Shared parameter tables, held by reference: the caller owns the storage
  // and can hand the populated tables on to QoREstimator.
  ProcParam &procParam;
  MemParam &memParam;

  // Operations without a dedicated analyzer are treated as handled
  // (returning true means "analyzed successfully"), so the analysis never
  // aborts on operations it does not model.
  bool visitUnhandledOp(Operation *op) { return true; }

  using HLSCppVisitorBase::visitOp;
  bool visitOp(AffineForOp op);
  bool visitOp(AffineParallelOp op);
  bool visitOp(AffineIfOp op);

  // Analysis entry points, from fine to coarse granularity: a single
  // operation, one function, one block, or a whole module.
  void analyzeOperation(Operation *op);
  void analyzeFunc(FuncOp func);
  void analyzeBlock(Block &block);
  void analyzeModule(ModuleOp module);
};
//===----------------------------------------------------------------------===//
// QoREstimator Class Declaration
//===----------------------------------------------------------------------===//
/// Quality-of-result estimator: computes performance (latency) results for
/// the IR, reading the static parameters previously recorded by
/// HLSCppAnalyzer and writing its own results back into the same tables.
class QoREstimator : public HLSCppVisitorBase<QoREstimator, bool> {
public:
  /// The constructor parses the target-specification and operation-latency
  /// configuration files found at the given paths.
  explicit QoREstimator(ProcParam &procParam, MemParam &memParam,
                        std::string targetSpecPath, std::string opLatencyPath);

  // Shared parameter tables, populated by HLSCppAnalyzer and updated here.
  ProcParam &procParam;
  MemParam &memParam;

  // Operations without a dedicated estimator are treated as handled.
  bool visitUnhandledOp(Operation *op) { return true; }

  using HLSCppVisitorBase::visitOp;

  /// These methods can estimate the performance and resource utilization of a
  /// specific MLIR structure, and update them in procParam or memParam.
  bool visitOp(AffineForOp op);
  bool visitOp(AffineParallelOp op);
  bool visitOp(AffineIfOp op);

  /// These methods are used for searching the longest path in a DAG.
  void updateValueTimeStamp(Operation *currentOp, unsigned opTimeStamp,
                            DenseMap<Value, unsigned> &valueTimeStampMap);
  unsigned searchLongestPath(Block &block);

  /// MLIR component estimators, from fine to coarse granularity.
  void estimateOperation(Operation *op);
  void estimateFunc(FuncOp func);
  void estimateBlock(Block &block);
  void estimateModule(ModuleOp module);
};
} // namespace scalehls
} // namespace mlir
#endif // SCALEHLS_TRANSFORMS_QORESTIMATION_H

View File

@ -0,0 +1,94 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#ifndef SCALEHLS_TRANSFORMS_STATICPARAM_H
#define SCALEHLS_TRANSFORMS_STATICPARAM_H
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
namespace mlir {
namespace scalehls {
//===----------------------------------------------------------------------===//
// ParamBase class
//===----------------------------------------------------------------------===//
/// Generic parameter table mapping a key (e.g. an Operation* or a Value) to a
/// fixed-size array of unsigned parameters indexed by a ParamKind enum. The
/// enum is expected to declare a trailing KindNum member holding the number
/// of kinds.
template <typename ParamKind, typename KeyType> class ParamBase {
public:
  /// Create (or reset) the parameter vector of `key`, zero-initialized.
  ///
  /// Uses assign() rather than repeated push_back() so that calling init()
  /// twice on the same key resets the entry instead of appending a second
  /// run of zeros and shifting every kind index out of place.
  void init(KeyType key) {
    Params[key].assign((unsigned)ParamKind::KindNum, 0);
  }

  /// Read parameter `kind` of `key`. The key must have been init()-ed first;
  /// otherwise DenseMap default-constructs an empty vector and the index is
  /// out of bounds.
  unsigned get(KeyType key, ParamKind kind) {
    return Params[key][(unsigned)kind];
  }

  /// Write parameter `kind` of `key`. Same init() precondition as get().
  void set(KeyType key, ParamKind kind, unsigned param) {
    Params[key][(unsigned)kind] = param;
  }

private:
  DenseMap<KeyType, SmallVector<unsigned, 16>> Params;
};
//===----------------------------------------------------------------------===//
// ProcParam and MemParam classes
//===----------------------------------------------------------------------===//
/// Parameter kinds tracked for each "process" (function, for/parallel loop,
/// and if). KindNum must remain last: ParamBase uses it as the kind count.
enum class ProcParamKind {
  // Process-related pragma configurations.
  EnablePipeline,
  InitialInterval,
  UnrollFactor,

  // Performance parameters.
  LoopBound,
  IterLatency,
  Latency,

  // Resource parameters.
  LUT,
  BRAM,
  DSP,

  KindNum = DSP + 1
};
/// Parameter kinds tracked for each memory (memref, tensor, and vector).
/// KindNum must remain last: ParamBase uses it as the kind count.
enum class MemParamKind {
  // Pragma configurations.
  StorageType,
  StorageImpl,
  PartitionType,
  PartitionFactor,
  InterfaceMode,

  // Performance parameters.
  ReadNum,
  WriteNum,
  ReadPorts,
  WritePorts,
  DepdcyLatency,  // presumably loop-carried dependency latency — TODO confirm
  DepdcyDistance, // presumably dependency distance in iterations — TODO confirm

  // Resource parameters.
  LUT,
  BRAM,

  KindNum = BRAM + 1
};
/// This class includes all possible parameters kind for "processes" (function,
/// for/parallel loop, and if), keyed by the operation defining the process.
class ProcParam : public ParamBase<ProcParamKind, Operation *> {};
/// This class includes all possible parameters kind for memories (memref,
/// tensor, and vector), keyed by the SSA value of the memory.
class MemParam : public ParamBase<MemParamKind, Value> {};
} // namespace scalehls
} // namespace mlir
#endif // SCALEHLS_TRANSFORMS_STATICPARAM_H

187
include/Visitor.h Normal file
View File

@ -0,0 +1,187 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#ifndef SCALEHLS_VISITOR_H
#define SCALEHLS_VISITOR_H
#include "Dialect/HLSCpp/HLSCpp.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
namespace mlir {
namespace scalehls {
using namespace hlscpp;
/// This class is a visitor for SSACFG operation nodes.
///
/// CRTP base class: `ConcreteType` derives from this and overrides
/// visitOp(OpType, ExtraArgs...) for the operations it models; every other
/// supported operation funnels into visitUnhandledOp(), and operations
/// outside the supported set below hit visitInvalidOp().
template <typename ConcreteType, typename ResultType, typename... ExtraArgs>
class HLSCppVisitorBase {
public:
  /// Dispatch `op` to the concrete visitor's matching visitOp() overload.
  ///
  /// NOTE(review): relies on llvm::TypeSwitch, but this header does not
  /// include llvm/ADT/TypeSwitch.h itself — confirm every includer provides
  /// it (QoREstimation.h does; EmitHLSCpp presumably too).
  ResultType dispatchVisitor(Operation *op, ExtraArgs... args) {
    auto *thisCast = static_cast<ConcreteType *>(this);
    return TypeSwitch<Operation *, ResultType>(op)
        .template Case<
            // Affine statements.
            AffineForOp, AffineIfOp, AffineParallelOp, AffineApplyOp,
            AffineMaxOp, AffineMinOp, AffineLoadOp, AffineStoreOp,
            AffineYieldOp, AffineVectorLoadOp, AffineVectorStoreOp,
            AffineDmaStartOp, AffineDmaWaitOp,
            // Memref-related statements.
            AllocOp, AllocaOp, LoadOp, StoreOp, DeallocOp, DmaStartOp,
            DmaWaitOp, AtomicRMWOp, GenericAtomicRMWOp, AtomicYieldOp,
            MemRefCastOp, ViewOp, SubViewOp,
            // Tensor-related statements.
            TensorLoadOp, TensorStoreOp, ExtractElementOp, TensorFromElementsOp,
            SplatOp, TensorCastOp, DimOp, RankOp,
            // Unary expressions.
            AbsFOp, CeilFOp, NegFOp, CosOp, SinOp, TanhOp, SqrtOp, RsqrtOp,
            ExpOp, Exp2Op, LogOp, Log2Op, Log10Op,
            // Float binary expressions.
            CmpFOp, AddFOp, SubFOp, MulFOp, DivFOp, RemFOp,
            // Integer binary expressions.
            CmpIOp, AddIOp, SubIOp, MulIOp, SignedDivIOp, SignedRemIOp,
            UnsignedDivIOp, UnsignedRemIOp, XOrOp, AndOp, OrOp, ShiftLeftOp,
            SignedShiftRightOp, UnsignedShiftRightOp,
            // Complex expressions.
            AddCFOp, SubCFOp, ImOp, ReOp, CreateComplexOp,
            // Special operations.
            SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp,
            SignExtendIOp, IndexCastOp, CallOp, ReturnOp, AssignOp, EndOp,
            // Pragma operations.
            ApplyPragmasOp, PragmaPipelineOp, PragmaUnrollOp,
            PragmaArrayPartitionOp>([&](auto opNode) -> ResultType {
          return thisCast->visitOp(opNode, args...);
        })
        .Default([&](auto opNode) -> ResultType {
          return thisCast->visitInvalidOp(op, args...);
        });
  }

  /// This callback is invoked on any invalid operations: it emits an error
  /// on the offending operation and terminates the process via abort().
  /// NOTE(review): abort() needs <cstdlib> — presumably available
  /// transitively here; confirm.
  ResultType visitInvalidOp(Operation *op, ExtraArgs... args) {
    op->emitOpError("is unsupported operation.");
    abort();
  }

  /// This callback is invoked on any operations that are not handled by the
  /// concrete visitor; it returns a value-initialized ResultType.
  ResultType visitUnhandledOp(Operation *op, ExtraArgs... args) {
    return ResultType();
  }

// Generate one visitOp(OPTYPE) overload per supported operation, each simply
// forwarding to the concrete visitor's visitUnhandledOp() unless the concrete
// visitor shadows it with its own overload.
#define HANDLE(OPTYPE)                                                         \
  ResultType visitOp(OPTYPE op, ExtraArgs... args) {                           \
    return static_cast<ConcreteType *>(this)->visitUnhandledOp(op, args...);   \
  }

  // Affine statements.
  HANDLE(AffineForOp);
  HANDLE(AffineIfOp);
  HANDLE(AffineParallelOp);
  HANDLE(AffineApplyOp);
  HANDLE(AffineMaxOp);
  HANDLE(AffineMinOp);
  HANDLE(AffineLoadOp);
  HANDLE(AffineStoreOp);
  HANDLE(AffineYieldOp);
  HANDLE(AffineVectorLoadOp);
  HANDLE(AffineVectorStoreOp);
  HANDLE(AffineDmaStartOp);
  HANDLE(AffineDmaWaitOp);

  // Memref-related statements.
  HANDLE(AllocOp);
  HANDLE(AllocaOp);
  HANDLE(LoadOp);
  HANDLE(StoreOp);
  HANDLE(DeallocOp);
  HANDLE(DmaStartOp);
  HANDLE(DmaWaitOp);
  HANDLE(AtomicRMWOp);
  HANDLE(GenericAtomicRMWOp);
  HANDLE(AtomicYieldOp);
  HANDLE(MemRefCastOp);
  HANDLE(ViewOp);
  HANDLE(SubViewOp);

  // Tensor-related statements.
  HANDLE(TensorLoadOp);
  HANDLE(TensorStoreOp);
  HANDLE(ExtractElementOp);
  HANDLE(TensorFromElementsOp);
  HANDLE(SplatOp);
  HANDLE(TensorCastOp);
  HANDLE(DimOp);
  HANDLE(RankOp);

  // Unary expressions.
  HANDLE(AbsFOp);
  HANDLE(CeilFOp);
  HANDLE(NegFOp);
  HANDLE(CosOp);
  HANDLE(SinOp);
  HANDLE(TanhOp);
  HANDLE(SqrtOp);
  HANDLE(RsqrtOp);
  HANDLE(ExpOp);
  HANDLE(Exp2Op);
  HANDLE(LogOp);
  HANDLE(Log2Op);
  HANDLE(Log10Op);

  // Float binary expressions.
  HANDLE(CmpFOp);
  HANDLE(AddFOp);
  HANDLE(SubFOp);
  HANDLE(MulFOp);
  HANDLE(DivFOp);
  HANDLE(RemFOp);

  // Integer binary expressions.
  HANDLE(CmpIOp);
  HANDLE(AddIOp);
  HANDLE(SubIOp);
  HANDLE(MulIOp);
  HANDLE(SignedDivIOp);
  HANDLE(SignedRemIOp);
  HANDLE(UnsignedDivIOp);
  HANDLE(UnsignedRemIOp);
  HANDLE(XOrOp);
  HANDLE(AndOp);
  HANDLE(OrOp);
  HANDLE(ShiftLeftOp);
  HANDLE(SignedShiftRightOp);
  HANDLE(UnsignedShiftRightOp);

  // Complex expressions.
  HANDLE(AddCFOp);
  HANDLE(SubCFOp);
  HANDLE(ImOp);
  HANDLE(ReOp);
  HANDLE(CreateComplexOp);

  // Special operations.
  HANDLE(SelectOp);
  HANDLE(ConstantOp);
  HANDLE(CopySignOp);
  HANDLE(TruncateIOp);
  HANDLE(ZeroExtendIOp);
  HANDLE(SignExtendIOp);
  HANDLE(IndexCastOp);
  HANDLE(CallOp);
  HANDLE(ReturnOp);
  HANDLE(AssignOp);
  HANDLE(EndOp);

  // Pragma operations.
  HANDLE(ApplyPragmasOp);
  HANDLE(PragmaPipelineOp);
  HANDLE(PragmaUnrollOp);
  HANDLE(PragmaArrayPartitionOp);
#undef HANDLE
};
} // namespace scalehls
} // namespace mlir
#endif // SCALEHLS_VISITOR_H

View File

@ -4,6 +4,7 @@
#include "EmitHLSCpp.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "Visitor.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/SCF/SCF.h"
@ -26,9 +27,6 @@ using namespace hlscpp;
//===----------------------------------------------------------------------===//
// Some Base Classes
//
// These classes should be factored out, and can be inherited by emitters
// targeting various backends (e.g., Xilinx Vivado HLS, Intel FPGAs, etc.).
//===----------------------------------------------------------------------===//
namespace {
@ -130,176 +128,6 @@ SmallString<8> HLSCppEmitterBase::getName(Value val) {
return state.nameTable[val];
}
namespace {
/// This class is a visitor for SSACFG operation nodes.
template <typename ConcreteType, typename ResultType, typename... ExtraArgs>
class HLSCppVisitorBase {
public:
ResultType dispatchVisitor(Operation *op, ExtraArgs... args) {
auto *thisCast = static_cast<ConcreteType *>(this);
return TypeSwitch<Operation *, ResultType>(op)
.template Case<
// Affine statements.
AffineForOp, AffineIfOp, AffineParallelOp, AffineApplyOp,
AffineMaxOp, AffineMinOp, AffineLoadOp, AffineStoreOp,
AffineYieldOp, AffineVectorLoadOp, AffineVectorStoreOp,
AffineDmaStartOp, AffineDmaWaitOp,
// Memref-related statements.
AllocOp, AllocaOp, LoadOp, StoreOp, DeallocOp, DmaStartOp,
DmaWaitOp, AtomicRMWOp, GenericAtomicRMWOp, AtomicYieldOp,
MemRefCastOp, ViewOp, SubViewOp,
// Tensor-related statements.
TensorLoadOp, TensorStoreOp, ExtractElementOp, TensorFromElementsOp,
SplatOp, TensorCastOp, DimOp, RankOp,
// Unary expressions.
AbsFOp, CeilFOp, NegFOp, CosOp, SinOp, TanhOp, SqrtOp, RsqrtOp,
ExpOp, Exp2Op, LogOp, Log2Op, Log10Op,
// Float binary expressions.
CmpFOp, AddFOp, SubFOp, MulFOp, DivFOp, RemFOp,
// Integer binary expressions.
CmpIOp, AddIOp, SubIOp, MulIOp, SignedDivIOp, SignedRemIOp,
UnsignedDivIOp, UnsignedRemIOp, XOrOp, AndOp, OrOp, ShiftLeftOp,
SignedShiftRightOp, UnsignedShiftRightOp,
// Complex expressions.
AddCFOp, SubCFOp, ImOp, ReOp, CreateComplexOp,
// Special operations.
SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp,
SignExtendIOp, IndexCastOp, CallOp, ReturnOp, AssignOp, EndOp,
// Pragma operations.
ApplyPragmasOp, PragmaPipelineOp, PragmaUnrollOp,
PragmaArrayPartitionOp>([&](auto opNode) -> ResultType {
return thisCast->visitOp(opNode, args...);
})
.Default([&](auto opNode) -> ResultType {
return thisCast->visitInvalidOp(op, args...);
});
}
/// This callback is invoked on any invalid operations.
ResultType visitInvalidOp(Operation *op, ExtraArgs... args) {
op->emitOpError("is unsupported operation.");
abort();
}
/// This callback is invoked on any operations that are not handled by the
/// concrete visitor.
ResultType visitUnhandledOp(Operation *op, ExtraArgs... args) {
return ResultType();
}
#define HANDLE(OPTYPE) \
ResultType visitOp(OPTYPE op, ExtraArgs... args) { \
return static_cast<ConcreteType *>(this)->visitUnhandledOp(op, args...); \
}
// Affine statements.
HANDLE(AffineForOp);
HANDLE(AffineIfOp);
HANDLE(AffineParallelOp);
HANDLE(AffineApplyOp);
HANDLE(AffineMaxOp);
HANDLE(AffineMinOp);
HANDLE(AffineLoadOp);
HANDLE(AffineStoreOp);
HANDLE(AffineYieldOp);
HANDLE(AffineVectorLoadOp);
HANDLE(AffineVectorStoreOp);
HANDLE(AffineDmaStartOp);
HANDLE(AffineDmaWaitOp);
// Memref-related statements.
HANDLE(AllocOp);
HANDLE(AllocaOp);
HANDLE(LoadOp);
HANDLE(StoreOp);
HANDLE(DeallocOp);
HANDLE(DmaStartOp);
HANDLE(DmaWaitOp);
HANDLE(AtomicRMWOp);
HANDLE(GenericAtomicRMWOp);
HANDLE(AtomicYieldOp);
HANDLE(MemRefCastOp);
HANDLE(ViewOp);
HANDLE(SubViewOp);
// Tensor-related statements.
HANDLE(TensorLoadOp);
HANDLE(TensorStoreOp);
HANDLE(ExtractElementOp);
HANDLE(TensorFromElementsOp);
HANDLE(SplatOp);
HANDLE(TensorCastOp);
HANDLE(DimOp);
HANDLE(RankOp);
// Unary expressions.
HANDLE(AbsFOp);
HANDLE(CeilFOp);
HANDLE(NegFOp);
HANDLE(CosOp);
HANDLE(SinOp);
HANDLE(TanhOp);
HANDLE(SqrtOp);
HANDLE(RsqrtOp);
HANDLE(ExpOp);
HANDLE(Exp2Op);
HANDLE(LogOp);
HANDLE(Log2Op);
HANDLE(Log10Op);
// Float binary expressions.
HANDLE(CmpFOp);
HANDLE(AddFOp);
HANDLE(SubFOp);
HANDLE(MulFOp);
HANDLE(DivFOp);
HANDLE(RemFOp);
// Integer binary expressions.
HANDLE(CmpIOp);
HANDLE(AddIOp);
HANDLE(SubIOp);
HANDLE(MulIOp);
HANDLE(SignedDivIOp);
HANDLE(SignedRemIOp);
HANDLE(UnsignedDivIOp);
HANDLE(UnsignedRemIOp);
HANDLE(XOrOp);
HANDLE(AndOp);
HANDLE(OrOp);
HANDLE(ShiftLeftOp);
HANDLE(SignedShiftRightOp);
HANDLE(UnsignedShiftRightOp);
// Complex expressions.
HANDLE(AddCFOp);
HANDLE(SubCFOp);
HANDLE(ImOp);
HANDLE(ReOp);
HANDLE(CreateComplexOp);
// Special operations.
HANDLE(SelectOp);
HANDLE(ConstantOp);
HANDLE(CopySignOp);
HANDLE(TruncateIOp);
HANDLE(ZeroExtendIOp);
HANDLE(SignExtendIOp);
HANDLE(IndexCastOp);
HANDLE(CallOp);
HANDLE(ReturnOp);
HANDLE(AssignOp);
HANDLE(EndOp);
// Pragma operations.
HANDLE(ApplyPragmasOp);
HANDLE(PragmaPipelineOp);
HANDLE(PragmaUnrollOp);
HANDLE(PragmaArrayPartitionOp);
#undef HANDLE
};
} // namespace
//===----------------------------------------------------------------------===//
// ModuleEmitter Class Declaration
//===----------------------------------------------------------------------===//
@ -655,7 +483,7 @@ private:
} // namespace
//===----------------------------------------------------------------------===//
// ModuleEmitter Class Implementation
// ModuleEmitter Class Definition
//===----------------------------------------------------------------------===//
/// Affine statement emitters.

View File

@ -5,6 +5,7 @@
#include "Dialect/HLSCpp/HLSCpp.h"
#include "Transforms/INIReader.h"
#include "Transforms/Passes.h"
#include "Transforms/QoREstimation.h"
using namespace std;
using namespace mlir;

View File

@ -2,100 +2,61 @@
//
//===----------------------------------------------------------------------===//
#include "Transforms/QoREstimation.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "Transforms/INIReader.h"
#include "Transforms/Passes.h"
#include "Visitor.h"
using namespace std;
using namespace mlir;
using namespace scalehls;
using namespace hlscpp;
/// This class includes all possible parameters kind for "processes" (function,
/// for/parallel loop, and if).
enum class ProcParam {
// Pragam configurations.
EnablePipeline,
InitialInterval,
UnrollFactor,
//===----------------------------------------------------------------------===//
// HLSCppAnalyzer Class Definition
//===----------------------------------------------------------------------===//
// Performance parameters.
LoopBound,
IterLatency,
Latency,
bool HLSCppAnalyzer::visitOp(AffineForOp op) { return true; }
// Resource parameters.
LUT,
DSP,
BRAM
};
bool HLSCppAnalyzer::visitOp(AffineParallelOp op) { return true; }
/// This class includes all possible parameters kind for memories (memref,
/// tensor, and vector).
enum class MemParam {
// Pragma configurations.
StorageType,
StorageImpl,
PartitionType,
PartitionFactor,
InterfaceMode,
bool HLSCppAnalyzer::visitOp(AffineIfOp op) { return true; }
// Performance parameters.
ReadNum,
WriteNum,
ReadPorts,
WritePorts,
DepdcyLatency,
DepdcyDistance,
/// This method will update all parameters except IterLatency, Latency, LUT,
/// BRAM, and DSP through static analysis.
void HLSCppAnalyzer::analyzeOperation(Operation *op) {
if (dispatchVisitor(op))
return;
// Resource parameters.
LUT,
BRAM
};
op->emitError("can't be correctly analyzed.");
}
namespace {
class QoREstimator {
public:
explicit QoREstimator(std::string targetSpecPath, std::string opLatencyPath);
void HLSCppAnalyzer::analyzeFunc(FuncOp func) { procParam.init(func); }
/// Get parameters.
unsigned getMemParam(Value *mem, MemParam kind) {
return memParams[mem][(unsigned)kind];
}
unsigned getProcParam(Operation *proc, ProcParam kind) {
return procParams[proc][(unsigned)kind];
void HLSCppAnalyzer::analyzeBlock(Block &block) {
for (auto &op : block)
analyzeOperation(&op);
}
/// This method is a wrapper for recursively calling operation analyzer.
void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
for (auto &op : module) {
if (auto func = dyn_cast<FuncOp>(op)) {
analyzeFunc(func);
} else if (!isa<ModuleTerminatorOp>(op))
op.emitError("is unsupported operation.");
}
}
/// These methods can extract static parameters and pragma configurations (if
/// applicable) of the input CDFG, and update them in procParams or memParams.
void analyzePragma(ModuleOp module);
void analyzeModule(ModuleOp module);
/// These methods can estimate the performance and resource utilization of a
/// specific MLIR structure, and update them in procParams or memroyParams.
void estimateAffineFor(AffineForOp affineFor);
void estimateAffineParallel(AffineParallelOp affineParallel);
void estimateAffineIf(AffineIfOp affineIf);
void estimateFunc(FuncOp func);
void estimateModule(ModuleOp module);
private:
DenseMap<Operation *, SmallVector<unsigned, 9>> procParams;
DenseMap<Value *, SmallVector<unsigned, 13>> memParams;
// Set parameters.
void setMemParam(Value *mem, unsigned kind, unsigned param) {
memParams[mem][(unsigned)kind] = param;
}
void setProcParam(Operation *proc, MemParam kind, unsigned param) {
procParams[proc][(unsigned)kind] = param;
}
};
} // namespace
//===----------------------------------------------------------------------===//
// QoREstimator Class Definition
//===----------------------------------------------------------------------===//
/// Estimator constructor.
QoREstimator::QoREstimator(std::string targetSpecPath,
std::string opLatencyPath) {
QoREstimator::QoREstimator(ProcParam &procParam, MemParam &memParam,
string targetSpecPath, string opLatencyPath)
: procParam(procParam), memParam(memParam) {
INIReader targetSpec(targetSpecPath);
if (targetSpec.ParseError())
llvm::outs() << "error: target spec file parse fail, please refer to "
@ -111,83 +72,136 @@ QoREstimator::QoREstimator(std::string targetSpecPath,
llvm::outs() << latency << "\n";
}
/// This method will search the longest path in a DAG block using a ASAP (As
/// Soon As Possible) manner. Loop, function, if, and other operation owning
/// regions will be considered as a whole.
unsigned searchLongestPath(Block &block) {
DenseMap<Value, unsigned> valueReadyTime;
unsigned blockReadyTime = 0;
for (auto &op : block) {
// Calculate ready time of all predecessors.
unsigned allPredsReadyTime = 0;
for (auto operand : op.getOperands()) {
if (operand.getKind() == Value::Kind::BlockArgument)
continue;
else if (operand.getParentBlock() != &block)
continue;
else
allPredsReadyTime = max(allPredsReadyTime, valueReadyTime[operand]);
}
// Calculate ready time of the current operation.
unsigned opReadyTime = allPredsReadyTime + 1;
for (auto result : op.getResults())
valueReadyTime[result] = opReadyTime;
// Update block ready time.
blockReadyTime = max(blockReadyTime, opReadyTime);
}
return blockReadyTime;
}
/// For now, estimation for unrolled loops are following the analytical model
/// of COMBA, which is suspected to be wrong. Meanwhile, we assume the absence
/// of function call in the loop body.
void QoREstimator::estimateAffineFor(AffineForOp affineFor) {
auto &body = affineFor.getLoopBody();
///
/// This method will update ProcParam::IterLatency and ProcParam::Latency of the
/// current affine for loop.
bool QoREstimator::visitOp(AffineForOp op) {
auto &body = op.getLoopBody();
if (body.getBlocks().size() != 1)
affineFor.emitError("has zero or more than one basic blocks.");
for (auto &op : body.front()) {
if (auto subAffineFor = dyn_cast<mlir::AffineForOp>(op))
estimateAffineFor(subAffineFor);
}
}
/// For now, function pipelining and task-level dataflow optimizations are not
/// considered for simplicity.
void QoREstimator::estimateFunc(FuncOp func) {
if (func.getBlocks().size() != 1)
func.emitError("has zero or more than one basic blocks.");
op.emitError("has zero or more than one basic blocks.");
// Recursively estimate latency of sub-elements, including functions and
// loops. These sub-elements will be considered as a normal node in the CDFG
// for function latency estimzation.
for (auto &op : func.front()) {
if (auto subFunc = dyn_cast<FuncOp>(op))
estimateFunc(subFunc);
else if (auto subAffineFor = dyn_cast<AffineForOp>(op))
estimateAffineFor(subAffineFor);
for (auto &op : body.front()) {
estimateOperation(&op);
}
// Estimate iteration latency.
unsigned iterLatency = searchLongestPath(body.front());
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
// Estimate affine for loop latency.
unsigned latency = iterLatency;
if (procParam.get(op, ProcParamKind::LoopBound) != 1)
latency *= procParam.get(op, ProcParamKind::LoopBound) *
procParam.get(op, ProcParamKind::UnrollFactor);
procParam.set(op, ProcParamKind::Latency, latency);
}
bool QoREstimator::visitOp(AffineParallelOp op) { return true; }
bool QoREstimator::visitOp(AffineIfOp op) { return true; }
/// This method recursively update the time stamp of all values (1) directly
/// generated as result by the current operation or (2) generated by any
/// operations insided of the region held by the current operation.
void QoREstimator::updateValueTimeStamp(
Operation *currentOp, unsigned opTimeStamp,
DenseMap<Value, unsigned> &valueTimeStampMap) {
for (auto result : currentOp->getResults())
valueTimeStampMap[result] = opTimeStamp;
for (auto &region : currentOp->getRegions()) {
for (auto &op : region.front())
updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap);
}
}
/// This method will search the longest path in a DAG block using a ASAP (As
/// Soon As Possible) manner. Loop, function, if, and other operation owning
/// regions will be considered as a whole.
unsigned QoREstimator::searchLongestPath(Block &block) {
DenseMap<Value, unsigned> valueTimeStampMap;
unsigned blockTimeStamp = 0;
for (auto &op : block) {
unsigned opTimeStamp = 0;
// Add the latest ready time among all predecessors.
for (auto operand : op.getOperands())
opTimeStamp = max(opTimeStamp, valueTimeStampMap[operand]);
// Add latency of the current operation.
if (auto subAffineFor = dyn_cast<AffineForOp>(op))
opTimeStamp += procParam.get(subAffineFor, ProcParamKind::Latency);
else
opTimeStamp += 1;
blockTimeStamp = max(blockTimeStamp, opTimeStamp);
// Update ready time of each value generated by the current operation.
updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap);
}
return blockTimeStamp;
}
void QoREstimator::estimateOperation(Operation *op) {
if (dispatchVisitor(op))
return;
op->emitError("can't be correctly estimated.");
}
/// For now, function pipelining and task-level dataflow optimizations are not
/// considered for simplicity. Meanwhile, we assume the absence of function call
/// in the loop body.
///
/// This method will update ProcParam::Latency of the current function.
void QoREstimator::estimateFunc(FuncOp func) {
if (func.getBlocks().size() != 1)
func.emitError("has zero or more than one basic blocks.");
estimateBlock(func.front());
// Estimate function latency.
llvm::outs() << searchLongestPath(func.front()) << "\n";
unsigned latency = searchLongestPath(func.front());
procParam.set(func, ProcParamKind::Latency, latency);
}
void QoREstimator::estimateBlock(Block &block) {
for (auto &op : block)
estimateOperation(&op);
}
void QoREstimator::estimateModule(ModuleOp module) {
for (auto &op : module) {
if (auto func = dyn_cast<FuncOp>(op))
if (auto func = dyn_cast<FuncOp>(op)) {
estimateFunc(func);
else if (!isa<ModuleTerminatorOp>(op))
} else if (!isa<ModuleTerminatorOp>(op))
op.emitError("is unsupported operation.");
}
}
//===----------------------------------------------------------------------===//
// Entry of scalehls-opt
//===----------------------------------------------------------------------===//
namespace {
struct QoREstimation : public QoREstimationBase<QoREstimation> {
void runOnOperation() override {
QoREstimator(targetSpec, opLatency).estimateModule(getOperation());
ProcParam procParam;
MemParam memParam;
// Extract all static parameters and current pragma configurations.
HLSCppAnalyzer analyzer(procParam, memParam);
analyzer.analyzeModule(getOperation());
// Estimate performance and resource utilization.
QoREstimator estimator(analyzer.procParam, analyzer.memParam, targetSpec,
opLatency);
estimator.estimateModule(getOperation());
}
};
} // namespace

View File

@ -1,4 +1,4 @@
// RUN: scalehls-opt -pragma-dse %s | FileCheck %s
// RUN: scalehls-opt -qor-estimation -pragma-dse %s | FileCheck %s
// CHECK-LABEL: func @test_pragma()
func @test_pragma() {