[QoREstimation] split out QoREstimation.h, StaticParam.h, and Visitor.h; refine QoREstimation code structure with newly created classes; a relatively complete searchLongestPath implementation
This commit is contained in:
parent
ce293ee4c5
commit
21968283d1
|
@ -0,0 +1,78 @@
|
|||
//===------------------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef SCALEHLS_TRANSFORMS_QORESTIMATION_H
|
||||
#define SCALEHLS_TRANSFORMS_QORESTIMATION_H
|
||||
|
||||
#include "StaticParam.h"
|
||||
#include "Visitor.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "llvm/ADT/TypeSwitch.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace scalehls {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HLSCppAnalyzer Class Declaration
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Visitor that runs ahead of QoREstimator to record static parameters and
/// pragma configurations into the shared ProcParam / MemParam tables.
///
/// The tables are held by reference and exposed publicly so that a later
/// stage can be constructed from the very same storage.
class HLSCppAnalyzer : public HLSCppVisitorBase<HLSCppAnalyzer, bool> {
public:
  /// Binds the analyzer to externally owned parameter tables; nothing is
  /// copied, so both tables must outlive the analyzer.
  explicit HLSCppAnalyzer(ProcParam &procStore, MemParam &memStore)
      : procParam(procStore), memParam(memStore) {}

  /// Per-process (function / loop / if) parameter table.
  ProcParam &procParam;
  /// Per-memory (memref / tensor / vector) parameter table.
  MemParam &memParam;

  /// Operations without a dedicated handler are reported as analyzed
  /// successfully.
  bool visitUnhandledOp(Operation *) { return true; }

  // Keep the base-class fallbacks visible next to the overloads below.
  using HLSCppVisitorBase::visitOp;

  /// Handlers for the region-holding affine operations. Each returns true
  /// when the operation was analyzed successfully.
  bool visitOp(AffineForOp op);
  bool visitOp(AffineParallelOp op);
  bool visitOp(AffineIfOp op);

  /// Dispatches `op` through the visitor and emits an error on `op` if the
  /// selected handler returns false.
  void analyzeOperation(Operation *op);

  /// Analyzes a single function.
  void analyzeFunc(FuncOp func);

  /// Analyzes every operation of `block`, in order.
  void analyzeBlock(Block &block);

  /// Analyzes every function of `module`; any other non-terminator
  /// operation at module level is reported as unsupported.
  void analyzeModule(ModuleOp module);
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// QoREstimator Class Declaration
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Visitor that estimates performance (and, eventually, resource usage) of
/// the IR and writes the results into the shared ProcParam / MemParam tables.
class QoREstimator : public HLSCppVisitorBase<QoREstimator, bool> {
public:
  /// Binds the estimator to externally owned parameter tables and loads the
  /// target specification found at `targetSpecPath`.
  /// NOTE(review): how `opLatencyPath` is consumed is not visible from this
  /// header — confirm against the constructor definition.
  explicit QoREstimator(ProcParam &procParam, MemParam &memParam,
                        std::string targetSpecPath, std::string opLatencyPath);

  /// Per-process (function / loop / if) parameter table.
  ProcParam &procParam;
  /// Per-memory (memref / tensor / vector) parameter table.
  MemParam &memParam;

  /// Operations without a dedicated handler are reported as estimated
  /// successfully.
  bool visitUnhandledOp(Operation *) { return true; }

  // Keep the base-class fallbacks visible next to the overloads below.
  using HLSCppVisitorBase::visitOp;

  /// These methods estimate the performance and resource utilization of a
  /// specific MLIR structure, and update them in procParam or memParam.
  bool visitOp(AffineForOp op);
  bool visitOp(AffineParallelOp op);
  bool visitOp(AffineIfOp op);

  /// Helpers for searching the longest path in a DAG.
  ///
  /// `updateValueTimeStamp` stamps every result of `currentOp`, and of the
  /// operations nested in its regions, with `opTimeStamp`.
  void updateValueTimeStamp(Operation *currentOp, unsigned opTimeStamp,
                            DenseMap<Value, unsigned> &valueTimeStampMap);

  /// Returns the length of the longest path through `block`.
  unsigned searchLongestPath(Block &block);

  /// MLIR component estimators.
  void estimateOperation(Operation *op);
  void estimateFunc(FuncOp func);
  void estimateBlock(Block &block);
  void estimateModule(ModuleOp module);
};
|
||||
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
||||
#endif // SCALEHLS_TRANSFORMS_QORESTIMATION_H
|
|
@ -0,0 +1,94 @@
|
|||
//===------------------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef SCALEHLS_TRANSFORMS_STATICPARAM_H
|
||||
#define SCALEHLS_TRANSFORMS_STATICPARAM_H
|
||||
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/IR/Value.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace scalehls {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ParamBase class
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
template <typename ParamKind, typename KeyType> class ParamBase {
|
||||
public:
|
||||
void init(KeyType key) {
|
||||
for (unsigned i = 0, e = (unsigned)ParamKind::KindNum; i < e; ++i)
|
||||
Params[key].push_back(0);
|
||||
}
|
||||
|
||||
unsigned get(KeyType key, ParamKind kind) {
|
||||
return Params[key][(unsigned)kind];
|
||||
}
|
||||
|
||||
void set(KeyType key, ParamKind kind, unsigned param) {
|
||||
Params[key][(unsigned)kind] = param;
|
||||
}
|
||||
|
||||
private:
|
||||
DenseMap<KeyType, SmallVector<unsigned, 16>> Params;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ProcParam and MemParam classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Slot indices of the per-process parameter table. The enumerators are
/// numbered consecutively from zero, so `KindNum` is the slot count.
enum class ProcParamKind {
  // Process-related pragma configurations.
  EnablePipeline,
  InitialInterval,
  UnrollFactor,

  // Performance parameters.
  LoopBound,
  IterLatency,
  Latency,

  // Resource parameters.
  LUT,
  BRAM,
  DSP,

  // Sentinel: must stay last so that it equals the number of kinds above.
  KindNum
};
|
||||
|
||||
/// Slot indices of the per-memory parameter table. The enumerators are
/// numbered consecutively from zero, so `KindNum` is the slot count.
enum class MemParamKind {
  // Pragma configurations.
  StorageType,
  StorageImpl,
  PartitionType,
  PartitionFactor,
  InterfaceMode,

  // Performance parameters.
  ReadNum,
  WriteNum,
  ReadPorts,
  WritePorts,
  DepdcyLatency,
  DepdcyDistance,

  // Resource parameters.
  LUT,
  BRAM,

  // Sentinel: must stay last so that it equals the number of kinds above.
  KindNum
};
|
||||
|
||||
/// This class includes all possible parameters kind for "processes" (function,
|
||||
/// for/parallel loop, and if).
|
||||
class ProcParam : public ParamBase<ProcParamKind, Operation *> {};
|
||||
|
||||
/// This class includes all possible parameters kind for memories (memref,
|
||||
/// tensor, and vector).
|
||||
class MemParam : public ParamBase<MemParamKind, Value> {};
|
||||
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
||||
#endif // SCALEHLS_TRANSFORMS_STATICPARAM_H
|
|
@ -0,0 +1,187 @@
|
|||
//===------------------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef SCALEHLS_VISITOR_H
|
||||
#define SCALEHLS_VISITOR_H
|
||||
|
||||
#include "Dialect/HLSCpp/HLSCpp.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace scalehls {
|
||||
|
||||
using namespace hlscpp;
|
||||
|
||||
/// This class is a visitor for SSACFG operation nodes.
|
||||
template <typename ConcreteType, typename ResultType, typename... ExtraArgs>
|
||||
class HLSCppVisitorBase {
|
||||
public:
|
||||
ResultType dispatchVisitor(Operation *op, ExtraArgs... args) {
|
||||
auto *thisCast = static_cast<ConcreteType *>(this);
|
||||
return TypeSwitch<Operation *, ResultType>(op)
|
||||
.template Case<
|
||||
// Affine statements.
|
||||
AffineForOp, AffineIfOp, AffineParallelOp, AffineApplyOp,
|
||||
AffineMaxOp, AffineMinOp, AffineLoadOp, AffineStoreOp,
|
||||
AffineYieldOp, AffineVectorLoadOp, AffineVectorStoreOp,
|
||||
AffineDmaStartOp, AffineDmaWaitOp,
|
||||
// Memref-related statements.
|
||||
AllocOp, AllocaOp, LoadOp, StoreOp, DeallocOp, DmaStartOp,
|
||||
DmaWaitOp, AtomicRMWOp, GenericAtomicRMWOp, AtomicYieldOp,
|
||||
MemRefCastOp, ViewOp, SubViewOp,
|
||||
// Tensor-related statements.
|
||||
TensorLoadOp, TensorStoreOp, ExtractElementOp, TensorFromElementsOp,
|
||||
SplatOp, TensorCastOp, DimOp, RankOp,
|
||||
// Unary expressions.
|
||||
AbsFOp, CeilFOp, NegFOp, CosOp, SinOp, TanhOp, SqrtOp, RsqrtOp,
|
||||
ExpOp, Exp2Op, LogOp, Log2Op, Log10Op,
|
||||
// Float binary expressions.
|
||||
CmpFOp, AddFOp, SubFOp, MulFOp, DivFOp, RemFOp,
|
||||
// Integer binary expressions.
|
||||
CmpIOp, AddIOp, SubIOp, MulIOp, SignedDivIOp, SignedRemIOp,
|
||||
UnsignedDivIOp, UnsignedRemIOp, XOrOp, AndOp, OrOp, ShiftLeftOp,
|
||||
SignedShiftRightOp, UnsignedShiftRightOp,
|
||||
// Complex expressions.
|
||||
AddCFOp, SubCFOp, ImOp, ReOp, CreateComplexOp,
|
||||
// Special operations.
|
||||
SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp,
|
||||
SignExtendIOp, IndexCastOp, CallOp, ReturnOp, AssignOp, EndOp,
|
||||
// Pragma operations.
|
||||
ApplyPragmasOp, PragmaPipelineOp, PragmaUnrollOp,
|
||||
PragmaArrayPartitionOp>([&](auto opNode) -> ResultType {
|
||||
return thisCast->visitOp(opNode, args...);
|
||||
})
|
||||
.Default([&](auto opNode) -> ResultType {
|
||||
return thisCast->visitInvalidOp(op, args...);
|
||||
});
|
||||
}
|
||||
|
||||
/// This callback is invoked on any invalid operations.
|
||||
ResultType visitInvalidOp(Operation *op, ExtraArgs... args) {
|
||||
op->emitOpError("is unsupported operation.");
|
||||
abort();
|
||||
}
|
||||
|
||||
/// This callback is invoked on any operations that are not handled by the
|
||||
/// concrete visitor.
|
||||
ResultType visitUnhandledOp(Operation *op, ExtraArgs... args) {
|
||||
return ResultType();
|
||||
}
|
||||
|
||||
#define HANDLE(OPTYPE) \
|
||||
ResultType visitOp(OPTYPE op, ExtraArgs... args) { \
|
||||
return static_cast<ConcreteType *>(this)->visitUnhandledOp(op, args...); \
|
||||
}
|
||||
|
||||
// Affine statements.
|
||||
HANDLE(AffineForOp);
|
||||
HANDLE(AffineIfOp);
|
||||
HANDLE(AffineParallelOp);
|
||||
HANDLE(AffineApplyOp);
|
||||
HANDLE(AffineMaxOp);
|
||||
HANDLE(AffineMinOp);
|
||||
HANDLE(AffineLoadOp);
|
||||
HANDLE(AffineStoreOp);
|
||||
HANDLE(AffineYieldOp);
|
||||
HANDLE(AffineVectorLoadOp);
|
||||
HANDLE(AffineVectorStoreOp);
|
||||
HANDLE(AffineDmaStartOp);
|
||||
HANDLE(AffineDmaWaitOp);
|
||||
|
||||
// Memref-related statements.
|
||||
HANDLE(AllocOp);
|
||||
HANDLE(AllocaOp);
|
||||
HANDLE(LoadOp);
|
||||
HANDLE(StoreOp);
|
||||
HANDLE(DeallocOp);
|
||||
HANDLE(DmaStartOp);
|
||||
HANDLE(DmaWaitOp);
|
||||
HANDLE(AtomicRMWOp);
|
||||
HANDLE(GenericAtomicRMWOp);
|
||||
HANDLE(AtomicYieldOp);
|
||||
HANDLE(MemRefCastOp);
|
||||
HANDLE(ViewOp);
|
||||
HANDLE(SubViewOp);
|
||||
|
||||
// Tensor-related statements.
|
||||
HANDLE(TensorLoadOp);
|
||||
HANDLE(TensorStoreOp);
|
||||
HANDLE(ExtractElementOp);
|
||||
HANDLE(TensorFromElementsOp);
|
||||
HANDLE(SplatOp);
|
||||
HANDLE(TensorCastOp);
|
||||
HANDLE(DimOp);
|
||||
HANDLE(RankOp);
|
||||
|
||||
// Unary expressions.
|
||||
HANDLE(AbsFOp);
|
||||
HANDLE(CeilFOp);
|
||||
HANDLE(NegFOp);
|
||||
HANDLE(CosOp);
|
||||
HANDLE(SinOp);
|
||||
HANDLE(TanhOp);
|
||||
HANDLE(SqrtOp);
|
||||
HANDLE(RsqrtOp);
|
||||
HANDLE(ExpOp);
|
||||
HANDLE(Exp2Op);
|
||||
HANDLE(LogOp);
|
||||
HANDLE(Log2Op);
|
||||
HANDLE(Log10Op);
|
||||
|
||||
// Float binary expressions.
|
||||
HANDLE(CmpFOp);
|
||||
HANDLE(AddFOp);
|
||||
HANDLE(SubFOp);
|
||||
HANDLE(MulFOp);
|
||||
HANDLE(DivFOp);
|
||||
HANDLE(RemFOp);
|
||||
|
||||
// Integer binary expressions.
|
||||
HANDLE(CmpIOp);
|
||||
HANDLE(AddIOp);
|
||||
HANDLE(SubIOp);
|
||||
HANDLE(MulIOp);
|
||||
HANDLE(SignedDivIOp);
|
||||
HANDLE(SignedRemIOp);
|
||||
HANDLE(UnsignedDivIOp);
|
||||
HANDLE(UnsignedRemIOp);
|
||||
HANDLE(XOrOp);
|
||||
HANDLE(AndOp);
|
||||
HANDLE(OrOp);
|
||||
HANDLE(ShiftLeftOp);
|
||||
HANDLE(SignedShiftRightOp);
|
||||
HANDLE(UnsignedShiftRightOp);
|
||||
|
||||
// Complex expressions.
|
||||
HANDLE(AddCFOp);
|
||||
HANDLE(SubCFOp);
|
||||
HANDLE(ImOp);
|
||||
HANDLE(ReOp);
|
||||
HANDLE(CreateComplexOp);
|
||||
|
||||
// Special operations.
|
||||
HANDLE(SelectOp);
|
||||
HANDLE(ConstantOp);
|
||||
HANDLE(CopySignOp);
|
||||
HANDLE(TruncateIOp);
|
||||
HANDLE(ZeroExtendIOp);
|
||||
HANDLE(SignExtendIOp);
|
||||
HANDLE(IndexCastOp);
|
||||
HANDLE(CallOp);
|
||||
HANDLE(ReturnOp);
|
||||
HANDLE(AssignOp);
|
||||
HANDLE(EndOp);
|
||||
|
||||
// Pragma operations.
|
||||
HANDLE(ApplyPragmasOp);
|
||||
HANDLE(PragmaPipelineOp);
|
||||
HANDLE(PragmaUnrollOp);
|
||||
HANDLE(PragmaArrayPartitionOp);
|
||||
#undef HANDLE
|
||||
};
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
||||
#endif // SCALEHLS_VISITOR_H
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "EmitHLSCpp.h"
|
||||
#include "Dialect/HLSCpp/HLSCpp.h"
|
||||
#include "Visitor.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
|
@ -26,9 +27,6 @@ using namespace hlscpp;
|
|||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Some Base Classes
|
||||
//
|
||||
// These classes should be factored out, and can be inherited by emitters
|
||||
// targeting various backends (e.g., Xilinx Vivado HLS, Intel FPGAs, etc.).
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
|
@ -130,176 +128,6 @@ SmallString<8> HLSCppEmitterBase::getName(Value val) {
|
|||
return state.nameTable[val];
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// This class is a visitor for SSACFG operation nodes.
|
||||
template <typename ConcreteType, typename ResultType, typename... ExtraArgs>
|
||||
class HLSCppVisitorBase {
|
||||
public:
|
||||
ResultType dispatchVisitor(Operation *op, ExtraArgs... args) {
|
||||
auto *thisCast = static_cast<ConcreteType *>(this);
|
||||
return TypeSwitch<Operation *, ResultType>(op)
|
||||
.template Case<
|
||||
// Affine statements.
|
||||
AffineForOp, AffineIfOp, AffineParallelOp, AffineApplyOp,
|
||||
AffineMaxOp, AffineMinOp, AffineLoadOp, AffineStoreOp,
|
||||
AffineYieldOp, AffineVectorLoadOp, AffineVectorStoreOp,
|
||||
AffineDmaStartOp, AffineDmaWaitOp,
|
||||
// Memref-related statements.
|
||||
AllocOp, AllocaOp, LoadOp, StoreOp, DeallocOp, DmaStartOp,
|
||||
DmaWaitOp, AtomicRMWOp, GenericAtomicRMWOp, AtomicYieldOp,
|
||||
MemRefCastOp, ViewOp, SubViewOp,
|
||||
// Tensor-related statements.
|
||||
TensorLoadOp, TensorStoreOp, ExtractElementOp, TensorFromElementsOp,
|
||||
SplatOp, TensorCastOp, DimOp, RankOp,
|
||||
// Unary expressions.
|
||||
AbsFOp, CeilFOp, NegFOp, CosOp, SinOp, TanhOp, SqrtOp, RsqrtOp,
|
||||
ExpOp, Exp2Op, LogOp, Log2Op, Log10Op,
|
||||
// Float binary expressions.
|
||||
CmpFOp, AddFOp, SubFOp, MulFOp, DivFOp, RemFOp,
|
||||
// Integer binary expressions.
|
||||
CmpIOp, AddIOp, SubIOp, MulIOp, SignedDivIOp, SignedRemIOp,
|
||||
UnsignedDivIOp, UnsignedRemIOp, XOrOp, AndOp, OrOp, ShiftLeftOp,
|
||||
SignedShiftRightOp, UnsignedShiftRightOp,
|
||||
// Complex expressions.
|
||||
AddCFOp, SubCFOp, ImOp, ReOp, CreateComplexOp,
|
||||
// Special operations.
|
||||
SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp,
|
||||
SignExtendIOp, IndexCastOp, CallOp, ReturnOp, AssignOp, EndOp,
|
||||
// Pragma operations.
|
||||
ApplyPragmasOp, PragmaPipelineOp, PragmaUnrollOp,
|
||||
PragmaArrayPartitionOp>([&](auto opNode) -> ResultType {
|
||||
return thisCast->visitOp(opNode, args...);
|
||||
})
|
||||
.Default([&](auto opNode) -> ResultType {
|
||||
return thisCast->visitInvalidOp(op, args...);
|
||||
});
|
||||
}
|
||||
|
||||
/// This callback is invoked on any invalid operations.
|
||||
ResultType visitInvalidOp(Operation *op, ExtraArgs... args) {
|
||||
op->emitOpError("is unsupported operation.");
|
||||
abort();
|
||||
}
|
||||
|
||||
/// This callback is invoked on any operations that are not handled by the
|
||||
/// concrete visitor.
|
||||
ResultType visitUnhandledOp(Operation *op, ExtraArgs... args) {
|
||||
return ResultType();
|
||||
}
|
||||
|
||||
#define HANDLE(OPTYPE) \
|
||||
ResultType visitOp(OPTYPE op, ExtraArgs... args) { \
|
||||
return static_cast<ConcreteType *>(this)->visitUnhandledOp(op, args...); \
|
||||
}
|
||||
|
||||
// Affine statements.
|
||||
HANDLE(AffineForOp);
|
||||
HANDLE(AffineIfOp);
|
||||
HANDLE(AffineParallelOp);
|
||||
HANDLE(AffineApplyOp);
|
||||
HANDLE(AffineMaxOp);
|
||||
HANDLE(AffineMinOp);
|
||||
HANDLE(AffineLoadOp);
|
||||
HANDLE(AffineStoreOp);
|
||||
HANDLE(AffineYieldOp);
|
||||
HANDLE(AffineVectorLoadOp);
|
||||
HANDLE(AffineVectorStoreOp);
|
||||
HANDLE(AffineDmaStartOp);
|
||||
HANDLE(AffineDmaWaitOp);
|
||||
|
||||
// Memref-related statements.
|
||||
HANDLE(AllocOp);
|
||||
HANDLE(AllocaOp);
|
||||
HANDLE(LoadOp);
|
||||
HANDLE(StoreOp);
|
||||
HANDLE(DeallocOp);
|
||||
HANDLE(DmaStartOp);
|
||||
HANDLE(DmaWaitOp);
|
||||
HANDLE(AtomicRMWOp);
|
||||
HANDLE(GenericAtomicRMWOp);
|
||||
HANDLE(AtomicYieldOp);
|
||||
HANDLE(MemRefCastOp);
|
||||
HANDLE(ViewOp);
|
||||
HANDLE(SubViewOp);
|
||||
|
||||
// Tensor-related statements.
|
||||
HANDLE(TensorLoadOp);
|
||||
HANDLE(TensorStoreOp);
|
||||
HANDLE(ExtractElementOp);
|
||||
HANDLE(TensorFromElementsOp);
|
||||
HANDLE(SplatOp);
|
||||
HANDLE(TensorCastOp);
|
||||
HANDLE(DimOp);
|
||||
HANDLE(RankOp);
|
||||
|
||||
// Unary expressions.
|
||||
HANDLE(AbsFOp);
|
||||
HANDLE(CeilFOp);
|
||||
HANDLE(NegFOp);
|
||||
HANDLE(CosOp);
|
||||
HANDLE(SinOp);
|
||||
HANDLE(TanhOp);
|
||||
HANDLE(SqrtOp);
|
||||
HANDLE(RsqrtOp);
|
||||
HANDLE(ExpOp);
|
||||
HANDLE(Exp2Op);
|
||||
HANDLE(LogOp);
|
||||
HANDLE(Log2Op);
|
||||
HANDLE(Log10Op);
|
||||
|
||||
// Float binary expressions.
|
||||
HANDLE(CmpFOp);
|
||||
HANDLE(AddFOp);
|
||||
HANDLE(SubFOp);
|
||||
HANDLE(MulFOp);
|
||||
HANDLE(DivFOp);
|
||||
HANDLE(RemFOp);
|
||||
|
||||
// Integer binary expressions.
|
||||
HANDLE(CmpIOp);
|
||||
HANDLE(AddIOp);
|
||||
HANDLE(SubIOp);
|
||||
HANDLE(MulIOp);
|
||||
HANDLE(SignedDivIOp);
|
||||
HANDLE(SignedRemIOp);
|
||||
HANDLE(UnsignedDivIOp);
|
||||
HANDLE(UnsignedRemIOp);
|
||||
HANDLE(XOrOp);
|
||||
HANDLE(AndOp);
|
||||
HANDLE(OrOp);
|
||||
HANDLE(ShiftLeftOp);
|
||||
HANDLE(SignedShiftRightOp);
|
||||
HANDLE(UnsignedShiftRightOp);
|
||||
|
||||
// Complex expressions.
|
||||
HANDLE(AddCFOp);
|
||||
HANDLE(SubCFOp);
|
||||
HANDLE(ImOp);
|
||||
HANDLE(ReOp);
|
||||
HANDLE(CreateComplexOp);
|
||||
|
||||
// Special operations.
|
||||
HANDLE(SelectOp);
|
||||
HANDLE(ConstantOp);
|
||||
HANDLE(CopySignOp);
|
||||
HANDLE(TruncateIOp);
|
||||
HANDLE(ZeroExtendIOp);
|
||||
HANDLE(SignExtendIOp);
|
||||
HANDLE(IndexCastOp);
|
||||
HANDLE(CallOp);
|
||||
HANDLE(ReturnOp);
|
||||
HANDLE(AssignOp);
|
||||
HANDLE(EndOp);
|
||||
|
||||
// Pragma operations.
|
||||
HANDLE(ApplyPragmasOp);
|
||||
HANDLE(PragmaPipelineOp);
|
||||
HANDLE(PragmaUnrollOp);
|
||||
HANDLE(PragmaArrayPartitionOp);
|
||||
#undef HANDLE
|
||||
};
|
||||
} // namespace
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ModuleEmitter Class Declaration
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -655,7 +483,7 @@ private:
|
|||
} // namespace
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ModuleEmitter Class Implementation
|
||||
// ModuleEmitter Class Definition
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Affine statement emitters.
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "Dialect/HLSCpp/HLSCpp.h"
|
||||
#include "Transforms/INIReader.h"
|
||||
#include "Transforms/Passes.h"
|
||||
#include "Transforms/QoREstimation.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mlir;
|
||||
|
|
|
@ -2,100 +2,61 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Transforms/QoREstimation.h"
|
||||
#include "Dialect/HLSCpp/HLSCpp.h"
|
||||
#include "Transforms/INIReader.h"
|
||||
#include "Transforms/Passes.h"
|
||||
#include "Visitor.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mlir;
|
||||
using namespace scalehls;
|
||||
using namespace hlscpp;
|
||||
|
||||
/// This class includes all possible parameters kind for "processes" (function,
|
||||
/// for/parallel loop, and if).
|
||||
enum class ProcParam {
|
||||
// Pragam configurations.
|
||||
EnablePipeline,
|
||||
InitialInterval,
|
||||
UnrollFactor,
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HLSCppAnalyzer Class Definition
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Performance parameters.
|
||||
LoopBound,
|
||||
IterLatency,
|
||||
Latency,
|
||||
/// Analysis hook for affine for loops. Nothing is recorded yet; the loop is
/// simply reported as analyzed successfully.
bool HLSCppAnalyzer::visitOp(AffineForOp op) {
  return true;
}
|
||||
|
||||
// Resource parameters.
|
||||
LUT,
|
||||
DSP,
|
||||
BRAM
|
||||
};
|
||||
/// Analysis hook for affine parallel loops. Nothing is recorded yet; the
/// loop is simply reported as analyzed successfully.
bool HLSCppAnalyzer::visitOp(AffineParallelOp op) {
  return true;
}
|
||||
|
||||
/// This class includes all possible parameters kind for memories (memref,
|
||||
/// tensor, and vector).
|
||||
enum class MemParam {
|
||||
// Pragma configurations.
|
||||
StorageType,
|
||||
StorageImpl,
|
||||
PartitionType,
|
||||
PartitionFactor,
|
||||
InterfaceMode,
|
||||
/// Analysis hook for affine if operations. Nothing is recorded yet; the
/// operation is simply reported as analyzed successfully.
bool HLSCppAnalyzer::visitOp(AffineIfOp op) {
  return true;
}
|
||||
|
||||
// Performance parameters.
|
||||
ReadNum,
|
||||
WriteNum,
|
||||
ReadPorts,
|
||||
WritePorts,
|
||||
DepdcyLatency,
|
||||
DepdcyDistance,
|
||||
/// This method will update all parameters except IterLatency, Latency, LUT,
|
||||
/// BRAM, and DSP through static analysis.
|
||||
void HLSCppAnalyzer::analyzeOperation(Operation *op) {
|
||||
if (dispatchVisitor(op))
|
||||
return;
|
||||
|
||||
// Resource parameters.
|
||||
LUT,
|
||||
BRAM
|
||||
};
|
||||
op->emitError("can't be correctly analyzed.");
|
||||
}
|
||||
|
||||
namespace {
|
||||
class QoREstimator {
|
||||
public:
|
||||
explicit QoREstimator(std::string targetSpecPath, std::string opLatencyPath);
|
||||
/// Registers `func` in the process parameter table with all of its
/// parameters zero-initialized. The function body is not visited here.
void HLSCppAnalyzer::analyzeFunc(FuncOp func) {
  procParam.init(func);
}
|
||||
|
||||
/// Get parameters.
|
||||
unsigned getMemParam(Value *mem, MemParam kind) {
|
||||
return memParams[mem][(unsigned)kind];
|
||||
}
|
||||
unsigned getProcParam(Operation *proc, ProcParam kind) {
|
||||
return procParams[proc][(unsigned)kind];
|
||||
/// Analyzes the operations of `block` one after another, in program order.
void HLSCppAnalyzer::analyzeBlock(Block &block) {
  for (auto it = block.begin(), end = block.end(); it != end; ++it)
    analyzeOperation(&*it);
}
|
||||
|
||||
/// This method is a wrapper for recursively calling operation analyzer.
|
||||
void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
|
||||
for (auto &op : module) {
|
||||
if (auto func = dyn_cast<FuncOp>(op)) {
|
||||
analyzeFunc(func);
|
||||
} else if (!isa<ModuleTerminatorOp>(op))
|
||||
op.emitError("is unsupported operation.");
|
||||
}
|
||||
}
|
||||
|
||||
/// These methods can extract static parameters and pragma configurations (if
|
||||
/// applicable) of the input CDFG, and update them in procParams or memParams.
|
||||
void analyzePragma(ModuleOp module);
|
||||
void analyzeModule(ModuleOp module);
|
||||
|
||||
/// These methods can estimate the performance and resource utilization of a
|
||||
/// specific MLIR structure, and update them in procParams or memroyParams.
|
||||
void estimateAffineFor(AffineForOp affineFor);
|
||||
void estimateAffineParallel(AffineParallelOp affineParallel);
|
||||
void estimateAffineIf(AffineIfOp affineIf);
|
||||
void estimateFunc(FuncOp func);
|
||||
void estimateModule(ModuleOp module);
|
||||
|
||||
private:
|
||||
DenseMap<Operation *, SmallVector<unsigned, 9>> procParams;
|
||||
DenseMap<Value *, SmallVector<unsigned, 13>> memParams;
|
||||
|
||||
// Set parameters.
|
||||
void setMemParam(Value *mem, unsigned kind, unsigned param) {
|
||||
memParams[mem][(unsigned)kind] = param;
|
||||
}
|
||||
void setProcParam(Operation *proc, MemParam kind, unsigned param) {
|
||||
procParams[proc][(unsigned)kind] = param;
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
//===----------------------------------------------------------------------===//
|
||||
// QoREstimator Class Definition
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Estimator constructor.
|
||||
QoREstimator::QoREstimator(std::string targetSpecPath,
|
||||
std::string opLatencyPath) {
|
||||
QoREstimator::QoREstimator(ProcParam &procParam, MemParam &memParam,
|
||||
string targetSpecPath, string opLatencyPath)
|
||||
: procParam(procParam), memParam(memParam) {
|
||||
INIReader targetSpec(targetSpecPath);
|
||||
if (targetSpec.ParseError())
|
||||
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||
|
@ -111,83 +72,136 @@ QoREstimator::QoREstimator(std::string targetSpecPath,
|
|||
llvm::outs() << latency << "\n";
|
||||
}
|
||||
|
||||
/// This method will search the longest path in a DAG block using a ASAP (As
|
||||
/// Soon As Possible) manner. Loop, function, if, and other operation owning
|
||||
/// regions will be considered as a whole.
|
||||
unsigned searchLongestPath(Block &block) {
|
||||
DenseMap<Value, unsigned> valueReadyTime;
|
||||
unsigned blockReadyTime = 0;
|
||||
for (auto &op : block) {
|
||||
|
||||
// Calculate ready time of all predecessors.
|
||||
unsigned allPredsReadyTime = 0;
|
||||
for (auto operand : op.getOperands()) {
|
||||
if (operand.getKind() == Value::Kind::BlockArgument)
|
||||
continue;
|
||||
else if (operand.getParentBlock() != &block)
|
||||
continue;
|
||||
else
|
||||
allPredsReadyTime = max(allPredsReadyTime, valueReadyTime[operand]);
|
||||
}
|
||||
|
||||
// Calculate ready time of the current operation.
|
||||
unsigned opReadyTime = allPredsReadyTime + 1;
|
||||
for (auto result : op.getResults())
|
||||
valueReadyTime[result] = opReadyTime;
|
||||
|
||||
// Update block ready time.
|
||||
blockReadyTime = max(blockReadyTime, opReadyTime);
|
||||
}
|
||||
return blockReadyTime;
|
||||
}
|
||||
|
||||
/// For now, estimation for unrolled loops are following the analytical model
|
||||
/// of COMBA, which is suspected to be wrong. Meanwhile, we assume the absence
|
||||
/// of function call in the loop body.
|
||||
void QoREstimator::estimateAffineFor(AffineForOp affineFor) {
|
||||
auto &body = affineFor.getLoopBody();
|
||||
///
|
||||
/// This method will update ProcParam::IterLatency and ProcParam::Latency of the
|
||||
/// current affine for loop.
|
||||
bool QoREstimator::visitOp(AffineForOp op) {
|
||||
auto &body = op.getLoopBody();
|
||||
if (body.getBlocks().size() != 1)
|
||||
affineFor.emitError("has zero or more than one basic blocks.");
|
||||
|
||||
for (auto &op : body.front()) {
|
||||
if (auto subAffineFor = dyn_cast<mlir::AffineForOp>(op))
|
||||
estimateAffineFor(subAffineFor);
|
||||
}
|
||||
}
|
||||
|
||||
/// For now, function pipelining and task-level dataflow optimizations are not
|
||||
/// considered for simplicity.
|
||||
void QoREstimator::estimateFunc(FuncOp func) {
|
||||
if (func.getBlocks().size() != 1)
|
||||
func.emitError("has zero or more than one basic blocks.");
|
||||
op.emitError("has zero or more than one basic blocks.");
|
||||
|
||||
// Recursively estimate latency of sub-elements, including functions and
|
||||
// loops. These sub-elements will be considered as a normal node in the CDFG
|
||||
// for function latency estimzation.
|
||||
for (auto &op : func.front()) {
|
||||
if (auto subFunc = dyn_cast<FuncOp>(op))
|
||||
estimateFunc(subFunc);
|
||||
else if (auto subAffineFor = dyn_cast<AffineForOp>(op))
|
||||
estimateAffineFor(subAffineFor);
|
||||
for (auto &op : body.front()) {
|
||||
estimateOperation(&op);
|
||||
}
|
||||
|
||||
// Estimate iteration latency.
|
||||
unsigned iterLatency = searchLongestPath(body.front());
|
||||
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
|
||||
|
||||
// Estimate affine for loop latency.
|
||||
unsigned latency = iterLatency;
|
||||
if (procParam.get(op, ProcParamKind::LoopBound) != 1)
|
||||
latency *= procParam.get(op, ProcParamKind::LoopBound) *
|
||||
procParam.get(op, ProcParamKind::UnrollFactor);
|
||||
procParam.set(op, ProcParamKind::Latency, latency);
|
||||
}
|
||||
|
||||
/// Estimation hook for affine parallel loops. No estimate is produced yet;
/// the loop is simply reported as handled.
bool QoREstimator::visitOp(AffineParallelOp op) {
  return true;
}
|
||||
|
||||
/// Estimation hook for affine if operations. No estimate is produced yet;
/// the operation is simply reported as handled.
bool QoREstimator::visitOp(AffineIfOp op) {
  return true;
}
|
||||
|
||||
/// This method recursively update the time stamp of all values (1) directly
|
||||
/// generated as result by the current operation or (2) generated by any
|
||||
/// operations insided of the region held by the current operation.
|
||||
void QoREstimator::updateValueTimeStamp(
|
||||
Operation *currentOp, unsigned opTimeStamp,
|
||||
DenseMap<Value, unsigned> &valueTimeStampMap) {
|
||||
for (auto result : currentOp->getResults())
|
||||
valueTimeStampMap[result] = opTimeStamp;
|
||||
for (auto ®ion : currentOp->getRegions()) {
|
||||
for (auto &op : region.front())
|
||||
updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap);
|
||||
}
|
||||
}
|
||||
|
||||
/// This method will search the longest path in a DAG block using a ASAP (As
|
||||
/// Soon As Possible) manner. Loop, function, if, and other operation owning
|
||||
/// regions will be considered as a whole.
|
||||
unsigned QoREstimator::searchLongestPath(Block &block) {
|
||||
DenseMap<Value, unsigned> valueTimeStampMap;
|
||||
unsigned blockTimeStamp = 0;
|
||||
|
||||
for (auto &op : block) {
|
||||
unsigned opTimeStamp = 0;
|
||||
|
||||
// Add the latest ready time among all predecessors.
|
||||
for (auto operand : op.getOperands())
|
||||
opTimeStamp = max(opTimeStamp, valueTimeStampMap[operand]);
|
||||
|
||||
// Add latency of the current operation.
|
||||
if (auto subAffineFor = dyn_cast<AffineForOp>(op))
|
||||
opTimeStamp += procParam.get(subAffineFor, ProcParamKind::Latency);
|
||||
else
|
||||
opTimeStamp += 1;
|
||||
blockTimeStamp = max(blockTimeStamp, opTimeStamp);
|
||||
|
||||
// Update ready time of each value generated by the current operation.
|
||||
updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap);
|
||||
}
|
||||
return blockTimeStamp;
|
||||
}
|
||||
|
||||
/// Routes `op` through the visitor; when the selected handler reports
/// failure, an error is attached to the operation.
void QoREstimator::estimateOperation(Operation *op) {
  const bool estimated = dispatchVisitor(op);
  if (!estimated)
    op->emitError("can't be correctly estimated.");
}
|
||||
|
||||
/// For now, function pipelining and task-level dataflow optimizations are not
|
||||
/// considered for simplicity. Meanwhile, we assume the absence of function call
|
||||
/// in the loop body.
|
||||
///
|
||||
/// This method will update ProcParam::Latency of the current function.
|
||||
void QoREstimator::estimateFunc(FuncOp func) {
|
||||
if (func.getBlocks().size() != 1)
|
||||
func.emitError("has zero or more than one basic blocks.");
|
||||
|
||||
estimateBlock(func.front());
|
||||
|
||||
// Estimate function latency.
|
||||
llvm::outs() << searchLongestPath(func.front()) << "\n";
|
||||
unsigned latency = searchLongestPath(func.front());
|
||||
procParam.set(func, ProcParamKind::Latency, latency);
|
||||
}
|
||||
|
||||
/// Estimates the operations of `block` one after another, in program order.
void QoREstimator::estimateBlock(Block &block) {
  for (auto it = block.begin(), end = block.end(); it != end; ++it)
    estimateOperation(&*it);
}
|
||||
|
||||
/// Entry point of the estimation: walks the top-level operations of `module`
/// and estimates every function. The module terminator is skipped silently;
/// anything else is reported as unsupported.
void QoREstimator::estimateModule(ModuleOp module) {
  for (auto &op : module) {
    if (auto func = dyn_cast<FuncOp>(op)) {
      estimateFunc(func);
    } else if (!isa<ModuleTerminatorOp>(op)) {
      op.emitError("is unsupported operation.");
    }
  }
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Entry of scalehls-opt
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
/// Pass that analyzes the module and then estimates its quality of results.
/// Both stages share one pair of parameter tables owned by this function.
struct QoREstimation : public QoREstimationBase<QoREstimation> {
  void runOnOperation() override {
    ProcParam procParam;
    MemParam memParam;

    // Extract all static parameters and current pragma configurations.
    HLSCppAnalyzer analyzer(procParam, memParam);
    analyzer.analyzeModule(getOperation());

    // Estimate performance and resource utilization.
    QoREstimator estimator(analyzer.procParam, analyzer.memParam, targetSpec,
                           opLatency);
    estimator.estimateModule(getOperation());
  }
};
|
||||
} // namespace
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// RUN: scalehls-opt -pragma-dse %s | FileCheck %s
|
||||
// RUN: scalehls-opt -qor-estimation -pragma-dse %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func @test_pragma()
|
||||
func @test_pragma() {
|
||||
|
|
Loading…
Reference in New Issue