[QoREstimator] a simple impl of estimator with lots of assumptions, can handle unroll and pipeline
This commit is contained in:
parent
003a91472c
commit
46f444463e
|
@ -28,10 +28,9 @@ scalehls-opt -convert-to-hlscpp test/EmitHLSCpp/test_*.mlir | scalehls-translate
|
|||
### PragmaDSE Pass
|
||||
|
||||
### EmitHLSCpp
|
||||
1. **Test HLS C++ emitter with onnx/npcomp;**
|
||||
2. TODOs in EmitHLSCpp.cpp;
|
||||
3. Support memref/tensor cast/view/subview operations;
|
||||
4. Support atomic/complex/extention -related operations.
|
||||
1. TODOs in EmitHLSCpp.cpp;
|
||||
2. Support memref/tensor cast/view/subview operations;
|
||||
3. Support atomic/complex/extention -related operations.
|
||||
|
||||
## References
|
||||
1. [MLIR Documents](https://mlir.llvm.org)
|
||||
|
|
|
@ -23,6 +23,8 @@ public:
|
|||
explicit HLSCppAnalyzer(ProcParam &procParam, MemParam &memParam)
|
||||
: procParam(procParam), memParam(memParam) {}
|
||||
|
||||
bool inPipeline;
|
||||
|
||||
ProcParam &procParam;
|
||||
MemParam &memParam;
|
||||
|
||||
|
@ -48,6 +50,11 @@ public:
|
|||
explicit QoREstimator(ProcParam &procParam, MemParam &memParam,
|
||||
std::string targetSpecPath, std::string opLatencyPath);
|
||||
|
||||
using OpDenseMap = DenseMap<Operation *, unsigned>;
|
||||
// This flag indicates that currently the estimator is in a pipelined region,
|
||||
// which will impact the estimation strategy.
|
||||
bool inPipeline;
|
||||
|
||||
ProcParam &procParam;
|
||||
MemParam &memParam;
|
||||
|
||||
|
@ -61,9 +68,9 @@ public:
|
|||
bool visitOp(AffineIfOp op);
|
||||
|
||||
/// These methods are used for searching longest path in a DAG.
|
||||
void updateValueTimeStamp(Operation *currentOp, unsigned opTimeStamp,
|
||||
DenseMap<Value, unsigned> &valueTimeStampMap);
|
||||
unsigned searchLongestPath(Block &block);
|
||||
void alignBlockSchedule(Block &block, OpDenseMap &opScheduleMap,
|
||||
unsigned opSchedule);
|
||||
unsigned getBlockSchedule(Block &block, OpDenseMap &opScheduleMap);
|
||||
|
||||
/// MLIR component estimators.
|
||||
void estimateOperation(Operation *op);
|
||||
|
|
|
@ -30,7 +30,6 @@ public:
|
|||
Params[key][(unsigned)kind] = param;
|
||||
}
|
||||
|
||||
private:
|
||||
DenseMap<KeyType, SmallVector<unsigned, 16>> Params;
|
||||
};
|
||||
|
||||
|
@ -41,12 +40,18 @@ private:
|
|||
enum class ProcParamKind {
|
||||
// Process-related pragam configurations.
|
||||
EnablePipeline,
|
||||
InitialInterval,
|
||||
UnrollFactor,
|
||||
|
||||
// Process attributes.
|
||||
LowerBound,
|
||||
UpperBound,
|
||||
IterNumber,
|
||||
IsPerfect,
|
||||
|
||||
// Performance parameters.
|
||||
LoopBound,
|
||||
InitInterval,
|
||||
IterLatency,
|
||||
PipeIterNumber,
|
||||
Latency,
|
||||
|
||||
// Resource parameters.
|
||||
|
|
|
@ -28,32 +28,7 @@ def LoopPragmaOp : HLSCppOp<"loop_pragma", [
|
|||
BoolAttr : $skip_exit_check
|
||||
);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
// Pipeline pragma-related methods.
|
||||
unsigned getII() {
|
||||
return getAttrOfType<IntegerAttr>("II").getUInt();
|
||||
}
|
||||
bool isEnableFlush() {
|
||||
return getAttrOfType<BoolAttr>("enable_flush").getValue();
|
||||
}
|
||||
bool isRewind() {
|
||||
return getAttrOfType<BoolAttr>("rewind").getValue();
|
||||
}
|
||||
bool isOff() {
|
||||
return getAttrOfType<BoolAttr>("off").getValue();
|
||||
}
|
||||
|
||||
// Unroll pragma-related methods.
|
||||
unsigned getFactor() {
|
||||
return getAttrOfType<IntegerAttr>("factor").getUInt();
|
||||
}
|
||||
bool isRegion() {
|
||||
return getAttrOfType<BoolAttr>("region").getValue();
|
||||
}
|
||||
bool isSkipExitCheck() {
|
||||
return getAttrOfType<BoolAttr>("skip_exit_check").getValue();
|
||||
}
|
||||
}];
|
||||
let extraClassDeclaration = [{}];
|
||||
}
|
||||
|
||||
def FuncPragmaOp : HLSCppOp<"func_pragma", [
|
||||
|
@ -77,26 +52,7 @@ def FuncPragmaOp : HLSCppOp<"func_pragma", [
|
|||
BoolAttr : $dataflow
|
||||
);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
// Pipeline pragma-related methods.
|
||||
unsigned getII() {
|
||||
return getAttrOfType<IntegerAttr>("II").getUInt();
|
||||
}
|
||||
bool isEnableFlush() {
|
||||
return getAttrOfType<BoolAttr>("enable_flush").getValue();
|
||||
}
|
||||
bool isRewind() {
|
||||
return getAttrOfType<BoolAttr>("rewind").getValue();
|
||||
}
|
||||
bool isOff() {
|
||||
return getAttrOfType<BoolAttr>("off").getValue();
|
||||
}
|
||||
|
||||
// Dataflow pragma-related methods.
|
||||
bool isDataflow() {
|
||||
return getAttrOfType<BoolAttr>("dataflow").getValue();
|
||||
}
|
||||
}];
|
||||
let extraClassDeclaration = [{}];
|
||||
}
|
||||
|
||||
def ArrayPragmaOp : HLSCppOp<"array_pragma", [
|
||||
|
@ -119,17 +75,7 @@ def ArrayPragmaOp : HLSCppOp<"array_pragma", [
|
|||
// (1) dim is not larger than variable's dim.
|
||||
// (2) factor is not larger than variable's shape of the corresponding dim.
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
StringRef getPartitionType() {
|
||||
return getAttrOfType<StringAttr>("type").getValue();
|
||||
}
|
||||
unsigned getFactor() {
|
||||
return getAttrOfType<IntegerAttr>("factor").getUInt();
|
||||
}
|
||||
unsigned getDim() {
|
||||
return getAttrOfType<IntegerAttr>("dim").getUInt();
|
||||
}
|
||||
}];
|
||||
let extraClassDeclaration = [{}];
|
||||
}
|
||||
|
||||
#endif // SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD
|
||||
|
|
|
@ -17,7 +17,63 @@ using namespace hlscpp;
|
|||
// HLSCppAnalyzer Class Definition
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool HLSCppAnalyzer::visitOp(AffineForOp op) { return true; }
|
||||
bool HLSCppAnalyzer::visitOp(AffineForOp op) {
|
||||
auto &body = op.getLoopBody();
|
||||
if (body.getBlocks().size() != 1)
|
||||
op.emitError("has zero or more than one basic blocks.");
|
||||
|
||||
if (procParam.Params[op].empty())
|
||||
procParam.init(op);
|
||||
|
||||
// Recursively analyze all childs.
|
||||
analyzeBlock(body.front());
|
||||
|
||||
// Pragma configurations.
|
||||
unsigned unrollFactor = 1;
|
||||
if (auto loopPragma = dyn_cast<LoopPragmaOp>(body.front().front())) {
|
||||
procParam.set(op, ProcParamKind::EnablePipeline, !loopPragma.off());
|
||||
procParam.set(op, ProcParamKind::UnrollFactor, loopPragma.factor());
|
||||
unrollFactor = loopPragma.factor();
|
||||
}
|
||||
|
||||
// Loop statistics.
|
||||
if (!op.getUpperBoundMap().isSingleConstant() ||
|
||||
!op.getLowerBoundMap().isSingleConstant())
|
||||
op.emitError("has variable upper or lower bound.");
|
||||
|
||||
unsigned upperBound = op.getUpperBoundMap().getSingleConstantResult();
|
||||
unsigned lowerBound = op.getLowerBoundMap().getSingleConstantResult();
|
||||
unsigned step = op.getStep();
|
||||
|
||||
procParam.set(op, ProcParamKind::UpperBound, upperBound);
|
||||
procParam.set(op, ProcParamKind::LowerBound, lowerBound);
|
||||
procParam.set(op, ProcParamKind::IterNumber,
|
||||
(upperBound - lowerBound) / step / unrollFactor);
|
||||
|
||||
unsigned opNum = 0;
|
||||
unsigned loopNum = 0;
|
||||
bool isPerfect = false;
|
||||
for (auto &bodyOp : op.getRegion().front()) {
|
||||
if (!isa<LoopPragmaOp>(bodyOp) && !isa<AffineYieldOp>(bodyOp)) {
|
||||
opNum += 1;
|
||||
if (auto forOp = dyn_cast<AffineForOp>(bodyOp)) {
|
||||
loopNum += 1;
|
||||
isPerfect = procParam.get(forOp, ProcParamKind::IsPerfect);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Perfect nested loop.
|
||||
if (opNum == 1 && loopNum == 1 && isPerfect)
|
||||
procParam.set(op, ProcParamKind::IsPerfect, 1);
|
||||
// The inner loop.
|
||||
else if (loopNum == 0)
|
||||
procParam.set(op, ProcParamKind::IsPerfect, 1);
|
||||
else
|
||||
procParam.set(op, ProcParamKind::IsPerfect, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HLSCppAnalyzer::visitOp(AffineParallelOp op) { return true; }
|
||||
|
||||
|
@ -32,7 +88,14 @@ void HLSCppAnalyzer::analyzeOperation(Operation *op) {
|
|||
op->emitError("can't be correctly analyzed.");
|
||||
}
|
||||
|
||||
void HLSCppAnalyzer::analyzeFunc(FuncOp func) { procParam.init(func); }
|
||||
void HLSCppAnalyzer::analyzeFunc(FuncOp func) {
|
||||
if (func.getBlocks().size() != 1)
|
||||
func.emitError("has zero or more than one basic blocks.");
|
||||
|
||||
procParam.init(func);
|
||||
|
||||
analyzeBlock(func.front());
|
||||
}
|
||||
|
||||
void HLSCppAnalyzer::analyzeBlock(Block &block) {
|
||||
for (auto &op : block)
|
||||
|
@ -57,6 +120,9 @@ void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
|
|||
QoREstimator::QoREstimator(ProcParam &procParam, MemParam &memParam,
|
||||
string targetSpecPath, string opLatencyPath)
|
||||
: procParam(procParam), memParam(memParam) {
|
||||
|
||||
inPipeline = false;
|
||||
|
||||
INIReader targetSpec(targetSpecPath);
|
||||
if (targetSpec.ParseError())
|
||||
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||
|
@ -67,84 +133,130 @@ QoREstimator::QoREstimator(ProcParam &procParam, MemParam &memParam,
|
|||
llvm::outs() << "error: Op latency file parse fail, please refer to "
|
||||
"--help option and pass in correct file path\n";
|
||||
|
||||
// TODO: Support estimator initiation from profiling data.
|
||||
auto freq = targetSpec.Get("config", "frequency", "200MHz");
|
||||
auto latency = opLatency.GetInteger(freq, "op", 0);
|
||||
llvm::outs() << latency << "\n";
|
||||
}
|
||||
|
||||
/// For now, estimation for unrolled loops are following the analytical model
|
||||
/// of COMBA, which is suspected to be wrong. Meanwhile, we assume the absence
|
||||
/// of function call in the loop body.
|
||||
///
|
||||
/// This method will update ProcParam::IterLatency and ProcParam::Latency of the
|
||||
/// current affine for loop.
|
||||
void QoREstimator::alignBlockSchedule(Block &block, OpDenseMap &opScheduleMap,
|
||||
unsigned opSchedule) {
|
||||
for (auto &op : block) {
|
||||
if (auto child = dyn_cast<mlir::AffineForOp>(op))
|
||||
alignBlockSchedule(child.getRegion().front(), opScheduleMap, opSchedule);
|
||||
opScheduleMap[&op] = opSchedule;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned QoREstimator::getBlockSchedule(Block &block,
|
||||
OpDenseMap &opScheduleMap) {
|
||||
unsigned blockSchedule = 0;
|
||||
|
||||
for (auto &op : block) {
|
||||
unsigned opSchedule = 0;
|
||||
|
||||
// Add the latest scheduled time among all predecessors.
|
||||
for (auto operand : op.getOperands()) {
|
||||
if (operand.getKind() != Value::Kind::BlockArgument)
|
||||
opSchedule = max(opSchedule, opScheduleMap[operand.getDefiningOp()]);
|
||||
}
|
||||
|
||||
// Add latency of the current operation.
|
||||
unsigned childSchedule = 0;
|
||||
if (auto child = dyn_cast<mlir::AffineForOp>(op)) {
|
||||
opSchedule += procParam.get(child, ProcParamKind::Latency);
|
||||
if (inPipeline)
|
||||
childSchedule =
|
||||
getBlockSchedule(child.getRegion().front(), opScheduleMap);
|
||||
else
|
||||
alignBlockSchedule(child.getRegion().front(), opScheduleMap,
|
||||
opSchedule);
|
||||
} else {
|
||||
// For now we make a simple assumption tha all standard operations has an
|
||||
// unit latency.
|
||||
// TODO: Support estimation from profiling data.
|
||||
opSchedule += 1;
|
||||
}
|
||||
|
||||
opScheduleMap[&op] = opSchedule;
|
||||
blockSchedule = max({blockSchedule, childSchedule, opSchedule});
|
||||
}
|
||||
return blockSchedule;
|
||||
}
|
||||
|
||||
bool QoREstimator::visitOp(AffineForOp op) {
|
||||
auto &body = op.getLoopBody();
|
||||
if (body.getBlocks().size() != 1)
|
||||
op.emitError("has zero or more than one basic blocks.");
|
||||
|
||||
// Recursively estimate latency of sub-elements, including functions and
|
||||
// loops. These sub-elements will be considered as a normal node in the CDFG
|
||||
// for function latency estimzation.
|
||||
estimateBlock(body.front());
|
||||
if (procParam.get(op, ProcParamKind::EnablePipeline)) {
|
||||
inPipeline = true;
|
||||
|
||||
// Estimate iteration latency.
|
||||
unsigned iterLatency = searchLongestPath(body.front());
|
||||
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
|
||||
OpDenseMap opScheduleMap;
|
||||
auto iterLatency = getBlockSchedule(body.front(), opScheduleMap);
|
||||
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
|
||||
|
||||
// Estimate affine for loop latency.
|
||||
unsigned latency = iterLatency;
|
||||
if (procParam.get(op, ProcParamKind::LoopBound) != 1)
|
||||
latency *= procParam.get(op, ProcParamKind::LoopBound) *
|
||||
procParam.get(op, ProcParamKind::UnrollFactor);
|
||||
procParam.set(op, ProcParamKind::Latency, latency);
|
||||
// For now we make a simple assumption that II is equal to 1.
|
||||
auto iterNumber = procParam.get(op, ProcParamKind::IterNumber);
|
||||
procParam.set(op, ProcParamKind::PipeIterNumber, iterNumber);
|
||||
procParam.set(op, ProcParamKind::Latency, iterLatency + iterNumber - 1);
|
||||
|
||||
// TODO: Calculate initial interval.
|
||||
procParam.set(op, ProcParamKind::InitInterval, 1);
|
||||
|
||||
} else {
|
||||
// Recursively estimate each operation, mainly AffineFor operation for now.
|
||||
estimateBlock(body.front());
|
||||
|
||||
// This simply means the current loop can be merged into the child loop
|
||||
// pipeline. This will increase the total IterNumber without changing the
|
||||
// IterLatency.
|
||||
if (inPipeline && procParam.get(op, ProcParamKind::IsPerfect)) {
|
||||
if (auto child = dyn_cast<AffineForOp>(
|
||||
std::next(op.getLoopBody().front().begin()))) {
|
||||
auto initInterval = procParam.get(child, ProcParamKind::InitInterval);
|
||||
auto iterLatency = procParam.get(child, ProcParamKind::IterLatency);
|
||||
auto pipeIterNumber =
|
||||
procParam.get(child, ProcParamKind::PipeIterNumber) *
|
||||
procParam.get(op, ProcParamKind::IterNumber);
|
||||
|
||||
procParam.set(op, ProcParamKind::InitInterval, initInterval);
|
||||
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
|
||||
procParam.set(op, ProcParamKind::PipeIterNumber, pipeIterNumber);
|
||||
procParam.set(op, ProcParamKind::Latency,
|
||||
iterLatency + initInterval * (pipeIterNumber - 1));
|
||||
} else {
|
||||
inPipeline = false;
|
||||
op.emitError("is not a perfect loop.");
|
||||
}
|
||||
}
|
||||
|
||||
// This branch take cares of all unpipelined or imperfect loops.
|
||||
else {
|
||||
inPipeline = false;
|
||||
|
||||
OpDenseMap opScheduleMap;
|
||||
auto iterLatency = getBlockSchedule(body.front(), opScheduleMap);
|
||||
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
|
||||
|
||||
// For now we follow the COMBA approach for unrooled loops.
|
||||
unsigned latency = iterLatency;
|
||||
if (procParam.get(op, ProcParamKind::IterNumber) != 1)
|
||||
latency *= procParam.get(op, ProcParamKind::IterNumber) *
|
||||
procParam.get(op, ProcParamKind::UnrollFactor);
|
||||
procParam.set(op, ProcParamKind::Latency, latency);
|
||||
|
||||
// TODO: Calculate initial interval.
|
||||
procParam.set(op, ProcParamKind::InitInterval, 1);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool QoREstimator::visitOp(AffineParallelOp op) { return true; }
|
||||
|
||||
bool QoREstimator::visitOp(AffineIfOp op) { return true; }
|
||||
|
||||
/// This method recursively update the time stamp of all values (1) directly
|
||||
/// generated as result by the current operation or (2) generated by any
|
||||
/// operations insided of the region held by the current operation.
|
||||
void QoREstimator::updateValueTimeStamp(
|
||||
Operation *currentOp, unsigned opTimeStamp,
|
||||
DenseMap<Value, unsigned> &valueTimeStampMap) {
|
||||
for (auto result : currentOp->getResults())
|
||||
valueTimeStampMap[result] = opTimeStamp;
|
||||
for (auto ®ion : currentOp->getRegions()) {
|
||||
for (auto &op : region.front())
|
||||
updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap);
|
||||
}
|
||||
}
|
||||
|
||||
/// This method will search the longest path in a DAG block using a ASAP (As
|
||||
/// Soon As Possible) manner. Loop, function, if, and other operation owning
|
||||
/// regions will be considered as a whole.
|
||||
unsigned QoREstimator::searchLongestPath(Block &block) {
|
||||
DenseMap<Value, unsigned> valueTimeStampMap;
|
||||
unsigned blockTimeStamp = 0;
|
||||
|
||||
for (auto &op : block) {
|
||||
unsigned opTimeStamp = 0;
|
||||
|
||||
// Add the latest ready time among all predecessors.
|
||||
for (auto operand : op.getOperands())
|
||||
opTimeStamp = max(opTimeStamp, valueTimeStampMap[operand]);
|
||||
|
||||
// Add latency of the current operation.
|
||||
if (auto subAffineFor = dyn_cast<AffineForOp>(op))
|
||||
opTimeStamp += procParam.get(subAffineFor, ProcParamKind::Latency);
|
||||
else
|
||||
opTimeStamp += 1;
|
||||
blockTimeStamp = max(blockTimeStamp, opTimeStamp);
|
||||
|
||||
// Update ready time of each value generated by the current operation.
|
||||
updateValueTimeStamp(&op, opTimeStamp, valueTimeStampMap);
|
||||
}
|
||||
return blockTimeStamp;
|
||||
}
|
||||
|
||||
void QoREstimator::estimateOperation(Operation *op) {
|
||||
if (dispatchVisitor(op))
|
||||
return;
|
||||
|
@ -152,19 +264,14 @@ void QoREstimator::estimateOperation(Operation *op) {
|
|||
op->emitError("can't be correctly estimated.");
|
||||
}
|
||||
|
||||
/// For now, function pipelining and task-level dataflow optimizations are not
|
||||
/// considered for simplicity. Meanwhile, we assume the absence of function call
|
||||
/// in the loop body.
|
||||
///
|
||||
/// This method will update ProcParam::Latency of the current function.
|
||||
void QoREstimator::estimateFunc(FuncOp func) {
|
||||
if (func.getBlocks().size() != 1)
|
||||
func.emitError("has zero or more than one basic blocks.");
|
||||
|
||||
estimateBlock(func.front());
|
||||
|
||||
// Estimate function latency.
|
||||
unsigned latency = searchLongestPath(func.front());
|
||||
OpDenseMap opScheduleMap;
|
||||
auto latency = getBlockSchedule(func.front(), opScheduleMap);
|
||||
procParam.set(func, ProcParamKind::Latency, latency);
|
||||
}
|
||||
|
||||
|
@ -187,7 +294,7 @@ void QoREstimator::estimateModule(ModuleOp module) {
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
struct QoREstimation : public QoREstimationBase<QoREstimation> {
|
||||
struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
||||
void runOnOperation() override {
|
||||
ProcParam procParam;
|
||||
MemParam memParam;
|
||||
|
@ -200,6 +307,26 @@ struct QoREstimation : public QoREstimationBase<QoREstimation> {
|
|||
QoREstimator estimator(analyzer.procParam, analyzer.memParam, targetSpec,
|
||||
opLatency);
|
||||
estimator.estimateModule(getOperation());
|
||||
|
||||
for (auto item : procParam.Params) {
|
||||
llvm::outs() << "EnablePipeline:"
|
||||
<< item.second[(unsigned)ProcParamKind::EnablePipeline]
|
||||
<< "\nUnrollFactor:"
|
||||
<< item.second[(unsigned)ProcParamKind::UnrollFactor]
|
||||
<< "\nIterNumber:"
|
||||
<< item.second[(unsigned)ProcParamKind::IterNumber]
|
||||
<< "\nIsPerfect:"
|
||||
<< item.second[(unsigned)ProcParamKind::IsPerfect]
|
||||
<< "\nInitInterval:"
|
||||
<< item.second[(unsigned)ProcParamKind::InitInterval]
|
||||
<< "\nIterLatency:"
|
||||
<< item.second[(unsigned)ProcParamKind::IterLatency]
|
||||
<< "\nPipeIterNumber:"
|
||||
<< item.second[(unsigned)ProcParamKind::PipeIterNumber]
|
||||
<< "\nLatency:"
|
||||
<< item.second[(unsigned)ProcParamKind::Latency] << "\n";
|
||||
llvm::outs() << *item.first << "\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
|
|
@ -1024,13 +1024,13 @@ void ModuleEmitter::emitAssign(AssignOp *op) {
|
|||
void ModuleEmitter::emitLoopPragma(LoopPragmaOp *op) {
|
||||
indent();
|
||||
os << "#pragma HLS pipeline";
|
||||
if (op->isOff())
|
||||
if (op->off())
|
||||
os << " off\n";
|
||||
else {
|
||||
os << " II=" << op->getII();
|
||||
if (op->isRewind())
|
||||
os << " II=" << op->II();
|
||||
if (op->rewind())
|
||||
os << " rewind";
|
||||
if (op->isEnableFlush())
|
||||
if (op->enable_flush())
|
||||
os << " enable_flush";
|
||||
os << "\n";
|
||||
}
|
||||
|
@ -1038,10 +1038,10 @@ void ModuleEmitter::emitLoopPragma(LoopPragmaOp *op) {
|
|||
indent();
|
||||
os << "#pragma HLS unroll";
|
||||
// TODO: default factor.
|
||||
os << " factor=" << op->getFactor();
|
||||
if (op->isRegion())
|
||||
os << " factor=" << op->factor();
|
||||
if (op->region())
|
||||
os << " region";
|
||||
if (op->isSkipExitCheck())
|
||||
if (op->skip_exit_check())
|
||||
os << " skip_exit_check";
|
||||
os << "\n";
|
||||
}
|
||||
|
@ -1049,13 +1049,13 @@ void ModuleEmitter::emitLoopPragma(LoopPragmaOp *op) {
|
|||
void ModuleEmitter::emitFuncPragma(FuncPragmaOp *op) {
|
||||
indent();
|
||||
os << "#pragma HLS pipeline";
|
||||
if (op->isOff())
|
||||
if (op->off())
|
||||
os << " off\n";
|
||||
else {
|
||||
os << " II=" << op->getII();
|
||||
if (op->isRewind())
|
||||
os << " II=" << op->II();
|
||||
if (op->rewind())
|
||||
os << " rewind";
|
||||
if (op->isEnableFlush())
|
||||
if (op->enable_flush())
|
||||
os << " enable_flush";
|
||||
os << "\n";
|
||||
}
|
||||
|
@ -1066,10 +1066,10 @@ void ModuleEmitter::emitArrayPragma(ArrayPragmaOp *op) {
|
|||
os << "#pragma HLS array_partition";
|
||||
os << " variable=";
|
||||
emitValue(op->getOperand());
|
||||
os << " " << op->getPartitionType();
|
||||
if (op->getPartitionType() != "complete")
|
||||
os << " factor=" << op->getFactor();
|
||||
os << " dim=" << op->getDim();
|
||||
os << " " << op->type();
|
||||
if (op->type() != "complete")
|
||||
os << " factor=" << op->factor();
|
||||
os << " dim=" << op->dim();
|
||||
os << "\n\n";
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
// RUN: scalehls-opt -qor-estimation %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func @test_for
|
||||
func @test_for(%arg0: memref<16xindex>, %arg1: index) {
|
||||
%c11 = constant 11 : index
|
||||
|
||||
affine.for %i = 0 to 16 step 2 {
|
||||
"hlscpp.loop_pragma" () {II = 1 : ui32, enable_flush = false, rewind = false, off = true, factor = 4 : ui32, region = false, skip_exit_check = true} : () -> ()
|
||||
affine.for %j = 0 to 8 {
|
||||
"hlscpp.loop_pragma" () {II = 1 : ui32, enable_flush = false, rewind = false, off = false, factor = 1 : ui32, region = false, skip_exit_check = true} : () -> ()
|
||||
affine.for %k = 0 to 8 step 2 {
|
||||
"hlscpp.loop_pragma" () {II = 1 : ui32, enable_flush = false, rewind = false, off = true, factor = 4 : ui32, region = false, skip_exit_check = true} : () -> ()
|
||||
%0 = load %arg0[%i] : memref<16xindex>
|
||||
%1 = addi %0, %j : index
|
||||
store %1, %arg0[%k] : memref<16xindex>
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
// RUN: scalehls-opt -qor-estimation -pragma-dse %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func @test_pragma()
|
||||
func @test_pragma() {
|
||||
return
|
||||
}
|
Loading…
Reference in New Issue