[QoREstimation] update code with the new ArrayOp; add new HLSCppToolBase class

This commit is contained in:
Hanchen Ye 2020-10-06 01:36:51 -05:00
parent a2934a09a9
commit 8365d24ded
4 changed files with 116 additions and 275 deletions

View File

@ -5,7 +5,6 @@
#ifndef SCALEHLS_ANALYSIS_QORESTIMATION_H
#define SCALEHLS_ANALYSIS_QORESTIMATION_H
#include "Analysis/StaticParam.h"
#include "Visitor.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Pass/Pass.h"
@ -14,29 +13,63 @@
namespace mlir {
namespace scalehls {
//===----------------------------------------------------------------------===//
// HLSCppToolBase Class Declaration
//===----------------------------------------------------------------------===//
class HLSCppToolBase {
public:
explicit HLSCppToolBase(OpBuilder &builder) : builder(builder) {}
/// Get value methods.
unsigned getUIntAttrValue(Operation *op, StringRef name) {
return op->getAttrOfType<IntegerAttr>(name).getUInt();
}
bool getBoolAttrValue(Operation *op, StringRef name) {
return op->getAttrOfType<BoolAttr>(name).getValue();
}
StringRef getStrAttrValue(Operation *op, StringRef name) {
return op->getAttrOfType<StringAttr>(name).getValue();
}
/// Set value methods.
void setAttrValue(Operation *op, StringRef name, unsigned value) {
op->setAttr(name, builder.getUI32IntegerAttr(value));
}
void setAttrValue(Operation *op, StringRef name, bool value) {
op->setAttr(name, builder.getBoolAttr(value));
}
void setAttrValue(Operation *op, StringRef name, StringRef value) {
op->setAttr(name, builder.getStringAttr(value));
}
private:
OpBuilder &builder;
};
//===----------------------------------------------------------------------===//
// HLSCppAnalyzer Class Declaration
//===----------------------------------------------------------------------===//
class HLSCppAnalyzer : public HLSCppVisitorBase<HLSCppAnalyzer, bool> {
class HLSCppAnalyzer : public HLSCppVisitorBase<HLSCppAnalyzer, bool>,
public HLSCppToolBase {
public:
explicit HLSCppAnalyzer(ProcParam &procParam, MemParam &memParam)
: procParam(procParam), memParam(memParam) {}
explicit HLSCppAnalyzer(OpBuilder &builder) : HLSCppToolBase(builder) {}
bool inPipeline;
ProcParam &procParam;
MemParam &memParam;
bool visitUnhandledOp(Operation *op) { return true; }
using HLSCppVisitorBase::visitOp;
bool visitOp(AffineForOp op);
bool visitOp(AffineIfOp op);
void analyzeOperation(Operation *op);
void analyzeFunc(FuncOp func);
void analyzeBlock(Block &block);
void analyzeFunc(FuncOp func);
void analyzeModule(ModuleOp module);
};
@ -44,10 +77,11 @@ public:
// QoREstimator Class Declaration
//===----------------------------------------------------------------------===//
class QoREstimator : public HLSCppVisitorBase<QoREstimator, bool> {
class QoREstimator : public HLSCppVisitorBase<QoREstimator, bool>,
public HLSCppToolBase {
public:
explicit QoREstimator(ProcParam &procParam, MemParam &memParam,
std::string targetSpecPath, std::string opLatencyPath);
explicit QoREstimator(OpBuilder &builder, std::string targetSpecPath,
std::string opLatencyPath);
// For storing the scheduled time stamp of operations;
using ScheduleMap = llvm::SmallDenseMap<Operation *, unsigned, 16>;
@ -65,9 +99,6 @@ public:
// which will impact the estimation strategy.
bool inPipeline;
ProcParam &procParam;
MemParam &memParam;
bool visitUnhandledOp(Operation *op) { return true; }
using HLSCppVisitorBase::visitOp;

View File

@ -1,136 +0,0 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#ifndef SCALEHLS_ANALYSIS_STATICPARAM_H
#define SCALEHLS_ANALYSIS_STATICPARAM_H
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
namespace mlir {
namespace scalehls {
//===----------------------------------------------------------------------===//
// ParamBase class
//===----------------------------------------------------------------------===//
template <typename ParamKind, typename KeyType> class ParamBase {
public:
void init(KeyType key) {
for (unsigned i = 0; i < (unsigned)ParamKind::KindNum; ++i)
Params[key].push_back(0);
}
unsigned get(KeyType key, ParamKind kind) {
return Params[key][(unsigned)kind];
}
void set(KeyType key, ParamKind kind, unsigned param) {
Params[key][(unsigned)kind] = param;
}
DenseMap<KeyType, SmallVector<unsigned, 16>> Params;
};
//===----------------------------------------------------------------------===//
// ProcParam and MemParam classes
//===----------------------------------------------------------------------===//
// Kinds of parameters recorded per process. The enumerator values are used as
// slot indices by ParamBase, so the order of the enumerators must not change
// without updating every reader.
enum class ProcParamKind {
// Process-related pragma configurations.
EnablePipeline,
UnrollFactor,
// Process attributes.
LowerBound,
UpperBound,
IterNumber,
IsPerfect,
// Performance parameters.
InitInterval,
IterLatency,
PipeIterNumber,
Latency,
// Resource parameters.
LUT,
BRAM,
DSP,
// Number of parameter kinds; must stay one past the last real enumerator.
KindNum = DSP + 1
};
// Kinds of parameters recorded per memory. As with ProcParamKind, the
// enumerator values serve as slot indices in ParamBase, so their order is
// significant.
enum class MemParamKind {
// Pragma configurations.
StorageType,
StorageImpl,
PartitionType,
PartitionFactor,
InterfaceMode,
// Performance parameters.
ReadNum,
WriteNum,
ReadPorts,
WritePorts,
// Resource parameters.
LUT,
BRAM,
// Number of parameter kinds; must stay one past the last real enumerator.
KindNum = BRAM + 1
};
/// This class includes all possible parameter kinds for "processes" (function,
/// for/parallel loop, and if). Parameters are keyed by the Operation pointer
/// of the process; each getter below is a named shorthand for
/// `get(op, ProcParamKind::<Kind>)`.
class ProcParam : public ParamBase<ProcParamKind, Operation *> {
public:
  // The getters must be public: with the default (private) access of `class`
  // they could not be called from outside ProcParam.

  // Process-related pragma configurations.
  unsigned getEnablePipeline(Operation *op) {
    return get(op, ProcParamKind::EnablePipeline);
  }
  unsigned getUnrollFactor(Operation *op) {
    return get(op, ProcParamKind::UnrollFactor);
  }

  // Process attributes.
  unsigned getLowerBound(Operation *op) {
    return get(op, ProcParamKind::LowerBound);
  }
  unsigned getUpperBound(Operation *op) {
    return get(op, ProcParamKind::UpperBound);
  }
  unsigned getIterNumber(Operation *op) {
    return get(op, ProcParamKind::IterNumber);
  }
  unsigned getIsPerfect(Operation *op) {
    return get(op, ProcParamKind::IsPerfect);
  }

  // Performance parameters.
  unsigned getInitInterval(Operation *op) {
    return get(op, ProcParamKind::InitInterval);
  }
  unsigned getIterLatency(Operation *op) {
    return get(op, ProcParamKind::IterLatency);
  }
  unsigned getPipeIterNumber(Operation *op) {
    return get(op, ProcParamKind::PipeIterNumber);
  }
  unsigned getLatency(Operation *op) { return get(op, ProcParamKind::Latency); }

  // Resource parameters.
  unsigned getLUT(Operation *op) { return get(op, ProcParamKind::LUT); }
  unsigned getBRAM(Operation *op) { return get(op, ProcParamKind::BRAM); }
  unsigned getDSP(Operation *op) { return get(op, ProcParamKind::DSP); }
};
/// This class includes all possible parameter kinds for memories (memref,
/// tensor, and vector). It adds nothing of its own: it is ParamBase
/// instantiated with MemParamKind as the parameter kind and Value as the key.
class MemParam : public ParamBase<MemParamKind, Value> {};
} // namespace scalehls
} // namespace mlir
#endif // SCALEHLS_ANALYSIS_STATICPARAM_H

View File

@ -26,85 +26,60 @@ bool HLSCppAnalyzer::visitOp(AffineForOp op) {
if (body.getBlocks().size() != 1)
op.emitError("has zero or more than one basic blocks.");
if (procParam.Params[op].empty())
procParam.init(op);
// Recursively analyze all children.
analyzeBlock(body.front());
// Pragma configurations.
unsigned unrollFactor = 1;
if (auto loopPragma = dyn_cast<LoopPragmaOp>(body.front().front())) {
procParam.set(op, ProcParamKind::EnablePipeline, loopPragma.pipeline());
procParam.set(op, ProcParamKind::UnrollFactor, loopPragma.unroll_factor());
unrollFactor = loopPragma.unroll_factor();
}
// Loop statistics.
if (!op.getUpperBoundMap().isSingleConstant() ||
!op.getLowerBoundMap().isSingleConstant())
// Set an attribute indicating the iteration number.
if (!op.hasConstantLowerBound() || !op.hasConstantUpperBound())
op.emitError("has variable upper or lower bound.");
unsigned upperBound = op.getUpperBoundMap().getSingleConstantResult();
unsigned lowerBound = op.getLowerBoundMap().getSingleConstantResult();
unsigned step = op.getStep();
unsigned iterNumber =
(op.getConstantUpperBound() - op.getConstantLowerBound()) /
getUIntAttrValue(op, "unroll_factor") / op.getStep();
procParam.set(op, ProcParamKind::UpperBound, upperBound);
procParam.set(op, ProcParamKind::LowerBound, lowerBound);
procParam.set(op, ProcParamKind::IterNumber,
(upperBound - lowerBound) / step / unrollFactor);
setAttrValue(op, "iter_number", iterNumber);
// Set an attribute indicating this loop is perfect or not.
unsigned opNum = 0;
unsigned loopNum = 0;
bool isPerfect = false;
for (auto &bodyOp : op.getRegion().front()) {
if (!isa<LoopPragmaOp>(bodyOp) && !isa<AffineYieldOp>(bodyOp)) {
bool childPerfect = false;
for (auto &bodyOp : body.front()) {
if (!isa<AffineYieldOp>(bodyOp))
opNum += 1;
if (auto child = dyn_cast<AffineForOp>(bodyOp)) {
loopNum += 1;
isPerfect = procParam.get(child, ProcParamKind::IsPerfect);
}
if (auto child = dyn_cast<AffineForOp>(bodyOp)) {
loopNum += 1;
childPerfect = getBoolAttrValue(child, "perfect");
}
}
// Perfect nested loop.
if (opNum == 1 && loopNum == 1 && isPerfect)
procParam.set(op, ProcParamKind::IsPerfect, 1);
// The inner loop.
if (opNum == 1 && loopNum == 1 && childPerfect)
setAttrValue(op, "perfect", true);
else if (loopNum == 0)
procParam.set(op, ProcParamKind::IsPerfect, 1);
setAttrValue(op, "perfect", true);
else
procParam.set(op, ProcParamKind::IsPerfect, 0);
setAttrValue(op, "perfect", false);
return true;
}
bool HLSCppAnalyzer::visitOp(AffineIfOp op) { return true; }
/// This method will update all parameters except IterLatency, Latency, LUT,
/// BRAM, and DSP through static analysis.
void HLSCppAnalyzer::analyzeOperation(Operation *op) {
if (dispatchVisitor(op))
return;
op->emitError("can't be correctly analyzed.");
void HLSCppAnalyzer::analyzeBlock(Block &block) {
for (auto &op : block) {
if (dispatchVisitor(&op))
continue;
op.emitError("can't be correctly analyzed.");
}
}
void HLSCppAnalyzer::analyzeFunc(FuncOp func) {
if (func.getBlocks().size() != 1)
func.emitError("has zero or more than one basic blocks.");
procParam.init(func);
analyzeBlock(func.front());
}
void HLSCppAnalyzer::analyzeBlock(Block &block) {
for (auto &op : block)
analyzeOperation(&op);
}
/// This method is a wrapper for recursively calling operation analyzer.
void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
for (auto &op : module) {
if (auto func = dyn_cast<FuncOp>(op)) {
@ -119,9 +94,9 @@ void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
//===----------------------------------------------------------------------===//
/// Estimator constructor.
QoREstimator::QoREstimator(ProcParam &procParam, MemParam &memParam,
string targetSpecPath, string opLatencyPath)
: procParam(procParam), memParam(memParam) {
QoREstimator::QoREstimator(OpBuilder &builder, string targetSpecPath,
string opLatencyPath)
: HLSCppToolBase(builder) {
inPipeline = false;
@ -166,7 +141,7 @@ unsigned QoREstimator::getBlockSchedule(Block &block,
// Add latency of the current operation.
unsigned childSchedule = 0;
if (auto child = dyn_cast<mlir::AffineForOp>(op)) {
opSchedule += procParam.get(child, ProcParamKind::Latency);
opSchedule += getUIntAttrValue(child, "latency");
if (inPipeline)
childSchedule =
getBlockSchedule(child.getRegion().front(), opScheduleMap);
@ -239,16 +214,16 @@ bool QoREstimator::visitOp(AffineForOp op) {
if (body.getBlocks().size() != 1)
op.emitError("has zero or more than one basic blocks.");
if (procParam.get(op, ProcParamKind::EnablePipeline)) {
if (getBoolAttrValue(op, "pipeline")) {
inPipeline = true;
ScheduleMap opScheduleMap;
auto iterLatency = getBlockSchedule(body.front(), opScheduleMap);
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
getUIntAttrValue(op, "iter_latency");
// For now we make a simple assumption that II is equal to 1.
auto iterNumber = procParam.get(op, ProcParamKind::IterNumber);
procParam.set(op, ProcParamKind::PipeIterNumber, iterNumber);
auto iterNumber = getUIntAttrValue(op, "iter_number");
setAttrValue(op, "pipeline_iter", iterNumber);
// Calculate initial interval.
MemAccessList memLoadList;
@ -266,10 +241,8 @@ bool QoREstimator::visitOp(AffineForOp op) {
for (auto &op : body.front()) {
}
procParam.set(op, ProcParamKind::InitInterval, initInterval);
procParam.set(op, ProcParamKind::Latency,
iterLatency + initInterval * (iterNumber - 1));
setAttrValue(op, "pipeline_II", initInterval);
setAttrValue(op, "latency", iterLatency + initInterval * (iterNumber - 1));
}
// If the loop is not pipelined, the estimation is much different and requires
@ -283,21 +256,20 @@ bool QoREstimator::visitOp(AffineForOp op) {
// This simply means the current loop can be merged into the child loop
// pipeline. This will increase the total IterNumber without changing the
// IterLatency.
if (inPipeline && procParam.get(op, ProcParamKind::IsPerfect)) {
if (inPipeline && getBoolAttrValue(op, "perfect")) {
if (auto child = dyn_cast<AffineForOp>(
std::next(op.getLoopBody().front().begin()))) {
auto initInterval = procParam.get(child, ProcParamKind::InitInterval);
auto iterLatency = procParam.get(child, ProcParamKind::IterLatency);
auto pipeIterNumber =
procParam.get(child, ProcParamKind::PipeIterNumber) *
procParam.get(op, ProcParamKind::IterNumber);
auto initInterval = getUIntAttrValue(child, "pipeline_II");
auto iterLatency = getUIntAttrValue(child, "iter_latency");
auto pipeIterNumber = getUIntAttrValue(child, "pipeline_iter") *
getUIntAttrValue(op, "iter_number");
procParam.set(op, ProcParamKind::InitInterval, initInterval);
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
procParam.set(op, ProcParamKind::PipeIterNumber, pipeIterNumber);
setAttrValue(op, "pipeline_II", initInterval);
setAttrValue(op, "iter_latency", iterLatency);
setAttrValue(op, "pipeline_iter", pipeIterNumber);
procParam.set(op, ProcParamKind::Latency,
iterLatency + initInterval * (pipeIterNumber - 1));
setAttrValue(op, "latency",
iterLatency + initInterval * (pipeIterNumber - 1));
} else {
inPipeline = false;
op.emitError("is not a perfect loop.");
@ -310,17 +282,17 @@ bool QoREstimator::visitOp(AffineForOp op) {
ScheduleMap opScheduleMap;
auto iterLatency = getBlockSchedule(body.front(), opScheduleMap);
procParam.set(op, ProcParamKind::IterLatency, iterLatency);
setAttrValue(op, "iter_latency", iterLatency);
// For now we follow the COMBA approach for unrolled loops.
unsigned latency = iterLatency;
if (procParam.get(op, ProcParamKind::IterNumber) != 1)
latency *= procParam.get(op, ProcParamKind::IterNumber) *
procParam.get(op, ProcParamKind::UnrollFactor);
procParam.set(op, ProcParamKind::Latency, latency);
if (getUIntAttrValue(op, "iter_number") != 1)
latency *= getUIntAttrValue(op, "iter_number") *
getUIntAttrValue(op, "unroll_factor");
setAttrValue(op, "latency", latency);
// TODO: Calculate initial interval.
procParam.set(op, ProcParamKind::InitInterval, 1);
setAttrValue(op, "iter_latency", (unsigned)1);
}
}
return true;
@ -328,11 +300,12 @@ bool QoREstimator::visitOp(AffineForOp op) {
bool QoREstimator::visitOp(AffineIfOp op) { return true; }
void QoREstimator::estimateOperation(Operation *op) {
if (dispatchVisitor(op))
return;
op->emitError("can't be correctly estimated.");
void QoREstimator::estimateBlock(Block &block) {
for (auto &op : block) {
if (dispatchVisitor(&op))
continue;
op.emitError("can't be correctly analyzed.");
}
}
void QoREstimator::estimateFunc(FuncOp func) {
@ -343,12 +316,7 @@ void QoREstimator::estimateFunc(FuncOp func) {
ScheduleMap opScheduleMap;
auto latency = getBlockSchedule(func.front(), opScheduleMap);
procParam.set(func, ProcParamKind::Latency, latency);
}
void QoREstimator::estimateBlock(Block &block) {
for (auto &op : block)
estimateOperation(&op);
setAttrValue(func, "latency", latency);
}
void QoREstimator::estimateModule(ModuleOp module) {
@ -367,37 +335,15 @@ void QoREstimator::estimateModule(ModuleOp module) {
namespace {
struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
void runOnOperation() override {
ProcParam procParam;
MemParam memParam;
auto builder = OpBuilder(getOperation());
// Extract all static parameters and current pragma configurations.
HLSCppAnalyzer analyzer(procParam, memParam);
HLSCppAnalyzer analyzer(builder);
analyzer.analyzeModule(getOperation());
// Estimate performance and resource utilization.
QoREstimator estimator(analyzer.procParam, analyzer.memParam, targetSpec,
opLatency);
QoREstimator estimator(builder, targetSpec, opLatency);
estimator.estimateModule(getOperation());
for (auto item : procParam.Params) {
llvm::outs() << "EnablePipeline:"
<< item.second[(unsigned)ProcParamKind::EnablePipeline]
<< "\nUnrollFactor:"
<< item.second[(unsigned)ProcParamKind::UnrollFactor]
<< "\nIterNumber:"
<< item.second[(unsigned)ProcParamKind::IterNumber]
<< "\nIsPerfect:"
<< item.second[(unsigned)ProcParamKind::IsPerfect]
<< "\nInitInterval:"
<< item.second[(unsigned)ProcParamKind::InitInterval]
<< "\nIterLatency:"
<< item.second[(unsigned)ProcParamKind::IterLatency]
<< "\nPipeIterNumber:"
<< item.second[(unsigned)ProcParamKind::PipeIterNumber]
<< "\nLatency:"
<< item.second[(unsigned)ProcParamKind::Latency] << "\n";
llvm::outs() << *item.first << "\n";
}
}
};
} // namespace

View File

@ -23,7 +23,7 @@ static void convertBlock(Block &block) {
for (auto &op : block) {
if (isa<ArrayOp>(op))
continue;
auto b = OpBuilder(&op);
auto builder = OpBuilder(&op);
// ArrayOp will be inserted after each ShapedType value from declaration
// or function signature.
@ -41,19 +41,19 @@ static void convertBlock(Block &block) {
if (insertArrayOp) {
// Insert array operation and set attributes.
b.setInsertionPointAfterValue(operand);
builder.setInsertionPointAfterValue(operand);
auto arrayOp =
b.create<ArrayOp>(op.getLoc(), operand.getType(), operand);
builder.create<ArrayOp>(op.getLoc(), operand.getType(), operand);
operand.replaceAllUsesExcept(arrayOp.getResult(),
SmallPtrSet<Operation *, 1>{arrayOp});
// Set array pragma attributes, default array instance is ram_1p
// bram. Other attributes are not set here since they requires more
// analysis to be determined.
arrayOp.setAttr("interface", b.getBoolAttr(false));
arrayOp.setAttr("storage_type", b.getStringAttr("ram_1p"));
arrayOp.setAttr("storage_impl", b.getStringAttr("bram"));
arrayOp.setAttr("partition", b.getBoolAttr(false));
arrayOp.setAttr("interface", builder.getBoolAttr(false));
arrayOp.setAttr("storage_type", builder.getStringAttr("ram_1p"));
arrayOp.setAttr("storage_impl", builder.getStringAttr("bram"));
arrayOp.setAttr("partition", builder.getBoolAttr(false));
}
}
}
@ -63,9 +63,9 @@ static void convertBlock(Block &block) {
forOp.emitError("has zero or more than one basic blocks");
// Set loop pragma attributes.
forOp.setAttr("pipeline", b.getBoolAttr(false));
forOp.setAttr("pipeline_II", b.getUI32IntegerAttr(1));
forOp.setAttr("unroll_factor", b.getUI32IntegerAttr(1));
forOp.setAttr("pipeline", builder.getBoolAttr(false));
forOp.setAttr("pipeline_II", builder.getUI32IntegerAttr(1));
forOp.setAttr("unroll_factor", builder.getUI32IntegerAttr(1));
convertBlock(forOp.getLoopBody().front());
}