[QoREstimator] thoroughly rewrite the estimator with new strategy and structure
parent fac1498067
commit 512c842908
@@ -7,6 +7,8 @@
#include "Dialect/HLSCpp/Visitor.h"
#include "INIReader.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Liveness.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"

@@ -21,10 +23,12 @@ namespace scalehls {
class HLSCppToolBase {
public:
explicit HLSCppToolBase(OpBuilder &builder) : builder(builder) {}
explicit HLSCppToolBase(OpBuilder builder) : builder(builder) {}

/// Get value methods.
int64_t getIntAttrValue(Operation *op, StringRef name) {
OpBuilder builder;

/// Get attribute value methods.
int32_t getIntAttrValue(Operation *op, StringRef name) {
if (auto attr = op->getAttrOfType<IntegerAttr>(name))
return attr.getInt();
else

@@ -52,6 +56,7 @@ public:
return "";
}

/// Get partition information methods.
StringRef getPartitionType(ArrayOp op, unsigned dim) {
if (auto attr = op.partition_type()[dim].cast<StringAttr>())
return attr.getValue();

@@ -66,15 +71,15 @@ public:
return 0;
}

/// Set value methods.
void setAttrValue(Operation *op, StringRef name, unsigned value) {
op->setAttr(name, builder.getUI32IntegerAttr(value));
}

/// Set attribute value methods.
void setAttrValue(Operation *op, StringRef name, int32_t value) {
op->setAttr(name, builder.getI32IntegerAttr(value));
}

void setAttrValue(Operation *op, StringRef name, unsigned value) {
op->setAttr(name, builder.getUI32IntegerAttr(value));
}

void setAttrValue(Operation *op, StringRef name, bool value) {
op->setAttr(name, builder.getBoolAttr(value));
}

@@ -82,21 +87,6 @@ public:
void setAttrValue(Operation *op, StringRef name, StringRef value) {
op->setAttr(name, builder.getStringAttr(value));
}

/// Get expression methods.
AffineExpr getSymbolExpr(unsigned value) {
return getAffineSymbolExpr(value, builder.getContext());
}

AffineExpr getDimExpr(unsigned value) {
return getAffineDimExpr(value, builder.getContext());
}

AffineExpr getConstExpr(int64_t value) {
return getAffineConstantExpr(value, builder.getContext());
}

OpBuilder &builder;
};

//===----------------------------------------------------------------------===//

@@ -104,9 +94,13 @@ public:
//===----------------------------------------------------------------------===//

// For storing all memory access operations (including AffineLoadOp and
// AffineStoreOp) indexed by the array instantce (ArrayOp).
using LoadStore = SmallVector<Operation *, 16>;
using LoadStoreDict = llvm::SmallDenseMap<Operation *, LoadStore, 8>;
// AffineStoreOp) indexed by the array instance (ArrayOp).
using LoadStores = SmallVector<Operation *, 16>;
using LoadStoresMap = DenseMap<Operation *, LoadStores>;

// For storing all dependent operations indexed by the source operation.
using Depends = SmallVector<Operation *, 16>;
using DependsMap = DenseMap<Operation *, Depends>;

// Indicate the unoccupied memory ports number.
struct PortInfo {

@@ -118,37 +112,49 @@ struct PortInfo {
unsigned rdwrPort;
};

// For storing ports number information of each memory instance.
using MemPort = SmallVector<PortInfo, 16>;
using MemPortDict = llvm::SmallDenseMap<Operation *, MemPort, 8>;
// For storing ports number of all partitions indexed by the array instance
// (ArrayOp).
using Ports = SmallVector<PortInfo, 16>;
using PortsMap = DenseMap<Operation *, Ports>;

// For storing MemPort indexed by the pipeline stage.
using MemPortDicts = SmallVector<MemPortDict, 16>;
// For storing PortsMap indexed by the scheduling level.
using PortsMapDict = DenseMap<unsigned, PortsMap>;

class HLSCppEstimator : public HLSCppVisitorBase<HLSCppEstimator, bool>,
public HLSCppToolBase {
class HLSCppEstimator
: public HLSCppVisitorBase<HLSCppEstimator, Optional<unsigned>, unsigned>,
public HLSCppToolBase {
public:
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath);

bool visitUnhandledOp(Operation *op) { return true; }
explicit HLSCppEstimator(FuncOp &func)
: HLSCppToolBase(OpBuilder(func)), func(func), liveness(Liveness(func)) {
getFuncMemRefDepends();
}

void getFuncMemRefDepends();
using HLSCppVisitorBase::visitOp;
bool visitOp(AffineForOp op);
bool visitOp(AffineIfOp op);
bool visitOp(ArrayOp op);
Optional<unsigned> visitUnhandledOp(Operation *op, unsigned begin) {
// Default latency of any unhandled operation is 1.
return begin + 1;
}

void getBlockMemInfo(Block &block, LoadStoreDict &info);
int32_t getPartitionIndex(Operation *op);
unsigned getLoadStoreSchedule(Operation *op, unsigned begin);
Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin);
Optional<unsigned> visitOp(AffineStoreOp op, unsigned begin);

unsigned getLoadStoreSchedule(Operation *op, unsigned begin,
MemPortDicts &dicts);
void updateChildBlockSchedule(Block &block, unsigned begin);
unsigned getBlockSchedule(Block &block);
unsigned getResMinII(AffineForOp forOp, LoadStoresMap &map);
unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map);
Optional<unsigned> visitOp(AffineForOp op, unsigned begin);

unsigned getResMinII(AffineForOp forOp, LoadStoreDict dict);
unsigned getDepMinII(AffineForOp forOp, LoadStoreDict dict);
Optional<unsigned> visitOp(AffineIfOp op, unsigned begin);
Optional<unsigned> visitOp(ArrayOp op, unsigned begin);

bool estimateFunc(FuncOp func);
bool estimateBlock(Block &block);
Optional<unsigned> estimateBlock(Block &block, unsigned blockBegin);
void estimateFunc();

FuncOp &func;
Liveness liveness;
DependsMap dependsMap;
PortsMapDict portsMapDict;
};

} // namespace scalehls
@@ -5,14 +5,11 @@
#include "Analysis/QoREstimation.h"
#include "Analysis/Passes.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Liveness.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace std;
using namespace mlir;

@@ -20,97 +17,246 @@ using namespace scalehls;
using namespace hlscpp;

//===----------------------------------------------------------------------===//
// HLSCppEstimator Class Definition
// Helpers
//===----------------------------------------------------------------------===//

/// Estimator constructor.
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath)
: HLSCppToolBase(builder) {
// Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
// AffineIfOp is transparent.
static bool checkSameLevel(Operation *lhsOp, Operation *rhsOp) {
// If lhsOp and rhsOp are already at the same level, return true.
if (lhsOp->getBlock() == rhsOp->getBlock())
return true;

INIReader targetSpec(targetSpecPath);
if (targetSpec.ParseError())
llvm::outs() << "error: target spec file parse fail, please refer to "
"--help option and pass in correct file path\n";
// Get all nested parent AffineIfOps, include lhsOp and rhsOp.
auto getNests = ([&](Operation *op, SmallVector<Operation *, 4> &nests) {
nests.push_back(op);
auto currentOp = op;
while (true) {
if (auto parentOp = currentOp->getParentOfType<AffineIfOp>()) {
nests.push_back(parentOp);
currentOp = parentOp;
} else
break;
}
});

// TODO: Support estimator initiation from profiling data.
auto freq = targetSpec.Get("spec", "frequency", "200MHz");
auto latency = targetSpec.GetInteger(freq, "op", 0);
llvm::outs() << latency << "\n";
SmallVector<Operation *, 4> lhsNests;
SmallVector<Operation *, 4> rhsNests;

getNests(lhsOp, lhsNests);
getNests(rhsOp, rhsNests);

// If any parent of lhsOp and any parent of rhsOp are at the same level,
// return true.
for (auto lhs : lhsNests)
for (auto rhs : rhsNests)
if (lhs->getBlock() == rhs->getBlock())
return true;

return false;
}

/// Collect memory access information of the block.
void HLSCppEstimator::getBlockMemInfo(Block &block, LoadStoreDict &dict) {
// Walk through all load/store operations in the current block.
/// Get all nested parent AffineForOps. Since AffineIfOps are transparent,
/// AffineIfOps are skipped during the procedure.
static void getLoopNests(Operation *op, SmallVector<Operation *, 4> &nests) {
auto currentOp = op;
while (true) {
if (auto parentOp = currentOp->getParentOfType<AffineForOp>()) {
nests.push_back(parentOp);
currentOp = parentOp;
} else if (auto parentOp = currentOp->getParentOfType<AffineIfOp>())
currentOp = parentOp;
else
break;
}
}

/// Get the definition ArrayOp given any memory access operation.
static ArrayOp getArrayOp(Operation *op) {
auto defOp = MemRefAccess(op).memref.getDefiningOp();
assert(defOp && "MemRef is block argument");

auto arrayOp = dyn_cast<ArrayOp>(defOp);
assert(arrayOp && "MemRef is not defined by ArrayOp");

return arrayOp;
}

/// Collect all load and store operations in the block.
static void getLoadStoresMap(Block &block, LoadStoresMap &map) {
block.walk([&](Operation *op) {
if (isa<mlir::AffineReadOpInterface, mlir::AffineWriteOpInterface>(op)) {
auto memAccess = MemRefAccess(op);
auto arrayOp = cast<ArrayOp>(memAccess.memref.getDefiningOp());

AffineValueMap accessMap;
memAccess.getAccessMap(&accessMap);

dict[arrayOp].push_back(op);

// Calculate the partition index of this load/store operation honoring the
// partition strategy applied.
int32_t partitionIdx = 0;
unsigned accumFactor = 1;
unsigned dim = 0;
for (auto expr : accessMap.getAffineMap().getResults()) {
auto idxExpr = getConstExpr(0);
unsigned factor = 1;
if (arrayOp.partition()) {
auto type = getPartitionType(arrayOp, dim);
factor = getPartitionFactor(arrayOp, dim);

if (type == "cyclic")
idxExpr = expr % getConstExpr(factor);
else if (type == "block") {
auto size = arrayOp.getType().cast<ShapedType>().getShape()[dim];
idxExpr = expr.floorDiv(getConstExpr((size + factor - 1) / factor));
}
}
if (auto constExpr = idxExpr.dyn_cast<AffineConstantExpr>()) {
if (dim == 0)
partitionIdx = constExpr.getValue();
else
partitionIdx += constExpr.getValue() * accumFactor;
} else {
partitionIdx = -1;
break;
}

accumFactor *= factor;
dim++;
}

// Set partition index attribute.
setAttrValue(op, "partition_index", partitionIdx);
}
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
map[getArrayOp(op)].push_back(op);
});
}

/// Calculate load/store operation schedule honoring the memory ports number
/// limitation. This method will be called by getBlockSchedule method.
unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin,
MemPortDicts &dicts) {
auto memAccess = MemRefAccess(op);
auto arrayOp = cast<ArrayOp>(memAccess.memref.getDefiningOp());
//===----------------------------------------------------------------------===//
// MemRef Dependency Collection Methods
//===----------------------------------------------------------------------===//

auto partitionIdx = getIntAttrValue(op, "partition_index");
/// Get the common loop depth shared by lhsOp and rhsOp.
static unsigned getCommonLoopDepth(Operation *lhsOp, Operation *rhsOp) {
// Collect all parent nested loops.
SmallVector<Operation *, 4> lhsLoopNests;
SmallVector<Operation *, 4> rhsLoopNests;

getLoopNests(lhsOp, lhsLoopNests);
getLoopNests(rhsOp, rhsLoopNests);

// Calculate common loop depth.
auto lhsDepth = lhsLoopNests.size();
auto rhsDepth = rhsLoopNests.size();
unsigned commonLoopDepth = 0;

for (unsigned i = 0, e = min(lhsDepth, rhsDepth); i < e; ++i) {
if (lhsLoopNests[lhsDepth - 1 - i] == rhsLoopNests[rhsDepth - 1 - i])
commonLoopDepth++;
else
break;
}

return commonLoopDepth;
}

/// Collect all dependencies detected in the function.
void HLSCppEstimator::getFuncMemRefDepends() {
LoadStoresMap loadStoresMap;
getLoadStoresMap(func.front(), loadStoresMap);

// Walk through all ArrayOp - LoadOp/StoreOp pairs.
for (auto &pair : loadStoresMap) {
auto loadStores = pair.second;

// Walk through each pair of source and destination. Note that for intra
// dependencies, srcOp is always before dstOp.
unsigned srcIndex = 1;
for (auto srcOp : loadStores) {
MemRefAccess srcAccess(srcOp);
for (auto dstOp : llvm::drop_begin(loadStores, srcIndex)) {
MemRefAccess dstAccess(dstOp);

bool dependFlag = false;
auto commonLoopDepth = getCommonLoopDepth(srcOp, dstOp);
for (unsigned depth = 1; depth <= commonLoopDepth; ++depth) {
// Initialize constraints and components.
FlatAffineConstraints dependConstrs;
SmallVector<DependenceComponent, 2> dependComps;

// Check dependency.
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, depth, &dependConstrs, &dependComps);
dependFlag = hasDependence(result);
}

// All dependencies are pushed into the dependsMap output.
if (dependFlag)
dependsMap[dstOp].push_back(srcOp);
}
srcIndex++;
}
}
}
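The dependence collection above visits every ordered pair of accesses to the same array, with the source always preceding the destination in program order (hence the srcIndex / llvm::drop_begin pattern). A minimal self-contained sketch of that traversal, with illustrative names only, not the pass's API:

#include <vector>

// Visit every ordered pair (src, dst) with src strictly before dst, mirroring
// the srcIndex / drop_begin walk in getFuncMemRefDepends.
template <typename Access, typename Visitor>
void forEachOrderedPair(const std::vector<Access> &accesses, Visitor visit) {
  for (size_t src = 0; src + 1 < accesses.size(); ++src)
    for (size_t dst = src + 1; dst < accesses.size(); ++dst)
      visit(accesses[src], accesses[dst]); // src precedes dst in program order
}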
//===----------------------------------------------------------------------===//
// LoadOp and StoreOp Related Methods
//===----------------------------------------------------------------------===//

// Get the pointer of the scrOp's parent loop, which should locate at the same
// level with dstOp's any parent loop.
static Operation *getSameLevelSourceOp(Operation *srcOp, Operation *dstOp) {
// If srcOp and dstOp are already at the same level, return the srcOp.
if (checkSameLevel(srcOp, dstOp))
return srcOp;

SmallVector<Operation *, 4> srcNests;
SmallVector<Operation *, 4> dstNests;
srcNests.push_back(srcOp);
dstNests.push_back(dstOp);

getLoopNests(srcOp, srcNests);
getLoopNests(dstOp, dstNests);

// If any parent of srcOp (or itself) and any parent of dstOp (or itself) are
// at the same level, return the pointer.
for (auto src : srcNests)
for (auto dst : dstNests)
if (checkSameLevel(src, dst))
return src;

return nullptr;
}

/// Calculate the overall partition index.
int32_t HLSCppEstimator::getPartitionIndex(Operation *op) {
auto arrayOp = getArrayOp(op);
AffineValueMap accessMap;
MemRefAccess(op).getAccessMap(&accessMap);

// Calculate the partition index of this load/store operation honoring the
// partition strategy applied.
int32_t partitionIdx = 0;
unsigned accumFactor = 1;
unsigned dim = 0;

for (auto expr : accessMap.getAffineMap().getResults()) {
auto idxExpr = builder.getAffineConstantExpr(0);
unsigned factor = 1;

if (arrayOp.partition()) {
auto type = getPartitionType(arrayOp, dim);
factor = getPartitionFactor(arrayOp, dim);

if (type == "cyclic")
idxExpr = expr % builder.getAffineConstantExpr(factor);
else if (type == "block") {
auto size = arrayOp.getType().cast<ShapedType>().getShape()[dim];
idxExpr = expr.floorDiv(
builder.getAffineConstantExpr((size + factor - 1) / factor));
}
}

if (auto constExpr = idxExpr.dyn_cast<AffineConstantExpr>()) {
if (dim == 0)
partitionIdx = constExpr.getValue();
else
partitionIdx += constExpr.getValue() * accumFactor;
} else {
partitionIdx = -1;
break;
}

accumFactor *= factor;
dim++;
}
return partitionIdx;
}
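For intuition, the linearization in getPartitionIndex can be reproduced on plain integers when every access index is a compile-time constant. The sketch below uses illustrative names and standard C++ only; it is not the estimator's API:

#include <cassert>
#include <cstdint>
#include <vector>

// Map a constant multi-dimensional access to its flat partition index.
// cyclic[dim] == true  -> cyclic partitioning: index % factor
// cyclic[dim] == false -> block partitioning:  index / ceil(size / factor)
int64_t linearPartitionIndex(const std::vector<int64_t> &indices,
                             const std::vector<int64_t> &sizes,
                             const std::vector<int64_t> &factors,
                             const std::vector<bool> &cyclic) {
  assert(indices.size() == sizes.size() && sizes.size() == factors.size());
  int64_t partitionIdx = 0;
  int64_t accumFactor = 1;
  for (size_t dim = 0; dim < indices.size(); ++dim) {
    int64_t factor = factors[dim];
    int64_t idx = cyclic[dim]
                      ? indices[dim] % factor
                      : indices[dim] / ((sizes[dim] + factor - 1) / factor);
    partitionIdx += idx * accumFactor; // dimension 0 varies fastest
    accumFactor *= factor;
  }
  return partitionIdx;
}

// Example: a 4x8 array, dim 0 cyclic by 2, dim 1 block by 4; the access (3, 5)
// lands in partition 3 % 2 + (5 / 2) * 2 = 1 + 4 = 5.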
/// Schedule load/store operation honoring the memory ports number limitation.
unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
// Check dependencies of the operation and update schedule level.
for (auto srcOp : dependsMap[op]) {
auto sameLevelSrcOp = getSameLevelSourceOp(srcOp, op);
begin = max(getUIntAttrValue(sameLevelSrcOp, "schedule_end"), begin);
}

// Calculate partition index.
auto partitionIdx = getPartitionIndex(op);
setAttrValue(op, "partition_index", partitionIdx);

auto arrayOp = getArrayOp(op);
auto partitionNum = getUIntAttrValue(arrayOp, "partition_num");
auto storageType = getStrAttrValue(arrayOp, "storage_type");

// Try to avoid memory port violation until a legal schedule is found.
// Since an infinite length pipeline can be generated, this while loop can
// be proofed to have an end.
// Try to avoid memory port violation until a legal schedule is found. Since
// an infinite length schedule cannot be generated, this while loop can be
// proofed to have an end.
while (true) {
auto memPort = dicts[begin][arrayOp];
auto memPort = portsMapDict[begin][arrayOp];
bool memPortEmpty = memPort.empty();

// If the memory has not been occupied by the current stage, it should
// be initialized according to its storage type. Note that each
// partition should have one PortNum structure.
// If the memory has not been occupied by the current schedule level, it
// should be initialized according to its storage type. Note that each
// partition should have one PortInfo structure.
if (memPortEmpty) {
for (unsigned p = 0; p < partitionNum; ++p) {
unsigned rdPort = 0;

@@ -125,167 +271,72 @@ unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin,
rdwrPort = 1;
else {
rdwrPort = 2;
// arrayOp.emitError("unsupported storage type.");
}
PortInfo portInfo(rdPort, wrPort, rdwrPort);
memPort.push_back(portInfo);

memPort.push_back(PortInfo(rdPort, wrPort, rdwrPort));
}
}

// TODO: When partition index can't be determined, this operation will be
// considered to occupy all ports.
// Indicate whether the operation is successfully scheduled in the current
// schedule level.
bool successFlag = false;

if (partitionIdx == -1) {
// When partition index can't be determined, this operation must occupy
// all ports in the scheduled level.
if (memPortEmpty) {
for (unsigned p = 0; p < partitionNum; ++p) {
memPort[p].rdPort = 0;
memPort[p].wrPort = 0;
memPort[p].rdwrPort = 0;
}
dicts[begin][arrayOp] = memPort;
break;
} else {
if (++begin >= dicts.size()) {
MemPortDict memPortDict;
dicts.push_back(memPortDict);
}
successFlag = true;
}
}

// Find whether the current schedule meets memory port limitation. If
// not, the schedule will increase by 1.
PortInfo portInfo = memPort[partitionIdx];
if (isa<AffineLoadOp>(op) && portInfo.rdPort > 0) {
memPort[partitionIdx].rdPort -= 1;
dicts[begin][arrayOp] = memPort;
break;
} else if (isa<AffineStoreOp>(op) && portInfo.wrPort > 0) {
memPort[partitionIdx].wrPort -= 1;
dicts[begin][arrayOp] = memPort;
break;
} else if (portInfo.rdwrPort > 0) {
memPort[partitionIdx].rdwrPort -= 1;
dicts[begin][arrayOp] = memPort;
break;
} else {
if (++begin >= dicts.size()) {
MemPortDict memPortDict;
dicts.push_back(memPortDict);
// When partition index can be determined, figure out whether the current
// schedule meets memory port limitation.
PortInfo portInfo = memPort[partitionIdx];
if (isa<AffineLoadOp>(op) && portInfo.rdPort > 0) {
memPort[partitionIdx].rdPort -= 1;
successFlag = true;

} else if (isa<AffineStoreOp>(op) && portInfo.wrPort > 0) {
memPort[partitionIdx].wrPort -= 1;
successFlag = true;

} else if (portInfo.rdwrPort > 0) {
memPort[partitionIdx].rdwrPort -= 1;
successFlag = true;
}
}

// If successed, break the while loop. Otherwise increase the schedule level
// by 1 and continue to try.
if (successFlag) {
portsMapDict[begin][arrayOp] = memPort;
break;
} else
begin++;
}
return begin;

// Memory load/store operation always consumes 1 clock cycle.
return begin + 1;
}
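In effect, getLoadStoreSchedule searches forward from the earliest dependence-legal level for the first level where the accessed partition still has a free port, and books one port there. A standalone sketch of that search, assuming PortInfo-style bookkeeping and illustrative names only:

#include <map>
#include <vector>

struct PortInfo {
  unsigned rdPort, wrPort, rdwrPort; // free ports left at one partition
};

// Return the schedule level assigned to one load/store and consume a port.
// levels: per schedule level, one PortInfo per partition of the array.
// `fresh` is assumed to provide at least one usable port, so the loop ends.
unsigned scheduleAccess(std::map<unsigned, std::vector<PortInfo>> &levels,
                        unsigned begin, unsigned partition, bool isLoad,
                        unsigned partitionNum, PortInfo fresh) {
  for (unsigned level = begin;; ++level) {
    auto &ports = levels[level];
    if (ports.empty())
      ports.assign(partitionNum, fresh); // first access seen at this level
    PortInfo &p = ports[partition];
    if (isLoad && p.rdPort > 0) { --p.rdPort; return level; }
    if (!isLoad && p.wrPort > 0) { --p.wrPort; return level; }
    if (p.rdwrPort > 0) { --p.rdwrPort; return level; }
    // All ports at this level are taken: retry one level later.
  }
}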
void HLSCppEstimator::updateChildBlockSchedule(Block &block, unsigned begin) {
for (auto &op : block) {
unsigned newBegin = begin;
unsigned newEnd = begin;

// Update the schedule of all operations in the child block.
if (getUIntAttrValue(&op, "schedule_end")) {
newBegin += getUIntAttrValue(&op, "schedule_begin");
newEnd += getUIntAttrValue(&op, "schedule_end");
setAttrValue(&op, "schedule_begin", newBegin);
setAttrValue(&op, "schedule_end", newEnd);
}

// Recursively apply to all child blocks.
if (op.getNumRegions()) {
for (auto &region : op.getRegions()) {
for (auto &block : region.getBlocks())
updateChildBlockSchedule(block, begin);
}
}
}
Optional<unsigned> HLSCppEstimator::visitOp(AffineLoadOp op, unsigned begin) {
return getLoadStoreSchedule(op, begin);
}

/// Schedule the block with ASAP algorithm.
unsigned HLSCppEstimator::getBlockSchedule(Block &block) {
unsigned blockEnd = 0;
MemPortDicts dicts;

for (auto &op : block) {
// Find the latest predecessor dominating the current operation. This
// should be considered as the earliest stage that the current operation
// can be scheduled.
unsigned begin = 0;
unsigned end = 0;
for (auto operand : op.getOperands()) {
if (auto defOp = operand.getDefiningOp())
begin = max(getUIntAttrValue(defOp, "schedule_end"), begin);
}

// Handle loop operations.
if (auto forOp = dyn_cast<AffineForOp>(op)) {
// Live ins of the for loop body will also impact the schedule begin.
Liveness liveness(block.getParentOp());
for (auto liveIn : liveness.getLiveIn(&forOp.getLoopBody().front())) {
if (auto defOp = liveIn.getDefiningOp())
begin = max(getUIntAttrValue(defOp, "schedule_end"), begin);
}

// Update the schedule of all operations in the loop body.
updateChildBlockSchedule(forOp.getLoopBody().front(), begin);

// Child loop is considered as a large node, and two extra clock cycles
// will be required to enter and exit the child loop.
end = begin + getUIntAttrValue(forOp, "latency") + 2;
}

// Handle if operations.
else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
// Live ins of the if body will also impact the schedule begin.
Liveness liveness(block.getParentOp());
for (auto liveIn : liveness.getLiveIn(ifOp.getThenBlock())) {
if (auto defOp = liveIn.getDefiningOp())
begin = max(getUIntAttrValue(defOp, "schedule_end"), begin);
}

if (ifOp.hasElse()) {
for (auto liveIn : liveness.getLiveIn(ifOp.getElseBlock())) {
if (auto defOp = liveIn.getDefiningOp())
begin = max(getUIntAttrValue(defOp, "schedule_end"), begin);
}
// Update the schedule of all operations in the else block.
updateChildBlockSchedule(*ifOp.getElseBlock(), begin);
}

// Update the schedule of all operations in the then block.
updateChildBlockSchedule(*ifOp.getThenBlock(), begin);

end = begin + getUIntAttrValue(ifOp, "latency");
}

// Handle load/store operations.
else if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
// Insert new schedule level to the memory port dicts.
while (begin >= dicts.size()) {
MemPortDict memPortDict;
dicts.push_back(memPortDict);
}

// Ensure the current schedule meets memory port limitation.
begin = getLoadStoreSchedule(&op, begin, dicts);
end = begin + 1;
}

// Default case. All normal expressions and operations will be handled by
// this branch.
else {
// TODO: For now, we assume all operations take one clock cycle to
// execute, should support to accept profiling data.
end = begin + 1;
}

setAttrValue(&op, "schedule_begin", begin);
setAttrValue(&op, "schedule_end", end);
blockEnd = max(blockEnd, end);
}
return blockEnd;
Optional<unsigned> HLSCppEstimator::visitOp(AffineStoreOp op, unsigned begin) {
return getLoadStoreSchedule(op, begin);
}

//===----------------------------------------------------------------------===//
// AffineForOp Related Methods
//===----------------------------------------------------------------------===//

/// Calculate the minimum resource II.
unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) {
unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoresMap &dict) {
unsigned II = 1;

for (auto &pair : dict) {

@@ -354,7 +405,7 @@ unsigned HLSCppEstimator::getResMinII(AffineForOp forOp, LoadStoreDict dict) {
}

/// Calculate the minimum dependency II.
unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoreDict dict) {
unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoresMap &dict) {
unsigned II = 1;

// Collect start and end level of the pipeline.

@@ -375,8 +426,9 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoreDict dict) {
auto loadStores = pair.second;

// Walk through each pair of source and destination, and each loop level
// that are pipelined.
for (auto loopDepth = startLevel; loopDepth <= endLevel; ++loopDepth) {
// that are pipelined. Note that for inter-dependency, dstOp is always
// before srcOp.
for (unsigned loopDepth = startLevel; loopDepth <= endLevel; ++loopDepth) {
unsigned dstIndex = 1;
for (auto dstOp : loadStores) {
MemRefAccess dstAccess(dstOp);

@@ -393,7 +445,7 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoreDict dict) {
if (hasDependence(result)) {
SmallVector<unsigned, 2> flattenTripCounts;
flattenTripCounts.push_back(1);
unsigned distance = 0;
int64_t distance = 0;

// Calculate the distance of this dependency.
for (auto it = depComps.rbegin(); it < depComps.rend(); ++it) {

@@ -413,7 +465,7 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoreDict dict) {
unsigned delay = getUIntAttrValue(srcOp, "schedule_begin") -
getUIntAttrValue(dstOp, "schedule_begin");

if (distance != 0) {
if (distance > 0) {
unsigned minII = ceil((float)delay / distance);
II = max(II, minII);
}
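The dependence-constrained II computed here is the usual recurrence bound: a dependence whose sink starts `delay` schedule levels after its source and spans `distance` loop iterations forces II >= ceil(delay / distance). A tiny sketch with hypothetical numbers:

#include <cmath>

// Recurrence-constrained minimum II for one loop-carried dependence.
unsigned depMinII(unsigned delay, long long distance) {
  return distance > 0 ? (unsigned)std::ceil((double)delay / (double)distance)
                      : 1; // non-positive distance places no constraint here
}

// Example: delay = 3 schedule levels, distance = 2 iterations -> II >= 2.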
@@ -426,37 +478,48 @@ unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoreDict dict) {
return II;
}

bool HLSCppEstimator::visitOp(AffineForOp op) {
auto &body = op.getLoopBody();
if (body.getBlocks().size() != 1) {
Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
if (op.getLoopBody().getBlocks().size() != 1) {
op.emitError("has zero or more than one basic blocks.");
return false;
return Optional<unsigned>();
}

// Recursively estimate all contained operations.
if (!estimateBlock(body.front()))
return false;

// Set an attribute indicating the trip count. For now, we assume all
// loops have static loop bound.
// Set an attribute indicating the trip count. For now, we assume all loops
// have static loop bound.
if (auto tripCount = getConstantTripCount(op))
setAttrValue(op, "trip_count", (unsigned)tripCount.getValue());
else {
setAttrValue(op, "trip_count", (unsigned)0);
op.emitError("has undetermined trip count");
return false;
return Optional<unsigned>();
}

// If the current loop is annotated as pipeline, extra dependency and II
// analysis will be executed.
if (getBoolAttrValue(op, "pipeline")) {
LoadStoreDict dict;
getBlockMemInfo(body.front(), dict);
unsigned end = begin;
auto &loopBlock = op.getLoopBody().front();

// Live ins will impact the scheduling.
for (auto liveIn : liveness.getLiveIn(&loopBlock))
if (auto defOp = liveIn.getDefiningOp())
begin = max(begin, getUIntAttrValue(defOp, "schedule_end"));

// Estimate the loop block.
if (auto esti = estimateBlock(loopBlock, begin))
end = max(end, esti.getValue());
else
return Optional<unsigned>();

// If the current loop is annotated as pipeline, extra dependency and
// resource aware II analysis will be executed.
if (getBoolAttrValue(op, "pipeline")) {
// Calculate latency of each iteration.
auto iterLatency = getBlockSchedule(body.front());
auto iterLatency = end - begin;
setAttrValue(op, "iter_latency", iterLatency);

// Collect load and store operations in the loop block for estimating the
// achievable initial interval.
LoadStoresMap dict;
getLoadStoresMap(loopBlock, dict);

// Calculate initial interval.
auto II = max(getResMinII(op, dict), getDepMinII(op, dict));
setAttrValue(op, "init_interval", II);

@@ -464,117 +527,143 @@ bool HLSCppEstimator::visitOp(AffineForOp op) {
auto tripCount = getUIntAttrValue(op, "trip_count");
setAttrValue(op, "flatten_trip_count", tripCount);

setAttrValue(op, "latency", iterLatency + II * (tripCount - 1));
return true;
auto latency = iterLatency + II * (tripCount - 1);
setAttrValue(op, "latency", latency);

// Entering and leaving a loop will consume extra 2 clock cycles.
return begin + latency + 2;
}

// This means the current loop can be flattened into the child loop. If the
// child loop is pipelined, this will increase the flattened loop trip count
// without changing the iteration latency. Note that this will be propogated
// above until meeting an imperfect loop.
// If the current loop is annotated as flatten, it will be flattened into
// the child pipelined loop. This will increase the flattened loop trip count
// without changing the iteration latency.
if (getBoolAttrValue(op, "flatten")) {
if (auto child = dyn_cast<AffineForOp>(op.getLoopBody().front().front())) {
// This means the inner loop is pipelined, because otherwise II will be
// equal to zero. So that in this case, this loop will be flattened into
// the inner pipelined loop.
if (auto II = getUIntAttrValue(child, "init_interval")) {
setAttrValue(op, "init_interval", II);
auto child = dyn_cast<AffineForOp>(op.getLoopBody().front().front());
assert(child && "the first containing operation is not a loop");

auto iterLatency = getUIntAttrValue(child, "iter_latency");
setAttrValue(op, "iter_latency", iterLatency);
auto iterLatency = getUIntAttrValue(child, "iter_latency");
setAttrValue(op, "iter_latency", iterLatency);

auto flattenTripCount = getUIntAttrValue(child, "flatten_trip_count") *
getUIntAttrValue(op, "trip_count");
setAttrValue(op, "flatten_trip_count", flattenTripCount);
auto II = getUIntAttrValue(child, "init_interval");
setAttrValue(op, "init_interval", II);

setAttrValue(op, "latency", iterLatency + II * (flattenTripCount - 1));
} else {
auto iterLatency = getUIntAttrValue(child, "latency");
setAttrValue(op, "iter_latency", iterLatency);
auto flattenTripCount = getUIntAttrValue(child, "flatten_trip_count") *
getUIntAttrValue(op, "trip_count");
setAttrValue(op, "flatten_trip_count", flattenTripCount);

unsigned latency = iterLatency * getUIntAttrValue(op, "trip_count");
setAttrValue(op, "latency", latency);
}
return true;
}
auto latency = iterLatency + II * (flattenTripCount - 1);
setAttrValue(op, "latency", latency);

// Since the loop is flattened, it will no longer be entered and left.
return begin + latency;
}

// Default case, aka !pipeline && !flatten.
LoadStoreDict dict;
getBlockMemInfo(body.front(), dict);

auto iterLatency = getBlockSchedule(body.front());
// Default case, calculate latency of each iteration.
auto iterLatency = end - begin;
setAttrValue(op, "iter_latency", iterLatency);

unsigned latency = iterLatency * getUIntAttrValue(op, "trip_count");
setAttrValue(op, "latency", latency);
return true;

return begin + latency + 2;
}
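Both the pipelined and the flattened branches above rely on the same latency model: one iteration takes iterLatency cycles and a new iteration is issued every II cycles, so the loop body finishes after iterLatency + II * (tripCount - 1) cycles, plus two cycles to enter and leave the loop when it is not flattened into its parent. A minimal sketch, not the estimator's API:

// Latency of a pipelined loop (illustrative helper only).
unsigned pipelinedLoopLatency(unsigned iterLatency, unsigned II,
                              unsigned tripCount, bool flattened) {
  unsigned latency = iterLatency + II * (tripCount - 1);
  return flattened ? latency : latency + 2; // +2 to enter and exit the loop
}

// Example: iterLatency = 5, II = 2, tripCount = 10 -> 5 + 2 * 9 = 23 cycles,
// or 25 cycles when the loop is entered and left explicitly.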
bool HLSCppEstimator::visitOp(AffineIfOp op) {
auto thenBlock = op.getThenBlock();
if (!estimateBlock(*thenBlock))
return false;
//===----------------------------------------------------------------------===//
// Other Operation Handlers
//===----------------------------------------------------------------------===//

LoadStoreDict dict;
getBlockMemInfo(*thenBlock, dict);
auto latency = getBlockSchedule(*thenBlock);
Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
unsigned end = begin;
auto thenBlock = op.getThenBlock();

// Live ins will impact the scheduling.
for (auto liveIn : liveness.getLiveIn(thenBlock))
if (auto defOp = liveIn.getDefiningOp())
begin = max(begin, getUIntAttrValue(defOp, "schedule_end"));

// Estimate then block.
if (auto esti = estimateBlock(*thenBlock, begin))
end = max(end, esti.getValue());
else
return Optional<unsigned>();

// Handle else block if required.
if (op.hasElse()) {
auto elseBlock = op.getElseBlock();
if (!estimateBlock(*elseBlock))
return false;

getBlockMemInfo(*elseBlock, dict);
latency = max(latency, getBlockSchedule(*elseBlock));
for (auto liveIn : liveness.getLiveIn(elseBlock))
if (auto defOp = liveIn.getDefiningOp())
begin = max(begin, getUIntAttrValue(defOp, "schedule_end"));

if (auto esti = estimateBlock(*elseBlock, begin))
end = max(end, esti.getValue());
else
return Optional<unsigned>();
}

setAttrValue(op, "latency", latency);
return true;
return end;
}

bool HLSCppEstimator::visitOp(ArrayOp op) {
Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
// Annotate the total parition number of the array.
unsigned partitionNum = 1;
if (op.partition()) {
auto rank = op.getType().cast<ShapedType>().getRank();
for (unsigned i = 0; i < rank; ++i) {
if (auto factor = getPartitionFactor(op, i))
for (unsigned dim = 0; dim < rank; ++dim) {
if (auto factor = getPartitionFactor(op, dim))
partitionNum *= factor;
}
}
setAttrValue(op, "partition_num", partitionNum);
return true;

// ArrayOp is a dummy memory instance which does not consume any clock
// cycles.
return begin;
}

bool HLSCppEstimator::estimateBlock(Block &block) {
//===----------------------------------------------------------------------===//
// Block Scheduler and Estimator
//===----------------------------------------------------------------------===//

/// Estimate the latency of a block with ASAP scheduling strategy.
Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,
unsigned blockBegin) {
unsigned blockEnd = blockBegin;

for (auto &op : block) {
if (dispatchVisitor(&op))
continue;
else {
op.emitError("can't be correctly estimated.");
return false;
}
unsigned begin = blockBegin;
unsigned end = blockBegin;

// Find the latest arrived predecessor dominating the current operation.
// This should be considered as the earliest possible scheduling level
// that the current operation can be scheduled.
for (auto operand : op.getOperands())
if (auto defOp = operand.getDefiningOp())
begin = max(begin, getUIntAttrValue(defOp, "schedule_end"));

// Estimate the current operation.
if (auto esti = dispatchVisitor(&op, begin))
end = max(end, esti.getValue());
else
return Optional<unsigned>();

setAttrValue(&op, "schedule_begin", begin);
setAttrValue(&op, "schedule_end", end);

blockEnd = max(blockEnd, end);
}
return true;
return blockEnd;
}
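estimateBlock is an ASAP scheduler: every operation starts at the latest schedule_end among its already-scheduled operands, and the block's latency is the maximum schedule_end it produces. A self-contained sketch on a plain dependence list, with illustrative names only:

#include <algorithm>
#include <vector>

struct Node {
  std::vector<int> preds; // indices of earlier operations this one depends on
  unsigned latency;       // cycles the operation itself takes
};

// Nodes are assumed to be given in program order, so predecessors come first.
unsigned asapBlockLatency(const std::vector<Node> &nodes, unsigned blockBegin) {
  std::vector<unsigned> endTime(nodes.size(), blockBegin);
  unsigned blockEnd = blockBegin;
  for (size_t i = 0; i < nodes.size(); ++i) {
    unsigned begin = blockBegin;
    for (int p : nodes[i].preds)
      begin = std::max(begin, endTime[p]);
    endTime[i] = begin + nodes[i].latency;
    blockEnd = std::max(blockEnd, endTime[i]);
  }
  return blockEnd;
}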
bool HLSCppEstimator::estimateFunc(FuncOp func) {
if (func.getBlocks().size() != 1) {
void HLSCppEstimator::estimateFunc() {
if (func.getBlocks().size() != 1)
func.emitError("has zero or more than one basic blocks.");
return false;
}

// Recursively estimate all contained operations.
if (!estimateBlock(func.front()))
return false;

LoadStoreDict dict;
getBlockMemInfo(func.front(), dict);

auto latency = getBlockSchedule(func.front());
setAttrValue(func, "latency", latency);
return true;
// Recursively estimate blocks in the function.
if (auto esti = estimateBlock(func.front(), 0))
setAttrValue(func, "latency", esti.getValue());
else
setAttrValue(func, "latency", "unknown");
}

//===----------------------------------------------------------------------===//

@@ -584,13 +673,22 @@ bool HLSCppEstimator::estimateFunc(FuncOp func) {
namespace {
struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
void runOnOperation() override {
auto module = getOperation();
auto builder = OpBuilder(module);
// Read configuration file.
INIReader spec(targetSpec);
if (spec.ParseError())
llvm::outs() << "error: target spec file parse fail, please refer to "
"--help option and pass in correct file path\n";

// TODO: Support estimator initiation from profiling data, constructing a
// unique data structure for holding latency and resource information.
auto freq = spec.Get("spec", "frequency", "200MHz");
auto latency = spec.GetInteger(freq, "op", 0);

// Estimate performance and resource utilization.
HLSCppEstimator estimator(builder, targetSpec);
for (auto func : module.getOps<FuncOp>())
estimator.estimateFunc(func);
for (auto func : getOperation().getOps<FuncOp>()) {
HLSCppEstimator estimator(func);
estimator.estimateFunc();
}
}
};
} // namespace
@@ -117,8 +117,8 @@ void ConvertToHLSCpp::runOnOperation() {
if (!forOp.getAttr("pipeline"))
forOp.setAttr("pipeline", builder.getBoolAttr(false));

if (!forOp.getAttr("unroll"))
forOp.setAttr("unroll", builder.getBoolAttr(false));
// if (!forOp.getAttr("unroll"))
// forOp.setAttr("unroll", builder.getBoolAttr(false));

if (!forOp.getAttr("flatten"))
forOp.setAttr("flatten", builder.getBoolAttr(false));
@@ -34,8 +34,8 @@ static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) {
}

template <typename OpType>
static void applyArrayPartition(LoadStoreDict &dict, OpBuilder &builder) {
for (auto pair : dict) {
static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) {
for (auto pair : map) {
auto arrayOp = cast<ArrayOp>(pair.first);
auto arrayType = arrayOp.getType().cast<MemRefType>();
auto arrayAccesses = pair.second;

@@ -116,21 +116,21 @@ void ArrayPartition::runOnOperation() {
for (auto forOp : func.getOps<mlir::AffineForOp>()) {
if (auto outermost = getPipelineLoop(forOp)) {
// Collect memory access information.
LoadStoreDict loadDict;
LoadStoresMap loadMap;
outermost.walk([&](mlir::AffineLoadOp loadOp) {
auto arrayOp = cast<ArrayOp>(loadOp.getMemRef().getDefiningOp());
loadDict[arrayOp].push_back(loadOp);
loadMap[arrayOp].push_back(loadOp);
});

LoadStoreDict storeDict;
LoadStoresMap storeMap;
outermost.walk([&](mlir::AffineStoreOp storeOp) {
auto arrayOp = cast<ArrayOp>(storeOp.getMemRef().getDefiningOp());
storeDict[arrayOp].push_back(storeOp);
storeMap[arrayOp].push_back(storeOp);
});

// Apply array partition pragma.
applyArrayPartition<mlir::AffineLoadOp>(loadDict, builder);
applyArrayPartition<mlir::AffineStoreOp>(storeDict, builder);
applyArrayPartition<mlir::AffineLoadOp>(loadMap, builder);
applyArrayPartition<mlir::AffineStoreOp>(storeMap, builder);
}
}
}
@@ -41,26 +41,22 @@ void LoopPipelining::runOnOperation() {
});

// All outer loops that perfect nest the pipelined loop can be flattened.
forOp.walk([&](mlir::AffineForOp loop) {
unsigned opNum = 0;
unsigned forNum = 0;
bool innerFlatten = false;

for (auto &bodyOp : loop.getLoopBody().front()) {
if (!isa<AffineYieldOp>(bodyOp))
opNum++;
if (isa<AffineForOp>(bodyOp)) {
forNum++;
if (auto flatten = bodyOp.getAttrOfType<BoolAttr>("flatten"))
innerFlatten = flatten.getValue();
}
}

if (forNum == 0 || (opNum == 1 && innerFlatten))
loop.setAttr("flatten", builder.getBoolAttr(true));
else
loop.setAttr("flatten", builder.getBoolAttr(false));
});
SmallVector<mlir::AffineForOp, 4> flattenedLoops;
flattenedLoops.push_back(targetLoop);
while (true) {
auto currentLoop = flattenedLoops.back();
if (auto outerLoop = currentLoop.getParentOfType<mlir::AffineForOp>()) {
// Only if the current loop is the only child loop of the outer loop,
// the outer loop can be flattened into the current loop.
auto &body = outerLoop.getLoopBody().front();
if (&body.front() == currentLoop && body.getOperations().size() == 2) {
flattenedLoops.push_back(outerLoop);
outerLoop.setAttr("flatten", builder.getBoolAttr("true"));
} else
break;
} else
break;
}
}
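The new flattening walk replaces the per-loop counting heuristic: starting from the pipelined loop, it keeps absorbing the parent loop as long as the current loop is the parent's only operation besides the implicit terminator (two operations in total). A structural sketch with stand-in types, not the pass's API:

#include <vector>

struct Loop {
  Loop *parent = nullptr;         // enclosing loop, if any
  unsigned bodyOpCount = 0;       // operations in the body, terminator included
  Loop *firstChildLoop = nullptr; // first body operation if it is a loop
};

// Collect the perfectly nested ancestors that can be flattened into `pipelined`.
std::vector<Loop *> collectFlattenedLoops(Loop *pipelined) {
  std::vector<Loop *> flattened{pipelined};
  for (Loop *cur = pipelined; cur->parent != nullptr; cur = cur->parent) {
    Loop *outer = cur->parent;
    // A perfect nest level holds exactly the child loop plus the terminator.
    if (outer->firstChildLoop != cur || outer->bodyOpCount != 2)
      break;
    flattened.push_back(outer);
  }
  return flattened;
}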
// Canonicalize the IR after loop unrolling.