[QoREstimation] support function call estimation, a known issue is CallOps inside of loops are not comprehensively considered; estimation refinement for multiple loops and select op (#5); fix related bugs
This commit is contained in:
parent
b0bf044c9a
commit
85c47e98e3
21
README.md
21
README.md
|
@ -36,23 +36,30 @@ After the installation and test successfully completed, you should be able to pl
|
|||
$ export PATH=$SCALEHLS_DIR/build/bin:$PATH
|
||||
$ cd $SCALEHLS_DIR
|
||||
|
||||
$ # Benchmark generation, dataflow-level optimization, and bufferization.
|
||||
$ # Benchmark generation, dataflow-level optimization, HLSKernel lowering and bufferization.
|
||||
$ benchmark-gen -type "cnn" -config "config/cnn-config.ini" -number 1 \
|
||||
| scalehls-opt -legalize-dataflow -split-function \
|
||||
-hlskernel-bufferize -hlskernel-to-affine -func-bufferize -canonicalize
|
||||
|
||||
$ # HLSKernel lowering, loop-level and pragma-level optimizations, and performance estimation.
|
||||
$ # Loop and pragma-level optimizations, performance estimation, and HLS C++ code generation.
|
||||
$ scalehls-opt test/Conversion/HLSKernelToAffine/test_gemm.mlir -hlskernel-to-affine \
|
||||
-affine-loop-perfection -remove-var-loop-bound -partial-affine-loop-tile="tile-level=1 tile-size=4" \
|
||||
-affine-loop-perfection -remove-var-loop-bound -affine-loop-normalize \
|
||||
-partial-affine-loop-tile="tile-level=1 tile-size=4" \
|
||||
-convert-to-hlscpp="top-function=test_gemm" -loop-pipelining="pipeline-level=1" \
|
||||
-store-op-forward -simplify-memref-access -array-partition -cse -canonicalize \
|
||||
-qor-estimation="target-spec=config/target-spec.ini"
|
||||
-qor-estimation="target-spec=config/target-spec.ini" \
|
||||
| scalehls-translate -emit-hlscpp
|
||||
|
||||
$ # HLS C++ code generation.
|
||||
$ scalehls-opt test/Conversion/HLSKernelToAffine/test_gemm.mlir -hlskernel-to-affine \
|
||||
$ # Put them together.
|
||||
$ benchmark-gen -type "cnn" -config "config/cnn-config.ini" -number 1 \
|
||||
| scalehls-opt -legalize-dataflow -split-function \
|
||||
-hlskernel-bufferize -hlskernel-to-affine -func-bufferize \
|
||||
-affine-loop-perfection -affine-loop-normalize \
|
||||
-convert-to-hlscpp="top-function=auto_gen_cnn" \
|
||||
-store-op-forward -simplify-memref-access -cse -canonicalize \
|
||||
-qor-estimation="target-spec=config/target-spec.ini" \
|
||||
| scalehls-translate -emit-hlscpp
|
||||
```
|
||||
You can go through `benchmark-gen`, `scalehls-opt`, and `scalehls-translate` to try the whole flow. We also provide some computation kernel level test cases located at `test/Conversion/HLSKernelToAffine/` for experimenting with the ScaleHLS passes and tools.
|
||||
|
||||
## Ablation study
|
||||
If Vivado HLS (2019.1 tested) is installed on your machine, running the following script will report the HLS results for some benchmarks (around 8 hours on AMD Ryzen7 3800X for all 33 tests).
|
||||
|
|
|
@ -6,10 +6,8 @@ fadd=4
|
|||
fmul=3
|
||||
fdiv=15
|
||||
fcmp=1
|
||||
fselect=0
|
||||
|
||||
fadd_delay=7.25
|
||||
fmul_delay=5.7
|
||||
fdiv_delay=6.07
|
||||
fcmp_delay=6.4
|
||||
fselect_delay=0.69
|
||||
|
|
|
@ -20,9 +20,7 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
|
|||
let options = [
|
||||
Option<"targetSpec", "target-spec", "std::string",
|
||||
/*default=*/"\"../config/target-spec.ini\"",
|
||||
"File path: target backend specifications and configurations">,
|
||||
Option<"topFunction", "top-function", "std::string", /*default=*/"",
|
||||
"The top function for HLS synthesis">
|
||||
"File path: target backend specifications and configurations">
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
@ -87,10 +87,10 @@ public:
|
|||
// Helper methods
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// For storing all affine memory access operations (including AffineLoadOp and
|
||||
// AffineStoreOp) indexed by the corresponding memref.
|
||||
using LoadStores = SmallVector<Operation *, 16>;
|
||||
using LoadStoresMap = DenseMap<Value, LoadStores>;
|
||||
// For storing all affine memory access operations (including CallOp,
|
||||
// AffineLoadOp, and AffineStoreOp) indexed by the corresponding memref.
|
||||
using MemAccesses = SmallVector<Operation *, 16>;
|
||||
using MemAccessesMap = DenseMap<Value, MemAccesses>;
|
||||
|
||||
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
|
||||
// AffineIfOp is transparent.
|
||||
|
@ -110,8 +110,11 @@ hlscpp::ArrayOp getArrayOp(Value memref);
|
|||
|
||||
hlscpp::ArrayOp getArrayOp(Operation *op);
|
||||
|
||||
/// Collect all load and store operations in the block.
|
||||
void getLoadStoresMap(Block &block, LoadStoresMap &map);
|
||||
/// Collect all load and store operations in the block. The collected operations
|
||||
/// in the MemAccessesMap are ordered, which means an operation will never
|
||||
/// dominate another operation in front of it.
|
||||
void getMemAccessesMap(Block &block, MemAccessesMap &map,
|
||||
bool includeCalls = false);
|
||||
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
|
|
@ -32,7 +32,7 @@ public:
|
|||
explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
|
||||
: HLSCppAnalysisBase(OpBuilder(func)), func(func),
|
||||
latencyMap(latencyMap) {
|
||||
getFuncMemRefDepends();
|
||||
getFuncDependencies();
|
||||
}
|
||||
|
||||
// Indicate the number of unoccupied memory ports.
|
||||
|
@ -56,7 +56,8 @@ public:
|
|||
using Depends = SmallVector<Operation *, 16>;
|
||||
using DependsMap = DenseMap<Operation *, Depends>;
|
||||
|
||||
void getFuncMemRefDepends();
|
||||
/// Collect all dependencies detected in the function.
|
||||
void getFuncDependencies();
|
||||
|
||||
void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
|
||||
setAttrValue(op, "schedule_begin", begin);
|
||||
|
@ -65,11 +66,12 @@ public:
|
|||
|
||||
using HLSCppVisitorBase::visitOp;
|
||||
Optional<unsigned> visitUnhandledOp(Operation *op, unsigned begin) {
|
||||
// Default latency of any unhandled operation is 1.
|
||||
setScheduleValue(op, begin, begin + 1);
|
||||
return begin + 1;
|
||||
// Default latency of any unhandled operation is 0.
|
||||
setScheduleValue(op, begin, begin);
|
||||
return begin;
|
||||
}
|
||||
|
||||
/// LoadOp and StoreOp related methods.
|
||||
int32_t getPartitionIndex(Operation *op);
|
||||
unsigned getLoadStoreSchedule(Operation *op, unsigned begin);
|
||||
Optional<unsigned> visitOp(AffineLoadOp op, unsigned begin) {
|
||||
|
@ -79,15 +81,15 @@ public:
|
|||
return getLoadStoreSchedule(op, begin);
|
||||
}
|
||||
|
||||
/// AffineForOp related methods.
|
||||
// unsigned getOpMinII(AffineForOp forOp);
|
||||
unsigned getResMinII(LoadStoresMap &map);
|
||||
unsigned getDepMinII(AffineForOp forOp, LoadStoresMap &map);
|
||||
unsigned getResMinII(MemAccessesMap &map);
|
||||
unsigned getDepMinII(AffineForOp forOp, MemAccessesMap &map);
|
||||
Optional<unsigned> visitOp(AffineForOp op, unsigned begin);
|
||||
|
||||
/// Other operation handlers.
|
||||
Optional<unsigned> visitOp(AffineIfOp op, unsigned begin);
|
||||
Optional<unsigned> visitOp(ReturnOp op, unsigned begin);
|
||||
Optional<unsigned> visitOp(AffineYieldOp op, unsigned begin);
|
||||
Optional<unsigned> visitOp(ArrayOp op, unsigned begin);
|
||||
Optional<unsigned> visitOp(CallOp op, unsigned begin);
|
||||
|
||||
/// Handle operations with profiled latency.
|
||||
#define HANDLE(OPTYPE, KEYNAME) \
|
||||
|
@ -100,10 +102,11 @@ public:
|
|||
HANDLE(MulFOp, "fmul");
|
||||
HANDLE(DivFOp, "fdiv");
|
||||
HANDLE(CmpFOp, "fcmp");
|
||||
HANDLE(SelectOp, "fselect");
|
||||
#undef HANDLE
|
||||
|
||||
Optional<unsigned> estimateBlock(Block &block, unsigned begin);
|
||||
/// Block scheduler and estimator.
|
||||
Optional<std::pair<unsigned, unsigned>> estimateBlock(Block &block,
|
||||
unsigned begin);
|
||||
void reverseSchedule();
|
||||
void estimateFunc();
|
||||
|
||||
|
@ -115,21 +118,27 @@ public:
|
|||
} // namespace
|
||||
|
||||
/// Collect all dependencies detected in the function.
|
||||
void HLSCppEstimator::getFuncMemRefDepends() {
|
||||
void HLSCppEstimator::getFuncDependencies() {
|
||||
// TODO: This can be simplified by traversing each ArrayOp in the function.
|
||||
LoadStoresMap loadStoresMap;
|
||||
getLoadStoresMap(func.front(), loadStoresMap);
|
||||
MemAccessesMap map;
|
||||
getMemAccessesMap(func.front(), map, /*includeCallOp=*/true);
|
||||
|
||||
// Walk through all ArrayOp - LoadOp/StoreOp pairs.
|
||||
for (auto &pair : loadStoresMap) {
|
||||
auto loadStores = pair.second;
|
||||
// Walk through all ArrayOp - LoadOp/StoreOp pairs, and find all memory
|
||||
// related dependencies.
|
||||
for (auto &pair : map) {
|
||||
auto memAccesses = pair.second;
|
||||
|
||||
// Walk through each pair of source and destination. Note that for intra
|
||||
// iteration dependencies, srcOp is always before dstOp.
|
||||
unsigned srcIndex = 1;
|
||||
for (auto srcOp : loadStores) {
|
||||
for (auto srcOp : memAccesses) {
|
||||
for (auto dstOp : llvm::drop_begin(memAccesses, srcIndex)) {
|
||||
if (isa<mlir::CallOp>(srcOp) || isa<mlir::CallOp>(dstOp)) {
|
||||
// TODO: for now, all dstOps are considered to have dependencies to
|
||||
// the srcOp if either the dstOp or srcOp is a CallOp.
|
||||
dependsMap[srcOp].push_back(dstOp);
|
||||
} else {
|
||||
MemRefAccess srcAccess(srcOp);
|
||||
for (auto dstOp : llvm::drop_begin(loadStores, srcIndex)) {
|
||||
MemRefAccess dstAccess(dstOp);
|
||||
|
||||
bool dependFlag = false;
|
||||
|
@ -149,9 +158,24 @@ void HLSCppEstimator::getFuncMemRefDepends() {
|
|||
if (dependFlag)
|
||||
dependsMap[srcOp].push_back(dstOp);
|
||||
}
|
||||
}
|
||||
srcIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
// Walk through all loops in the function and establish dependencies. The
|
||||
// rationale here is in Vivado HLS, a loop will always be dominated by another
|
||||
// loop before it, even if no actual dependencies exist between them.
|
||||
SmallVector<Operation *, 16> loops;
|
||||
func.walk([&](AffineForOp loop) { loops.push_back(loop); });
|
||||
|
||||
unsigned loopIndex = 1;
|
||||
for (auto srcLoop : loops) {
|
||||
for (auto dstLoop : llvm::drop_begin(loops, loopIndex))
|
||||
if (checkSameLevel(srcLoop, dstLoop))
|
||||
dependsMap[srcLoop].push_back(dstLoop);
|
||||
loopIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -235,12 +259,6 @@ int32_t HLSCppEstimator::getPartitionIndex(Operation *op) {
|
|||
|
||||
/// Schedule load/store operation honoring the memory ports number limitation.
|
||||
unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
|
||||
// Check dependencies of the operation and update schedule level.
|
||||
for (auto dstOp : dependsMap[op]) {
|
||||
auto sameLevelDstOp = getSameLevelDstOp(op, dstOp);
|
||||
begin = max(getUIntAttrValue(sameLevelDstOp, "schedule_end"), begin);
|
||||
}
|
||||
|
||||
// Calculate partition index.
|
||||
auto partitionIdx = getPartitionIndex(op);
|
||||
setAttrValue(op, "partition_index", partitionIdx);
|
||||
|
@ -348,7 +366,7 @@ unsigned HLSCppEstimator::getLoadStoreSchedule(Operation *op, unsigned begin) {
|
|||
// }
|
||||
|
||||
/// Calculate the minimum resource II.
|
||||
unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) {
|
||||
unsigned HLSCppEstimator::getResMinII(MemAccessesMap &map) {
|
||||
unsigned II = 1;
|
||||
|
||||
for (auto &pair : map) {
|
||||
|
@ -414,7 +432,7 @@ unsigned HLSCppEstimator::getResMinII(LoadStoresMap &map) {
|
|||
}
|
||||
|
||||
/// Calculate the minimum dependency II.
|
||||
unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, LoadStoresMap &map) {
|
||||
unsigned HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
|
||||
unsigned II = 1;
|
||||
|
||||
// Collect start and end level of the pipeline.
|
||||
|
@ -496,21 +514,16 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineForOp op, unsigned begin) {
|
|||
|
||||
// Collect load and store operations in the loop block for solving possible
|
||||
// dependencies.
|
||||
LoadStoresMap map;
|
||||
getLoadStoresMap(loopBlock, map);
|
||||
|
||||
// Check dependencies of all load/store operations and update schedule level.
|
||||
for (auto pair : map)
|
||||
for (auto srcOp : pair.second)
|
||||
for (auto dstOp : dependsMap[srcOp]) {
|
||||
auto sameLevelDstOp = getSameLevelDstOp(srcOp, dstOp);
|
||||
begin = max(getUIntAttrValue(sameLevelDstOp, "schedule_end"), begin);
|
||||
}
|
||||
// TODO: include CallOps, how? Maybe we need to somehow analyze the memory
|
||||
// access behavior of the CallOp.
|
||||
MemAccessesMap map;
|
||||
getMemAccessesMap(loopBlock, map);
|
||||
|
||||
// Estimate the loop block.
|
||||
if (auto schedule = estimateBlock(loopBlock, begin))
|
||||
end = max(end, schedule.getValue());
|
||||
else
|
||||
if (auto schedule = estimateBlock(loopBlock, begin)) {
|
||||
end = max(end, schedule.getValue().second);
|
||||
begin = max(begin, schedule.getValue().first);
|
||||
} else
|
||||
return Optional<unsigned>();
|
||||
|
||||
// If the current loop is annotated as pipeline, extra dependency and
|
||||
|
@ -582,7 +595,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
|
|||
|
||||
// Estimate then block.
|
||||
if (auto schedule = estimateBlock(*thenBlock, begin))
|
||||
end = max(end, schedule.getValue());
|
||||
end = max(end, schedule.getValue().second);
|
||||
else
|
||||
return Optional<unsigned>();
|
||||
|
||||
|
@ -591,7 +604,7 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
|
|||
auto elseBlock = op.getElseBlock();
|
||||
|
||||
if (auto schedule = estimateBlock(*elseBlock, begin))
|
||||
end = max(end, schedule.getValue());
|
||||
end = max(end, schedule.getValue().second);
|
||||
else
|
||||
return Optional<unsigned>();
|
||||
}
|
||||
|
@ -602,23 +615,20 @@ Optional<unsigned> HLSCppEstimator::visitOp(AffineIfOp op, unsigned begin) {
|
|||
return end;
|
||||
}
|
||||
|
||||
Optional<unsigned> HLSCppEstimator::visitOp(ReturnOp op, unsigned begin) {
|
||||
setScheduleValue(op, begin, begin);
|
||||
return begin;
|
||||
}
|
||||
Optional<unsigned> HLSCppEstimator::visitOp(mlir::CallOp op, unsigned begin) {
|
||||
auto callee = SymbolTable::lookupSymbolIn(func.getParentOp(), op.getCallee());
|
||||
auto subFunc = dyn_cast<FuncOp>(callee);
|
||||
assert(subFunc && "callable is not a function operation");
|
||||
|
||||
Optional<unsigned> HLSCppEstimator::visitOp(AffineYieldOp op, unsigned begin) {
|
||||
setScheduleValue(op, begin, begin);
|
||||
return begin;
|
||||
}
|
||||
HLSCppEstimator estimator(subFunc, latencyMap);
|
||||
estimator.estimateFunc();
|
||||
|
||||
Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
|
||||
for (auto user : op.getResult().getUsers()) {
|
||||
auto sameLevelDstOp = getSameLevelDstOp(op, user);
|
||||
begin = max(getUIntAttrValue(sameLevelDstOp, "schedule_end"), begin);
|
||||
}
|
||||
setScheduleValue(op, begin, begin);
|
||||
return begin;
|
||||
// We assume entering and leaving the subfunction require 2 extra clock cycles.
|
||||
if (auto subLatency = getUIntAttrValue(subFunc, "latency")) {
|
||||
setScheduleValue(op, begin, begin + subLatency + 2);
|
||||
return begin + subLatency + 1;
|
||||
} else
|
||||
return Optional<unsigned>();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -626,9 +636,11 @@ Optional<unsigned> HLSCppEstimator::visitOp(ArrayOp op, unsigned begin) {
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Estimate the latency of a block with ALAP scheduling strategy, return the
|
||||
/// end level of schedule.
|
||||
Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,
|
||||
unsigned begin) {
|
||||
/// end level of schedule. Meanwhile, the input begin will also be updated if
|
||||
/// required (typically happens in AffineForOps).
|
||||
Optional<std::pair<unsigned, unsigned>>
|
||||
HLSCppEstimator::estimateBlock(Block &block, unsigned begin) {
|
||||
unsigned blockBegin = begin;
|
||||
unsigned blockEnd = begin;
|
||||
|
||||
// Reversely walk through all operations in the block.
|
||||
|
@ -639,19 +651,32 @@ Optional<unsigned> HLSCppEstimator::estimateBlock(Block &block,
|
|||
|
||||
// Find the latest arrived successor relying on the current operation.
|
||||
for (auto result : op->getResults())
|
||||
for (auto user : result.getUsers())
|
||||
opBegin = max(opBegin, getUIntAttrValue(user, "schedule_end"));
|
||||
for (auto user : result.getUsers()) {
|
||||
auto sameLevelUser = getSameLevelDstOp(op, user);
|
||||
opBegin = max(opBegin, getUIntAttrValue(sameLevelUser, "schedule_end"));
|
||||
}
|
||||
|
||||
// Check dependencies of the operation and update schedule level.
|
||||
for (auto dstOp : dependsMap[op]) {
|
||||
auto sameLevelDstOp = getSameLevelDstOp(op, dstOp);
|
||||
opBegin = max(opBegin, getUIntAttrValue(sameLevelDstOp, "schedule_end"));
|
||||
}
|
||||
|
||||
// Estimate the current operation.
|
||||
if (auto scheduleEnd = dispatchVisitor(op, opBegin))
|
||||
opEnd = max(opEnd, scheduleEnd.getValue());
|
||||
else
|
||||
return Optional<unsigned>();
|
||||
return Optional<std::pair<unsigned, unsigned>>();
|
||||
|
||||
// Update the block schedule end and begin.
|
||||
if (it == block.rbegin())
|
||||
blockBegin = opBegin;
|
||||
else
|
||||
blockBegin = min(blockBegin, opBegin);
|
||||
|
||||
// Update the block schedule end.
|
||||
blockEnd = max(blockEnd, opEnd);
|
||||
}
|
||||
return blockEnd;
|
||||
return std::pair<unsigned, unsigned>(blockBegin, blockEnd);
|
||||
}
|
||||
|
||||
void HLSCppEstimator::reverseSchedule() {
|
||||
|
@ -663,13 +688,16 @@ void HLSCppEstimator::reverseSchedule() {
|
|||
// Reverse schedule level.
|
||||
if (auto surOp = getSurroundingOp(op)) {
|
||||
if (isa<mlir::AffineForOp>(surOp)) {
|
||||
auto surOpBegin = getUIntAttrValue(surOp, "schedule_begin");
|
||||
|
||||
if (getBoolAttrValue(surOp, "flatten")) {
|
||||
// Handle flattened surrounding loops.
|
||||
setScheduleValue(op, 0, end - begin);
|
||||
setScheduleValue(op, surOpBegin, surOpBegin + end - begin);
|
||||
} else {
|
||||
// Handle normal cases.
|
||||
auto iterLatency = getUIntAttrValue(surOp, "iter_latency");
|
||||
setScheduleValue(op, iterLatency - end, iterLatency - begin);
|
||||
setScheduleValue(op, surOpBegin + iterLatency - end,
|
||||
surOpBegin + iterLatency - begin);
|
||||
}
|
||||
} else if (isa<FuncOp>(surOp)) {
|
||||
auto latency = getUIntAttrValue(surOp, "latency");
|
||||
|
@ -682,11 +710,13 @@ void HLSCppEstimator::reverseSchedule() {
|
|||
void HLSCppEstimator::estimateFunc() {
|
||||
// Recursively estimate blocks in the function.
|
||||
if (auto schedule = estimateBlock(func.front(), 0)) {
|
||||
auto latency = schedule.getValue();
|
||||
auto latency = schedule.getValue().second;
|
||||
setAttrValue(func, "latency", latency);
|
||||
|
||||
// Scheduled levels of all operations are reversed in this method, because
|
||||
// we have done the ALAP scheduling in a reverse order.
|
||||
// we have done the ALAP scheduling in a reverse order. Note that after the
|
||||
// reverse, the annotated scheduling level of each operation is a relative
|
||||
// level of the nearest surrounding AffineForOp or FuncOp.
|
||||
reverseSchedule();
|
||||
} else {
|
||||
// Scheduling failed due to early error.
|
||||
|
@ -706,7 +736,6 @@ static void getLatencyMap(INIReader &spec, std::string freq,
|
|||
latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
|
||||
latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
|
||||
latencyMap["fcmp"] = spec.GetInteger(freq, "fcmp", 1);
|
||||
latencyMap["fselect"] = spec.GetInteger(freq, "fselect", 0);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -725,10 +754,17 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
|||
getLatencyMap(spec, freq, latencyMap);
|
||||
|
||||
// Estimate performance and resource utilization.
|
||||
for (auto func : getOperation().getOps<FuncOp>()) {
|
||||
for (auto func : getOperation().getOps<FuncOp>())
|
||||
if (auto topFunction = func.getAttrOfType<BoolAttr>("top_function"))
|
||||
if (topFunction.getValue()) {
|
||||
// Estimate the top function. If any other functions are called by the
|
||||
// top function, it will be estimated in the procedure of estimating
|
||||
// the top function.
|
||||
HLSCppEstimator estimator(func, latencyMap);
|
||||
estimator.estimateFunc();
|
||||
}
|
||||
|
||||
// TODO: Somehow print the estimation report?
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
|
|
@ -119,14 +119,25 @@ hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
|
|||
}
|
||||
|
||||
/// Collect all load and store operations in the block.
|
||||
void scalehls::getLoadStoresMap(Block &block, LoadStoresMap &map) {
|
||||
void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
|
||||
bool includeCalls) {
|
||||
for (auto &op : block) {
|
||||
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
|
||||
map[MemRefAccess(&op).memref].push_back(&op);
|
||||
else if (op.getNumRegions()) {
|
||||
|
||||
else if (includeCalls && isa<CallOp>(op)) {
|
||||
// All CallOps accessing the memory will be pushed back to the map.
|
||||
for (auto operand : op.getOperands())
|
||||
if (operand.getType().isa<MemRefType>()) {
|
||||
map[operand].push_back(&op);
|
||||
break;
|
||||
}
|
||||
|
||||
} else if (op.getNumRegions()) {
|
||||
// Recursively collect memory access operations in each block.
|
||||
for (auto ®ion : op.getRegions())
|
||||
for (auto &block : region)
|
||||
getLoadStoresMap(block, map);
|
||||
getMemAccessesMap(block, map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1445,6 +1445,7 @@ void ModuleEmitter::emitFunction(FuncOp func) {
|
|||
emitError(func, "has zero or more than one basic blocks.");
|
||||
|
||||
if (auto top = func.getAttrOfType<BoolAttr>("top_function"))
|
||||
if (top.getValue())
|
||||
os << "/// This is top function.\n";
|
||||
|
||||
if (auto latency = func.getAttrOfType<IntegerAttr>("latency"))
|
||||
|
|
|
@ -32,7 +32,7 @@ static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) {
|
|||
}
|
||||
|
||||
template <typename OpType>
|
||||
static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) {
|
||||
static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
|
||||
for (auto pair : map) {
|
||||
auto arrayOp = getArrayOp(pair.first);
|
||||
auto arrayShape = arrayOp.getShapedType().getShape();
|
||||
|
@ -118,12 +118,12 @@ void ArrayPartition::runOnOperation() {
|
|||
// TODO: support imperfect loop.
|
||||
if (auto outermost = getPipelineLoop(forOp)) {
|
||||
// Collect memory access information.
|
||||
LoadStoresMap loadMap;
|
||||
MemAccessesMap loadMap;
|
||||
outermost.walk([&](mlir::AffineLoadOp loadOp) {
|
||||
loadMap[loadOp.getMemRef()].push_back(loadOp);
|
||||
});
|
||||
|
||||
LoadStoresMap storeMap;
|
||||
MemAccessesMap storeMap;
|
||||
outermost.walk([&](mlir::AffineStoreOp storeOp) {
|
||||
storeMap[storeOp.getMemRef()].push_back(storeOp);
|
||||
});
|
||||
|
|
|
@ -24,8 +24,8 @@ void SimplifyMemRefAccess::runOnOperation() {
|
|||
auto func = getOperation();
|
||||
|
||||
// Collect all load and store operations in the function block.
|
||||
LoadStoresMap map;
|
||||
getLoadStoresMap(func.front(), map);
|
||||
MemAccessesMap map;
|
||||
getMemAccessesMap(func.front(), map);
|
||||
|
||||
for (auto pair : map) {
|
||||
auto loadStores = pair.second;
|
||||
|
|
Loading…
Reference in New Issue