[FuncPipelining] comprehensively support function pipelining in emitter, estimator, and array partition pass
This commit is contained in:
parent
5bf4d7943e
commit
9cc5f3abdc
|
@ -84,6 +84,7 @@ public:
|
|||
/// AffineForOp related methods.
|
||||
// unsigned getOpMinII(AffineForOp forOp);
|
||||
int64_t getResMinII(MemAccessesMap &map);
|
||||
int64_t getDepMinII(FuncOp func, MemAccessesMap &map);
|
||||
int64_t getDepMinII(AffineForOp forOp, MemAccessesMap &map);
|
||||
bool visitOp(AffineForOp op, int64_t begin);
|
||||
|
||||
|
@ -98,6 +99,7 @@ public:
|
|||
return true; \
|
||||
}
|
||||
HANDLE(AddFOp, "fadd");
|
||||
HANDLE(SubFOp, "fadd");
|
||||
HANDLE(MulFOp, "fmul");
|
||||
HANDLE(DivFOp, "fdiv");
|
||||
HANDLE(CmpFOp, "fcmp");
|
||||
|
|
|
@ -322,7 +322,39 @@ int64_t HLSCppEstimator::getResMinII(MemAccessesMap &map) {
|
|||
return II;
|
||||
}
|
||||
|
||||
/// Calculate the minimum dependency II.
|
||||
/// Calculate the minimum dependency-constrained initiation interval (II) of a
/// pipelined function. For every entry of `map`, each ordered pair of accesses
/// in that entry's list is examined; pairs that are both reads are skipped, and
/// for pairs whose MemRefAccess compare equal the candidate II is
/// "schedule_end" of the earlier access minus "schedule_begin" of the later
/// one. The result is the maximum candidate found and is never less than 1.
/// NOTE(review): `func` is not referenced in this body.
|
||||
int64_t HLSCppEstimator::getDepMinII(FuncOp func, MemAccessesMap &map) {
|
||||
// Lower bound of the II; only ever raised by the max() below.
int64_t II = 1;
|
||||
|
||||
for (auto &pair : map) {
|
||||
// NOTE(review): `auto` takes the access list by value here, so it is copied
// for each map entry - confirm whether a reference was intended.
auto loadStores = pair.second;
|
||||
|
||||
// Walk through each (dstOp, srcOp) pair of accesses in the list. dstOp always
|
||||
// precedes srcOp in the list, so each unordered pair is visited exactly once.
|
||||
// Number of leading elements to skip so that srcOp starts right after dstOp.
int64_t dstIndex = 1;
|
||||
for (auto dstOp : loadStores) {
|
||||
for (auto srcOp : llvm::drop_begin(loadStores, dstIndex)) {
|
||||
// Read-after-read (RAR) pairs are ignored.
|
||||
if (isa<AffineReadOpInterface>(dstOp) &&
|
||||
isa<AffineReadOpInterface>(srcOp))
|
||||
continue;
|
||||
|
||||
if (MemRefAccess(dstOp) == MemRefAccess(srcOp)) {
|
||||
// NOTE(review): the difference is held in a float and converted back to
// int64_t below - confirm whether an integer type was intended.
float delay = getIntAttrValue(dstOp, "schedule_end") -
|
||||
getIntAttrValue(srcOp, "schedule_begin");
|
||||
|
||||
// The dependency distance is always 1, so the minimum II equals the delay.
|
||||
int64_t minII = delay;
|
||||
II = max(II, minII);
|
||||
}
|
||||
}
|
||||
// Advance the skip count in step with dstOp.
dstIndex++;
|
||||
}
|
||||
}
|
||||
return II;
|
||||
}
|
||||
|
||||
/// Calculate the minimum dependency II of loop.
|
||||
int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
|
||||
int64_t II = 1;
|
||||
|
||||
|
@ -344,8 +376,7 @@ int64_t HLSCppEstimator::getDepMinII(AffineForOp forOp, MemAccessesMap &map) {
|
|||
auto loadStores = pair.second;
|
||||
|
||||
// Walk through each pair of source and destination, and each loop level
|
||||
// that are pipelined. Note that for inter-dependency, dstOp is always
|
||||
// before srcOp.
|
||||
// that is pipelined. Note that here dstOp is always before srcOp.
|
||||
for (unsigned loopDepth = startLevel; loopDepth <= endLevel; ++loopDepth) {
|
||||
int64_t dstIndex = 1;
|
||||
for (auto dstOp : loadStores) {
|
||||
|
@ -545,7 +576,7 @@ int64_t HLSCppEstimator::getResourceMap(Block &block, ResourceMap &addFMap,
|
|||
auto end = getIntAttrValue(&op, "schedule_end");
|
||||
|
||||
// Accumulate the resource utilization of each operation.
|
||||
if (isa<AddFOp>(op))
|
||||
if (isa<AddFOp, SubFOp>(op))
|
||||
for (unsigned i = begin; i < end; ++i)
|
||||
addFMap[i]++;
|
||||
|
||||
|
@ -682,6 +713,21 @@ void HLSCppEstimator::estimateFunc() {
|
|||
auto latency = schedule.getValue().second;
|
||||
setAttrValue(func, "latency", latency);
|
||||
|
||||
// TODO: support dataflow interval estimation.
|
||||
|
||||
// TODO: support CallOp inside of the function.
|
||||
if (auto attr = func.getAttrOfType<BoolAttr>("pipeline")) {
|
||||
if (attr.getValue()) {
|
||||
// Collect all memory access operations for calculating II.
|
||||
MemAccessesMap map;
|
||||
getMemAccessesMap(func.front(), map);
|
||||
|
||||
// Calculate initial interval.
|
||||
auto II = max(getResMinII(map), getDepMinII(func, map));
|
||||
setAttrValue(func, "interval", II);
|
||||
}
|
||||
}
|
||||
|
||||
// Scheduled levels of all operations are reversed in this method, because
|
||||
// we have done the ALAP scheduling in a reverse order. Note that after the
|
||||
// reverse, the annotated scheduling level of each operation is a relative
|
||||
|
|
|
@ -1434,6 +1434,16 @@ void ModuleEmitter::emitFunctionPragmas(FuncOp func, ArrayRef<Value> portList) {
|
|||
}
|
||||
}
|
||||
|
||||
if (auto pipeline = func.getAttrOfType<BoolAttr>("pipeline")) {
|
||||
if (pipeline.getValue()) {
|
||||
indent();
|
||||
os << "#pragma HLS pipeline\n";
|
||||
|
||||
// An empty line.
|
||||
os << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Only top function should emit interface pragmas.
|
||||
if (auto topFunction = func.getAttrOfType<BoolAttr>("top_function")) {
|
||||
if (topFunction.getValue()) {
|
||||
|
@ -1489,8 +1499,12 @@ void ModuleEmitter::emitFunction(FuncOp func) {
|
|||
if (top.getValue())
|
||||
os << "/// This is top function.\n";
|
||||
|
||||
if (auto latency = func.getAttrOfType<IntegerAttr>("latency"))
|
||||
os << "/// Latency=" << latency.getInt() << "\n";
|
||||
if (auto latency = func.getAttrOfType<IntegerAttr>("latency")) {
|
||||
os << "/// Latency=" << latency.getInt();
|
||||
if (auto interval = func.getAttrOfType<IntegerAttr>("interval"))
|
||||
os << ", interval=" << interval.getInt();
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
if (auto dsp = func.getAttrOfType<IntegerAttr>("dsp"))
|
||||
os << "/// DSP=" << dsp.getInt() << "\n";
|
||||
|
|
|
@ -25,22 +25,32 @@ struct ArrayPartition : public ArrayPartitionBase<ArrayPartition> {
|
|||
} // namespace
|
||||
|
||||
bool scalehls::applyArrayPartition(FuncOp func, OpBuilder &builder) {
|
||||
// Only memory accesses in pipelined loops will be executed in parallel.
|
||||
SmallVector<AffineForOp, 4> pipelinedLoops;
|
||||
func.walk([&](AffineForOp loop) {
|
||||
if (auto attr = loop.getAttrOfType<BoolAttr>("pipeline"))
|
||||
if (attr.getValue())
|
||||
pipelinedLoops.push_back(loop);
|
||||
});
|
||||
// Check whether the input function is pipelined.
|
||||
bool funcPipeline = false;
|
||||
if (auto attr = func.getAttrOfType<BoolAttr>("pipeline"))
|
||||
if (attr.getValue())
|
||||
funcPipeline = true;
|
||||
|
||||
// Only memory accesses in pipelined loops or function will be executed in
|
||||
// parallel and required to partition.
|
||||
SmallVector<Block *, 4> pipelinedBlocks;
|
||||
if (funcPipeline)
|
||||
pipelinedBlocks.push_back(&func.front());
|
||||
else
|
||||
func.walk([&](AffineForOp loop) {
|
||||
if (auto attr = loop.getAttrOfType<BoolAttr>("pipeline"))
|
||||
if (attr.getValue())
|
||||
pipelinedBlocks.push_back(&loop.getLoopBody().front());
|
||||
});
|
||||
|
||||
// Storing the partition information of each memref.
|
||||
using PartitionInfo = std::pair<PartitionKind, int64_t>;
|
||||
DenseMap<Value, SmallVector<PartitionInfo, 4>> partitionsMap;
|
||||
|
||||
// Traverse all pipelined loops.
|
||||
for (auto loop : pipelinedLoops) {
|
||||
for (auto block : pipelinedBlocks) {
|
||||
MemAccessesMap accessesMap;
|
||||
getMemAccessesMap(loop.getLoopBody().front(), accessesMap);
|
||||
getMemAccessesMap(*block, accessesMap);
|
||||
|
||||
for (auto pair : accessesMap) {
|
||||
auto memref = pair.first;
|
||||
|
|
|
@ -38,6 +38,7 @@ bool scalehls::applyFuncPipelining(FuncOp func, OpBuilder &builder) {
|
|||
func.walk([&](AffineForOp loop) { loopUnrollFull(loop); });
|
||||
|
||||
func.setAttr("pipeline", builder.getBoolAttr(true));
|
||||
func.setAttr("dataflow", builder.getBoolAttr(false));
|
||||
|
||||
// For now, this method will always succeed.
|
||||
return true;
|
||||
|
|
|
@ -57,7 +57,8 @@ void PartialAffineLoopTile::runOnOperation() {
|
|||
else
|
||||
permMap.push_back(i - realTileLevel);
|
||||
}
|
||||
permuteLoops(nestedLoops, permMap);
|
||||
if (isValidLoopInterchangePermutation(nestedLoops, permMap))
|
||||
permuteLoops(nestedLoops, permMap);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,15 +2,15 @@
|
|||
|
||||
// CHECK: module {
|
||||
#map = affine_map<(d0) -> (d0 + 1)>
|
||||
func @test_syrk(%alpha: f32, %beta: f32, %A: memref<16x8xf32>, %C: memref<16x16xf32>) {
|
||||
func @test_syrk(%alpha: f32, %beta: f32, %A: memref<16x16xf32>, %C: memref<16x16xf32>) {
|
||||
affine.for %i = 0 to 16 {
|
||||
affine.for %j = 0 to #map(%i) {
|
||||
%0 = affine.load %C[%i, %j] : memref<16x16xf32>
|
||||
%1 = mulf %beta, %0 : f32
|
||||
affine.store %1, %C[%i, %j] : memref<16x16xf32>
|
||||
affine.for %k = 0 to 8 {
|
||||
%2 = affine.load %A[%i, %k] : memref<16x8xf32>
|
||||
%3 = affine.load %A[%j, %k] : memref<16x8xf32>
|
||||
%2 = affine.load %A[%i, %k] : memref<16x16xf32>
|
||||
%3 = affine.load %A[%j, %k] : memref<16x16xf32>
|
||||
%4 = affine.load %C[%i, %j] : memref<16x16xf32>
|
||||
%5 = mulf %alpha, %2 : f32
|
||||
%6 = mulf %5, %3 : f32
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
// RUN: scalehls-opt -func-pipelining %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func @test_for
|
||||
func @test_for() {
|
||||
return
|
||||
}
|
Loading…
Reference in New Issue