[LoopPerfection] Fix the issue when the loop contains local buffer allocs

This commit is contained in:
Hanchen Ye 2022-02-02 15:02:03 -06:00
parent ad965874c5
commit 97c33324b8
2 changed files with 22 additions and 8 deletions

View File

@ -26,6 +26,16 @@ bool scalehls::applyAffineLoopPerfection(AffineLoopBand &band) {
for (auto &op : llvm::make_early_inc_range(loop.getOps())) {
if (&op == childLoop)
break;
// Any operation that generates a memref should be promoted out of the loop
// nest. If the operation has more than one result, return false.
if (llvm::any_of(op.getResultTypes(),
[](Type type) { return type.isa<MemRefType>(); })) {
if (op.getNumResults() != 1)
return false;
op.moveBefore(band.front());
continue;
}
// If any user of a prefix operation is in the child loop, we need to
// buffer the result in a memory on the stack so that the users can fetch
// the correct data from the stack.

View File

@ -2,18 +2,20 @@
module {
func @test_gemm(%arg0: f32, %arg1: f32, %arg2: memref<32x32xf32>, %arg3: memref<32x32xf32>, %arg4: memref<32x32xf32>) {
// CHECK: %0 = memref.alloc() : memref<1xf32>
// CHECK: %0 = memref.alloc() : memref<2xf32>
// CHECK: %1 = memref.alloc() : memref<1xf32>
affine.for %arg5 = 0 to 32 {
affine.for %arg6 = 0 to 32 {
%buf = memref.alloc() : memref<2xf32>
// CHECK-NOT: %0 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK-NOT: %1 = arith.mulf %0, %arg0 : f32
%0 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>
%1 = arith.mulf %0, %arg0 : f32
affine.for %arg7 = 0 to 32 {
// CHECK: %1 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: %2 = arith.mulf %1, %arg0 : f32
// CHECK: %2 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: %3 = arith.mulf %2, %arg0 : f32
// CHECK: affine.if #set0(%arg7) {
// CHECK: affine.store %2, %0[0] : memref<1xf32>
// CHECK: affine.store %3, %1[0] : memref<1xf32>
// CHECK: }
%4 = affine.load %arg3[%arg5, %arg7] : memref<32x32xf32>
%5 = arith.mulf %1, %4 : f32
@ -21,11 +23,13 @@ module {
%7 = arith.mulf %5, %6 : f32
%8 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>
%9 = arith.addf %8, %7 : f32
affine.store %9, %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: %10 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: %11 = arith.addf %10, %2 : f32
%10 = affine.load %buf[0] : memref<2xf32>
%11 = arith.addf %10, %9 : f32
affine.store %11, %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: %13 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: %14 = arith.addf %13, %3 : f32
// CHECK: affine.if #set1(%arg7) {
// CHECK: affine.store %11, %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: affine.store %14, %arg2[%arg5, %arg6] : memref<32x32xf32>
// CHECK: }
}
// CHECK-NOT: %2 = affine.load %arg2[%arg5, %arg6] : memref<32x32xf32>