[mlir][linalg] Support padding LinalgOps in use-def chain.

Previously, only LinalgOps whose operands were directly defined by an ExtractSliceOp could be padded. This revision adds support for walking a use-def chain of LinalgOps to find the ExtractSliceOp that defines an operand, which makes it possible to pad ops whose operands are produced by other LinalgOps (for example, a matmul whose output operand is initialized by a linalg.fill).
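
As an illustrative sketch of the newly supported pattern (condensed from the pad_chain test added below; %arg0, %size, and %cst stand for the tiled input, the dynamic tile size, and the fill constant, as in that test):

  %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1]
      : tensor<64x64xf32> to tensor<?x?xf32>
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
  // The matmul's output operand %1 is not defined by an extract_slice itself;
  // the walk follows %1 to the fill's output operand %0 and finds the slice there.
  %2 = linalg.matmul ins(%0, %0 : tensor<?x?xf32>, tensor<?x?xf32>)
                     outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>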

Reviewed By: hanchung

Differential Revision: https://reviews.llvm.org/D122116
gysit 2022-03-24 10:28:20 +00:00
parent 6872c8bdc4
commit b1b57f8104
3 changed files with 115 additions and 82 deletions


@@ -182,9 +182,16 @@ static LogicalResult padOperandToSmallestStaticBoundingBox(
if (failed(paddingValue))
return failure(hasDynamicShape);
// Cannot construct a static bounding box if the operand is not defined by an
// ExtractSliceOp.
auto sliceOp = opOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
// Follow the use-def chain if `currOpOperand` is defined by a LinalgOp.
OpOperand *currOpOperand = opOperand;
while (auto linalgOp = currOpOperand->get().getDefiningOp<LinalgOp>()) {
OpResult result = currOpOperand->get().cast<OpResult>();
currOpOperand = linalgOp.getOutputOperand(result.getResultNumber());
}
// Cannot construct a static bounding box if the `currOpOperand` is not
// defined by an ExtractSliceOp.
auto sliceOp = currOpOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
if (!sliceOp)
return failure(hasDynamicShape);


@@ -1,10 +1,11 @@
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATMUL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=FILL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 pad-inputs-only run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=MATMUL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,0 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 pad-inputs-only run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 12, 7)>
// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
#map = affine_map<()[s0] -> (7, -s0 + 12)>
#map = affine_map<()[s0] -> (-s0 + 12, 7)>
// MATMUL: static_sizes_output_divisible
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
@@ -13,10 +14,10 @@
// MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
// MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
// MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
// MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
// MATMUL: %[[TS2:.*]] = affine.min #[[MAP0]]()[%[[IV2]]]
@@ -43,24 +44,24 @@ func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
// MATMUL: %[[T6:.*]] = tensor.insert_slice %[[T5]]
%4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
return %5 : tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
}
// -----
// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 25, 7)>
// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
#map = affine_map<()[s0] -> (7, -s0 + 25)>
#map = affine_map<()[s0] -> (-s0 + 25, 7)>
// MATMUL: static_sizes_input_divisible
// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
// MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
// MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
// MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
func.func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
// MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
%3 = tensor.extract_slice %arg0[%iv0, %iv2] [4, 6] [1, 1] : tensor<24x12xf32> to tensor<4x6xf32>
@@ -86,7 +87,7 @@ func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
%8 = tensor.insert_slice %7 into %arg2[%iv0, %iv1] [4, %4] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
// MATMUL: return %[[T4]]
return %8 : tensor<24x25xf32>
func.return %8 : tensor<24x25xf32>
}
// -----
@@ -97,9 +98,9 @@ func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
// MATMUL-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 5)>
// MATMUL-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 6)>
#map0 = affine_map<()[s0, s1] -> (5, -s0 + s1)>
#map1 = affine_map<()[s0, s1] -> (6, -s0 + s1)>
#map2 = affine_map<()[s0, s1] -> (7, -s0 + s1)>
#map0 = affine_map<()[s0, s1] -> (-s0 + s1, 5)>
#map1 = affine_map<()[s0, s1] -> (-s0 + s1, 6)>
#map2 = affine_map<()[s0, s1] -> (-s0 + s1, 7)>
// MATMUL: dynamic_sizes
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x?xf32>
@@ -108,10 +109,10 @@ func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
// MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
// MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
// MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
func @dynamic_sizes(%arg0: tensor<?x?xf32>,
%arg1: tensor<?x?xf32>,
%arg2: tensor<?x?xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
func.func @dynamic_sizes(%arg0: tensor<?x?xf32>,
%arg1: tensor<?x?xf32>,
%arg2: tensor<?x?xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
// MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
// MATMUL-DAG: %[[C1:.*]] = arith.constant 1
%c1 = arith.constant 1 : index
@@ -156,29 +157,53 @@ func @dynamic_sizes(%arg0: tensor<?x?xf32>,
%13 = tensor.insert_slice %12 into %arg2[%iv0, %iv1] [%6, %9] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// MATMUL: return %[[T8]]
return %13 : tensor<?x?xf32>
func.return %13 : tensor<?x?xf32>
}
// -----
#map0 = affine_map<()[s0] -> (64, s0)>
// FILL: pad_multiple
// FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
func @pad_multiple(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
// FILL-MATMUL: pad_multiple
// FILL-MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
func.func @pad_multiple(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
// FILL-MATMUL: %[[T0:.*]] = tensor.extract_slice
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Check the two operations are padded by the same pad tensor operation.
// FILL-MATMUL: %[[T1:.*]] = tensor.pad %[[T0]]
// FILL-MATMUL: %[[T2:.*]] = linalg.fill {{.*}} outs(%[[T1]]
// FILL-MATMUL: %[[T3:.*]] = linalg.matmul {{.*}} outs(%[[T2]]
// FILL-MATMUL: = tensor.extract_slice %[[T3]]
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = linalg.matmul ins(%0, %0 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
func.return %2 : tensor<?x?xf32>
}
// -----
#map0 = affine_map<()[s0] -> (64, s0)>
// MATMUL: pad_chain
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
func.func @pad_chain(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Check both fill operations are padded by the same pad tensor operation.
// FILL: %[[T0:.*]] = tensor.pad
// FILL: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
// FILL: %[[T2:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T1]]
// FILL: = tensor.extract_slice %[[T2]]
// Check the matmul at the end of the use-def chain is padded.
// MATMUL: %[[T0:.*]] = linalg.fill
// MATMUL: %[[T1:.*]] = tensor.pad %[[T0]]
// MATMUL: %[[T2:.*]] = linalg.matmul {{.*}} outs(%[[T1]]
// MATMUL: = tensor.extract_slice %[[T2]]
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %2 : tensor<?x?xf32>
%2 = linalg.matmul ins(%0, %0 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
func.return %2 : tensor<?x?xf32>
}
// -----
@@ -187,8 +212,8 @@ func @pad_multiple(%arg0: tensor<64x64xf32>,
// MATMUL: compose_padding
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
func @compose_padding(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
func.func @compose_padding(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
// MATMUL: %[[SIZE:.*]] = affine.min
@@ -220,7 +245,7 @@ func @compose_padding(%arg0: tensor<64x64xf32>,
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
// MATMUL: return %[[T5]]
return %5 : tensor<?x?xf32>
func.return %5 : tensor<?x?xf32>
}
// -----
@@ -228,8 +253,8 @@ func @compose_padding(%arg0: tensor<64x64xf32>,
#map0 = affine_map<()[s0] -> (64, s0)>
// MATMUL: different_padding_values
func @different_padding_values(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
func.func @different_padding_values(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 42.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
@@ -245,7 +270,7 @@ func @different_padding_values(%arg0: tensor<64x64xf32>,
// MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
func.return %5 : tensor<?x?xf32>
}
// -----
@@ -253,8 +278,8 @@ func @different_padding_values(%arg0: tensor<64x64xf32>,
#map0 = affine_map<()[s0] -> (64, s0)>
// MATMUL: different_padding_dynamic_sizes
func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
func.func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
@@ -270,7 +295,7 @@ func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
// MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
func.return %5 : tensor<?x?xf32>
}
// -----
@@ -278,8 +303,8 @@ func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
#map0 = affine_map<()[s0] -> (64, s0)>
// MATMUL: different_padding_dynamic_rank
func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
%iv0 : index) -> tensor<?x?xf32> {
func.func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor<?x?xf32>
@@ -295,7 +320,7 @@ func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
// MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%4 = linalg.matmul ins(%3, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %4 : tensor<?x?xf32>
func.return %4 : tensor<?x?xf32>
}
// -----
@@ -303,8 +328,8 @@ func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
#map0 = affine_map<()[s0] -> (64, s0)>
// MATMUL: different_padding_static_sizes
func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
%iv0 : index) -> tensor<?x?xf32> {
func.func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
@@ -320,7 +345,7 @@ func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
// MATMUL: = tensor.pad
// MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
func.return %5 : tensor<?x?xf32>
}
// -----
@@ -330,9 +355,9 @@ func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
// FILL: scalar_operand
// FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: f32
// FILL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<24x12xf32>
func @scalar_operand(%arg0: f32,
%arg1: tensor<24x12xf32>,
%iv0 : index) -> tensor<24x12xf32> {
func.func @scalar_operand(%arg0: f32,
%arg1: tensor<24x12xf32>,
%iv0 : index) -> tensor<24x12xf32> {
%0 = affine.min #map0()[%iv0]
// FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
@@ -343,7 +368,7 @@ func @scalar_operand(%arg0: f32,
// FILL: %[[T6:.*]] = linalg.fill ins(%[[ARG0]]{{.*}}outs(%[[T1]]
%2 = linalg.fill ins(%arg0 : f32) outs(%1 : tensor<4x?xf32>) -> tensor<4x?xf32>
%3 = tensor.insert_slice %2 into %arg1[0, 0] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x12xf32>
return %3 : tensor<24x12xf32>
func.return %3 : tensor<24x12xf32>
}
// -----
@@ -352,10 +377,10 @@ func @scalar_operand(%arg0: f32,
// MATMUL: static_extract_slice_missing
// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<4x5xf32>,
func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<4x5xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<4x5xf32> {
func.func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<4x5xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<4x5xf32> {
%0 = affine.min #map0()[%iv2]
%1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
%2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
@@ -366,7 +391,7 @@ func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
// MATMUL: = linalg.matmul ins(%[[T0]], %[[T1]]
// MATMUL-SAME: outs(%[[ARG2]]
%3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32>
return %3 : tensor<4x5xf32>
func.return %3 : tensor<4x5xf32>
}
// -----
@@ -377,10 +402,10 @@ func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<4x?xf32>,
// MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
func.func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
%0 = affine.min #map0()[%iv2]
// MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
@@ -393,7 +418,7 @@ func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
// MATMUL-SAME: outs(%[[T1]]
%4 = linalg.matmul ins(%arg0, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
return %5 : tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
}
// -----
@@ -402,10 +427,10 @@ func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
// INPUTS-ONLY: static_input_padding_only
// INPUTS-ONLY-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
func @static_input_padding_only(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
func.func @static_input_padding_only(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
%0 = affine.min #map0()[%iv2]
%1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
%2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
@@ -420,7 +445,7 @@ func @static_input_padding_only(%arg0: tensor<24x12xf32>,
// INPUTS-ONLY-SAME: outs(%[[T0]]
%4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
return %5 : tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
}
// -----
@@ -431,10 +456,10 @@ func @static_input_padding_only(%arg0: tensor<24x12xf32>,
// INPUTS-ONLY-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>,
// INPUTS-ONLY-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
// INPUTS-ONLY-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
func.func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
%0 = affine.min #map0()[%iv2]
// INPUTS-ONLY: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
@@ -449,7 +474,7 @@ func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
// INPUTS-ONLY-SAME: outs(%[[T2]]
%4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<4x?xf32>) -> tensor<4x?xf32>
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
return %5 : tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
}
// -----
@@ -458,8 +483,8 @@ func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
// FILL: rank_reducing
// FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<1x64x1x64xf32>
func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
%iv0 : index) -> tensor<1x?x?xf32> {
func.func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
%iv0 : index) -> tensor<1x?x?xf32> {
%cst = arith.constant 0.0 : f32
%size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0, 0, 0] [1, %size, 1, %size] [1, 1, 1, 1] : tensor<1x64x1x64xf32> to tensor<1x?x?xf32>
@@ -470,5 +495,5 @@ func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
// FILL-SAME: tensor<1x64x64xf32>
// FILL: = tensor.extract_slice %[[T1]]
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
return %1 : tensor<1x?x?xf32>
func.return %1 : tensor<1x?x?xf32>
}


@@ -184,6 +184,7 @@ void TestLinalgCodegenStrategy::runStrategy(
LinalgPaddingOptions paddingOptions,
vector::VectorContractLowering vectorContractLowering,
vector::VectorTransferSplit vectorTransferSplit) {
std::string anchorOpNameOrWildcard = fuse ? "" : anchorOpName.getValue();
CodegenStrategy strategy;
strategy
.tileAndFuseIf(fuse && !tileSizes.empty(), anchorOpName,
@@ -199,11 +200,11 @@ void TestLinalgCodegenStrategy::runStrategy(
LinalgPromotionOptions()
.setAlignment(16)
.setUseFullTileBuffersByDefault(registerPromoteFullTile))
.padIf(pad, "", std::move(paddingOptions))
.padIf(pad, anchorOpNameOrWildcard, std::move(paddingOptions))
.decomposeIf(decompose)
.generalizeIf(generalize, "")
.generalizeIf(generalize, anchorOpNameOrWildcard)
.interchangeIf(!iteratorInterchange.empty(), iteratorInterchange)
.vectorizeIf(vectorize, "", nullptr, vectorizePadding)
.vectorizeIf(vectorize, anchorOpNameOrWildcard, nullptr, vectorizePadding)
.vectorLowering(
LinalgVectorLoweringOptions()
.setVectorTransformsOptions(