[mlir][linalg][bufferize][NFC] Use RewritePatterns instead of custom traversal

This change simplifies BufferizableOpInterface and related helper functions. Overall, the API gets smaller: the functions for custom IR traversal are deleted entirely, which makes it easier to write BufferizableOpInterface implementations.

This is also in preparation for unifying Comprehensive Bufferize and core bufferization. While Comprehensive Bufferize could in theory keep its own IR traversal, there is no reason to do so: all bufferize implementations in BufferizableOpInterface have to support partial bufferization anyway, and this way a larger part of the code base can be shared between the two bufferizations.

Differential Revision: https://reviews.llvm.org/D116448
Matthias Springer 2022-01-07 00:50:21 +09:00
parent f558acf492
commit 15c7e3ee15
12 changed files with 116 additions and 168 deletions
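For orientation before the per-file hunks, the sketch below condenses the new pattern-based driver that this commit introduces: a single rewrite pattern over BufferizableOpInterface, applied with the greedy pattern driver, replaces the deleted recursive bufferize() helpers. The class name mirrors the diff; the include paths, the inlined tensor check, and the standalone wrapper function are illustrative assumptions, not the exact upstream code.

// Condensed sketch, paraphrased from the diff in this commit.
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;
using namespace mlir::linalg::comprehensive_bufferize;

/// Bufferize any op that implements BufferizableOpInterface. The greedy
/// pattern driver supplies the IR traversal that the deleted bufferize()
/// overloads used to implement by hand.
struct BufferizationPattern
    : public OpInterfaceRewritePattern<BufferizableOpInterface> {
  BufferizationPattern(MLIRContext *context, BufferizationState &state)
      : OpInterfaceRewritePattern<BufferizableOpInterface>(context),
        state(state) {}

  LogicalResult matchAndRewrite(BufferizableOpInterface bufferizableOp,
                                PatternRewriter &rewriter) const override {
    Operation *op = bufferizableOp.getOperation();
    // No tensors => no buffers. Also respect the op filter in the options.
    auto isaTensor = [](Type t) { return t.isa<TensorType>(); };
    if (llvm::none_of(op->getOperandTypes(), isaTensor) &&
        llvm::none_of(op->getResultTypes(), isaTensor))
      return failure();
    if (!state.getOptions().isOpAllowed(op))
      return failure();
    // The interface implementation rewrites this op only; nested ops are
    // visited by the driver, not by manual recursion.
    return bufferizableOp.bufferize(rewriter, state);
  }

private:
  const BufferizationState &state;
};

/// Illustrative driver entry point (hypothetical name): bufferize `op` and
/// everything nested under it.
static LogicalResult bufferizeWithPatterns(Operation *op,
                                           BufferizationState &state) {
  RewritePatternSet patterns(op->getContext());
  patterns.add<BufferizationPattern>(op->getContext(), state);
  return applyPatternsAndFoldGreedily(op, std::move(patterns));
}

One consequence, visible throughout the hunks below: interface implementations that used to recurse into their regions via the removed bufferize(rewriter, region/block, state) helpers now just rewrite their own op and return success() or failure(), and the driver (plus the new checkBufferizationResult helper) handles the rest.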

View File

@@ -443,20 +443,6 @@ private:
const BufferizationOptions &options;
};
/// Bufferize all ops in the given region.
LogicalResult bufferize(RewriterBase &rewriter, Region *region,
const BufferizationState &state);
/// Bufferize all ops in the given block.
LogicalResult bufferize(RewriterBase &rewriter, Block *block,
const BufferizationState &state);
/// Bufferize the given op. If the op has no tensor OpOperands/OpResults, this
/// function returns immediately. Otherwise, it calls the `bufferize` interface
/// method of `BufferizableOpInterface`.
LogicalResult bufferize(RewriterBase &rewriter, Operation *op,
const BufferizationState &state);
/// Return a contiguous MemRefType (i.e. with canonical/empty layout map)
/// with the same shape as `shapedType` and specified `layout` and
/// `addressSpace`.
@@ -529,17 +515,7 @@ struct AllocationHoistingBarrierOnly
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto isaTensor = [](Type t) { return t.isa<TensorType>(); };
if (any_of(op->getOperandTypes(), isaTensor) ||
any_of(op->getResultTypes(), isaTensor))
if (!state.getOptions().allowUnknownOps)
return op->emitError() << "unsupported op with tensors";
for (Region &region : op->getRegions())
if (failed(comprehensive_bufferize::bufferize(rewriter, &region, state)))
return failure();
return success();
return failure();
}
bool isAllocationHoistingBarrier(Operation *op) const { return true; }

View File

@@ -452,59 +452,6 @@ void mlir::linalg::comprehensive_bufferize::BufferizationState::replaceOp(
rewriter.eraseOp(op);
}
LogicalResult mlir::linalg::comprehensive_bufferize::bufferize(
RewriterBase &rewriter, Region *region, const BufferizationState &state) {
for (Block &block : *region)
if (failed(bufferize(rewriter, &block, state)))
return failure();
return success();
}
LogicalResult mlir::linalg::comprehensive_bufferize::bufferize(
RewriterBase &rewriter, Block *block, const BufferizationState &state) {
// Ops may get deleted during the traversal, so do not iterate over `block`
// directly.
SmallVector<Operation *> ops;
ops.reserve(block->getOperations().size());
for (Operation &op : *block)
ops.push_back(&op);
for (Operation *op : ops)
if (failed(bufferize(rewriter, op, state)))
return failure();
return success();
}
LogicalResult mlir::linalg::comprehensive_bufferize::bufferize(
RewriterBase &rewriter, Operation *op, const BufferizationState &state) {
// Check if op has tensor results or operands.
auto isaTensor = [](Type t) { return t.isa<TensorType>(); };
bool hasTensorResult = any_of(op->getResultTypes(), isaTensor);
bool hasTensorOperand = any_of(op->getOperandTypes(), isaTensor);
bool hasRegions = !op->getRegions().empty();
// No tensor results/operands or regions. We are done.
if (!hasTensorResult && !hasTensorOperand && !hasRegions)
return success();
// Bufferize using `BufferizableOpInterface`. Interface implementations are
// responsible for bufferizing nested ops.
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op)) {
rewriter.setInsertionPoint(op);
return bufferizableOp.bufferize(rewriter, state);
}
// `op` is an unbufferizable tensor op.
if (!state.getOptions().allowUnknownOps)
return op->emitError() << "unsupported op with tensors";
// Bufferize all regions.
for (Region &region : op->getRegions())
if (failed(bufferize(rewriter, &region, state)))
return failure();
return success();
}
//===----------------------------------------------------------------------===//
// Bufferization-specific scoped alloc/dealloc insertion support.
//===----------------------------------------------------------------------===//
@@ -657,28 +604,15 @@ Value mlir::linalg::comprehensive_bufferize::BufferizationState::lookupBuffer(
if (auto toTensorOp = tensor.getDefiningOp<bufferization::ToTensorOp>())
return toTensorOp.memref();
if (!isFunctionArgument(tensor)) {
if (static_cast<bool>(options.dynCastBufferizableOp(tensor))) {
// Dump tensor for easier debugging.
tensor.dump();
llvm_unreachable("op is known, but has not been bufferized yet");
return Value();
}
if (!options.allowUnknownOps) {
// Dump tensor for easier debugging.
tensor.dump();
// Note: An assertion should already have failed earlier.
llvm_unreachable("unknown ops are not allowed");
return Value();
}
}
// Insert to_memref op.
OpBuilder::InsertionGuard g(rewriter);
setInsertionPointAfter(rewriter, tensor);
return rewriter.create<bufferization::ToMemrefOp>(
tensor.getLoc(),
getDynamicMemRefType(tensor.getType().cast<RankedTensorType>()), tensor);
Type memrefType =
tensor.getType().isa<RankedTensorType>()
? getDynamicMemRefType(tensor.getType().cast<RankedTensorType>())
: getContiguousOrUnrankedMemRefType(tensor.getType());
return rewriter.create<bufferization::ToMemrefOp>(tensor.getLoc(), memrefType,
tensor);
}
bool mlir::linalg::comprehensive_bufferize::BufferizationState::isInPlace(

View File

@@ -54,21 +54,18 @@ struct ToMemrefOpInterface
const BufferizationState &state) const {
auto toMemrefOp = cast<bufferization::ToMemrefOp>(op);
// Fold to_memref(to_tensor(x)) to x.
// Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
if (auto toTensorOp =
toMemrefOp.tensor().getDefiningOp<bufferization::ToTensorOp>()) {
rewriter.replaceOp(toMemrefOp, toTensorOp.memref());
Value buffer = toTensorOp.memref();
if (toTensorOp.memref().getType() != toMemrefOp.getType())
buffer = rewriter.create<memref::CastOp>(toMemrefOp.getLoc(), buffer,
toMemrefOp.getType());
rewriter.replaceOp(toMemrefOp, buffer);
return success();
}
// If a ToMemrefOp's tensor operand has not been bufferized yet, the op
// remains unchanged. All IR up to this ToMemrefOp has already been
// bufferized, unless there were unknown ops that could be bufferized.
assert((isFunctionArgument(toMemrefOp.tensor()) ||
state.getOptions().allowUnknownOps) &&
"expected that tensor is mapped");
return success();
return failure();
}
};
@@ -87,7 +84,7 @@ struct ToTensorOpInterface
bufferization::ToTensorOp> {
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
return success();
return failure();
}
bool isWritable(Operation *op, Value value,

View File

@@ -92,4 +92,5 @@ add_mlir_dialect_library(MLIRComprehensiveBufferize
MLIRMemRef
MLIRStandard
MLIRStandardOpsTransforms
MLIRTransforms
)

View File

@@ -115,6 +115,7 @@
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
@@ -547,6 +548,13 @@ static LogicalResult inPlaceAnalysis(SmallVector<Operation *> &ops,
return success();
}
/// Return true if the given op has a tensor result or a tensor operand.
static bool hasTensorSemantics(Operation *op) {
bool hasTensorResult = any_of(op->getResultTypes(), isaTensor);
bool hasTensorOperand = any_of(op->getOperandTypes(), isaTensor);
return hasTensorResult || hasTensorOperand;
}
/// Analyze all ops that are contained in `op`.
static LogicalResult inPlaceAnalysis(Operation *op,
BufferizationAliasInfo &aliasInfo,
@@ -557,8 +565,7 @@ static LogicalResult inPlaceAnalysis(Operation *op,
SmallVector<Operation *> ops;
op->walk([&](Operation *op) {
// No tensors => no buffers.
if (none_of(op->getOperandTypes(), isaTensor) &&
none_of(op->getResultTypes(), isaTensor))
if (!hasTensorSemantics(op))
return;
ops.push_back(op);
});
@@ -655,6 +662,63 @@ LogicalResult mlir::linalg::comprehensive_bufferize::runComprehensiveBufferize(
return runComprehensiveBufferize(op, *options, state);
}
/// Rewrite pattern that bufferizes bufferizable ops.
struct BufferizationPattern
: public OpInterfaceRewritePattern<BufferizableOpInterface> {
BufferizationPattern(MLIRContext *context, BufferizationState &state,
PatternBenefit benefit = 1)
: OpInterfaceRewritePattern<BufferizableOpInterface>(context, benefit),
state(state) {}
LogicalResult matchAndRewrite(BufferizableOpInterface bufferizableOp,
PatternRewriter &rewriter) const override {
// No tensors => no buffers.
if (!hasTensorSemantics(bufferizableOp.getOperation()))
return failure();
if (!state.getOptions().isOpAllowed(bufferizableOp.getOperation()))
return failure();
return bufferizableOp.bufferize(rewriter, state);
}
private:
const BufferizationState &state;
};
/// Check the result of bufferization. Return an error if an op was not
/// bufferized, unless partial bufferization is allowed.
static LogicalResult
checkBufferizationResult(Operation *op, const BufferizationOptions &options) {
if (!options.allowUnknownOps) {
// Check if all ops were bufferized.
LogicalResult status = success();
op->walk([&](Operation *op) {
if (!hasTensorSemantics(op))
return WalkResult::advance();
// Bufferization dialect ops will canonicalize away if all other ops are
// bufferized.
if (isa<bufferization::ToMemrefOp, bufferization::ToTensorOp>(op))
return WalkResult::advance();
// Ops that are not in the allow list can be ignored.
if (!options.isOpAllowed(op))
return WalkResult::advance();
// Ops without any uses and no side effects will fold away.
if (op->getUses().empty() && MemoryEffectOpInterface::hasNoEffect(op))
return WalkResult::advance();
status = op->emitError("op was not bufferized");
return WalkResult::interrupt();
});
if (failed(status))
return status;
}
return success();
}
LogicalResult mlir::linalg::comprehensive_bufferize::runComprehensiveBufferize(
Operation *op, const BufferizationOptions &options,
BufferizationState &state) {
@@ -690,8 +754,10 @@ LogicalResult mlir::linalg::comprehensive_bufferize::runComprehensiveBufferize(
}
// Bufferize the op and its nested ops.
if (failed(bufferize(rewriter, op, state)))
OwningRewritePatternList patterns(op->getContext());
patterns.add<BufferizationPattern>(op->getContext(), state);
if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
return failure();
return success();
return checkBufferizationResult(op, options);
}

View File

@@ -64,14 +64,12 @@ static LogicalResult bufferizeLinalgOp(RewriterBase &rewriter, LinalgOp op,
// Set insertion point now that potential alloc/dealloc are introduced.
rewriter.setInsertionPoint(op);
auto bufferizedOp = cast<LinalgOp>(op.clone(
rewriter, op.getLoc(), /*resultTypes=*/TypeRange{}, newOperands));
op.clone(rewriter, op.getLoc(), /*resultTypes=*/TypeRange{}, newOperands);
// Replace the results of the old op with the new output buffers.
state.replaceOp(rewriter, op, newOutputBuffers);
return comprehensive_bufferize::bufferize(rewriter, bufferizedOp.getBlock(),
state);
return success();
}
/// Linalg OpResults usually bufferize inplace with their tied (output
@@ -310,7 +308,7 @@ struct TiledLoopOpInterface
for (auto it : llvm::zip(oldRegionInOutArgs, newRegionInOutArgs)) {
Value oldArg = std::get<0>(it);
Value newArg = std::get<1>(it);
rewriter.setInsertionPointToStart(newTiledLoopOp->getBlock());
rewriter.setInsertionPointToStart(newTiledLoopOp.getBody());
if (oldArg.getType().isa<TensorType>()) {
newBlockArgs.push_back(rewriter.create<bufferization::ToTensorOp>(
oldArg.getLoc(), newArg));
@@ -346,9 +344,7 @@ struct TiledLoopOpInterface
// Replace results and delete old op.
state.replaceOp(rewriter, op, newResults);
// Bufferize loop body.
return comprehensive_bufferize::bufferize(rewriter,
newTiledLoopOp.getBody(), state);
return success();
}
};

View File

@@ -660,20 +660,12 @@ struct ReturnOpInterface
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
#ifndef NDEBUG
auto returnOp = cast<ReturnOp>(op);
assert(isa<FuncOp>(returnOp->getParentOp()) &&
"only support FuncOp parent for ReturnOp");
for (OpOperand &operand : returnOp->getOpOperands()) {
auto tensorType = operand.get().getType().dyn_cast<TensorType>();
if (!tensorType)
continue;
Value v = state.lookupBuffer(rewriter, operand.get());
Value returnTensor =
rewriter.create<bufferization::ToTensorOp>(returnOp.getLoc(), v);
operand.set(returnTensor);
}
return success();
#endif // NDEBUG
return failure();
}
};
@@ -681,10 +673,7 @@ struct FuncOpInterface
: public BufferizableOpInterface::ExternalModel<FuncOpInterface, FuncOp> {
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto funcOp = cast<FuncOp>(op);
// Bufferize function body.
return comprehensive_bufferize::bufferize(rewriter, &funcOp.body(), state);
return failure();
}
/// Return `true` if the given function argument is writable.

View File

@@ -64,14 +64,12 @@ struct ExecuteRegionOpInterface
const BufferizationState &state) const {
// TODO: Add bufferization support when needed. scf.execute_region should be
// bufferized similar to scf.if.
auto executeRegionOp = cast<scf::ExecuteRegionOp>(op);
bool hasTensorReturnType = any_of(
op->getResultTypes(), [](Type t) { return t.isa<TensorType>(); });
if (hasTensorReturnType)
return op->emitError(
"scf.execute_region with tensor result not supported");
return comprehensive_bufferize::bufferize(
rewriter, &executeRegionOp.getRegion(), state);
return success();
}
BufferRelation bufferRelation(Operation *op, OpResult opResult,
@@ -196,14 +194,6 @@ struct IfOpInterface
// Replace op results.
state.replaceOp(rewriter, op, newIfOp->getResults());
// Bufferize then/else blocks.
if (failed(comprehensive_bufferize::bufferize(rewriter, newIfOp.thenBlock(),
state)))
return failure();
if (failed(comprehensive_bufferize::bufferize(rewriter, newIfOp.elseBlock(),
state)))
return failure();
return success();
}
@@ -338,10 +328,6 @@ struct ForOpInterface
// Replace loop results.
state.replaceOp(rewriter, op, newForOp->getResults());
// Bufferize loop body.
if (failed(comprehensive_bufferize::bufferize(rewriter, loopBody, state)))
return failure();
return success();
}
};

View File

@@ -31,23 +31,23 @@ func @main() {
%v1 = arith.constant 1.0 : f32
%v2 = arith.constant 2.0 : f32
// CHECK-NEXT: %[[C:.*]] = memref.alloca() {alignment = 128 : i64} : memref<f32>
// CHECK-NEXT: %[[B:.*]] = memref.alloca() {alignment = 128 : i64} : memref<64xf32>
// CHECK-NEXT: %[[A:.*]] = memref.alloca() {alignment = 128 : i64} : memref<64xf32>
// CHECK-NEXT: %[[B:.*]] = memref.alloca() {alignment = 128 : i64} : memref<64xf32>
// CHECK-NEXT: %[[C:.*]] = memref.alloca() {alignment = 128 : i64} : memref<f32>
%A = linalg.init_tensor [64] : tensor<64xf32>
%B = linalg.init_tensor [64] : tensor<64xf32>
%C = linalg.init_tensor [] : tensor<f32>
// CHECK-NEXT: linalg.fill(%[[C1]], %[[A]]) : f32, memref<64xf32>
// CHECK-NEXT: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: linalg.fill(%[[C2]], %[[B]]) : f32, memref<64xf32>
// CHECK-NEXT: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: linalg.fill(%[[C0]], %[[C]]) : f32, memref<f32>
// CHECK-NEXT: %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
%AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32>
%BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32>
%CC = linalg.fill(%v0, %C) : f32, tensor<f32> -> tensor<f32>
// CHECK-NEXT: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
// CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
%res = call @init_and_dot(%AA, %BB, %CC) :
(tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>

View File

@@ -142,7 +142,7 @@ func @scf_yield(%b : i1, %A : tensor<4xf32>, %B : tensor<4xf32>) -> tensor<4xf32
func @unknown_op(%A : tensor<4xf32>) -> tensor<4xf32>
{
// expected-error @+1 {{unsupported op with tensors}}
// expected-error @+1 {{op was not bufferized}}
%r = "marklar"(%A) : (tensor<4xf32>) -> (tensor<4xf32>)
return %r: tensor<4xf32>
}
@@ -193,7 +193,8 @@ func @to_memref_op_is_writing(
func private @foo(%t : tensor<?xf32>) -> (f32, tensor<?xf32>, f32)
func @call_to_unknown_tensor_returning_func(%t : tensor<?xf32>) {
// expected-error @+1 {{call to FuncOp that returns non-equivalent tensors not supported}}
// expected-error @+2 {{call to FuncOp that returns non-equivalent tensors not supported}}
// expected-error @+1 {{op was not bufferized}}
call @foo(%t) : (tensor<?xf32>) -> (f32, tensor<?xf32>, f32)
return
}
@@ -206,7 +207,8 @@ func @foo(%t : tensor<5xf32>) -> (tensor<5xf32>) {
}
func @call_to_func_returning_non_equiv_tensor(%t : tensor<5xf32>) {
// expected-error @+1 {{call to FuncOp that returns non-equivalent tensors not supported}}
// expected-error @+2 {{call to FuncOp that returns non-equivalent tensors not supported}}
// expected-error @+1 {{op was not bufferized}}
call @foo(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
return
}

View File

@@ -168,9 +168,9 @@ func @insert_slice_fun(%A0 : tensor<?xf32>,
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
{
// Hoisted allocs.
// CHECK: %[[REALLOC1:.*]] = memref.alloc
// CHECK: %[[REALLOC2:.*]] = memref.alloc
// CHECK: %[[REALLOC3:.*]] = memref.alloc
// CHECK: %[[REALLOC2:.*]] = memref.alloc
// CHECK: %[[REALLOC1:.*]] = memref.alloc
// Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
// CHECK: linalg.copy(%[[A0]], %[[REALLOC3]]
@@ -516,23 +516,23 @@ func @main() {
%v1 = arith.constant 1.0 : f32
%v2 = arith.constant 2.0 : f32
// CHECK-NEXT: %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref<f32>
// CHECK-NEXT: %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
// CHECK-NEXT: %[[A:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
// CHECK-NEXT: %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
// CHECK-NEXT: %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref<f32>
%A = linalg.init_tensor [64] : tensor<64xf32>
%B = linalg.init_tensor [64] : tensor<64xf32>
%C = linalg.init_tensor [] : tensor<f32>
// CHECK-NEXT: linalg.fill(%[[C1]], %[[A]]) : f32, memref<64xf32>
// CHECK-NEXT: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: linalg.fill(%[[C2]], %[[B]]) : f32, memref<64xf32>
// CHECK-NEXT: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: linalg.fill(%[[C0]], %[[C]]) : f32, memref<f32>
// CHECK-NEXT: %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
%AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32>
%BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32>
%CC = linalg.fill(%v0, %C) : f32, tensor<f32> -> tensor<f32>
// CHECK-NEXT: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-NEXT: %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
// CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
%res = call @init_and_dot(%AA, %BB, %CC) :
(tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>
@@ -762,6 +762,7 @@ func @matmul(
tensor<256x192xf32> to tensor<256x16xf32>
// %4 does not match an insert_slice, it cannot be bufferized inplace and needs to alloc.
// CHECK: %[[T:.*]] = memref.subview %[[C]][%[[I]], %[[J]]] [8, 16] [1, 1]
%4 = tensor.extract_slice %C[%arg3, %arg5] [8, 16] [1, 1] :
tensor<128x192xf32> to tensor<8x16xf32>
@@ -787,7 +788,6 @@ func @matmul(
// insert_slice is inplace but its source comes from an equivalent buffer
// that is not in place. So we must insert a copy of the small buffer into
// the bigger buffer.
// CHECK: %[[T:.*]] = memref.subview %[[C]][%[[I]], %[[J]]] [8, 16] [1, 1]
// CHECK: linalg.copy(%[[ALLOC]], %[[T]])
%7 = tensor.insert_slice %6 into %arg6[%arg3, %arg5] [8, 16] [1, 1] :
tensor<8x16xf32> into tensor<128x192xf32>
@@ -858,7 +858,8 @@ func @buffer_forwarding_conflict(
// init_tensor itself does not alloc but forwards to the **second**
// insert_slice. InitTensorOp replaces the init_tensor with an out-of-place
// extract_slice.
// CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]])
// CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]])
// CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
%a = linalg.init_tensor[%sz] : tensor<?xf32>
// CHECK: linalg.fill({{.*}}, %[[EXTRACT_SLICE_ALLOC]]) : f32, memref<?xf32>
@@ -869,7 +870,6 @@ func @buffer_forwarding_conflict(
// CHECK: linalg.copy(%[[EXTRACT_SLICE_ALLOC]], %[[SV0_ALLOC]]) : memref<?xf32>, memref<?xf32>
%r0 = tensor.insert_slice %f into %t[0][%sz][1]: tensor<?xf32> into tensor<?xf32>
// CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
// CHECK: linalg.copy(%[[EXTRACT_SLICE_ALLOC]], %[[T_SUBVIEW]])
%r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor<?xf32> into tensor<?xf32>

View File

@@ -6778,6 +6778,7 @@ cc_library(
":Pass",
":StandardOps",
":Support",
":Transforms",
"//llvm:Support",
],
)