[mlir] support returning unranked memrefs

Initially, unranked memref descriptors in the LLVM dialect were designed only
to be passed into functions. An assertion was guarding against returning
unranked memrefs from functions in the standard-to-LLVM conversion. This is
insufficient for functions that wish to return an unranked memref whose rank
the caller does not know in advance, and hence cannot allocate the descriptor
and pass it in as an argument.

Introduce a calling convention for returning unranked memref descriptors as
follows. An unranked memref descriptor always points to a ranked memref
descriptor stored on the stack of the current function. When an unranked memref
descriptor is returned from a function, the ranked memref descriptor it points
to is copied to dynamically allocated memory, the ownership of which is
transferred to the caller. The caller is responsible for deallocating the
dynamically allocated memory and for copying the pointed-to ranked memref
descriptor onto its stack.

Provide default lowerings for std.return, std.call and std.call_indirect that
maintain the conversion defined above.

This convention is additionally exercised by a runtime test to guard against
memory errors.

Differential Revision: https://reviews.llvm.org/D82647
Alex Zinenko 2020-06-26 14:34:00 +02:00
parent bb91520e4f
commit 6323065fd6
8 changed files with 413 additions and 31 deletions


@ -246,7 +246,7 @@ func @bar() {
}
```
### Calling Convention for `memref`
### Calling Convention for Ranked `memref`
Function _arguments_ of `memref` type, ranked or unranked, are _expanded_ into a
list of arguments of non-aggregate types that the memref descriptor defined
@ -317,7 +317,9 @@ llvm.func @bar() {
```
For **unranked** memrefs, the list of function arguments always contains two
### Calling Convention for Unranked `memref`
For unranked memrefs, the list of function arguments always contains two
elements, same as the unranked memref descriptor: an integer rank, and a
type-erased (`!llvm<"i8*">`) pointer to the ranked memref descriptor. Note that
while the _calling convention_ does not require stack allocation, _casting_ to
@ -369,6 +371,20 @@ llvm.func @bar() {
}
```
**Lifetime.** The second element of the unranked memref descriptor points to
some memory in which the ranked memref descriptor is stored. By convention, this
memory is allocated on the stack and has the lifetime of the function. (*Note:*
due to function-length lifetime, creation of multiple unranked memref
descriptors, e.g., in a loop, may lead to stack overflows.) If an unranked
descriptor has to be returned from a function, the ranked descriptor it points
to is copied into dynamically allocated memory, and the pointer in the unranked
descriptor is updated accordingly. The allocation happens immediately before
returning. It is the responsibility of the caller to free the dynamically
allocated memory. The default conversion of `std.call` and `std.call_indirect`
copies the ranked descriptor to newly allocated memory on the caller's stack.
Thus, the convention that the ranked memref descriptor pointed to by an
unranked memref descriptor is stored on the stack is respected.
*This convention may or may not apply if the conversion of MemRef types is
overridden by the user.*
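As a minimal source-level sketch (function names here are hypothetical and
mirror the conversion tests), returning an unranked memref from one function
and consuming it in another is written as follows; the conversion inserts the
dynamic allocation and copy before the `return` in `@producer`, and the stack
allocation, copy and `free` after the `call` in `@consumer`.
```mlir
func @producer(%arg0: memref<4x3xf32>) -> memref<*xf32> {
  %0 = memref_cast %arg0 : memref<4x3xf32> to memref<*xf32>
  return %0 : memref<*xf32>
}

func @consumer(%arg0: memref<4x3xf32>) {
  %0 = call @producer(%arg0) : (memref<4x3xf32>) -> memref<*xf32>
  return
}
```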


@ -129,6 +129,9 @@ public:
/// Gets the bitwidth of the index type when converted to LLVM.
unsigned getIndexTypeBitwidth() { return customizations.indexBitwidth; }
/// Gets the pointer bitwidth.
unsigned getPointerBitwidth(unsigned addressSpace = 0);
protected:
/// LLVM IR module used to parse/create types.
llvm::Module *module;
@ -386,6 +389,13 @@ public:
/// Returns the number of non-aggregate values that would be produced by
/// `unpack`.
static unsigned getNumUnpackedValues() { return 2; }
/// Builds IR computing the sizes in bytes (suitable for opaque allocation)
/// and appends the corresponding values into `sizes`.
static void computeSizes(OpBuilder &builder, Location loc,
LLVMTypeConverter &typeConverter,
ArrayRef<UnrankedMemRefDescriptor> values,
SmallVectorImpl<Value> &sizes);
};
/// Base class for operation conversions targeting the LLVM IR dialect. Provides


@ -794,6 +794,13 @@ def LLVM_PowOp : LLVM_BinarySameArgsIntrinsicOp<"pow">;
def LLVM_BitReverseOp : LLVM_UnaryIntrinsicOp<"bitreverse">;
def LLVM_CtPopOp : LLVM_UnaryIntrinsicOp<"ctpop">;
def LLVM_MemcpyOp : LLVM_ZeroResultIntrOp<"memcpy", [0, 1, 2]>,
Arguments<(ins LLVM_Type:$dst, LLVM_Type:$src,
LLVM_Type:$len, LLVM_Type:$isVolatile)>;
def LLVM_MemcpyInlineOp : LLVM_ZeroResultIntrOp<"memcpy.inline", [0, 1, 2]>,
Arguments<(ins LLVM_Type:$dst, LLVM_Type:$src,
LLVM_Type:$len, LLVM_Type:$isVolatile)>;
//
// Vector Reductions.
//


@ -24,6 +24,7 @@
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Support/MathExtras.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/Utils.h"
@ -184,6 +185,10 @@ LLVM::LLVMType LLVMTypeConverter::getIndexType() {
return LLVM::LLVMType::getIntNTy(llvmDialect, getIndexTypeBitwidth());
}
unsigned LLVMTypeConverter::getPointerBitwidth(unsigned addressSpace) {
return module->getDataLayout().getPointerSizeInBits(addressSpace);
}
Type LLVMTypeConverter::convertIndexType(IndexType type) {
return getIndexType();
}
@ -769,6 +774,51 @@ void UnrankedMemRefDescriptor::unpack(OpBuilder &builder, Location loc,
results.push_back(d.memRefDescPtr(builder, loc));
}
void UnrankedMemRefDescriptor::computeSizes(
OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter,
ArrayRef<UnrankedMemRefDescriptor> values, SmallVectorImpl<Value> &sizes) {
if (values.empty())
return;
// Cache the index type.
LLVM::LLVMType indexType = typeConverter.getIndexType();
// Initialize shared constants.
Value one = createIndexAttrConstant(builder, loc, indexType, 1);
Value two = createIndexAttrConstant(builder, loc, indexType, 2);
Value pointerSize = createIndexAttrConstant(
builder, loc, indexType, ceilDiv(typeConverter.getPointerBitwidth(), 8));
Value indexSize =
createIndexAttrConstant(builder, loc, indexType,
ceilDiv(typeConverter.getIndexTypeBitwidth(), 8));
sizes.reserve(sizes.size() + values.size());
for (UnrankedMemRefDescriptor desc : values) {
// Emit IR computing the memory necessary to store the descriptor. This
// assumes the descriptor to be
// { type*, type*, index, index[rank], index[rank] }
// and densely packed, so the total size is
// 2 * sizeof(pointer) + (1 + 2 * rank) * sizeof(index).
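// For instance, assuming 64-bit pointers and indices, a rank-2 descriptor
// occupies 2 * 8 + (1 + 2 * 2) * 8 = 56 bytes.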
// TODO: consider including the actual size (including any padding due to
// data layout) in the unranked descriptor.
Value doublePointerSize =
builder.create<LLVM::MulOp>(loc, indexType, two, pointerSize);
// (1 + 2 * rank) * sizeof(index)
Value rank = desc.rank(builder, loc);
Value doubleRank = builder.create<LLVM::MulOp>(loc, indexType, two, rank);
Value doubleRankIncremented =
builder.create<LLVM::AddOp>(loc, indexType, doubleRank, one);
Value rankIndexSize = builder.create<LLVM::MulOp>(
loc, indexType, doubleRankIncremented, indexSize);
// Total allocation size.
Value allocationSize = builder.create<LLVM::AddOp>(
loc, indexType, doublePointerSize, rankIndexSize);
sizes.push_back(allocationSize);
}
}
LLVM::LLVMDialect &ConvertToLLVMPattern::getDialect() const {
return *typeConverter.getDialect();
}
@ -1863,6 +1913,104 @@ struct AllocOpLowering : public AllocLikeOpLowering<AllocOp> {
using AllocaOpLowering = AllocLikeOpLowering<AllocaOp>;
/// Copies the shaped descriptor part to (if `toDynamic` is set) or from
/// (otherwise) the dynamically allocated memory for any operands that were
/// unranked descriptors originally.
static LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc,
LLVMTypeConverter &typeConverter,
TypeRange origTypes,
SmallVectorImpl<Value> &operands,
bool toDynamic) {
assert(origTypes.size() == operands.size() &&
"expected as may original types as operands");
// Find operands of unranked memref type and store them.
SmallVector<UnrankedMemRefDescriptor, 4> unrankedMemrefs;
for (unsigned i = 0, e = operands.size(); i < e; ++i) {
if (!origTypes[i].isa<UnrankedMemRefType>())
continue;
unrankedMemrefs.emplace_back(operands[i]);
}
if (unrankedMemrefs.empty())
return success();
// Compute allocation sizes.
SmallVector<Value, 4> sizes;
UnrankedMemRefDescriptor::computeSizes(builder, loc, typeConverter,
unrankedMemrefs, sizes);
// Get frequently used types.
auto voidType = LLVM::LLVMType::getVoidTy(typeConverter.getDialect());
auto voidPtrType = LLVM::LLVMType::getInt8PtrTy(typeConverter.getDialect());
auto i1Type = LLVM::LLVMType::getInt1Ty(typeConverter.getDialect());
LLVM::LLVMType indexType = typeConverter.getIndexType();
// Find the malloc and free, or declare them if necessary.
auto module = builder.getInsertionPoint()->getParentOfType<ModuleOp>();
auto mallocFunc = module.lookupSymbol<LLVM::LLVMFuncOp>("malloc");
if (!mallocFunc && toDynamic) {
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointToStart(module.getBody());
mallocFunc = builder.create<LLVM::LLVMFuncOp>(
builder.getUnknownLoc(), "malloc",
LLVM::LLVMType::getFunctionTy(
voidPtrType, llvm::makeArrayRef(indexType), /*isVarArg=*/false));
}
auto freeFunc = module.lookupSymbol<LLVM::LLVMFuncOp>("free");
if (!freeFunc && !toDynamic) {
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointToStart(module.getBody());
freeFunc = builder.create<LLVM::LLVMFuncOp>(
builder.getUnknownLoc(), "free",
LLVM::LLVMType::getFunctionTy(voidType, llvm::makeArrayRef(voidPtrType),
/*isVarArg=*/false));
}
// Initialize shared constants.
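// The boolean 'false' constant is passed to the memcpy intrinsic below as its
// is-volatile flag.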
Value zero =
builder.create<LLVM::ConstantOp>(loc, i1Type, builder.getBoolAttr(false));
unsigned unrankedMemrefPos = 0;
for (unsigned i = 0, e = operands.size(); i < e; ++i) {
Type type = origTypes[i];
if (!type.isa<UnrankedMemRefType>())
continue;
Value allocationSize = sizes[unrankedMemrefPos++];
UnrankedMemRefDescriptor desc(operands[i]);
// Allocate memory, copy, and free the source if necessary.
Value memory =
toDynamic
? builder.create<LLVM::CallOp>(loc, mallocFunc, allocationSize)
.getResult(0)
: builder.create<LLVM::AllocaOp>(loc, voidPtrType, allocationSize,
/*alignment=*/0);
Value source = desc.memRefDescPtr(builder, loc);
builder.create<LLVM::MemcpyOp>(loc, memory, source, allocationSize, zero);
if (!toDynamic)
builder.create<LLVM::CallOp>(loc, freeFunc, source);
// Create a new descriptor. The same descriptor can be returned multiple
// times; attempting to modify its pointer can lead to memory leaks
// (allocated twice and overwritten) or double frees (the caller does not
// know if the descriptor points to the same memory).
Type descriptorType = typeConverter.convertType(type);
if (!descriptorType)
return failure();
auto updatedDesc =
UnrankedMemRefDescriptor::undef(builder, loc, descriptorType);
Value rank = desc.rank(builder, loc);
updatedDesc.setRank(builder, loc, rank);
updatedDesc.setMemRefDescPtr(builder, loc, memory);
operands[i] = updatedDesc;
}
return success();
}
// A CallOp automatically promotes MemRefType to a sequence of alloca/store and
// passes the pointer to the MemRef across function boundaries.
template <typename CallOpType>
@ -1882,13 +2030,6 @@ struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern<CallOpType> {
unsigned numResults = callOp.getNumResults();
auto resultTypes = llvm::to_vector<4>(callOp.getResultTypes());
for (Type resType : resultTypes) {
assert(!resType.isa<UnrankedMemRefType>() &&
"Returning unranked memref is not supported. Pass result as an"
"argument instead.");
(void)resType;
}
if (numResults != 0) {
if (!(packedResult =
this->typeConverter.packFunctionResults(resultTypes)))
@ -1900,25 +2041,25 @@ struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern<CallOpType> {
auto newOp = rewriter.create<LLVM::CallOp>(op->getLoc(), packedResult,
promoted, op->getAttrs());
// If < 2 results, packing did not do anything and we can just return.
if (numResults < 2) {
rewriter.replaceOp(op, newOp.getResults());
return success();
}
// Otherwise, it had been converted to an operation producing a structure.
// Extract individual results from the structure and return them as list.
// TODO(aminim, ntv, riverriddle, zinenko): this seems like patching around
// a particular interaction between MemRefType and CallOp lowering. Find a
// way to avoid special casing.
SmallVector<Value, 4> results;
results.reserve(numResults);
for (unsigned i = 0; i < numResults; ++i) {
auto type = this->typeConverter.convertType(op->getResult(i).getType());
results.push_back(rewriter.create<LLVM::ExtractValueOp>(
op->getLoc(), type, newOp.getOperation()->getResult(0),
rewriter.getI64ArrayAttr(i)));
if (numResults < 2) {
// If < 2 results, packing did not do anything and we can just return.
results.append(newOp.result_begin(), newOp.result_end());
} else {
// Otherwise, it had been converted to an operation producing a structure.
// Extract individual results from the structure and return them as list.
results.reserve(numResults);
for (unsigned i = 0; i < numResults; ++i) {
auto type = this->typeConverter.convertType(op->getResult(i).getType());
results.push_back(rewriter.create<LLVM::ExtractValueOp>(
op->getLoc(), type, newOp.getOperation()->getResult(0),
rewriter.getI64ArrayAttr(i)));
}
}
if (failed(copyUnrankedDescriptors(
rewriter, op->getLoc(), this->typeConverter, op->getResultTypes(),
results, /*toDynamic=*/false)))
return failure();
rewriter.replaceOp(op, results);
return success();
@ -2397,6 +2538,10 @@ struct ReturnOpLowering : public ConvertOpToLLVMPattern<ReturnOp> {
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
unsigned numArguments = op->getNumOperands();
auto updatedOperands = llvm::to_vector<4>(operands);
copyUnrankedDescriptors(rewriter, op->getLoc(), typeConverter,
op->getOperands().getTypes(), updatedOperands,
/*toDynamic=*/true);
// If ReturnOp has 0 or 1 operand, create it and return immediately.
if (numArguments == 0) {
@ -2406,7 +2551,7 @@ struct ReturnOpLowering : public ConvertOpToLLVMPattern<ReturnOp> {
}
if (numArguments == 1) {
rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
op, ArrayRef<Type>(), operands.front(), op->getAttrs());
op, ArrayRef<Type>(), updatedOperands, op->getAttrs());
return success();
}
@ -2418,7 +2563,7 @@ struct ReturnOpLowering : public ConvertOpToLLVMPattern<ReturnOp> {
Value packed = rewriter.create<LLVM::UndefOp>(op->getLoc(), packedType);
for (unsigned i = 0; i < numArguments; ++i) {
packed = rewriter.create<LLVM::InsertValueOp>(
op->getLoc(), packedType, packed, operands[i],
op->getLoc(), packedType, packed, updatedOperands[i],
rewriter.getI64ArrayAttr(i));
}
rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, ArrayRef<Type>(), packed,


@ -109,3 +109,134 @@ func @other_callee(%arg0: memref<?xf32>, %arg1: index) attributes { llvm.emit_c_
// EMIT_C_ATTRIBUTE: @_mlir_ciface_other_callee
// EMIT_C_ATTRIBUTE: llvm.call @other_callee
//===========================================================================//
// Calling convention on returning unranked memrefs.
//===========================================================================//
// CHECK-LABEL: llvm.func @return_var_memref_caller
func @return_var_memref_caller(%arg0: memref<4x3xf32>) {
// CHECK: %[[CALL_RES:.*]] = llvm.call @return_var_memref
%0 = call @return_var_memref(%arg0) : (memref<4x3xf32>) -> memref<*xf32>
// CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : index)
// CHECK: %[[TWO:.*]] = llvm.mlir.constant(2 : index)
// These sizes may depend on the data layout, so we do not match specific values.
// CHECK: %[[PTR_SIZE:.*]] = llvm.mlir.constant
// CHECK: %[[IDX_SIZE:.*]] = llvm.mlir.constant
// CHECK: %[[DOUBLE_PTR_SIZE:.*]] = llvm.mul %[[TWO]], %[[PTR_SIZE]]
// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm<"{ i64, i8* }">
// CHECK: %[[DOUBLE_RANK:.*]] = llvm.mul %[[TWO]], %[[RANK]]
// CHECK: %[[DOUBLE_RANK_INC:.*]] = llvm.add %[[DOUBLE_RANK]], %[[ONE]]
// CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]]
// CHECK: %[[ALLOC_SIZE:.*]] = llvm.add %[[DOUBLE_PTR_SIZE]], %[[TABLES_SIZE]]
// CHECK: %[[FALSE:.*]] = llvm.mlir.constant(false)
// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOC_SIZE]] x !llvm.i8
// CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[CALL_RES]][1]
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]])
// CHECK: llvm.call @free(%[[SOURCE]])
// CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }">
// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm<"{ i64, i8* }">
// CHECK: %[[DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[DESC]][0]
// CHECK: llvm.insertvalue %[[ALLOCA]], %[[DESC_1]][1]
return
}
// CHECK-LABEL: llvm.func @return_var_memref
func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> {
// Match the construction of the unranked descriptor.
// CHECK: %[[ALLOCA:.*]] = llvm.alloca
// CHECK: %[[MEMORY:.*]] = llvm.bitcast %[[ALLOCA]]
// CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }">
// CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0]
// CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1]
%0 = memref_cast %arg0: memref<4x3xf32> to memref<*xf32>
// CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : index)
// CHECK: %[[TWO:.*]] = llvm.mlir.constant(2 : index)
// These sizes may depend on the data layout, so we do not match specific values.
// CHECK: %[[PTR_SIZE:.*]] = llvm.mlir.constant
// CHECK: %[[IDX_SIZE:.*]] = llvm.mlir.constant
// CHECK: %[[DOUBLE_PTR_SIZE:.*]] = llvm.mul %[[TWO]], %[[PTR_SIZE]]
// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm<"{ i64, i8* }">
// CHECK: %[[DOUBLE_RANK:.*]] = llvm.mul %[[TWO]], %[[RANK]]
// CHECK: %[[DOUBLE_RANK_INC:.*]] = llvm.add %[[DOUBLE_RANK]], %[[ONE]]
// CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]]
// CHECK: %[[ALLOC_SIZE:.*]] = llvm.add %[[DOUBLE_PTR_SIZE]], %[[TABLES_SIZE]]
// CHECK: %[[FALSE:.*]] = llvm.mlir.constant(false)
// CHECK: %[[ALLOCATED:.*]] = llvm.call @malloc(%[[ALLOC_SIZE]])
// CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[DESC_2]][1]
// CHECK: "llvm.intr.memcpy"(%[[ALLOCATED]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]])
// CHECK: %[[NEW_DESC:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }">
// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm<"{ i64, i8* }">
// CHECK: %[[NEW_DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[NEW_DESC]][0]
// CHECK: %[[NEW_DESC_2:.*]] = llvm.insertvalue %[[ALLOCATED]], %[[NEW_DESC_1]][1]
// CHECK: llvm.return %[[NEW_DESC_2]]
return %0 : memref<*xf32>
}
// CHECK-LABEL: llvm.func @return_two_var_memref_caller
func @return_two_var_memref_caller(%arg0: memref<4x3xf32>) {
// Only check that we create two different descriptors using different
// memory, and deallocate both sources. The size computation is the same as
// for the single result.
// CHECK: %[[CALL_RES:.*]] = llvm.call @return_two_var_memref
// CHECK: %[[RES_1:.*]] = llvm.extractvalue %[[CALL_RES]][0]
// CHECK: %[[RES_2:.*]] = llvm.extractvalue %[[CALL_RES]][1]
%0:2 = call @return_two_var_memref(%arg0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
// CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %{{.*}} x !llvm.i8
// CHECK: %[[SOURCE_1:.*]] = llvm.extractvalue %[[RES_1:.*]][1] : ![[DESC_TYPE:.*]]
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[SOURCE_1]], %{{.*}}, %[[FALSE:.*]])
// CHECK: llvm.call @free(%[[SOURCE_1]])
// CHECK: %[[DESC_1:.*]] = llvm.mlir.undef : ![[DESC_TYPE]]
// CHECK: %[[DESC_11:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_1]][0]
// CHECK: llvm.insertvalue %[[ALLOCA_1]], %[[DESC_11]][1]
// CHECK: %[[ALLOCA_2:.*]] = llvm.alloca %{{.*}} x !llvm.i8
// CHECK: %[[SOURCE_2:.*]] = llvm.extractvalue %[[RES_2:.*]][1]
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA_2]], %[[SOURCE_2]], %{{.*}}, %[[FALSE]])
// CHECK: llvm.call @free(%[[SOURCE_2]])
// CHECK: %[[DESC_2:.*]] = llvm.mlir.undef : ![[DESC_TYPE]]
// CHECK: %[[DESC_21:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_2]][0]
// CHECK: llvm.insertvalue %[[ALLOCA_2]], %[[DESC_21]][1]
return
}
// CHECK-LABEL: llvm.func @return_two_var_memref
func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) {
// Match the construction of the unranked descriptor.
// CHECK: %[[ALLOCA:.*]] = llvm.alloca
// CHECK: %[[MEMORY:.*]] = llvm.bitcast %[[ALLOCA]]
// CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }">
// CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0]
// CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1]
%0 = memref_cast %arg0 : memref<4x3xf32> to memref<*xf32>
// Only check that we allocate the memory for each operand of the "return"
// separately, even if both operands are the same value. The calling
// convention requires the caller to free them and the caller cannot know
// whether they are the same value or not.
// CHECK: %[[ALLOCATED_1:.*]] = llvm.call @malloc(%{{.*}})
// CHECK: %[[SOURCE_1:.*]] = llvm.extractvalue %[[DESC_2]][1]
// CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_1]], %[[SOURCE_1]], %{{.*}}, %[[FALSE:.*]])
// CHECK: %[[RES_1:.*]] = llvm.mlir.undef
// CHECK: %[[RES_11:.*]] = llvm.insertvalue %{{.*}}, %[[RES_1]][0]
// CHECK: %[[RES_12:.*]] = llvm.insertvalue %[[ALLOCATED_1]], %[[RES_11]][1]
// CHECK: %[[ALLOCATED_2:.*]] = llvm.call @malloc(%{{.*}})
// CHECK: %[[SOURCE_2:.*]] = llvm.extractvalue %[[DESC_2]][1]
// CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_2]], %[[SOURCE_2]], %{{.*}}, %[[FALSE]])
// CHECK: %[[RES_2:.*]] = llvm.mlir.undef
// CHECK: %[[RES_21:.*]] = llvm.insertvalue %{{.*}}, %[[RES_2]][0]
// CHECK: %[[RES_22:.*]] = llvm.insertvalue %[[ALLOCATED_2]], %[[RES_21]][1]
// CHECK: %[[RESULTS:.*]] = llvm.mlir.undef : !llvm<"{ { i64, i8* }, { i64, i8* } }">
// CHECK: %[[RESULTS_1:.*]] = llvm.insertvalue %[[RES_12]], %[[RESULTS]]
// CHECK: %[[RESULTS_2:.*]] = llvm.insertvalue %[[RES_22]], %[[RESULTS_1]]
// CHECK: llvm.return %[[RESULTS_2]]
return %0, %0 : memref<*xf32>, memref<*xf32>
}


@ -1,7 +1,9 @@
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
// CHECK-LABEL: func @ops(%arg0: !llvm.i32, %arg1: !llvm.float)
func @ops(%arg0 : !llvm.i32, %arg1 : !llvm.float) {
// CHECK-LABEL: func @ops
func @ops(%arg0: !llvm.i32, %arg1: !llvm.float,
%arg2: !llvm<"i8*">, %arg3: !llvm<"i8*">,
%arg4: !llvm.i1) {
// Integer arithmetic binary operations.
//
// CHECK-NEXT: %0 = llvm.add %arg0, %arg0 : !llvm.i32
@ -109,6 +111,17 @@ func @ops(%arg0 : !llvm.i32, %arg1 : !llvm.float) {
// CHECK: "llvm.intr.ctpop"(%{{.*}}) : (!llvm.i32) -> !llvm.i32
%33 = "llvm.intr.ctpop"(%arg0) : (!llvm.i32) -> !llvm.i32
// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> ()
"llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> ()
// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> ()
"llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> ()
// CHECK: %[[SZ:.*]] = llvm.mlir.constant
%sz = llvm.mlir.constant(10: i64) : !llvm.i64
// CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> ()
"llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg4) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> ()
// CHECK: llvm.return
llvm.return
}
@ -315,4 +328,4 @@ func @useFenceInst() {
// CHECK: release
llvm.fence release
return
}
}


@ -202,6 +202,17 @@ llvm.func @masked_intrinsics(%A: !llvm<"<7 x float>*">, %mask: !llvm<"<7 x i1>">
llvm.return
}
// CHECK-LABEL: @memcpy_test
llvm.func @memcpy_test(%arg0: !llvm.i32, %arg1: !llvm.i1, %arg2: !llvm<"i8*">, %arg3: !llvm<"i8*">) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 %{{.*}})
"llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg1) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> ()
%sz = llvm.mlir.constant(10: i64) : !llvm.i64
// CHECK: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 10, i1 %{{.*}})
"llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg1) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> ()
llvm.return
}
// Check that intrinsics are declared with appropriate types.
// CHECK-DAG: declare float @llvm.fma.f32(float, float, float)
// CHECK-DAG: declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #0
@ -231,3 +242,5 @@ llvm.func @masked_intrinsics(%A: !llvm<"<7 x float>*">, %mask: !llvm<"<7 x i1>">
// CHECK-DAG: declare void @llvm.matrix.column.major.store.v48f32.p0f32(<48 x float>, float* nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg)
// CHECK-DAG: declare <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>*, i32 immarg, <7 x i1>, <7 x float>)
// CHECK-DAG: declare void @llvm.masked.store.v7f32.p0v7f32(<7 x float>, <7 x float>*, i32 immarg, <7 x i1>)
// CHECK-DAG: declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)
// CHECK-DAG: declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg)


@ -18,6 +18,21 @@
// CHECK: rank = 0
// 122 is ASCII for 'z'.
// CHECK: [z]
//
// CHECK: rank = 2
// CHECK-SAME: sizes = [4, 3]
// CHECK-SAME: strides = [3, 1]
// CHECK-COUNT-4: [1, 1, 1]
//
// CHECK: rank = 2
// CHECK-SAME: sizes = [4, 3]
// CHECK-SAME: strides = [3, 1]
// CHECK-COUNT-4: [1, 1, 1]
//
// CHECK: rank = 2
// CHECK-SAME: sizes = [4, 3]
// CHECK-SAME: strides = [3, 1]
// CHECK-COUNT-4: [1, 1, 1]
func @main() -> () {
%A = alloc() : memref<10x3xf32, 0>
%f2 = constant 2.00000e+00 : f32
@ -48,8 +63,40 @@ func @main() -> () {
call @print_memref_i8(%U4) : (memref<*xi8>) -> ()
dealloc %A : memref<10x3xf32, 0>
call @return_var_memref_caller() : () -> ()
call @return_two_var_memref_caller() : () -> ()
return
}
func @print_memref_i8(memref<*xi8>) attributes { llvm.emit_c_interface }
func @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
func @return_two_var_memref_caller() {
%0 = alloca() : memref<4x3xf32>
%c0f32 = constant 1.0 : f32
linalg.fill(%0, %c0f32) : memref<4x3xf32>, f32
%1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
call @print_memref_f32(%1#0) : (memref<*xf32>) -> ()
call @print_memref_f32(%1#1) : (memref<*xf32>) -> ()
return
}
func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) {
%0 = memref_cast %arg0 : memref<4x3xf32> to memref<*xf32>
return %0, %0 : memref<*xf32>, memref<*xf32>
}
func @return_var_memref_caller() {
%0 = alloca() : memref<4x3xf32>
%c0f32 = constant 1.0 : f32
linalg.fill(%0, %c0f32) : memref<4x3xf32>, f32
%1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
call @print_memref_f32(%1) : (memref<*xf32>) -> ()
return
}
func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> {
%0 = memref_cast %arg0: memref<4x3xf32> to memref<*xf32>
return %0 : memref<*xf32>
}