CodeGen: Fix invalid bitcast for coerced function argument

Clang assumes that a coerced function argument is in address space 0, which is not always true and results in invalid bitcasts.

This patch fixes a failure in the OpenCL conformance test api/get_kernel_arg_info with the amdgcn---amdgizcl triple, where a non-zero alloca address space is used.

Differential Revision: https://reviews.llvm.org/D34777

llvm-svn: 306721
This commit is contained in:
Yaxun Liu 2017-06-29 18:47:45 +00:00
parent d3a0571301
commit e9e5c4f975
2 changed files with 49 additions and 8 deletions

View File

@ -1297,7 +1297,7 @@ static void CreateCoercedStore(llvm::Value *Src,
// If store is legal, just bitcast the src pointer.
if (SrcSize <= DstSize) {
Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy));
Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
BuildAggStore(CGF, Src, Dst, DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
@ -2412,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
Address AddrToStoreInto = Address::invalid();
if (SrcSize <= DstSize) {
AddrToStoreInto =
Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
} else {
AddrToStoreInto =
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");

View File

@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=COM,X86 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -check-prefixes=COM,AMD %s
typedef struct {
int cells[9];
@ -8,16 +9,57 @@ typedef struct {
int cells[16];
} Mat4X4;
// Aggregate wrapping a single int2. The KernelOneMember checks below expect
// it to arrive as a bare <2 x i32> kernel argument on the amdgcn run.
struct StructOneMember {
int2 x;
};
// Aggregate of two int2 members. The KernelTwoMember checks below expect it
// to be passed to the kernel as a whole %struct.StructTwoMember value on the
// amdgcn run.
struct StructTwoMember {
int2 x;
int2 y;
};
// COM-LABEL: define void @foo
// Takes a Mat3X3 by value and returns a Mat4X4 by value. The returned object
// is never initialized; presumably only the argument/return passing matters
// for this test.
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
Mat4X4 out;
return out;
}
// COM-LABEL: define {{.*}} void @ker
// Expect two mem copies: one for the argument "in", and one for
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
// Kernel that passes in[1] to foo by value and stores foo's result into
// out[0]; both pointer parameters are in the global address space.
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
out[0] = foo(in[1]);
}
// Expect two mem copies: one for the argument "in", and one for
// the return value.
// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
// AMD-LABEL: define void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %u)
// Non-kernel function taking StructOneMember by value; it zeroes the x member
// of its own copy of the argument.
void FuncOneMember(struct StructOneMember u) {
u.x = (int2)(0, 0);
}
// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
// AMD-SAME: (<2 x i32> %[[u_coerce:.*]])
// AMD: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
// AMD: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
// AMD: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
// AMD: call void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %[[u]])
// Kernel that receives StructOneMember by value and forwards it unchanged to
// FuncOneMember.
kernel void KernelOneMember(struct StructOneMember u) {
FuncOneMember(u);
}
// AMD-LABEL: define void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %u)
// Non-kernel function taking StructTwoMember by value; it zeroes only the x
// member of its own copy and leaves y untouched.
void FuncTwoMember(struct StructTwoMember u) {
u.x = (int2)(0, 0);
}
// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
// AMD-SAME: (%struct.StructTwoMember %[[u_coerce:.*]])
// AMD: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
// AMD: store %struct.StructTwoMember %[[u_coerce]], %struct.StructTwoMember addrspace(5)* %[[u]]
// AMD: call void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %[[u]])
// Kernel that receives StructTwoMember by value and forwards it unchanged to
// FuncTwoMember.
kernel void KernelTwoMember(struct StructTwoMember u) {
FuncTwoMember(u);
}