CodeGen: Don't completely mess-up optimized atomic libcalls

Summary:
We did a great job getting this wrong:
- We messed up which LLVM IR types to use for arguments and return values.
  The optimized libcalls use integer types for values.

  Clang attempted to use the IR type which corresponds to the value
  passed in instead of using an appropriately sized integer type.  This
  would result in violations of the ABI for, as an example, floating
  point types.
- We didn't bother recording the result of the atomic libcall in the
  destination memory.

Instead, call the functions with arguments matching the type of the
libcall prototype's parameters.

This fixes PR20780.

Differential Revision: http://reviews.llvm.org/D5098

llvm-svn: 216714
This commit is contained in:
David Majnemer 2014-08-29 07:27:49 +00:00
parent 486f440cf1
commit 0392cf892f
3 changed files with 109 additions and 18 deletions

View File

@ -465,11 +465,19 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
static void
AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
SourceLocation Loc) {
SourceLocation Loc, CharUnits SizeInChars) {
if (UseOptimizedLibcall) {
// Load value and pass it to the function directly.
unsigned Align = CGF.getContext().getTypeAlignInChars(ValTy).getQuantity();
Val = CGF.EmitLoadOfScalar(Val, false, Align, ValTy, Loc);
int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
ValTy =
CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false);
llvm::Type *IPtrTy = llvm::IntegerType::get(CGF.getLLVMContext(),
SizeInBits)->getPointerTo();
Val = CGF.EmitLoadOfScalar(CGF.Builder.CreateBitCast(Val, IPtrTy), false,
Align, CGF.getContext().getPointerType(ValTy),
Loc);
// Coerce the value into an appropriately sized integer type.
Args.add(RValue::get(Val), ValTy);
} else {
// Non-optimized functions always take a reference.
@ -638,7 +646,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
HaveRetTy = true;
Args.add(RValue::get(EmitCastToVoidPtr(Val1)), getContext().VoidPtrTy);
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2, MemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
Args.add(RValue::get(Order), getContext().IntTy);
Order = OrderFail;
break;
@ -650,7 +658,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
case AtomicExpr::AO__atomic_exchange:
LibCallName = "__atomic_exchange";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
break;
// void __atomic_store(size_t size, void *mem, void *val, int order)
// void __atomic_store_N(T *mem, T val, int order)
@ -661,7 +669,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
RetTy = getContext().VoidTy;
HaveRetTy = true;
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
break;
// void __atomic_load(size_t size, void *mem, void *return, int order)
// T __atomic_load_N(T *mem, int order)
@ -675,35 +683,35 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
case AtomicExpr::AO__atomic_fetch_add:
LibCallName = "__atomic_fetch_add";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
break;
// T __atomic_fetch_and_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
LibCallName = "__atomic_fetch_and";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
break;
// T __atomic_fetch_or_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
LibCallName = "__atomic_fetch_or";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
break;
// T __atomic_fetch_sub_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
LibCallName = "__atomic_fetch_sub";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
break;
// T __atomic_fetch_xor_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
LibCallName = "__atomic_fetch_xor";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
E->getExprLoc());
E->getExprLoc(), sizeChars);
break;
default: return EmitUnsupportedRValue(E, "atomic library call");
}
@ -715,7 +723,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
if (!HaveRetTy) {
if (UseOptimizedLibcall) {
// Value is returned directly.
RetTy = MemTy;
// The function returns an appropriately sized integer type.
RetTy = getContext().getIntTypeForBitwidth(
getContext().toBits(sizeChars), /*Signed=*/false);
} else {
// Value is returned through parameter before the order.
RetTy = getContext().VoidTy;
@ -733,8 +743,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
if (!RetTy->isVoidType())
return Res;
if (!RetTy->isVoidType()) {
if (UseOptimizedLibcall) {
if (HaveRetTy)
return Res;
llvm::StoreInst *StoreDest = Builder.CreateStore(
Res.getScalarVal(),
Builder.CreateBitCast(Dest, FTy->getReturnType()->getPointerTo()));
StoreDest->setAlignment(Align);
}
}
if (E->getType()->isVoidType())
return RValue::get(nullptr);
return convertTempToRValue(Dest, E->getType(), E->getExprLoc());

View File

@ -7,31 +7,31 @@ enum memory_order {
int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) {
// CHECK: test_c11_atomic_fetch_add_int_ptr
// CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
}
int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) {
// CHECK: test_c11_atomic_fetch_sub_int_ptr
// CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
}
int test_c11_atomic_fetch_add_int(_Atomic(int) *p) {
// CHECK: test_c11_atomic_fetch_add_int
// CHECK: {{%[^ ]*}} = tail call i32 bitcast (i32* (i8*, i32, i32)* @__atomic_fetch_add_4 to i32 (i8*, i32, i32)*)(i8* {{%[0-9]+}}, i32 3, i32 5)
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5)
return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
}
int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) {
// CHECK: test_c11_atomic_fetch_sub_int
// CHECK: {{%[^ ]*}} = tail call i32 bitcast (i32* (i8*, i32, i32)* @__atomic_fetch_sub_4 to i32 (i8*, i32, i32)*)(i8* {{%[0-9]+}}, i32 5, i32 5)
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5)
return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
}
int *fp2a(int **p) {
// CHECK: @fp2a
// CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
// Note, the GNU builtins do not multiply by sizeof(T)!
return __atomic_fetch_sub(p, 4, memory_order_relaxed);
}

View File

@ -139,6 +139,79 @@ float ff3(_Atomic(float) *d) {
return __c11_atomic_exchange(d, 2, memory_order_seq_cst);
}
struct S {
double x;
};
struct S fd1(struct S *a) {
// CHECK-LABEL: @fd1
// CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
// CHECK: [[RET:%.*]] = alloca %struct.S, align 4
// CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
// CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RET]] to i64*
// CHECK: store i64 [[CALL]], i64* [[CAST]], align 4
struct S ret;
__atomic_load(a, &ret, memory_order_seq_cst);
return ret;
}
void fd2(struct S *a, struct S *b) {
// CHECK-LABEL: @fd2
// CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
// CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
// CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
// CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
// CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
// CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
// CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
// CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
// CHECK-NEXT: ret void
__atomic_store(a, b, memory_order_seq_cst);
}
void fd3(struct S *a, struct S *b, struct S *c) {
// CHECK-LABEL: @fd3
// CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
// CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
// CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
// CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
// CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
// CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
// CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
// CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
// CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
// CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
// CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
// CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
__atomic_exchange(a, b, c, memory_order_seq_cst);
}
_Bool fd4(struct S *a, struct S *b, struct S *c) {
// CHECK-LABEL: @fd4
// CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca %struct.S*, align 4
// CHECK: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
// CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
// CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
// CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
// CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
// CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
// CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
// CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
// CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
// CHECK-NEXT: [[LOAD_C:%.*]] = load i64* [[COERCED_C]], align 4
// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
// CHECK-NEXT: ret i1 [[CALL]]
return __atomic_compare_exchange(a, b, c, 1, 5, 5);
}
int* fp1(_Atomic(int*) *p) {
// CHECK-LABEL: @fp1
// CHECK: load atomic i32* {{.*}} seq_cst