CodeGen: Don't completely mess up optimized atomic libcalls
Summary: We did a great job getting this wrong:

- We messed up which LLVM IR types to use for arguments and return values. The optimized libcalls use integer types for values. Clang attempted to use the IR type which corresponds to the value passed in instead of using an appropriately sized integer type; this would result in violations of the ABI for, as an example, floating point types.

- We didn't bother recording the result of the atomic libcall in the destination memory.

Instead, call the functions with arguments matching the type of the libcall prototype's parameters. This fixes PR20780.

Differential Revision: http://reviews.llvm.org/D5098

llvm-svn: 216714
This commit is contained in:
parent
486f440cf1
commit
0392cf892f
|
@ -465,11 +465,19 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
|
|||
static void
|
||||
AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
|
||||
bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
|
||||
SourceLocation Loc) {
|
||||
SourceLocation Loc, CharUnits SizeInChars) {
|
||||
if (UseOptimizedLibcall) {
|
||||
// Load value and pass it to the function directly.
|
||||
unsigned Align = CGF.getContext().getTypeAlignInChars(ValTy).getQuantity();
|
||||
Val = CGF.EmitLoadOfScalar(Val, false, Align, ValTy, Loc);
|
||||
int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
|
||||
ValTy =
|
||||
CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false);
|
||||
llvm::Type *IPtrTy = llvm::IntegerType::get(CGF.getLLVMContext(),
|
||||
SizeInBits)->getPointerTo();
|
||||
Val = CGF.EmitLoadOfScalar(CGF.Builder.CreateBitCast(Val, IPtrTy), false,
|
||||
Align, CGF.getContext().getPointerType(ValTy),
|
||||
Loc);
|
||||
// Coerce the value into an appropriately sized integer type.
|
||||
Args.add(RValue::get(Val), ValTy);
|
||||
} else {
|
||||
// Non-optimized functions always take a reference.
|
||||
|
@ -638,7 +646,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
|
|||
HaveRetTy = true;
|
||||
Args.add(RValue::get(EmitCastToVoidPtr(Val1)), getContext().VoidPtrTy);
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2, MemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
Args.add(RValue::get(Order), getContext().IntTy);
|
||||
Order = OrderFail;
|
||||
break;
|
||||
|
@ -650,7 +658,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
|
|||
case AtomicExpr::AO__atomic_exchange:
|
||||
LibCallName = "__atomic_exchange";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
break;
|
||||
// void __atomic_store(size_t size, void *mem, void *val, int order)
|
||||
// void __atomic_store_N(T *mem, T val, int order)
|
||||
|
@ -661,7 +669,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
|
|||
RetTy = getContext().VoidTy;
|
||||
HaveRetTy = true;
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
break;
|
||||
// void __atomic_load(size_t size, void *mem, void *return, int order)
|
||||
// T __atomic_load_N(T *mem, int order)
|
||||
|
@ -675,35 +683,35 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
|
|||
case AtomicExpr::AO__atomic_fetch_add:
|
||||
LibCallName = "__atomic_fetch_add";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
break;
|
||||
// T __atomic_fetch_and_N(T *mem, T val, int order)
|
||||
case AtomicExpr::AO__c11_atomic_fetch_and:
|
||||
case AtomicExpr::AO__atomic_fetch_and:
|
||||
LibCallName = "__atomic_fetch_and";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
break;
|
||||
// T __atomic_fetch_or_N(T *mem, T val, int order)
|
||||
case AtomicExpr::AO__c11_atomic_fetch_or:
|
||||
case AtomicExpr::AO__atomic_fetch_or:
|
||||
LibCallName = "__atomic_fetch_or";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
break;
|
||||
// T __atomic_fetch_sub_N(T *mem, T val, int order)
|
||||
case AtomicExpr::AO__c11_atomic_fetch_sub:
|
||||
case AtomicExpr::AO__atomic_fetch_sub:
|
||||
LibCallName = "__atomic_fetch_sub";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
break;
|
||||
// T __atomic_fetch_xor_N(T *mem, T val, int order)
|
||||
case AtomicExpr::AO__c11_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__atomic_fetch_xor:
|
||||
LibCallName = "__atomic_fetch_xor";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
|
||||
E->getExprLoc());
|
||||
E->getExprLoc(), sizeChars);
|
||||
break;
|
||||
default: return EmitUnsupportedRValue(E, "atomic library call");
|
||||
}
|
||||
|
@ -715,7 +723,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
|
|||
if (!HaveRetTy) {
|
||||
if (UseOptimizedLibcall) {
|
||||
// Value is returned directly.
|
||||
RetTy = MemTy;
|
||||
// The function returns an appropriately sized integer type.
|
||||
RetTy = getContext().getIntTypeForBitwidth(
|
||||
getContext().toBits(sizeChars), /*Signed=*/false);
|
||||
} else {
|
||||
// Value is returned through parameter before the order.
|
||||
RetTy = getContext().VoidTy;
|
||||
|
@ -733,8 +743,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
|
|||
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
|
||||
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
|
||||
RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
|
||||
if (!RetTy->isVoidType())
|
||||
return Res;
|
||||
if (!RetTy->isVoidType()) {
|
||||
if (UseOptimizedLibcall) {
|
||||
if (HaveRetTy)
|
||||
return Res;
|
||||
llvm::StoreInst *StoreDest = Builder.CreateStore(
|
||||
Res.getScalarVal(),
|
||||
Builder.CreateBitCast(Dest, FTy->getReturnType()->getPointerTo()));
|
||||
StoreDest->setAlignment(Align);
|
||||
}
|
||||
}
|
||||
if (E->getType()->isVoidType())
|
||||
return RValue::get(nullptr);
|
||||
return convertTempToRValue(Dest, E->getType(), E->getExprLoc());
|
||||
|
|
|
@ -7,31 +7,31 @@ enum memory_order {
|
|||
|
||||
int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) {
|
||||
// CHECK: test_c11_atomic_fetch_add_int_ptr
|
||||
// CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
|
||||
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
|
||||
return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) {
|
||||
// CHECK: test_c11_atomic_fetch_sub_int_ptr
|
||||
// CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
|
||||
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
|
||||
return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
int test_c11_atomic_fetch_add_int(_Atomic(int) *p) {
|
||||
// CHECK: test_c11_atomic_fetch_add_int
|
||||
// CHECK: {{%[^ ]*}} = tail call i32 bitcast (i32* (i8*, i32, i32)* @__atomic_fetch_add_4 to i32 (i8*, i32, i32)*)(i8* {{%[0-9]+}}, i32 3, i32 5)
|
||||
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5)
|
||||
return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) {
|
||||
// CHECK: test_c11_atomic_fetch_sub_int
|
||||
// CHECK: {{%[^ ]*}} = tail call i32 bitcast (i32* (i8*, i32, i32)* @__atomic_fetch_sub_4 to i32 (i8*, i32, i32)*)(i8* {{%[0-9]+}}, i32 5, i32 5)
|
||||
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5)
|
||||
return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
int *fp2a(int **p) {
|
||||
// CHECK: @fp2a
|
||||
// CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
|
||||
// CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
|
||||
// Note, the GNU builtins do not multiply by sizeof(T)!
|
||||
return __atomic_fetch_sub(p, 4, memory_order_relaxed);
|
||||
}
|
||||
|
|
|
@ -139,6 +139,79 @@ float ff3(_Atomic(float) *d) {
|
|||
return __c11_atomic_exchange(d, 2, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
struct S {
|
||||
double x;
|
||||
};
|
||||
|
||||
struct S fd1(struct S *a) {
|
||||
// CHECK-LABEL: @fd1
|
||||
// CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
|
||||
// CHECK: [[RET:%.*]] = alloca %struct.S, align 4
|
||||
// CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
|
||||
// CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RET]] to i64*
|
||||
// CHECK: store i64 [[CALL]], i64* [[CAST]], align 4
|
||||
struct S ret;
|
||||
__atomic_load(a, &ret, memory_order_seq_cst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void fd2(struct S *a, struct S *b) {
|
||||
// CHECK-LABEL: @fd2
|
||||
// CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
|
||||
// CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
|
||||
// CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
|
||||
// CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
|
||||
// CHECK-NEXT: ret void
|
||||
__atomic_store(a, b, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
void fd3(struct S *a, struct S *b, struct S *c) {
|
||||
// CHECK-LABEL: @fd3
|
||||
// CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
|
||||
// CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
|
||||
// CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
|
||||
// CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
|
||||
// CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
|
||||
// CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
|
||||
|
||||
__atomic_exchange(a, b, c, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
_Bool fd4(struct S *a, struct S *b, struct S *c) {
|
||||
// CHECK-LABEL: @fd4
|
||||
// CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca %struct.S*, align 4
|
||||
// CHECK: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
|
||||
// CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
|
||||
// CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
|
||||
// CHECK-NEXT: [[LOAD_C:%.*]] = load i64* [[COERCED_C]], align 4
|
||||
// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
|
||||
// CHECK-NEXT: ret i1 [[CALL]]
|
||||
return __atomic_compare_exchange(a, b, c, 1, 5, 5);
|
||||
}
|
||||
|
||||
int* fp1(_Atomic(int*) *p) {
|
||||
// CHECK-LABEL: @fp1
|
||||
// CHECK: load atomic i32* {{.*}} seq_cst
|
||||
|
|
Loading…
Reference in New Issue