Improve C11 atomics support:

- Generate atomicrmw operations in most of the cases when it's sensible to do
  so.
- Don't crash in several common cases (and hopefully don't crash in more of
  them).
- Add some better tests.

We now generate significantly better code for things like:
_Atomic(int) x;
...
x++;

On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
generated about 30 instructions in two nested loops.  On x86-64, we generate a
single lock incl, instead of a lock cmpxchgl loop (one instruction instead of
ten).

llvm-svn: 176420
This commit is contained in:
David Chisnall 2013-03-03 16:02:42 +00:00
parent e0a5c0d914
commit ef78c305fa
3 changed files with 228 additions and 12 deletions

View File

@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
QualType type = E->getSubExpr()->getType();
llvm::Value *value = EmitLoadOfLValue(LV);
llvm::Value *input = value;
llvm::PHINode *atomicPHI = 0;
llvm::Value *value;
llvm::Value *input;
int amount = (isInc ? 1 : -1);
if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
type = atomicTy->getValueType();
if (isInc && type->isBooleanType()) {
llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
if (isPre) {
Builder.Insert(new llvm::StoreInst(True,
LV.getAddress(), LV.isVolatileQualified(),
LV.getAlignment().getQuantity(),
llvm::SequentiallyConsistent));
return Builder.getTrue();
}
// For atomic bool increment, we just store true and return it for
// preincrement, do an atomic swap with true for postincrement
return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
LV.getAddress(), True, llvm::SequentiallyConsistent);
}
// Special case for atomic increment / decrement on integers, emit
// atomicrmw instructions. We skip this if we want to be doing overflow
// checking, and fall into the slow path with the atomic cmpxchg loop.
if (!type->isBooleanType() && type->isIntegerType() &&
!(type->isUnsignedIntegerType() &&
CGF.SanOpts->UnsignedIntegerOverflow) &&
CGF.getLangOpts().getSignedOverflowBehavior() !=
LangOptions::SOB_Trapping) {
llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
llvm::AtomicRMWInst::Sub;
llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
llvm::Instruction::Sub;
llvm::Value *amt = CGF.EmitToMemory(
llvm::ConstantInt::get(ConvertType(type), 1, true), type);
llvm::Value *old = Builder.CreateAtomicRMW(aop,
LV.getAddress(), amt, llvm::SequentiallyConsistent);
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
value = EmitLoadOfLValue(LV);
input = value;
// For every other atomic operation, we need to emit a load-op-cmpxchg loop
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
value = CGF.EmitToMemory(value, type);
Builder.CreateBr(opBB);
Builder.SetInsertPoint(opBB);
atomicPHI = Builder.CreatePHI(value->getType(), 2);
atomicPHI->addIncoming(value, startBB);
type = atomicTy->getValueType();
value = atomicPHI;
} else {
value = EmitLoadOfLValue(LV);
input = value;
}
// Special case of integer increment that we have to check first: bool++.
@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
value, llvm::SequentiallyConsistent);
CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);
@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
OpInfo.E = E;
// Load/convert the LHS.
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
OpInfo.LHS = EmitLoadOfLValue(LHSLV);
llvm::PHINode *atomicPHI = 0;
if (LHSTy->isAtomicType()) {
if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
QualType type = atomicTy->getValueType();
if (!type->isBooleanType() && type->isIntegerType() &&
!(type->isUnsignedIntegerType() &&
CGF.SanOpts->UnsignedIntegerOverflow) &&
CGF.getLangOpts().getSignedOverflowBehavior() !=
LangOptions::SOB_Trapping) {
llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
switch (OpInfo.Opcode) {
// We don't have atomicrmw operands for *, %, /, <<, >>
case BO_MulAssign: case BO_DivAssign:
case BO_RemAssign:
case BO_ShlAssign:
case BO_ShrAssign:
break;
case BO_AddAssign:
aop = llvm::AtomicRMWInst::Add;
break;
case BO_SubAssign:
aop = llvm::AtomicRMWInst::Sub;
break;
case BO_AndAssign:
aop = llvm::AtomicRMWInst::And;
break;
case BO_XorAssign:
aop = llvm::AtomicRMWInst::Xor;
break;
case BO_OrAssign:
aop = llvm::AtomicRMWInst::Or;
break;
default:
llvm_unreachable("Invalid compound assignment type");
}
if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
E->getRHS()->getType(), LHSTy), LHSTy);
Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
llvm::SequentiallyConsistent);
return LHSLV;
}
}
// FIXME: For floating point types, we should be saving and restoring the
// floating point environment in the loop.
llvm::BasicBlock *startBB = Builder.GetInsertBlock();
llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
OpInfo.LHS = EmitLoadOfLValue(LHSLV);
OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
Builder.CreateBr(opBB);
Builder.SetInsertPoint(opBB);
atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
atomicPHI->addIncoming(OpInfo.LHS, startBB);
OpInfo.LHS = atomicPHI;
}
else
OpInfo.LHS = EmitLoadOfLValue(LHSLV);
OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
E->getComputationLHSType());
@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
Result, llvm::SequentiallyConsistent);
CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
atomicPHI->addIncoming(old, opBB);
llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
Builder.CreateCondBr(success, contBB, opBB);

View File

@ -15,9 +15,4 @@ void foo(int x)
// CHECK: sdiv i32
// CHECK: cmpxchg i16*
// These should be emitting atomicrmw instructions, but they aren't yet
i += 2; // CHECK: cmpxchg
i -= 2; // CHECK: cmpxchg
i++; // CHECK: cmpxchg
i--; // CHECK: cmpxchg
}

View File

@ -0,0 +1,139 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
// Test that we are generating atomicrmw instructions, rather than
// compare-exchange loops for common atomic ops. This makes a big difference
// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
// the load and then another ll/sc in the loop, expanding to about 30
// instructions when it should be only 4. It has a smaller, but still
// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
// RMW instructions.
//
// We currently emit cmpxchg loops for most operations on _Bools, because
// they're sufficiently rare that it's not worth making sure that the semantics
// are correct.
typedef int __attribute__((vector_size(16))) vector;
_Atomic(_Bool) b;
_Atomic(int) i;
_Atomic(long long) l;
_Atomic(short) s;
_Atomic(char*) p;
_Atomic(float) f;
_Atomic(vector) v;
// CHECK-NOT: cmpxchg
// CHECK: testinc
void testinc(void)
{
// Special case for suffix bool++, sets to true and returns the old value.
// CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
b++;
// CHECK: atomicrmw add i32* @i, i32 1 seq_cst
i++;
// CHECK: atomicrmw add i64* @l, i64 1 seq_cst
l++;
// CHECK: atomicrmw add i16* @s, i16 1 seq_cst
s++;
// Prefix increment
// Special case for bool: set to true and return true
// CHECK: store atomic i8 1, i8* @b seq_cst, align 1
++b;
// Currently, we have no variant of atomicrmw that returns the new value, so
// we have to generate an atomic add, which returns the old value, and then a
// non-atomic add.
// CHECK: atomicrmw add i32* @i, i32 1 seq_cst
// CHECK: add i32
++i;
// CHECK: atomicrmw add i64* @l, i64 1 seq_cst
// CHECK: add i64
++l;
// CHECK: atomicrmw add i16* @s, i16 1 seq_cst
// CHECK: add i16
++s;
}
// CHECK: testdec
void testdec(void)
{
// CHECK: cmpxchg i8* @b
b--;
// CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
i--;
// CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
l--;
// CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
s--;
// CHECK: cmpxchg i8* @b
--b;
// CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
// CHECK: sub i32
--i;
// CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
// CHECK: sub i64
--l;
// CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
// CHECK: sub i16
--s;
}
// CHECK: testaddeq
void testaddeq(void)
{
// CHECK: cmpxchg i8* @b
// CHECK: atomicrmw add i32* @i, i32 42 seq_cst
// CHECK: atomicrmw add i64* @l, i64 42 seq_cst
// CHECK: atomicrmw add i16* @s, i16 42 seq_cst
b += 42;
i += 42;
l += 42;
s += 42;
}
// CHECK: testsubeq
void testsubeq(void)
{
// CHECK: cmpxchg i8* @b
// CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
// CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
// CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
b -= 42;
i -= 42;
l -= 42;
s -= 42;
}
// CHECK: testxoreq
void testxoreq(void)
{
// CHECK: cmpxchg i8* @b
// CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
// CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
// CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
b ^= 42;
i ^= 42;
l ^= 42;
s ^= 42;
}
// CHECK: testoreq
void testoreq(void)
{
// CHECK: cmpxchg i8* @b
// CHECK: atomicrmw or i32* @i, i32 42 seq_cst
// CHECK: atomicrmw or i64* @l, i64 42 seq_cst
// CHECK: atomicrmw or i16* @s, i16 42 seq_cst
b |= 42;
i |= 42;
l |= 42;
s |= 42;
}
// CHECK: testandeq
void testandeq(void)
{
// CHECK: cmpxchg i8* @b
// CHECK: atomicrmw and i32* @i, i32 42 seq_cst
// CHECK: atomicrmw and i64* @l, i64 42 seq_cst
// CHECK: atomicrmw and i16* @s, i16 42 seq_cst
b &= 42;
i &= 42;
l &= 42;
s &= 42;
}