Improve C11 atomics support:

- Generate atomicrmw operations in most of the cases when it's sensible to do so. - Don't crash in several common cases (and hopefully don't crash in more of them). - Add some better tests. We now generate significantly better code for things like: _Atomic(int) x; ... x++; On MIPS, this now generates a 4-instruction ll/sc loop, where previously it generated about 30 instructions in two nested loops. On x86-64, we generate a single lock incl, instead of a lock cmpxchgl loop (one instruction instead of ten). llvm-svn: 176420
2013-03-03 16:02:42 +00:00 · 2013-03-03 16:02:42 +00:00 · ef78c305fa
parent e0a5c0d914
commit ef78c305fa
3 changed files with 228 additions and 12 deletions
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                           bool isInc, bool isPre) {
  
  QualType type = E->getSubExpr()->getType();
-  llvm::Value *value = EmitLoadOfLValue(LV);
-  llvm::Value *input = value;
  llvm::PHINode *atomicPHI = 0;
+  llvm::Value *value;
+  llvm::Value *input;

  int amount = (isInc ? 1 : -1);

  if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
+    type = atomicTy->getValueType();
+    if (isInc && type->isBooleanType()) {
+      llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
+      if (isPre) {
+        Builder.Insert(new llvm::StoreInst(True,
+              LV.getAddress(), LV.isVolatileQualified(),
+              LV.getAlignment().getQuantity(),
+              llvm::SequentiallyConsistent));
+        return Builder.getTrue();
+      }
+      // For atomic bool increment, we just store true and return it for
+      // preincrement, do an atomic swap with true for postincrement
+        return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
+            LV.getAddress(), True, llvm::SequentiallyConsistent);
+    }
+    // Special case for atomic increment / decrement on integers, emit
+    // atomicrmw instructions.  We skip this if we want to be doing overflow
+    // checking, and fall into the slow path with the atomic cmpxchg loop.  
+    if (!type->isBooleanType() && type->isIntegerType() &&
+        !(type->isUnsignedIntegerType() &&
+         CGF.SanOpts->UnsignedIntegerOverflow) &&
+        CGF.getLangOpts().getSignedOverflowBehavior() !=
+         LangOptions::SOB_Trapping) {
+      llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
+        llvm::AtomicRMWInst::Sub;
+      llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
+        llvm::Instruction::Sub;
+      llvm::Value *amt = CGF.EmitToMemory(
+          llvm::ConstantInt::get(ConvertType(type), 1, true), type);
+      llvm::Value *old = Builder.CreateAtomicRMW(aop,
+          LV.getAddress(), amt, llvm::SequentiallyConsistent);
+      return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+    }
+    value = EmitLoadOfLValue(LV);
+    input = value;
+    // For every other atomic operation, we need to emit a load-op-cmpxchg loop
    llvm::BasicBlock *startBB = Builder.GetInsertBlock();
    llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+    value = CGF.EmitToMemory(value, type);
    Builder.CreateBr(opBB);
    Builder.SetInsertPoint(opBB);
    atomicPHI = Builder.CreatePHI(value->getType(), 2);
    atomicPHI->addIncoming(value, startBB);
-    type = atomicTy->getValueType();
    value = atomicPHI;
+  } else {
+    value = EmitLoadOfLValue(LV);
+    input = value;
  }

  // Special case of integer increment that we have to check first: bool++.
@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
    llvm::BasicBlock *opBB = Builder.GetInsertBlock();
    llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
    llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
-        value, llvm::SequentiallyConsistent);
+        CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
    atomicPHI->addIncoming(old, opBB);
    llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
    Builder.CreateCondBr(success, contBB, opBB);
@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
  OpInfo.E = E;
  // Load/convert the LHS.
  LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
-  OpInfo.LHS = EmitLoadOfLValue(LHSLV);

  llvm::PHINode *atomicPHI = 0;
-  if (LHSTy->isAtomicType()) {
+  if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
+    QualType type = atomicTy->getValueType();
+    if (!type->isBooleanType() && type->isIntegerType() &&
+         !(type->isUnsignedIntegerType() &&
+          CGF.SanOpts->UnsignedIntegerOverflow) &&
+         CGF.getLangOpts().getSignedOverflowBehavior() !=
+          LangOptions::SOB_Trapping) {
+      llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
+      switch (OpInfo.Opcode) {
+        // We don't have atomicrmw operands for *, %, /, <<, >>
+        case BO_MulAssign: case BO_DivAssign:
+        case BO_RemAssign:
+        case BO_ShlAssign:
+        case BO_ShrAssign:
+          break;
+        case BO_AddAssign:
+          aop = llvm::AtomicRMWInst::Add;
+          break;
+        case BO_SubAssign:
+          aop = llvm::AtomicRMWInst::Sub;
+          break;
+        case BO_AndAssign:
+          aop = llvm::AtomicRMWInst::And;
+          break;
+        case BO_XorAssign:
+          aop = llvm::AtomicRMWInst::Xor;
+          break;
+        case BO_OrAssign:
+          aop = llvm::AtomicRMWInst::Or;
+          break;
+        default:
+          llvm_unreachable("Invalid compound assignment type");
+      }
+      if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
+        llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
+              E->getRHS()->getType(), LHSTy), LHSTy);
+        Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
+            llvm::SequentiallyConsistent);
+        return LHSLV;
+      }
+    }
    // FIXME: For floating point types, we should be saving and restoring the
    // floating point environment in the loop.
    llvm::BasicBlock *startBB = Builder.GetInsertBlock();
    llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
+    OpInfo.LHS = EmitLoadOfLValue(LHSLV);
+    OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
    Builder.CreateBr(opBB);
    Builder.SetInsertPoint(opBB);
    atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
    atomicPHI->addIncoming(OpInfo.LHS, startBB);
    OpInfo.LHS = atomicPHI;
  }
+  else
+    OpInfo.LHS = EmitLoadOfLValue(LHSLV);

  OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
                                    E->getComputationLHSType());
@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
    llvm::BasicBlock *opBB = Builder.GetInsertBlock();
    llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
    llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
-        Result, llvm::SequentiallyConsistent);
+        CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
    atomicPHI->addIncoming(old, opBB);
    llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
    Builder.CreateCondBr(success, contBB, opBB);
--- a/clang/test/CodeGen/atomic_ops.c
+++ b/clang/test/CodeGen/atomic_ops.c
@ -15,9 +15,4 @@ void foo(int x)
  // CHECK: sdiv i32
  // CHECK: cmpxchg i16*

-  // These should be emitting atomicrmw instructions, but they aren't yet
-  i += 2; // CHECK: cmpxchg
-  i -= 2; // CHECK: cmpxchg
-  i++; // CHECK: cmpxchg
-  i--; // CHECK: cmpxchg
 }
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@ -0,0 +1,139 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
+
+// Test that we are generating atomicrmw instructions, rather than
+// compare-exchange loops for common atomic ops.  This makes a big difference
+// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
+// the load and then another ll/sc in the loop, expanding to about 30
+// instructions when it should be only 4.  It has a smaller, but still
+// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
+// RMW instructions.
+//
+// We currently emit cmpxchg loops for most operations on _Bools, because
+// they're sufficiently rare that it's not worth making sure that the semantics
+// are correct.
+
+typedef int __attribute__((vector_size(16))) vector;
+
+_Atomic(_Bool) b;
+_Atomic(int) i;
+_Atomic(long long) l;
+_Atomic(short) s;
+_Atomic(char*) p;
+_Atomic(float) f;
+_Atomic(vector) v;
+
+// CHECK-NOT: cmpxchg
+
+// CHECK: testinc
+void testinc(void)
+{
+  // Special case for suffix bool++, sets to true and returns the old value.
+  // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
+  b++;
+  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+  i++;
+  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+  l++;
+  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+  s++;
+  // Prefix increment
+  // Special case for bool: set to true and return true
+  // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
+  ++b;
+  // Currently, we have no variant of atomicrmw that returns the new value, so
+  // we have to generate an atomic add, which returns the old value, and then a
+  // non-atomic add.
+  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+  // CHECK: add i32 
+  ++i;
+  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+  // CHECK: add i64
+  ++l;
+  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+  // CHECK: add i16
+  ++s;
+}
+// CHECK: testdec
+void testdec(void)
+{
+  // CHECK: cmpxchg i8* @b
+  b--;
+  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+  i--;
+  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+  l--;
+  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+  s--;
+  // CHECK: cmpxchg i8* @b
+  --b;
+  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+  // CHECK: sub i32
+  --i;
+  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+  // CHECK: sub i64
+  --l;
+  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+  // CHECK: sub i16
+  --s;
+}
+// CHECK: testaddeq
+void testaddeq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
+  b += 42;
+  i += 42;
+  l += 42;
+  s += 42;
+}
+// CHECK: testsubeq
+void testsubeq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
+  b -= 42;
+  i -= 42;
+  l -= 42;
+  s -= 42;
+}
+// CHECK: testxoreq
+void testxoreq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
+  b ^= 42;
+  i ^= 42;
+  l ^= 42;
+  s ^= 42;
+}
+// CHECK: testoreq
+void testoreq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
+  b |= 42;
+  i |= 42;
+  l |= 42;
+  s |= 42;
+}
+// CHECK: testandeq
+void testandeq(void)
+{
+  // CHECK: cmpxchg i8* @b
+  // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
+  // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
+  // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
+  b &= 42;
+  i &= 42;
+  l &= 42;
+  s &= 42;
+}
+