ARM & AArch64: make use of common cmpxchg idioms after expansion

The C and C++ semantics for compare_exchange require it to return a bool
indicating success. This gets mapped to LLVM IR which follows each cmpxchg with
an icmp of the value loaded against the desired value.

When lowered to ldxr/stxr loops, this extra comparison is redundant: its
results are implicit in the control-flow of the function.

This commit makes two changes: it replaces that icmp with appropriate PHI
nodes, and then makes sure earlyCSE is called after expansion to actually make
use of the opportunities revealed.

I've also added -{arm,aarch64}-enable-atomic-tidy options, so that
existing fragile tests aren't perturbed too much by the change. Many
of them either rely on undef/unreachable too pervasively to be
restored to something well-defined (particularly while making sure
they test the same obscure assert from many years ago), or depend on a
particular CFG shape, which is disrupted by SimplifyCFG.

rdar://problem/16227836

llvm-svn: 209883
This commit is contained in:
Tim Northover 2014-05-30 10:09:59 +00:00
parent 5070c18928
commit b4ddc0845a
59 changed files with 347 additions and 87 deletions

View File

@ -300,12 +300,50 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
// Finally, make sure later instructions don't get reordered with a fence if
// necessary.
// Make sure later instructions don't get reordered with a fence if necessary.
Builder.SetInsertPoint(BarrierBB);
insertTrailingFence(Builder, SuccessOrder);
Builder.CreateBr(ExitBB);
// Finally, we have control-flow based knowledge of whether the cmpxchg
// succeeded or not. We expose this to later passes by converting any
// subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
// Setup the builder so we can create any PHIs we need.
Builder.SetInsertPoint(FailureBB, FailureBB->begin());
BasicBlock *SuccessBB = FailureOrder == Monotonic ? BarrierBB : TryStoreBB;
PHINode *Success = 0, *Failure = 0;
// Look for any users of the cmpxchg that are just comparing the loaded value
// against the desired one, and replace them with the CFG-derived version.
for (auto User : CI->users()) {
ICmpInst *ICmp = dyn_cast<ICmpInst>(User);
if (!ICmp)
continue;
// Because we know ICmp uses CI, we only need one operand to be the old
// value.
if (ICmp->getOperand(0) != CI->getCompareOperand() &&
ICmp->getOperand(1) != CI->getCompareOperand())
continue;
if (ICmp->getPredicate() == CmpInst::ICMP_EQ) {
if (!Success) {
Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
Success->addIncoming(ConstantInt::getFalse(Ctx), LoopBB);
}
ICmp->replaceAllUsesWith(Success);
} else if (ICmp->getPredicate() == CmpInst::ICMP_NE) {
if (!Failure) {
Failure = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
Failure->addIncoming(ConstantInt::getFalse(Ctx), SuccessBB);
Failure->addIncoming(ConstantInt::getTrue(Ctx), LoopBB);
}
ICmp->replaceAllUsesWith(Failure);
}
}
CI->replaceAllUsesWith(Loaded);
CI->eraseFromParent();

View File

@ -53,6 +53,12 @@ static cl::opt<bool>
EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
" optimization pass"), cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden,
cl::desc("Run SimplifyCFG after expanding atomic operations"
" to make use of cmpxchg flow-based information"),
cl::init(true));
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@ -113,6 +119,7 @@ public:
return getTM<AArch64TargetMachine>();
}
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
@ -135,6 +142,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
return new AArch64PassConfig(this, PM);
}
void AArch64PassConfig::addIRPasses() {
// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
// ourselves.
addPass(createAtomicExpandLoadLinkedPass(TM));
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass());
TargetPassConfig::addIRPasses();
}
// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants
@ -146,10 +167,6 @@ bool AArch64PassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
// ourselves.
addPass(createAtomicExpandLoadLinkedPass(TM));
return false;
}

View File

@ -28,6 +28,12 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
cl::desc("Inhibit optimization of S->D register accesses on A15"),
cl::init(false));
static cl::opt<bool>
EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
cl::desc("Run SimplifyCFG after expanding atomic operations"
" to make use of cmpxchg flow-based information"),
cl::init(true));
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@ -213,6 +219,7 @@ public:
return *getARMTargetMachine().getSubtargetImpl();
}
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addPreRegAlloc() override;
@ -225,11 +232,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
return new ARMPassConfig(this, PM);
}
bool ARMPassConfig::addPreISel() {
void ARMPassConfig::addIRPasses() {
const ARMSubtarget *Subtarget = &getARMSubtarget();
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
addPass(createAtomicExpandLoadLinkedPass(TM));
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass());
}
TargetPassConfig::addIRPasses();
}
bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));

View File

@ -1,4 +1,4 @@
; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
@var8 = global i8 0
@var16 = global i16 0

View File

@ -1,4 +1,4 @@
; RUN: llc %s -o - | FileCheck %s
; RUN: llc %s -o - -aarch64-atomic-cfg-tidy=0 | FileCheck %s
; Check that ANDS (tst) is not merged with ADD when the immediate
; is not 0.
; <rdar://problem/16693089>

View File

@ -1,4 +1,4 @@
; RUN: llc -O3 < %s | FileCheck %s
; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s
target triple = "arm64-apple-ios"
; rdar://12462006

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -stress-early-ifcvt | FileCheck %s
; RUN: llc < %s -stress-early-ifcvt -aarch64-atomic-cfg-tidy=0 | FileCheck %s
target triple = "arm64-apple-macosx"
; CHECK: mm2

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone < %s | FileCheck %s
; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
@lhs = global fp128 zeroinitializer, align 16
@rhs = global fp128 zeroinitializer, align 16

View File

@ -1,4 +1,4 @@
; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
; rdar://11935841
define void @t1() nounwind ssp {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=arm64 | FileCheck %s
; RUN: llc < %s -march=arm64 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
;
; Get the actual value of the overflow bit.

View File

@ -889,8 +889,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@ -911,8 +910,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@ -933,8 +931,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
@addr = global i8* null

View File

@ -1,4 +1,4 @@
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
@stored_label = global i8* null

View File

@ -0,0 +1,89 @@
; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s
define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_return:
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0]
; CHECK: cmp [[LOADED]], w1
; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0]
; CHECK: cbnz [[STATUS]], [[LOOP]]
; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: orr w0, wzr, #0x1
; CHECK: ret
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: mov w0, wzr
; CHECK: ret
%loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = icmp eq i32 %loaded, %oldval
%conv = zext i1 %success to i32
ret i32 %conv
}
define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
; CHECK-LABEL: test_return_bool:
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxrb [[LOADED:w[0-9]+]], [x0]
; CHECK: cmp [[LOADED]], w1, uxtb
; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
; CHECK: stlxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0]
; CHECK: cbnz [[STATUS]], [[LOOP]]
; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: mov w0, wzr
; CHECK: ret
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: orr w0, wzr, #0x1
; CHECK: ret
%loaded = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
%failure = icmp ne i8 %loaded, %oldValue
ret i1 %failure
}
define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_conditional:
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0]
; CHECK: cmp [[LOADED]], w1
; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
; CHECK: stlxr [[STATUS:w[0-9]+]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[LOOP]]
; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: b _bar
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: b _baz
%loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = icmp eq i32 %loaded, %oldval
br i1 %success, label %true, label %false
true:
tail call void @bar() #2
br label %end
false:
tail call void @baz() #2
br label %end
end:
ret void
}
declare void @bar()
declare void @baz()

View File

@ -1,5 +1,5 @@
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=CHECK
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s
define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
; CHECK-LABEL: test_select_i32:

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
; LLVM should be able to cope with multiple uses of the same flag-setting
; instruction at different points of a routine. Either by rematerializing the

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
define i32 @test_jumptable(i32 %in) {
; CHECK: test_jumptable

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
; This file contains tests for the AArch64 load/store optimizer.

View File

@ -1,4 +1,4 @@
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
; We've got the usual issues with LLVM reordering blocks here. The
; tests are correct for the current order, but who knows when that

View File

@ -1,4 +1,4 @@
; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
; RUN: llc -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-eabi"

View File

@ -1,4 +1,4 @@
; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s
; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
; PR5423
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s | FileCheck %s
; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s -arm-atomic-cfg-tidy=0 | FileCheck %s
; Radar 8589805: Counting the number of microcoded operations, such as for an
; LDM instruction, was causing an assertion failure because the microop count
; was being treated as an instruction count.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic -mcpu=arm1136jf-s | FileCheck %s
; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic -mcpu=arm1136jf-s -arm-atomic-cfg-tidy=0 | FileCheck %s
; rdar://8959122 illegal register operands for UMULL instruction
; in cfrac nightly test.
; Armv6 generates a umull that must write to two distinct destination regs.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
;CHECK-LABEL: foo:
;CHECK: adds

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7--linux-gnueabi"

View File

@ -1,8 +1,8 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck -check-prefix=ARM %s
; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck -check-prefix=THUMB %s
; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=ARM %s
; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=THUMB %s
; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
; RUN: | FileCheck -check-prefix=T2 %s
; RUN: llc -mtriple=thumbv8-eabi %s -o - | FileCheck -check-prefix=V8 %s
; RUN: llc -mtriple=thumbv8-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=V8 %s
; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.

View File

@ -189,9 +189,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3
; CHECK-THUMB-LE: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-THUMB: bne
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 | FileCheck %s -check-prefix=CHECKV6
; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 \
; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=CHECKV6
; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=CHECKT2D
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-atomic-cfg-tidy=0 \
; RUN: | FileCheck %s -check-prefix=CHECKELF
; Enable tailcall optimization for iOS 5.0

View File

@ -0,0 +1,101 @@
; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s | FileCheck %s
define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_return:
; CHECK: dmb ishst
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], r1
; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
; CHECK: cmp [[STATUS]], #0
; CHECK: bne [[LOOP]]
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
; CHECK: movs r0, #1
; CHECK: dmb ish
; CHECK: bx lr
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
; CHECK: movs r0, #0
; CHECK: dmb ish
; CHECK: bx lr
%loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = icmp eq i32 %loaded, %oldval
%conv = zext i1 %success to i32
ret i32 %conv
}
define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
; CHECK-LABEL: test_return_bool:
; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
; CHECK: dmb ishst
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], [[OLDBYTE]]
; CHECK: itt ne
; CHECK: movne r0, #1
; CHECK: bxne lr
; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
; CHECK: cmp [[STATUS]], #0
; CHECK: bne [[LOOP]]
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
; CHECK: movs r0, #0
; CHECK: bx lr
%loaded = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
%failure = icmp ne i8 %loaded, %oldValue
ret i1 %failure
}
define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_conditional:
; CHECK: dmb ishst
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], r1
; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
; CHECK: cmp [[STATUS]], #0
; CHECK: bne [[LOOP]]
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
; CHECK: dmb ish
; CHECK: b.w _bar
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
; CHECK: dmb ish
; CHECK: b.w _baz
%loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = icmp eq i32 %loaded, %oldval
br i1 %success, label %true, label %false
true:
tail call void @bar() #2
br label %end
false:
tail call void @baz() #2
br label %end
end:
ret void
}
declare void @bar()
declare void @baz()

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=armv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s
define double @f1() nounwind {
; CHECK-LABEL: f1:

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s
; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 -mattr=+v6,+vfp2 %s -o - | FileCheck %s
@i = weak global i32 0 ; <i32*> [#uses=2]
@u = weak global i32 0 ; <i32*> [#uses=2]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -o /dev/null 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -arm-atomic-cfg-tidy=0 -o /dev/null 2>&1 | FileCheck %s
%struct.S = type { i8* (i8*)*, [1 x i8] }
define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
; RUN: llc < %s -mtriple=arm-apple-ios -arm-atomic-cfg-tidy=0 -mcpu=cortex-a9 | FileCheck %s
; rdar://8402126
; Make sure if-converter is not predicating vldmia and ldmia. These are
; micro-coded and would have long issue latency even if predicated on

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
; If ARMBaseInstrInfo::AnalyzeBlocks returns the wrong value, which was possible
; for blocks with indirect branches, the IfConverter could end up deleting

View File

@ -1,4 +1,4 @@
; RUN: llc -regalloc=greedy < %s | FileCheck %s
; RUN: llc -regalloc=greedy -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
; LSR shouldn't introduce more induction variables than needed, increasing
; register pressure and therefore spilling. There is more room for improvement

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched %s -o - 2>&1 | FileCheck %s
; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched -arm-atomic-cfg-tidy=0 %s -o - 2>&1 | FileCheck %s
;
; Loop counter copies should be eliminated.
; There is also a MUL here, but we don't care where it is scheduled.

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 -regalloc=basic | FileCheck %s
; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
%struct.int16x8_t = type { <8 x i16> }

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s
; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon -arm-atomic-cfg-tidy=0 | FileCheck %s
; PR4789
%bar = type { float, float, float }

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mcpu=cortex-a15 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mcpu=cortex-a15 -verify-machineinstrs -arm-atomic-cfg-tidy=0 | FileCheck %s
; Check a spill right after a function call with large struct byval is correctly
; generated.

View File

@ -1,5 +1,5 @@
; Tests for the two-address instruction pass.
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -arm-atomic-cfg-tidy=0 %s -o - | FileCheck %s
define void @PR13378() nounwind {
; This was orriginally a crasher trying to schedule the instructions.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s
; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -arm-atomic-cfg-tidy=0 -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32-S64"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-atomic-cfg-tidy=0 | FileCheck %s
@csize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1]
@vsize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -arm-atomic-cfg-tidy=0 | FileCheck %s
; PR4659
; PR4682

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts -arm-atomic-cfg-tidy=0 | FileCheck %s
%struct.pix_pos = type { i32, i32, i32, i32, i32, i32 }

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 -O3 | FileCheck %s
; rdar://7493908
; Make sure the result of the first dynamic_alloc isn't copied back to sp more

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -arm-atomic-cfg-tidy=0 | FileCheck %s
; rdar://8115404
; Tail merging must not split an IT block.

View File

@ -1,5 +1,5 @@
; rdar://8465407
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
%struct.buf = type opaque

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=thumbv7-apple-darwin10 < %s | FileCheck %s
; RUN: llc -mtriple=thumbv7-apple-darwin10 -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
%struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 }

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 -mcpu=cortex-a8 | FileCheck %s
; Formerly crashed, 3573915.
define void @RotateStarsFP_Vec() nounwind {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
; CHECK-LABEL: fht:

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
; rdar://7352504
; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]".

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -arm-atomic-cfg-tidy=0 | FileCheck %s
; If-conversion defeats the purpose of this test, which is to check
; conditional branch generation, so a call to make sure it doesn't
; happen and we get actual branches.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s
; rdar://7354379
declare double @foo(double) nounwind readnone

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-default-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-no-restrict-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 -arm-default-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-atomic-cfg-tidy=0 -arm-no-restrict-it | FileCheck %s
define void @foo(i32 %X, i32 %Y) {
entry:

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-no-restrict-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 -arm-default-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-atomic-cfg-tidy=0 -arm-no-restrict-it | FileCheck %s
; There shouldn't be a unconditional branch at end of bb52.
; rdar://7184787

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon -arm-atomic-cfg-tidy=0 | FileCheck %s
; PR4789
%bar = type { float, float, float }

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it | FileCheck %s
; CHECK: it ne
; CHECK-NEXT: cmpne
; CHECK-NEXT: bne [[JUMPTARGET:.LBB[0-9]+_[0-9]+]]

View File

@ -1,4 +1,4 @@
; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - -arm-atomic-cfg-tidy=0 | FileCheck %s
;
; LSR should only check for valid address modes when the IV user is a
; memory address.