[Power] Improve the expansion of atomic loads/stores

Summary:
Atomic loads and stores of up to the native size (32 bits, or 64 for PPC64)
can be lowered to a simple load or store instruction (as the synchronization
is already handled by AtomicExpand, and the atomicity is guaranteed thanks to
the alignment requirements of atomic accesses). This is exactly what this patch
does. Previously, these were implemented by complex
load-linked/store-conditional loops, an obvious performance problem.

For example, this patch turns
```
define void @store_i8_unordered(i8* %mem) {
  store atomic i8 42, i8* %mem unordered, align 1
  ret void
}
```
from
```
_store_i8_unordered:                    ; @store_i8_unordered
; BB#0:
    rlwinm r2, r3, 3, 27, 28
    li r4, 42
    xori r5, r2, 24
    rlwinm r2, r3, 0, 0, 29
    li r3, 255
    slw r4, r4, r5
    slw r3, r3, r5
    and r4, r4, r3
LBB4_1:                                 ; =>This Inner Loop Header: Depth=1
    lwarx r5, 0, r2
    andc r5, r5, r3
    or r5, r4, r5
    stwcx. r5, 0, r2
    bne cr0, LBB4_1
; BB#2:
    blr
```
into
```
_store_i8_unordered:                    ; @store_i8_unordered
; BB#0:
    li r2, 42
    stb r2, 0(r3)
    blr

```
which looks like a pretty clear win to me.

Test Plan:
fixed the tests + new test for indexed accesses + make check-all

Reviewers: jfb, wschmidt, hfinkel

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D5587

llvm-svn: 218922
This commit is contained in:
Robin Morisset 2014-10-02 22:27:07 +00:00
parent 7425c8c279
commit e1ca44bd4c
7 changed files with 127 additions and 10 deletions

View File

@ -613,10 +613,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
} }
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); if (!isPPC64) {
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); }
setBooleanContents(ZeroOrOneBooleanContent); setBooleanContents(ZeroOrOneBooleanContent);
// Altivec instructions set fields to all zeros or all ones. // Altivec instructions set fields to all zeros or all ones.

View File

@ -1135,3 +1135,9 @@ def : Pat<(i64 (unaligned4load xoaddr:$src)),
def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>; (STDX $rS, xoaddr:$dst)>;
// 64-bit atomic loads and stores
def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;

View File

@ -3695,3 +3695,19 @@ defm : TrapExtendedMnemonic<"lgt", 1>;
defm : TrapExtendedMnemonic<"lnl", 5>; defm : TrapExtendedMnemonic<"lnl", 5>;
defm : TrapExtendedMnemonic<"lng", 6>; defm : TrapExtendedMnemonic<"lng", 6>;
defm : TrapExtendedMnemonic<"u", 31>; defm : TrapExtendedMnemonic<"u", 31>;
// Atomic loads
def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>;
def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>;
def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>;
def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>;
def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>;
def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>;
// Atomic stores
def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>;
def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>;
def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>;
def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;

View File

@ -30,8 +30,9 @@ define void @atomic_store(i64* %mem, i64 %val) nounwind {
entry: entry:
; CHECK: @atomic_store ; CHECK: @atomic_store
store atomic i64 %val, i64* %mem release, align 64 store atomic i64 %val, i64* %mem release, align 64
; CHECK: ldarx ; CHECK: sync 1
; CHECK: stdcx. ; CHECK-NOT: stdcx
; CHECK: std
ret void ret void
} }
@ -39,9 +40,9 @@ define i64 @atomic_load(i64* %mem) nounwind {
entry: entry:
; CHECK: @atomic_load ; CHECK: @atomic_load
%tmp = load atomic i64* %mem acquire, align 64 %tmp = load atomic i64* %mem acquire, align 64
; CHECK: ldarx ; CHECK-NOT: ldarx
; CHECK: stdcx. ; CHECK: ld
; CHECK: stdcx. ; CHECK: sync 1
ret i64 %tmp ret i64 %tmp
} }

View File

@ -0,0 +1,81 @@
; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; FIXME: -verify-machineinstrs currently fails on ppc64 (mismatched register/instruction).
; This is already checked for in Atomics-64.ll
; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
; In this file, we check that atomic load/store can make use of the indexed
; versions of the instructions.
; Indexed version of loads
define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) {
; CHECK-LABEL: load_x_i8_seq_cst
; CHECK: sync 0
; CHECK: lbzx
; CHECK: sync 1
%ptr = getelementptr inbounds [100000 x i8]* %mem, i64 0, i64 90000
%val = load atomic i8* %ptr seq_cst, align 1
ret i8 %val
}
define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
; CHECK-LABEL: load_x_i16_acquire
; CHECK: lhzx
; CHECK: sync 1
%ptr = getelementptr inbounds [100000 x i16]* %mem, i64 0, i64 90000
%val = load atomic i16* %ptr acquire, align 2
ret i16 %val
}
define i32 @load_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: load_x_i32_monotonic
; CHECK: lwzx
; CHECK-NOT: sync
%ptr = getelementptr inbounds [100000 x i32]* %mem, i64 0, i64 90000
%val = load atomic i32* %ptr monotonic, align 4
ret i32 %val
}
define i64 @load_x_i64_unordered([100000 x i64]* %mem) {
; CHECK-LABEL: load_x_i64_unordered
; PPC32: __sync_
; PPC64-NOT: __sync_
; PPC64: ldx
; CHECK-NOT: sync
%ptr = getelementptr inbounds [100000 x i64]* %mem, i64 0, i64 90000
%val = load atomic i64* %ptr unordered, align 8
ret i64 %val
}
; Indexed version of stores
define void @store_x_i8_seq_cst([100000 x i8]* %mem) {
; CHECK-LABEL: store_x_i8_seq_cst
; CHECK: sync 0
; CHECK: stbx
%ptr = getelementptr inbounds [100000 x i8]* %mem, i64 0, i64 90000
store atomic i8 42, i8* %ptr seq_cst, align 1
ret void
}
define void @store_x_i16_release([100000 x i16]* %mem) {
; CHECK-LABEL: store_x_i16_release
; CHECK: sync 1
; CHECK: sthx
%ptr = getelementptr inbounds [100000 x i16]* %mem, i64 0, i64 90000
store atomic i16 42, i16* %ptr release, align 2
ret void
}
define void @store_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: store_x_i32_monotonic
; CHECK-NOT: sync
; CHECK: stwx
%ptr = getelementptr inbounds [100000 x i32]* %mem, i64 0, i64 90000
store atomic i32 42, i32* %ptr monotonic, align 4
ret void
}
define void @store_x_i64_unordered([100000 x i64]* %mem) {
; CHECK-LABEL: store_x_i64_unordered
; CHECK-NOT: sync 0
; CHECK-NOT: sync 1
; PPC32: __sync_
; PPC64-NOT: __sync_
; PPC64: stdx
%ptr = getelementptr inbounds [100000 x i64]* %mem, i64 0, i64 90000
store atomic i64 42, i64* %ptr unordered, align 8
ret void
}

View File

@ -11,18 +11,21 @@
; We also vary orderings to check for barriers. ; We also vary orderings to check for barriers.
define i8 @load_i8_unordered(i8* %mem) { define i8 @load_i8_unordered(i8* %mem) {
; CHECK-LABEL: load_i8_unordered ; CHECK-LABEL: load_i8_unordered
; CHECK: lbz
; CHECK-NOT: sync ; CHECK-NOT: sync
%val = load atomic i8* %mem unordered, align 1 %val = load atomic i8* %mem unordered, align 1
ret i8 %val ret i8 %val
} }
define i16 @load_i16_monotonic(i16* %mem) { define i16 @load_i16_monotonic(i16* %mem) {
; CHECK-LABEL: load_i16_monotonic ; CHECK-LABEL: load_i16_monotonic
; CHECK: lhz
; CHECK-NOT: sync ; CHECK-NOT: sync
%val = load atomic i16* %mem monotonic, align 2 %val = load atomic i16* %mem monotonic, align 2
ret i16 %val ret i16 %val
} }
define i32 @load_i32_acquire(i32* %mem) { define i32 @load_i32_acquire(i32* %mem) {
; CHECK-LABEL: load_i32_acquire ; CHECK-LABEL: load_i32_acquire
; CHECK: lwz
%val = load atomic i32* %mem acquire, align 4 %val = load atomic i32* %mem acquire, align 4
; CHECK: sync 1 ; CHECK: sync 1
ret i32 %val ret i32 %val
@ -30,6 +33,9 @@ define i32 @load_i32_acquire(i32* %mem) {
define i64 @load_i64_seq_cst(i64* %mem) { define i64 @load_i64_seq_cst(i64* %mem) {
; CHECK-LABEL: load_i64_seq_cst ; CHECK-LABEL: load_i64_seq_cst
; CHECK: sync 0 ; CHECK: sync 0
; PPC32: __sync_
; PPC64-NOT: __sync_
; PPC64: ld
%val = load atomic i64* %mem seq_cst, align 8 %val = load atomic i64* %mem seq_cst, align 8
; CHECK: sync 1 ; CHECK: sync 1
ret i64 %val ret i64 %val
@ -39,24 +45,30 @@ define i64 @load_i64_seq_cst(i64* %mem) {
define void @store_i8_unordered(i8* %mem) { define void @store_i8_unordered(i8* %mem) {
; CHECK-LABEL: store_i8_unordered ; CHECK-LABEL: store_i8_unordered
; CHECK-NOT: sync ; CHECK-NOT: sync
; CHECK: stb
store atomic i8 42, i8* %mem unordered, align 1 store atomic i8 42, i8* %mem unordered, align 1
ret void ret void
} }
define void @store_i16_monotonic(i16* %mem) { define void @store_i16_monotonic(i16* %mem) {
; CHECK-LABEL: store_i16_monotonic ; CHECK-LABEL: store_i16_monotonic
; CHECK-NOT: sync ; CHECK-NOT: sync
; CHECK: sth
store atomic i16 42, i16* %mem monotonic, align 2 store atomic i16 42, i16* %mem monotonic, align 2
ret void ret void
} }
define void @store_i32_release(i32* %mem) { define void @store_i32_release(i32* %mem) {
; CHECK-LABEL: store_i32_release ; CHECK-LABEL: store_i32_release
; CHECK: sync 1 ; CHECK: sync 1
; CHECK: stw
store atomic i32 42, i32* %mem release, align 4 store atomic i32 42, i32* %mem release, align 4
ret void ret void
} }
define void @store_i64_seq_cst(i64* %mem) { define void @store_i64_seq_cst(i64* %mem) {
; CHECK-LABEL: store_i64_seq_cst ; CHECK-LABEL: store_i64_seq_cst
; CHECK: sync 0 ; CHECK: sync 0
; PPC32: __sync_
; PPC64-NOT: __sync_
; PPC64: std
store atomic i64 42, i64* %mem seq_cst, align 8 store atomic i64 42, i64* %mem seq_cst, align 8
ret void ret void
} }

View File

@ -13,4 +13,5 @@ entry:
ret void ret void
} }
; CHECK: stwcx. ; CHECK: sync
; CHECK: stb