asan: fix atomic operations on ARM
implement correct atomic load/store for ARM
add test for atomic load/store

http://llvm-reviews.chandlerc.com/D2582

llvm-svn: 199802
parent bc6659c4e9
commit db1ad12ae2
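For context: the "undefined reference" quoted in the diff below is what clang emits when it lowers an __atomic builtin to a libcall that the sanitizer runtime never links against. A minimal hypothetical reproducer (not part of the patch):

// Hypothetical reproducer: on ARM targets where clang cannot inline the
// builtin, this lowers to a call to __atomic_load_4, which is unresolved
// in the sanitizer runtime.
#include <stdint.h>

uint32_t load_acquire(const volatile uint32_t *p) {
  return __atomic_load_n(p, __ATOMIC_ACQUIRE);  // may emit __atomic_load_4
}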
@@ -92,7 +92,8 @@ class AsanChunkFifoList: public IntrusiveList<AsanChunk> {
 
 struct AsanThreadLocalMallocStorage {
   uptr quarantine_cache[16];
-  uptr allocator2_cache[96 * (512 * 8 + 16)];  // Opaque.
+  // Allocator cache contains atomic_uint64_t which must be 8-byte aligned.
+  ALIGNED(8) uptr allocator2_cache[96 * (512 * 8 + 16)];  // Opaque.
   void CommitBack();
  private:
   // These objects are allocated via mmap() and are zero-initialized.
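Note: the array is opaque storage that the allocator reinterprets as a cache object containing atomic_uint64_t fields, so the buffer must be at least as aligned as those atomics. A minimal sketch of the constraint (type names hypothetical, assuming ALIGNED(8) expands to __attribute__((aligned(8))) as in sanitizer_internal_defs.h):

#include <cstdint>

struct AllocatorCacheStandIn {  // stand-in for the real, opaque cache type
  volatile uint64_t counter;    // 8-byte atomic => needs 8-byte alignment
};

struct Storage {
  uintptr_t quarantine_cache[16];
  alignas(8) uintptr_t allocator2_cache[4];  // mirrors the ALIGNED(8) fix
};

static_assert(alignof(Storage) >= alignof(AllocatorCacheStandIn),
              "opaque storage must satisfy the cache type's alignment");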
@@ -44,7 +44,8 @@ struct atomic_uint32_t {
 
 struct atomic_uint64_t {
   typedef u64 Type;
-  volatile Type val_dont_use;
+  // On 32-bit platforms u64 is not necessarily aligned on 8 bytes.
+  volatile ALIGNED(8) Type val_dont_use;
 };
 
 struct atomic_uintptr_t {
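The same issue in miniature: on i386 a u64 struct member is only 4-byte aligned by default, so atomic_uint64_t has to force the alignment itself. A quick compile-time check (illustrative; the failing case assumes a 32-bit x86 target):

#include <cstddef>
#include <cstdint>

struct Plain   { char c; volatile uint64_t v; };             // v at offset 4 on i386
struct Aligned { char c; alignas(8) volatile uint64_t v; };  // v at offset 8

// Without the alignment attribute this can fail when built with -m32:
//   static_assert(offsetof(Plain, v) % 8 == 0, "not guaranteed on i386");
static_assert(offsetof(Aligned, v) % 8 == 0, "guaranteed by alignas(8)");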
@@ -15,8 +15,26 @@
 #ifndef SANITIZER_ATOMIC_CLANG_H
 #define SANITIZER_ATOMIC_CLANG_H
 
+#if defined(__i386__) || defined(__x86_64__)
+# include "sanitizer_atomic_clang_x86.h"
+#else
+# include "sanitizer_atomic_clang_other.h"
+#endif
+
 namespace __sanitizer {
 
+// We would like to just use compiler builtin atomic operations
+// for loads and stores, but they are mostly broken in clang:
+// - they lead to vastly inefficient code generation
+//   (http://llvm.org/bugs/show_bug.cgi?id=17281)
+// - 64-bit atomic operations are not implemented on x86_32
+//   (http://llvm.org/bugs/show_bug.cgi?id=15034)
+// - they are not implemented on ARM
+//   error: undefined reference to '__atomic_load_4'
+
+// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// for mappings of the memory model to different processors.
+
 INLINE void atomic_signal_fence(memory_order) {
   __asm__ __volatile__("" ::: "memory");
 }
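atomic_signal_fence above is a compiler-only barrier: it forbids the compiler from moving memory accesses across it but emits no instruction, which is exactly what ordering against a signal handler on the same thread requires. An illustrative sketch (names hypothetical):

int pending_data;
volatile int pending_flag;

void queue_work() {
  pending_data = 42;
  __asm__ __volatile__("" ::: "memory");  // what atomic_signal_fence expands to
  pending_flag = 1;  // a handler that observes flag == 1 also sees the data
}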
@@ -25,59 +43,6 @@ INLINE void atomic_thread_fence(memory_order) {
   __sync_synchronize();
 }
 
-INLINE void proc_yield(int cnt) {
-  __asm__ __volatile__("" ::: "memory");
-#if defined(__i386__) || defined(__x86_64__)
-  for (int i = 0; i < cnt; i++)
-    __asm__ __volatile__("pause");
-#endif
-  __asm__ __volatile__("" ::: "memory");
-}
-
-template<typename T>
-INLINE typename T::Type atomic_load(
-    const volatile T *a, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_consume
-      | memory_order_acquire | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-  typename T::Type v;
-  // FIXME:
-  // 64-bit atomic operations are not atomic on 32-bit platforms.
-  // The implementation lacks necessary memory fences on ARM/PPC.
-  // We would like to use compiler builtin atomic operations,
-  // but they are mostly broken:
-  // - they lead to vastly inefficient code generation
-  //   (http://llvm.org/bugs/show_bug.cgi?id=17281)
-  // - 64-bit atomic operations are not implemented on x86_32
-  //   (http://llvm.org/bugs/show_bug.cgi?id=15034)
-  // - they are not implemented on ARM
-  //   error: undefined reference to '__atomic_load_4'
-  if (mo == memory_order_relaxed) {
-    v = a->val_dont_use;
-  } else {
-    atomic_signal_fence(memory_order_seq_cst);
-    v = a->val_dont_use;
-    atomic_signal_fence(memory_order_seq_cst);
-  }
-  return v;
-}
-
-template<typename T>
-INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_release
-      | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-  if (mo == memory_order_relaxed) {
-    a->val_dont_use = v;
-  } else {
-    atomic_signal_fence(memory_order_seq_cst);
-    a->val_dont_use = v;
-    atomic_signal_fence(memory_order_seq_cst);
-  }
-  if (mo == memory_order_seq_cst)
-    atomic_thread_fence(memory_order_seq_cst);
-}
-
 template<typename T>
 INLINE typename T::Type atomic_fetch_add(volatile T *a,
     typename T::Type v, memory_order mo) {
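The deleted generic path used atomic_signal_fence on both sides of the access, i.e. no hardware barrier at all, so on ARM/PPC the CPU itself could still reorder the accesses. A self-contained illustration of the pattern the old code could break, written with std::atomic for clarity:

#include <atomic>

int data;
std::atomic<int> flag{0};

void writer() {
  data = 42;
  flag.store(1, std::memory_order_release);  // emits a dmb on ARMv7
}

int reader() {
  if (flag.load(std::memory_order_acquire))  // emits a dmb on ARMv7
    return data;  // guaranteed to be 42 only because of the hardware fences;
  return 0;       // the removed code emitted none, so ARM could reorder.
}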
@@ -0,0 +1,96 @@
+//===-- sanitizer_atomic_clang_other.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+// Not intended for direct inclusion. Include sanitizer_atomic.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
+#define SANITIZER_ATOMIC_CLANG_OTHER_H
+
+namespace __sanitizer {
+
+INLINE void proc_yield(int cnt) {
+  __asm__ __volatile__("" ::: "memory");
+}
+
+template<typename T>
+INLINE typename T::Type atomic_load(
+    const volatile T *a, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_consume
+      | memory_order_acquire | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+  typename T::Type v;
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned loads are atomic.
+    if (mo == memory_order_relaxed) {
+      v = a->val_dont_use;
+    } else if (mo == memory_order_consume) {
+      // Assume that processor respects data dependencies
+      // (and that compiler won't break them).
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __asm__ __volatile__("" ::: "memory");
+    } else if (mo == memory_order_acquire) {
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __sync_synchronize();
+    } else {  // seq_cst
+      // E.g. on POWER we need a hw fence even before the load.
+      __sync_synchronize();
+      v = a->val_dont_use;
+      __sync_synchronize();
+    }
+  } else {
+    // 64-bit load on 32-bit platform.
+    // Gross, but simple and reliable.
+    // Assume that it is not in read-only memory.
+    v = __sync_fetch_and_add((typename T::Type volatile*)&a->val_dont_use, 0);
+  }
+  return v;
+}
+
+template<typename T>
+INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_release
+      | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned stores are atomic.
+    if (mo == memory_order_relaxed) {
+      a->val_dont_use = v;
+    } else if (mo == memory_order_release) {
+      __sync_synchronize();
+      a->val_dont_use = v;
+      __asm__ __volatile__("" ::: "memory");
+    } else {  // seq_cst
+      __sync_synchronize();
+      a->val_dont_use = v;
+      __sync_synchronize();
+    }
+  } else {
+    // 64-bit store on 32-bit platform.
+    // Gross, but simple and reliable.
+    typename T::Type cmp = a->val_dont_use;
+    typename T::Type cur;
+    for (;;) {
+      cur = __sync_val_compare_and_swap(&a->val_dont_use, cmp, v);
+      if (cmp == cur)
+        break;
+      cmp = cur;
+    }
+  }
+}
+
+}  // namespace __sanitizer
+
+#endif  // #ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
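The two 64-bit fallbacks above lean on the __sync builtins, which compile to ldrexd/strexd loops on ARMv7. The same tricks in isolation (an illustrative sketch, not the runtime's API):

#include <stdint.h>

// Atomic 64-bit load on a 32-bit target: adding 0 reads the value without
// changing it, at the cost of a store cycle (so the page must be writable).
uint64_t load64(volatile uint64_t *p) {
  return __sync_fetch_and_add(p, 0);
}

// Atomic 64-bit store: retry the CAS until the value we believed was there
// actually was, at which point our value has been swapped in.
void store64(volatile uint64_t *p, uint64_t v) {
  uint64_t cmp = *p;
  for (;;) {
    uint64_t cur = __sync_val_compare_and_swap(p, cmp, v);
    if (cur == cmp)
      break;    // the swap succeeded
    cmp = cur;  // raced with another writer; retry with the observed value
  }
}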
@@ -0,0 +1,116 @@
+//===-- sanitizer_atomic_clang_x86.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+// Not intended for direct inclusion. Include sanitizer_atomic.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ATOMIC_CLANG_X86_H
+#define SANITIZER_ATOMIC_CLANG_X86_H
+
+namespace __sanitizer {
+
+INLINE void proc_yield(int cnt) {
+  __asm__ __volatile__("" ::: "memory");
+  for (int i = 0; i < cnt; i++)
+    __asm__ __volatile__("pause");
+  __asm__ __volatile__("" ::: "memory");
+}
+
+template<typename T>
+INLINE typename T::Type atomic_load(
+    const volatile T *a, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_consume
+      | memory_order_acquire | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+  typename T::Type v;
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned loads are atomic.
+    if (mo == memory_order_relaxed) {
+      v = a->val_dont_use;
+    } else if (mo == memory_order_consume) {
+      // Assume that processor respects data dependencies
+      // (and that compiler won't break them).
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __asm__ __volatile__("" ::: "memory");
+    } else if (mo == memory_order_acquire) {
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      // On x86 loads are implicitly acquire.
+      __asm__ __volatile__("" ::: "memory");
+    } else {  // seq_cst
+      // On x86 plain MOV is enough for seq_cst load.
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __asm__ __volatile__("" ::: "memory");
+    }
+  } else {
+    // 64-bit load on 32-bit platform.
+    __asm__ __volatile__(
+        "movq %1, %%mm0;"  // Use mmx reg for 64-bit atomic moves
+        "movq %%mm0, %0;"  // (ptr could be read-only)
+        "emms;"            // Empty mmx state/Reset FP regs
+        : "=m" (v)
+        : "m" (a->val_dont_use)
+        : // mark the FP stack and mmx registers as clobbered
+          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#ifdef __MMX__
+          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+#endif  // #ifdef __MMX__
+          "memory");
+  }
+  return v;
+}
+
+template<typename T>
+INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_release
+      | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned stores are atomic.
+    if (mo == memory_order_relaxed) {
+      a->val_dont_use = v;
+    } else if (mo == memory_order_release) {
+      // On x86 stores are implicitly release.
+      __asm__ __volatile__("" ::: "memory");
+      a->val_dont_use = v;
+      __asm__ __volatile__("" ::: "memory");
+    } else {  // seq_cst
+      // On x86 stores are implicitly release.
+      __asm__ __volatile__("" ::: "memory");
+      a->val_dont_use = v;
+      __sync_synchronize();
+    }
+  } else {
+    // 64-bit store on 32-bit platform.
+    __asm__ __volatile__(
+        "movq %1, %%mm0;"  // Use mmx reg for 64-bit atomic moves
+        "movq %%mm0, %0;"
+        "emms;"            // Empty mmx state/Reset FP regs
+        : "=m" (a->val_dont_use)
+        : "m" (v)
+        : // mark the FP stack and mmx registers as clobbered
+          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#ifdef __MMX__
+          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+#endif  // #ifdef __MMX__
+          "memory");
+    if (mo == memory_order_seq_cst)
+      __sync_synchronize();
+  }
+}
+
+}  // namespace __sanitizer
+
+#endif  // #ifndef SANITIZER_ATOMIC_CLANG_X86_H
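Unlike the CAS fallback, the movq path above performs a single genuine 8-byte memory access, so it is atomic on aligned addresses and also works on read-only pages. The idiom in isolation (illustrative sketch, i386 only; build with -m32):

#include <stdint.h>

uint64_t load64_mmx(const volatile uint64_t *p) {
  uint64_t v;
  __asm__ __volatile__(
      "movq %1, %%mm0;"  // one 8-byte load: atomic when p is 8-byte aligned
      "movq %%mm0, %0;"
      "emms;"            // reset MMX/x87 state so later FP code still works
      : "=m" (v)
      : "m" (*p)
      : "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
#ifdef __MMX__
        "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
#endif
        "memory");
  return v;
}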
@@ -15,6 +15,79 @@
 
 namespace __sanitizer {
 
+template<typename T>
+struct ValAndMagic {
+  typename T::Type magic0;
+  T a;
+  typename T::Type magic1;
+
+  static ValAndMagic<T> *sink;
+};
+
+template<typename T>
+ValAndMagic<T> *ValAndMagic<T>::sink;
+
+template<typename T, memory_order load_mo, memory_order store_mo>
+void CheckStoreLoad() {
+  typedef typename T::Type Type;
+  ValAndMagic<T> val;
+  // Prevent the compiler from scalarizing the struct.
+  ValAndMagic<T>::sink = &val;
+  // Ensure that surrounding memory is not overwritten.
+  val.magic0 = val.magic1 = (Type)-3;
+  for (u64 i = 0; i < 100; i++) {
+    // Generate a value that occupies all bytes of the variable.
+    u64 v = i;
+    v |= v << 8;
+    v |= v << 16;
+    v |= v << 32;
+    val.a.val_dont_use = (Type)v;
+    EXPECT_EQ(atomic_load(&val.a, load_mo), (Type)v);
+    val.a.val_dont_use = (Type)-1;
+    atomic_store(&val.a, (Type)v, store_mo);
+    EXPECT_EQ(val.a.val_dont_use, (Type)v);
+  }
+  EXPECT_EQ(val.magic0, (Type)-3);
+  EXPECT_EQ(val.magic1, (Type)-3);
+}
+
+TEST(SanitizerCommon, AtomicStoreLoad) {
+  CheckStoreLoad<atomic_uint8_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uint16_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uint32_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uint64_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uintptr_t, memory_order_relaxed, memory_order_relaxed>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_consume, memory_order_relaxed>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_acquire, memory_order_relaxed>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_relaxed, memory_order_release>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_seq_cst, memory_order_seq_cst>
+      ();
+}
+
 // Clang crashes while compiling this test for Android:
 // http://llvm.org/bugs/show_bug.cgi?id=15587
 #if !SANITIZER_ANDROID
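A note on the harness above: publishing &val through the static sink pointer makes the local escape, so the compiler must keep it in memory and the atomic operations touch real addresses; the magic0/magic1 canaries then catch any store that is too wide or lands at the wrong offset. The same idiom in isolation (an illustrative sketch):

// Force a local to be memory-resident so adjacent canaries can be checked.
template <typename T>
struct Guarded {
  T before;  // canary below the value under test
  T value;
  T after;   // canary above
  static Guarded *volatile sink;
};
template <typename T> Guarded<T> *volatile Guarded<T>::sink;

template <typename T>
bool CanariesIntact() {
  Guarded<T> g = {T(-3), T(0), T(-3)};
  Guarded<T>::sink = &g;  // escapes: g can no longer live purely in registers
  g.value = T(-1);        // the operation under test would go here
  return g.before == T(-3) && g.after == T(-3);
}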