asan: fix atomic operations on ARM

implement correct atomic load/store for ARM
add test for atomic load/store
http://llvm-reviews.chandlerc.com/D2582

llvm-svn: 199802
Dmitry Vyukov 2014-01-22 14:13:37 +00:00
parent bc6659c4e9
commit db1ad12ae2
6 changed files with 307 additions and 55 deletions
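For context, a minimal usage sketch (illustrative names, not part of the patch) of the sanitizer atomic API whose load/store paths this commit makes correct on ARM; only the atomic_* types, atomic_load, atomic_store, and the memory_order constants come from sanitizer_atomic.h:

#include "sanitizer_atomic.h"

namespace __sanitizer {

atomic_uint64_t g_payload;   // hypothetical shared data
atomic_uint32_t g_ready;     // hypothetical publication flag

void Publish(u64 value) {
  atomic_store(&g_payload, value, memory_order_relaxed);
  atomic_store(&g_ready, 1, memory_order_release);  // releases the payload
}

bool TryConsume(u64 *value) {
  if (atomic_load(&g_ready, memory_order_acquire) == 0)  // pairs with the release store
    return false;
  *value = atomic_load(&g_payload, memory_order_relaxed);
  return true;
}

}  // namespace __sanitizer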


@@ -92,7 +92,8 @@ class AsanChunkFifoList: public IntrusiveList<AsanChunk> {
struct AsanThreadLocalMallocStorage {
uptr quarantine_cache[16];
uptr allocator2_cache[96 * (512 * 8 + 16)]; // Opaque.
// Allocator cache contains atomic_uint64_t which must be 8-byte aligned.
ALIGNED(8) uptr allocator2_cache[96 * (512 * 8 + 16)]; // Opaque.
void CommitBack();
private:
// These objects are allocated via mmap() and are zero-initialized.


@@ -44,7 +44,8 @@ struct atomic_uint32_t {
struct atomic_uint64_t {
typedef u64 Type;
volatile Type val_dont_use;
// On 32-bit platforms u64 is not necessarily aligned on 8 bytes.
volatile ALIGNED(8) Type val_dont_use;
};
struct atomic_uintptr_t {
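The two ALIGNED(8) annotations above address the same issue; a self-contained illustration (not part of the patch) of why the explicit alignment is needed:

#include <stdint.h>

// On some 32-bit ABIs, e.g. i386 System V, a plain 64-bit struct member is
// only guaranteed 4-byte alignment, which is not enough for an atomic 64-bit
// access; alignas(8) (the C++11 equivalent of the ALIGNED(8) macro) restores
// the required alignment.
struct Plain   { uint32_t tag; uint64_t v; };             // v may be 4-byte aligned
struct Aligned { uint32_t tag; alignas(8) uint64_t v; };  // v is always 8-byte aligned

static_assert(alignof(Aligned) >= 8, "alignas(8) forces 8-byte alignment");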


@@ -15,8 +15,26 @@
#ifndef SANITIZER_ATOMIC_CLANG_H
#define SANITIZER_ATOMIC_CLANG_H
#if defined(__i386__) || defined(__x86_64__)
# include "sanitizer_atomic_clang_x86.h"
#else
# include "sanitizer_atomic_clang_other.h"
#endif
namespace __sanitizer {
// We would like to just use compiler builtin atomic operations
// for loads and stores, but they are mostly broken in clang:
// - they lead to vastly inefficient code generation
// (http://llvm.org/bugs/show_bug.cgi?id=17281)
// - 64-bit atomic operations are not implemented on x86_32
// (http://llvm.org/bugs/show_bug.cgi?id=15034)
// - they are not implemented on ARM
// error: undefined reference to '__atomic_load_4'
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// for mappings of the memory model to different processors.
INLINE void atomic_signal_fence(memory_order) {
__asm__ __volatile__("" ::: "memory");
}
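For illustration only, the builtin-based variant that the comment above rules out; when the compiler cannot expand these operations inline it emits calls to libatomic helpers, which is where the ARM link error quoted above comes from:

#include <stdint.h>

uint32_t builtin_load_acquire(const volatile uint32_t *p) {
  return __atomic_load_n(p, __ATOMIC_ACQUIRE);   // may lower to a __atomic_load_4 call
}

void builtin_store_release(volatile uint32_t *p, uint32_t v) {
  __atomic_store_n(p, v, __ATOMIC_RELEASE);      // may lower to a __atomic_store_4 call
}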
@@ -25,59 +43,6 @@ INLINE void atomic_thread_fence(memory_order) {
__sync_synchronize();
}
INLINE void proc_yield(int cnt) {
__asm__ __volatile__("" ::: "memory");
#if defined(__i386__) || defined(__x86_64__)
for (int i = 0; i < cnt; i++)
__asm__ __volatile__("pause");
#endif
__asm__ __volatile__("" ::: "memory");
}
template<typename T>
INLINE typename T::Type atomic_load(
const volatile T *a, memory_order mo) {
DCHECK(mo & (memory_order_relaxed | memory_order_consume
| memory_order_acquire | memory_order_seq_cst));
DCHECK(!((uptr)a % sizeof(*a)));
typename T::Type v;
// FIXME:
// 64-bit atomic operations are not atomic on 32-bit platforms.
// The implementation lacks necessary memory fences on ARM/PPC.
// We would like to use compiler builtin atomic operations,
// but they are mostly broken:
// - they lead to vastly inefficient code generation
// (http://llvm.org/bugs/show_bug.cgi?id=17281)
// - 64-bit atomic operations are not implemented on x86_32
// (http://llvm.org/bugs/show_bug.cgi?id=15034)
// - they are not implemented on ARM
// error: undefined reference to '__atomic_load_4'
if (mo == memory_order_relaxed) {
v = a->val_dont_use;
} else {
atomic_signal_fence(memory_order_seq_cst);
v = a->val_dont_use;
atomic_signal_fence(memory_order_seq_cst);
}
return v;
}
template<typename T>
INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
DCHECK(mo & (memory_order_relaxed | memory_order_release
| memory_order_seq_cst));
DCHECK(!((uptr)a % sizeof(*a)));
if (mo == memory_order_relaxed) {
a->val_dont_use = v;
} else {
atomic_signal_fence(memory_order_seq_cst);
a->val_dont_use = v;
atomic_signal_fence(memory_order_seq_cst);
}
if (mo == memory_order_seq_cst)
atomic_thread_fence(memory_order_seq_cst);
}
template<typename T>
INLINE typename T::Type atomic_fetch_add(volatile T *a,
typename T::Type v, memory_order mo) {


@@ -0,0 +1,96 @@
//===-- sanitizer_atomic_clang_other.h --------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
// Not intended for direct inclusion. Include sanitizer_atomic.h.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
#define SANITIZER_ATOMIC_CLANG_OTHER_H
namespace __sanitizer {
INLINE void proc_yield(int cnt) {
__asm__ __volatile__("" ::: "memory");
}
template<typename T>
INLINE typename T::Type atomic_load(
const volatile T *a, memory_order mo) {
DCHECK(mo & (memory_order_relaxed | memory_order_consume
| memory_order_acquire | memory_order_seq_cst));
DCHECK(!((uptr)a % sizeof(*a)));
typename T::Type v;
if (sizeof(*a) < 8 || sizeof(void*) == 8) {
// Assume that aligned loads are atomic.
if (mo == memory_order_relaxed) {
v = a->val_dont_use;
} else if (mo == memory_order_consume) {
// Assume that processor respects data dependencies
// (and that compiler won't break them).
__asm__ __volatile__("" ::: "memory");
v = a->val_dont_use;
__asm__ __volatile__("" ::: "memory");
} else if (mo == memory_order_acquire) {
__asm__ __volatile__("" ::: "memory");
v = a->val_dont_use;
__sync_synchronize();
} else { // seq_cst
// E.g. on POWER we need a hw fence even before the load.
__sync_synchronize();
v = a->val_dont_use;
__sync_synchronize();
}
} else {
// 64-bit load on 32-bit platform.
// Gross, but simple and reliable.
// Assume that it is not in read-only memory.
v = __sync_fetch_and_add((typename T::Type volatile*)&a->val_dont_use, 0);
}
return v;
}
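A standalone sketch (hypothetical helper, not part of the runtime) of the read-via-RMW trick in the 64-bit branch above: an atomic fetch-and-add of zero returns the current value without changing it, giving an atomic 64-bit read on 32-bit targets, at the cost of a full barrier and of requiring the location to be writable:

#include <stdint.h>

static uint64_t load64_via_rmw(volatile uint64_t *p) {
  return __sync_fetch_and_add(p, 0);   // full barrier; *p is left unchanged
}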
template<typename T>
INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
DCHECK(mo & (memory_order_relaxed | memory_order_release
| memory_order_seq_cst));
DCHECK(!((uptr)a % sizeof(*a)));
if (sizeof(*a) < 8 || sizeof(void*) == 8) {
// Assume that aligned stores are atomic.
if (mo == memory_order_relaxed) {
a->val_dont_use = v;
} else if (mo == memory_order_release) {
__sync_synchronize();
a->val_dont_use = v;
__asm__ __volatile__("" ::: "memory");
} else { // seq_cst
__sync_synchronize();
a->val_dont_use = v;
__sync_synchronize();
}
} else {
// 64-bit store on 32-bit platform.
// Gross, but simple and reliable.
typename T::Type cmp = a->val_dont_use;
typename T::Type cur;
for (;;) {
cur = __sync_val_compare_and_swap(&a->val_dont_use, cmp, v);
if (cur == cmp)
break;  // CAS succeeded, v is now stored.
cmp = cur;
}
}
}
} // namespace __sanitizer
#endif // #ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
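A standalone sketch (hypothetical helper, not part of the runtime) of the CAS-loop store used in the 64-bit branch of atomic_store above, with the loop terminating as soon as the compare-and-swap succeeds:

#include <stdint.h>

static void store64_via_cas(volatile uint64_t *p, uint64_t v) {
  uint64_t cmp = *p;  // racy initial guess; the CAS below validates it
  for (;;) {
    // If *p still equals cmp, write v and return cmp; otherwise return the
    // value currently stored at *p.
    uint64_t cur = __sync_val_compare_and_swap(p, cmp, v);
    if (cur == cmp)
      break;          // CAS succeeded, v is now stored
    cmp = cur;        // retry with the freshly observed value
  }
}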


@@ -0,0 +1,116 @@
//===-- sanitizer_atomic_clang_x86.h ----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
// Not intended for direct inclusion. Include sanitizer_atomic.h.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_ATOMIC_CLANG_X86_H
#define SANITIZER_ATOMIC_CLANG_X86_H
namespace __sanitizer {
INLINE void proc_yield(int cnt) {
__asm__ __volatile__("" ::: "memory");
for (int i = 0; i < cnt; i++)
__asm__ __volatile__("pause");
__asm__ __volatile__("" ::: "memory");
}
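A hypothetical usage sketch (the function and flag names are illustrative, not part of the patch) showing where proc_yield is meant to be used: inside spin-wait loops, where the x86 PAUSE hint reduces power use and pipeline penalties while another thread makes progress:

#include "sanitizer_atomic.h"

namespace __sanitizer {

void SpinUntilSet(const atomic_uint32_t *flag) {
  while (atomic_load(flag, memory_order_acquire) == 0)
    proc_yield(10);   // ten PAUSE instructions on x86, a compiler barrier elsewhere
}

}  // namespace __sanitizer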
template<typename T>
INLINE typename T::Type atomic_load(
const volatile T *a, memory_order mo) {
DCHECK(mo & (memory_order_relaxed | memory_order_consume
| memory_order_acquire | memory_order_seq_cst));
DCHECK(!((uptr)a % sizeof(*a)));
typename T::Type v;
if (sizeof(*a) < 8 || sizeof(void*) == 8) {
// Assume that aligned loads are atomic.
if (mo == memory_order_relaxed) {
v = a->val_dont_use;
} else if (mo == memory_order_consume) {
// Assume that processor respects data dependencies
// (and that compiler won't break them).
__asm__ __volatile__("" ::: "memory");
v = a->val_dont_use;
__asm__ __volatile__("" ::: "memory");
} else if (mo == memory_order_acquire) {
__asm__ __volatile__("" ::: "memory");
v = a->val_dont_use;
// On x86 loads are implicitly acquire.
__asm__ __volatile__("" ::: "memory");
} else { // seq_cst
// On x86 a plain MOV is enough for a seq_cst load; the seq_cst store side provides the fence.
__asm__ __volatile__("" ::: "memory");
v = a->val_dont_use;
__asm__ __volatile__("" ::: "memory");
}
} else {
// 64-bit load on 32-bit platform.
__asm__ __volatile__(
"movq %1, %%mm0;" // Use mmx reg for 64-bit atomic moves
"movq %%mm0, %0;" // (ptr could be read-only)
"emms;" // Empty mmx state/Reset FP regs
: "=m" (v)
: "m" (a->val_dont_use)
: // mark the FP stack and mmx registers as clobbered
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
#ifdef __MMX__
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
#endif // #ifdef __MMX__
"memory");
}
return v;
}
template<typename T>
INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
DCHECK(mo & (memory_order_relaxed | memory_order_release
| memory_order_seq_cst));
DCHECK(!((uptr)a % sizeof(*a)));
if (sizeof(*a) < 8 || sizeof(void*) == 8) {
// Assume that aligned stores are atomic.
if (mo == memory_order_relaxed) {
a->val_dont_use = v;
} else if (mo == memory_order_release) {
// On x86 stores are implicitly release.
__asm__ __volatile__("" ::: "memory");
a->val_dont_use = v;
__asm__ __volatile__("" ::: "memory");
} else { // seq_cst
// On x86 stores are implicitly release.
__asm__ __volatile__("" ::: "memory");
a->val_dont_use = v;
__sync_synchronize();
}
} else {
// 64-bit store on 32-bit platform.
__asm__ __volatile__(
"movq %1, %%mm0;" // Use mmx reg for 64-bit atomic moves
"movq %%mm0, %0;"
"emms;" // Empty mmx state/Reset FP regs
: "=m" (a->val_dont_use)
: "m" (v)
: // mark the FP stack and mmx registers as clobbered
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
#ifdef __MMX__
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
#endif // #ifdef __MMX__
"memory");
if (mo == memory_order_seq_cst)
__sync_synchronize();
}
}
} // namespace __sanitizer
#endif // #ifndef SANITIZER_ATOMIC_CLANG_X86_H
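For comparison only (the runtime cannot rely on the compiler's atomic builtins here, per the comment in sanitizer_atomic_clang.h above): the same orderings expressed with C++11 <atomic>, which on x86 compile to essentially the sequences hand-written above, with plain MOVs for acquire loads and release stores and an XCHG or MFENCE only for seq_cst stores:

#include <atomic>
#include <cstdint>

uint32_t load_acquire(const std::atomic<uint32_t> &a) {
  return a.load(std::memory_order_acquire);    // plain MOV plus a compiler barrier
}

void store_release(std::atomic<uint32_t> &a, uint32_t v) {
  a.store(v, std::memory_order_release);       // compiler barrier plus plain MOV
}

void store_seq_cst(std::atomic<uint32_t> &a, uint32_t v) {
  a.store(v, std::memory_order_seq_cst);       // XCHG (or MOV followed by MFENCE)
}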


@@ -15,6 +15,79 @@
namespace __sanitizer {
template<typename T>
struct ValAndMagic {
typename T::Type magic0;
T a;
typename T::Type magic1;
static ValAndMagic<T> *sink;
};
template<typename T>
ValAndMagic<T> *ValAndMagic<T>::sink;
template<typename T, memory_order load_mo, memory_order store_mo>
void CheckStoreLoad() {
typedef typename T::Type Type;
ValAndMagic<T> val;
// Prevent the compiler from scalarizing the struct.
ValAndMagic<T>::sink = &val;
// Ensure that surrounding memory is not overwritten.
val.magic0 = val.magic1 = (Type)-3;
for (u64 i = 0; i < 100; i++) {
// Generate a value that occupies all bytes of the variable.
u64 v = i;
v |= v << 8;
v |= v << 16;
v |= v << 32;
val.a.val_dont_use = (Type)v;
EXPECT_EQ(atomic_load(&val.a, load_mo), (Type)v);
val.a.val_dont_use = (Type)-1;
atomic_store(&val.a, (Type)v, store_mo);
EXPECT_EQ(val.a.val_dont_use, (Type)v);
}
EXPECT_EQ(val.magic0, (Type)-3);
EXPECT_EQ(val.magic1, (Type)-3);
}
TEST(SanitizerCommon, AtomicStoreLoad) {
CheckStoreLoad<atomic_uint8_t, memory_order_relaxed, memory_order_relaxed>();
CheckStoreLoad<atomic_uint8_t, memory_order_consume, memory_order_relaxed>();
CheckStoreLoad<atomic_uint8_t, memory_order_acquire, memory_order_relaxed>();
CheckStoreLoad<atomic_uint8_t, memory_order_relaxed, memory_order_release>();
CheckStoreLoad<atomic_uint8_t, memory_order_seq_cst, memory_order_seq_cst>();
CheckStoreLoad<atomic_uint16_t, memory_order_relaxed, memory_order_relaxed>();
CheckStoreLoad<atomic_uint16_t, memory_order_consume, memory_order_relaxed>();
CheckStoreLoad<atomic_uint16_t, memory_order_acquire, memory_order_relaxed>();
CheckStoreLoad<atomic_uint16_t, memory_order_relaxed, memory_order_release>();
CheckStoreLoad<atomic_uint16_t, memory_order_seq_cst, memory_order_seq_cst>();
CheckStoreLoad<atomic_uint32_t, memory_order_relaxed, memory_order_relaxed>();
CheckStoreLoad<atomic_uint32_t, memory_order_consume, memory_order_relaxed>();
CheckStoreLoad<atomic_uint32_t, memory_order_acquire, memory_order_relaxed>();
CheckStoreLoad<atomic_uint32_t, memory_order_relaxed, memory_order_release>();
CheckStoreLoad<atomic_uint32_t, memory_order_seq_cst, memory_order_seq_cst>();
CheckStoreLoad<atomic_uint64_t, memory_order_relaxed, memory_order_relaxed>();
CheckStoreLoad<atomic_uint64_t, memory_order_consume, memory_order_relaxed>();
CheckStoreLoad<atomic_uint64_t, memory_order_acquire, memory_order_relaxed>();
CheckStoreLoad<atomic_uint64_t, memory_order_relaxed, memory_order_release>();
CheckStoreLoad<atomic_uint64_t, memory_order_seq_cst, memory_order_seq_cst>();
CheckStoreLoad<atomic_uintptr_t, memory_order_relaxed, memory_order_relaxed>();
CheckStoreLoad<atomic_uintptr_t, memory_order_consume, memory_order_relaxed>();
CheckStoreLoad<atomic_uintptr_t, memory_order_acquire, memory_order_relaxed>();
CheckStoreLoad<atomic_uintptr_t, memory_order_relaxed, memory_order_release>();
CheckStoreLoad<atomic_uintptr_t, memory_order_seq_cst, memory_order_seq_cst>();
}
// Clang crashes while compiling this test for Android:
// http://llvm.org/bugs/show_bug.cgi?id=15587
#if !SANITIZER_ANDROID