From 2498629e3446695a47b8317a1a68bf368d84d54c Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 10 May 2016 11:19:50 +0000
Subject: [PATCH] tsan: fix another crash due to processors

Another stack where we try to free sync objects but don't have a
processor is:

  //   ResetRange
  //   __interceptor_munmap
  //   __deallocate_stack
  //   start_thread
  //   clone

Again, it is a latent bug that led to memory leaks.

Also, increase the amount of memory we scan in MetaMap::ResetRange.
Without that the test does not fail, as we fail to free the sync
objects on the stack.

llvm-svn: 269041
---
 compiler-rt/lib/tsan/rtl/tsan_interceptors.cc |  1 +
 compiler-rt/lib/tsan/rtl/tsan_mman.cc         | 51 ++++++++++++-------
 compiler-rt/lib/tsan/rtl/tsan_rtl.h           | 10 ++++
 compiler-rt/lib/tsan/rtl/tsan_sync.cc         | 13 +++--
 compiler-rt/test/tsan/lots_of_threads.c       | 30 +++++++++++
 5 files changed, 82 insertions(+), 23 deletions(-)
 create mode 100644 compiler-rt/test/tsan/lots_of_threads.c

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc b/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc
index f662c447712b..7064194ab059 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc
@@ -740,6 +740,7 @@ TSAN_INTERCEPTOR(int, munmap, void *addr, long_t sz) {
   if (sz != 0) {
     // If sz == 0, munmap will return EINVAL and don't unmap any memory.
     DontNeedShadowFor((uptr)addr, sz);
+    ScopedGlobalProcessor sgp;
     ctx->metamap.ResetRange(thr->proc(), (uptr)addr, (uptr)sz);
   }
   int res = REAL(munmap)(addr, sz);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cc b/compiler-rt/lib/tsan/rtl/tsan_mman.cc
index d3af9a08c19e..aa7198200b03 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cc
@@ -78,6 +78,38 @@ GlobalProc *global_proc() {
   return reinterpret_cast<GlobalProc*>(&global_proc_placeholder);
 }
 
+ScopedGlobalProcessor::ScopedGlobalProcessor() {
+  GlobalProc *gp = global_proc();
+  ThreadState *thr = cur_thread();
+  if (thr->proc())
+    return;
+  // If we don't have a proc, use the global one.
+  // There are currently only two known cases where this path is triggered:
+  //   __interceptor_free
+  //   __nptl_deallocate_tsd
+  //   start_thread
+  //   clone
+  // and:
+  //   ResetRange
+  //   __interceptor_munmap
+  //   __deallocate_stack
+  //   start_thread
+  //   clone
+  // Ideally, we destroy thread state (and unwire proc) when a thread actually
+  // exits (i.e. when we join/wait it). Then we would not need the global proc.
+  gp->mtx.Lock();
+  ProcWire(gp->proc, thr);
+}
+
+ScopedGlobalProcessor::~ScopedGlobalProcessor() {
+  GlobalProc *gp = global_proc();
+  ThreadState *thr = cur_thread();
+  if (thr->proc() != gp->proc)
+    return;
+  ProcUnwire(gp->proc, thr);
+  gp->mtx.Unlock();
+}
+
 void InitializeAllocator() {
   allocator()->Init(common_flags()->allocator_may_return_null);
 }
@@ -137,29 +169,12 @@ void *user_calloc(ThreadState *thr, uptr pc, uptr size, uptr n) {
 }
 
 void user_free(ThreadState *thr, uptr pc, void *p, bool signal) {
-  GlobalProc *gp = nullptr;
-  if (thr->proc() == nullptr) {
-    // If we don't have a proc, use the global one.
-    // There is currently only one known case where this path is triggered:
-    //   __interceptor_free
-    //   __nptl_deallocate_tsd
-    //   start_thread
-    //   clone
-    // Ideally, we destroy thread state (and unwire proc) when a thread actually
-    // exits (i.e. when we join/wait it). Then we would not need the global proc
-    gp = global_proc();
-    gp->mtx.Lock();
-    ProcWire(gp->proc, thr);
-  }
+  ScopedGlobalProcessor sgp;
   if (ctx && ctx->initialized)
     OnUserFree(thr, pc, (uptr)p, true);
   allocator()->Deallocate(&thr->proc()->alloc_cache, p);
   if (signal)
     SignalUnsafeCall(thr, pc);
-  if (gp) {
-    ProcUnwire(gp->proc, thr);
-    gp->mtx.Unlock();
-  }
 }
 
 void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index aa8181586cc8..ff69015660b6 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -345,6 +345,16 @@ struct Processor {
   DDPhysicalThread *dd_pt;
 };
 
+#ifndef SANITIZER_GO
+// ScopedGlobalProcessor temporarily sets up a global processor for the current
+// thread, if it does not have one. Intended for interceptors that can run
+// at the very end of a thread, when the thread processor is already destroyed.
+struct ScopedGlobalProcessor {
+  ScopedGlobalProcessor();
+  ~ScopedGlobalProcessor();
+};
+#endif
+
 // This struct is stored in TLS.
 struct ThreadState {
   FastState fast_state;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.cc b/compiler-rt/lib/tsan/rtl/tsan_sync.cc
index 15759d4e09d4..50fe151a056e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_sync.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_sync.cc
@@ -152,18 +152,21 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
   const uptr p0 = p;
   const uptr sz0 = sz;
   // Probe start of the range.
-  while (sz > 0) {
+  for (uptr checked = 0; sz > 0; checked += kPageSize) {
     bool has_something = FreeRange(proc, p, kPageSize);
     p += kPageSize;
     sz -= kPageSize;
-    if (!has_something)
+    if (!has_something && checked > (128 << 10))
       break;
   }
   // Probe end of the range.
-  while (sz > 0) {
-    bool has_something = FreeRange(proc, p - kPageSize, kPageSize);
+  for (uptr checked = 0; sz > 0; checked += kPageSize) {
+    bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
     sz -= kPageSize;
-    if (!has_something)
+    // Stacks grow down, so sync objects are most likely at the end of the
+    // region (if it is a stack). The very end of the stack is TLS and tsan
+    // increases TLS by at least 256K, so check at least 512K.
+    if (!has_something && checked > (512 << 10))
       break;
   }
   // Finally, page out the whole range (including the parts that we've just
diff --git a/compiler-rt/test/tsan/lots_of_threads.c b/compiler-rt/test/tsan/lots_of_threads.c
new file mode 100644
index 000000000000..eef9b1cb036c
--- /dev/null
+++ b/compiler-rt/test/tsan/lots_of_threads.c
@@ -0,0 +1,30 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+void *thr(void *arg) {
+  // Create a sync object on the stack, so there is something to free on
+  // thread end.
+  volatile int x;
+  __atomic_fetch_add(&x, 1, __ATOMIC_SEQ_CST);
+  barrier_wait(&barrier);
+  return 0;
+}
+
+int main() {
+  const int kThreads = 10;
+  barrier_init(&barrier, kThreads + 1);
+  pthread_t t[kThreads];
+  pthread_attr_t attr;
+  pthread_attr_init(&attr);
+  pthread_attr_setstacksize(&attr, 16 << 20);
+  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&t[i], &attr, thr, 0);
+  pthread_attr_destroy(&attr);
+  barrier_wait(&barrier);
+  sleep(1);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: DONE
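For readers unfamiliar with the idiom: ScopedGlobalProcessor, added above, is a
scoped wire/unwire guard. The constructor takes the global processor's lock and
attaches it only when the current thread has no processor of its own; the
destructor detaches and unlocks only when the constructor did the wiring. Below
is a minimal standalone sketch of that pattern in plain C++; the names
(ScopedGlobalProc, current_proc, global_proc_storage) are illustrative, not
tsan's actual types, and std::mutex stands in for tsan's internal Mutex.

#include <cstdio>
#include <mutex>

struct Proc { const char *name; };

static Proc global_proc_storage{"global"};
static std::mutex global_proc_mtx;  // serializes borrowers of the global proc
static thread_local Proc *current_proc = nullptr;  // models ThreadState::proc()

struct ScopedGlobalProc {
  ScopedGlobalProc() {
    if (current_proc)
      return;  // the thread still owns a proc; nothing to do
    // Late-running code (e.g. free() or munmap() after thread teardown)
    // borrows the global proc under a lock, one borrower at a time.
    global_proc_mtx.lock();
    current_proc = &global_proc_storage;
  }
  ~ScopedGlobalProc() {
    if (current_proc != &global_proc_storage)
      return;  // constructor did not wire the global proc; leave state alone
    current_proc = nullptr;
    global_proc_mtx.unlock();
  }
};

int main() {
  ScopedGlobalProc sgp;  // this thread has no proc, so the global one is wired
  std::printf("using proc: %s\n", current_proc->name);
  return 0;  // destructor unwires the global proc and releases the lock
}

The destructor's pointer check mirrors the patch's thr->proc() != gp->proc
test: it makes the guard a no-op when the thread already had a processor, so
nesting and the common (non-teardown) path stay cheap and never touch the lock.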