tsan: fix another crash due to processors

Another stack where we try to free sync objects,
but don't have a processor is:

  //   ResetRange
  //   __interceptor_munmap
  //   __deallocate_stack
  //   start_thread
  //   clone

Again, it is a latent bug that led to memory leaks.
Also, increase amount of memory we scan in MetaMap::ResetRange.
Without that the test does not fail, as we fail to free
the sync objects on stack.

llvm-svn: 269041
This commit is contained in:
Dmitry Vyukov 2016-05-10 11:19:50 +00:00
parent 895774225a
commit 2498629e34
5 changed files with 82 additions and 23 deletions

View File

@ -740,6 +740,7 @@ TSAN_INTERCEPTOR(int, munmap, void *addr, long_t sz) {
if (sz != 0) {
// If sz == 0, munmap will return EINVAL and don't unmap any memory.
DontNeedShadowFor((uptr)addr, sz);
ScopedGlobalProcessor sgp;
ctx->metamap.ResetRange(thr->proc(), (uptr)addr, (uptr)sz);
}
int res = REAL(munmap)(addr, sz);

View File

@ -78,6 +78,38 @@ GlobalProc *global_proc() {
return reinterpret_cast<GlobalProc*>(&global_proc_placeholder);
}
ScopedGlobalProcessor::ScopedGlobalProcessor() {
  GlobalProc *gp = global_proc();
  ThreadState *self = cur_thread();
  // Fast path: the thread still owns its own processor; nothing to set up.
  if (self->proc() != nullptr)
    return;
  // The thread has no processor, so borrow the global one for this scope.
  // There are currently only two known stacks that reach this point, both at
  // the very end of a thread's life, after its processor was destroyed:
  //   __interceptor_free          and:  ResetRange
  //   __nptl_deallocate_tsd             __interceptor_munmap
  //   start_thread                      __deallocate_stack
  //   clone                             start_thread
  //                                     clone
  // Ideally, thread state (and the proc) would be destroyed when a thread is
  // actually joined/waited; then the global proc would not be needed.
  gp->mtx.Lock();  // Held for the whole scope; released in the destructor.
  ProcWire(gp->proc, self);
}
ScopedGlobalProcessor::~ScopedGlobalProcessor() {
  GlobalProc *gp = global_proc();
  ThreadState *self = cur_thread();
  // If the thread's processor is not the global one, the constructor never
  // wired it (the thread had its own proc), so there is nothing to undo.
  if (self->proc() != gp->proc)
    return;
  ProcUnwire(gp->proc, self);
  gp->mtx.Unlock();  // Matches the Lock() taken in the constructor.
}
// Initializes the tsan allocator, honoring the allocator_may_return_null
// common flag.
void InitializeAllocator() {
  const bool may_return_null = common_flags()->allocator_may_return_null;
  allocator()->Init(may_return_null);
}
@ -137,29 +169,12 @@ void *user_calloc(ThreadState *thr, uptr pc, uptr size, uptr n) {
}
// Frees a user heap block.
// thr/pc: current thread state and caller PC (for reports/events).
// p: pointer previously returned by the tsan allocator.
// signal: if true, also checks for a signal-unsafe call.
void user_free(ThreadState *thr, uptr pc, void *p, bool signal) {
  // The thread may have no processor here (e.g. free() called from
  // __nptl_deallocate_tsd at the very end of a thread's life). The RAII
  // guard wires the global processor in that case, so the manual
  // gp/ProcWire/ProcUnwire bookkeeping that used to live here is redundant
  // and has been removed in favor of the guard.
  ScopedGlobalProcessor sgp;
  if (ctx && ctx->initialized)
    OnUserFree(thr, pc, (uptr)p, true);
  allocator()->Deallocate(&thr->proc()->alloc_cache, p);
  if (signal)
    SignalUnsafeCall(thr, pc);
}
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {

View File

@ -345,6 +345,16 @@ struct Processor {
DDPhysicalThread *dd_pt;
};
#ifndef SANITIZER_GO
// ScopedGlobalProcessor temporarily sets up the global processor for the
// current thread, if it does not already have one. Intended for interceptors
// that can run at the very end of a thread's life, when the thread's own
// processor has already been destroyed. The constructor wires the global
// processor (holding its mutex for the whole scope); the destructor unwires
// and unlocks it again, and both are no-ops if the thread has its own proc.
struct ScopedGlobalProcessor {
  ScopedGlobalProcessor();
  ~ScopedGlobalProcessor();
};
#endif
// This struct is stored in TLS.
struct ThreadState {
FastState fast_state;

View File

@ -152,18 +152,21 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
const uptr p0 = p;
const uptr sz0 = sz;
// Probe start of the range.
while (sz > 0) {
for (uptr checked = 0; sz > 0; checked += kPageSize) {
bool has_something = FreeRange(proc, p, kPageSize);
p += kPageSize;
sz -= kPageSize;
if (!has_something)
if (!has_something && checked > (128 << 10))
break;
}
// Probe end of the range.
while (sz > 0) {
bool has_something = FreeRange(proc, p - kPageSize, kPageSize);
for (uptr checked = 0; sz > 0; checked += kPageSize) {
bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
sz -= kPageSize;
if (!has_something)
// Stacks grow down, so sync objects are most likely at the end of the region
// (if it is a stack). The very end of the stack is TLS and tsan increases
// TLS by at least 256K, so check at least 512K.
if (!has_something && checked > (512 << 10))
break;
}
// Finally, page out the whole range (including the parts that we've just

View File

@ -0,0 +1,30 @@
// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
#include "test.h"
// Thread body: creates a sync object on the thread's stack so tsan has
// something to free when the (detached) thread exits.
void *thr(void *arg) {
  // Initialize x: __atomic_fetch_add below reads the current value, and
  // reading an indeterminate value is undefined behavior. The initial value
  // is irrelevant to creating the sync object.
  volatile int x = 0;
  __atomic_fetch_add(&x, 1, __ATOMIC_SEQ_CST);
  barrier_wait(&barrier);
  return 0;
}
// Spawns several detached threads with large stacks; each creates a sync
// object on its stack. After they exit, tsan must be able to free those sync
// objects even though the threads no longer have a processor.
int main() {
  enum { kThreads = 10 };
  barrier_init(&barrier, kThreads + 1);
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 16 << 20);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
  pthread_t t[kThreads];
  for (int i = 0; i < kThreads; i++)
    pthread_create(&t[i], &attr, thr, 0);
  pthread_attr_destroy(&attr);
  barrier_wait(&barrier);
  sleep(1);  // Give the detached threads time to fully exit.
  fprintf(stderr, "DONE\n");
  return 0;
}
// CHECK: DONE