From 3afc797e42f8c745cbd4e2049c5e81c64fef5001 Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Tue, 14 Aug 2018 18:34:52 +0000 Subject: [PATCH] [scudo] Fix race condition in deallocation path when Quarantine is bypassed Summary: There is a race window in the deallocation path when the Quarantine is bypassed. Initially we would just erase the header of a chunk if we were not to use the Quarantine, as opposed to using a compare-exchange primitive, to make things faster. It turned out to be a poor decision, as 2 threads (or more) could simultaneously deallocate the same pointer, and if the checks were to done before the header got erased, this would result in the pointer being added twice (or more) to distinct thread caches, and eventually be reused. Winning the race is not trivial but can happen with enough control over the allocation primitives. The repro added attempts to trigger the bug, with a moderate success rate, but it should be enough to notice if the bug ever make its way back into the code. Since I am changing things in this file, there are 2 smaller changes tagging along, marking a variable `const`, and improving the Quarantine bypass test at runtime. Reviewers: alekseyshl, eugenis, kcc, vitalybuka Reviewed By: eugenis, vitalybuka Subscribers: delcypher, #sanitizers, llvm-commits Differential Revision: https://reviews.llvm.org/D50655 llvm-svn: 339705 --- compiler-rt/lib/scudo/scudo_allocator.cpp | 12 ++-- compiler-rt/test/scudo/dealloc-race.c | 69 +++++++++++++++++++++++ 2 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 compiler-rt/test/scudo/dealloc-race.c diff --git a/compiler-rt/lib/scudo/scudo_allocator.cpp b/compiler-rt/lib/scudo/scudo_allocator.cpp index 4a11bf5fcc21..df91fdc80cf5 100644 --- a/compiler-rt/lib/scudo/scudo_allocator.cpp +++ b/compiler-rt/lib/scudo/scudo_allocator.cpp @@ -264,7 +264,8 @@ struct Allocator { Quarantine.Init( static_cast(getFlags()->QuarantineSizeKb) << 10, static_cast(getFlags()->ThreadLocalQuarantineSizeKb) << 10); - QuarantineChunksUpToSize = getFlags()->QuarantineChunksUpToSize; + QuarantineChunksUpToSize = (Quarantine.GetCacheSize() == 0) ? 0 : + getFlags()->QuarantineChunksUpToSize; DeallocationTypeMismatch = getFlags()->DeallocationTypeMismatch; DeleteSizeMismatch = getFlags()->DeleteSizeMismatch; ZeroContents = getFlags()->ZeroContents; @@ -389,10 +390,11 @@ struct Allocator { // quarantine chunk size threshold. void quarantineOrDeallocateChunk(void *Ptr, UnpackedHeader *Header, uptr Size) { - const bool BypassQuarantine = (Quarantine.GetCacheSize() == 0) || - (Size > QuarantineChunksUpToSize); + const bool BypassQuarantine = !Size || (Size > QuarantineChunksUpToSize); if (BypassQuarantine) { - Chunk::eraseHeader(Ptr); + UnpackedHeader NewHeader = *Header; + NewHeader.State = ChunkAvailable; + Chunk::compareExchangeHeader(Ptr, &NewHeader, Header); void *BackendPtr = Chunk::getBackendPtr(Ptr, Header); if (Header->ClassId) { bool UnlockRequired; @@ -675,7 +677,7 @@ void *scudoValloc(uptr Size) { } void *scudoPvalloc(uptr Size) { - uptr PageSize = GetPageSizeCached(); + const uptr PageSize = GetPageSizeCached(); if (UNLIKELY(CheckForPvallocOverflow(Size, PageSize))) { errno = ENOMEM; if (Instance.canReturnNull()) diff --git a/compiler-rt/test/scudo/dealloc-race.c b/compiler-rt/test/scudo/dealloc-race.c new file mode 100644 index 000000000000..326e22f27373 --- /dev/null +++ b/compiler-rt/test/scudo/dealloc-race.c @@ -0,0 +1,69 @@ +// RUN: %clang_scudo %s -O2 -o %t +// RUN: %env_scudo_opts="QuarantineChunksUpToSize=0" %run %t 2>&1 + +// This test attempts to reproduce a race condition in the deallocation path +// when bypassing the Quarantine. The old behavior was to zero-out the chunk +// header after checking its checksum, state & various other things, but that +// left a window during which 2 (or more) threads could deallocate the same +// chunk, with a net result of having said chunk present in those distinct +// thread caches. + +// A passing test means all the children died with an error. The failing +// scenario involves winning a race, so repro can be scarce. + +#include +#include +#include +#include +#include + +const int kNumThreads = 2; +pthread_t tid[kNumThreads]; + +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +char go = 0; + +// Frees the pointer passed when signaled to. +void *thread_free(void *p) { + pthread_mutex_lock(&mutex); + while (!go) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + free(p); + return 0; +} + +// Allocates a chunk, and attempts to free it "simultaneously" by 2 threads. +void child(void) { + void *p = malloc(16); + for (int i = 0; i < kNumThreads; i++) + pthread_create(&tid[i], 0, thread_free, p); + pthread_mutex_lock(&mutex); + go = 1; + pthread_cond_broadcast(&cond); + pthread_mutex_unlock(&mutex); + for (int i = 0; i < kNumThreads; i++) + pthread_join(tid[i], 0); +} + +int main(int argc, char** argv) { + const int kChildren = 40; + pid_t pid; + for (int i = 0; i < kChildren; ++i) { + pid = fork(); + if (pid < 0) { + exit(1); + } else if (pid == 0) { + child(); + exit(0); + } else { + int status; + wait(&status); + // A 0 status means the child didn't die with an error. The race was won. + if (status == 0) + exit(1); + } + } + return 0; +}