[scudo] Fix race condition in deallocation path when Quarantine is bypassed

Summary: There is a race window in the deallocation path when the Quarantine is bypassed. Initially we would just erase the header of a chunk if we were not to use the Quarantine, as opposed to using a compare-exchange primitive, to make things faster. It turned out to be a poor decision, as 2 threads (or more) could simultaneously deallocate the same pointer, and if the checks were to done before the header got erased, this would result in the pointer being added twice (or more) to distinct thread caches, and eventually be reused. Winning the race is not trivial but can happen with enough control over the allocation primitives. The repro added attempts to trigger the bug, with a moderate success rate, but it should be enough to notice if the bug ever make its way back into the code. Since I am changing things in this file, there are 2 smaller changes tagging along, marking a variable `const`, and improving the Quarantine bypass test at runtime. Reviewers: alekseyshl, eugenis, kcc, vitalybuka Reviewed By: eugenis, vitalybuka Subscribers: delcypher, #sanitizers, llvm-commits Differential Revision: https://reviews.llvm.org/D50655 llvm-svn: 339705
2018-08-14 18:34:52 +00:00 · 2018-08-14 18:34:52 +00:00 · 3afc797e42
parent 97b722121e
commit 3afc797e42
2 changed files with 76 additions and 5 deletions
--- a/compiler-rt/lib/scudo/scudo_allocator.cpp
+++ b/compiler-rt/lib/scudo/scudo_allocator.cpp
@ -264,7 +264,8 @@ struct Allocator {
    Quarantine.Init(
        static_cast<uptr>(getFlags()->QuarantineSizeKb) << 10,
        static_cast<uptr>(getFlags()->ThreadLocalQuarantineSizeKb) << 10);
-    QuarantineChunksUpToSize = getFlags()->QuarantineChunksUpToSize;
+    QuarantineChunksUpToSize = (Quarantine.GetCacheSize() == 0) ? 0 :
+        getFlags()->QuarantineChunksUpToSize;
    DeallocationTypeMismatch = getFlags()->DeallocationTypeMismatch;
    DeleteSizeMismatch = getFlags()->DeleteSizeMismatch;
    ZeroContents = getFlags()->ZeroContents;
@ -389,10 +390,11 @@ struct Allocator {
  // quarantine chunk size threshold.
  void quarantineOrDeallocateChunk(void *Ptr, UnpackedHeader *Header,
                                   uptr Size) {
-    const bool BypassQuarantine = (Quarantine.GetCacheSize() == 0) ||
-        (Size > QuarantineChunksUpToSize);
+    const bool BypassQuarantine = !Size || (Size > QuarantineChunksUpToSize);
    if (BypassQuarantine) {
-      Chunk::eraseHeader(Ptr);
+      UnpackedHeader NewHeader = *Header;
+      NewHeader.State = ChunkAvailable;
+      Chunk::compareExchangeHeader(Ptr, &NewHeader, Header);
      void *BackendPtr = Chunk::getBackendPtr(Ptr, Header);
      if (Header->ClassId) {
        bool UnlockRequired;
@ -675,7 +677,7 @@ void *scudoValloc(uptr Size) {
 }

 void *scudoPvalloc(uptr Size) {
-  uptr PageSize = GetPageSizeCached();
+  const uptr PageSize = GetPageSizeCached();
  if (UNLIKELY(CheckForPvallocOverflow(Size, PageSize))) {
    errno = ENOMEM;
    if (Instance.canReturnNull())
--- a/compiler-rt/test/scudo/dealloc-race.c
+++ b/compiler-rt/test/scudo/dealloc-race.c
@ -0,0 +1,69 @@
+// RUN: %clang_scudo %s -O2 -o %t
+// RUN: %env_scudo_opts="QuarantineChunksUpToSize=0" %run %t 2>&1
+
+// This test attempts to reproduce a race condition in the deallocation path
+// when bypassing the Quarantine. The old behavior was to zero-out the chunk
+// header after checking its checksum, state & various other things, but that
+// left a window during which 2 (or more) threads could deallocate the same
+// chunk, with a net result of having said chunk present in those distinct
+// thread caches.
+
+// A passing test means all the children died with an error. The failing
+// scenario involves winning a race, so repro can be scarce.
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+const int kNumThreads = 2;
+pthread_t tid[kNumThreads];
+
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+char go = 0;
+
+// Frees the pointer passed when signaled to.
+void *thread_free(void *p) {
+  pthread_mutex_lock(&mutex);
+  while (!go)
+    pthread_cond_wait(&cond, &mutex);
+  pthread_mutex_unlock(&mutex);
+  free(p);
+  return 0;
+}
+
+// Allocates a chunk, and attempts to free it "simultaneously" by 2 threads.
+void child(void) {
+  void *p = malloc(16);
+  for (int i = 0; i < kNumThreads; i++)
+    pthread_create(&tid[i], 0, thread_free, p);
+  pthread_mutex_lock(&mutex);
+  go = 1;
+  pthread_cond_broadcast(&cond);
+  pthread_mutex_unlock(&mutex);
+  for (int i = 0; i < kNumThreads; i++)
+    pthread_join(tid[i], 0);
+}
+
+int main(int argc, char** argv) {
+  const int kChildren = 40;
+  pid_t pid;
+  for (int i = 0; i < kChildren; ++i) {
+    pid = fork();
+    if (pid < 0) {
+      exit(1);
+    } else if (pid == 0) {
+      child();
+      exit(0);
+    } else {
+      int status;
+      wait(&status);
+      // A 0 status means the child didn't die with an error. The race was won.
+      if (status == 0)
+        exit(1);
+    }
+  }
+  return 0;
+}