[XRay][compiler-rt] Use a hand-written circular buffer in BufferQueue
Summary: This change removes the dependency on using a std::deque<...> for the storage of the buffers in the buffer queue. We instead implement a fixed-size circular buffer that's resilient to exhaustion, and preserves the semantics of the BufferQueue. We're moving away from using std::deque<...> for two reasons: - We want to remove dependencies on the STL for data structures. - We want the data structure we use to not require re-allocation in the normal course of operation. The internal implementation of the buffer queue uses heap-allocated arrays that are initialized once when the BufferQueue is created, and re-uses slots in the buffer array as buffers are returned in order. We also change the lock used in the implementation to a spinlock instead of a blocking mutex. We reason that since the release operations now take very little time in the critical section, that a spinlock would be appropriate. This change is related to D38073. This change is a re-submit with the following changes: - Keeping track of the live buffers with a counter independent of the pointers keeping track of the extents of the circular buffer. - Additional documentation of what the data members are meant to represent. Reviewers: dblaikie, kpw, pelikan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D38119 llvm-svn: 314877
This commit is contained in:
parent
e14145dcb0
commit
d06e917b9e
|
@ -16,6 +16,7 @@
|
|||
#include "sanitizer_common/sanitizer_common.h"
|
||||
#include "sanitizer_common/sanitizer_libc.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <tuple>
|
||||
|
||||
|
@ -23,18 +24,21 @@ using namespace __xray;
|
|||
using namespace __sanitizer;
|
||||
|
||||
BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
|
||||
: BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing{0} {
|
||||
for (auto &T : Buffers) {
|
||||
: BufferSize(B), Buffers(new std::tuple<Buffer, bool>[N]()),
|
||||
BufferCount(N), Finalizing{0}, OwnedBuffers(new void *[N]()),
|
||||
Next(Buffers.get()), First(Buffers.get()), LiveBuffers(0) {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
auto &T = Buffers[i];
|
||||
void *Tmp = malloc(BufferSize);
|
||||
if (Tmp == nullptr) {
|
||||
Success = false;
|
||||
return;
|
||||
}
|
||||
|
||||
auto &Buf = std::get<0>(T);
|
||||
std::get<1>(T) = false;
|
||||
Buf.Buffer = Tmp;
|
||||
Buf.Size = B;
|
||||
OwnedBuffers.emplace(Tmp);
|
||||
OwnedBuffers[i] = Tmp;
|
||||
}
|
||||
Success = true;
|
||||
}
|
||||
|
@ -42,27 +46,41 @@ BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
|
|||
BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
|
||||
if (__sanitizer::atomic_load(&Finalizing, __sanitizer::memory_order_acquire))
|
||||
return ErrorCode::QueueFinalizing;
|
||||
__sanitizer::BlockingMutexLock Guard(&Mutex);
|
||||
if (Buffers.empty())
|
||||
__sanitizer::SpinMutexLock Guard(&Mutex);
|
||||
if (LiveBuffers == BufferCount)
|
||||
return ErrorCode::NotEnoughMemory;
|
||||
auto &T = Buffers.front();
|
||||
|
||||
auto &T = *Next;
|
||||
auto &B = std::get<0>(T);
|
||||
Buf = B;
|
||||
B.Buffer = nullptr;
|
||||
B.Size = 0;
|
||||
Buffers.pop_front();
|
||||
++LiveBuffers;
|
||||
|
||||
First = Next;
|
||||
if (++Next == (Buffers.get() + BufferCount))
|
||||
Next = Buffers.get();
|
||||
|
||||
return ErrorCode::Ok;
|
||||
}
|
||||
|
||||
BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
|
||||
if (OwnedBuffers.count(Buf.Buffer) == 0)
|
||||
// Blitz through the buffers array to find the buffer.
|
||||
if (std::none_of(OwnedBuffers.get(), OwnedBuffers.get() + BufferCount,
|
||||
[&Buf](void *P) { return P == Buf.Buffer; }))
|
||||
return ErrorCode::UnrecognizedBuffer;
|
||||
__sanitizer::BlockingMutexLock Guard(&Mutex);
|
||||
__sanitizer::SpinMutexLock Guard(&Mutex);
|
||||
|
||||
// This points to a semantic bug, we really ought to not be releasing more
|
||||
// buffers than we actually get.
|
||||
if (LiveBuffers == 0)
|
||||
return ErrorCode::NotEnoughMemory;
|
||||
|
||||
// Now that the buffer has been released, we mark it as "used".
|
||||
Buffers.emplace(Buffers.end(), Buf, true /* used */);
|
||||
*First = std::make_tuple(Buf, true);
|
||||
Buf.Buffer = nullptr;
|
||||
Buf.Size = 0;
|
||||
--LiveBuffers;
|
||||
if (++First == (Buffers.get() + BufferCount))
|
||||
First = Buffers.get();
|
||||
return ErrorCode::Ok;
|
||||
}
|
||||
|
||||
|
@ -74,7 +92,8 @@ BufferQueue::ErrorCode BufferQueue::finalize() {
|
|||
}
|
||||
|
||||
BufferQueue::~BufferQueue() {
|
||||
for (auto &T : Buffers) {
|
||||
for (auto I = Buffers.get(), E = Buffers.get() + BufferCount; I != E; ++I) {
|
||||
auto &T = *I;
|
||||
auto &Buf = std::get<0>(T);
|
||||
free(Buf.Buffer);
|
||||
}
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
|
||||
#include "sanitizer_common/sanitizer_atomic.h"
|
||||
#include "sanitizer_common/sanitizer_mutex.h"
|
||||
#include <deque>
|
||||
#include <unordered_set>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
namespace __xray {
|
||||
|
@ -36,15 +36,30 @@ public:
|
|||
};
|
||||
|
||||
private:
|
||||
// Size of each individual Buffer.
|
||||
size_t BufferSize;
|
||||
|
||||
// We use a bool to indicate whether the Buffer has been used in this
|
||||
// freelist implementation.
|
||||
std::deque<std::tuple<Buffer, bool>> Buffers;
|
||||
__sanitizer::BlockingMutex Mutex;
|
||||
std::unordered_set<void *> OwnedBuffers;
|
||||
std::unique_ptr<std::tuple<Buffer, bool>[]> Buffers;
|
||||
size_t BufferCount;
|
||||
|
||||
__sanitizer::SpinMutex Mutex;
|
||||
__sanitizer::atomic_uint8_t Finalizing;
|
||||
|
||||
// Pointers to buffers managed/owned by the BufferQueue.
|
||||
std::unique_ptr<void *[]> OwnedBuffers;
|
||||
|
||||
// Pointer to the next buffer to be handed out.
|
||||
std::tuple<Buffer, bool> *Next;
|
||||
|
||||
// Pointer to the entry in the array where the next released buffer will be
|
||||
// placed.
|
||||
std::tuple<Buffer, bool> *First;
|
||||
|
||||
// Count of buffers that have been handed out through 'getBuffer'.
|
||||
size_t LiveBuffers;
|
||||
|
||||
public:
|
||||
enum class ErrorCode : unsigned {
|
||||
Ok,
|
||||
|
@ -117,8 +132,9 @@ public:
|
|||
/// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
|
||||
/// releaseBuffer(...) operation).
|
||||
template <class F> void apply(F Fn) {
|
||||
__sanitizer::BlockingMutexLock G(&Mutex);
|
||||
for (const auto &T : Buffers) {
|
||||
__sanitizer::SpinMutexLock G(&Mutex);
|
||||
for (auto I = Buffers.get(), E = Buffers.get() + BufferCount; I != E; ++I) {
|
||||
const auto &T = *I;
|
||||
if (std::get<1>(T))
|
||||
Fn(std::get<0>(T));
|
||||
}
|
||||
|
|
|
@ -1,37 +1,63 @@
|
|||
// RUN: %clangxx_xray -g -std=c++11 %s -o %t
|
||||
// RUN: rm fdr-thread-order.* || true
|
||||
// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-thread-order. xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | FileCheck %s
|
||||
// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-thread-order.* | head -1`" | FileCheck %s --check-prefix TRACE
|
||||
// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false \
|
||||
// RUN: xray_logfile_base=fdr-thread-order. xray_fdr_log=true verbosity=1 \
|
||||
// RUN: xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | \
|
||||
// RUN: FileCheck %s
|
||||
// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
|
||||
// RUN: "`ls fdr-thread-order.* | head -1`"
|
||||
// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
|
||||
// RUN: "`ls fdr-thread-order.* | head -1`" | \
|
||||
// RUN: FileCheck %s --check-prefix TRACE
|
||||
// RUN: rm fdr-thread-order.*
|
||||
// FIXME: Make llvm-xray work on non-x86_64 as well.
|
||||
// REQUIRES: x86_64-linux
|
||||
// REQUIRES: built-in-llvm-tree
|
||||
|
||||
#include "xray/xray_log_interface.h"
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
|
||||
constexpr auto kBufferSize = 16384;
|
||||
constexpr auto kBufferMax = 10;
|
||||
|
||||
thread_local uint64_t var = 0;
|
||||
[[clang::xray_always_instrument]] void __attribute__((noinline)) f1() { ++var; }
|
||||
[[clang::xray_always_instrument]] void __attribute__((noinline)) f2() { ++var; }
|
||||
std::atomic<uint64_t> var{0};
|
||||
|
||||
[[clang::xray_always_instrument]] void __attribute__((noinline)) f1() {
|
||||
for (auto i = 0; i < 1 << 20; ++i)
|
||||
++var;
|
||||
}
|
||||
|
||||
[[clang::xray_always_instrument]] void __attribute__((noinline)) f2() {
|
||||
for (auto i = 0; i < 1 << 20; ++i)
|
||||
++var;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace __xray;
|
||||
FDRLoggingOptions Options;
|
||||
__xray_patch();
|
||||
assert(__xray_log_init(kBufferSize, kBufferMax, &Options,
|
||||
sizeof(FDRLoggingOptions)) ==
|
||||
XRayLogInitStatus::XRAY_LOG_INITIALIZED);
|
||||
__xray_patch();
|
||||
std::thread t1([] { f1(); });
|
||||
std::thread t2([] { f2(); });
|
||||
t1.join();
|
||||
t2.join();
|
||||
|
||||
std::atomic_thread_fence(std::memory_order_acq_rel);
|
||||
|
||||
{
|
||||
std::thread t1([] { f1(); });
|
||||
std::thread t2([] { f2(); });
|
||||
t1.join();
|
||||
t2.join();
|
||||
}
|
||||
|
||||
std::atomic_thread_fence(std::memory_order_acq_rel);
|
||||
__xray_log_finalize();
|
||||
__xray_log_flushLog();
|
||||
__xray_unpatch();
|
||||
return var > 0 ? 0 : 1;
|
||||
// CHECK: {{.*}}XRay: Log file in '{{.*}}'
|
||||
// CHECK-NOT: Failed
|
||||
}
|
||||
|
||||
// We want to make sure that the order of the function log doesn't matter.
|
||||
|
|
Loading…
Reference in New Issue