[XRay][compiler-rt] Profiling Mode: Flush logs on exit

Summary:
This change adds support for writing out profiles at program exit.

Depends on D48653.

Reviewers: kpw, eizan

Reviewed By: kpw

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D48956

llvm-svn: 336969
This commit is contained in:
Dean Michael Berris 2018-07-13 04:04:18 +00:00
parent 00712cb749
commit 5d92d3e5be
5 changed files with 81 additions and 36 deletions

View File

@ -30,13 +30,11 @@ struct ThreadTrie {
tid_t TId;
FunctionCallTrie *Trie;
};
Vector<ThreadTrie> ThreadTries;
// One serialized profile block. serialize() fills one of these per thread trie
// that has roots, and nextBuffer() hands them out in order.
struct ProfileBuffer {
// Storage obtained from InternalAlloc in serialize(); released with
// InternalFree when the buffers are cleared.
void *Data;
// Number of bytes in Data (the block header plus the serialized records).
size_t Size;
};
Vector<ProfileBuffer> ProfileBuffers;
struct BlockHeader {
u32 BlockSize;
@ -44,6 +42,10 @@ struct BlockHeader {
u64 ThreadId;
};
// These must be pointers to objects allocated with the internal allocator,
// rather than plain global objects, because they are still accessed at
// program exit.
Vector<ThreadTrie> *ThreadTries = nullptr;
Vector<ProfileBuffer> *ProfileBuffers = nullptr;
FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
} // namespace
@ -57,8 +59,16 @@ void post(const FunctionCallTrie &T, tid_t TId) {
new (GlobalAllocators) FunctionCallTrie::Allocators();
*GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom(
profilingFlags()->global_allocator_max);
ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
InternalAlloc(sizeof(Vector<ThreadTrie>)));
new (ThreadTries) Vector<ThreadTrie>();
ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
InternalAlloc(sizeof(Vector<ProfileBuffer>)));
new (ProfileBuffers) Vector<ProfileBuffer>();
});
DCHECK_NE(GlobalAllocators, nullptr);
DCHECK_NE(ThreadTries, nullptr);
DCHECK_NE(ProfileBuffers, nullptr);
ThreadTrie *Item = nullptr;
{
@ -66,7 +76,7 @@ void post(const FunctionCallTrie &T, tid_t TId) {
if (GlobalAllocators == nullptr)
return;
Item = ThreadTries.PushBack();
Item = ThreadTries->PushBack();
Item->TId = TId;
// Here we're using the internal allocator instead of the managed allocator
@ -188,15 +198,15 @@ void serialize() {
SpinMutexLock Lock(&GlobalMutex);
// Clear out the global ProfileBuffers.
for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
InternalFree(ProfileBuffers[I].Data);
ProfileBuffers.Reset();
for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
InternalFree((*ProfileBuffers)[I].Data);
ProfileBuffers->Reset();
if (ThreadTries.Size() == 0)
if (ThreadTries->Size() == 0)
return;
// Then repopulate the global ProfileBuffers.
for (u32 I = 0; I < ThreadTries.Size(); ++I) {
for (u32 I = 0; I < ThreadTries->Size(); ++I) {
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
ProfileRecord::PathAllocator PathAlloc(
@ -207,7 +217,7 @@ void serialize() {
// use a local allocator and an __xray::Array<...> to store the intermediary
// data, then compute the size as we're going along. Then we'll allocate the
// contiguous space to contain the thread buffer data.
const auto &Trie = *ThreadTries[I].Trie;
const auto &Trie = *(*ThreadTries)[I].Trie;
if (Trie.getRoots().empty())
continue;
populateRecords(ProfileRecords, PathAlloc, Trie);
@ -227,8 +237,8 @@ void serialize() {
for (const auto &Record : ProfileRecords)
CumulativeSizes += 20 + (4 * Record.Path->size());
BlockHeader Header{16 + CumulativeSizes, I, ThreadTries[I].TId};
auto Buffer = ProfileBuffers.PushBack();
BlockHeader Header{16 + CumulativeSizes, I, (*ThreadTries)[I].TId};
auto Buffer = ProfileBuffers->PushBack();
Buffer->Size = sizeof(Header) + CumulativeSizes;
Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
DCHECK_NE(Buffer->Data, nullptr);
@ -244,18 +254,26 @@ void serialize() {
void reset() {
SpinMutexLock Lock(&GlobalMutex);
// Clear out the profile buffers that have been serialized.
for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
InternalFree(ProfileBuffers[I].Data);
ProfileBuffers.Reset();
// Clear out the function call tries per thread.
for (uptr I = 0; I < ThreadTries.Size(); ++I) {
auto &T = ThreadTries[I];
T.Trie->~FunctionCallTrie();
InternalFree(T.Trie);
if (ProfileBuffers != nullptr) {
// Clear out the profile buffers that have been serialized.
for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
InternalFree((*ProfileBuffers)[I].Data);
ProfileBuffers->Reset();
InternalFree(ProfileBuffers);
ProfileBuffers = nullptr;
}
if (ThreadTries != nullptr) {
// Clear out the function call tries per thread.
for (uptr I = 0; I < ThreadTries->Size(); ++I) {
auto &T = (*ThreadTries)[I];
T.Trie->~FunctionCallTrie();
InternalFree(T.Trie);
}
ThreadTries->Reset();
InternalFree(ThreadTries);
ThreadTries = nullptr;
}
ThreadTries.Reset();
// Reset the global allocators.
if (GlobalAllocators != nullptr) {
@ -267,18 +285,29 @@ void reset() {
InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
new (GlobalAllocators) FunctionCallTrie::Allocators();
*GlobalAllocators = FunctionCallTrie::InitAllocators();
ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
InternalAlloc(sizeof(Vector<ThreadTrie>)));
new (ThreadTries) Vector<ThreadTrie>();
ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
InternalAlloc(sizeof(Vector<ProfileBuffer>)));
new (ProfileBuffers) Vector<ProfileBuffer>();
}
XRayBuffer nextBuffer(XRayBuffer B) {
SpinMutexLock Lock(&GlobalMutex);
if (B.Data == nullptr && ProfileBuffers.Size())
return {ProfileBuffers[0].Data, ProfileBuffers[0].Size};
if (ProfileBuffers == nullptr || ProfileBuffers->Size() == 0)
return {nullptr, 0};
if (B.Data == nullptr)
return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};
BlockHeader Header;
internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
auto NextBlock = Header.BlockNum + 1;
if (NextBlock < ProfileBuffers.Size())
return {ProfileBuffers[NextBlock].Data, ProfileBuffers[NextBlock].Size};
if (NextBlock < ProfileBuffers->Size())
return {(*ProfileBuffers)[NextBlock].Data,
(*ProfileBuffers)[NextBlock].Size};
return {nullptr, 0};
}

View File

@ -277,7 +277,7 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
// We need to reset the profile data collection implementation now.
profileCollectorService::reset();
// We need to set up the at-thread-exit handler.
// We need to set up the exit handlers.
static pthread_once_t Once = PTHREAD_ONCE_INIT;
pthread_once(&Once, +[] {
pthread_key_create(&ProfilingKey, +[](void *P) {
@ -288,6 +288,19 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
postCurrentThreadFCT(TLD);
});
// We also need to set up an exit handler so that the profile information is
// written out at exit time. We register it through the C API so that we do
// not rely on C++ ABI functions for registering exit handlers.
Atexit(+[] {
// Finalize and flush.
if (profilingFinalize() != XRAY_LOG_FINALIZED)
return;
if (profilingFlush() != XRAY_LOG_FLUSHED)
return;
if (Verbosity())
Report("XRay Profile flushed at exit.");
});
});
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
@ -321,13 +334,16 @@ bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
profilingFlush,
};
auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
Verbosity())
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
"%d\n",
RegistrationResult);
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
if (Verbosity())
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
"%d\n",
RegistrationResult);
return false;
}
if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
__xray_set_log_impl(Impl);
__xray_log_select_mode("xray_profiling");
return true;
}

View File

@ -20,7 +20,7 @@ XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
"Maximum size of the global allocator for profile storage.")
XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
"Maximum size of the traversal stack allocator.")
XRAY_FLAG(int, grace_period_ms, 100,
XRAY_FLAG(int, grace_period_ms, 1,
"Profile collection will wait this much time in milliseconds before "
"resetting the global state. This gives a chance to threads to "
"notice that the profiler has been finalized and clean up.")

View File

@ -8,7 +8,7 @@
// RUN: XRAY_PROFILING_OPTIONS=no_flush=1 %run %t
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
// RUN: PROFILES=`ls xray-log.profiling-multi-* | wc -l`
// RUN: [ $PROFILES -eq 1 ]
// RUN: [ $PROFILES -ge 1 ]
// RUN: rm -f xray-log.profiling-multi-*
//
// REQUIRES: x86_64-target-arch

View File

@ -8,7 +8,7 @@
// RUN: XRAY_PROFILING_OPTIONS=no_flush=true %run %t
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
// RUN: PROFILES=`ls xray-log.profiling-single-* | wc -l`
// RUN: [ $PROFILES -eq 2 ]
// RUN: [ $PROFILES -ge 2 ]
// RUN: rm -f xray-log.profiling-single-*
//
// REQUIRES: x86_64-target-arch