[XRay][compiler-rt] Profiling Mode: Flush logs on exit
Summary: This change adds support for writing out profiles at program exit.
Depends on D48653.
Reviewers: kpw, eizan
Reviewed By: kpw
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D48956
llvm-svn: 336969
This commit is contained in:
parent
00712cb749
commit
5d92d3e5be
|
@ -30,13 +30,11 @@ struct ThreadTrie {
|
||||||
tid_t TId;
|
tid_t TId;
|
||||||
FunctionCallTrie *Trie;
|
FunctionCallTrie *Trie;
|
||||||
};
|
};
|
||||||
Vector<ThreadTrie> ThreadTries;
|
|
||||||
|
|
||||||
struct ProfileBuffer {
|
struct ProfileBuffer {
|
||||||
void *Data;
|
void *Data;
|
||||||
size_t Size;
|
size_t Size;
|
||||||
};
|
};
|
||||||
Vector<ProfileBuffer> ProfileBuffers;
|
|
||||||
|
|
||||||
struct BlockHeader {
|
struct BlockHeader {
|
||||||
u32 BlockSize;
|
u32 BlockSize;
|
||||||
|
@ -44,6 +42,10 @@ struct BlockHeader {
|
||||||
u64 ThreadId;
|
u64 ThreadId;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// These need to be pointers that point to heap/internal-allocator-allocated
|
||||||
|
// objects because these are accessed even at program exit.
|
||||||
|
Vector<ThreadTrie> *ThreadTries = nullptr;
|
||||||
|
Vector<ProfileBuffer> *ProfileBuffers = nullptr;
|
||||||
FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
|
FunctionCallTrie::Allocators *GlobalAllocators = nullptr;
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -57,8 +59,16 @@ void post(const FunctionCallTrie &T, tid_t TId) {
|
||||||
new (GlobalAllocators) FunctionCallTrie::Allocators();
|
new (GlobalAllocators) FunctionCallTrie::Allocators();
|
||||||
*GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom(
|
*GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom(
|
||||||
profilingFlags()->global_allocator_max);
|
profilingFlags()->global_allocator_max);
|
||||||
|
ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
|
||||||
|
InternalAlloc(sizeof(Vector<ThreadTrie>)));
|
||||||
|
new (ThreadTries) Vector<ThreadTrie>();
|
||||||
|
ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
|
||||||
|
InternalAlloc(sizeof(Vector<ProfileBuffer>)));
|
||||||
|
new (ProfileBuffers) Vector<ProfileBuffer>();
|
||||||
});
|
});
|
||||||
DCHECK_NE(GlobalAllocators, nullptr);
|
DCHECK_NE(GlobalAllocators, nullptr);
|
||||||
|
DCHECK_NE(ThreadTries, nullptr);
|
||||||
|
DCHECK_NE(ProfileBuffers, nullptr);
|
||||||
|
|
||||||
ThreadTrie *Item = nullptr;
|
ThreadTrie *Item = nullptr;
|
||||||
{
|
{
|
||||||
|
@ -66,7 +76,7 @@ void post(const FunctionCallTrie &T, tid_t TId) {
|
||||||
if (GlobalAllocators == nullptr)
|
if (GlobalAllocators == nullptr)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Item = ThreadTries.PushBack();
|
Item = ThreadTries->PushBack();
|
||||||
Item->TId = TId;
|
Item->TId = TId;
|
||||||
|
|
||||||
// Here we're using the internal allocator instead of the managed allocator
|
// Here we're using the internal allocator instead of the managed allocator
|
||||||
|
@ -188,15 +198,15 @@ void serialize() {
|
||||||
SpinMutexLock Lock(&GlobalMutex);
|
SpinMutexLock Lock(&GlobalMutex);
|
||||||
|
|
||||||
// Clear out the global ProfileBuffers.
|
// Clear out the global ProfileBuffers.
|
||||||
for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
|
for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
|
||||||
InternalFree(ProfileBuffers[I].Data);
|
InternalFree((*ProfileBuffers)[I].Data);
|
||||||
ProfileBuffers.Reset();
|
ProfileBuffers->Reset();
|
||||||
|
|
||||||
if (ThreadTries.Size() == 0)
|
if (ThreadTries->Size() == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Then repopulate the global ProfileBuffers.
|
// Then repopulate the global ProfileBuffers.
|
||||||
for (u32 I = 0; I < ThreadTries.Size(); ++I) {
|
for (u32 I = 0; I < ThreadTries->Size(); ++I) {
|
||||||
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
|
using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
|
||||||
ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
|
ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max, 0);
|
||||||
ProfileRecord::PathAllocator PathAlloc(
|
ProfileRecord::PathAllocator PathAlloc(
|
||||||
|
@ -207,7 +217,7 @@ void serialize() {
|
||||||
// use a local allocator and an __xray::Array<...> to store the intermediary
|
// use a local allocator and an __xray::Array<...> to store the intermediary
|
||||||
// data, then compute the size as we're going along. Then we'll allocate the
|
// data, then compute the size as we're going along. Then we'll allocate the
|
||||||
// contiguous space to contain the thread buffer data.
|
// contiguous space to contain the thread buffer data.
|
||||||
const auto &Trie = *ThreadTries[I].Trie;
|
const auto &Trie = *(*ThreadTries)[I].Trie;
|
||||||
if (Trie.getRoots().empty())
|
if (Trie.getRoots().empty())
|
||||||
continue;
|
continue;
|
||||||
populateRecords(ProfileRecords, PathAlloc, Trie);
|
populateRecords(ProfileRecords, PathAlloc, Trie);
|
||||||
|
@ -227,8 +237,8 @@ void serialize() {
|
||||||
for (const auto &Record : ProfileRecords)
|
for (const auto &Record : ProfileRecords)
|
||||||
CumulativeSizes += 20 + (4 * Record.Path->size());
|
CumulativeSizes += 20 + (4 * Record.Path->size());
|
||||||
|
|
||||||
BlockHeader Header{16 + CumulativeSizes, I, ThreadTries[I].TId};
|
BlockHeader Header{16 + CumulativeSizes, I, (*ThreadTries)[I].TId};
|
||||||
auto Buffer = ProfileBuffers.PushBack();
|
auto Buffer = ProfileBuffers->PushBack();
|
||||||
Buffer->Size = sizeof(Header) + CumulativeSizes;
|
Buffer->Size = sizeof(Header) + CumulativeSizes;
|
||||||
Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
|
Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
|
||||||
DCHECK_NE(Buffer->Data, nullptr);
|
DCHECK_NE(Buffer->Data, nullptr);
|
||||||
|
@ -244,18 +254,26 @@ void serialize() {
|
||||||
|
|
||||||
void reset() {
|
void reset() {
|
||||||
SpinMutexLock Lock(&GlobalMutex);
|
SpinMutexLock Lock(&GlobalMutex);
|
||||||
// Clear out the profile buffers that have been serialized.
|
if (ProfileBuffers != nullptr) {
|
||||||
for (uptr I = 0; I < ProfileBuffers.Size(); ++I)
|
// Clear out the profile buffers that have been serialized.
|
||||||
InternalFree(ProfileBuffers[I].Data);
|
for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
|
||||||
ProfileBuffers.Reset();
|
InternalFree((*ProfileBuffers)[I].Data);
|
||||||
|
ProfileBuffers->Reset();
|
||||||
// Clear out the function call tries per thread.
|
InternalFree(ProfileBuffers);
|
||||||
for (uptr I = 0; I < ThreadTries.Size(); ++I) {
|
ProfileBuffers = nullptr;
|
||||||
auto &T = ThreadTries[I];
|
}
|
||||||
T.Trie->~FunctionCallTrie();
|
|
||||||
InternalFree(T.Trie);
|
if (ThreadTries != nullptr) {
|
||||||
|
// Clear out the function call tries per thread.
|
||||||
|
for (uptr I = 0; I < ThreadTries->Size(); ++I) {
|
||||||
|
auto &T = (*ThreadTries)[I];
|
||||||
|
T.Trie->~FunctionCallTrie();
|
||||||
|
InternalFree(T.Trie);
|
||||||
|
}
|
||||||
|
ThreadTries->Reset();
|
||||||
|
InternalFree(ThreadTries);
|
||||||
|
ThreadTries = nullptr;
|
||||||
}
|
}
|
||||||
ThreadTries.Reset();
|
|
||||||
|
|
||||||
// Reset the global allocators.
|
// Reset the global allocators.
|
||||||
if (GlobalAllocators != nullptr) {
|
if (GlobalAllocators != nullptr) {
|
||||||
|
@ -267,18 +285,29 @@ void reset() {
|
||||||
InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
|
InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
|
||||||
new (GlobalAllocators) FunctionCallTrie::Allocators();
|
new (GlobalAllocators) FunctionCallTrie::Allocators();
|
||||||
*GlobalAllocators = FunctionCallTrie::InitAllocators();
|
*GlobalAllocators = FunctionCallTrie::InitAllocators();
|
||||||
|
ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
|
||||||
|
InternalAlloc(sizeof(Vector<ThreadTrie>)));
|
||||||
|
new (ThreadTries) Vector<ThreadTrie>();
|
||||||
|
ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
|
||||||
|
InternalAlloc(sizeof(Vector<ProfileBuffer>)));
|
||||||
|
new (ProfileBuffers) Vector<ProfileBuffer>();
|
||||||
}
|
}
|
||||||
|
|
||||||
XRayBuffer nextBuffer(XRayBuffer B) {
|
XRayBuffer nextBuffer(XRayBuffer B) {
|
||||||
SpinMutexLock Lock(&GlobalMutex);
|
SpinMutexLock Lock(&GlobalMutex);
|
||||||
if (B.Data == nullptr && ProfileBuffers.Size())
|
|
||||||
return {ProfileBuffers[0].Data, ProfileBuffers[0].Size};
|
if (ProfileBuffers == nullptr || ProfileBuffers->Size() == 0)
|
||||||
|
return {nullptr, 0};
|
||||||
|
|
||||||
|
if (B.Data == nullptr)
|
||||||
|
return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};
|
||||||
|
|
||||||
BlockHeader Header;
|
BlockHeader Header;
|
||||||
internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
|
internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
|
||||||
auto NextBlock = Header.BlockNum + 1;
|
auto NextBlock = Header.BlockNum + 1;
|
||||||
if (NextBlock < ProfileBuffers.Size())
|
if (NextBlock < ProfileBuffers->Size())
|
||||||
return {ProfileBuffers[NextBlock].Data, ProfileBuffers[NextBlock].Size};
|
return {(*ProfileBuffers)[NextBlock].Data,
|
||||||
|
(*ProfileBuffers)[NextBlock].Size};
|
||||||
return {nullptr, 0};
|
return {nullptr, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -277,7 +277,7 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
|
||||||
// We need to reset the profile data collection implementation now.
|
// We need to reset the profile data collection implementation now.
|
||||||
profileCollectorService::reset();
|
profileCollectorService::reset();
|
||||||
|
|
||||||
// We need to set up the at-thread-exit handler.
|
// We need to set up the exit handlers.
|
||||||
static pthread_once_t Once = PTHREAD_ONCE_INIT;
|
static pthread_once_t Once = PTHREAD_ONCE_INIT;
|
||||||
pthread_once(&Once, +[] {
|
pthread_once(&Once, +[] {
|
||||||
pthread_key_create(&ProfilingKey, +[](void *P) {
|
pthread_key_create(&ProfilingKey, +[](void *P) {
|
||||||
|
@ -288,6 +288,19 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
|
||||||
|
|
||||||
postCurrentThreadFCT(TLD);
|
postCurrentThreadFCT(TLD);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// We also need to set up an exit handler, so that we can get the profile
|
||||||
|
// information at exit time. We use the C API to do this, to not rely on C++
|
||||||
|
// ABI functions for registering exit handlers.
|
||||||
|
Atexit(+[] {
|
||||||
|
// Finalize and flush.
|
||||||
|
if (profilingFinalize() != XRAY_LOG_FINALIZED)
|
||||||
|
return;
|
||||||
|
if (profilingFlush() != XRAY_LOG_FLUSHED)
|
||||||
|
return;
|
||||||
|
if (Verbosity())
|
||||||
|
Report("XRay Profile flushed at exit.");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
|
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
|
||||||
|
@ -321,13 +334,16 @@ bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
|
||||||
profilingFlush,
|
profilingFlush,
|
||||||
};
|
};
|
||||||
auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
|
auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
|
||||||
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
|
if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
|
||||||
Verbosity())
|
if (Verbosity())
|
||||||
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
|
Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
|
||||||
"%d\n",
|
"%d\n",
|
||||||
RegistrationResult);
|
RegistrationResult);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
|
if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
|
||||||
__xray_set_log_impl(Impl);
|
__xray_log_select_mode("xray_profiling");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ XRAY_FLAG(uptr, global_allocator_max, 2 << 24,
|
||||||
"Maximum size of the global allocator for profile storage.")
|
"Maximum size of the global allocator for profile storage.")
|
||||||
XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
|
XRAY_FLAG(uptr, stack_allocator_max, 2 << 24,
|
||||||
"Maximum size of the traversal stack allocator.")
|
"Maximum size of the traversal stack allocator.")
|
||||||
XRAY_FLAG(int, grace_period_ms, 100,
|
XRAY_FLAG(int, grace_period_ms, 1,
|
||||||
"Profile collection will wait this much time in milliseconds before "
|
"Profile collection will wait this much time in milliseconds before "
|
||||||
"resetting the global state. This gives a chance to threads to "
|
"resetting the global state. This gives a chance to threads to "
|
||||||
"notice that the profiler has been finalized and clean up.")
|
"notice that the profiler has been finalized and clean up.")
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// RUN: XRAY_PROFILING_OPTIONS=no_flush=1 %run %t
|
// RUN: XRAY_PROFILING_OPTIONS=no_flush=1 %run %t
|
||||||
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
|
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
|
||||||
// RUN: PROFILES=`ls xray-log.profiling-multi-* | wc -l`
|
// RUN: PROFILES=`ls xray-log.profiling-multi-* | wc -l`
|
||||||
// RUN: [ $PROFILES -eq 1 ]
|
// RUN: [ $PROFILES -ge 1 ]
|
||||||
// RUN: rm -f xray-log.profiling-multi-*
|
// RUN: rm -f xray-log.profiling-multi-*
|
||||||
//
|
//
|
||||||
// REQUIRES: x86_64-target-arch
|
// REQUIRES: x86_64-target-arch
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// RUN: XRAY_PROFILING_OPTIONS=no_flush=true %run %t
|
// RUN: XRAY_PROFILING_OPTIONS=no_flush=true %run %t
|
||||||
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
|
// RUN: XRAY_OPTIONS=verbosity=1 %run %t
|
||||||
// RUN: PROFILES=`ls xray-log.profiling-single-* | wc -l`
|
// RUN: PROFILES=`ls xray-log.profiling-single-* | wc -l`
|
||||||
// RUN: [ $PROFILES -eq 2 ]
|
// RUN: [ $PROFILES -ge 2 ]
|
||||||
// RUN: rm -f xray-log.profiling-single-*
|
// RUN: rm -f xray-log.profiling-single-*
|
||||||
//
|
//
|
||||||
// REQUIRES: x86_64-target-arch
|
// REQUIRES: x86_64-target-arch
|
||||||
|
|
Loading…
Reference in New Issue