[profile] Static counter allocation for value profiling (part-2)

Differential Revision: http://reviews.llvm.org/D20460

llvm-svn: 270337
This commit is contained in:
Xinliang David Li 2016-05-21 22:55:45 +00:00
parent b628dd3568
commit 4e8754d2cb
11 changed files with 136 additions and 24 deletions

View File

@ -80,7 +80,7 @@ INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \
FunctionAddr)
INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \
ConstantPointerNull::get(Int8PtrTy))
ValuesPtrExpr)
INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \
ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters))
INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
@ -597,6 +597,12 @@ ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
#define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names
#define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts
/* Array of pointers. Each pointer points to a list
* of value nodes associated with one value site.
*/
#define INSTR_PROF_VALS_SECT_NAME __llvm_prf_values
/* Value profile nodes section. */
#define INSTR_PROF_VNODES_SECT_NAME __llvm_prf_vnodes
#define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap
#define INSTR_PROF_DATA_SECT_NAME_STR \
@ -607,6 +613,10 @@ ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
#define INSTR_PROF_COVMAP_SECT_NAME_STR \
INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME)
#define INSTR_PROF_VALS_SECT_NAME_STR \
INSTR_PROF_QUOTE(INSTR_PROF_VALS_SECT_NAME)
#define INSTR_PROF_VNODES_SECT_NAME_STR \
INSTR_PROF_QUOTE(INSTR_PROF_VNODES_SECT_NAME)
/* Macros to define start/stop section symbol for a given
* section on Linux. For instance

View File

@ -61,6 +61,8 @@ const char *__llvm_profile_begin_names(void);
const char *__llvm_profile_end_names(void);
uint64_t *__llvm_profile_begin_counters(void);
uint64_t *__llvm_profile_end_counters(void);
ValueProfNode *__llvm_profile_begin_vnodes();
ValueProfNode *__llvm_profile_end_vnodes();
/*!
* \brief Clear profile counters to zero.

View File

@ -161,6 +161,9 @@ COMPILER_RT_VISIBILITY extern void (*FreeHook)(void *);
COMPILER_RT_VISIBILITY extern uint8_t *DynamicBufferIOBuffer;
COMPILER_RT_VISIBILITY extern uint32_t VPBufferSize;
COMPILER_RT_VISIBILITY extern uint32_t VPMaxNumValsPerSite;
/* Pointer to the start of static value counters to be allocted. */
COMPILER_RT_VISIBILITY extern ValueProfNode *CurrentVNode;
COMPILER_RT_VISIBILITY extern ValueProfNode *EndVNode;
extern void (*VPMergeHook)(struct ValueProfData *, __llvm_profile_data *);
#endif

View File

@ -29,6 +29,13 @@ COMPILER_RT_VISIBILITY
extern uint64_t
CountersEnd __asm("section$end$__DATA$" INSTR_PROF_CNTS_SECT_NAME_STR);
COMPILER_RT_VISIBILITY
extern ValueProfNode
VNodestart __asm("section$start$__DATA$" INSTR_PROF_VNODES_SECT_NAME_STR);
COMPILER_RT_VISIBILITY
extern ValueProfNode
VNodesEnd __asm("section$end$__DATA$" INSTR_PROF_VNODES_SECT_NAME_STR);
COMPILER_RT_VISIBILITY
const __llvm_profile_data *__llvm_profile_begin_data(void) {
return &DataStart;
@ -43,4 +50,11 @@ COMPILER_RT_VISIBILITY
uint64_t *__llvm_profile_begin_counters(void) { return &CountersStart; }
COMPILER_RT_VISIBILITY
uint64_t *__llvm_profile_end_counters(void) { return &CountersEnd; }
COMPILER_RT_VISIBILITY
ValueProfNode *__llvm_profile_begin_vnodes(void) {
return &VNodesStart;
}
COMPILER_RT_VISIBILITY
ValueProfNode *__llvm_profile_end_vnode(void) { return &VNodesEnd; }
#endif

View File

@ -18,6 +18,8 @@
#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_SECT_NAME)
#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_SECT_NAME)
#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_SECT_NAME)
#define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_SECT_NAME)
#define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_SECT_NAME)
/* Declare section start and stop symbols for various sections
* generated by compiler instrumentation.
@ -28,6 +30,8 @@ extern uint64_t PROF_CNTS_START COMPILER_RT_VISIBILITY;
extern uint64_t PROF_CNTS_STOP COMPILER_RT_VISIBILITY;
extern char PROF_NAME_START COMPILER_RT_VISIBILITY;
extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY;
extern ValueProfNode PROF_VNODES_START COMPILER_RT_VISIBILITY;
extern ValueProfNode PROF_VNODES_STOP COMPILER_RT_VISIBILITY;
/* Add dummy data to ensure the section is always created. */
__llvm_profile_data
@ -35,6 +39,7 @@ __llvm_profile_data
uint64_t
__prof_cnts_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_CNTS_SECT_NAME_STR);
char __prof_nms_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_NAME_SECT_NAME_STR);
ValueProfNode __prof_vnodes_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_VNODES_SECT_NAME_STR);
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_begin_data(void) {
@ -56,4 +61,13 @@ COMPILER_RT_VISIBILITY uint64_t *__llvm_profile_begin_counters(void) {
COMPILER_RT_VISIBILITY uint64_t *__llvm_profile_end_counters(void) {
return &PROF_CNTS_STOP;
}
COMPILER_RT_VISIBILITY ValueProfNode *
__llvm_profile_begin_vnodes(void) {
return &PROF_VNODES_START;
}
COMPILER_RT_VISIBILITY ValueProfNode *
__llvm_profile_end_vnodes(void) {
return &PROF_VNODES_STOP;
}
#endif

View File

@ -80,4 +80,12 @@ COMPILER_RT_VISIBILITY
uint64_t *__llvm_profile_begin_counters(void) { return CountersFirst; }
COMPILER_RT_VISIBILITY
uint64_t *__llvm_profile_end_counters(void) { return CountersLast; }
COMPILER_RT_VISIBILITY
ValueProfNode *__llvm_profile_begin_vnodes(void) {
return 0;
}
COMPILER_RT_VISIBILITY
ValueProfNode *__llvm_profile_end_vnode(void) { return 0; }
#endif

View File

@ -18,13 +18,6 @@
#define INSTR_PROF_COMMON_API_IMPL
#include "InstrProfData.inc"
#define PROF_OOM(Msg) PROF_ERR(Msg ":%s\n", "Out of memory");
#define PROF_OOM_RETURN(Msg) \
{ \
PROF_OOM(Msg) \
return NULL; \
}
COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
INSTR_PROF_MAX_NUM_VAL_PER_SITE;
@ -35,6 +28,19 @@ COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
VPMaxNumValsPerSite = atoi(Str);
if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
CurrentVNode = __llvm_profile_begin_vnodes();
EndVNode = __llvm_profile_end_vnodes();
if (!(EndVNode > CurrentVNode)) {
CurrentVNode = 0;
EndVNode = 0;
}
/* Adjust max vals per site to a smaller value
* when static allocation is in use. */
else {
if (!Str || !Str[0])
VPMaxNumValsPerSite = 8;
}
}
COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
@ -83,14 +89,35 @@ static int allocateValueProfileCounters(__llvm_profile_data *Data) {
return 1;
}
COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = 0;
COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = 0;
static int hasNoStaticCounters() { return (EndVNode == 0); }
static ValueProfNode *allocateOneNode(__llvm_profile_data *Data, uint32_t Index,
uint64_t Value) {
ValueProfNode *Node;
if (hasNoStaticCounters())
return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
if (Node >= EndVNode) {
PROF_WARN("Running out of nodes: site_%d@func_%" PRIu64
", value=%" PRIu64 " \n", Index, Data->NameRef, Value);
return 0;
}
return Node;
}
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
uint32_t CounterIndex) {
__llvm_profile_data *PData = (__llvm_profile_data *)Data;
if (!PData)
return;
/* This path will never be taken when value site array is allocated
statically at compile time. */
if (!PData->Values) {
if (!allocateValueProfileCounters(PData))
return;
@ -154,7 +181,7 @@ __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
return;
}
CurrentVNode = (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
CurrentVNode = allocateOneNode(PData, CounterIndex, TargetValue);
if (!CurrentVNode)
return;
@ -168,7 +195,7 @@ __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
else if (PrevVNode && !PrevVNode->Next)
Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurrentVNode);
if (!Success) {
if (!Success && hasNoStaticCounters()) {
free(CurrentVNode);
return;
}

View File

@ -1,10 +1,16 @@
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -o %t %S/Inputs/instrprof-value-prof-evict.c
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=10 -o %t %S/Inputs/instrprof-value-prof-evict.c
// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
// RUN: llvm-profdata merge -o %t.profdata %t.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata | FileCheck %S/Inputs/instrprof-value-prof-evict.c
// IR level instrumentation
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -o %t.ir %S/Inputs/instrprof-value-prof-evict.c
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=10 -Xclang -fprofile-instrument=llvm -o %t.ir %S/Inputs/instrprof-value-prof-evict.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw %run %t.ir
// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.profdata | FileCheck %S/Inputs/instrprof-value-prof-evict.c
// IR level instrumentation, dynamic allocation
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=false -Xclang -fprofile-instrument=llvm -o %t.ir.dyn %S/Inputs/instrprof-value-prof-evict.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.dyn.profraw %run %t.ir.dyn
// RUN: llvm-profdata merge -o %t.ir.dyn.profdata %t.ir.dyn.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.dyn.profdata | FileCheck %S/Inputs/instrprof-value-prof-evict.c

View File

@ -1,17 +1,37 @@
// RUN: mkdir -p %t.d
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -fPIC -shared -o %t.d/t.shared -DSHARED_LIB %S/Inputs/instrprof-value-prof-real.c
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -o %t -rpath %t.d %t.d/t.shared -DCALL_SHARED %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.shared -DSHARED_LIB %S/Inputs/instrprof-value-prof-real.c
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -o %t -rpath %t.d %t.d/t.shared -DCALL_SHARED %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t
// RUN: llvm-profdata merge -o %t.profdata %t.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c
// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
// IR level instrumentation
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -fPIC -shared -o %t.d/t.ir.shared -DSHARED_LIB %S/Inputs/instrprof-value-prof-real.c
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -rpath %t.d -o %t.ir %t.d/t.ir.shared -DCALL_SHARED %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw %run %t.ir
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.ir.shared -DSHARED_LIB %S/Inputs/instrprof-value-prof-real.c
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -rpath %t.d -o %t.ir %t.d/t.ir.shared -DCALL_SHARED %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t.ir
// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c
// RUN: llvm-profdata merge -text %t.ir.profdata -o %t.ir.proftxt
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
// RUN: FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.proftxt
// IR level instrumentation: dynamic memory allocation
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.ir.dyn.shared -DSHARED_LIB %S/Inputs/instrprof-value-prof-real.c
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -rpath %t.d -o %t.ir.dyn %t.d/t.ir.dyn.shared -DCALL_SHARED %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.dyn.profraw %run %t.ir.dyn
// RUN: llvm-profdata merge -o %t.ir.dyn.profdata %t.ir.dyn.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.dyn.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c
// RUN: llvm-profdata merge -text %t.ir.dyn.profdata -o %t.ir.dyn.proftxt
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.dyn.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
// RUN: FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.dyn.proftxt
// IR level instrumentation: main program uses static counter, shared library uses dynamic memory alloc.
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.ir.dyn.shared -DSHARED_LIB %S/Inputs/instrprof-value-prof-real.c
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -rpath %t.d -o %t.ir.mixed %t.d/t.ir.dyn.shared -DCALL_SHARED %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.mixed.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t.ir.mixed
// RUN: llvm-profdata merge -o %t.ir.mixed.profdata %t.ir.mixed.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.mixed.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c
// RUN: llvm-profdata merge -text %t.ir.mixed.profdata -o %t.ir.mixed.proftxt
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.mixed.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
// RUN: FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.mixed.proftxt

View File

@ -1,4 +1,4 @@
// RUN: %clang_profgen -O2 -o %t %s
// RUN: %clang_profgen -mllvm -vp-static-alloc=false -O2 -o %t %s
// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
// RUN: env LLVM_PROFILE_FILE=%t-2.profraw %run %t DO_NOT_INSTRUMENT
// RUN: llvm-profdata merge -o %t.profdata %t.profraw

View File

@ -1,12 +1,20 @@
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -o %t %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -o %t %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t
// RUN: llvm-profdata merge -o %t.profdata %t.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c
// IR level instrumentation
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -o %t.ir %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw %run %t.ir
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -o %t.ir %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t.ir
// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c
// RUN: llvm-profdata merge -text %t.ir.profdata -o %t.ir.proftxt
// RUN: FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.proftxt
// IR level instrumentation with dynamic memory allocation
// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -o %t.ir.dyn %S/Inputs/instrprof-value-prof-real.c
// RUN: env LLVM_PROFILE_FILE=%t.ir.dyn.profraw %run %t.ir.dyn
// RUN: llvm-profdata merge -o %t.ir.dyn.profdata %t.ir.dyn.profraw
// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.dyn.profdata | FileCheck %S/Inputs/instrprof-value-prof-real.c
// RUN: llvm-profdata merge -text %t.ir.dyn.profdata -o %t.ir.dyn.proftxt
// RUN: FileCheck %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.dyn.proftxt