[XRay][compiler-rt] Support sled versioning for custom event sleds

Summary:
This change introduces versions to the instrumentation map entries we
emit for XRay instrumentation points. The version emitted by the LLVM
back-end is currently 0, and versions may count up to 255 (unsigned
char).

This change is in preparation for supporting the newer version of the
custom event sleds that will be emitted by the LLVM compiler.

While we're here, we take the opportunity to stash more registers and
align the stack properly in the __xray_CustomEvent trampoline.

Reviewers: kpw, pcc, dblaikie

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D36816

llvm-svn: 311524
Dean Michael Berris  2017-08-23 04:42:37 +00:00
commit 71f88a955d (parent c4ea1010c1)
5 changed files with 85 additions and 22 deletions

lib/xray/xray_interface_internal.h

@@ -28,13 +28,15 @@ struct XRaySledEntry {
   uint64_t Function;
   unsigned char Kind;
   unsigned char AlwaysInstrument;
-  unsigned char Padding[14]; // Need 32 bytes
+  unsigned char Version;
+  unsigned char Padding[13]; // Need 32 bytes
 #elif SANITIZER_WORDSIZE == 32
   uint32_t Address;
   uint32_t Function;
   unsigned char Kind;
   unsigned char AlwaysInstrument;
-  unsigned char Padding[6]; // Need 16 bytes
+  unsigned char Version;
+  unsigned char Padding[5]; // Need 16 bytes
 #else
 #error "Unsupported word size."
 #endif
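
The padding arithmetic can be sanity-checked at compile time: a sled entry must
stay exactly 32 bytes on 64-bit targets, so adding the one-byte Version field
shrinks the padding from 14 to 13 bytes. A minimal stand-alone sketch of that
check (the struct name is illustrative, not from the tree):

    #include <cstdint>

    // Stand-alone mirror of the 64-bit layout above, for size checking only.
    struct SledEntry64 {
      uint64_t Address;
      uint64_t Function;
      unsigned char Kind;
      unsigned char AlwaysInstrument;
      unsigned char Version;     // the new field
      unsigned char Padding[13]; // 8 + 8 + 1 + 1 + 1 + 13 == 32
    };
    static_assert(sizeof(SledEntry64) == 32, "sled entries must stay 32 bytes");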

lib/xray/xray_trampoline_x86_64.S

@@ -202,10 +202,7 @@ __xray_ArgLoggerEntry:
 	.type __xray_CustomEvent,@function
 __xray_CustomEvent:
 	.cfi_startproc
-	subq $16, %rsp
-	.cfi_def_cfa_offset 24
-	movq %rbp, 8(%rsp)
-	movq %rax, 0(%rsp)
+	SAVE_REGISTERS
 	// We take two arguments to this trampoline, which should be in rdi and rsi
 	// already. We also make sure that we stash %rax because we use that register
@@ -215,14 +212,20 @@ __xray_CustomEvent:
 	je .LcustomEventCleanup
 	// At this point we know that rcx and rdx already has the data, so we just
-	// call the logging handler.
+	// call the logging handler, after aligning the stack to a 16-byte boundary.
+	// The approach we're taking here uses additional stack space to stash the
+	// stack pointer twice before aligning the pointer to 16-bytes. If the stack
+	// was 8-byte aligned, it will become 16-byte aligned -- when restoring the
+	// pointer, we can always look -8 bytes from the current position to get
+	// either of the values we've stashed in the first place.
+	pushq %rsp
+	pushq (%rsp)
+	andq $-0x10, %rsp
 	callq *%rax
+	movq 8(%rsp), %rsp
.LcustomEventCleanup:
-	movq 0(%rsp), %rax
-	movq 8(%rsp), %rbp
-	addq $16, %rsp
-	.cfi_def_cfa_offset 8
+	RESTORE_REGISTERS
 	retq
.Ltmp8:
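
To see why the trampoline can always restore %rsp from 8(%rsp): after the two
pushes, the top two stack slots both hold the pre-push stack pointer, and the
andq either leaves %rsp in place (already 16-byte aligned) or moves it down by
exactly 8, so the saved value is at 8(%rsp) in both cases. A small C++
simulation of that reasoning over a fake downward-growing stack (illustrative
only, not from the tree):

    #include <cassert>
    #include <cstdint>

    int main() {
      alignas(16) uint64_t stack[8] = {}; // fake stack, grows downward
      uint64_t base = reinterpret_cast<uint64_t>(stack);
      auto mem = [&](uint64_t addr) -> uint64_t & {
        return stack[(addr - base) / 8];
      };
      for (uint64_t off : {48u, 56u}) { // 16-byte- and 8-byte-aligned %rsp
        uint64_t rsp = base + off, orig = rsp;
        rsp -= 8; mem(rsp) = orig;         // pushq %rsp
        rsp -= 8; mem(rsp) = mem(rsp + 8); // pushq (%rsp)
        rsp &= ~uint64_t{0xF};             // andq $-0x10, %rsp
        assert(rsp % 16 == 0);             // the handler sees an aligned stack
        rsp = mem(rsp + 8);                // movq 8(%rsp), %rsp
        assert(rsp == orig);               // original %rsp recovered either way
      }
    }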

lib/xray/xray_x86_64.cc

@@ -76,6 +76,7 @@ static constexpr uint8_t CallOpCode = 0xe8;
 static constexpr uint16_t MovR10Seq = 0xba41;
 static constexpr uint16_t Jmp9Seq = 0x09eb;
 static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
 static constexpr uint8_t JmpOpCode = 0xe9;
 static constexpr uint8_t RetOpCode = 0xc3;
 static constexpr uint16_t NopwSeq = 0x9066;
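
For reference, 0x0feb is the little-endian encoding of the two-byte instruction
eb 0f, that is, a short jmp with an 8-bit displacement of 15. A quick sketch
demonstrating the byte order on an x86-64 (little-endian) host, not taken from
the tree:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      constexpr uint16_t Jmp15Seq = 0x0feb;
      uint8_t bytes[2];
      std::memcpy(bytes, &Jmp15Seq, sizeof(bytes));
      // On x86-64 this prints "eb 0f": opcode 0xeb (short jmp), displacement 15.
      std::printf("%02x %02x\n", bytes[0], bytes[1]);
    }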
@@ -207,8 +208,10 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
   // Here we do the dance of replacing the following sled:
   //
+  // In Version 0:
+  //
   // xray_sled_n:
-  //   jmp +19          // 2 bytes
+  //   jmp +20          // 2 bytes
   //   ...
   //
   // With the following:
@@ -216,24 +219,35 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
   //   nopw             // 2 bytes*
   //   ...
   //
-  // We need to do this in the following order:
-  //
-  // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is
-  //    the relative offset to the __xray_CustomEvent trampoline.
-  // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'.
-  //    This allows us to "enable" this code once the changes have committed.
+  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
   //
-  // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'.
+  // ---
+  //
+  // In Version 1:
+  //
+  // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+  // to a jmp, use 15 bytes instead.
+  //
   if (Enable) {
     std::atomic_store_explicit(
         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
         std::memory_order_release);
   } else {
-    std::atomic_store_explicit(
-        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
-        std::memory_order_release);
-  }
+    switch (Sled.Version) {
+    case 1:
+      std::atomic_store_explicit(
+          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
+          std::memory_order_release);
+      break;
+    case 0:
+    default:
+      std::atomic_store_explicit(
+          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+          std::memory_order_release);
+      break;
+    }
+  }
   return false;
 }
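
These writes work because a single aligned two-byte store is atomic on x86-64:
a thread racing through the sled observes either the old instruction or the new
one, never a torn half. A self-contained sketch of the same store pattern
against a writable buffer (illustrative; the real code targets mapped
executable pages, and toggleSled is a hypothetical helper, not from the tree):

    #include <atomic>
    #include <cstdint>

    static constexpr uint16_t NopwSeq  = 0x9066; // 66 90: nopw
    static constexpr uint16_t Jmp15Seq = 0x0feb; // eb 0f: jmp +15 (version 1)
    static constexpr uint16_t Jmp20Seq = 0x14eb; // eb 14: jmp +20 (version 0)

    // Hypothetical helper mirroring the store pattern above.
    void toggleSled(uint64_t Address, bool Enable, unsigned char Version) {
      const uint16_t Seq =
          Enable ? NopwSeq : (Version == 1 ? Jmp15Seq : Jmp20Seq);
      std::atomic_store_explicit(
          reinterpret_cast<std::atomic<uint16_t> *>(Address), Seq,
          std::memory_order_release);
    }

    int main() {
      alignas(2) unsigned char sled[2] = {0xeb, 0x0f}; // starts as 'jmp +15'
      toggleSled(reinterpret_cast<uint64_t>(sled), /*Enable=*/true, /*Version=*/1);
      // sled now holds {0x66, 0x90} (nopw), written in one release store.
    }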

test/xray/TestCases/Linux/custom-event-handler-alignment.cc (new file)

@@ -0,0 +1,42 @@
+// Make sure we're aligning the stack properly when lowering the custom event
+// calls.
+//
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false" \
+// RUN:     %run %t 2>&1
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+#include <xmmintrin.h>
+#include <stdio.h>
+#include "xray/xray_interface.h"
+
+[[clang::xray_never_instrument]] __attribute__((weak)) __m128 f(__m128 *i) {
+  return *i;
+}
+
+[[clang::xray_always_instrument]] void foo() {
+  __xray_customevent(0, 0);
+  __m128 v = {};
+  f(&v);
+}
+
+[[clang::xray_always_instrument]] void bar() {
+  __xray_customevent(0, 0);
+}
+
+void printer(void* ptr, size_t size) {
+  printf("handler called\n");
+  __m128 v = {};
+  f(&v);
+}
+
+int main(int argc, char* argv[]) {
+  __xray_set_customevent_handler(printer);
+  __xray_patch();
+  foo(); // CHECK: handler called
+  bar(); // CHECK: handler called
+  __xray_unpatch();
+  __xray_remove_customevent_handler();
+  foo();
+  bar();
+}

test/xray/TestCases/Linux/custom-event-logging.cc

@@ -2,6 +2,8 @@
 //
 // RUN: %clangxx_xray -std=c++11 %s -o %t
 // RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_xray -std=c++11 -fpic -fpie %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
 // FIXME: Support this in non-x86_64 as well
 // REQUIRES: x86_64-linux
 // REQUIRES: built-in-llvm-tree