[XRay] ARM 32-bit no-Thumb support in LLVM

This is a port of XRay to 32-bit ARM; Thumb is not supported yet. As part of the port, the target-independent XRay sled bookkeeping moves up into AsmPrinter.
This is one of three commits, each to a different repository, that together make up the XRay ARM port. The other two are:

https://reviews.llvm.org/D23932 (Clang test)
https://reviews.llvm.org/D23933 (compiler-rt)

Differential Revision: https://reviews.llvm.org/D23931

llvm-svn: 281878
This commit is contained in:
Dean Michael Berris 2016-09-19 00:54:35 +00:00
parent 3c46abb2ea
commit 4640154446
17 changed files with 328 additions and 63 deletions

View File

@ -183,6 +183,34 @@ public:
MCSymbol *getSymbol(const GlobalValue *GV) const;
//===------------------------------------------------------------------===//
// XRay instrumentation implementation.
//===------------------------------------------------------------------===//
public:
// This describes the kind of sled we're storing in the XRay table. The value
// is stored as a single byte (uint8_t) per table entry, so the numeric values
// are presumably part of the contract with the XRay runtime -- confirm before
// renumbering.
enum class SledKind : uint8_t {
// Sled placed at the entry of an instrumented function.
FUNCTION_ENTER = 0,
// Sled placed at an exit (return) of an instrumented function.
FUNCTION_EXIT = 1,
// Sled for a tail-call exit; presumably paired with PATCHABLE_TAIL_CALL.
TAIL_CALL = 2,
};
// The table will contain these structs that point to the sled, the function
// containing the sled, and what kind of sled (and whether they should always
// be instrumented).
struct XRayFunctionEntry {
// Label marking the start of the sled.
const MCSymbol *Sled;
// Symbol of the function that contains the sled.
const MCSymbol *Function;
// Which kind of sled this entry describes.
SledKind Kind;
// True when the function carries "function-instrument"="xray-always".
bool AlwaysInstrument;
// The IR function the sled was recorded for. It is spelled `class Function`
// because the data member named Function above hides the type name here.
const class Function *Fn;
};
// All the sleds to be emitted.
std::vector<XRayFunctionEntry> Sleds;
// Helper function to record a given XRay sled.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//

View File

@ -977,11 +977,19 @@ def PATCHABLE_FUNCTION_ENTER : Instruction {
// Pseudo-instruction that stands in for a return instruction in an
// XRay-instrumented function. It carries the wrapped return's operands
// (variable_ops) and is itself flagged as a return (isReturn = 1).
def PATCHABLE_RET : Instruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);
let AsmString = "# XRay Function Exit.";
let AsmString = "# XRay Function Patchable RET.";
let usesCustomInserter = 1;
let hasSideEffects = 1;
let isReturn = 1;
}
// Marker pseudo-instruction placed immediately before a function's real return
// instruction. It is lowered to an XRay exit sled rather than to a return, so
// isReturn stays 0 and the original return instruction follows it unchanged.
def PATCHABLE_FUNCTION_EXIT : Instruction {
  let OutOperandList = (outs);
  let InOperandList = (ins);
  let AsmString = "# XRay Function Exit.";
  let usesCustomInserter = 1;
  // The sled can be patched at run time into a call to the tracing trampoline,
  // and the instruction has no operands tying it to the surrounding code. It
  // therefore has to be modeled as having side effects; otherwise it looks
  // dead (and freely movable) to any pass that runs after it is inserted.
  let hasSideEffects = 1;
  let isReturn = 0; // Original return instruction will follow
}
def PATCHABLE_TAIL_CALL : Instruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);

View File

@ -153,8 +153,25 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER)
/// Wraps a return instruction and its operands to enable adding nop sleds
/// either before or after the return. The nop sleds are useful for inserting
/// instrumentation instructions at runtime.
/// With this opcode the sled replaces the original return instruction.
HANDLE_TARGET_OPCODE(PATCHABLE_RET)
/// This is a marker instruction which gets translated into a nop sled, useful
/// for inserting instrumentation instructions at runtime.
/// With this opcode the sled is placed in front of the original return
/// instruction, which is kept.
/// Replacing the return, as is done on x86_64, is not possible on ARM because
/// ARM has multiple return instructions. Furthermore, the CPU allows
/// parameterized and even conditional return instructions. The current ARM
/// implementation relies on the observation that LLVM does not currently seem
/// to generate conditional return instructions.
/// On ARM, a single instruction can pop multiple registers from the stack and
/// return at the same time (it just pops the pc register too), and LLVM
/// sometimes generates it. So we can't insert the sled between this stack
/// adjustment and the return without splitting the original instruction into
/// two instructions. So on ARM, rather than jumping into the exit trampoline,
/// we call it; it does the tracing, preserves the stack and returns.
HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
/// Wraps a tail call instruction and its operands to enable adding nop sleds
/// either before or after the tail exit. We use this as a disambiguation from
/// PATCHABLE_RET which specifically only works for return instructions.

View File

@ -71,6 +71,8 @@ public:
virtual ~TargetSubtargetInfo();
/// Returns true if XRay instrumentation can be emitted for this subtarget.
/// The default is false; a target opts in by overriding this.
virtual bool isXRaySupported() const { return false; }
// Interfaces to the major aspects of target machine information:
//
// -- Instruction opcode and operand information

View File

@ -2600,3 +2600,13 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
AsmPrinterHandler::~AsmPrinterHandler() {}
void AsmPrinterHandler::markFunctionEnd() {}
// Appends an entry for the sled labelled by Sled to the pending sled list.
// The entry remembers the symbol of the function currently being emitted, the
// sled kind, the IR function owning MI, and whether that function carries
// "function-instrument"="xray-always".
void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
                            SledKind Kind) {
  const auto *OwningFn = MI.getParent()->getParent()->getFunction();
  const auto InstrumentAttr = OwningFn->getFnAttribute("function-instrument");

  bool IsAlways = false;
  if (InstrumentAttr.isStringAttribute())
    IsAlways = InstrumentAttr.getValueAsString() == "xray-always";

  XRayFunctionEntry Entry{Sled, CurrentFnSym, Kind, IsAlways, OwningFn};
  Sleds.push_back(Entry);
}

View File

@ -34,37 +34,33 @@ struct XRayInstrumentation : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override;
private:
// Replace the original RET instruction with the exit sled code (the
// "patchable ret" pseudo-instruction), so that at runtime XRay can replace
// the sled with code that jumps to the XRay trampoline, which calls the
// tracing handler and, at the end, issues the RET instruction itself.
// This is the approach used on CPUs which have a single RET instruction,
// like x86/x86_64.
void replaceRetWithPatchableRet(MachineFunction &MF,
const TargetInstrInfo *TII);
// Insert the exit sled code (the "patchable function exit"
// pseudo-instruction) in front of the original return instruction, keeping
// the original return instruction right after the exit sled code.
// This is the approach used on CPUs which have multiple options for the
// return instruction, like ARM. For such CPUs we can't just jump into the
// XRay trampoline and issue a single return instruction there. Instead we
// have to call the trampoline and return from it to the original return
// instruction of the function being instrumented.
void prependRetWithPatchableExit(MachineFunction &MF,
const TargetInstrInfo *TII);
};
}
} // anonymous namespace
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
auto &F = *MF.getFunction();
auto InstrAttr = F.getFnAttribute("function-instrument");
bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
InstrAttr.isStringAttribute() &&
InstrAttr.getValueAsString() == "xray-always";
Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
unsigned XRayThreshold = 0;
if (!AlwaysInstrument) {
if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
return false; // XRay threshold attribute not found.
if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
return false; // Invalid value for threshold.
if (F.size() < XRayThreshold)
return false; // Function is too small.
}
// FIXME: Do the loop triviality analysis here or in an earlier pass.
// First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
// MachineFunction.
auto &FirstMBB = *MF.begin();
auto &FirstMI = *FirstMBB.begin();
auto *TII = MF.getSubtarget().getInstrInfo();
BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
// Then we look for *all* terminators and returns, then replace those with
void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
const TargetInstrInfo *TII)
{
// We look for *all* terminators and returns, then replace those with
// PATCHABLE_RET instructions.
SmallVector<MachineInstr *, 4> Terminators;
for (auto &MBB : MF) {
@ -92,7 +88,68 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
for (auto &I : Terminators)
I->eraseFromParent();
}
// Walks the terminators of every basic block in MF and places a
// PATCHABLE_FUNCTION_EXIT pseudo-instruction directly in front of each one
// that is a return. The return instruction itself is left in place.
void XRayInstrumentation::prependRetWithPatchableExit(
    MachineFunction &MF, const TargetInstrInfo *TII) {
  for (auto &Block : MF) {
    for (auto &Term : Block.terminators()) {
      // Only returns get an exit sled; other terminators are skipped.
      if (!Term.isReturn())
        continue;
      BuildMI(Block, Term, Term.getDebugLoc(),
              TII->get(TargetOpcode::PATCHABLE_FUNCTION_EXIT));
    }
  }
}
// Instruments MF with XRay sleds when its IR function asks for it.
//
// A function is instrumented when it carries
// "function-instrument"="xray-always", or when it carries a valid
// "xray-instruction-threshold" attribute and is at least that large.
// Instrumentation consists of a PATCHABLE_FUNCTION_ENTER at the first
// instruction of the function plus exit sleds whose form depends on the
// architecture (see the switch below).
//
// Returns true if the function was modified. Returns false, leaving MF
// untouched, when the function is not selected, contains no instructions, or
// the subtarget reports XRay as unsupported (an error is emitted in that last
// case).
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
  auto &F = *MF.getFunction();
  auto InstrAttr = F.getFnAttribute("function-instrument");
  bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
                          InstrAttr.isStringAttribute() &&
                          InstrAttr.getValueAsString() == "xray-always";
  Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
  unsigned XRayThreshold = 0;
  if (!AlwaysInstrument) {
    if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
      return false; // XRay threshold attribute not found.
    if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
      return false; // Invalid value for threshold.
    // NOTE(review): F.size() looks like it counts IR basic blocks rather than
    // instructions, which does not match the attribute name -- confirm.
    if (F.size() < XRayThreshold)
      return false; // Function is too small.
  }

  // Locate the first block that actually contains an instruction. The first
  // block of a function may be empty, and dereferencing begin() of an empty
  // block is undefined behaviour, so it must not be assumed to hold the first
  // instruction.
  auto MBI = MF.begin();
  while (MBI != MF.end() && MBI->empty())
    ++MBI;
  if (MBI == MF.end())
    return false; // No instructions at all: nothing to instrument.

  auto &FirstMBB = *MBI;
  auto &FirstMI = *FirstMBB.begin();

  if (!MF.getSubtarget().isXRaySupported()) {
    FirstMI.emitError("An attempt to perform XRay instrumentation for an"
                      " unsupported target.");
    return false;
  }

  // FIXME: Do the loop triviality analysis here or in an earlier pass.

  // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
  // MachineFunction.
  auto *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
          TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));

  switch (MF.getTarget().getTargetTriple().getArch()) {
  case Triple::ArchType::arm:
  case Triple::ArchType::thumb:
    // For the architectures which don't have a single return instruction
    prependRetWithPatchableExit(MF, TII);
    break;
  default:
    // For the architectures that have a single return instruction (such as
    // RETQ on x86_64).
    replaceRetWithPatchableRet(MF, TII);
    break;
  }
  return true;
}

View File

@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
// Emit the XRay table for this function.
EmitXRayTable();
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
@ -2019,6 +2022,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
return;
}
case ARM::PATCHABLE_FUNCTION_ENTER:
LowerPATCHABLE_FUNCTION_ENTER(*MI);
return;
case ARM::PATCHABLE_FUNCTION_EXIT:
LowerPATCHABLE_FUNCTION_EXIT(*MI);
return;
}
MCInst TmpInst;

View File

@ -101,7 +101,19 @@ public:
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
//===------------------------------------------------------------------===//
// XRay implementation
//===------------------------------------------------------------------===//
public:
// XRay-specific lowering for ARM.
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
// Helper function that emits the XRay sleds we've collected for a particular
// function.
void EmitXRayTable();
private:
void EmitSled(const MachineInstr &MI, SledKind Kind);
// Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
void emitAttributes();

View File

@ -100,6 +100,10 @@ public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
// Fills NopInst with the no-op instruction to use on ELF targets. The default
// implementation simply reuses the Mach-O no-op; being virtual, it can be
// overridden by a subclass.
virtual void getNoopForElfTarget(MCInst &NopInst) const {
getNoopForMachoTarget(NopInst);
}
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;

View File

@ -21,6 +21,11 @@
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@ -150,3 +155,92 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
}
}
}
void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
{
if (MI.getParent()->getParent()->getInfo<ARMFunctionInfo>()
->isThumbFunction())
{
MI.emitError("An attempt to perform XRay instrumentation for a"
" Thumb function (not supported). Detected when emitting a sled.");
return;
}
static const int8_t NoopsInSledCount = 6;
// We want to emit the following pattern:
//
// .Lxray_sled_N:
// ALIGN
// B #20
// ; 6 NOP instructions (24 bytes)
// .tmpN
//
// We need the 24 bytes (6 instructions) because at runtime, we'd be patching
// over the full 28 bytes (7 instructions) with the following pattern:
//
// PUSH{ r0, lr }
// MOVW r0, #<lower 16 bits of function ID>
// MOVT r0, #<higher 16 bits of function ID>
// MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit>
// MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit>
// BLX ip
// POP{ r0, lr }
//
OutStreamer->EmitCodeAlignment(4);
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->EmitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
// Emit "B #20" instruction, which jumps over the next 24 bytes (because
// register pc is 8 bytes ahead of the jump instruction by the moment CPU
// is executing it).
// By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
// It is not clear why |addReg(0)| is needed (the last operand).
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
.addImm(ARMCC::AL).addReg(0));
MCInst Noop;
Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
for (int8_t I = 0; I < NoopsInSledCount; I++)
{
OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
}
OutStreamer->EmitLabel(Target);
recordSled(CurSled, MI, Kind);
}
// Lowers a PATCHABLE_FUNCTION_ENTER pseudo-instruction by emitting a sled that
// is recorded with kind FUNCTION_ENTER.
void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
{
EmitSled(MI, SledKind::FUNCTION_ENTER);
}
// Lowers a PATCHABLE_FUNCTION_EXIT pseudo-instruction by emitting a sled that
// is recorded with kind FUNCTION_EXIT.
void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
{
EmitSled(MI, SledKind::FUNCTION_EXIT);
}
void ARMAsmPrinter::EmitXRayTable()
{
if (Sleds.empty())
return;
if (Subtarget->isTargetELF()) {
auto *Section = OutContext.getELFSection(
"xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
CurrentFnSym->getName());
auto PrevSection = OutStreamer->getCurrentSectionOnly();
OutStreamer->SwitchSection(Section);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 4);
OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(6);
}
OutStreamer->SwitchSection(PrevSection);
}
Sleds.clear();
}

View File

@ -101,6 +101,11 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
TLInfo(TM, *this) {}
// Reports whether XRay instrumentation is available on this subtarget: it
// needs ARMv6 operations and ARM (non-Thumb) instructions, and is never
// available on Windows.
bool ARMSubtarget::isXRaySupported() const {
  if (!hasV6Ops() || !hasARMOps())
    return false;
  // We don't currently support Thumb, but Windows requires Thumb.
  return !isTargetWindows();
}
void ARMSubtarget::initializeEnvironment() {
// MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
// directly from it, but we can try to make sure they're consistent when both

View File

@ -540,6 +540,8 @@ public:
}
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
virtual bool isXRaySupported() const override;
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
bool isAAPCS16_ABI() const;

View File

@ -71,27 +71,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
StackMapShadowTracker SMShadowTracker;
// This describes the kind of sled we're storing in the XRay table. Each
// enumerator has a fixed one-byte (uint8_t) value.
enum class SledKind : uint8_t {
// Sled at the entry of an instrumented function.
FUNCTION_ENTER = 0,
// Sled at an exit (return) of an instrumented function.
FUNCTION_EXIT = 1,
// Sled for a tail-call exit.
TAIL_CALL = 2,
};
// The table will contain these structs that point to the sled, the function
// containing the sled, and what kind of sled (and whether they should always
// be instrumented).
struct XRayFunctionEntry {
// Label of the sled.
const MCSymbol *Sled;
// Symbol of the function containing the sled.
const MCSymbol *Function;
// Kind of sled described by this entry.
SledKind Kind;
// Whether the function is marked "function-instrument"="xray-always".
bool AlwaysInstrument;
// The IR function; written as `class Function` because the member named
// Function above hides the type name inside this struct.
const class Function *Fn;
};
// All the sleds to be emitted.
std::vector<XRayFunctionEntry> Sleds;
// All instructions emitted by the X86AsmPrinter should use this helper
// method.
//
@ -117,8 +96,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// function.
void EmitXRayTable();
// Helper function to record a given XRay sled.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
public:
explicit X86AsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)

View File

@ -1020,16 +1020,6 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
getSubtargetInfo());
}
// Appends an entry for the sled labelled by Sled to the pending sled list,
// noting the current function symbol, the sled kind, the IR function owning
// MI, and whether that function is marked
// "function-instrument"="xray-always".
void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
                               SledKind Kind) {
  const auto *OwningFn = MI.getParent()->getParent()->getFunction();
  const auto InstrumentAttr = OwningFn->getFnAttribute("function-instrument");

  bool IsAlways = false;
  if (InstrumentAttr.isStringAttribute())
    IsAlways = InstrumentAttr.getValueAsString() == "xray-always";

  XRayFunctionEntry Entry{Sled, CurrentFnSym, Kind, IsAlways, OwningFn};
  Sleds.push_back(Entry);
}
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// We want to emit the following pattern:

View File

@ -460,6 +460,8 @@ public:
bool hasPKU() const { return HasPKU; }
bool hasMPX() const { return HasMPX; }
// XRay instrumentation is reported as supported exactly when is64Bit() holds.
virtual bool isXRaySupported() const override { return is64Bit(); }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }

View File

@ -0,0 +1,24 @@
; RUN: llc -filetype=asm -o - -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s
; An always-instrumented function must get an entry sled and an exit sled,
; each made of a branch followed by six no-ops. For this armv6 triple the
; expected no-op spelling is "mov r0, r0".
define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
; Entry sled.
; CHECK-LABEL: Lxray_sled_0:
; CHECK-NEXT: b #20
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-LABEL: Ltmp0:
ret i32 0
; Exit sled, expected directly in front of the return instruction.
; CHECK-LABEL: Lxray_sled_1:
; CHECK-NEXT: b #20
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-NEXT: mov r0, r0
; CHECK-LABEL: Ltmp1:
; CHECK-NEXT: bx lr
}

View File

@ -0,0 +1,24 @@
; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s
; An always-instrumented function must get an entry sled and an exit sled,
; each made of a branch followed by six no-ops. For this armv7 triple the
; expected no-op spelling is "nop".
define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
; Entry sled.
; CHECK-LABEL: Lxray_sled_0:
; CHECK-NEXT: b #20
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-LABEL: Ltmp0:
ret i32 0
; Exit sled, expected directly in front of the return instruction.
; CHECK-LABEL: Lxray_sled_1:
; CHECK-NEXT: b #20
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-LABEL: Ltmp1:
; CHECK-NEXT: bx lr
}