[XRay] ARM 32-bit no-Thumb support in LLVM

This is a port of XRay to ARM 32-bit, without Thumb support yet. The XRay instrumentation support is moving up to AsmPrinter. This is one of 3 commits to different repositories of XRay ARM port. The other 2 are: https://reviews.llvm.org/D23932 (Clang test) https://reviews.llvm.org/D23933 (compiler-rt) Differential Revision: https://reviews.llvm.org/D23931 llvm-svn: 281878
2016-09-19 00:54:35 +00:00 · 2016-09-19 00:54:35 +00:00 · 4640154446
parent 3c46abb2ea
commit 4640154446
17 changed files with 328 additions and 63 deletions
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@ -183,6 +183,34 @@ public:

  MCSymbol *getSymbol(const GlobalValue *GV) const;

+  //===------------------------------------------------------------------===//
+  // XRay instrumentation implementation.
+  //===------------------------------------------------------------------===//
+public:
+  // This describes the kind of sled we're storing in the XRay table.
+  enum class SledKind : uint8_t {
+    FUNCTION_ENTER = 0,
+    FUNCTION_EXIT = 1,
+    TAIL_CALL = 2,
+  };
+
+  // The table will contain these structs that point to the sled, the function
+  // containing the sled, and what kind of sled (and whether they should always
+  // be instrumented).
+  struct XRayFunctionEntry {
+    const MCSymbol *Sled;
+    const MCSymbol *Function;
+    SledKind Kind;
+    bool AlwaysInstrument;
+    const class Function *Fn;
+  };
+
+  // All the sleds to be emitted.
+  std::vector<XRayFunctionEntry> Sleds;
+
+  // Helper function to record a given XRay sled.
+  void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
+
  //===------------------------------------------------------------------===//
  // MachineFunctionPass Implementation.
  //===------------------------------------------------------------------===//
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@ -977,11 +977,19 @@ def PATCHABLE_FUNCTION_ENTER : Instruction {
 def PATCHABLE_RET : Instruction {
  let OutOperandList = (outs unknown:$dst);
  let InOperandList = (ins variable_ops);
-  let AsmString = "# XRay Function Exit.";
+  let AsmString = "# XRay Function Patchable RET.";
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let isReturn = 1;
 }
+def PATCHABLE_FUNCTION_EXIT : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
+  let AsmString = "# XRay Function Exit.";
+  let usesCustomInserter = 1;
+  let hasSideEffects = 0; // FIXME: is this correct?
+  let isReturn = 0; // Original return instruction will follow
+}
 def PATCHABLE_TAIL_CALL : Instruction {
  let OutOperandList = (outs unknown:$dst);
  let InOperandList = (ins variable_ops);
--- a/llvm/include/llvm/Target/TargetOpcodes.def
+++ b/llvm/include/llvm/Target/TargetOpcodes.def
@ -153,8 +153,25 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER)
 /// Wraps a return instruction and its operands to enable adding nop sleds
 /// either before or after the return. The nop sleds are useful for inserting
 /// instrumentation instructions at runtime.
+/// The patch here replaces the return instruction.
 HANDLE_TARGET_OPCODE(PATCHABLE_RET)

+/// This is a marker instruction which gets translated into a nop sled, useful
+/// for inserting instrumentation instructions at runtime.
+/// The patch here prepends the return instruction.
+/// The same thing as in x86_64 is not possible for ARM because it has multiple
+/// return instructions. Furthermore, CPU allows parametrized and even
+/// conditional return instructions. In the current ARM implementation we are
+/// making use of the fact that currently LLVM doesn't seem to generate
+/// conditional return instructions.
+/// On ARM, the same instruction can be used for popping multiple registers
+/// from the stack and returning (it just pops pc register too), and LLVM
+/// generates it sometimes. So we can't insert the sled between this stack
+/// adjustment and the return without splitting the original instruction into 2
+/// instructions. So on ARM, rather than jumping into the exit trampoline, we
+/// call it, it does the tracing, preserves the stack and returns.
+HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
+
 /// Wraps a tail call instruction and its operands to enable adding nop sleds
 /// either before or after the tail exit. We use this as a disambiguation from
 /// PATCHABLE_RET which specifically only works for return instructions.
--- a/llvm/include/llvm/Target/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/Target/TargetSubtargetInfo.h
@ -71,6 +71,8 @@ public:

  virtual ~TargetSubtargetInfo();

+  virtual bool isXRaySupported() const { return false; }
+
  // Interfaces to the major aspects of target machine information:
  //
  // -- Instruction opcode and operand information
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@ -2600,3 +2600,13 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
 AsmPrinterHandler::~AsmPrinterHandler() {}

 void AsmPrinterHandler::markFunctionEnd() {}
+
+void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
+  SledKind Kind) {
+  auto Fn = MI.getParent()->getParent()->getFunction();
+  auto Attr = Fn->getFnAttribute("function-instrument");
+  bool AlwaysInstrument =
+    Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+  Sleds.emplace_back(
+    XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
+}
--- a/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp
@ -34,37 +34,33 @@ struct XRayInstrumentation : public MachineFunctionPass {
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  // Replace the original RET instruction with the exit sled code ("patchable
+  //   ret" pseudo-instruction), so that at runtime XRay can replace the sled
+  //   with a code jumping to XRay trampoline, which calls the tracing handler
+  //   and, in the end, issues the RET instruction.
+  // This is the approach to go on CPUs which have a single RET instruction,
+  //   like x86/x86_64.
+  void replaceRetWithPatchableRet(MachineFunction &MF,
+    const TargetInstrInfo *TII);
+  // Prepend the original return instruction with the exit sled code ("patchable
+  //   function exit" pseudo-instruction), preserving the original return
+  //   instruction just after the exit sled code.
+  // This is the approach to go on CPUs which have multiple options for the
+  //   return instruction, like ARM. For such CPUs we can't just jump into the
+  //   XRay trampoline and issue a single return instruction there. We rather
+  //   have to call the trampoline and return from it to the original return
+  //   instruction of the function being instrumented.
+  void prependRetWithPatchableExit(MachineFunction &MF,
+    const TargetInstrInfo *TII);
 };
-}
+} // anonymous namespace

-bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
-  auto &F = *MF.getFunction();
-  auto InstrAttr = F.getFnAttribute("function-instrument");
-  bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
-                          InstrAttr.isStringAttribute() &&
-                          InstrAttr.getValueAsString() == "xray-always";
-  Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
-  unsigned XRayThreshold = 0;
-  if (!AlwaysInstrument) {
-    if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
-      return false; // XRay threshold attribute not found.
-    if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
-      return false; // Invalid value for threshold.
-    if (F.size() < XRayThreshold)
-      return false; // Function is too small.
-  }
-
-  // FIXME: Do the loop triviality analysis here or in an earlier pass.
-
-  // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
-  // MachineFunction.
-  auto &FirstMBB = *MF.begin();
-  auto &FirstMI = *FirstMBB.begin();
-  auto *TII = MF.getSubtarget().getInstrInfo();
-  BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
-          TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
-
-  // Then we look for *all* terminators and returns, then replace those with
+void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
+  const TargetInstrInfo *TII)
+{
+  // We look for *all* terminators and returns, then replace those with
  // PATCHABLE_RET instructions.
  SmallVector<MachineInstr *, 4> Terminators;
  for (auto &MBB : MF) {
@ -92,7 +88,68 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {

  for (auto &I : Terminators)
    I->eraseFromParent();
+}

+void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF,
+  const TargetInstrInfo *TII)
+{
+  for (auto &MBB : MF) {
+    for (auto &T : MBB.terminators()) {
+      if (T.isReturn()) {
+        // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT
+        BuildMI(MBB, T, T.getDebugLoc(),
+                TII->get(TargetOpcode::PATCHABLE_FUNCTION_EXIT));
+      }
+    }
+  }
+}
+
+bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
+  auto &F = *MF.getFunction();
+  auto InstrAttr = F.getFnAttribute("function-instrument");
+  bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
+                          InstrAttr.isStringAttribute() &&
+                          InstrAttr.getValueAsString() == "xray-always";
+  Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
+  unsigned XRayThreshold = 0;
+  if (!AlwaysInstrument) {
+    if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
+      return false; // XRay threshold attribute not found.
+    if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
+      return false; // Invalid value for threshold.
+    if (F.size() < XRayThreshold)
+      return false; // Function is too small.
+  }
+
+  auto &FirstMBB = *MF.begin();
+  auto &FirstMI = *FirstMBB.begin();
+
+  if (!MF.getSubtarget().isXRaySupported()) {
+    FirstMI.emitError("An attempt to perform XRay instrumentation for an"
+      " unsupported target.");
+    return false;
+  }
+
+  // FIXME: Do the loop triviality analysis here or in an earlier pass.
+
+  // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
+  // MachineFunction.
+  auto *TII = MF.getSubtarget().getInstrInfo();
+  BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+          TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
+
+  switch (MF.getTarget().getTargetTriple().getArch()) {
+  case Triple::ArchType::arm:
+  case Triple::ArchType::thumb:
+    // For the architectures which don't have a single return instruction
+    prependRetWithPatchableExit(MF, TII);
+    break;
+  default:
+    // For the architectures that have a single return instruction (such as
+    //   RETQ on x86_64).
+    replaceRetWithPatchableRet(MF, TII);
+    break;
+  }
  return true;
 }

--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
  // Emit the rest of the function body.
  EmitFunctionBody();

+  // Emit the XRay table for this function.
+  EmitXRayTable();
+
  // If we need V4T thumb mode Register Indirect Jump pads, emit them.
  // These are created per function, rather than per TU, since it's
  // relatively easy to exceed the thumb branch range within a TU.
@ -2019,6 +2022,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                                     .addReg(0));
    return;
  }
+  case ARM::PATCHABLE_FUNCTION_ENTER:
+    LowerPATCHABLE_FUNCTION_ENTER(*MI);
+    return;
+  case ARM::PATCHABLE_FUNCTION_EXIT:
+    LowerPATCHABLE_FUNCTION_EXIT(*MI);
+    return;
  }

  MCInst TmpInst;
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h
@ -101,7 +101,19 @@ public:
  // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);

+  //===------------------------------------------------------------------===//
+  // XRay implementation
+  //===------------------------------------------------------------------===//
+public:
+  // XRay-specific lowering for ARM.
+  void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+  void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+  // Helper function that emits the XRay sleds we've collected for a particular
+  // function.
+  void EmitXRayTable();
+
 private:
+  void EmitSled(const MachineInstr &MI, SledKind Kind);

  // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
  void emitAttributes();
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@ -100,6 +100,10 @@ public:
  // Return whether the target has an explicit NOP encoding.
  bool hasNOP() const;

+  virtual void getNoopForElfTarget(MCInst &NopInst) const {
+    getNoopForMachoTarget(NopInst);
+  }
+
  // Return the non-pre/post incrementing version of 'Opc'. Return 0
  // if there is not such an opcode.
  virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
--- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@ -21,6 +21,11 @@
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
 using namespace llvm;


@ -150,3 +155,92 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
    }
  }
 }
+
+void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
+{
+  if (MI.getParent()->getParent()->getInfo<ARMFunctionInfo>()
+    ->isThumbFunction())
+  {
+    MI.emitError("An attempt to perform XRay instrumentation for a"
+      " Thumb function (not supported). Detected when emitting a sled.");
+    return;
+  }
+  static const int8_t NoopsInSledCount = 6;
+  // We want to emit the following pattern:
+  //
+  // .Lxray_sled_N:
+  //   ALIGN
+  //   B #20
+  //   ; 6 NOP instructions (24 bytes)
+  // .tmpN
+  //
+  // We need the 24 bytes (6 instructions) because at runtime, we'd be patching
+  // over the full 28 bytes (7 instructions) with the following pattern:
+  //
+  //   PUSH{ r0, lr }
+  //   MOVW r0, #<lower 16 bits of function ID>
+  //   MOVT r0, #<higher 16 bits of function ID>
+  //   MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit>
+  //   MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit>
+  //   BLX ip
+  //   POP{ r0, lr }
+  //
+  OutStreamer->EmitCodeAlignment(4);
+  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+  OutStreamer->EmitLabel(CurSled);
+  auto Target = OutContext.createTempSymbol();
+
+  // Emit "B #20" instruction, which jumps over the next 24 bytes (because
+  // register pc is 8 bytes ahead of the jump instruction by the moment CPU
+  // is executing it).
+  // By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
+  // It is not clear why |addReg(0)| is needed (the last operand).
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
+    .addImm(ARMCC::AL).addReg(0));
+
+  MCInst Noop;
+  Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
+  for (int8_t I = 0; I < NoopsInSledCount; I++)
+  {
+    OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
+  }
+
+  OutStreamer->EmitLabel(Target);
+  recordSled(CurSled, MI, Kind);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
+{
+  EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
+{
+  EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void ARMAsmPrinter::EmitXRayTable()
+{
+  if (Sleds.empty())
+    return;
+  if (Subtarget->isTargetELF()) {
+    auto *Section = OutContext.getELFSection(
+      "xray_instr_map", ELF::SHT_PROGBITS,
+      ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
+      CurrentFnSym->getName());
+    auto PrevSection = OutStreamer->getCurrentSectionOnly();
+    OutStreamer->SwitchSection(Section);
+    for (const auto &Sled : Sleds) {
+      OutStreamer->EmitSymbolValue(Sled.Sled, 4);
+      OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
+      auto Kind = static_cast<uint8_t>(Sled.Kind);
+      OutStreamer->EmitBytes(
+        StringRef(reinterpret_cast<const char *>(&Kind), 1));
+      OutStreamer->EmitBytes(
+        StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+      OutStreamer->EmitZeros(6);
+    }
+    OutStreamer->SwitchSection(PrevSection);
+  }
+  Sleds.clear();
+}
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@ -101,6 +101,11 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                          : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
      TLInfo(TM, *this) {}

+bool ARMSubtarget::isXRaySupported() const {
+  // We don't currently suppport Thumb, but Windows requires Thumb.
+  return hasV6Ops() && hasARMOps() && !isTargetWindows();
+}
+
 void ARMSubtarget::initializeEnvironment() {
  // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
  // directly from it, but we can try to make sure they're consistent when both
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@ -540,6 +540,8 @@ public:
  }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }

+  virtual bool isXRaySupported() const override;
+
  bool isAPCS_ABI() const;
  bool isAAPCS_ABI() const;
  bool isAAPCS16_ABI() const;
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@ -71,27 +71,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {

  StackMapShadowTracker SMShadowTracker;

-  // This describes the kind of sled we're storing in the XRay table.
-  enum class SledKind : uint8_t {
-    FUNCTION_ENTER = 0,
-    FUNCTION_EXIT = 1,
-    TAIL_CALL = 2,
-  };
-
-  // The table will contain these structs that point to the sled, the function
-  // containing the sled, and what kind of sled (and whether they should always
-  // be instrumented).
-  struct XRayFunctionEntry {
-    const MCSymbol *Sled;
-    const MCSymbol *Function;
-    SledKind Kind;
-    bool AlwaysInstrument;
-    const class Function *Fn;
-  };
-
-  // All the sleds to be emitted.
-  std::vector<XRayFunctionEntry> Sleds;
-
  // All instructions emitted by the X86AsmPrinter should use this helper
  // method.
  //
@ -117,8 +96,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
  // function.
  void EmitXRayTable();

-  // Helper function to record a given XRay sled.
-  void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
 public:
  explicit X86AsmPrinter(TargetMachine &TM,
                         std::unique_ptr<MCStreamer> Streamer)
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@ -1020,16 +1020,6 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
           getSubtargetInfo());
 }

-void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
-                               SledKind Kind) {
-  auto Fn = MI.getParent()->getParent()->getFunction();
-  auto Attr = Fn->getFnAttribute("function-instrument");
-  bool AlwaysInstrument =
-      Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
-  Sleds.emplace_back(
-      XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn});
-}
-
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {
  // We want to emit the following pattern:
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@ -460,6 +460,8 @@ public:
  bool hasPKU() const { return HasPKU; }
  bool hasMPX() const { return HasMPX; }

+  virtual bool isXRaySupported() const override { return is64Bit(); }
+
  bool isAtom() const { return X86ProcFamily == IntelAtom; }
  bool isSLM() const { return X86ProcFamily == IntelSLM; }
  bool useSoftFloat() const { return UseSoftFloat; }
--- a/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll
@ -0,0 +1,24 @@
+; RUN: llc -filetype=asm -o - -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
+; CHECK-LABEL: Lxray_sled_0:
+; CHECK-NEXT:  b  #20
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-LABEL: Ltmp0:
+  ret i32 0
+; CHECK-LABEL: Lxray_sled_1:
+; CHECK-NEXT:  b  #20
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-NEXT:  mov	r0, r0
+; CHECK-LABEL: Ltmp1:
+; CHECK-NEXT:  bx	lr
+}
--- a/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll
@ -0,0 +1,24 @@
+; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
+; CHECK-LABEL: Lxray_sled_0:
+; CHECK-NEXT:  b  #20
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-LABEL: Ltmp0:
+  ret i32 0
+; CHECK-LABEL: Lxray_sled_1:
+; CHECK-NEXT:  b  #20
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-LABEL: Ltmp1:
+; CHECK-NEXT:  bx lr
+}