[PPC64] Emit plt call stubs to the text section rather than the plt section.
On PowerPC, calls to functions through the plt must be done through a call stub that is responsible for: 1) Saving the toc pointer to the stack. 2) Loading the target function's address from the plt into both r12 and the count register. 3) Indirectly branching to the target function. Previously we have been emitting these call stubs to the .plt section; however, the .plt section should be reserved for the lazy symbol resolution stubs. This patch moves the call stubs to the text section by moving the implementation from writePlt to the thunk framework. Differential Revision: https://reviews.llvm.org/D46204 llvm-svn: 331607
This commit is contained in:
parent
cb2abc7977
commit
d2e887d2f6
|
@ -43,10 +43,10 @@ public:
|
|||
uint32_t calcEFlags() const override;
|
||||
RelExpr getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const override;
|
||||
void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
|
||||
int32_t Index, unsigned RelOff) const override;
|
||||
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
void writeGotHeader(uint8_t *Buf) const override;
|
||||
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
|
||||
uint64_t BranchAddr, const Symbol &S) const override;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
|
@ -64,17 +64,17 @@ static uint16_t applyPPCHighesta(uint64_t V) { return (V + 0x8000) >> 48; }
|
|||
|
||||
PPC64::PPC64() {
|
||||
GotRel = R_PPC64_GLOB_DAT;
|
||||
PltRel = R_PPC64_JMP_SLOT;
|
||||
RelativeRel = R_PPC64_RELATIVE;
|
||||
GotEntrySize = 8;
|
||||
GotPltEntrySize = 8;
|
||||
PltEntrySize = 32;
|
||||
PltEntrySize = 0;
|
||||
PltHeaderSize = 0;
|
||||
GotBaseSymInGotPlt = false;
|
||||
GotBaseSymOff = 0x8000;
|
||||
|
||||
GotHeaderEntriesNum = 1;
|
||||
GotPltHeaderEntriesNum = 2;
|
||||
PltRel = R_PPC64_JMP_SLOT;
|
||||
NeedsThunks = true;
|
||||
|
||||
// We need 64K pages (at least under glibc/Linux, the loader won't
|
||||
// set different permissions on a finer granularity than that).
|
||||
|
@ -170,28 +170,6 @@ void PPC64::writeGotHeader(uint8_t *Buf) const {
|
|||
write64(Buf, getPPC64TocBase());
|
||||
}
|
||||
|
||||
void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
uint64_t PltEntryAddr, int32_t Index,
|
||||
unsigned RelOff) const {
|
||||
uint64_t Off = GotPltEntryAddr - getPPC64TocBase();
|
||||
|
||||
// The most-common form of the plt stub. This assumes that the toc-pointer
|
||||
// register is properly initalized, and that the stub must save the toc
|
||||
// pointer value to the stack-save slot reserved for it (sp + 24).
|
||||
// There are 2 other variants but we don't have to emit those until we add
|
||||
// support for R_PPC64_REL24_NOTOC and R_PPC64_TOCSAVE relocations.
|
||||
// We are missing a super simple optimization, where if the upper 16 bits of
|
||||
// the offset are zero, then we can omit the addis instruction, and load
|
||||
// r2 + lo-offset directly into r12. I decided to leave this out in the
|
||||
// spirit of keeping it simple until we can link actual non-trivial
|
||||
// programs.
|
||||
write32(Buf + 0, 0xf8410018); // std r2,24(r1)
|
||||
write32(Buf + 4, 0x3d820000 | applyPPCHa(Off)); // addis r12,r2, X@plt@to@ha
|
||||
write32(Buf + 8, 0xe98c0000 | applyPPCLo(Off)); // ld r12,X@plt@toc@l(r12)
|
||||
write32(Buf + 12, 0x7d8903a6); // mtctr r12
|
||||
write32(Buf + 16, 0x4e800420); // bctr
|
||||
}
|
||||
|
||||
static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
|
||||
uint64_t V = Val - PPC64TocOffset;
|
||||
switch (Type) {
|
||||
|
@ -281,6 +259,13 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
|||
}
|
||||
}
|
||||
|
||||
// Decides whether a relocation has to be routed through a thunk.
// Only R_PPC64_REL24 relocations whose target symbol lives in the plt qualify:
// such a function must be reached through a call stub. Expr, File and
// BranchAddr do not influence the decision.
bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                       uint64_t BranchAddr, const Symbol &S) const {
  if (Type != R_PPC64_REL24)
    return false;
  return S.isInPlt();
}
|
||||
|
||||
TargetInfo *elf::getPPC64TargetInfo() {
|
||||
static PPC64 Target;
|
||||
return &Target;
|
||||
|
|
|
@ -740,13 +740,15 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
|
|||
case R_RELAX_TLS_GD_TO_IE_END:
|
||||
Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
|
||||
break;
|
||||
case R_PPC_CALL_PLT:
|
||||
case R_PPC_CALL:
|
||||
// Patch a nop (0x60000000) to a ld.
|
||||
if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) {
|
||||
error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc");
|
||||
break;
|
||||
if (Rel.Sym->NeedsTocRestore) {
|
||||
if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) {
|
||||
error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc");
|
||||
break;
|
||||
}
|
||||
write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1)
|
||||
}
|
||||
write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1)
|
||||
Target->relocateOne(BufLoc, Type, TargetVA);
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -159,7 +159,8 @@ protected:
|
|||
: File(File), NameData(Name.Data), NameSize(Name.Size), Binding(Binding),
|
||||
Type(Type), StOther(StOther), SymbolKind(K), NeedsPltAddr(false),
|
||||
IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false),
|
||||
IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections) {}
|
||||
IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections),
|
||||
NeedsTocRestore(false) {}
|
||||
|
||||
public:
|
||||
// True the symbol should point to its PLT entry.
|
||||
|
@ -183,6 +184,10 @@ public:
|
|||
// True if an undefined or shared symbol is used from a live section.
|
||||
unsigned Used : 1;
|
||||
|
||||
// True if a call to this symbol needs to be followed by a restore of the
|
||||
// PPC64 toc pointer.
|
||||
unsigned NeedsTocRestore : 1;
|
||||
|
||||
// The Type field may also have this value. It means that we have not yet seen
|
||||
// a non-Lazy symbol with this name, so we don't know what its type is. The
|
||||
// Type field is normally set to this value for Lazy symbols unless we saw a
|
||||
|
|
|
@ -192,6 +192,23 @@ public:
|
|||
InputSection *getTargetInputSection() const override;
|
||||
};
|
||||
|
||||
|
||||
// PPC64 Plt call stubs.
|
||||
// Any call site that needs to call through a plt entry needs a call stub in
|
||||
// the .text section. The call stub is responsible for:
|
||||
// 1) Saving the toc-pointer to the stack.
|
||||
// 2) Loading the target functions address from the procedure linkage table into
|
||||
// r12 for use by the target functions global entry point, and into the count
|
||||
// register.
|
||||
// 3) Transfering control to the target function through an indirect branch.
|
||||
class PPC64PltCallStub final : public Thunk {
|
||||
public:
|
||||
PPC64PltCallStub(Symbol &Dest) : Thunk(Dest) {}
|
||||
uint32_t size() { return 20; }
|
||||
void writeTo(uint8_t *Buf) override;
|
||||
void addSymbols(ThunkSection &IS) override;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value,
|
||||
|
@ -485,6 +502,25 @@ InputSection *MicroMipsR6Thunk::getTargetInputSection() const {
|
|||
return dyn_cast<InputSection>(DR.Section);
|
||||
}
|
||||
|
||||
void PPC64PltCallStub::writeTo(uint8_t *Buf) {
|
||||
int64_t Off = Destination.getGotPltVA() - getPPC64TocBase();
|
||||
// Need to add 0x8000 to offset to account for the low bits being signed.
|
||||
uint16_t OffHa = (Off + 0x8000) >> 16;
|
||||
uint16_t OffLo = Off;
|
||||
|
||||
write32(Buf + 0, 0xf8410018); // std r2,24(r1)
|
||||
write32(Buf + 4, 0x3d820000 | OffHa); // addis r12,r2, X@plt@to@ha
|
||||
write32(Buf + 8, 0xe98c0000 | OffLo); // ld r12,X@plt@toc@l(r12)
|
||||
write32(Buf + 12, 0x7d8903a6); // mtctr r12
|
||||
write32(Buf + 16, 0x4e800420); // bctr
|
||||
}
|
||||
|
||||
// Defines an STT_FUNC symbol named "__plt_<destination>" at offset 0 of this
// stub in IS, and flags it so that calls to it are followed by a toc restore.
void PPC64PltCallStub::addSymbols(ThunkSection &IS) {
  StringRef StubName = Saver.save("__plt_" + Destination.getName());
  Defined *StubSym = addSymbol(StubName, STT_FUNC, /*Value=*/0, IS);
  StubSym->NeedsTocRestore = true;
}
|
||||
|
||||
Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {}
|
||||
|
||||
Thunk::~Thunk() = default;
|
||||
|
@ -528,15 +564,26 @@ static Thunk *addThunkMips(RelType Type, Symbol &S) {
|
|||
return make<MipsThunk>(S);
|
||||
}
|
||||
|
||||
// Creates the PPC64 thunk for a relocation against S. Only R_PPC64_REL24 is
// handled, and it gets a plt call stub; any other type is a fatal error.
static Thunk *addThunkPPC64(RelType Type, Symbol &S) {
  if (Type != R_PPC64_REL24)
    fatal("unexpected relocation type");
  return make<PPC64PltCallStub>(S);
}
|
||||
|
||||
// Creates the target-specific thunk for a relocation of kind Type against S,
// dispatching on the output machine type. Supported machines are AArch64, ARM,
// MIPS and PPC64; reaching the end means thunk creation was requested for a
// target that has none.
Thunk *addThunk(RelType Type, Symbol &S) {
  if (Config->EMachine == EM_AARCH64)
    return addThunkAArch64(Type, S);

  if (Config->EMachine == EM_ARM)
    return addThunkArm(Type, S);

  if (Config->EMachine == EM_MIPS)
    return addThunkMips(Type, S);

  if (Config->EMachine == EM_PPC64)
    return addThunkPPC64(Type, S);

  llvm_unreachable("add Thunk only supported for ARM, Mips and PowerPC");
}
|
||||
|
||||
} // end namespace elf
|
||||
|
|
|
@ -4,37 +4,57 @@
|
|||
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
|
||||
# RUN: ld.lld -shared %t2.o -o %t2.so
|
||||
# RUN: ld.lld %t.o %t2.so -o %t
|
||||
# RUN: llvm-objdump -d %t | FileCheck %s
|
||||
# RUN: llvm-objdump -D %t | FileCheck %s
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
|
||||
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
|
||||
# RUN: ld.lld -shared %t2.o -o %t2.so
|
||||
# RUN: ld.lld %t.o %t2.so -o %t
|
||||
# RUN: llvm-objdump -d %t | FileCheck %s
|
||||
# RUN: llvm-objdump -D %t | FileCheck %s
|
||||
|
||||
# CHECK: _start:
|
||||
# CHECK-NEXT: 10010004: {{.*}} bl .+28
|
||||
# CHECK-NEXT: 10010008: {{.*}} ld 2, 24(1)
|
||||
# CHECK-NEXT: 1001000c: {{.*}} bl .+52
|
||||
# CHECK-NEXT: 10010010: {{.*}} ld 2, 24(1)
|
||||
# CHECK: Disassembly of section .text:
|
||||
|
||||
# Tocbase + (-2 << 16) + 32576
|
||||
# 0x100380d0 + (-131072) + 32576 = 0x10020010 (.got.plt[2])
|
||||
# CHECK: __plt_foo:
|
||||
# CHECK-NEXT: std 2, 24(1)
|
||||
# CHECK-NEXT: addis 12, 2, -2
|
||||
# CHECK-NEXT: ld 12, 32576(12)
|
||||
# CHECK-NEXT: mtctr 12
|
||||
# CHECK-NEXT: bctr
|
||||
|
||||
# Tocbase + (-2 << 16) + 32584
|
||||
# 0x100380d0 + (-131072) + 32584 = 0x10020018 (.got.plt[3])
|
||||
# CHECK: __plt_ifunc:
|
||||
# CHECK-NEXT: std 2, 24(1)
|
||||
# CHECK-NEXT: addis 12, 2, -2
|
||||
# CHECK-NEXT: ld 12, 32584(12)
|
||||
# CHECK-NEXT: mtctr 12
|
||||
# CHECK-NEXT: bctr
|
||||
|
||||
# CHECK: ifunc:
|
||||
# CHECK-NEXT: 10010028: {{.*}} nop
|
||||
|
||||
# CHECK: _start:
|
||||
# CHECK-NEXT: addis 2, 12, 3
|
||||
# CHECK-NEXT: addi 2, 2, -32604
|
||||
# CHECK-NEXT: bl .+67108812
|
||||
# CHECK-NEXT: ld 2, 24(1)
|
||||
# CHECK-NEXT: bl .+67108824
|
||||
# CHECK-NEXT: ld 2, 24(1)
|
||||
|
||||
# Address of .got.plt
|
||||
# CHECK: Disassembly of section .got.plt:
|
||||
# CHECK-NEXT: .got.plt:
|
||||
# CHECK-NEXT: 10020000:
|
||||
|
||||
|
||||
# Check tocbase
|
||||
# CHECK: Disassembly of section .got:
|
||||
# CHECK-NEXT: .got:
|
||||
# CHECK-NEXT: 100300d0:
|
||||
|
||||
# 0x10010004 + 28 = 0x10010020 (PLT entry 0)
|
||||
# 0x1001000c + 52 = 0x10010040 (PLT entry 1)
|
||||
|
||||
# CHECK: Disassembly of section .plt:
|
||||
# CHECK-NEXT: .plt:
|
||||
# CHECK-NEXT: 10010020: {{.*}} std 2, 24(1)
|
||||
# CHECK-NEXT: 10010024: {{.*}} addis 12, 2, 4098
|
||||
# CHECK-NEXT: 10010028: {{.*}} ld 12, -32752(12)
|
||||
# CHECK-NEXT: 1001002c: {{.*}} mtctr 12
|
||||
# CHECK-NEXT: 10010030: {{.*}} bctr
|
||||
# CHECK-NEXT: 10010034: {{.*}} trap
|
||||
# CHECK-NEXT: 10010038: {{.*}} trap
|
||||
# CHECK-NEXT: 1001003c: {{.*}} trap
|
||||
# CHECK-NEXT: 10010040: {{.*}} std 2, 24(1)
|
||||
# CHECK-NEXT: 10010044: {{.*}} addis 12, 2, 4098
|
||||
# CHECK-NEXT: 10010048: {{.*}} ld 12, -32744(12)
|
||||
# CHECK-NEXT: 1001004c: {{.*}} mtctr 12
|
||||
.text
|
||||
.abiversion 2
|
||||
|
||||
|
@ -43,8 +63,15 @@
|
|||
ifunc:
|
||||
nop
|
||||
|
||||
.global _start
|
||||
.global _start
|
||||
.type _start,@function
|
||||
|
||||
_start:
|
||||
.Lfunc_gep0:
|
||||
addis 2, 12, .TOC.-.Lfunc_gep0@ha
|
||||
addi 2, 2, .TOC.-.Lfunc_gep0@l
|
||||
.Lfunc_lep0:
|
||||
.localentry _start, .Lfunc_lep0-.Lfunc_gep0
|
||||
bl foo
|
||||
nop
|
||||
bl ifunc
|
||||
|
|
|
@ -12,9 +12,17 @@
|
|||
// RUN: ld.lld %t.o %t2.so -o %t
|
||||
// RUN: llvm-objdump -d %t | FileCheck %s
|
||||
|
||||
// CHECK: Disassembly of section .text:
|
||||
// CHECK: Disassembly of section .text:
|
||||
// CHECK-NEXT: __plt_foo:
|
||||
// CHECK-NEXT: std 2, 24(1)
|
||||
// CHECK-NEXT: addis 12, 2, -2
|
||||
// CHECK-NEXT: ld 12, 32576(12)
|
||||
// CHECK-NEXT: mtctr 12
|
||||
// CHECK-NEXT: bctr
|
||||
|
||||
|
||||
// CHECK: _start:
|
||||
// CHECK: bl .+24
|
||||
// CHECK: bl .+67108824
|
||||
.text
|
||||
.abiversion 2
|
||||
.globl _start
|
||||
|
@ -32,13 +40,3 @@ _start:
|
|||
li 0, 1
|
||||
sc
|
||||
.size _start, .-.Lfunc_begin0
|
||||
|
||||
|
||||
|
||||
// CHECK: Disassembly of section .plt:
|
||||
// CHECK: .plt:
|
||||
// CHECK-NEXT: {{.*}} std 2, 24(1)
|
||||
// CHECK-NEXT: {{.*}} addis 12, 2, -2
|
||||
// CHECK-NEXT: {{.*}} ld 12, 32576(12)
|
||||
// CHECK-NEXT: {{.*}} mtctr 12
|
||||
// CHECK: {{.*}} bctr
|
||||
|
|
|
@ -23,20 +23,21 @@ bar_local:
|
|||
|
||||
# Calling external function foo in a shared object needs a nop.
|
||||
# Calling local function bar_local does not need a nop.
|
||||
// CHECK: Disassembly of section .text:
|
||||
.global _start
|
||||
_start:
|
||||
bl foo
|
||||
nop
|
||||
bl bar_local
|
||||
|
||||
|
||||
// CHECK: Disassembly of section .text:
|
||||
// CHECK: _start:
|
||||
// CHECK: 10010008: {{.*}} bl .+72
|
||||
// CHECK-NOT: 1001000c: {{.*}} nop
|
||||
// CHECK: 1001000c: {{.*}} ld 2, 24(1)
|
||||
// CHECK: 10010010: {{.*}} bl .+67108848
|
||||
// CHECK-NOT: 10010014: {{.*}} nop
|
||||
// CHECK-NOT: 10010014: {{.*}} ld 2, 24(1)
|
||||
// CHECK: 1001001c: {{.*}} bl .+67108836
|
||||
// CHECK-NOT: 10010020: {{.*}} nop
|
||||
// CHECK: 10010020: {{.*}} ld 2, 24(1)
|
||||
// CHECK: 10010024: {{.*}} bl .+67108848
|
||||
// CHECK-NOT: 10010028: {{.*}} nop
|
||||
// CHECK-NOT: 10010028: {{.*}} ld 2, 24(1)
|
||||
|
||||
# Calling a function in another object file which will have same
|
||||
# TOC base does not need a nop. If nop present, do not rewrite to
|
||||
|
@ -48,18 +49,18 @@ _diff_object:
|
|||
nop
|
||||
|
||||
// CHECK: _diff_object:
|
||||
// CHECK-NEXT: 10010014: {{.*}} bl .+28
|
||||
// CHECK-NEXT: 10010018: {{.*}} bl .+24
|
||||
// CHECK-NEXT: 1001001c: {{.*}} nop
|
||||
// CHECK-NEXT: 10010028: {{.*}} bl .+24
|
||||
// CHECK-NEXT: 1001002c: {{.*}} bl .+20
|
||||
// CHECK-NEXT: 10010030: {{.*}} nop
|
||||
|
||||
# Branching to a local function does not need a nop
|
||||
.global noretbranch
|
||||
noretbranch:
|
||||
b bar_local
|
||||
// CHECK: noretbranch:
|
||||
// CHECK: 10010020: {{.*}} b .+67108832
|
||||
// CHECK-NOT: 10010024: {{.*}} nop
|
||||
// CHECK-NOT: 10010024: {{.*}} ld 2, 24(1)
|
||||
// CHECK: 10010034: {{.*}} b .+67108832
|
||||
// CHECK-NOT: 10010038: {{.*}} nop
|
||||
// CHECK-NOT: 1001003c: {{.*}} ld 2, 24(1)
|
||||
|
||||
// This should come last to check the end-of-buffer condition.
|
||||
.global last
|
||||
|
@ -67,12 +68,5 @@ last:
|
|||
bl foo
|
||||
nop
|
||||
// CHECK: last:
|
||||
// CHECK: 10010024: {{.*}} bl .+44
|
||||
// CHECK-NEXT: 10010028: {{.*}} ld 2, 24(1)
|
||||
|
||||
// CHECK: Disassembly of section .plt:
|
||||
// CHECK: .plt:
|
||||
// CHECK-NEXT: 10010050: {{.*}} std 2, 24(1)
|
||||
// CHECK-NEXT: 10010054: {{.*}} addis 12, 2, 4098
|
||||
// CHECK-NEXT: 10010058: {{.*}} ld 12, -32752(12)
|
||||
// CHECK-NEXT: 1001005c: {{.*}} mtctr 12
|
||||
// CHECK: 10010038: {{.*}} bl .+67108808
|
||||
// CHECK-NEXT: 1001003c: {{.*}} ld 2, 24(1)
|
||||
|
|
Loading…
Reference in New Issue