[PPC64] Emit plt call stubs to the text section rather than the plt section.

On PowerPC calls to functions through the plt must be done through a call stub
that is responsible for:
1) Saving the toc pointer to the stack.
2) Loading the target function's address from the plt into both r12 and the
   count register.
3) Indirectly branching to the target function.

Previously we have been emitting these call stubs to the .plt section, however
the .plt section should be reserved for the lazy symbol resolution stubs. This
patch moves the call stubs to the text section by moving the implementation from
writePlt to the thunk framework.

Differential Revision: https://reviews.llvm.org/D46204

llvm-svn: 331607
This commit is contained in:
Sean Fertile 2018-05-06 19:13:29 +00:00
parent cb2abc7977
commit d2e887d2f6
7 changed files with 153 additions and 95 deletions

View File

@ -43,10 +43,10 @@ public:
uint32_t calcEFlags() const override;
RelExpr getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const override;
void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
int32_t Index, unsigned RelOff) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void writeGotHeader(uint8_t *Buf) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
};
} // namespace
@ -64,17 +64,17 @@ static uint16_t applyPPCHighesta(uint64_t V) { return (V + 0x8000) >> 48; }
PPC64::PPC64() {
GotRel = R_PPC64_GLOB_DAT;
PltRel = R_PPC64_JMP_SLOT;
RelativeRel = R_PPC64_RELATIVE;
GotEntrySize = 8;
GotPltEntrySize = 8;
PltEntrySize = 32;
PltEntrySize = 0;
PltHeaderSize = 0;
GotBaseSymInGotPlt = false;
GotBaseSymOff = 0x8000;
GotHeaderEntriesNum = 1;
GotPltHeaderEntriesNum = 2;
PltRel = R_PPC64_JMP_SLOT;
NeedsThunks = true;
// We need 64K pages (at least under glibc/Linux, the loader won't
// set different permissions on a finer granularity than that).
@ -170,28 +170,6 @@ void PPC64::writeGotHeader(uint8_t *Buf) const {
write64(Buf, getPPC64TocBase());
}
// Writes one PLT call stub (five 4-byte instructions) at Buf. The stub saves
// the TOC pointer, loads the callee's address from the .got.plt entry at
// GotPltEntryAddr into r12 and the count register, and branches to it.
// PltEntryAddr, Index and RelOff are not used by this implementation.
void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) const {
// Offset of the .got.plt entry relative to the TOC base; the stub addresses
// the entry through the TOC pointer register (r2).
uint64_t Off = GotPltEntryAddr - getPPC64TocBase();
// The most-common form of the plt stub. This assumes that the toc-pointer
// register is properly initialized, and that the stub must save the toc
// pointer value to the stack-save slot reserved for it (sp + 24).
// There are 2 other variants but we don't have to emit those until we add
// support for R_PPC64_REL24_NOTOC and R_PPC64_TOCSAVE relocations.
// We are missing a super simple optimization, where if the upper 16 bits of
// the offset are zero, then we can omit the addis instruction, and load
// r2 + lo-offset directly into r12. I decided to leave this out in the
// spirit of keeping it simple until we can link actual non-trivial
// programs.
write32(Buf + 0, 0xf8410018); // std r2,24(r1)
write32(Buf + 4, 0x3d820000 | applyPPCHa(Off)); // addis r12,r2, X@plt@toc@ha
write32(Buf + 8, 0xe98c0000 | applyPPCLo(Off)); // ld r12,X@plt@toc@l(r12)
write32(Buf + 12, 0x7d8903a6); // mtctr r12
write32(Buf + 16, 0x4e800420); // bctr
}
static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
uint64_t V = Val - PPC64TocOffset;
switch (Type) {
@ -281,6 +259,13 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
}
}
bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                       uint64_t BranchAddr, const Symbol &S) const {
  // Only R_PPC64_REL24 branches are candidates for a call stub.
  if (Type != R_PPC64_REL24)
    return false;

  // A function that lives in the plt has to be reached through a call stub.
  return S.isInPlt();
}
TargetInfo *elf::getPPC64TargetInfo() {
static PPC64 Target;
return &Target;

View File

@ -740,13 +740,15 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
case R_RELAX_TLS_GD_TO_IE_END:
Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
break;
case R_PPC_CALL_PLT:
case R_PPC_CALL:
// Patch a nop (0x60000000) to a ld.
if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) {
error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc");
break;
if (Rel.Sym->NeedsTocRestore) {
if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) {
error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc");
break;
}
write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1)
}
write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1)
Target->relocateOne(BufLoc, Type, TargetVA);
break;
default:

View File

@ -159,7 +159,8 @@ protected:
: File(File), NameData(Name.Data), NameSize(Name.Size), Binding(Binding),
Type(Type), StOther(StOther), SymbolKind(K), NeedsPltAddr(false),
IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false),
IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections) {}
IsInIgot(false), IsPreemptible(false), Used(!Config->GcSections),
NeedsTocRestore(false) {}
public:
// True if the symbol should point to its PLT entry.
@ -183,6 +184,10 @@ public:
// True if an undefined or shared symbol is used from a live section.
unsigned Used : 1;
// True if a call to this symbol needs to be followed by a restore of the
// PPC64 toc pointer.
unsigned NeedsTocRestore : 1;
// The Type field may also have this value. It means that we have not yet seen
// a non-Lazy symbol with this name, so we don't know what its type is. The
// Type field is normally set to this value for Lazy symbols unless we saw a

View File

@ -192,6 +192,23 @@ public:
InputSection *getTargetInputSection() const override;
};
// PPC64 PLT call stubs.
// Any call site that needs to call through a plt entry needs a call stub in
// the .text section. The call stub is responsible for:
// 1) Saving the toc-pointer to the stack.
// 2) Loading the target function's address from the procedure linkage table
//    into r12 for use by the target function's global entry point, and into
//    the count register.
// 3) Transferring control to the target function through an indirect branch.
class PPC64PltCallStub final : public Thunk {
public:
// Dest is the symbol whose plt entry this stub calls through.
PPC64PltCallStub(Symbol &Dest) : Thunk(Dest) {}
// Size of the stub in bytes: writeTo() emits five 4-byte instructions.
// NOTE(review): unlike writeTo() and addSymbols(), size() is not marked
// `override` (or `const`) here -- confirm it matches the virtual declared in
// Thunk so that it is not silently hiding the base-class function.
uint32_t size() { return 20; }
// Emits the stub's instructions into Buf.
void writeTo(uint8_t *Buf) override;
// Defines the "__plt_<name>" symbol for this stub in the thunk section IS.
void addSymbols(ThunkSection &IS) override;
};
} // end anonymous namespace
Defined *Thunk::addSymbol(StringRef Name, uint8_t Type, uint64_t Value,
@ -485,6 +502,25 @@ InputSection *MicroMipsR6Thunk::getTargetInputSection() const {
return dyn_cast<InputSection>(DR.Section);
}
void PPC64PltCallStub::writeTo(uint8_t *Buf) {
  // Displacement of the destination's .got.plt slot from the TOC base; the
  // stub reaches the slot relative to the TOC pointer held in r2.
  int64_t TocRel = Destination.getGotPltVA() - getPPC64TocBase();

  // The low 16 bits are used as a signed displacement, so the high half is
  // biased by 0x8000 to compensate.
  uint16_t High = (TocRel + 0x8000) >> 16;
  uint16_t Low = TocRel;

  // Instruction words that carry the two halves of the displacement.
  const uint32_t Addis = 0x3d820000 | High; // addis r12,r2, X@plt@toc@ha
  const uint32_t Load = 0xe98c0000 | Low;   // ld r12,X@plt@toc@l(r12)

  write32(Buf + 0, 0xf8410018);  // std r2,24(r1)
  write32(Buf + 4, Addis);
  write32(Buf + 8, Load);
  write32(Buf + 12, 0x7d8903a6); // mtctr r12
  write32(Buf + 16, 0x4e800420); // bctr
}
void PPC64PltCallStub::addSymbols(ThunkSection &IS) {
  // The stub symbol is named after the function it forwards to, with a
  // "__plt_" prefix, and is placed at offset 0 as a function symbol.
  StringRef StubName = Saver.save("__plt_" + Destination.getName());
  Defined *StubSym = addSymbol(StubName, STT_FUNC, 0, IS);

  // Calls that resolve to this stub must be followed by a restore of the
  // toc pointer.
  StubSym->NeedsTocRestore = true;
}
Thunk::Thunk(Symbol &D) : Destination(D), Offset(0) {}
Thunk::~Thunk() = default;
@ -528,15 +564,26 @@ static Thunk *addThunkMips(RelType Type, Symbol &S) {
return make<MipsThunk>(S);
}
// Creates the PPC64 thunk for a relocation of the given type against S.
// R_PPC64_REL24 is the only relocation type handled; any other type is a
// fatal error.
static Thunk *addThunkPPC64(RelType Type, Symbol &S) {
  if (Type != R_PPC64_REL24)
    fatal("unexpected relocation type");
  return make<PPC64PltCallStub>(S);
}
Thunk *addThunk(RelType Type, Symbol &S) {
if (Config->EMachine == EM_AARCH64)
return addThunkAArch64(Type, S);
else if (Config->EMachine == EM_ARM)
if (Config->EMachine == EM_ARM)
return addThunkArm(Type, S);
else if (Config->EMachine == EM_MIPS)
if (Config->EMachine == EM_MIPS)
return addThunkMips(Type, S);
llvm_unreachable("add Thunk only supported for ARM and Mips");
return nullptr;
if (Config->EMachine == EM_PPC64)
return addThunkPPC64(Type, S);
llvm_unreachable("add Thunk only supported for ARM, Mips and PowerPC");
}
} // end namespace elf

View File

@ -4,37 +4,57 @@
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
# RUN: ld.lld -shared %t2.o -o %t2.so
# RUN: ld.lld %t.o %t2.so -o %t
# RUN: llvm-objdump -d %t | FileCheck %s
# RUN: llvm-objdump -D %t | FileCheck %s
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
# RUN: ld.lld -shared %t2.o -o %t2.so
# RUN: ld.lld %t.o %t2.so -o %t
# RUN: llvm-objdump -d %t | FileCheck %s
# RUN: llvm-objdump -D %t | FileCheck %s
# CHECK: _start:
# CHECK-NEXT: 10010004: {{.*}} bl .+28
# CHECK-NEXT: 10010008: {{.*}} ld 2, 24(1)
# CHECK-NEXT: 1001000c: {{.*}} bl .+52
# CHECK-NEXT: 10010010: {{.*}} ld 2, 24(1)
# CHECK: Disassembly of section .text:
# Tocbase + (-2 << 16) + 32576
# 0x100380d0 + (-131072) + 32576 = 0x10020010 (.got.plt[2])
# CHECK: __plt_foo:
# CHECK-NEXT: std 2, 24(1)
# CHECK-NEXT: addis 12, 2, -2
# CHECK-NEXT: ld 12, 32576(12)
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr
# Tocbase + (-2 << 16) + 32584
# 0x100380d0 + (-131072) + 32584 = 0x10020018 (.got.plt[3])
# CHECK: __plt_ifunc:
# CHECK-NEXT: std 2, 24(1)
# CHECK-NEXT: addis 12, 2, -2
# CHECK-NEXT: ld 12, 32584(12)
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr
# CHECK: ifunc:
# CHECK-NEXT: 10010028: {{.*}} nop
# CHECK: _start:
# CHECK-NEXT: addis 2, 12, 3
# CHECK-NEXT: addi 2, 2, -32604
# CHECK-NEXT: bl .+67108812
# CHECK-NEXT: ld 2, 24(1)
# CHECK-NEXT: bl .+67108824
# CHECK-NEXT: ld 2, 24(1)
# Address of .got.plt
# CHECK: Disassembly of section .got.plt:
# CHECK-NEXT: .got.plt:
# CHECK-NEXT: 10020000:
# Check tocbase
# CHECK: Disassembly of section .got:
# CHECK-NEXT: .got:
# CHECK-NEXT: 100300d0:
# 0x10010004 + 28 = 0x10010020 (PLT entry 0)
# 0x1001000c + 52 = 0x10010040 (PLT entry 1)
# CHECK: Disassembly of section .plt:
# CHECK-NEXT: .plt:
# CHECK-NEXT: 10010020: {{.*}} std 2, 24(1)
# CHECK-NEXT: 10010024: {{.*}} addis 12, 2, 4098
# CHECK-NEXT: 10010028: {{.*}} ld 12, -32752(12)
# CHECK-NEXT: 1001002c: {{.*}} mtctr 12
# CHECK-NEXT: 10010030: {{.*}} bctr
# CHECK-NEXT: 10010034: {{.*}} trap
# CHECK-NEXT: 10010038: {{.*}} trap
# CHECK-NEXT: 1001003c: {{.*}} trap
# CHECK-NEXT: 10010040: {{.*}} std 2, 24(1)
# CHECK-NEXT: 10010044: {{.*}} addis 12, 2, 4098
# CHECK-NEXT: 10010048: {{.*}} ld 12, -32744(12)
# CHECK-NEXT: 1001004c: {{.*}} mtctr 12
.text
.abiversion 2
@ -43,8 +63,15 @@
ifunc:
nop
.global _start
.global _start
.type _start,@function
_start:
.Lfunc_gep0:
addis 2, 12, .TOC.-.Lfunc_gep0@ha
addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
.localentry _start, .Lfunc_lep0-.Lfunc_gep0
bl foo
nop
bl ifunc

View File

@ -12,9 +12,17 @@
// RUN: ld.lld %t.o %t2.so -o %t
// RUN: llvm-objdump -d %t | FileCheck %s
// CHECK: Disassembly of section .text:
// CHECK: Disassembly of section .text:
// CHECK-NEXT: __plt_foo:
// CHECK-NEXT: std 2, 24(1)
// CHECK-NEXT: addis 12, 2, -2
// CHECK-NEXT: ld 12, 32576(12)
// CHECK-NEXT: mtctr 12
// CHECK-NEXT: bctr
// CHECK: _start:
// CHECK: bl .+24
// CHECK: bl .+67108824
.text
.abiversion 2
.globl _start
@ -32,13 +40,3 @@ _start:
li 0, 1
sc
.size _start, .-.Lfunc_begin0
// CHECK: Disassembly of section .plt:
// CHECK: .plt:
// CHECK-NEXT: {{.*}} std 2, 24(1)
// CHECK-NEXT: {{.*}} addis 12, 2, -2
// CHECK-NEXT: {{.*}} ld 12, 32576(12)
// CHECK-NEXT: {{.*}} mtctr 12
// CHECK: {{.*}} bctr

View File

@ -23,20 +23,21 @@ bar_local:
# Calling external function foo in a shared object needs a nop.
# Calling local function bar_local does not need a nop.
// CHECK: Disassembly of section .text:
.global _start
_start:
bl foo
nop
bl bar_local
// CHECK: Disassembly of section .text:
// CHECK: _start:
// CHECK: 10010008: {{.*}} bl .+72
// CHECK-NOT: 1001000c: {{.*}} nop
// CHECK: 1001000c: {{.*}} ld 2, 24(1)
// CHECK: 10010010: {{.*}} bl .+67108848
// CHECK-NOT: 10010014: {{.*}} nop
// CHECK-NOT: 10010014: {{.*}} ld 2, 24(1)
// CHECK: 1001001c: {{.*}} bl .+67108836
// CHECK-NOT: 10010020: {{.*}} nop
// CHECK: 10010020: {{.*}} ld 2, 24(1)
// CHECK: 10010024: {{.*}} bl .+67108848
// CHECK-NOT: 10010028: {{.*}} nop
// CHECK-NOT: 10010028: {{.*}} ld 2, 24(1)
# Calling a function in another object file which will have same
# TOC base does not need a nop. If nop present, do not rewrite to
@ -48,18 +49,18 @@ _diff_object:
nop
// CHECK: _diff_object:
// CHECK-NEXT: 10010014: {{.*}} bl .+28
// CHECK-NEXT: 10010018: {{.*}} bl .+24
// CHECK-NEXT: 1001001c: {{.*}} nop
// CHECK-NEXT: 10010028: {{.*}} bl .+24
// CHECK-NEXT: 1001002c: {{.*}} bl .+20
// CHECK-NEXT: 10010030: {{.*}} nop
# Branching to a local function does not need a nop
.global noretbranch
noretbranch:
b bar_local
// CHECK: noretbranch:
// CHECK: 10010020: {{.*}} b .+67108832
// CHECK-NOT: 10010024: {{.*}} nop
// CHECK-NOT: 10010024: {{.*}} ld 2, 24(1)
// CHECK: 10010034: {{.*}} b .+67108832
// CHECK-NOT: 10010038: {{.*}} nop
// CHECK-NOT: 1001003c: {{.*}} ld 2, 24(1)
// This should come last to check the end-of-buffer condition.
.global last
@ -67,12 +68,5 @@ last:
bl foo
nop
// CHECK: last:
// CHECK: 10010024: {{.*}} bl .+44
// CHECK-NEXT: 10010028: {{.*}} ld 2, 24(1)
// CHECK: Disassembly of section .plt:
// CHECK: .plt:
// CHECK-NEXT: 10010050: {{.*}} std 2, 24(1)
// CHECK-NEXT: 10010054: {{.*}} addis 12, 2, 4098
// CHECK-NEXT: 10010058: {{.*}} ld 12, -32752(12)
// CHECK-NEXT: 1001005c: {{.*}} mtctr 12
// CHECK: 10010038: {{.*}} bl .+67108808
// CHECK-NEXT: 1001003c: {{.*}} ld 2, 24(1)