From ffeecb5c80391db93526cad7790c71ae64632bfa Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 21 Aug 2013 07:28:13 +0000 Subject: [PATCH] MC: ObjectSymbolizer can now recognize external function stubs. Only implemented in the Mach-O ObjectSymbolizer. The testcase sadly introduces a new binary. llvm-svn: 188879 --- llvm/include/llvm/MC/MCObjectSymbolizer.h | 5 ++ llvm/lib/MC/MCObjectSymbolizer.cpp | 77 +++++++++++++++++- .../trivial-executable-test.macho-x86-64 | Bin 0 -> 8512 bytes .../X86/objdump-disassembly-symbolic.test | 18 ++++ 4 files changed, 99 insertions(+), 1 deletion(-) create mode 100755 llvm/test/Object/Inputs/trivial-executable-test.macho-x86-64 diff --git a/llvm/include/llvm/MC/MCObjectSymbolizer.h b/llvm/include/llvm/MC/MCObjectSymbolizer.h index 7d70dfd56b89..64b932ebe4f9 100644 --- a/llvm/include/llvm/MC/MCObjectSymbolizer.h +++ b/llvm/include/llvm/MC/MCObjectSymbolizer.h @@ -56,6 +56,11 @@ public: int64_t Value, uint64_t Address); /// @} + /// \brief Look for an external function symbol at \p Addr. + /// (References through the ELF PLT, Mach-O stubs, and similar). + /// \returns The name of the external function, or an empty StringRef if not found. + virtual StringRef findExternalFunctionAt(uint64_t Addr); + /// \brief Create an object symbolizer for \p Obj. static MCObjectSymbolizer * createObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, diff --git a/llvm/lib/MC/MCObjectSymbolizer.cpp b/llvm/lib/MC/MCObjectSymbolizer.cpp index 193342b69bc9..a32e2aeb8240 100644 --- a/llvm/lib/MC/MCObjectSymbolizer.cpp +++ b/llvm/lib/MC/MCObjectSymbolizer.cpp @@ -26,9 +26,18 @@ using namespace object; namespace { class MCMachObjectSymbolizer : public MCObjectSymbolizer { + const MachOObjectFile *MOOF; + // __TEXT,__stubs support. 
+ uint64_t StubsStart; + uint64_t StubsCount; + uint64_t StubSize; + uint64_t StubsIndSymIndex; + public: MCMachObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, - const MachOObjectFile *MOOF) {} + const MachOObjectFile *MOOF); + + StringRef findExternalFunctionAt(uint64_t Addr) LLVM_OVERRIDE; void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, @@ -37,6 +46,62 @@ public: } // End unnamed namespace +MCMachObjectSymbolizer:: +MCMachObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, + const MachOObjectFile *MOOF) + : MCObjectSymbolizer(Ctx, RelInfo, MOOF), MOOF(MOOF), + StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) { + + error_code ec; + for (section_iterator SI = MOOF->begin_sections(), SE = MOOF->end_sections(); + SI != SE; SI.increment(ec)) { + if (ec) break; + StringRef Name; SI->getName(Name); + if (Name == "__stubs") { + SectionRef StubsSec = *SI; + if (MOOF->is64Bit()) { + macho::Section64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl()); + StubsIndSymIndex = S.Reserved1; + StubSize = S.Reserved2; + } else { + macho::Section S = MOOF->getSection(StubsSec.getRawDataRefImpl()); + StubsIndSymIndex = S.Reserved1; + StubSize = S.Reserved2; + } + assert(StubSize && "Mach-O stub entry size can't be zero!"); + StubsSec.getAddress(StubsStart); + StubsSec.getSize(StubsCount); + StubsCount /= StubSize; + } + } +} + +StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { + // FIXME: also, this can all be done at the very beginning, by iterating over + // all stubs and creating the calls to outside functions. Is it worth it + // though? 
+ if (!StubSize) + return StringRef(); + uint64_t StubIdx = (Addr - StubsStart) / StubSize; + if (StubIdx >= StubsCount) + return StringRef(); + + macho::IndirectSymbolTableEntry ISTE = + MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx); + uint32_t SymtabIdx = ISTE.Index; + + StringRef SymName; + symbol_iterator SI = MOOF->begin_symbols(); + error_code ec; + for (uint32_t i = 0; i != SymtabIdx; ++i) { + SI.increment(ec); + } + SI->getName(SymName); + assert(SI != MOOF->end_symbols() && "Stub wasn't found in the symbol table!"); + assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!"); + return SymName.substr(1); +} + void MCMachObjectSymbolizer:: tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) { @@ -71,6 +136,16 @@ bool MCObjectSymbolizer:: tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t InstSize) { + if (IsBranch) { + StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value); + if (!ExtFnName.empty()) { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(ExtFnName); + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + MI.addOperand(MCOperand::CreateExpr(Expr)); + return true; + } + } + if (const RelocationRef *R = findRelocationAt(Address + Offset)) { if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) { MI.addOperand(MCOperand::CreateExpr(RelExpr)); diff --git a/llvm/test/Object/Inputs/trivial-executable-test.macho-x86-64 b/llvm/test/Object/Inputs/trivial-executable-test.macho-x86-64 new file mode 100755 index 0000000000000000000000000000000000000000..50a6bab64c47a3da2374ce56b93878338839a4c8 GIT binary patch literal 8512 zcmeHN&1(}u6n|-}#?R&?2`X+S5%rQHcx*_ZsVNpy+koO_%*WCMHoIYW11WeY6hV+e zz>{7C|B4(11K#xNpAbR8vxmm_W@plMV|ov~2eWV9$Nc){HxtO__1m9c$L9cIivR}; z0B{+{UiF&=(I;3o@4bI@e zlB5XXn>}9{sG@lNn?dZ3O6f13td*`MI8Y&}gz)@s&6~KQcmsuZNo5QW54SOqxWYTD 
zJOQ3)npVej(HF%VD!jDHA0i&{K9Q;V>Ynem>^+qb#TzL+IyaIL;)UfOYz)vjjBp6C zUZ@m8nTDF{bBJg<2fQPbT7|GJ;qA9;jwL$2I}=aC08VJV7UC;hn-Es@G`xfntkigk z2p-j^?+9g{+}o|q$B&BZoBGbJ1~^oY6eCc1b*O|BKHppTxzI~=ell1hpGgCN?phu- z<$>Wer?hlO%!gzFB+Y5v^^aH9Sz=)e%rz07t!H*2+O*~Y%rK%3+m z4yxm0Zmsq zzzi@0%m6dM3@`)C05iZ0Fayj0Gr$bY4+FV8H4SAGT+~{21Dn=QTBX~ z({wkwcHM6|HU*Lx=yZJ#=Oi(KJ;VjkZVIz!*1Numz3(c)thB2w8#2`3Tm;>gpG>4X zh8;ZFj0B+HIF_;TsuQmhG{7+`DOr4m?FSKiBVq@CcG6qTYumt1^>nR5*7~3Q20NXi M$n&di@J|ST0a0kO{r~^~ literal 0 HcmV?d00001 diff --git a/llvm/test/Object/X86/objdump-disassembly-symbolic.test b/llvm/test/Object/X86/objdump-disassembly-symbolic.test index 667bce95f229..858653e95ebc 100644 --- a/llvm/test/Object/X86/objdump-disassembly-symbolic.test +++ b/llvm/test/Object/X86/objdump-disassembly-symbolic.test @@ -3,6 +3,11 @@ RUN: | FileCheck %s -check-prefix ELF-x86-64 RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.macho-x86-64 \ RUN: | FileCheck %s -check-prefix MACHO-x86-64 +# Generate this using: +# ld trivial-object-test.macho-x86-64 -undefined dynamic_lookup +RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-executable-test.macho-x86-64 \ +RUN: | FileCheck %s -check-prefix MACHO-STUBS-x86-64 + ELF-x86-64: file format ELF64-x86-64 ELF-x86-64: Disassembly of section .text: ELF-x86-64: main: @@ -28,3 +33,16 @@ MACHO-x86-64: 1a: e8 00 00 00 00 callq _Som MACHO-x86-64: 1f: 8b 44 24 04 movl 4(%rsp), %eax MACHO-x86-64: 23: 48 83 c4 08 addq $8, %rsp MACHO-x86-64: 27: c3 ret + +MACHO-STUBS-x86-64: file format Mach-O 64-bit x86-64 +MACHO-STUBS-x86-64: Disassembly of section __TEXT,__text: +MACHO-STUBS-x86-64: _main: +MACHO-STUBS-x86-64: 1f90: 48 83 ec 08 subq $8, %rsp +MACHO-STUBS-x86-64: 1f94: c7 44 24 04 00 00 00 00 movl $0, 4(%rsp) +MACHO-STUBS-x86-64: 1f9c: 48 8d 3d 45 00 00 00 leaq 69(%rip), %rdi ## literal pool for: Hello World! 
+MACHO-STUBS-x86-64: 1fa3: e8 16 00 00 00 callq puts +MACHO-STUBS-x86-64: 1fa8: 30 c0 xorb %al, %al +MACHO-STUBS-x86-64: 1faa: e8 09 00 00 00 callq SomeOtherFunction +MACHO-STUBS-x86-64: 1faf: 8b 44 24 04 movl 4(%rsp), %eax +MACHO-STUBS-x86-64: 1fb3: 48 83 c4 08 addq $8, %rsp +MACHO-STUBS-x86-64: 1fb7: c3 ret