Recommit "[llvm-objcopy][MachO] Support load commands used in executables/shared libraries"

Summary:
This patch implements copying some load commands that appear in executables/shared libraries such as the indirect symbol table.

I intentionally do not add tests because this patch is incomplete: we still need a layout algorithm for executables/shared libraries. I'll submit that as a separate patch with tests.

Reviewers: alexshap, rupprecht, jhenderson, compnerd

Reviewed By: alexshap

Subscribers: abrachet, mgorny, mgrang, MaskRay, mtrent, jakehehrlich, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63395

llvm-svn: 369298
This commit is contained in:
Seiya Nuta 2019-08-19 21:05:31 +00:00
parent 50affbe47f
commit 552bcb854c
9 changed files with 564 additions and 214 deletions

View File

@ -26,6 +26,7 @@ add_llvm_tool(llvm-objcopy
MachO/MachOObjcopy.cpp
MachO/MachOReader.cpp
MachO/MachOWriter.cpp
MachO/MachOLayoutBuilder.cpp
MachO/Object.cpp
DEPENDS
ObjcopyOptsTableGen

View File

@ -0,0 +1,322 @@
//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MachOLayoutBuilder.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
namespace objcopy {
namespace macho {
// Returns the combined size, in bytes, of every load command held by the
// object. Load commands whose cmd value is not listed in MachO.def add nothing.
uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
  uint32_t TotalBytes = 0;
  for (const auto &Command : O.LoadCommands) {
    const auto Kind = Command.MachOLoadCommand.load_command_data.cmd;

    // Segment commands are sized from their section list (one section header
    // per section), not from a payload, so they are handled up front.
    if (Kind == MachO::LC_SEGMENT) {
      TotalBytes += sizeof(MachO::segment_command) +
                    sizeof(MachO::section) * Command.Sections.size();
      continue;
    }
    if (Kind == MachO::LC_SEGMENT_64) {
      TotalBytes += sizeof(MachO::segment_command_64) +
                    sizeof(MachO::section_64) * Command.Sections.size();
      continue;
    }

    // Any other known command: its fixed-size struct plus the trailing
    // payload bytes recorded for it.
    switch (Kind) {
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    TotalBytes += sizeof(MachO::LCStruct) + Command.Payload.size();            \
    break;
#include "llvm/BinaryFormat/MachO.def"
#undef HANDLE_LOAD_COMMAND
    }
  }
  return TotalBytes;
}
// Registers the name of every symbol in the symbol table with the string
// table builder and then finalizes the builder.
void MachOLayoutBuilder::constructStringTable() {
  for (const auto &Entry : O.SymTable.Symbols) {
    StrTableBuilder.add(Entry->Name);
  }
  StrTableBuilder.finalize();
}
// Renumbers the symbols so that each entry's Index equals its current
// position (starting at 0) in the symbol table.
void MachOLayoutBuilder::updateSymbolIndexes() {
  uint32_t Position = 0;
  for (const auto &Entry : O.SymTable.Symbols) {
    Entry->Index = Position;
    ++Position;
  }
}
// Updates the index and the number of local/external/undefined symbols in the
// given LC_DYSYMTAB load command from the current contents of the symbol
// table.
//
// MLC must be an LC_DYSYMTAB command (asserted). The symbol table is expected
// to be grouped as: local symbols, then defined external symbols, then
// undefined external symbols (asserted).
void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
  assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
  // Make sure that nlist entries in the symbol table are sorted by those
  // types. The order is: local < defined external < undefined external.
  //
  // Locality is compared first, and "undefined" is only consulted between two
  // external symbols. Testing the two properties independently is not a strict
  // weak ordering: a local symbol whose N_TYPE bits equal N_UNDF and a defined
  // external symbol would each compare less than the other, making
  // std::is_sorted reject a correctly ordered table.
  assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(),
                        [](const std::unique_ptr<SymbolEntry> &A,
                           const std::unique_ptr<SymbolEntry> &B) {
                          const bool AIsLocal = A->isLocalSymbol();
                          const bool BIsLocal = B->isLocalSymbol();
                          if (AIsLocal != BIsLocal)
                            return AIsLocal;
                          return !AIsLocal && !A->isUndefinedSymbol() &&
                                 B->isUndefinedSymbol();
                        }) &&
         "Symbols are not sorted by their types.");

  // Count the leading run of local symbols.
  uint32_t NumLocalSymbols = 0;
  auto Iter = O.SymTable.Symbols.begin();
  auto End = O.SymTable.Symbols.end();
  for (; Iter != End; ++Iter) {
    if ((*Iter)->isExternalSymbol())
      break;
    ++NumLocalSymbols;
  }

  // Count the following run of defined external symbols.
  uint32_t NumExtDefSymbols = 0;
  for (; Iter != End; ++Iter) {
    if ((*Iter)->isUndefinedSymbol())
      break;
    ++NumExtDefSymbols;
  }

  // Everything after the two counted runs is treated as undefined.
  MLC.dysymtab_command_data.ilocalsym = 0;
  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
  MLC.dysymtab_command_data.nundefsym =
      O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
}
// Recomputes and updates offset and size fields in load commands and sections
// since they could be modified.
//
// Segments are placed one after another, starting right after the Mach-O
// header and the load commands (O.Header.SizeOfCmds must already be set).
// The __LINKEDIT segment is not laid out here; its load command is only
// remembered in LinkEditLoadCommand. Returns the file offset just past the
// last segment that was laid out.
uint64_t MachOLayoutBuilder::layoutSegments() {
auto HeaderSize =
Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
auto Offset = HeaderSize + O.Header.SizeOfCmds;
// Lay out sections.
for (auto &LC : O.LoadCommands) {
// File offset at which this segment starts.
uint64_t FileOff = Offset;
auto &MLC = LC.MachOLoadCommand;
StringRef Segname;
// segname is a fixed-size char array that may lack a terminating NUL, hence
// the strnlen bounded by the array size. Non-segment commands are skipped.
switch (MLC.load_command_data.cmd) {
case MachO::LC_SEGMENT:
Segname = StringRef(MLC.segment_command_data.segname,
strnlen(MLC.segment_command_data.segname,
sizeof(MLC.segment_command_data.segname)));
break;
case MachO::LC_SEGMENT_64:
Segname = StringRef(MLC.segment_command_64_data.segname,
strnlen(MLC.segment_command_64_data.segname,
sizeof(MLC.segment_command_64_data.segname)));
break;
default:
continue;
}
if (Segname == "__LINKEDIT") {
// We update the __LINKEDIT segment later (in layoutTail).
assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
LinkEditLoadCommand = &MLC;
continue;
}
// Update file offsets and sizes of sections.
uint64_t VMSize = 0;
uint64_t FileOffsetInSegment = 0;
for (auto &Sec : LC.Sections) {
// Virtual sections take no space in the file, so only non-virtual sections
// get a file offset and advance the in-segment cursor.
if (!Sec.isVirtualSection()) {
// NOTE(review): the padding is derived from the offset within the segment,
// not from the absolute file offset; this only yields an aligned Sec.Offset
// if the segment start itself is suitably aligned - confirm.
auto FilePaddingSize =
OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
Sec.Size = Sec.Content.size();
FileOffsetInSegment += FilePaddingSize + Sec.Size;
}
// VMSize tracks the largest section end address (Addr + Size) seen so far.
VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
}
// TODO: Handle the __PAGEZERO segment.
// Write the recomputed values back into the 32-bit or 64-bit segment command.
switch (MLC.load_command_data.cmd) {
case MachO::LC_SEGMENT:
MLC.segment_command_data.cmdsize =
sizeof(MachO::segment_command) +
sizeof(MachO::section) * LC.Sections.size();
MLC.segment_command_data.nsects = LC.Sections.size();
MLC.segment_command_data.fileoff = FileOff;
MLC.segment_command_data.vmsize = VMSize;
MLC.segment_command_data.filesize = FileOffsetInSegment;
break;
case MachO::LC_SEGMENT_64:
MLC.segment_command_64_data.cmdsize =
sizeof(MachO::segment_command_64) +
sizeof(MachO::section_64) * LC.Sections.size();
MLC.segment_command_64_data.nsects = LC.Sections.size();
MLC.segment_command_64_data.fileoff = FileOff;
MLC.segment_command_64_data.vmsize = VMSize;
MLC.segment_command_64_data.filesize = FileOffsetInSegment;
break;
}
// The next segment starts where this one's file contents end.
Offset += FileOffsetInSegment;
}
return Offset;
}
// Assigns file offsets to the relocation entries of every section, packing
// them back to back starting at Offset. A section without relocations gets a
// relocation offset of 0. Returns the offset just past the last relocation.
uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
  for (auto &Command : O.LoadCommands) {
    for (auto &Section : Command.Sections) {
      if (Section.Relocations.empty())
        Section.RelOff = 0;
      else
        Section.RelOff = Offset;
      Section.NReloc = Section.Relocations.size();
      Offset += sizeof(MachO::any_relocation_info) * Section.NReloc;
    }
  }
  return Offset;
}
// Lays out the contents of the __LINKEDIT segment starting at Offset and
// updates every load command that refers to them. Returns an error if the
// object contains a load command (or LC_DYSYMTAB feature) that is not
// supported, so that the output is never silently corrupted.
Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
// The order of LINKEDIT elements is as follows:
// rebase info, binding info, weak binding info, lazy binding info, export
// trie, function starts, data-in-code, symbol table, indirect symbol table,
// symbol table strings.
uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
// Each StartOf* value is the previous element's start plus its size, i.e.
// the elements are packed back to back with no padding between them.
uint64_t StartOfLinkEdit = Offset;
uint64_t StartOfRebaseInfo = StartOfLinkEdit;
uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
uint64_t StartOfLazyBindingInfo =
StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
uint64_t StartOfExportTrie =
StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
uint64_t StartOfDataInCode =
StartOfFunctionStarts + O.FunctionStarts.Data.size();
uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size();
uint64_t StartOfIndirectSymbols =
StartOfSymbols + NListSize * O.SymTable.Symbols.size();
uint64_t StartOfSymbolStrings =
StartOfIndirectSymbols +
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
uint64_t LinkEditSize =
(StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit;
// Now we have determined the layout of the contents of the __LINKEDIT
// segment. Update its load command.
// LinkEditLoadCommand stays null when layoutSegments() found no __LINKEDIT
// segment; in that case there is no segment command to update.
if (LinkEditLoadCommand) {
MachO::macho_load_command *MLC = LinkEditLoadCommand;
switch (LinkEditLoadCommand->load_command_data.cmd) {
case MachO::LC_SEGMENT:
MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
MLC->segment_command_data.fileoff = StartOfLinkEdit;
// The in-memory size is the file size rounded up to a whole page.
MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
MLC->segment_command_data.filesize = LinkEditSize;
break;
case MachO::LC_SEGMENT_64:
MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
MLC->segment_command_64_data.filesize = LinkEditSize;
break;
}
}
// Point every load command that references __LINKEDIT data at its new
// location.
for (auto &LC : O.LoadCommands) {
auto &MLC = LC.MachOLoadCommand;
auto cmd = MLC.load_command_data.cmd;
switch (cmd) {
case MachO::LC_SYMTAB:
MLC.symtab_command_data.symoff = StartOfSymbols;
MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
MLC.symtab_command_data.stroff = StartOfSymbolStrings;
MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
break;
case MachO::LC_DYSYMTAB: {
// These tables are neither laid out above nor rewritten, so refuse inputs
// that use them.
if (MLC.dysymtab_command_data.ntoc != 0 ||
MLC.dysymtab_command_data.nmodtab != 0 ||
MLC.dysymtab_command_data.nextrefsyms != 0 ||
MLC.dysymtab_command_data.nlocrel != 0 ||
MLC.dysymtab_command_data.nextrel != 0)
return createStringError(llvm::errc::not_supported,
"shared library is not yet supported");
// With an empty indirect symbol table the existing indirectsymoff and
// nindirectsyms values are left as they are.
if (!O.IndirectSymTable.Symbols.empty()) {
MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
MLC.dysymtab_command_data.nindirectsyms =
O.IndirectSymTable.Symbols.size();
}
updateDySymTab(MLC);
break;
}
case MachO::LC_DATA_IN_CODE:
MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
break;
case MachO::LC_FUNCTION_STARTS:
MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
break;
case MachO::LC_DYLD_INFO:
case MachO::LC_DYLD_INFO_ONLY:
// An empty element is recorded with offset 0 rather than with the position
// it would have occupied.
MLC.dyld_info_command_data.rebase_off =
O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
MLC.dyld_info_command_data.bind_off =
O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
MLC.dyld_info_command_data.weak_bind_off =
O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
MLC.dyld_info_command_data.lazy_bind_off =
O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
MLC.dyld_info_command_data.export_off =
O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
break;
case MachO::LC_LOAD_DYLINKER:
case MachO::LC_MAIN:
case MachO::LC_RPATH:
case MachO::LC_SEGMENT:
case MachO::LC_SEGMENT_64:
case MachO::LC_VERSION_MIN_MACOSX:
case MachO::LC_BUILD_VERSION:
case MachO::LC_ID_DYLIB:
case MachO::LC_LOAD_DYLIB:
case MachO::LC_UUID:
case MachO::LC_SOURCE_VERSION:
// Nothing to update.
break;
default:
// Abort if it's unsupported in order to prevent corrupting the object.
return createStringError(llvm::errc::not_supported,
"unsupported load command (cmd=0x%x)", cmd);
}
}
return Error::success();
}
// Entry point of the layout pass. Refreshes the header's command count and
// total command size, builds the string table, renumbers the symbols, and then
// lays out segments, relocations and the __LINKEDIT contents in that order,
// each stage starting at the file offset where the previous one ended.
// Returns whatever error layoutTail() reports.
Error MachOLayoutBuilder::layout() {
  O.Header.NCmds = O.LoadCommands.size();
  O.Header.SizeOfCmds = computeSizeOfCmds();
  constructStringTable();
  updateSymbolIndexes();

  const uint64_t EndOfSegments = layoutSegments();
  const uint64_t EndOfRelocations = layoutRelocations(EndOfSegments);
  return layoutTail(EndOfRelocations);
}
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm

View File

@ -0,0 +1,50 @@
//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
#include "MachOObjcopy.h"
#include "Object.h"
namespace llvm {
namespace objcopy {
namespace macho {
// Computes the file layout of a Mach-O object: it recalculates sizes, counts
// and file offsets stored in the header, load commands and sections, and owns
// the string table builder used for the symbol names.
class MachOLayoutBuilder {
// The object whose fields are updated in place.
Object &O;
// Selects the 64-bit variants of the header and nlist structures when sizing.
bool Is64Bit;
// Page size used to round up the vmsize of the __LINKEDIT segment.
uint64_t PageSize;
// Points to the __LINKEDIT segment if it exists.
MachO::macho_load_command *LinkEditLoadCommand = nullptr;
// String table for symbol names; filled and finalized by
// constructStringTable().
StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
// Returns the total size in bytes of all load commands.
uint32_t computeSizeOfCmds() const;
// Adds every symbol name to StrTableBuilder and finalizes it.
void constructStringTable();
// Sets each symbol's Index to its position in the symbol table.
void updateSymbolIndexes();
// Recomputes the local/external-defined/undefined ranges of an LC_DYSYMTAB
// command.
void updateDySymTab(MachO::macho_load_command &MLC);
// Lays out all segments except __LINKEDIT; returns the end file offset.
uint64_t layoutSegments();
// Lays out relocation entries starting at Offset; returns the end file offset.
uint64_t layoutRelocations(uint64_t Offset);
// Lays out the __LINKEDIT contents starting at Offset and updates the load
// commands that reference them.
Error layoutTail(uint64_t Offset);
public:
MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize)
: O(O), Is64Bit(Is64Bit), PageSize(PageSize) {}
// Recomputes and updates fields in the given object such as file offsets.
Error layout();
// Gives access to the string table builder, e.g. for looking up name offsets
// and writing the table out after layout() has run.
StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
};
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H

View File

@ -57,7 +57,11 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
if (Error E = handleArgs(Config, *O))
return createFileError(Config.InputFilename, std::move(E));
MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
// TODO: Support 16KB pages which are employed in iOS arm64 binaries:
// https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb
const uint64_t PageSize = 4096;
MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
if (auto E = Writer.finalize())
return E;
return Writer.write();

View File

@ -129,10 +129,19 @@ void MachOReader::readLoadCommands(Object &O) const {
case MachO::LC_SYMTAB:
O.SymTabCommandIndex = O.LoadCommands.size();
break;
case MachO::LC_DYSYMTAB:
O.DySymTabCommandIndex = O.LoadCommands.size();
break;
case MachO::LC_DYLD_INFO:
case MachO::LC_DYLD_INFO_ONLY:
O.DyLdInfoCommandIndex = O.LoadCommands.size();
break;
case MachO::LC_DATA_IN_CODE:
O.DataInCodeCommandIndex = O.LoadCommands.size();
break;
case MachO::LC_FUNCTION_STARTS:
O.FunctionStartsCommandIndex = O.LoadCommands.size();
break;
}
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
case MachO::LCName: \
@ -222,6 +231,35 @@ void MachOReader::readExportInfo(Object &O) const {
O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
}
// Points O.DataInCode.Data at the bytes described by the LC_DATA_IN_CODE load
// command (dataoff/datasize) inside the input file. Does nothing when the
// object has no such command.
// NOTE(review): no explicit bounds validation is done here; the range is
// taken as-is through getData().substr().
void MachOReader::readDataInCodeData(Object &O) const {
  if (O.DataInCodeCommandIndex) {
    const auto &Command =
        O.LoadCommands[*O.DataInCodeCommandIndex].MachOLoadCommand;
    const MachO::linkedit_data_command &LinkEdit =
        Command.linkedit_data_command_data;
    const auto Bytes =
        MachOObj.getData().substr(LinkEdit.dataoff, LinkEdit.datasize);
    O.DataInCode.Data = arrayRefFromStringRef(Bytes);
  }
}
// Points O.FunctionStarts.Data at the bytes described by the
// LC_FUNCTION_STARTS load command (dataoff/datasize) inside the input file.
// Does nothing when the object has no such command.
// NOTE(review): no explicit bounds validation is done here; the range is
// taken as-is through getData().substr().
void MachOReader::readFunctionStartsData(Object &O) const {
  if (O.FunctionStartsCommandIndex) {
    const auto &Command =
        O.LoadCommands[*O.FunctionStartsCommandIndex].MachOLoadCommand;
    const MachO::linkedit_data_command &LinkEdit =
        Command.linkedit_data_command_data;
    const auto Bytes =
        MachOObj.getData().substr(LinkEdit.dataoff, LinkEdit.datasize);
    O.FunctionStarts.Data = arrayRefFromStringRef(Bytes);
  }
}
// Appends every entry of the input file's indirect symbol table, in order, to
// O.IndirectSymTable.Symbols. The entry count comes from the file's dysymtab
// load command as returned by getDysymtabLoadCommand().
void MachOReader::readIndirectSymbolTable(Object &O) const {
  const MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
  auto &Entries = O.IndirectSymTable.Symbols;
  for (uint32_t Index = 0; Index != DySymTab.nindirectsyms; ++Index) {
    Entries.push_back(MachOObj.getIndirectSymbolTableEntry(DySymTab, Index));
  }
}
std::unique_ptr<Object> MachOReader::create() const {
auto Obj = std::make_unique<Object>();
readHeader(*Obj);
@ -233,6 +271,9 @@ std::unique_ptr<Object> MachOReader::create() const {
readWeakBindInfo(*Obj);
readLazyBindInfo(*Obj);
readExportInfo(*Obj);
readDataInCodeData(*Obj);
readFunctionStartsData(*Obj);
readIndirectSymbolTable(*Obj);
return Obj;
}

View File

@ -36,6 +36,9 @@ class MachOReader : public Reader {
void readWeakBindInfo(Object &O) const;
void readLazyBindInfo(Object &O) const;
void readExportInfo(Object &O) const;
void readDataInCodeData(Object &O) const;
void readFunctionStartsData(Object &O) const;
void readIndirectSymbolTable(Object &O) const;
public:
explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MachOWriter.h"
#include "MachOLayoutBuilder.h"
#include "Object.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
@ -40,16 +41,10 @@ size_t MachOWriter::totalSize() const {
const MachO::symtab_command &SymTabCommand =
O.LoadCommands[*O.SymTabCommandIndex]
.MachOLoadCommand.symtab_command_data;
if (SymTabCommand.symoff) {
assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
"Incorrect number of symbols");
if (SymTabCommand.symoff)
Ends.push_back(SymTabCommand.symoff + symTableSize());
}
if (SymTabCommand.stroff) {
assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
"Incorrect string table size");
if (SymTabCommand.stroff)
Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
}
}
if (O.DyLdInfoCommandIndex) {
const MachO::dyld_info_command &DyLdInfoCommand =
@ -84,6 +79,36 @@ size_t MachOWriter::totalSize() const {
}
}
if (O.DySymTabCommandIndex) {
const MachO::dysymtab_command &DySymTabCommand =
O.LoadCommands[*O.DySymTabCommandIndex]
.MachOLoadCommand.dysymtab_command_data;
if (DySymTabCommand.indirectsymoff)
Ends.push_back(DySymTabCommand.indirectsymoff +
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
}
if (O.DataInCodeCommandIndex) {
const MachO::linkedit_data_command &LinkEditDataCommand =
O.LoadCommands[*O.DataInCodeCommandIndex]
.MachOLoadCommand.linkedit_data_command_data;
if (LinkEditDataCommand.dataoff)
Ends.push_back(LinkEditDataCommand.dataoff +
LinkEditDataCommand.datasize);
}
if (O.FunctionStartsCommandIndex) {
const MachO::linkedit_data_command &LinkEditDataCommand =
O.LoadCommands[*O.FunctionStartsCommandIndex]
.MachOLoadCommand.linkedit_data_command_data;
if (LinkEditDataCommand.dataoff)
Ends.push_back(LinkEditDataCommand.dataoff +
LinkEditDataCommand.datasize);
}
// Otherwise, use the last section / relocation.
for (const auto &LC : O.LoadCommands)
for (const auto &S : LC.Sections) {
@ -120,14 +145,6 @@ void MachOWriter::writeHeader() {
memcpy(B.getBufferStart(), &Header, HeaderSize);
}
void MachOWriter::updateSymbolIndexes() {
uint32_t Index = 0;
for (auto &Symbol : O.SymTable.Symbols) {
Symbol->Index = Index;
Index++;
}
}
void MachOWriter::writeLoadCommands() {
uint8_t *Begin = B.getBufferStart() + headerSize();
for (const auto &LC : O.LoadCommands) {
@ -261,7 +278,7 @@ void MachOWriter::writeSymbolTable() {
.MachOLoadCommand.symtab_command_data;
uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
StrTableBuilder.write(StrTable);
LayoutBuilder.getStringTableBuilder().write(StrTable);
}
void MachOWriter::writeStringTable() {
@ -275,7 +292,7 @@ void MachOWriter::writeStringTable() {
for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
Iter != End; Iter++) {
SymbolEntry *Sym = Iter->get();
auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
if (Is64Bit)
writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
@ -344,6 +361,45 @@ void MachOWriter::writeExportInfo() {
memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
}
void MachOWriter::writeIndirectSymbolTable() {
if (!O.DySymTabCommandIndex)
return;
const MachO::dysymtab_command &DySymTabCommand =
O.LoadCommands[*O.DySymTabCommandIndex]
.MachOLoadCommand.dysymtab_command_data;
char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff;
assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) &&
"Incorrect indirect symbol table size");
memcpy(Out, O.IndirectSymTable.Symbols.data(),
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
}
void MachOWriter::writeDataInCodeData() {
if (!O.DataInCodeCommandIndex)
return;
const MachO::linkedit_data_command &LinkEditDataCommand =
O.LoadCommands[*O.DataInCodeCommandIndex]
.MachOLoadCommand.linkedit_data_command_data;
char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
"Incorrect data in code data size");
memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
}
void MachOWriter::writeFunctionStartsData() {
if (!O.FunctionStartsCommandIndex)
return;
const MachO::linkedit_data_command &LinkEditDataCommand =
O.LoadCommands[*O.FunctionStartsCommandIndex]
.MachOLoadCommand.linkedit_data_command_data;
char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
"Incorrect function starts data size");
memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
}
void MachOWriter::writeTail() {
typedef void (MachOWriter::*WriteHandlerType)(void);
typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
@ -379,6 +435,36 @@ void MachOWriter::writeTail() {
{DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
}
if (O.DySymTabCommandIndex) {
const MachO::dysymtab_command &DySymTabCommand =
O.LoadCommands[*O.DySymTabCommandIndex]
.MachOLoadCommand.dysymtab_command_data;
if (DySymTabCommand.indirectsymoff)
Queue.emplace_back(DySymTabCommand.indirectsymoff,
&MachOWriter::writeIndirectSymbolTable);
}
if (O.DataInCodeCommandIndex) {
const MachO::linkedit_data_command &LinkEditDataCommand =
O.LoadCommands[*O.DataInCodeCommandIndex]
.MachOLoadCommand.linkedit_data_command_data;
if (LinkEditDataCommand.dataoff)
Queue.emplace_back(LinkEditDataCommand.dataoff,
&MachOWriter::writeDataInCodeData);
}
if (O.FunctionStartsCommandIndex) {
const MachO::linkedit_data_command &LinkEditDataCommand =
O.LoadCommands[*O.FunctionStartsCommandIndex]
.MachOLoadCommand.linkedit_data_command_data;
if (LinkEditDataCommand.dataoff)
Queue.emplace_back(LinkEditDataCommand.dataoff,
&MachOWriter::writeFunctionStartsData);
}
llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
return LHS.first < RHS.first;
});
@ -387,198 +473,13 @@ void MachOWriter::writeTail() {
(this->*WriteOp.second)();
}
void MachOWriter::updateSizeOfCmds() {
auto Size = 0;
for (const auto &LC : O.LoadCommands) {
auto &MLC = LC.MachOLoadCommand;
auto cmd = MLC.load_command_data.cmd;
switch (cmd) {
case MachO::LC_SEGMENT:
Size += sizeof(MachO::segment_command) +
sizeof(MachO::section) * LC.Sections.size();
continue;
case MachO::LC_SEGMENT_64:
Size += sizeof(MachO::segment_command_64) +
sizeof(MachO::section_64) * LC.Sections.size();
continue;
}
switch (cmd) {
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
case MachO::LCName: \
Size += sizeof(MachO::LCStruct); \
break;
#include "llvm/BinaryFormat/MachO.def"
#undef HANDLE_LOAD_COMMAND
}
}
O.Header.SizeOfCmds = Size;
}
// Updates the index and the number of local/external/undefined symbols. Here we
// assume that MLC is a LC_DYSYMTAB and the nlist entries in the symbol table
// are already sorted by the those types.
void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
uint32_t NumLocalSymbols = 0;
auto Iter = O.SymTable.Symbols.begin();
auto End = O.SymTable.Symbols.end();
for (; Iter != End; Iter++) {
if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT))
break;
NumLocalSymbols++;
}
uint32_t NumExtDefSymbols = 0;
for (; Iter != End; Iter++) {
if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF)
break;
NumExtDefSymbols++;
}
MLC.dysymtab_command_data.ilocalsym = 0;
MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
MLC.dysymtab_command_data.nundefsym =
O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
}
// Recomputes and updates offset and size fields in load commands and sections
// since they could be modified.
Error MachOWriter::layout() {
auto SizeOfCmds = loadCommandsSize();
auto Offset = headerSize() + SizeOfCmds;
O.Header.NCmds = O.LoadCommands.size();
O.Header.SizeOfCmds = SizeOfCmds;
// Lay out sections.
for (auto &LC : O.LoadCommands) {
uint64_t FileOff = Offset;
uint64_t VMSize = 0;
uint64_t FileOffsetInSegment = 0;
for (auto &Sec : LC.Sections) {
if (!Sec.isVirtualSection()) {
auto FilePaddingSize =
OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
Sec.Size = Sec.Content.size();
FileOffsetInSegment += FilePaddingSize + Sec.Size;
}
VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
}
// TODO: Handle the __PAGEZERO segment.
auto &MLC = LC.MachOLoadCommand;
switch (MLC.load_command_data.cmd) {
case MachO::LC_SEGMENT:
MLC.segment_command_data.cmdsize =
sizeof(MachO::segment_command) +
sizeof(MachO::section) * LC.Sections.size();
MLC.segment_command_data.nsects = LC.Sections.size();
MLC.segment_command_data.fileoff = FileOff;
MLC.segment_command_data.vmsize = VMSize;
MLC.segment_command_data.filesize = FileOffsetInSegment;
break;
case MachO::LC_SEGMENT_64:
MLC.segment_command_64_data.cmdsize =
sizeof(MachO::segment_command_64) +
sizeof(MachO::section_64) * LC.Sections.size();
MLC.segment_command_64_data.nsects = LC.Sections.size();
MLC.segment_command_64_data.fileoff = FileOff;
MLC.segment_command_64_data.vmsize = VMSize;
MLC.segment_command_64_data.filesize = FileOffsetInSegment;
break;
}
Offset += FileOffsetInSegment;
}
// Lay out relocations.
for (auto &LC : O.LoadCommands)
for (auto &Sec : LC.Sections) {
Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
Sec.NReloc = Sec.Relocations.size();
Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
}
// Lay out tail stuff.
auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
for (auto &LC : O.LoadCommands) {
auto &MLC = LC.MachOLoadCommand;
auto cmd = MLC.load_command_data.cmd;
switch (cmd) {
case MachO::LC_SYMTAB:
MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
MLC.symtab_command_data.symoff = Offset;
Offset += NListSize * MLC.symtab_command_data.nsyms;
MLC.symtab_command_data.stroff = Offset;
Offset += MLC.symtab_command_data.strsize;
break;
case MachO::LC_DYSYMTAB: {
if (MLC.dysymtab_command_data.ntoc != 0 ||
MLC.dysymtab_command_data.nmodtab != 0 ||
MLC.dysymtab_command_data.nextrefsyms != 0 ||
MLC.dysymtab_command_data.nlocrel != 0 ||
MLC.dysymtab_command_data.nextrel != 0)
return createStringError(llvm::errc::not_supported,
"shared library is not yet supported");
if (MLC.dysymtab_command_data.nindirectsyms != 0)
return createStringError(llvm::errc::not_supported,
"indirect symbol table is not yet supported");
updateDySymTab(MLC);
break;
}
case MachO::LC_SEGMENT:
case MachO::LC_SEGMENT_64:
case MachO::LC_VERSION_MIN_MACOSX:
case MachO::LC_BUILD_VERSION:
case MachO::LC_ID_DYLIB:
case MachO::LC_LOAD_DYLIB:
case MachO::LC_UUID:
case MachO::LC_SOURCE_VERSION:
// Nothing to update.
break;
default:
// Abort if it's unsupported in order to prevent corrupting the object.
return createStringError(llvm::errc::not_supported,
"unsupported load command (cmd=0x%x)", cmd);
}
}
return Error::success();
}
void MachOWriter::constructStringTable() {
for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
StrTableBuilder.add(Sym->Name);
StrTableBuilder.finalize();
}
Error MachOWriter::finalize() {
updateSizeOfCmds();
constructStringTable();
if (auto E = layout())
return E;
return Error::success();
}
Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
Error MachOWriter::write() {
if (Error E = B.allocate(totalSize()))
return E;
memset(B.getBufferStart(), 0, totalSize());
writeHeader();
updateSymbolIndexes();
writeLoadCommands();
writeSections();
writeTail();

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "../Buffer.h"
#include "MachOLayoutBuilder.h"
#include "MachOObjcopy.h"
#include "Object.h"
#include "llvm/BinaryFormat/MachO.h"
@ -22,20 +23,15 @@ class MachOWriter {
Object &O;
bool Is64Bit;
bool IsLittleEndian;
uint64_t PageSize;
Buffer &B;
StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
MachOLayoutBuilder LayoutBuilder;
size_t headerSize() const;
size_t loadCommandsSize() const;
size_t symTableSize() const;
size_t strTableSize() const;
void updateDySymTab(MachO::macho_load_command &MLC);
void updateSizeOfCmds();
void updateSymbolIndexes();
void constructStringTable();
Error layout();
void writeHeader();
void writeLoadCommands();
template <typename StructType>
@ -48,11 +44,16 @@ class MachOWriter {
void writeWeakBindInfo();
void writeLazyBindInfo();
void writeExportInfo();
void writeIndirectSymbolTable();
void writeDataInCodeData();
void writeFunctionStartsData();
void writeTail();
public:
MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B)
: O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize,
Buffer &B)
: O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian),
PageSize(PageSize), B(B), LayoutBuilder(O, Is64Bit, PageSize) {}
size_t totalSize() const;
Error finalize();

View File

@ -90,6 +90,16 @@ struct SymbolEntry {
uint8_t n_sect;
uint16_t n_desc;
uint64_t n_value;
/// True when at least one of the N_EXT / N_PEXT bits is set in n_type.
bool isExternalSymbol() const {
  const auto ExternalBits = MachO::N_EXT | MachO::N_PEXT;
  return (n_type & ExternalBits) != 0;
}
/// True when the symbol is not external, i.e. the exact complement of
/// isExternalSymbol().
bool isLocalSymbol() const {
  if (isExternalSymbol())
    return false;
  return true;
}
/// True when the N_TYPE bits of n_type equal N_UNDF.
bool isUndefinedSymbol() const {
  const auto TypeBits = n_type & MachO::N_TYPE;
  return TypeBits == MachO::N_UNDF;
}
};
/// The location of the symbol table inside the binary is described by LC_SYMTAB
@ -100,6 +110,10 @@ struct SymbolTable {
const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
};
/// The indirect symbol table of the object: a flat list of 32-bit entries,
/// kept in the order in which they were read from the input.
struct IndirectSymbolTable {
// One value per indirect symbol table entry.
std::vector<uint32_t> Symbols;
};
/// The location of the string table inside the binary is described by LC_SYMTAB
/// load command.
struct StringTable {
@ -206,6 +220,10 @@ struct ExportInfo {
ArrayRef<uint8_t> Trie;
};
/// An opaque blob of bytes referenced by a linkedit_data_command; used for
/// both the data-in-code and the function-starts data of an Object.
struct LinkData {
// Non-owning view of the raw bytes.
ArrayRef<uint8_t> Data;
};
struct Object {
MachHeader Header;
std::vector<LoadCommand> LoadCommands;
@ -218,11 +236,20 @@ struct Object {
WeakBindInfo WeakBinds;
LazyBindInfo LazyBinds;
ExportInfo Exports;
IndirectSymbolTable IndirectSymTable;
LinkData DataInCode;
LinkData FunctionStarts;
/// The index of LC_SYMTAB load command if present.
Optional<size_t> SymTabCommandIndex;
/// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
Optional<size_t> DyLdInfoCommandIndex;
/// The index of the LC_DYSYMTAB load command if present.
Optional<size_t> DySymTabCommandIndex;
/// The index of the LC_DATA_IN_CODE load command if present.
Optional<size_t> DataInCodeCommandIndex;
/// The index of the LC_FUNCTION_STARTS load command if present.
Optional<size_t> FunctionStartsCommandIndex;
};
} // end namespace macho