Generate LC_FUNCTION_STARTS load command.

This load command generates data in the LINKEDIT section, which
is a list of ULEB128 deltas to all of the functions in the __text section.

The list is then 0-terminated and padded to pointer alignment.

ld64 exposes the -function_starts and -no_function_starts cmdline options
to override the default behaviour, which is based on the output file type.

rdar://problem/24472630

llvm-svn: 260188
This commit is contained in:
Pete Cooper 2016-02-09 01:38:13 +00:00
parent 104364e6b5
commit 41f3e8e408
8 changed files with 238 additions and 43 deletions

View File

@ -156,6 +156,13 @@ public:
_generateVersionLoadCommand = v;
}
/// Returns the flag that controls whether function-starts data (and with it
/// the LC_FUNCTION_STARTS load command) is generated for the output file.
bool generateFunctionStartsLoadCommand() const {
return _generateFunctionStartsLoadCommand;
}
/// Sets the flag that controls whether function-starts data (and with it the
/// LC_FUNCTION_STARTS load command) is generated for the output file.
/// \param v true to generate function starts, false to omit them.
void setGenerateFunctionStartsLoadCommand(bool v) {
_generateFunctionStartsLoadCommand = v;
}
uint64_t stackSize() const { return _stackSize; }
void setStackSize(uint64_t stackSize) { _stackSize = stackSize; }
@ -451,6 +458,7 @@ private:
bool _demangle;
bool _mergeObjCCategories = true;
bool _generateVersionLoadCommand = false;
bool _generateFunctionStartsLoadCommand = false;
StringRef _bundleLoader;
mutable std::unique_ptr<mach_o::ArchHandler> _archHandler;
mutable std::unique_ptr<Writer> _writer;

View File

@ -792,6 +792,54 @@ bool DarwinLdDriver::parse(llvm::ArrayRef<const char *> args,
}
}
// Handle -function_starts or -no_function_starts
{
  // Only the last occurrence of either flag on the command line counts.
  bool forcedOn = false;
  bool forcedOff = false;
  if (auto *lastArg = parsedArgs.getLastArg(OPT_function_starts,
                                            OPT_no_function_starts)) {
    const auto optionID = lastArg->getOption().getID();
    forcedOn = (optionID == OPT_function_starts);
    forcedOff = (optionID == OPT_no_function_starts);
  }

  // Dynamic code gets function starts unless they are disabled; static code
  // only gets them when they are explicitly requested.
  switch (ctx.outputMachOType()) {
  case llvm::MachO::MH_OBJECT:
    // Relocatable objects never get function starts, whatever the flags say.
    ctx.setGenerateFunctionStartsLoadCommand(false);
    break;

  case llvm::MachO::MH_EXECUTE:
    // Dynamic executables generate a function starts load command unless
    // -no_function_starts is given, while static executables only generate
    // it when -function_starts is given.
    if (isStaticExecutable ? forcedOn : !forcedOff)
      ctx.setGenerateFunctionStartsLoadCommand(true);
    break;

  case llvm::MachO::MH_PRELOAD:
  case llvm::MachO::MH_KEXT_BUNDLE:
    // Opt-in only.
    if (forcedOn)
      ctx.setGenerateFunctionStartsLoadCommand(true);
    break;

  case llvm::MachO::MH_DYLINKER:
  case llvm::MachO::MH_DYLIB:
  case llvm::MachO::MH_BUNDLE:
    // On by default; opt-out only.
    if (!forcedOff)
      ctx.setGenerateFunctionStartsLoadCommand(true);
    break;

  case llvm::MachO::MH_FVMLIB:
  case llvm::MachO::MH_DYLDLINK:
  case llvm::MachO::MH_DYLIB_STUB:
  case llvm::MachO::MH_DSYM:
    // We don't generate load commands for these file types, even if
    // forced on.
    break;
  }
}
// Handle sdk_version
if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_sdk_version)) {
uint32_t sdkVersion = 0;

View File

@ -43,6 +43,12 @@ def version_load_command : Flag<["-"], "version_load_command">,
HelpText<"Force generation of a version load command">, Group<grp_opts>;
def no_version_load_command : Flag<["-"], "no_version_load_command">,
HelpText<"Disable generation of a version load command">, Group<grp_opts>;
def function_starts : Flag<["-"], "function_starts">,
HelpText<"Force generation of a function starts load command">,
Group<grp_opts>;
def no_function_starts : Flag<["-"], "no_function_starts">,
HelpText<"Disable generation of a function starts load command">,
Group<grp_opts>;
def mllvm : Separate<["-"], "mllvm">,
MetaVarName<"<option>">,
HelpText<"Options to pass to LLVM during LTO">, Group<grp_opts>;

View File

@ -263,6 +263,7 @@ struct NormalizedFile {
std::vector<BindLocation> weakBindingInfo;
std::vector<BindLocation> lazyBindingInfo;
std::vector<Export> exportInfo;
std::vector<uint8_t> functionStarts;
std::vector<DataInCode> dataInCode;
// TODO:

View File

@ -18,6 +18,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MachO.h"
#include <system_error>
@ -25,6 +26,46 @@ namespace lld {
namespace mach_o {
namespace normalized {
/// Append-only byte sink. All writes go through a raw_svector_ostream that
/// is bound to an internal SmallVector, so the bytes written so far are
/// available via bytes() / size().
class ByteBuffer {
public:
  ByteBuffer() : _ostream(_bytes) { }

  /// Appends the single byte \p b.
  void append_byte(uint8_t b) {
    _ostream << b;
  }

  /// Appends \p value encoded as ULEB128.
  void append_uleb128(uint64_t value) {
    llvm::encodeULEB128(value, _ostream);
  }

  /// Appends \p value encoded as ULEB128, passing the difference between
  /// \p byteCount and the minimal encoding size to the encoder as padding.
  /// Asserts that the minimal encoding fits in \p byteCount.
  void append_uleb128Fixed(uint64_t value, unsigned byteCount) {
    const unsigned naturalSize = llvm::getULEB128Size(value);
    assert(naturalSize <= byteCount);
    const unsigned padding = byteCount - naturalSize;
    llvm::encodeULEB128(value, _ostream, padding);
  }

  /// Appends \p value encoded as SLEB128.
  void append_sleb128(int64_t value) {
    llvm::encodeSLEB128(value, _ostream);
  }

  /// Appends the characters of \p str followed by a terminating zero byte.
  void append_string(StringRef str) {
    _ostream << str;
    append_byte(0);
  }

  /// Appends zero bytes until the current size is a multiple of
  /// \p alignment.
  void align(unsigned alignment) {
    const uint64_t remainder = _ostream.tell() % alignment;
    if (remainder == 0)
      return;
    for (uint64_t filled = remainder; filled != alignment; ++filled)
      append_byte(0);
  }

  /// Returns the stream position, i.e. the number of bytes appended so far.
  size_t size() {
    return _ostream.tell();
  }

  /// Returns a pointer to the first appended byte.
  const uint8_t *bytes() {
    const char *rawData = _ostream.str().data();
    return reinterpret_cast<const uint8_t*>(rawData);
  }

private:
  SmallVector<char, 128> _bytes;
  // Stream ivar must be after SmallVector ivar to construct properly.
  llvm::raw_svector_ostream _ostream;
};
using namespace llvm::support::endian;
using llvm::sys::getSwappedBytes;

View File

@ -37,7 +37,6 @@
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@ -52,46 +51,6 @@ namespace lld {
namespace mach_o {
namespace normalized {
// Append-only byte sink: every write goes through a raw_svector_ostream that
// is bound to the internal SmallVector.
class ByteBuffer {
public:
ByteBuffer() : _ostream(_bytes) { }
// Appends a single byte.
void append_byte(uint8_t b) {
_ostream << b;
}
// Appends value encoded as ULEB128.
void append_uleb128(uint64_t value) {
llvm::encodeULEB128(value, _ostream);
}
// Appends value encoded as ULEB128, handing the difference between byteCount
// and the minimal encoding size to the encoder as padding.
void append_uleb128Fixed(uint64_t value, unsigned byteCount) {
unsigned min = llvm::getULEB128Size(value);
assert(min <= byteCount);
unsigned pad = byteCount - min;
llvm::encodeULEB128(value, _ostream, pad);
}
// Appends value encoded as SLEB128.
void append_sleb128(int64_t value) {
llvm::encodeSLEB128(value, _ostream);
}
// Appends the characters of str followed by a terminating zero byte.
void append_string(StringRef str) {
_ostream << str;
append_byte(0);
}
// Appends zero bytes until the stream position is a multiple of alignment.
void align(unsigned alignment) {
while ( (_ostream.tell() % alignment) != 0 )
append_byte(0);
}
// Returns the current stream position (number of bytes appended so far).
size_t size() {
return _ostream.tell();
}
// Returns a pointer to the first appended byte.
const uint8_t *bytes() {
return reinterpret_cast<const uint8_t*>(_ostream.str().data());
}
private:
SmallVector<char, 128> _bytes;
// Stream ivar must be after SmallVector ivar to construct properly.
llvm::raw_svector_ostream _ostream;
};
struct TrieNode; // Forward declaration.
struct TrieEdge : public llvm::ilist_node<TrieEdge> {
@ -188,6 +147,7 @@ private:
void writeBindingInfo();
void writeLazyBindingInfo();
void writeExportInfo();
void writeFunctionStartsInfo();
void writeDataInCodeInfo();
void writeLinkEditContent();
void buildLinkEditInfo();
@ -195,6 +155,7 @@ private:
void buildBindInfo();
void buildLazyBindInfo();
void buildExportTrie();
void computeFunctionStartsSize();
void computeDataInCodeSize();
void computeSymbolTableSizes();
void buildSectionRelocations();
@ -246,6 +207,7 @@ private:
uint32_t _countOfLoadCommands;
uint32_t _endOfLoadCommands;
uint32_t _startOfRelocations;
uint32_t _startOfFunctionStarts;
uint32_t _startOfDataInCode;
uint32_t _startOfSymbols;
uint32_t _startOfIndirectSymbols;
@ -256,6 +218,7 @@ private:
uint32_t _symbolTableUndefinesStartIndex;
uint32_t _symbolStringPoolSize;
uint32_t _symbolTableSize;
uint32_t _functionStartsSize;
uint32_t _dataInCodeSize;
uint32_t _indirectSymbolTableCount;
// Used in object file creation only
@ -321,6 +284,10 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
_endOfLoadCommands += sizeof(version_min_command);
_countOfLoadCommands++;
}
if (!_file.functionStarts.empty()) {
_endOfLoadCommands += sizeof(linkedit_data_command);
_countOfLoadCommands++;
}
if (!_file.dataInCode.empty()) {
_endOfLoadCommands += sizeof(linkedit_data_command);
_countOfLoadCommands++;
@ -342,11 +309,13 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
_endOfSectionsContent = offset;
computeSymbolTableSizes();
computeFunctionStartsSize();
computeDataInCodeSize();
// Align start of relocations.
_startOfRelocations = pointerAlign(_endOfSectionsContent);
_startOfDataInCode = _startOfRelocations + relocCount * 8;
_startOfFunctionStarts = _startOfRelocations + relocCount * 8;
_startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
_startOfSymbols = _startOfDataInCode + _dataInCodeSize;
// Add Indirect symbol table.
_startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
@ -387,7 +356,8 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
_endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size();
_startOfExportTrie = _endOfLazyBindingInfo;
_endOfExportTrie = _startOfExportTrie + _exportTrie.size();
_startOfDataInCode = _endOfExportTrie;
_startOfFunctionStarts = _endOfExportTrie;
_startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
_startOfSymbols = _startOfDataInCode + _dataInCodeSize;
_startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
_startOfSymbolStrings = _startOfIndirectSymbols
@ -409,6 +379,7 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
<< " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n"
<< " startOfExportTrie=" << _startOfExportTrie << "\n"
<< " endOfExportTrie=" << _endOfExportTrie << "\n"
<< " startOfFunctionStarts=" << _startOfFunctionStarts << "\n"
<< " startOfDataInCode=" << _startOfDataInCode << "\n"
<< " startOfSymbols=" << _startOfSymbols << "\n"
<< " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
@ -486,6 +457,12 @@ uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) {
++count;
}
// Add LC_FUNCTION_STARTS if needed
if (!_file.functionStarts.empty()) {
size += sizeof(linkedit_data_command);
++count;
}
// Add LC_DATA_IN_CODE if needed
if (!_file.dataInCode.empty()) {
size += sizeof(linkedit_data_command);
@ -821,6 +798,18 @@ std::error_code MachOFileLayout::writeLoadCommands() {
// LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS
writeVersionMinLoadCommand(_file, _swap, lc);
// Add LC_FUNCTION_STARTS if needed.
if (_functionStartsSize != 0) {
linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
dl->cmd = LC_FUNCTION_STARTS;
dl->cmdsize = sizeof(linkedit_data_command);
dl->dataoff = _startOfFunctionStarts;
dl->datasize = _functionStartsSize;
if (_swap)
swapStruct(*dl);
lc += sizeof(linkedit_data_command);
}
// Add LC_DATA_IN_CODE if needed.
if (_dataInCodeSize != 0) {
linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
@ -992,6 +981,18 @@ std::error_code MachOFileLayout::writeLoadCommands() {
lc += size;
}
// Add LC_FUNCTION_STARTS if needed.
if (_functionStartsSize != 0) {
linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
dl->cmd = LC_FUNCTION_STARTS;
dl->cmdsize = sizeof(linkedit_data_command);
dl->dataoff = _startOfFunctionStarts;
dl->datasize = _functionStartsSize;
if (_swap)
swapStruct(*dl);
lc += sizeof(linkedit_data_command);
}
// Add LC_DATA_IN_CODE if needed.
if (_dataInCodeSize != 0) {
linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
@ -1063,6 +1064,11 @@ void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols,
}
}
/// Copies the already-encoded function-starts bytes of the normalized file
/// into the output buffer at offset _startOfFunctionStarts.
void MachOFileLayout::writeFunctionStartsInfo() {
  // writeLinkEditContent() calls this unconditionally. When no function
  // starts were generated the vector is empty and its data() may be null;
  // memcpy with a null pointer is undefined even for a zero byte count, so
  // bail out before touching either pointer.
  if (_functionStartsSize == 0)
    return;
  memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(),
         _functionStartsSize);
}
void MachOFileLayout::writeDataInCodeInfo() {
uint32_t offset = _startOfDataInCode;
for (const DataInCode &entry : _file.dataInCode) {
@ -1138,6 +1144,7 @@ void MachOFileLayout::buildLinkEditInfo() {
buildLazyBindInfo();
buildExportTrie();
computeSymbolTableSizes();
computeFunctionStartsSize();
computeDataInCodeSize();
}
@ -1417,6 +1424,10 @@ void MachOFileLayout::computeSymbolTableSizes() {
}
}
// Records the byte size of the function-starts data. The normalized file
// stores that data as a byte vector, so the size is the element count.
void MachOFileLayout::computeFunctionStartsSize() {
_functionStartsSize = _file.functionStarts.size();
}
// Records the byte size of the data-in-code table: one data_in_code_entry
// per element of the normalized file's dataInCode vector.
void MachOFileLayout::computeDataInCodeSize() {
_dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry);
}
@ -1424,6 +1435,7 @@ void MachOFileLayout::computeDataInCodeSize() {
void MachOFileLayout::writeLinkEditContent() {
if (_file.fileType == llvm::MachO::MH_OBJECT) {
writeRelocations();
writeFunctionStartsInfo();
writeDataInCodeInfo();
writeSymbolTable();
} else {
@ -1432,6 +1444,7 @@ void MachOFileLayout::writeLinkEditContent() {
writeLazyBindingInfo();
// TODO: add weak binding info
writeExportInfo();
writeFunctionStartsInfo();
writeDataInCodeInfo();
writeSymbolTable();
}

View File

@ -125,6 +125,7 @@ public:
void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
void addExportInfo(const lld::File &, NormalizedFile &file);
void addSectionRelocs(const lld::File &, NormalizedFile &file);
void addFunctionStarts(const lld::File &, NormalizedFile &file);
void buildDataInCodeArray(const lld::File &, NormalizedFile &file);
void addDependentDylibs(const lld::File &, NormalizedFile &file);
void copyEntryPointAddress(NormalizedFile &file);
@ -1140,6 +1141,50 @@ void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) {
}
}
/// Fills \p file.functionStarts with the function-starts payload: a sequence
/// of ULEB128-encoded address deltas, one per non-empty code atom, followed
/// by a zero terminator and zero padding up to pointer size. Does nothing
/// when the linking context has function starts disabled.
void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) {
  if (!_ctx.generateFunctionStartsLoadCommand())
    return;

  auto &starts = file.functionStarts;
  starts.reserve(8192);

  // Every entry is the distance from the previously recorded address; the
  // very first distance is measured from the mach header atom.
  const uint64_t noAddress = ~0ULL;
  uint64_t prevAddr = noAddress;
  for (SectionInfo *sect : _sectionInfos) {
    for (const AtomInfo &entry : sect->atomsAndOffsets) {
      const auto kind = entry.atom->contentType();
      if (kind == DefinedAtom::typeMachHeader) {
        prevAddr = _atomToAddress[entry.atom];
        continue;
      }
      if (kind != DefinedAtom::typeCode)
        continue;
      assert(prevAddr != noAddress && "Missing mach header symbol");

      // Zero-sized atoms are left out so that LC_FUNCTION_STARTS can't
      // spill in to the next section.
      if (entry.atom->size() == 0)
        continue;

      // Thumb functions are recorded with the low address bit set.
      uint64_t curAddr = _atomToAddress[entry.atom];
      if (_archHandler.isThumbFunction(*entry.atom))
        curAddr |= 1;

      const uint64_t delta = curAddr - prevAddr;
      prevAddr = curAddr;
      // A zero delta is not emitted; a zero byte is what terminates the
      // list.
      if (delta == 0)
        continue;

      ByteBuffer encoded;
      encoded.append_uleb128(delta);
      const uint8_t *first = encoded.bytes();
      starts.insert(starts.end(), first, first + encoded.size());
    }
  }

  // Null terminate, then zero-pad to pointer size for this arch.
  starts.push_back(0);
  const auto pointerSize = _ctx.is64Bit() ? 8 : 4;
  starts.resize(llvm::alignTo(starts.size(), pointerSize), 0);
}
void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
for (SectionInfo *si : _sectionInfos) {
for (const AtomInfo &info : si->atomsAndOffsets) {
@ -1348,6 +1393,7 @@ normalizedFromAtoms(const lld::File &atomFile,
util.addRebaseAndBindingInfo(atomFile, normFile);
util.addExportInfo(atomFile, normFile);
util.addSectionRelocs(atomFile, normFile);
util.addFunctionStarts(atomFile, normFile);
util.buildDataInCodeArray(atomFile, normFile);
util.copyEntryPointAddress(normFile);

View File

@ -0,0 +1,32 @@
# RUN: lld -flavor darwin -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/libSystem.yaml && llvm-objdump -private-headers %t | FileCheck %s
# RUN: lld -flavor darwin -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/libSystem.yaml -static -function_starts && llvm-objdump -private-headers %t | FileCheck %s
# RUN: lld -flavor darwin -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/libSystem.yaml -no_function_starts && llvm-objdump -private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS
# RUN: lld -flavor darwin -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/libSystem.yaml -static -function_starts -no_function_starts && llvm-objdump -private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS
# RUN: lld -flavor darwin -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/libSystem.yaml -static && llvm-objdump -private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS
--- !mach-o
arch: x86_64
file-type: MH_OBJECT
flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]
sections:
- segment: __TEXT
section: __text
type: S_REGULAR
attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ]
address: 0x0000000000000000
content: [ 0x00, 0x00, 0x00, 0x00 ]
global-symbols:
- name: _main
type: N_SECT
scope: [ N_EXT ]
sect: 1
value: 0x0000000000000000
...
# CHECK: Load command {{[0-9]*}}
# CHECK: cmd LC_FUNCTION_STARTS
# CHECK: cmdsize 16
# CHECK: dataoff
# CHECK: datasize
# NO_FUNCTION_STARTS-NOT: LC_FUNCTION_STARTS