[lld-macho] Add 32-bit compact unwind support

This could probably have been part of D99633, but I split it up to make
things a bit more reviewable. I also fixed some bugs in the implementation that
were masked through integer underflows when operating in 64-bit mode.

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D99823
This commit is contained in:
Jez Ng 2021-04-15 21:14:33 -04:00
parent 3bc88eb392
commit 1460942c15
6 changed files with 128 additions and 96 deletions

View File

@ -12,6 +12,7 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include <cstddef>
#include <cstdint>
@ -93,6 +94,19 @@ inline void checkUInt(Diagnostic d, uint64_t v, int bits) {
reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
}
// Store `addr` at `loc` in little-endian byte order. The store width is chosen
// by `length`, interpreted as a relocation r_length value: 2 selects a 32-bit
// store and 3 selects a 64-bit store. Any other value is treated as
// unreachable.
inline void writeAddress(uint8_t *loc, uint64_t addr, uint8_t length) {
  if (length == 2) {
    llvm::support::endian::write32le(loc, addr);
    return;
  }
  if (length == 3) {
    llvm::support::endian::write64le(loc, addr);
    return;
  }
  llvm_unreachable("invalid r_length");
}
extern const RelocAttrs invalidRelocAttrs;
} // namespace macho

View File

@ -32,6 +32,7 @@ class Defined;
class DylibSymbol;
class LoadCommand;
class ObjFile;
class UnwindInfoSection;
class SyntheticSection : public OutputSection {
public:
@ -503,6 +504,7 @@ struct InStruct {
StubsSection *stubs = nullptr;
StubHelperSection *stubHelper = nullptr;
ImageLoaderCacheSection *imageLoaderCache = nullptr;
UnwindInfoSection *unwindInfo = nullptr;
};
extern InStruct in;

View File

@ -91,31 +91,62 @@ using namespace lld::macho;
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info
// TODO(gkm): how do we align the 2nd-level pages?
// Construct the section under segment_names::text with the name
// section_names::unwindInfo, and set its alignment to 4.
UnwindInfoSection::UnwindInfoSection()
: SyntheticSection(segment_names::text, section_names::unwindInfo) {
align = 4; // mimic ld64
}
using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>;
// Returns true exactly when a compact-unwind section has been attached, i.e.
// when compactUnwindSection is non-null.
bool UnwindInfoSection::isNeeded() const {
  const bool hasCompactUnwind = compactUnwindSection != nullptr;
  return hasCompactUnwind;
}
// One compact-unwind record, parameterized on `Ptr`, the integer type used for
// its address-sized fields (this file instantiates it with uint64_t and
// uint32_t). Relocated addresses are written into a buffer of these structs
// through raw byte offsets, and sizeof/offsetof of this type are used to
// interpret relocation offsets, so the field order and widths matter.
// NOTE(review): presumably this mirrors the on-disk layout of
// __LD,__compact_unwind entries -- confirm before changing any field.
template <class Ptr> struct CompactUnwindEntry {
// Address of the function; used as the sort key for entries.
Ptr functionAddress;
// Added to functionAddress to compute the end of the last function.
uint32_t functionLength;
// Unwind encoding; entries are grouped and counted by this value.
compact_unwind_encoding_t encoding;
// Personality pointer; zero means the entry has no personality.
Ptr personality;
// LSDA pointer; zero means the entry has no LSDA.
Ptr lsda;
};
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable;
// Bookkeeping for one second-level page of the unwind info: which run of
// folded compact-unwind entries it covers and which encodings are local to it.
struct SecondLevelPage {
// Page format selector.
// NOTE(review): presumably regular vs. compressed -- confirm against finalize().
uint32_t kind;
// Index into the entry-pointer vector of the first entry on this page.
size_t entryIndex;
// Number of consecutive entries, starting at entryIndex, on this page.
size_t entryCount;
// NOTE(review): presumably the number of bytes the page occupies -- confirm.
size_t byteCount;
// Encodings stored in this page itself rather than in the common table.
std::vector<compact_unwind_encoding_t> localEncodings;
// Lookup from an encoding to its index among this page's local encodings.
EncodingMap localEncodingIndexes;
};
// Concrete UnwindInfoSection, parameterized on `Ptr`, the integer type used
// for the address-sized fields of CompactUnwindEntry.
template <class Ptr> class UnwindInfoSectionImpl : public UnwindInfoSection {
public:
// Walks the relocations of a compact-unwind input section.
void prepareRelocations(InputSection *) override;
// Relocates, sorts and folds the compact-unwind entries, then lays out the
// second-level pages.
void finalize() override;
// Writes the section contents into `buf`.
void writeTo(uint8_t *buf) const override;
private:
// NOTE(review): presumably (encoding, frequency) pairs for the encodings
// shared across pages -- confirm against finalize().
std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
// Lookup from an encoding to its index in the common-encodings table.
EncodingMap commonEncodingIndexes;
// Indices of personality functions within the GOT.
std::vector<uint32_t> personalities;
// Maps an (input section, addend) pair to a Symbol.
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable;
std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
// Map of function offset (from the image base) to an index within the LSDA
// array.
llvm::DenseMap<uint32_t, uint32_t> functionToLsdaIndex;
// Relocated compact-unwind entries, one per input entry.
std::vector<CompactUnwindEntry<Ptr>> cuVector;
// Pointers into cuVector; this is the vector that gets sorted and folded.
std::vector<CompactUnwindEntry<Ptr> *> cuPtrVector;
std::vector<SecondLevelPage> secondLevelPages;
uint64_t level2PagesOffset = 0;
};
// Compact unwind relocations have different semantics, so we handle them in a
// separate code path from regular relocations. First, we do not wish to add
// rebase opcodes for __LD,__compact_unwind, because that section doesn't
// actually end up in the final binary. Second, personality pointers always
// reside in the GOT and must be treated specially.
void macho::prepareCompactUnwind(InputSection *isec) {
template <class Ptr>
void UnwindInfoSectionImpl<Ptr>::prepareRelocations(InputSection *isec) {
assert(isec->segname == segment_names::ld &&
isec->name == section_names::compactUnwind);
for (Reloc &r : isec->relocs) {
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
if (r.offset % sizeof(CompactUnwindEntry64) !=
offsetof(struct CompactUnwindEntry64, personality))
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
offsetof(CompactUnwindEntry<Ptr>, personality))
continue;
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
@ -172,8 +203,10 @@ static void checkTextSegment(InputSection *isec) {
// before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
static void relocateCompactUnwind(MergedOutputSection *compactUnwindSection,
std::vector<CompactUnwindEntry64> &cuVector) {
template <class Ptr>
static void
relocateCompactUnwind(MergedOutputSection *compactUnwindSection,
std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
for (const InputSection *isec : compactUnwindSection->inputs) {
uint8_t *buf =
reinterpret_cast<uint8_t *>(cuVector.data()) + isec->outSecFileOff;
@ -195,21 +228,23 @@ static void relocateCompactUnwind(MergedOutputSection *compactUnwindSection,
checkTextSegment(referentIsec);
referentVA = referentIsec->getVA() + r.addend;
}
support::endian::write64le(buf + r.offset, referentVA);
writeAddress(buf + r.offset, referentVA, r.length);
}
}
}
// There should only be a handful of unique personality pointers, so we can
// encode them as 2-bit indices into a small array.
void encodePersonalities(const std::vector<CompactUnwindEntry64 *> &cuPtrVector,
std::vector<uint32_t> &personalities) {
for (CompactUnwindEntry64 *cu : cuPtrVector) {
template <class Ptr>
void encodePersonalities(
const std::vector<CompactUnwindEntry<Ptr> *> &cuPtrVector,
std::vector<uint32_t> &personalities) {
for (CompactUnwindEntry<Ptr> *cu : cuPtrVector) {
if (cu->personality == 0)
continue;
uint32_t personalityOffset = cu->personality - in.header->addr;
// Linear search is fast enough for a small array.
auto it = find(personalities, personalityOffset);
auto it = find(personalities, cu->personality);
uint32_t personalityIndex; // 1-based index
if (it != personalities.end()) {
personalityIndex = std::distance(personalities.begin(), it) + 1;
@ -228,7 +263,7 @@ void encodePersonalities(const std::vector<CompactUnwindEntry64 *> &cuPtrVector,
// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame
void UnwindInfoSection::finalize() {
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
if (compactUnwindSection == nullptr)
return;
@ -240,21 +275,23 @@ void UnwindInfoSection::finalize() {
// encoding+personality+lsda. Folding is necessary because it reduces
// the number of CU entries by as much as 3 orders of magnitude!
compactUnwindSection->finalize();
assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry64) == 0);
assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry<Ptr>) ==
0);
size_t cuCount =
compactUnwindSection->getSize() / sizeof(CompactUnwindEntry64);
compactUnwindSection->getSize() / sizeof(CompactUnwindEntry<Ptr>);
cuVector.resize(cuCount);
relocateCompactUnwind(compactUnwindSection, cuVector);
// Rather than sort & fold the entries directly (32 bytes each for 64-bit
// targets, smaller for 32-bit ones), we create a vector of pointers to
// entries and sort & fold that instead.
cuPtrVector.reserve(cuCount);
for (CompactUnwindEntry64 &cuEntry : cuVector)
for (CompactUnwindEntry<Ptr> &cuEntry : cuVector)
cuPtrVector.emplace_back(&cuEntry);
std::sort(cuPtrVector.begin(), cuPtrVector.end(),
[](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
return a->functionAddress < b->functionAddress;
});
std::sort(
cuPtrVector.begin(), cuPtrVector.end(),
[](const CompactUnwindEntry<Ptr> *a, const CompactUnwindEntry<Ptr> *b) {
return a->functionAddress < b->functionAddress;
});
// Fold adjacent entries with matching encoding+personality+lsda
// We use three iterators on the same cuPtrVector to fold in-situ:
@ -280,7 +317,7 @@ void UnwindInfoSection::finalize() {
// Count frequencies of the folded encodings
EncodingMap encodingFrequencies;
for (const CompactUnwindEntry64 *cuPtrEntry : cuPtrVector)
for (const CompactUnwindEntry<Ptr> *cuPtrEntry : cuPtrVector)
encodingFrequencies[cuPtrEntry->encoding]++;
// Make a vector of encodings, sorted by descending frequency
@ -316,7 +353,7 @@ void UnwindInfoSection::finalize() {
// If more entries fit in the regular format, we use that.
for (size_t i = 0; i < cuPtrVector.size();) {
secondLevelPages.emplace_back();
UnwindInfoSection::SecondLevelPage &page = secondLevelPages.back();
SecondLevelPage &page = secondLevelPages.back();
page.entryIndex = i;
uintptr_t functionAddressMax =
cuPtrVector[i]->functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
@ -326,7 +363,7 @@ void UnwindInfoSection::finalize() {
sizeof(unwind_info_compressed_second_level_page_header) /
sizeof(uint32_t);
while (wordsRemaining >= 1 && i < cuPtrVector.size()) {
const CompactUnwindEntry64 *cuPtr = cuPtrVector[i];
const CompactUnwindEntry<Ptr> *cuPtr = cuPtrVector[i];
if (cuPtr->functionAddress >= functionAddressMax) {
break;
} else if (commonEncodingIndexes.count(cuPtr->encoding) ||
@ -359,7 +396,7 @@ void UnwindInfoSection::finalize() {
}
}
for (const CompactUnwindEntry64 *cu : cuPtrVector) {
for (const CompactUnwindEntry<Ptr> *cu : cuPtrVector) {
uint32_t functionOffset = cu->functionAddress - in.header->addr;
functionToLsdaIndex[functionOffset] = lsdaEntries.size();
if (cu->lsda != 0)
@ -382,7 +419,8 @@ void UnwindInfoSection::finalize() {
// All inputs are relocated and output addresses are known, so write!
void UnwindInfoSection::writeTo(uint8_t *buf) const {
template <class Ptr>
void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
// section header
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
uip->version = 1;
@ -403,7 +441,8 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
// Personalities
for (const uint32_t &personality : personalities)
*i32p++ = in.got->addr + (personality - 1) * target->wordSize;
*i32p++ =
in.got->addr + (personality - 1) * target->wordSize - in.header->addr;
// Level-1 index
uint32_t lsdaOffset =
@ -422,7 +461,7 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
}
// Level-1 sentinel
const CompactUnwindEntry64 &cuEnd = cuVector.back();
const CompactUnwindEntry<Ptr> &cuEnd = cuVector.back();
iep->functionOffset = cuEnd.functionAddress + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0;
iep->lsdaIndexArraySectionOffset =
@ -455,7 +494,7 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
p2p->encodingsCount = page.localEncodings.size();
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i];
const CompactUnwindEntry<Ptr> *cuep = cuPtrVector[page.entryIndex + i];
auto it = commonEncodingIndexes.find(cuep->encoding);
if (it == commonEncodingIndexes.end())
it = page.localEncodingIndexes.find(cuep->encoding);
@ -474,7 +513,7 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
p2p->entryCount = page.entryCount;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i];
const CompactUnwindEntry<Ptr> *cuep = cuPtrVector[page.entryIndex + i];
*ep++ = cuep->functionAddress;
*ep++ = cuep->encoding;
}
@ -482,3 +521,10 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
pp += SECOND_LEVEL_PAGE_WORDS;
}
}
// Create the unwind-info section implementation that matches the target's word
// size: the uint64_t instantiation when target->wordSize is 8, and the
// uint32_t instantiation for every other word size.
UnwindInfoSection *macho::makeUnwindInfoSection() {
  const bool is64Bit = target->wordSize == 8;
  if (!is64Bit)
    return make<UnwindInfoSectionImpl<uint32_t>>();
  return make<UnwindInfoSectionImpl<uint64_t>>();
}

View File

@ -17,66 +17,30 @@
#include <vector>
// In 2020, we mostly care about 64-bit targets: x86_64 and arm64
// Compact-unwind entry with 64-bit address fields (function address,
// personality and LSDA); the length and encoding fields are not address-sized.
struct CompactUnwindEntry64 {
uint64_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
uint64_t personality;
uint64_t lsda;
};
// FIXME(gkm): someday we might care about 32-bit targets: x86 & arm
// Compact-unwind entry with 32-bit address fields (function address,
// personality and LSDA); same field order as CompactUnwindEntry64.
struct CompactUnwindEntry32 {
uint32_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
uint32_t personality;
uint32_t lsda;
};
namespace lld {
namespace macho {
class UnwindInfoSection : public SyntheticSection {
public:
UnwindInfoSection();
bool isNeeded() const override { return compactUnwindSection != nullptr; }
uint64_t getSize() const override { return unwindInfoSize; }
bool isNeeded() const override;
void finalize() override;
void writeTo(uint8_t *buf) const override;
virtual void prepareRelocations(InputSection *) = 0;
void setCompactUnwindSection(MergedOutputSection *cuSection) {
compactUnwindSection = cuSection;
}
using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>;
protected:
UnwindInfoSection()
: SyntheticSection(segment_names::text, section_names::unwindInfo) {
align = 4;
}
struct SecondLevelPage {
uint32_t kind;
size_t entryIndex;
size_t entryCount;
size_t byteCount;
std::vector<compact_unwind_encoding_t> localEncodings;
EncodingMap localEncodingIndexes;
};
private:
std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
EncodingMap commonEncodingIndexes;
// Indices of personality functions within the GOT.
std::vector<uint32_t> personalities;
std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
// Map of function offset (from the image base) to an index within the LSDA
// array.
llvm::DenseMap<uint32_t, uint32_t> functionToLsdaIndex;
std::vector<CompactUnwindEntry64> cuVector;
std::vector<CompactUnwindEntry64 *> cuPtrVector;
std::vector<SecondLevelPage> secondLevelPages;
MergedOutputSection *compactUnwindSection = nullptr;
uint64_t level2PagesOffset = 0;
uint64_t unwindInfoSize = 0;
};
UnwindInfoSection *makeUnwindInfoSection();
void prepareCompactUnwind(InputSection *isec);
} // namespace macho

View File

@ -71,7 +71,6 @@ public:
SymtabSection *symtabSection = nullptr;
IndirectSymtabSection *indirectSymtabSection = nullptr;
CodeSignatureSection *codeSignatureSection = nullptr;
UnwindInfoSection *unwindInfoSection = nullptr;
FunctionStartsSection *functionStartsSection = nullptr;
LCUuid *uuidCommand = nullptr;
@ -517,7 +516,7 @@ void Writer::scanRelocations() {
TimeTraceScope timeScope("Scan relocations");
for (InputSection *isec : inputSections) {
if (isec->segname == segment_names::ld) {
prepareCompactUnwind(isec);
in.unwindInfo->prepareRelocations(isec);
continue;
}
@ -798,7 +797,6 @@ template <class LP> void Writer::createOutputSections() {
TimeTraceScope timeScope("Create output sections");
// First, create hidden sections
stringTableSection = make<StringTableSection>();
unwindInfoSection = make<UnwindInfoSection>(); // TODO(gkm): only when no -r
symtabSection = makeSymtabSection<LP>(*stringTableSection);
indirectSymtabSection = make<IndirectSymtabSection>();
if (config->adhocCodesign)
@ -830,9 +828,9 @@ template <class LP> void Writer::createOutputSections() {
for (const auto &it : mergedOutputSections) {
StringRef segname = it.first.first;
MergedOutputSection *osec = it.second;
if (unwindInfoSection && segname == segment_names::ld) {
if (segname == segment_names::ld) {
assert(osec->name == section_names::compactUnwind);
unwindInfoSection->setCompactUnwindSection(osec);
in.unwindInfo->setCompactUnwindSection(osec);
} else {
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
@ -993,6 +991,7 @@ template <class LP> void macho::createSyntheticSections() {
in.stubs = make<StubsSection>();
in.stubHelper = make<StubHelperSection>();
in.imageLoaderCache = make<ImageLoaderCacheSection>();
in.unwindInfo = makeUnwindInfoSection();
}
OutputSection *macho::firstTLVDataSection = nullptr;

View File

@ -3,16 +3,23 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/my-personality.s -o %t/x86_64-my-personality.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/main.s -o %t/x86_64-main.o
# RUN: %lld -arch x86_64 -pie -lSystem -lc++ %t/x86_64-my-personality.o %t/x86_64-main.o -o %t/x86_64-personality-first
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x100000000
# RUN: %lld -arch x86_64 -pie -lSystem -lc++ %t/x86_64-main.o %t/x86_64-my-personality.o -o %t/x86_64-personality-second
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x100000000
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 %t/my-personality.s -o %t/arm64-my-personality.o
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 %t/main.s -o %t/arm64-main.o
# RUN: %lld -arch arm64 -pie -lSystem -lc++ %t/arm64-my-personality.o %t/arm64-main.o -o %t/arm64-personality-first
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x100000000
# RUN: %lld -arch arm64 -pie -lSystem -lc++ %t/arm64-main.o %t/arm64-my-personality.o -o %t/arm64-personality-second
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x100000000
# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/my-personality.s -o %t/arm64-32-my-personality.o
# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/main.s -o %t/arm64-32-main.o
# RUN: %lld-watchos -pie -lSystem -lc++ %t/arm64-32-my-personality.o %t/arm64-32-main.o -o %t/arm64-32-personality-first
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-32-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x4000
# RUN: %lld-watchos -pie -lSystem -lc++ %t/arm64-32-main.o %t/arm64-32-my-personality.o -o %t/arm64-32-personality-second
# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-32-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x4000
# FIRST: Indirect symbols for (__DATA_CONST,__got)
# FIRST-NEXT: address index name
@ -32,16 +39,16 @@
# CHECK: Contents of __unwind_info section:
# CHECK: Personality functions: (count = 2)
# CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#MY_PERSONALITY-0x100000000]]
# CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#GXX_PERSONALITY-0x100000000]]
# CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#MY_PERSONALITY-BASE]]
# CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#GXX_PERSONALITY-BASE]]
# CHECK: LSDA descriptors:
# CHECK-DAG: function offset=0x{{0*}}[[#FOO-0x100000000]], LSDA offset=0x{{0*}}[[#EXCEPTION0-0x100000000]]
# CHECK-DAG: function offset=0x{{0*}}[[#MAIN-0x100000000]], LSDA offset=0x{{0*}}[[#EXCEPTION1-0x100000000]]
# CHECK-DAG: function offset=0x[[#%.8x,FOO-BASE]], LSDA offset=0x[[#%.8x,EXCEPTION0-BASE]]
# CHECK-DAG: function offset=0x[[#%.8x,MAIN-BASE]], LSDA offset=0x[[#%.8x,EXCEPTION1-BASE]]
## Check that we do not add rebase opcodes to the compact unwind section.
# CHECK: Rebase table:
# CHECK-NEXT: segment section address type
# CHECK-NEXT: __DATA_CONST __got 0x{{[0-9a-f]*}} pointer
# CHECK-NEXT: __DATA_CONST __got 0x{{[0-9A-F]*}} pointer
# CHECK-NOT: __TEXT
#--- my-personality.s