Add support for merging string from SHF_STRINGS sections.

llvm-svn: 251212
This commit is contained in:
Rafael Espindola 2015-10-24 22:51:01 +00:00
parent 53c2bff5fe
commit f82ed2a28c
9 changed files with 264 additions and 37 deletions

View File

@ -120,6 +120,33 @@ ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) {
return Entries.slice(1);
}
template <class ELFT>
static bool shouldMerge(const typename ELFFile<ELFT>::Elf_Shdr &Sec) {
typedef typename ELFFile<ELFT>::uintX_t uintX_t;
uintX_t Flags = Sec.sh_flags;
if (!(Flags & SHF_MERGE))
return false;
if (Flags & SHF_WRITE)
error("Writable SHF_MERGE sections are not supported");
uintX_t EntSize = Sec.sh_entsize;
if (Sec.sh_size % EntSize)
error("SHF_MERGE section size must be a multiple of sh_entsize");
// Don't try to merge if the aligment is larger than the sh_entsize.
//
// If this is not a SHF_STRINGS, we would need to pad after every entity. It
// would be equivalent for the producer of the .o to just set a larger
// sh_entsize.
//
// If this is a SHF_STRINGS, the larger alignment makes sense. Unfortunately
// it would complicate tail merging. This doesn't seem that common to
// justify the effort.
if (Sec.sh_addralign > EntSize)
return false;
return true;
}
template <class ELFT>
void elf2::ObjectFile<ELFT>::initializeSections(DenseSet<StringRef> &Comdats) {
uint64_t Size = this->ELFObj.getNumSections();
@ -170,18 +197,13 @@ void elf2::ObjectFile<ELFT>::initializeSections(DenseSet<StringRef> &Comdats) {
error("Relocations pointing to SHF_MERGE are not supported");
break;
}
default: {
uintX_t Flags = Sec.sh_flags;
if (Flags & SHF_MERGE && !(Flags & SHF_STRINGS)) {
if (Flags & SHF_WRITE)
error("Writable SHF_MERGE sections are not supported");
default:
if (shouldMerge<ELFT>(Sec))
Sections[I] = new (this->Alloc) MergeInputSection<ELFT>(this, &Sec);
} else {
else
Sections[I] = new (this->Alloc) InputSection<ELFT>(this, &Sec);
}
break;
}
}
}
}

View File

@ -129,20 +129,36 @@ bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
return S->SectionKind == Base::Merge;
}
// FIXME: Optimize this by keeping an offset for each element.
template <class ELFT>
typename MergeInputSection<ELFT>::uintX_t
MergeInputSection<ELFT>::getOffset(uintX_t Offset) {
ArrayRef<uint8_t> Data = this->getSectionData();
uintX_t EntSize = this->Header->sh_entsize;
uintX_t Addend = Offset % EntSize;
Offset -= Addend;
if (Offset + EntSize > Data.size())
ArrayRef<uint8_t> D = this->getSectionData();
StringRef Data((char *)D.data(), D.size());
uintX_t Size = Data.size();
if (Offset >= Size)
error("Entry is past the end of the section");
Data = Data.slice(Offset, EntSize);
return static_cast<MergeOutputSection<ELFT> *>(this->OutSec)
->getOffset(Data) +
Addend;
// Find the element this offset points to.
auto I = std::upper_bound(
this->Offsets.begin(), this->Offsets.end(), Offset,
[](const uintX_t &A, const std::pair<uintX_t, uintX_t> &B) {
return A < B.first;
});
size_t End = I == this->Offsets.end() ? Data.size() : I->first;
--I;
uintX_t Start = I->first;
// Compute the Addend and if the Base is cached, return.
uintX_t Addend = Offset - Start;
uintX_t &Base = I->second;
if (Base != uintX_t(-1))
return Base + Addend;
// Map the base to the offset in the output section and cashe it.
StringRef Entry = Data.substr(Start, End - Start);
Base =
static_cast<MergeOutputSection<ELFT> *>(this->OutSec)->getOffset(Entry);
return Base + Addend;
}
namespace lld {

View File

@ -78,8 +78,11 @@ template <class ELFT> class MergeInputSection : public InputSectionBase<ELFT> {
typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
public:
std::vector<std::pair<uintX_t, uintX_t>> Offsets;
MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
static bool classof(const InputSectionBase<ELFT> *S);
// Translate an offset in the input section to an offset in the output
// section.
uintX_t getOffset(uintX_t Offset);
};

View File

@ -718,11 +718,28 @@ MergeOutputSection<ELFT>::MergeOutputSection(StringRef Name, uint32_t sh_type,
: OutputSectionBase<ELFT>(Name, sh_type, sh_flags) {}
template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) {
for (const std::pair<ArrayRef<uint8_t>, uintX_t> &P : Offsets) {
ArrayRef<uint8_t> Data = P.first;
if (shouldTailMerge()) {
StringRef Data = Builder.data();
memcpy(Buf, Data.data(), Data.size());
Buf += Data.size();
return;
}
for (const std::pair<StringRef, size_t> &P : Builder.getMap()) {
StringRef Data = P.first;
memcpy(Buf + P.second, Data.data(), Data.size());
}
}
static size_t findNull(StringRef S, size_t EntSize) {
// Optimize the common case.
if (EntSize == 1)
return S.find(0);
for (unsigned I = 0, N = S.size(); I != N; I += EntSize) {
const char *B = S.begin() + I;
if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
return I;
}
return StringRef::npos;
}
template <class ELFT>
@ -732,22 +749,48 @@ void MergeOutputSection<ELFT>::addSection(MergeInputSection<ELFT> *S) {
if (Align > this->Header.sh_addralign)
this->Header.sh_addralign = Align;
uintX_t Off = this->Header.sh_size;
ArrayRef<uint8_t> Data = S->getSectionData();
ArrayRef<uint8_t> D = S->getSectionData();
StringRef Data((char *)D.data(), D.size());
uintX_t EntSize = S->getSectionHdr()->sh_entsize;
if (Data.size() % EntSize)
error("SHF_MERGE section size must be a multiple of sh_entsize");
for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) {
auto P = Offsets.insert(std::make_pair(Data.slice(I, EntSize), Off));
if (P.second)
Off += EntSize;
uintX_t Offset = 0;
if (this->Header.sh_flags & SHF_STRINGS) {
while (!Data.empty()) {
size_t End = findNull(Data, EntSize);
if (End == StringRef::npos)
error("String is not null terminated");
StringRef Entry = Data.substr(0, End + EntSize);
size_t OutputOffset = Builder.add(Entry);
if (shouldTailMerge())
OutputOffset = -1;
S->Offsets.push_back(std::make_pair(Offset, OutputOffset));
uintX_t Size = End + EntSize;
Data = Data.substr(Size);
Offset += Size;
}
} else {
for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) {
StringRef Entry = Data.substr(I, EntSize);
size_t OutputOffset = Builder.add(Entry);
S->Offsets.push_back(std::make_pair(Offset, OutputOffset));
Offset += EntSize;
}
}
this->Header.sh_size = Off;
}
template <class ELFT>
unsigned MergeOutputSection<ELFT>::getOffset(ArrayRef<uint8_t> Val) {
return Offsets.find(Val)->second;
unsigned MergeOutputSection<ELFT>::getOffset(StringRef Val) {
return Builder.getOffset(Val);
}
template <class ELFT> bool MergeOutputSection<ELFT>::shouldTailMerge() const {
return Config->Optimize >= 2 && this->Header.sh_flags & SHF_STRINGS;
}
template <class ELFT> void MergeOutputSection<ELFT>::finalize() {
if (shouldTailMerge())
Builder.finalize();
this->Header.sh_size = Builder.getSize();
}
template <class ELFT>

View File

@ -240,17 +240,17 @@ template <class ELFT>
class MergeOutputSection final : public OutputSectionBase<ELFT> {
typedef typename OutputSectionBase<ELFT>::uintX_t uintX_t;
bool shouldTailMerge() const;
public:
MergeOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags);
void addSection(MergeInputSection<ELFT> *S);
void writeTo(uint8_t *Buf) override;
unsigned getOffset(ArrayRef<uint8_t> Val);
unsigned getOffset(StringRef Val);
void finalize() override;
private:
// This map is used to find if we already have an entry for a given value and,
// if so, at what offset it is.
llvm::MapVector<ArrayRef<uint8_t>, uintX_t> Offsets;
llvm::StringTableBuilder Builder{llvm::StringTableBuilder::RAW};
};
template <class ELFT>

View File

@ -0,0 +1,39 @@
// REQUIRES: x86
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
// RUN: ld.lld2 %t.o -o %t.so -shared
// RUN: llvm-readobj -s %t.so | FileCheck %s
.section .rodata.str1.16,"aMS",@progbits,1
.align 16
.asciz "foo"
.section .rodata.str1.1,"aMS",@progbits,1
.asciz "foo"
// CHECK: Name: .rodata
// CHECK-NEXT: Type: SHT_PROGBITS
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: SHF_MERGE
// CHECK-NEXT: SHF_STRINGS
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x120
// CHECK-NEXT: Offset: 0x120
// CHECK-NEXT: Size: 4
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
// CHECK-NEXT: AddressAlignment: 16
// CHECK: Name: .rodata
// CHECK-NEXT: Type: SHT_PROGBITS
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: SHF_MERGE
// CHECK-NEXT: SHF_STRINGS
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x124
// CHECK-NEXT: Offset: 0x124
// CHECK-NEXT: Size: 4
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
// CHECK-NEXT: AddressAlignment: 1

View File

@ -0,0 +1,11 @@
// REQUIRES: x86
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
// RUN: not ld.lld2 %t.o -o %t.so -shared 2>&1 | FileCheck %s
.section .rodata.str1.1,"aMS",@progbits,1
.asciz "abc"
.text
.long .rodata.str1.1 + 4
// CHECK: Entry is past the end of the section

View File

@ -0,0 +1,8 @@
// REQUIRES: x86
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
// RUN: not ld.lld2 %t.o -o %t.so -shared 2>&1 | FileCheck %s
.section .rodata.str1.1,"aMS",@progbits,1
.ascii "abc"
// CHECK: String is not null terminated

View File

@ -0,0 +1,85 @@
// REQUIRES: x86
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
// RUN: ld.lld2 -O2 %t.o -o %t.so -shared
// RUN: llvm-readobj -s -section-data -t %t.so | FileCheck %s
// RUN: ld.lld2 -O1 %t.o -o %t.so -shared
// RUN: llvm-readobj -s -section-data -t %t.so | FileCheck --check-prefix=NOTAIL %s
.section .rodata.str1.1,"aMS",@progbits,1
.asciz "abc"
foo:
.ascii "a"
bar:
.asciz "bc"
.asciz "bc"
.section .rodata.str2.2,"aMS",@progbits,2
.align 2
zed:
.short 20
.short 0
// CHECK: Name: .rodata
// CHECK-NEXT: Type: SHT_PROGBITS
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: SHF_MERGE
// CHECK-NEXT: SHF_STRINGS
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x120
// CHECK-NEXT: Offset: 0x120
// CHECK-NEXT: Size: 4
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
// CHECK-NEXT: AddressAlignment: 1
// CHECK-NEXT: EntrySize: 0
// CHECK-NEXT: SectionData (
// CHECK-NEXT: 0000: 61626300 |abc.|
// CHECK-NEXT: )
// NOTAIL: Name: .rodata
// NOTAIL-NEXT: Type: SHT_PROGBITS
// NOTAIL-NEXT: Flags [
// NOTAIL-NEXT: SHF_ALLOC
// NOTAIL-NEXT: SHF_MERGE
// NOTAIL-NEXT: SHF_STRINGS
// NOTAIL-NEXT: ]
// NOTAIL-NEXT: Address: 0x120
// NOTAIL-NEXT: Offset: 0x120
// NOTAIL-NEXT: Size: 7
// NOTAIL-NEXT: Link: 0
// NOTAIL-NEXT: Info: 0
// NOTAIL-NEXT: AddressAlignment: 1
// NOTAIL-NEXT: EntrySize: 0
// NOTAIL-NEXT: SectionData (
// NOTAIL-NEXT: 0000: 61626300 626300 |abc.bc.|
// NOTAIL-NEXT: )
// CHECK: Name: .rodata
// CHECK-NEXT: Type: SHT_PROGBITS
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: SHF_MERGE
// CHECK-NEXT: SHF_STRINGS
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x124
// CHECK-NEXT: Offset: 0x124
// CHECK-NEXT: Size: 4
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
// CHECK-NEXT: AddressAlignment: 2
// CHECK-NEXT: EntrySize: 0
// CHECK-NEXT: SectionData (
// CHECK-NEXT: 0000: 14000000 |....|
// CHECK-NEXT: )
// CHECK: Name: bar
// CHECK-NEXT: Value: 0x121
// CHECK: Name: foo
// CHECK-NEXT: Value: 0x120
// CHECK: Name: zed
// CHECK-NEXT: Value: 0x124
// CHECK-NEXT: Size: 0