From e2c2461a6bf0b8ccbe7a2cf9021e16c3be51ca81 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 29 Jan 2016 01:24:25 +0000 Subject: [PATCH] Merge identical strings. This avoids the need to have reserve and addString in sync. We avoid hashing the global symbols again. This means that we don't merge a global symbol that has the same name as some other string, but that doesn't seem very common. The string table size is the same in clang an scylladb with or without hashing global symbols again. llvm-svn: 259136 --- lld/ELF/InputFiles.h | 4 +- lld/ELF/OutputSections.cpp | 129 ++++++++++++--------------- lld/ELF/OutputSections.h | 26 +++--- lld/ELF/Writer.cpp | 7 +- lld/test/ELF/dynamic-reloc.s | 2 +- lld/test/ELF/linkerscript-sections.s | 8 +- lld/test/ELF/shared-be.s | 6 +- lld/test/ELF/shared.s | 4 +- lld/test/ELF/string-table.s | 5 +- 9 files changed, 88 insertions(+), 103 deletions(-) diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 7ca166a7ca5d..e530f7fa70a2 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -129,7 +129,9 @@ public: // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. uint32_t getMipsGp0() const; - std::vector KeptLocalSyms; + // The number is the offset in the string table. It will be used as the + // st_name of the symbol. + std::vector> KeptLocalSyms; private: void initializeSections(llvm::DenseSet &ComdatGroups); diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 7602097a973d..97a7670f177a 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -379,7 +379,6 @@ InterpSection::InterpSection() template void OutputSectionBase::writeHeaderTo(Elf_Shdr *SHdr) { - Header.sh_name = Out::ShStrTab->addString(Name); *SHdr = Header; } @@ -428,7 +427,9 @@ template void HashTableSection::writeTo(uint8_t *Buf) { Elf_Word *Buckets = P; Elf_Word *Chains = P + NumSymbols; - for (SymbolBody *Body : Out::DynSymTab->getSymbols()) { + for (const std::pair &P : + Out::DynSymTab->getSymbols()) { + SymbolBody *Body = P.first; StringRef Name = Body->getName(); unsigned I = Body->DynamicSymbolTableIndex; uint32_t Hash = hashSysv(Name) % NumSymbols; @@ -560,15 +561,18 @@ static bool includeInGnuHashTable(SymbolBody *B) { } template -void GnuHashTableSection::addSymbols(std::vector &Symbols) { - std::vector NotHashed; +void GnuHashTableSection::addSymbols( + std::vector> &Symbols) { + std::vector> NotHashed; NotHashed.reserve(Symbols.size()); HashedSymbols.reserve(Symbols.size()); - for (SymbolBody *B : Symbols) { + for (const std::pair &P : Symbols) { + SymbolBody *B = P.first; if (includeInGnuHashTable(B)) - HashedSymbols.push_back(HashedSymbolData{B, hashGnu(B->getName())}); + HashedSymbols.push_back( + HashedSymbolData{B, P.second, hashGnu(B->getName())}); else - NotHashed.push_back(B); + NotHashed.push_back(P); } if (HashedSymbols.empty()) return; @@ -581,7 +585,7 @@ void GnuHashTableSection::addSymbols(std::vector &Symbols) { Symbols = std::move(NotHashed); for (const HashedSymbolData &Item : HashedSymbols) - Symbols.push_back(Item.Body); + Symbols.push_back(std::make_pair(Item.Body, Item.STName)); } template @@ -606,18 +610,21 @@ template void DynamicSection::finalize() { Elf_Shdr &Header = this->Header; Header.sh_link = Out::DynStrTab->SectionIndex; - // Reserve strings. We know that these are the last string to be added to + auto Add = [=](Entry E) { Entries.push_back(E); }; + + // Add strings. We know that these are the last strings to be added to // DynStrTab and doing this here allows this function to set DT_STRSZ. if (!Config->RPath.empty()) - Out::DynStrTab->reserve(Config->RPath); - if (!Config->SoName.empty()) - Out::DynStrTab->reserve(Config->SoName); + Add({Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH, + Out::DynStrTab->addString(Config->RPath)}); for (const std::unique_ptr> &F : SymTab.getSharedFiles()) if (F->isNeeded()) - Out::DynStrTab->reserve(F->getSoName()); + Add({DT_NEEDED, Out::DynStrTab->addString(F->getSoName())}); + if (!Config->SoName.empty()) + Add({DT_SONAME, Out::DynStrTab->addString(Config->SoName)}); + Out::DynStrTab->finalize(); - auto Add = [=](Entry E) { Entries.push_back(E); }; if (Out::RelaDyn->hasRelocs()) { bool IsRela = Out::RelaDyn->isRela(); @@ -643,13 +650,6 @@ template void DynamicSection::finalize() { if (Out::HashTab) Add({DT_HASH, Out::HashTab}); - if (!Config->RPath.empty()) - Add({Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH, - Out::DynStrTab->addString(Config->RPath)}); - - if (!Config->SoName.empty()) - Add({DT_SONAME, Out::DynStrTab->addString(Config->SoName)}); - if (PreInitArraySec) { Add({DT_PREINIT_ARRAY, PreInitArraySec}); Add({DT_PREINIT_ARRAYSZ, PreInitArraySec->getSize()}); @@ -663,10 +663,6 @@ template void DynamicSection::finalize() { Add({DT_FINI_ARRAYSZ, (uintX_t)FiniArraySec->getSize()}); } - for (const std::unique_ptr> &F : SymTab.getSharedFiles()) - if (F->isNeeded()) - Add({DT_NEEDED, Out::DynStrTab->addString(F->getSoName())}); - if (SymbolBody *B = SymTab.find(Config->Init)) Add({DT_INIT, B}); if (SymbolBody *B = SymTab.find(Config->Fini)) @@ -1349,30 +1345,21 @@ StringTableSection::StringTableSection(StringRef Name, bool Dynamic) this->Header.sh_addralign = 1; } -// String tables are created in two phases. First you call reserve() -// to reserve room in the string table, and then call addString() to actually -// add that string. -// -// Why two phases? We want to know the size of the string table as early as -// possible to fix file layout. So we have separated finalize(), which -// determines the size of the section, from writeTo(), which writes the section -// contents to the output buffer. If we merge reserve() with addString(), -// we need a plumbing work for finalize() and writeTo() so that offsets -// we obtained in the former function can be written in the latter. -// This design eliminated that need. -template void StringTableSection::reserve(StringRef S) { - Reserved += S.size() + 1; // +1 for NUL -} - -// Adds a string to the string table. You must call reserve() with the -// same string before calling addString(). -template size_t StringTableSection::addString(StringRef S) { - size_t Pos = Used; +// Adds a string to the string table. If HashIt is true we hash and check for +// duplicates. It is optional because the name of global symbols are already +// uniqued and hashing them again has a big cost for a small value: uniquing +// them with some other string that happens to be the same. +template +unsigned StringTableSection::addString(StringRef S, bool HashIt) { + if (HashIt) { + auto R = StringMap.insert(std::make_pair(S, Size)); + if (!R.second) + return R.first->second; + } + unsigned Ret = Size; + Size += S.size() + 1; Strings.push_back(S); - Used += S.size() + 1; - Reserved -= S.size() + 1; - assert((int64_t)Reserved >= 0); - return Pos; + return Ret; } template void StringTableSection::writeTo(uint8_t *Buf) { @@ -1390,7 +1377,7 @@ SymbolTableSection::SymbolTableSection( : OutputSectionBase(StrTabSec.isDynamic() ? ".dynsym" : ".symtab", StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, StrTabSec.isDynamic() ? (uintX_t)SHF_ALLOC : 0), - Table(Table), StrTabSec(StrTabSec) { + StrTabSec(StrTabSec), Table(Table) { this->Header.sh_entsize = sizeof(Elf_Sym); this->Header.sh_addralign = ELFT::Is64Bits ? 8 : 4; } @@ -1400,10 +1387,11 @@ SymbolTableSection::SymbolTableSection( // See "Global Offset Table" in Chapter 5 in the following document // for detailed description: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf -static bool sortMipsSymbols(SymbolBody *L, SymbolBody *R) { - if (!L->isInGot() || !R->isInGot()) - return R->isInGot(); - return L->GotIndex < R->GotIndex; +static bool sortMipsSymbols(const std::pair &L, + const std::pair &R) { + if (!L.first->isInGot() || !R.first->isInGot()) + return R.first->isInGot(); + return L.first->GotIndex < R.first->GotIndex; } template void SymbolTableSection::finalize() { @@ -1416,9 +1404,10 @@ template void SymbolTableSection::finalize() { if (!StrTabSec.isDynamic()) { std::stable_sort(Symbols.begin(), Symbols.end(), - [](SymbolBody *L, SymbolBody *R) { - return getSymbolBinding(L) == STB_LOCAL && - getSymbolBinding(R) != STB_LOCAL; + [](const std::pair &L, + const std::pair &R) { + return getSymbolBinding(L.first) == STB_LOCAL && + getSymbolBinding(R.first) != STB_LOCAL; }); return; } @@ -1428,20 +1417,14 @@ template void SymbolTableSection::finalize() { else if (Config->EMachine == EM_MIPS) std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols); size_t I = 0; - for (SymbolBody *B : Symbols) - B->DynamicSymbolTableIndex = ++I; -} - -template -void SymbolTableSection::addLocalSymbol(StringRef Name) { - StrTabSec.reserve(Name); - ++NumLocals; + for (const std::pair &P : Symbols) + P.first->DynamicSymbolTableIndex = ++I; } template void SymbolTableSection::addSymbol(SymbolBody *Body) { - StrTabSec.reserve(Body->getName()); - Symbols.push_back(Body); + Symbols.push_back( + std::make_pair(Body, StrTabSec.addString(Body->getName(), false))); } template void SymbolTableSection::writeTo(uint8_t *Buf) { @@ -1460,10 +1443,8 @@ void SymbolTableSection::writeLocalSymbols(uint8_t *&Buf) { // Iterate over all input object files to copy their local symbols // to the output symbol table pointed by Buf. for (const std::unique_ptr> &File : Table.getObjectFiles()) { - for (const Elf_Sym *Sym : File->KeptLocalSyms) { - ErrorOr SymNameOrErr = Sym->getName(File->getStringTable()); - fatal(SymNameOrErr); - StringRef SymName = *SymNameOrErr; + for (const std::pair &P : File->KeptLocalSyms) { + const Elf_Sym *Sym = P.first; auto *ESym = reinterpret_cast(Buf); uintX_t VA = 0; @@ -1480,7 +1461,7 @@ void SymbolTableSection::writeLocalSymbols(uint8_t *&Buf) { if (Config->EMachine != EM_AMDGPU) VA += OutSec->getVA(); } - ESym->st_name = StrTabSec.addString(SymName); + ESym->st_name = P.second; ESym->st_size = Sym->st_size; ESym->setBindingAndType(Sym->getBinding(), Sym->getType()); ESym->st_value = VA; @@ -1504,7 +1485,8 @@ void SymbolTableSection::writeGlobalSymbols(uint8_t *Buf) { // Write the internal symbol table contents to the output symbol table // pointed by Buf. auto *ESym = reinterpret_cast(Buf); - for (SymbolBody *Body : Symbols) { + for (const std::pair &P : Symbols) { + SymbolBody *Body = P.first; const OutputSectionBase *OutSec = nullptr; switch (Body->kind()) { @@ -1534,8 +1516,7 @@ void SymbolTableSection::writeGlobalSymbols(uint8_t *Buf) { break; } - StringRef Name = Body->getName(); - ESym->st_name = StrTabSec.addString(Name); + ESym->st_name = P.second; unsigned char Type = STT_NOTYPE; uintX_t Size = 0; diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index 10f55a9cddc4..93bd997c3b30 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -12,7 +12,6 @@ #include "lld/Core/LLVM.h" -#include "llvm/ADT/MapVector.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/ELF.h" @@ -73,6 +72,7 @@ public: void setVA(uintX_t VA) { Header.sh_addr = VA; } uintX_t getVA() const { return Header.sh_addr; } void setFileOffset(uintX_t Off) { Header.sh_offset = Off; } + void setSHName(unsigned Val) { Header.sh_name = Val; } void writeHeaderTo(Elf_Shdr *SHdr); StringRef getName() { return Name; } @@ -195,12 +195,16 @@ public: void finalize() override; void writeTo(uint8_t *Buf) override; - void addLocalSymbol(StringRef Name); void addSymbol(SymbolBody *Body); StringTableSection &getStrTabSec() const { return StrTabSec; } unsigned getNumSymbols() const { return NumLocals + Symbols.size() + 1; } - ArrayRef getSymbols() const { return Symbols; } + ArrayRef> getSymbols() const { + return Symbols; + } + + unsigned NumLocals = 0; + StringTableSection &StrTabSec; private: void writeLocalSymbols(uint8_t *&Buf); @@ -209,9 +213,7 @@ private: static uint8_t getSymbolBinding(SymbolBody *Body); SymbolTable &Table; - StringTableSection &StrTabSec; - std::vector Symbols; - unsigned NumLocals = 0; + std::vector> Symbols; }; template @@ -328,18 +330,17 @@ class StringTableSection final : public OutputSectionBase { public: typedef typename llvm::object::ELFFile::uintX_t uintX_t; StringTableSection(StringRef Name, bool Dynamic); - void reserve(StringRef S); - size_t addString(StringRef S); + unsigned addString(StringRef S, bool HashIt = true); void writeTo(uint8_t *Buf) override; - size_t getSize() const { return Used + Reserved; } + unsigned getSize() const { return Size; } void finalize() override { this->Header.sh_size = getSize(); } bool isDynamic() const { return Dynamic; } private: const bool Dynamic; + llvm::DenseMap StringMap; std::vector Strings; - size_t Used = 1; // ELF string tables start with a NUL byte, so 1. - size_t Reserved = 0; + unsigned Size = 1; // ELF string tables start with a NUL byte, so 1. }; template @@ -367,7 +368,7 @@ public: // Adds symbols to the hash table. // Sorts the input to satisfy GNU hash section requirements. - void addSymbols(std::vector &Symbols); + void addSymbols(std::vector> &Symbols); private: static unsigned calcNBuckets(unsigned NumHashed); @@ -379,6 +380,7 @@ private: struct HashedSymbolData { SymbolBody *Body; + unsigned STName; uint32_t Hash; }; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 239f6a6433f3..a27c9fc1b092 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -431,8 +431,9 @@ template void Writer::copyLocalSymbols() { if (!Section->isLive()) continue; } - Out::SymTab->addLocalSymbol(SymName); - F->KeptLocalSyms.push_back(&Sym); + ++Out::SymTab->NumLocals; + F->KeptLocalSyms.push_back(std::make_pair( + &Sym, Out::SymTab->StrTabSec.addString(SymName))); } } } @@ -907,7 +908,7 @@ template bool Writer::createSections() { } for (OutputSectionBase *Sec : OutputSections) - Out::ShStrTab->reserve(Sec->getName()); + Sec->setSHName(Out::ShStrTab->addString(Sec->getName())); // Finalizers fix each section's size. // .dynsym is finalized early since that may fill up .gnu.hash. diff --git a/lld/test/ELF/dynamic-reloc.s b/lld/test/ELF/dynamic-reloc.s index 295396006543..5e23ba93d5a9 100644 --- a/lld/test/ELF/dynamic-reloc.s +++ b/lld/test/ELF/dynamic-reloc.s @@ -43,6 +43,7 @@ // CHECK: DynamicSection [ // CHECK-NEXT: Tag Type Name/Value +// CHECK-NEXT: 0x0000000000000001 NEEDED SharedLibrary ({{.*}}2.so) // CHECK-NEXT: 0x0000000000000017 JMPREL // CHECK-NEXT: 0x0000000000000002 PLTRELSZ 24 (bytes) // CHECK-NEXT: 0x0000000000000003 PLTGOT @@ -52,7 +53,6 @@ // CHECK-NEXT: 0x0000000000000005 STRTAB // CHECK-NEXT: 0x000000000000000A STRSZ // CHECK-NEXT: 0x0000000000000004 HASH -// CHECK-NEXT: 0x0000000000000001 NEEDED SharedLibrary ({{.*}}2.so) // CHECK-NEXT: 0x0000000000000015 DEBUG 0x0 // CHECK-NEXT: 0x0000000000000000 NULL 0x0 // CHECK-NEXT: ] diff --git a/lld/test/ELF/linkerscript-sections.s b/lld/test/ELF/linkerscript-sections.s index 165ec335ec13..ea9ae2b2726d 100644 --- a/lld/test/ELF/linkerscript-sections.s +++ b/lld/test/ELF/linkerscript-sections.s @@ -22,7 +22,7 @@ # SEC-DEFAULT: 4 .bss 00000002 {{[0-9a-f]*}} BSS # SEC-DEFAULT: 5 .shstrtab 00000002 {{[0-9a-f]*}} # SEC-DEFAULT: 6 .symtab 00000030 {{[0-9a-f]*}} -# SEC-DEFAULT: 7 .shstrtab 0000003c {{[0-9a-f]*}} +# SEC-DEFAULT: 7 .shstrtab 00000032 {{[0-9a-f]*}} # SEC-DEFAULT: 8 .strtab 00000008 {{[0-9a-f]*}} # Sections are put in order specified in linker script. @@ -42,7 +42,7 @@ # SEC-ORDER: 1 .bss 00000002 {{[0-9a-f]*}} BSS # SEC-ORDER: 2 other 00000003 {{[0-9a-f]*}} DATA # SEC-ORDER: 3 .shstrtab 00000002 {{[0-9a-f]*}} -# SEC-ORDER: 4 .shstrtab 0000003c {{[0-9a-f]*}} +# SEC-ORDER: 4 .shstrtab 00000032 {{[0-9a-f]*}} # SEC-ORDER: 5 .symtab 00000030 {{[0-9a-f]*}} # SEC-ORDER: 6 .strtab 00000008 {{[0-9a-f]*}} # SEC-ORDER: 7 .data 00000020 {{[0-9a-f]*}} DATA @@ -63,7 +63,7 @@ # SEC-SWAP-NAMES: 4 .bss 00000002 {{[0-9a-f]*}} BSS # SEC-SWAP-NAMES: 5 .shstrtab 00000002 {{[0-9a-f]*}} # SEC-SWAP-NAMES: 6 .symtab 00000030 {{[0-9a-f]*}} -# SEC-SWAP-NAMES: 7 .shstrtab 0000003c {{[0-9a-f]*}} +# SEC-SWAP-NAMES: 7 .shstrtab 00000032 {{[0-9a-f]*}} # SEC-SWAP-NAMES: 8 .strtab 00000008 {{[0-9a-f]*}} # .shstrtab from the input object file is discarded. @@ -100,7 +100,7 @@ # SEC-MULTI: 3 .bss 00000002 {{[0-9a-f]*}} BSS # SEC-MULTI: 4 .shstrtab 00000002 {{[0-9a-f]*}} # SEC-MULTI: 5 .symtab 00000030 {{[0-9a-f]*}} -# SEC-MULTI: 6 .shstrtab 00000036 {{[0-9a-f]*}} +# SEC-MULTI: 6 .shstrtab 0000002c {{[0-9a-f]*}} # SEC-MULTI: 7 .strtab 00000008 {{[0-9a-f]*}} .globl _start; diff --git a/lld/test/ELF/shared-be.s b/lld/test/ELF/shared-be.s index 0f57a4b7f0c3..945f365db981 100644 --- a/lld/test/ELF/shared-be.s +++ b/lld/test/ELF/shared-be.s @@ -20,12 +20,12 @@ // CHECK: DynamicSection [ // CHECK-NEXT: Tag Type Name/Value +// CHECK-NEXT: 0x000000000000001D RUNPATH foo:bar +// CHECK-NEXT: 0x0000000000000001 NEEDED SharedLibrary ({{.*}}2.so) // CHECK-NEXT: 0x0000000000000007 RELA [[RELADDR]] // CHECK-NEXT: 0x0000000000000008 RELASZ [[RELSIZE]] (bytes) // CHECK-NEXT: 0x0000000000000009 RELAENT [[RELENT]] (bytes) -// CHECK: 0x000000000000001D RUNPATH foo:bar -// CHECK-NEXT: 0x0000000000000001 NEEDED SharedLibrary ({{.*}}2.so) -// CHECK-NEXT: 0x0000000000000015 DEBUG 0x0 +// CHECK: 0x0000000000000015 DEBUG 0x0 // CHECK-NEXT: 0x0000000000000000 NULL 0x0 // CHECK-NEXT: ] diff --git a/lld/test/ELF/shared.s b/lld/test/ELF/shared.s index 1bf5bd1795bc..850c637ac4e2 100644 --- a/lld/test/ELF/shared.s +++ b/lld/test/ELF/shared.s @@ -243,6 +243,8 @@ // CHECK: DynamicSection [ // CHECK-NEXT: Tag Type Name/Value +// CHECK-NEXT: 0x0000001D RUNPATH foo:bar +// CHECK-NEXT: 0x00000001 NEEDED SharedLibrary ({{.*}}2.so) // CHECK-NEXT: 0x00000011 REL [[RELADDR]] // CHECK-NEXT: 0x00000012 RELSZ [[RELSIZE]] (bytes) // CHECK-NEXT: 0x00000013 RELENT [[RELENT]] (bytes) @@ -251,8 +253,6 @@ // CHECK-NEXT: 0x00000005 STRTAB [[DYNSTRADDR]] // CHECK-NEXT: 0x0000000A STRSZ // CHECK-NEXT: 0x00000004 HASH [[HASHADDR]] -// CHECK-NEXT: 0x0000001D RUNPATH foo:bar -// CHECK-NEXT: 0x00000001 NEEDED SharedLibrary ({{.*}}2.so) // CHECK-NEXT: 0x00000015 DEBUG 0x0 // CHECK-NEXT: 0x00000000 NULL 0x0 // CHECK-NEXT: ] diff --git a/lld/test/ELF/string-table.s b/lld/test/ELF/string-table.s index 8393d6de6cc5..892c348f6fd0 100644 --- a/lld/test/ELF/string-table.s +++ b/lld/test/ELF/string-table.s @@ -59,9 +59,8 @@ _start: // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( // CHECK-NEXT: 0000: 00626172 002E7465 78740066 6F6F6261 |.bar..text.fooba| -// CHECK-NEXT: 0010: 7200666F 6F626172 00666F6F 62617200 |r.foobar.foobar.| -// CHECK-NEXT: 0020: 2E73796D 74616200 2E736873 74727461 |.symtab..shstrta| -// CHECK-NEXT: 0030: 62002E73 74727461 6200 |b..strtab.| +// CHECK-NEXT: 0010: 72002E73 796D7461 62002E73 68737472 |r..symtab..shstr| +// CHECK-NEXT: 0020: 74616200 2E737472 74616200 |tab..strtab.| // CHECK-NEXT: ) // CHECK-NEXT:} // CHECK: Name: .strtab