From e13373b5b9b11f1754d2c0ecc7151445f7aad157 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 1 Mar 2017 02:51:42 +0000 Subject: [PATCH] Partially rewrite .gnu.hash synthetic section and add comments. This implementation is probably slightly inefficient than before, but that should be negligible because this is not a performance- critical pass. llvm-svn: 296570 --- lld/ELF/SyntheticSections.cpp | 165 +++++++++++++++------------------- lld/ELF/SyntheticSections.h | 20 ++--- 2 files changed, 82 insertions(+), 103 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index a10fa48991cb..610ec5f3f311 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1450,87 +1450,49 @@ GnuHashTableSection::GnuHashTableSection() this->Entsize = ELFT::Is64Bits ? 0 : 4; } -template -unsigned GnuHashTableSection::calcNBuckets(unsigned NumHashed) { - if (!NumHashed) - return 0; - - // These values are prime numbers which are not greater than 2^(N-1) + 1. - // In result, for any particular NumHashed we return a prime number - // which is not greater than NumHashed. - static const unsigned Primes[] = { - 1, 1, 3, 3, 7, 13, 31, 61, 127, 251, - 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071}; - - return Primes[std::min(Log2_32_Ceil(NumHashed), - array_lengthof(Primes) - 1)]; -} - -// Bloom filter estimation: at least 8 bits for each hashed symbol. -// GNU Hash table requirement: it should be a power of 2, -// the minimum value is 1, even for an empty table. -// Expected results for a 32-bit target: -// calcMaskWords(0..4) = 1 -// calcMaskWords(5..8) = 2 -// calcMaskWords(9..16) = 4 -// For a 64-bit target: -// calcMaskWords(0..8) = 1 -// calcMaskWords(9..16) = 2 -// calcMaskWords(17..32) = 4 -template -unsigned GnuHashTableSection::calcMaskWords(unsigned NumHashed) { - if (!NumHashed) - return 1; - return NextPowerOf2((NumHashed - 1) / sizeof(uintX_t)); -} - template void GnuHashTableSection::finalizeContents() { - unsigned NumHashed = Symbols.size(); - NBuckets = calcNBuckets(NumHashed); - MaskWords = calcMaskWords(NumHashed); - // Second hash shift estimation: just predefined values. - Shift2 = ELFT::Is64Bits ? 6 : 5; - this->OutSec->Entsize = this->Entsize; this->OutSec->Link = In::DynSymTab->OutSec->SectionIndex; - this->Size = sizeof(uint32_t) * 4 // Header - + sizeof(uintX_t) * MaskWords // Bloom Filter - + sizeof(uint32_t) * NBuckets // Hash Buckets - + sizeof(uint32_t) * NumHashed; // Hash Values + + // Computes bloom filter size in word size. We want to allocate 8 + // bits for each symbol. It must be a power of two. + if (Symbols.empty()) + MaskWords = 1; + else + MaskWords = NextPowerOf2((Symbols.size() - 1) / sizeof(uintX_t)); + + Size = 16; // Header + Size += sizeof(uintX_t) * MaskWords; // Bloom filter + Size += NBuckets * 4; // Hash buckets + Size += Symbols.size() * 4; // Hash values } template void GnuHashTableSection::writeTo(uint8_t *Buf) { - Buf = writeHeader(Buf); - if (Symbols.empty()) - return; - Buf = writeBloomFilter(Buf); - writeHashTable(Buf); -} - -template -uint8_t *GnuHashTableSection::writeHeader(uint8_t *Buf) { + // Write a header. const endianness E = ELFT::TargetEndianness; write32(Buf, NBuckets); write32(Buf + 4, In::DynSymTab->getNumSymbols() - Symbols.size()); write32(Buf + 8, MaskWords); - write32(Buf + 12, Shift2); - return Buf + 16; + write32(Buf + 12, getShift2()); + Buf += 16; + + writeBloomFilter(Buf); + Buf += sizeof(uintX_t) * MaskWords; + + writeHashTable(Buf); } template -uint8_t *GnuHashTableSection::writeBloomFilter(uint8_t *Buf) { +void GnuHashTableSection::writeBloomFilter(uint8_t *Buf) { typedef typename ELFT::Off Elf_Off; - const unsigned C = sizeof(uintX_t) * 8; - auto *Masks = reinterpret_cast(Buf); - for (const SymbolData &Sym : Symbols) { - size_t Pos = (Sym.Hash / C) & (MaskWords - 1); - uintX_t V = (uintX_t(1) << (Sym.Hash % C)) | - (uintX_t(1) << ((Sym.Hash >> Shift2) % C)); - Masks[Pos] |= V; + auto *Filter = reinterpret_cast(Buf); + for (const Entry &Sym : Symbols) { + size_t I = (Sym.Hash / C) & (MaskWords - 1); + Filter[I] |= uintX_t(1) << (Sym.Hash % C); + Filter[I] |= uintX_t(1) << ((Sym.Hash >> getShift2()) % C); } - return Buf + sizeof(uintX_t) * MaskWords; } template @@ -1538,25 +1500,30 @@ void GnuHashTableSection::writeHashTable(uint8_t *Buf) { // A 32-bit integer type in the target endianness. typedef typename ELFT::Word Elf_Word; - Elf_Word *Buckets = reinterpret_cast(Buf); - Elf_Word *Values = Buckets + NBuckets; + // Group symbols by hash value. + std::vector> Syms(NBuckets); + for (const Entry &Ent : Symbols) + Syms[Ent.Hash % NBuckets].push_back(Ent); - int PrevBucket = -1; - int I = 0; - for (const SymbolData &Sym : Symbols) { - int Bucket = Sym.Hash % NBuckets; - assert(PrevBucket <= Bucket); - if (Bucket != PrevBucket) { - Buckets[Bucket] = Sym.Body->DynsymIndex; - PrevBucket = Bucket; - if (I > 0) - Values[I - 1] |= 1; - } - Values[I] = Sym.Hash & ~1; - ++I; + // Write hash buckets. Hash buckets contain indices in the following + // hash value table. + Elf_Word *Buckets = reinterpret_cast(Buf); + for (size_t I = 0; I < NBuckets; ++I) + if (!Syms[I].empty()) + Buckets[I] = Syms[I][0].Body->DynsymIndex; + + // Write a hash value table. It represents a sequence of chains that + // share the same hash modulo value. The last element of each chain + // is terminated by LSB 1. + Elf_Word *Values = Buckets + NBuckets; + size_t I = 0; + for (std::vector &Vec : Syms) { + if (Vec.empty()) + continue; + for (const Entry &Ent : makeArrayRef(Vec).drop_back()) + Values[I++] = Ent.Hash & ~1; + Values[I++] = Vec.back().Hash | 1; } - if (I > 0) - Values[I - 1] |= 1; } static uint32_t hashGnu(StringRef Name) { @@ -1566,34 +1533,50 @@ static uint32_t hashGnu(StringRef Name) { return H; } +// Returns a number of hash buckets to accomodate given number of elements. +// We want to choose a moderate number that is not too small (which +// causes too many hash collisions) and not too large (which wastes +// disk space.) +// +// We return a prime number because it (is believed to) achieve good +// hash distribution. +static size_t getBucketSize(size_t NumSymbols) { + // List of largest prime numbers that are not greater than 2^n + 1. + for (size_t N : {131071, 65521, 32749, 16381, 8191, 4093, 2039, 1021, 509, + 251, 127, 61, 31, 13, 7, 3, 1}) + if (N <= NumSymbols) + return N; + return 0; +} + // Add symbols to this symbol hash table. Note that this function // destructively sort a given vector -- which is needed because // GNU-style hash table places some sorting requirements. template void GnuHashTableSection::addSymbols(std::vector &V) { - // Ideally this will just be 'auto' but GCC 6.1 is not able - // to deduce it correctly. + // We cannot use 'auto' for Mid because GCC 6.1 cannot deduce + // its type correctly. std::vector::iterator Mid = std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) { return S.Symbol->isUndefined(); }); if (Mid == V.end()) return; - for (auto I = Mid, E = V.end(); I != E; ++I) { - SymbolBody *B = I->Symbol; - size_t StrOff = I->StrTabOffset; - Symbols.push_back({B, StrOff, hashGnu(B->getName())}); + + for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) { + SymbolBody *B = Ent.Symbol; + Symbols.push_back({B, Ent.StrTabOffset, hashGnu(B->getName())}); } - unsigned NBuckets = calcNBuckets(Symbols.size()); + NBuckets = getBucketSize(Symbols.size()); std::stable_sort(Symbols.begin(), Symbols.end(), - [&](const SymbolData &L, const SymbolData &R) { + [&](const Entry &L, const Entry &R) { return L.Hash % NBuckets < R.Hash % NBuckets; }); V.erase(Mid, V.end()); - for (const SymbolData &Sym : Symbols) - V.push_back({Sym.Body, Sym.STName}); + for (const Entry &Ent : Symbols) + V.push_back({Ent.Body, Ent.StrTabOffset}); } template diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 0174b619e5c7..5d2b56470d18 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -445,25 +445,21 @@ public: void addSymbols(std::vector &Symbols); private: - static unsigned calcNBuckets(unsigned NumHashed); - static unsigned calcMaskWords(unsigned NumHashed); + size_t getShift2() const { return ELFT::Is64Bits ? 6 : 5; } - uint8_t *writeHeader(uint8_t *Buf); - uint8_t *writeBloomFilter(uint8_t *Buf); + void writeBloomFilter(uint8_t *Buf); void writeHashTable(uint8_t *Buf); - struct SymbolData { + struct Entry { SymbolBody *Body; - size_t STName; + size_t StrTabOffset; uint32_t Hash; }; - std::vector Symbols; - - unsigned MaskWords; - unsigned NBuckets; - unsigned Shift2; - uintX_t Size = 0; + std::vector Symbols; + size_t MaskWords; + size_t NBuckets; + size_t Size = 0; }; template class HashTableSection final : public SyntheticSection {