Partially rewrite .gnu.hash synthetic section and add comments.
This implementation is probably slightly inefficient than before, but that should be negligible because this is not a performance- critical pass. llvm-svn: 296570
This commit is contained in:
parent
f0725e3f1c
commit
e13373b5b9
|
@ -1450,87 +1450,49 @@ GnuHashTableSection<ELFT>::GnuHashTableSection()
|
|||
this->Entsize = ELFT::Is64Bits ? 0 : 4;
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) {
|
||||
if (!NumHashed)
|
||||
return 0;
|
||||
|
||||
// These values are prime numbers which are not greater than 2^(N-1) + 1.
|
||||
// In result, for any particular NumHashed we return a prime number
|
||||
// which is not greater than NumHashed.
|
||||
static const unsigned Primes[] = {
|
||||
1, 1, 3, 3, 7, 13, 31, 61, 127, 251,
|
||||
509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071};
|
||||
|
||||
return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed),
|
||||
array_lengthof(Primes) - 1)];
|
||||
}
|
||||
|
||||
// Bloom filter estimation: at least 8 bits for each hashed symbol.
|
||||
// GNU Hash table requirement: it should be a power of 2,
|
||||
// the minimum value is 1, even for an empty table.
|
||||
// Expected results for a 32-bit target:
|
||||
// calcMaskWords(0..4) = 1
|
||||
// calcMaskWords(5..8) = 2
|
||||
// calcMaskWords(9..16) = 4
|
||||
// For a 64-bit target:
|
||||
// calcMaskWords(0..8) = 1
|
||||
// calcMaskWords(9..16) = 2
|
||||
// calcMaskWords(17..32) = 4
|
||||
template <class ELFT>
|
||||
unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) {
|
||||
if (!NumHashed)
|
||||
return 1;
|
||||
return NextPowerOf2((NumHashed - 1) / sizeof(uintX_t));
|
||||
}
|
||||
|
||||
template <class ELFT> void GnuHashTableSection<ELFT>::finalizeContents() {
|
||||
unsigned NumHashed = Symbols.size();
|
||||
NBuckets = calcNBuckets(NumHashed);
|
||||
MaskWords = calcMaskWords(NumHashed);
|
||||
// Second hash shift estimation: just predefined values.
|
||||
Shift2 = ELFT::Is64Bits ? 6 : 5;
|
||||
|
||||
this->OutSec->Entsize = this->Entsize;
|
||||
this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
|
||||
this->Size = sizeof(uint32_t) * 4 // Header
|
||||
+ sizeof(uintX_t) * MaskWords // Bloom Filter
|
||||
+ sizeof(uint32_t) * NBuckets // Hash Buckets
|
||||
+ sizeof(uint32_t) * NumHashed; // Hash Values
|
||||
|
||||
// Computes bloom filter size in word size. We want to allocate 8
|
||||
// bits for each symbol. It must be a power of two.
|
||||
if (Symbols.empty())
|
||||
MaskWords = 1;
|
||||
else
|
||||
MaskWords = NextPowerOf2((Symbols.size() - 1) / sizeof(uintX_t));
|
||||
|
||||
Size = 16; // Header
|
||||
Size += sizeof(uintX_t) * MaskWords; // Bloom filter
|
||||
Size += NBuckets * 4; // Hash buckets
|
||||
Size += Symbols.size() * 4; // Hash values
|
||||
}
|
||||
|
||||
template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
|
||||
Buf = writeHeader(Buf);
|
||||
if (Symbols.empty())
|
||||
return;
|
||||
Buf = writeBloomFilter(Buf);
|
||||
writeHashTable(Buf);
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
uint8_t *GnuHashTableSection<ELFT>::writeHeader(uint8_t *Buf) {
|
||||
// Write a header.
|
||||
const endianness E = ELFT::TargetEndianness;
|
||||
write32<E>(Buf, NBuckets);
|
||||
write32<E>(Buf + 4, In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size());
|
||||
write32<E>(Buf + 8, MaskWords);
|
||||
write32<E>(Buf + 12, Shift2);
|
||||
return Buf + 16;
|
||||
write32<E>(Buf + 12, getShift2());
|
||||
Buf += 16;
|
||||
|
||||
writeBloomFilter(Buf);
|
||||
Buf += sizeof(uintX_t) * MaskWords;
|
||||
|
||||
writeHashTable(Buf);
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
uint8_t *GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
|
||||
void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
|
||||
typedef typename ELFT::Off Elf_Off;
|
||||
|
||||
const unsigned C = sizeof(uintX_t) * 8;
|
||||
|
||||
auto *Masks = reinterpret_cast<Elf_Off *>(Buf);
|
||||
for (const SymbolData &Sym : Symbols) {
|
||||
size_t Pos = (Sym.Hash / C) & (MaskWords - 1);
|
||||
uintX_t V = (uintX_t(1) << (Sym.Hash % C)) |
|
||||
(uintX_t(1) << ((Sym.Hash >> Shift2) % C));
|
||||
Masks[Pos] |= V;
|
||||
auto *Filter = reinterpret_cast<Elf_Off *>(Buf);
|
||||
for (const Entry &Sym : Symbols) {
|
||||
size_t I = (Sym.Hash / C) & (MaskWords - 1);
|
||||
Filter[I] |= uintX_t(1) << (Sym.Hash % C);
|
||||
Filter[I] |= uintX_t(1) << ((Sym.Hash >> getShift2()) % C);
|
||||
}
|
||||
return Buf + sizeof(uintX_t) * MaskWords;
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
|
@ -1538,25 +1500,30 @@ void GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) {
|
|||
// A 32-bit integer type in the target endianness.
|
||||
typedef typename ELFT::Word Elf_Word;
|
||||
|
||||
Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf);
|
||||
Elf_Word *Values = Buckets + NBuckets;
|
||||
// Group symbols by hash value.
|
||||
std::vector<std::vector<Entry>> Syms(NBuckets);
|
||||
for (const Entry &Ent : Symbols)
|
||||
Syms[Ent.Hash % NBuckets].push_back(Ent);
|
||||
|
||||
int PrevBucket = -1;
|
||||
int I = 0;
|
||||
for (const SymbolData &Sym : Symbols) {
|
||||
int Bucket = Sym.Hash % NBuckets;
|
||||
assert(PrevBucket <= Bucket);
|
||||
if (Bucket != PrevBucket) {
|
||||
Buckets[Bucket] = Sym.Body->DynsymIndex;
|
||||
PrevBucket = Bucket;
|
||||
if (I > 0)
|
||||
Values[I - 1] |= 1;
|
||||
}
|
||||
Values[I] = Sym.Hash & ~1;
|
||||
++I;
|
||||
// Write hash buckets. Hash buckets contain indices in the following
|
||||
// hash value table.
|
||||
Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf);
|
||||
for (size_t I = 0; I < NBuckets; ++I)
|
||||
if (!Syms[I].empty())
|
||||
Buckets[I] = Syms[I][0].Body->DynsymIndex;
|
||||
|
||||
// Write a hash value table. It represents a sequence of chains that
|
||||
// share the same hash modulo value. The last element of each chain
|
||||
// is terminated by LSB 1.
|
||||
Elf_Word *Values = Buckets + NBuckets;
|
||||
size_t I = 0;
|
||||
for (std::vector<Entry> &Vec : Syms) {
|
||||
if (Vec.empty())
|
||||
continue;
|
||||
for (const Entry &Ent : makeArrayRef(Vec).drop_back())
|
||||
Values[I++] = Ent.Hash & ~1;
|
||||
Values[I++] = Vec.back().Hash | 1;
|
||||
}
|
||||
if (I > 0)
|
||||
Values[I - 1] |= 1;
|
||||
}
|
||||
|
||||
static uint32_t hashGnu(StringRef Name) {
|
||||
|
@ -1566,34 +1533,50 @@ static uint32_t hashGnu(StringRef Name) {
|
|||
return H;
|
||||
}
|
||||
|
||||
// Returns a number of hash buckets to accomodate given number of elements.
|
||||
// We want to choose a moderate number that is not too small (which
|
||||
// causes too many hash collisions) and not too large (which wastes
|
||||
// disk space.)
|
||||
//
|
||||
// We return a prime number because it (is believed to) achieve good
|
||||
// hash distribution.
|
||||
static size_t getBucketSize(size_t NumSymbols) {
|
||||
// List of largest prime numbers that are not greater than 2^n + 1.
|
||||
for (size_t N : {131071, 65521, 32749, 16381, 8191, 4093, 2039, 1021, 509,
|
||||
251, 127, 61, 31, 13, 7, 3, 1})
|
||||
if (N <= NumSymbols)
|
||||
return N;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Add symbols to this symbol hash table. Note that this function
|
||||
// destructively sort a given vector -- which is needed because
|
||||
// GNU-style hash table places some sorting requirements.
|
||||
template <class ELFT>
|
||||
void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) {
|
||||
// Ideally this will just be 'auto' but GCC 6.1 is not able
|
||||
// to deduce it correctly.
|
||||
// We cannot use 'auto' for Mid because GCC 6.1 cannot deduce
|
||||
// its type correctly.
|
||||
std::vector<SymbolTableEntry>::iterator Mid =
|
||||
std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) {
|
||||
return S.Symbol->isUndefined();
|
||||
});
|
||||
if (Mid == V.end())
|
||||
return;
|
||||
for (auto I = Mid, E = V.end(); I != E; ++I) {
|
||||
SymbolBody *B = I->Symbol;
|
||||
size_t StrOff = I->StrTabOffset;
|
||||
Symbols.push_back({B, StrOff, hashGnu(B->getName())});
|
||||
|
||||
for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) {
|
||||
SymbolBody *B = Ent.Symbol;
|
||||
Symbols.push_back({B, Ent.StrTabOffset, hashGnu(B->getName())});
|
||||
}
|
||||
|
||||
unsigned NBuckets = calcNBuckets(Symbols.size());
|
||||
NBuckets = getBucketSize(Symbols.size());
|
||||
std::stable_sort(Symbols.begin(), Symbols.end(),
|
||||
[&](const SymbolData &L, const SymbolData &R) {
|
||||
[&](const Entry &L, const Entry &R) {
|
||||
return L.Hash % NBuckets < R.Hash % NBuckets;
|
||||
});
|
||||
|
||||
V.erase(Mid, V.end());
|
||||
for (const SymbolData &Sym : Symbols)
|
||||
V.push_back({Sym.Body, Sym.STName});
|
||||
for (const Entry &Ent : Symbols)
|
||||
V.push_back({Ent.Body, Ent.StrTabOffset});
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
|
|
|
@ -445,25 +445,21 @@ public:
|
|||
void addSymbols(std::vector<SymbolTableEntry> &Symbols);
|
||||
|
||||
private:
|
||||
static unsigned calcNBuckets(unsigned NumHashed);
|
||||
static unsigned calcMaskWords(unsigned NumHashed);
|
||||
size_t getShift2() const { return ELFT::Is64Bits ? 6 : 5; }
|
||||
|
||||
uint8_t *writeHeader(uint8_t *Buf);
|
||||
uint8_t *writeBloomFilter(uint8_t *Buf);
|
||||
void writeBloomFilter(uint8_t *Buf);
|
||||
void writeHashTable(uint8_t *Buf);
|
||||
|
||||
struct SymbolData {
|
||||
struct Entry {
|
||||
SymbolBody *Body;
|
||||
size_t STName;
|
||||
size_t StrTabOffset;
|
||||
uint32_t Hash;
|
||||
};
|
||||
|
||||
std::vector<SymbolData> Symbols;
|
||||
|
||||
unsigned MaskWords;
|
||||
unsigned NBuckets;
|
||||
unsigned Shift2;
|
||||
uintX_t Size = 0;
|
||||
std::vector<Entry> Symbols;
|
||||
size_t MaskWords;
|
||||
size_t NBuckets;
|
||||
size_t Size = 0;
|
||||
};
|
||||
|
||||
template <class ELFT> class HashTableSection final : public SyntheticSection {
|
||||
|
|
Loading…
Reference in New Issue