Partially rewrite .gnu.hash synthetic section and add comments.

This implementation is probably slightly inefficient than before,
but that should be negligible because this is not a performance-
critical pass.

llvm-svn: 296570
This commit is contained in:
Rui Ueyama 2017-03-01 02:51:42 +00:00
parent f0725e3f1c
commit e13373b5b9
2 changed files with 82 additions and 103 deletions

View File

@ -1450,87 +1450,49 @@ GnuHashTableSection<ELFT>::GnuHashTableSection()
this->Entsize = ELFT::Is64Bits ? 0 : 4;
}
template <class ELFT>
unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) {
if (!NumHashed)
return 0;
// These values are prime numbers which are not greater than 2^(N-1) + 1.
// In result, for any particular NumHashed we return a prime number
// which is not greater than NumHashed.
static const unsigned Primes[] = {
1, 1, 3, 3, 7, 13, 31, 61, 127, 251,
509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071};
return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed),
array_lengthof(Primes) - 1)];
}
// Bloom filter estimation: at least 8 bits for each hashed symbol.
// GNU Hash table requirement: it should be a power of 2,
// the minimum value is 1, even for an empty table.
// Expected results for a 32-bit target:
// calcMaskWords(0..4) = 1
// calcMaskWords(5..8) = 2
// calcMaskWords(9..16) = 4
// For a 64-bit target:
// calcMaskWords(0..8) = 1
// calcMaskWords(9..16) = 2
// calcMaskWords(17..32) = 4
template <class ELFT>
unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) {
if (!NumHashed)
return 1;
return NextPowerOf2((NumHashed - 1) / sizeof(uintX_t));
}
template <class ELFT> void GnuHashTableSection<ELFT>::finalizeContents() {
unsigned NumHashed = Symbols.size();
NBuckets = calcNBuckets(NumHashed);
MaskWords = calcMaskWords(NumHashed);
// Second hash shift estimation: just predefined values.
Shift2 = ELFT::Is64Bits ? 6 : 5;
this->OutSec->Entsize = this->Entsize;
this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
this->Size = sizeof(uint32_t) * 4 // Header
+ sizeof(uintX_t) * MaskWords // Bloom Filter
+ sizeof(uint32_t) * NBuckets // Hash Buckets
+ sizeof(uint32_t) * NumHashed; // Hash Values
// Computes bloom filter size in word size. We want to allocate 8
// bits for each symbol. It must be a power of two.
if (Symbols.empty())
MaskWords = 1;
else
MaskWords = NextPowerOf2((Symbols.size() - 1) / sizeof(uintX_t));
Size = 16; // Header
Size += sizeof(uintX_t) * MaskWords; // Bloom filter
Size += NBuckets * 4; // Hash buckets
Size += Symbols.size() * 4; // Hash values
}
template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
Buf = writeHeader(Buf);
if (Symbols.empty())
return;
Buf = writeBloomFilter(Buf);
writeHashTable(Buf);
}
template <class ELFT>
uint8_t *GnuHashTableSection<ELFT>::writeHeader(uint8_t *Buf) {
// Write a header.
const endianness E = ELFT::TargetEndianness;
write32<E>(Buf, NBuckets);
write32<E>(Buf + 4, In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size());
write32<E>(Buf + 8, MaskWords);
write32<E>(Buf + 12, Shift2);
return Buf + 16;
write32<E>(Buf + 12, getShift2());
Buf += 16;
writeBloomFilter(Buf);
Buf += sizeof(uintX_t) * MaskWords;
writeHashTable(Buf);
}
template <class ELFT>
uint8_t *GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
typedef typename ELFT::Off Elf_Off;
const unsigned C = sizeof(uintX_t) * 8;
auto *Masks = reinterpret_cast<Elf_Off *>(Buf);
for (const SymbolData &Sym : Symbols) {
size_t Pos = (Sym.Hash / C) & (MaskWords - 1);
uintX_t V = (uintX_t(1) << (Sym.Hash % C)) |
(uintX_t(1) << ((Sym.Hash >> Shift2) % C));
Masks[Pos] |= V;
auto *Filter = reinterpret_cast<Elf_Off *>(Buf);
for (const Entry &Sym : Symbols) {
size_t I = (Sym.Hash / C) & (MaskWords - 1);
Filter[I] |= uintX_t(1) << (Sym.Hash % C);
Filter[I] |= uintX_t(1) << ((Sym.Hash >> getShift2()) % C);
}
return Buf + sizeof(uintX_t) * MaskWords;
}
template <class ELFT>
@ -1538,25 +1500,30 @@ void GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) {
// A 32-bit integer type in the target endianness.
typedef typename ELFT::Word Elf_Word;
Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf);
Elf_Word *Values = Buckets + NBuckets;
// Group symbols by hash value.
std::vector<std::vector<Entry>> Syms(NBuckets);
for (const Entry &Ent : Symbols)
Syms[Ent.Hash % NBuckets].push_back(Ent);
int PrevBucket = -1;
int I = 0;
for (const SymbolData &Sym : Symbols) {
int Bucket = Sym.Hash % NBuckets;
assert(PrevBucket <= Bucket);
if (Bucket != PrevBucket) {
Buckets[Bucket] = Sym.Body->DynsymIndex;
PrevBucket = Bucket;
if (I > 0)
Values[I - 1] |= 1;
}
Values[I] = Sym.Hash & ~1;
++I;
// Write hash buckets. Hash buckets contain indices in the following
// hash value table.
Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf);
for (size_t I = 0; I < NBuckets; ++I)
if (!Syms[I].empty())
Buckets[I] = Syms[I][0].Body->DynsymIndex;
// Write a hash value table. It represents a sequence of chains that
// share the same hash modulo value. The last element of each chain
// is terminated by LSB 1.
Elf_Word *Values = Buckets + NBuckets;
size_t I = 0;
for (std::vector<Entry> &Vec : Syms) {
if (Vec.empty())
continue;
for (const Entry &Ent : makeArrayRef(Vec).drop_back())
Values[I++] = Ent.Hash & ~1;
Values[I++] = Vec.back().Hash | 1;
}
if (I > 0)
Values[I - 1] |= 1;
}
static uint32_t hashGnu(StringRef Name) {
@ -1566,34 +1533,50 @@ static uint32_t hashGnu(StringRef Name) {
return H;
}
// Returns a number of hash buckets to accomodate given number of elements.
// We want to choose a moderate number that is not too small (which
// causes too many hash collisions) and not too large (which wastes
// disk space.)
//
// We return a prime number because it (is believed to) achieve good
// hash distribution.
static size_t getBucketSize(size_t NumSymbols) {
// List of largest prime numbers that are not greater than 2^n + 1.
for (size_t N : {131071, 65521, 32749, 16381, 8191, 4093, 2039, 1021, 509,
251, 127, 61, 31, 13, 7, 3, 1})
if (N <= NumSymbols)
return N;
return 0;
}
// Add symbols to this symbol hash table. Note that this function
// destructively sort a given vector -- which is needed because
// GNU-style hash table places some sorting requirements.
template <class ELFT>
void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) {
// Ideally this will just be 'auto' but GCC 6.1 is not able
// to deduce it correctly.
// We cannot use 'auto' for Mid because GCC 6.1 cannot deduce
// its type correctly.
std::vector<SymbolTableEntry>::iterator Mid =
std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) {
return S.Symbol->isUndefined();
});
if (Mid == V.end())
return;
for (auto I = Mid, E = V.end(); I != E; ++I) {
SymbolBody *B = I->Symbol;
size_t StrOff = I->StrTabOffset;
Symbols.push_back({B, StrOff, hashGnu(B->getName())});
for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) {
SymbolBody *B = Ent.Symbol;
Symbols.push_back({B, Ent.StrTabOffset, hashGnu(B->getName())});
}
unsigned NBuckets = calcNBuckets(Symbols.size());
NBuckets = getBucketSize(Symbols.size());
std::stable_sort(Symbols.begin(), Symbols.end(),
[&](const SymbolData &L, const SymbolData &R) {
[&](const Entry &L, const Entry &R) {
return L.Hash % NBuckets < R.Hash % NBuckets;
});
V.erase(Mid, V.end());
for (const SymbolData &Sym : Symbols)
V.push_back({Sym.Body, Sym.STName});
for (const Entry &Ent : Symbols)
V.push_back({Ent.Body, Ent.StrTabOffset});
}
template <class ELFT>

View File

@ -445,25 +445,21 @@ public:
void addSymbols(std::vector<SymbolTableEntry> &Symbols);
private:
static unsigned calcNBuckets(unsigned NumHashed);
static unsigned calcMaskWords(unsigned NumHashed);
size_t getShift2() const { return ELFT::Is64Bits ? 6 : 5; }
uint8_t *writeHeader(uint8_t *Buf);
uint8_t *writeBloomFilter(uint8_t *Buf);
void writeBloomFilter(uint8_t *Buf);
void writeHashTable(uint8_t *Buf);
struct SymbolData {
struct Entry {
SymbolBody *Body;
size_t STName;
size_t StrTabOffset;
uint32_t Hash;
};
std::vector<SymbolData> Symbols;
unsigned MaskWords;
unsigned NBuckets;
unsigned Shift2;
uintX_t Size = 0;
std::vector<Entry> Symbols;
size_t MaskWords;
size_t NBuckets;
size_t Size = 0;
};
template <class ELFT> class HashTableSection final : public SyntheticSection {