PDB: Add a class to create the /names stream contents.

This patch adds a new class, NameHashTableBuilder, which creates /names streams.
It also contains a test confirming that a stream created by
NameHashTableBuilder can be read back by the NameHashTable reader class.

Differential Revision: https://reviews.llvm.org/D28707

llvm-svn: 292040
This commit is contained in:
Rui Ueyama 2017-01-15 00:36:02 +00:00
parent 2f19a324cb
commit dcd32937dc
7 changed files with 214 additions and 8 deletions

View File

@ -0,0 +1,45 @@
//===- NameHashTableBuilder.h - PDB Name Hash Table Builder -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file creates the "/names" stream.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H
#define LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include <vector>
namespace llvm {
namespace msf {
class StreamWriter;
}
namespace pdb {
/// Builds the contents of a PDB "/names" stream: a string table followed by
/// an on-disk hash table over those strings.
///
/// Usage: call insert() for every string, size the output buffer with
/// calculateSerializedLength(), then serialize everything with commit().
class NameHashTableBuilder {
public:
// If string S does not exist in the string table, insert it.
// Returns the ID for S.
// The ID is the byte offset S will have inside the serialized string table;
// inserting a string that is already present returns its existing ID.
uint32_t insert(StringRef S);
// Returns the number of bytes commit() writes: the header, the string data,
// the 4-byte bucket count, the buckets themselves and the trailing 4-byte
// string count.
uint32_t calculateSerializedLength() const;
// Serializes the header, string table and hash table into Writer.
// Returns the first error reported by Writer, or success.
Error commit(msf::StreamWriter &Writer) const;
private:
// Maps each inserted string to its ID (its offset in the string table).
// NOTE(review): the keys are StringRefs, which do not own their characters,
// so callers presumably must keep the string data alive until commit() has
// run -- confirm against callers.
DenseMap<StringRef, uint32_t> Strings;
// Size in bytes of the string table built so far, including one '\0' per
// string. Starts at 1, so the first inserted string gets ID 1 and offset 0
// is never handed out as an ID.
uint32_t StringSize = 1;
};
} // end namespace pdb
} // end namespace llvm
#endif // LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H

View File

@ -302,6 +302,15 @@ struct InfoStreamHeader {
PDB_UniqueId Guid;
};
/// The header preceding the /names stream.
/// All fields are stored as little-endian 32-bit integers.
struct NameHashTableHeader {
// Magic value identifying the stream; NameHashTable::load reports a corrupt
// file unless this equals NameHashTableSignature.
support::ulittle32_t Signature;
// Version of the string hash function in use; the reader checks for 1 or 2,
// and NameHashTableBuilder always writes 1.
support::ulittle32_t HashVersion;
// Size in bytes of the string data that follows this header
// (NameHashTableBuilder stores its accumulated string table size here).
support::ulittle32_t ByteSize;
};
// Expected value of NameHashTableHeader::Signature.
const uint32_t NameHashTableSignature = 0xEFFEEFFE;
} // namespace pdb
} // namespace llvm

View File

@ -39,6 +39,7 @@ add_pdb_impl_folder(Raw
Raw/ModInfo.cpp
Raw/ModStream.cpp
Raw/NameHashTable.cpp
Raw/NameHashTableBuilder.cpp
Raw/NameMap.cpp
Raw/NameMapBuilder.cpp
Raw/PDBFile.cpp

View File

@ -13,6 +13,7 @@
#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/Raw/Hash.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
@ -23,17 +24,11 @@ using namespace llvm::pdb;
NameHashTable::NameHashTable() : Signature(0), HashVersion(0), NameCount(0) {}
Error NameHashTable::load(StreamReader &Stream) {
struct Header {
support::ulittle32_t Signature;
support::ulittle32_t HashVersion;
support::ulittle32_t ByteSize;
};
const Header *H;
const NameHashTableHeader *H;
if (auto EC = Stream.readObject(H))
return EC;
if (H->Signature != 0xEFFEEFFE)
if (H->Signature != NameHashTableSignature)
return make_error<RawError>(raw_error_code::corrupt_file,
"Invalid hash table signature");
if (H->HashVersion != 1 && H->HashVersion != 2)

View File

@ -0,0 +1,101 @@
//===- NameHashTableBuilder.cpp - PDB Name Hash Table Builder ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/MSF/StreamWriter.h"
#include "llvm/DebugInfo/PDB/Raw/Hash.h"
#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace llvm::pdb;
// Adds S to the string table unless it is already present, and returns its
// ID. The ID is the byte offset the string will occupy in the serialized
// string table.
uint32_t NameHashTableBuilder::insert(StringRef S) {
  // Tentatively hand out the next free offset. If S is already in the map the
  // existing entry is left untouched and the bool of the result is false.
  const auto Result = Strings.insert({S, StringSize});
  const bool IsNewString = Result.second;

  if (IsNewString) {
    // Reserve room for the characters plus the terminating '\0'.
    StringSize += S.size() + 1;
  }

  return Result.first->second;
}
// Returns how many hash buckets to allocate for a table of NumStrings strings.
//
// The /names stream is an on-disk open-addressing hash table whose collisions
// are resolved by linear probing. A completely full table would make probing
// extremely slow, while a sparse one wastes disk space, so the table is sized
// for a load factor of roughly 80%.
static uint32_t computeBucketCount(uint32_t NumStrings) {
  // Slot 0 is reserved, hence one slot more than there are strings.
  const uint32_t SlotsNeeded = NumStrings + 1;

  // 1 / 0.8 == 1.25; the fractional part is dropped on conversion.
  const double Scaled = SlotsNeeded * 1.25;
  return static_cast<uint32_t>(Scaled);
}
uint32_t NameHashTableBuilder::calculateSerializedLength() const {
uint32_t Size = 0;
Size += sizeof(NameHashTableHeader);
Size += StringSize;
Size += 4; // Hash table begins with 4-byte size field.
uint32_t BucketCount = computeBucketCount(Strings.size());
Size += BucketCount * 4;
Size += 4; // The /names stream ends with the number of strings.
return Size;
}
// Serializes the /names stream into Writer, starting at Writer's current
// offset. The layout is:
//   header | string table | bucket count | buckets | number of strings
// Returns the first error reported by Writer, or success.
Error NameHashTableBuilder::commit(msf::StreamWriter &Writer) const {
  // Write a header.
  NameHashTableHeader H;
  H.Signature = NameHashTableSignature;
  H.HashVersion = 1; // Must match the hash function used below (hashStringV1).
  H.ByteSize = StringSize;
  if (auto EC = Writer.writeObject(H))
    return EC;

  // Write a string table.
  uint32_t StringStart = Writer.getOffset();

  // No inserted string ever lives at offset 0 (IDs start at 1), so that byte
  // would otherwise keep whatever the destination buffer happened to contain.
  // Emit it explicitly as an empty, NUL-terminated string instead of relying
  // on the buffer being pre-zeroed.
  if (auto EC = Writer.writeZeroString(StringRef("")))
    return EC;

  // The map's iteration order is unrelated to the string offsets, so seek to
  // each string's own offset before writing it.
  for (const auto &Pair : Strings) {
    StringRef S = Pair.first;
    uint32_t Offset = Pair.second;
    Writer.setOffset(StringStart + Offset);
    if (auto EC = Writer.writeZeroString(S))
      return EC;
  }
  // Continue right after the string table regardless of which string was
  // written last.
  Writer.setOffset(StringStart + StringSize);

  // Write a hash table.
  uint32_t BucketCount = computeBucketCount(Strings.size());
  if (auto EC = Writer.writeInteger(BucketCount))
    return EC;

  // A bucket value of 0 means "empty"; the vector starts out all zero.
  std::vector<ulittle32_t> Buckets(BucketCount);

  for (const auto &Pair : Strings) {
    StringRef S = Pair.first;
    uint32_t Offset = Pair.second;
    uint32_t Hash = hashStringV1(S);

    // Linear probing from the string's home slot. BucketCount is at least
    // Strings.size() + 1, so even with slot 0 excluded there is a free slot
    // for every string.
    for (uint32_t I = 0; I != BucketCount; ++I) {
      uint32_t Slot = (Hash + I) % BucketCount;
      if (Slot == 0)
        continue; // Skip reserved slot
      if (Buckets[Slot] != 0)
        continue; // Already taken by another string.
      Buckets[Slot] = Offset;
      break;
    }
  }

  if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
    return EC;

  // The stream ends with the number of strings.
  if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size())))
    return EC;
  return Error::success();
}

View File

@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
set(DebugInfoPDBSources
MappedBlockStreamTest.cpp
NameHashTableBuilderTest.cpp
MSFBuilderTest.cpp
PDBApiTest.cpp
)

View File

@ -0,0 +1,54 @@
//===- NameHashTableBuilderTest.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "ErrorChecking.h"
#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/MSF/StreamWriter.h"
#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h"
#include "gtest/gtest.h"
using namespace llvm;
using namespace llvm::pdb;
namespace {
// Empty fixture with no shared state; it only provides the suite name used by
// the TEST_F cases in this file.
class NameHashTableBuilderTest : public ::testing::Test {};
}
// Round-trip test: builds a /names stream with NameHashTableBuilder and checks
// that the NameHashTable reader recovers the same strings and IDs.
TEST_F(NameHashTableBuilderTest, Simple) {
// Create /names table contents.
// IDs are offsets into the string table: the first string gets 1, and every
// 3-character string occupies 4 bytes including its '\0', giving 1, 5 and 9.
// Re-inserting "foo" must return the existing ID rather than a new one.
NameHashTableBuilder Builder;
EXPECT_EQ(1U, Builder.insert("foo"));
EXPECT_EQ(5U, Builder.insert("bar"));
EXPECT_EQ(1U, Builder.insert("foo"));
EXPECT_EQ(9U, Builder.insert("baz"));
// Serialize into a buffer sized exactly as the builder reports.
std::vector<uint8_t> Buffer(Builder.calculateSerializedLength());
msf::MutableByteStream OutStream(Buffer);
msf::StreamWriter Writer(OutStream);
EXPECT_NO_ERROR(Builder.commit(Writer));
// Read the contents back through the reader class.
msf::ByteStream InStream(Buffer);
msf::StreamReader Reader(InStream);
NameHashTable Table;
EXPECT_NO_ERROR(Table.load(Reader));
// Three distinct strings were inserted; the builder writes hash version 1.
EXPECT_EQ(3U, Table.getNameCount());
EXPECT_EQ(1U, Table.getHashVersion());
// Lookup by ID.
EXPECT_EQ("foo", Table.getStringForID(1));
EXPECT_EQ("bar", Table.getStringForID(5));
EXPECT_EQ("baz", Table.getStringForID(9));
// Lookup by string.
EXPECT_EQ(1U, Table.getIDForString("foo"));
EXPECT_EQ(5U, Table.getIDForString("bar"));
EXPECT_EQ(9U, Table.getIDForString("baz"));
}