From dcd32937dc396d6da334fbdcc930e268d12a28d4 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Sun, 15 Jan 2017 00:36:02 +0000 Subject: [PATCH] PDB: Add a class to create the /names stream contents. This patch adds a new class NameHashTableBuilder which creates /names streams. This patch contains a test to confirm that a stream created by NameHashTableBuilder can be read by NameHashTable reader class. Differential Revision: https://reviews.llvm.org/D28707 llvm-svn: 292040 --- .../DebugInfo/PDB/Raw/NameHashTableBuilder.h | 45 ++++++++ .../include/llvm/DebugInfo/PDB/Raw/RawTypes.h | 9 ++ llvm/lib/DebugInfo/PDB/CMakeLists.txt | 1 + llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp | 11 +- .../PDB/Raw/NameHashTableBuilder.cpp | 101 ++++++++++++++++++ llvm/unittests/DebugInfo/PDB/CMakeLists.txt | 1 + .../PDB/NameHashTableBuilderTest.cpp | 54 ++++++++++ 7 files changed, 214 insertions(+), 8 deletions(-) create mode 100644 llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h create mode 100644 llvm/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp create mode 100644 llvm/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h new file mode 100644 index 000000000000..d7fd4efd3ca9 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h @@ -0,0 +1,45 @@ +//===- NameHashTableBuilder.h - PDB Name Hash Table Builder -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file creates the "/names" stream. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H +#define LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace msf { +class StreamWriter; +} +namespace pdb { + +class NameHashTableBuilder { +public: + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + uint32_t calculateSerializedLength() const; + Error commit(msf::StreamWriter &Writer) const; + +private: + DenseMap Strings; + uint32_t StringSize = 1; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h b/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h index d404b3994dbc..8497ed979540 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h @@ -302,6 +302,15 @@ struct InfoStreamHeader { PDB_UniqueId Guid; }; +/// The header preceeding the /names stream. +struct NameHashTableHeader { + support::ulittle32_t Signature; + support::ulittle32_t HashVersion; + support::ulittle32_t ByteSize; +}; + +const uint32_t NameHashTableSignature = 0xEFFEEFFE; + } // namespace pdb } // namespace llvm diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt index 599f01eaf74f..a49c06b1e908 100644 --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -39,6 +39,7 @@ add_pdb_impl_folder(Raw Raw/ModInfo.cpp Raw/ModStream.cpp Raw/NameHashTable.cpp + Raw/NameHashTableBuilder.cpp Raw/NameMap.cpp Raw/NameMapBuilder.cpp Raw/PDBFile.cpp diff --git a/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp b/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp index 84cccb354bd8..86784a7aa759 100644 --- a/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/MSF/StreamReader.h" #include "llvm/DebugInfo/PDB/Raw/Hash.h" #include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/DebugInfo/PDB/Raw/RawTypes.h" #include "llvm/Support/Endian.h" using namespace llvm; @@ -23,17 +24,11 @@ using namespace llvm::pdb; NameHashTable::NameHashTable() : Signature(0), HashVersion(0), NameCount(0) {} Error NameHashTable::load(StreamReader &Stream) { - struct Header { - support::ulittle32_t Signature; - support::ulittle32_t HashVersion; - support::ulittle32_t ByteSize; - }; - - const Header *H; + const NameHashTableHeader *H; if (auto EC = Stream.readObject(H)) return EC; - if (H->Signature != 0xEFFEEFFE) + if (H->Signature != NameHashTableSignature) return make_error(raw_error_code::corrupt_file, "Invalid hash table signature"); if (H->HashVersion != 1 && H->HashVersion != 2) diff --git a/llvm/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp new file mode 100644 index 000000000000..2ef242515df1 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp @@ -0,0 +1,101 @@ +//===- NameHashTable.cpp - PDB Name Hash Table ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/Hash.h" +#include "llvm/DebugInfo/PDB/Raw/RawTypes.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t NameHashTableBuilder::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t NameHashTableBuilder::calculateSerializedLength() const { + uint32_t Size = 0; + Size += sizeof(NameHashTableHeader); + Size += StringSize; + Size += 4; // Hash table begins with 4-byte size field. + + uint32_t BucketCount = computeBucketCount(Strings.size()); + Size += BucketCount * 4; + + Size += 4; // The /names stream ends with the number of strings. + return Size; +} + +Error NameHashTableBuilder::commit(msf::StreamWriter &Writer) const { + // Write a header + NameHashTableHeader H; + H.Signature = NameHashTableSignature; + H.HashVersion = 1; + H.ByteSize = StringSize; + if (auto EC = Writer.writeObject(H)) + return EC; + + // Write a string table. + uint32_t StringStart = Writer.getOffset(); + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + Writer.setOffset(StringStart + Offset); + if (auto EC = Writer.writeZeroString(S)) + return EC; + } + Writer.setOffset(StringStart + StringSize); + + // Write a hash table. + uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + std::vector Buckets(BucketCount); + + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(ArrayRef(Buckets))) + return EC; + if (auto EC = Writer.writeInteger(static_cast(Strings.size()))) + return EC; + return Error::success(); +} diff --git a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt index 406b487e7a18..16a69edd81c3 100644 --- a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS set(DebugInfoPDBSources MappedBlockStreamTest.cpp + NameHashTableBuilderTest.cpp MSFBuilderTest.cpp PDBApiTest.cpp ) diff --git a/llvm/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp b/llvm/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp new file mode 100644 index 000000000000..4ae0e67fa208 --- /dev/null +++ b/llvm/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp @@ -0,0 +1,54 @@ +//===- NameHashTableBuilderTest.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ErrorChecking.h" + +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +class NameHashTableBuilderTest : public ::testing::Test {}; +} + +TEST_F(NameHashTableBuilderTest, Simple) { + // Create /names table contents. + NameHashTableBuilder Builder; + EXPECT_EQ(1U, Builder.insert("foo")); + EXPECT_EQ(5U, Builder.insert("bar")); + EXPECT_EQ(1U, Builder.insert("foo")); + EXPECT_EQ(9U, Builder.insert("baz")); + + std::vector Buffer(Builder.calculateSerializedLength()); + msf::MutableByteStream OutStream(Buffer); + msf::StreamWriter Writer(OutStream); + EXPECT_NO_ERROR(Builder.commit(Writer)); + + // Reads the contents back. + msf::ByteStream InStream(Buffer); + msf::StreamReader Reader(InStream); + NameHashTable Table; + EXPECT_NO_ERROR(Table.load(Reader)); + + EXPECT_EQ(3U, Table.getNameCount()); + EXPECT_EQ(1U, Table.getHashVersion()); + EXPECT_EQ("foo", Table.getStringForID(1)); + EXPECT_EQ("bar", Table.getStringForID(5)); + EXPECT_EQ("baz", Table.getStringForID(9)); + EXPECT_EQ(1U, Table.getIDForString("foo")); + EXPECT_EQ(5U, Table.getIDForString("bar")); + EXPECT_EQ(9U, Table.getIDForString("baz")); +}