From d08eab42811f8146527d061be456cd1d80867f09 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 27 Nov 2018 16:08:53 +0000 Subject: [PATCH] [clangd] Put direct headers into srcs section. Summary: Currently, there's no way of knowing about header files using compilation database, since it doesn't contain header files as entries. Using this information, restoring from cache using compile commands becomes possible instead of doing directory traversal. Also, we can issue indexing actions for out-of-date headers even if source files depending on them haven't changed. Reviewers: sammccall Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D54817 llvm-svn: 347669 --- clang-tools-extra/clangd/Headers.h | 14 ++++ clang-tools-extra/clangd/SourceCode.cpp | 12 +++ clang-tools-extra/clangd/SourceCode.h | 9 +++ clang-tools-extra/clangd/index/Background.cpp | 35 +++------ clang-tools-extra/clangd/index/Background.h | 5 +- .../clangd/index/Serialization.cpp | 75 ++++++++++++++++--- .../clangd/index/Serialization.h | 10 +-- .../unittests/clangd/BackgroundIndexTests.cpp | 3 - .../unittests/clangd/SerializationTests.cpp | 45 +++++++---- 9 files changed, 147 insertions(+), 61 deletions(-) diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h index 519fc47060e2..7eead04c649a 100644 --- a/clang-tools-extra/clangd/Headers.h +++ b/clang-tools-extra/clangd/Headers.h @@ -48,6 +48,20 @@ struct Inclusion { }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion&); +// Contains information about one file in the build grpah and its direct +// dependencies. Doesn't own the strings it references (IncludeGraph is +// self-contained). +struct IncludeGraphNode { + // True if current file is a main file rather than a header. + bool IsTU; + llvm::StringRef URI; + FileDigest Digest; + std::vector DirectIncludes; +}; +// FileURI and FileInclusions are references to keys of the map containing +// them. +using IncludeGraph = llvm::StringMap; + // Information captured about the inclusion graph in a translation unit. // This includes detailed information about the direct #includes, and summary // information about all transitive includes. diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 43122859c550..4d276ef2f237 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -227,5 +227,17 @@ bool IsRangeConsecutive(const Range &Left, const Range &Right) { Left.end.character == Right.start.character; } +FileDigest digest(StringRef Content) { + return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); +} + +Optional digestFile(const SourceManager &SM, FileID FID) { + bool Invalid = false; + StringRef Content = SM.getBufferData(FID, &Invalid); + if (Invalid) + return None; + return digest(Content); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index fd5d53b418db..769d0afc7215 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -16,13 +16,22 @@ #include "Protocol.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/Support/SHA1.h" namespace clang { class SourceManager; namespace clangd { +// We tend to generate digests for source codes in a lot of different places. +// This represents the type for those digests to prevent us hard coding details +// of hashing function at every place that needs to store this information. +using FileDigest = decltype(llvm::SHA1::hash({})); +FileDigest digest(StringRef Content); +Optional digestFile(const SourceManager &SM, FileID FID); + // Counts the number of UTF-16 code units needed to represent a string (LSP // specifies string lengths in UTF-16 code units). size_t lspLength(StringRef Code); diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index ecd586531ff0..d4ad39580d12 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -11,6 +11,7 @@ #include "ClangdUnit.h" #include "Compiler.h" #include "Logger.h" +#include "SourceCode.h" #include "Threading.h" #include "Trace.h" #include "URI.h" @@ -149,19 +150,6 @@ void BackgroundIndex::enqueueTask(Task T) { QueueCV.notify_all(); } -static BackgroundIndex::FileDigest digest(StringRef Content) { - return SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); -} - -static Optional digestFile(const SourceManager &SM, - FileID FID) { - bool Invalid = false; - StringRef Content = SM.getBufferData(FID, &Invalid); - if (Invalid) - return None; - return digest(Content); -} - // Resolves URI to file paths with cache. class URIToFileCache { public: @@ -193,8 +181,7 @@ private: }; /// Given index results from a TU, only update files in \p FilesToUpdate. -void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, - RefSlab Refs, +void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index, const StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage) { // Partition symbols/references into files. @@ -204,7 +191,7 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, }; StringMap Files; URIToFileCache URICache(MainFile); - for (const auto &Sym : Symbols) { + for (const auto &Sym : *Index.Symbols) { if (Sym.CanonicalDeclaration) { auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI); if (FilesToUpdate.count(DeclPath) != 0) @@ -222,7 +209,7 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, } } DenseMap RefToIDs; - for (const auto &SymRefs : Refs) { + for (const auto &SymRefs : *Index.Refs) { for (const auto &R : SymRefs.second) { auto Path = URICache.resolve(R.Location.FileURI); if (FilesToUpdate.count(Path) != 0) { @@ -250,12 +237,11 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, auto Hash = FilesToUpdate.lookup(Path); // We need to store shards before updating the index, since the latter // consumes slabs. - // FIXME: Store Hash in the Shard. if (IndexStorage) { IndexFileOut Shard; Shard.Symbols = SS.get(); Shard.Refs = RS.get(); - Shard.Digest = &Hash; + if (auto Error = IndexStorage->storeShard(Path, Shard)) elog("Failed to write background-index shard for file {0}: {1}", Path, std::move(Error)); @@ -275,8 +261,8 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, // \p FileDigests contains file digests for the current indexed files, and all // changed files will be added to \p FilesToUpdate. decltype(SymbolCollector::Options::FileFilter) createFileFilter( - const llvm::StringMap &FileDigests, - llvm::StringMap &FilesToUpdate) { + const llvm::StringMap &FileDigests, + llvm::StringMap &FilesToUpdate) { return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) { StringRef Path; if (const auto *F = SM.getFileEntryForID(FID)) @@ -375,8 +361,11 @@ Error BackgroundIndex::index(tooling::CompileCommand Cmd, Symbols.size(), Refs.numRefs()); SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs())); - update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate, - IndexStorage); + IndexFileIn Index; + Index.Symbols = std::move(Symbols); + Index.Refs = std::move(Refs); + + update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage); { // Make sure hash for the main file is always updated even if there is no // index data in it. diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index c71e48f2427c..e1eb3ccfd18c 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -84,11 +84,10 @@ public: LLVM_NODISCARD bool blockUntilIdleForTest(llvm::Optional TimeoutSeconds = 10); - using FileDigest = decltype(llvm::SHA1::hash({})); - private: /// Given index results from a TU, only update files in \p FilesToUpdate. - void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, + /// Also stores new index information on IndexStorage. + void update(llvm::StringRef MainFile, IndexFileIn Index, const llvm::StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage); diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index 9eb4d21eb8f8..1d17a42c4869 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -247,6 +247,31 @@ SymbolLocation readLocation(Reader &Data, ArrayRef Strings) { return Loc; } +IncludeGraphNode readIncludeGraphNode(Reader &Data, + llvm::ArrayRef Strings) { + IncludeGraphNode IGN; + IGN.IsTU = Data.consume8(); + IGN.URI = Data.consumeString(Strings); + llvm::StringRef Digest = Data.consume(IGN.Digest.size()); + std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin()); + IGN.DirectIncludes.resize(Data.consumeVar()); + for (llvm::StringRef &Include : IGN.DirectIncludes) + Include = Data.consumeString(Strings); + return IGN; +} + +void writeIncludeGraphNode(const IncludeGraphNode &IGN, + const StringTableOut &Strings, raw_ostream &OS) { + OS.write(IGN.IsTU); + writeVar(Strings.index(IGN.URI), OS); + llvm::StringRef Hash(reinterpret_cast(IGN.Digest.data()), + IGN.Digest.size()); + OS << Hash; + writeVar(IGN.DirectIncludes.size(), OS); + for (llvm::StringRef Include : IGN.DirectIncludes) + writeVar(Strings.index(Include), OS); +} + void writeSymbol(const Symbol &Sym, const StringTableOut &Strings, raw_ostream &OS) { OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists, @@ -333,7 +358,7 @@ std::pair> readRefs(Reader &Data, // A file is a RIFF chunk with type 'CdIx'. // It contains the sections: // - meta: version number -// - srcs: checksum of the source file +// - srcs: information related to include graph // - stri: string table // - symb: symbols // - refs: references to symbols @@ -367,10 +392,20 @@ Expected readRIFF(StringRef Data) { IndexFileIn Result; if (Chunks.count("srcs")) { - Reader Hash(Chunks.lookup("srcs")); - Result.Digest.emplace(); - llvm::StringRef Digest = Hash.consume(Result.Digest->size()); - std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin()); + Reader SrcsReader(Chunks.lookup("srcs")); + Result.Sources.emplace(); + while (!SrcsReader.eof()) { + auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings); + auto Entry = Result.Sources->try_emplace(IGN.URI).first; + Entry->getValue() = std::move(IGN); + // We change all the strings inside the structure to point at the keys in + // the map, since it is the only copy of the string that's going to live. + Entry->getValue().URI = Entry->getKey(); + for (auto &Include : Entry->getValue().DirectIncludes) + Include = Result.Sources->try_emplace(Include).first->getKey(); + } + if (SrcsReader.err()) + return makeError("malformed or truncated include uri"); } if (Chunks.count("symb")) { @@ -397,6 +432,13 @@ Expected readRIFF(StringRef Data) { return std::move(Result); } +template +void visitStrings(IncludeGraphNode &IGN, const Callback &CB) { + CB(IGN.URI); + for (llvm::StringRef &Include : IGN.DirectIncludes) + CB(Include); +} + void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { assert(Data.Symbols && "An index file without symbols makes no sense!"); riff::File RIFF; @@ -409,18 +451,19 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { } RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); - if (Data.Digest) { - llvm::StringRef Hash(reinterpret_cast(Data.Digest->data()), - Data.Digest->size()); - RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash}); - } - StringTableOut Strings; std::vector Symbols; for (const auto &Sym : *Data.Symbols) { Symbols.emplace_back(Sym); visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); }); } + std::vector Sources; + if (Data.Sources) + for (const auto &Source : *Data.Sources) { + Sources.push_back(Source.getValue()); + visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); }); + } + std::vector>> Refs; if (Data.Refs) { for (const auto &Sym : *Data.Refs) { @@ -458,6 +501,16 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); } + std::string SrcsSection; + { + { + raw_string_ostream SrcsOS(SrcsSection); + for (const auto &SF : Sources) + writeIncludeGraphNode(SF, Strings, SrcsOS); + } + RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection}); + } + OS << RIFF; } diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index add8110a161e..c403b95798e6 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -24,6 +24,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H +#include "Headers.h" #include "Index.h" #include "llvm/Support/Error.h" @@ -37,11 +38,10 @@ enum class IndexFileFormat { // Holds the contents of an index file that was read. struct IndexFileIn { - using FileDigest = std::array; llvm::Optional Symbols; llvm::Optional Refs; - // Digest of the source file that generated the contents. - llvm::Optional Digest; + // Keys are URIs of the source files. + llvm::Optional Sources; }; // Parse an index file. The input must be a RIFF or YAML file. llvm::Expected readIndexFile(llvm::StringRef); @@ -50,8 +50,8 @@ llvm::Expected readIndexFile(llvm::StringRef); struct IndexFileOut { const SymbolSlab *Symbols = nullptr; const RefSlab *Refs = nullptr; - // Digest of the source file that generated the contents. - const IndexFileIn::FileDigest *Digest = nullptr; + // Keys are URIs of the source files. + const IncludeGraph *Sources = nullptr; // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; diff --git a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp index 0beaf6a730d0..19263d0165a9 100644 --- a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp +++ b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp @@ -129,8 +129,6 @@ TEST_F(BackgroundIndexTest, ShardStorageWriteTest) { )cpp"; std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }"; FS.Files[testPath("root/A.cc")] = A_CC; - auto Digest = llvm::SHA1::hash( - {reinterpret_cast(A_CC.data()), A_CC.size()}); llvm::StringMap Storage; size_t CacheHits = 0; @@ -165,7 +163,6 @@ TEST_F(BackgroundIndexTest, ShardStorageWriteTest) { EXPECT_NE(ShardSource, nullptr); EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre()); EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")})); - EXPECT_EQ(*ShardSource->Digest, Digest); } } // namespace clangd diff --git a/clang-tools-extra/unittests/clangd/SerializationTests.cpp b/clang-tools-extra/unittests/clangd/SerializationTests.cpp index cf1bc4281909..a30369b0f775 100644 --- a/clang-tools-extra/unittests/clangd/SerializationTests.cpp +++ b/clang-tools-extra/unittests/clangd/SerializationTests.cpp @@ -173,31 +173,44 @@ TEST(SerializationTest, BinaryConversions) { UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); } -TEST(SerializationTest, HashTest) { +TEST(SerializationTest, SrcsTest) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); - std::string TestContent("TESTCONTENT"); - auto Digest = + std::string TestContent("TestContent"); + IncludeGraphNode IGN; + IGN.Digest = llvm::SHA1::hash({reinterpret_cast(TestContent.data()), TestContent.size()}); + IGN.DirectIncludes = {"inc1", "inc2"}; + IGN.URI = "URI"; + IGN.IsTU = true; + IncludeGraph Sources; + Sources[IGN.URI] = IGN; // Write to binary format, and parse again. IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; - Out.Digest = &Digest; - std::string Serialized = to_string(Out); + Out.Sources = &Sources; + { + std::string Serialized = to_string(Out); - auto In2 = readIndexFile(Serialized); - ASSERT_TRUE(bool(In2)) << In.takeError(); - ASSERT_EQ(In2->Digest, Digest); - ASSERT_TRUE(In2->Symbols); - ASSERT_TRUE(In2->Refs); - - // Assert the YAML serializations match, for nice comparisons and diffs. - EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), - UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); - EXPECT_THAT(YAMLFromRefs(*In2->Refs), - UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + auto In = readIndexFile(Serialized); + ASSERT_TRUE(bool(In)) << In.takeError(); + ASSERT_TRUE(In->Symbols); + ASSERT_TRUE(In->Refs); + ASSERT_TRUE(In->Sources); + ASSERT_TRUE(In->Sources->count(IGN.URI)); + // Assert the YAML serializations match, for nice comparisons and diffs. + EXPECT_THAT(YAMLFromSymbols(*In->Symbols), + UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); + EXPECT_THAT(YAMLFromRefs(*In->Refs), + UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + auto IGNDeserialized = In->Sources->lookup(IGN.URI); + EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest); + EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes); + EXPECT_EQ(IGNDeserialized.URI, IGN.URI); + EXPECT_EQ(IGNDeserialized.IsTU, IGN.IsTU); + } } } // namespace