[clangd] Put direct headers into srcs section.

Summary:
Currently, there's no way of knowing about header files
using compilation database, since it doesn't contain header files as entries.

Using this information, restoring from cache using compile commands becomes
possible instead of doing directory traversal. Also, we can issue indexing
actions for out-of-date headers even if source files depending on them haven't
changed.

Reviewers: sammccall

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, cfe-commits

Differential Revision: https://reviews.llvm.org/D54817

llvm-svn: 347669
This commit is contained in:
Kadir Cetinkaya 2018-11-27 16:08:53 +00:00
parent 5f4feea910
commit d08eab4281
9 changed files with 147 additions and 61 deletions

View File

@ -48,6 +48,20 @@ struct Inclusion {
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion&);
// Contains information about one file in the build grpah and its direct
// dependencies. Doesn't own the strings it references (IncludeGraph is
// self-contained).
struct IncludeGraphNode {
// True if current file is a main file rather than a header.
bool IsTU;
llvm::StringRef URI;
FileDigest Digest;
std::vector<llvm::StringRef> DirectIncludes;
};
// FileURI and FileInclusions are references to keys of the map containing
// them.
using IncludeGraph = llvm::StringMap<IncludeGraphNode>;
// Information captured about the inclusion graph in a translation unit.
// This includes detailed information about the direct #includes, and summary
// information about all transitive includes.

View File

@ -227,5 +227,17 @@ bool IsRangeConsecutive(const Range &Left, const Range &Right) {
Left.end.character == Right.start.character;
}
FileDigest digest(StringRef Content) {
return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
}
Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
bool Invalid = false;
StringRef Content = SM.getBufferData(FID, &Invalid);
if (Invalid)
return None;
return digest(Content);
}
} // namespace clangd
} // namespace clang

View File

@ -16,13 +16,22 @@
#include "Protocol.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/SHA1.h"
namespace clang {
class SourceManager;
namespace clangd {
// We tend to generate digests for source codes in a lot of different places.
// This represents the type for those digests to prevent us hard coding details
// of hashing function at every place that needs to store this information.
using FileDigest = decltype(llvm::SHA1::hash({}));
FileDigest digest(StringRef Content);
Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
// Counts the number of UTF-16 code units needed to represent a string (LSP
// specifies string lengths in UTF-16 code units).
size_t lspLength(StringRef Code);

View File

@ -11,6 +11,7 @@
#include "ClangdUnit.h"
#include "Compiler.h"
#include "Logger.h"
#include "SourceCode.h"
#include "Threading.h"
#include "Trace.h"
#include "URI.h"
@ -149,19 +150,6 @@ void BackgroundIndex::enqueueTask(Task T) {
QueueCV.notify_all();
}
static BackgroundIndex::FileDigest digest(StringRef Content) {
return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
}
static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
FileID FID) {
bool Invalid = false;
StringRef Content = SM.getBufferData(FID, &Invalid);
if (Invalid)
return None;
return digest(Content);
}
// Resolves URI to file paths with cache.
class URIToFileCache {
public:
@ -193,8 +181,7 @@ private:
};
/// Given index results from a TU, only update files in \p FilesToUpdate.
void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
RefSlab Refs,
void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index,
const StringMap<FileDigest> &FilesToUpdate,
BackgroundIndexStorage *IndexStorage) {
// Partition symbols/references into files.
@ -204,7 +191,7 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
};
StringMap<File> Files;
URIToFileCache URICache(MainFile);
for (const auto &Sym : Symbols) {
for (const auto &Sym : *Index.Symbols) {
if (Sym.CanonicalDeclaration) {
auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
if (FilesToUpdate.count(DeclPath) != 0)
@ -222,7 +209,7 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
}
}
DenseMap<const Ref *, SymbolID> RefToIDs;
for (const auto &SymRefs : Refs) {
for (const auto &SymRefs : *Index.Refs) {
for (const auto &R : SymRefs.second) {
auto Path = URICache.resolve(R.Location.FileURI);
if (FilesToUpdate.count(Path) != 0) {
@ -250,12 +237,11 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
auto Hash = FilesToUpdate.lookup(Path);
// We need to store shards before updating the index, since the latter
// consumes slabs.
// FIXME: Store Hash in the Shard.
if (IndexStorage) {
IndexFileOut Shard;
Shard.Symbols = SS.get();
Shard.Refs = RS.get();
Shard.Digest = &Hash;
if (auto Error = IndexStorage->storeShard(Path, Shard))
elog("Failed to write background-index shard for file {0}: {1}", Path,
std::move(Error));
@ -275,8 +261,8 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
// \p FileDigests contains file digests for the current indexed files, and all
// changed files will be added to \p FilesToUpdate.
decltype(SymbolCollector::Options::FileFilter) createFileFilter(
const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
const llvm::StringMap<FileDigest> &FileDigests,
llvm::StringMap<FileDigest> &FilesToUpdate) {
return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
StringRef Path;
if (const auto *F = SM.getFileEntryForID(FID))
@ -375,8 +361,11 @@ Error BackgroundIndex::index(tooling::CompileCommand Cmd,
Symbols.size(), Refs.numRefs());
SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
IndexStorage);
IndexFileIn Index;
Index.Symbols = std::move(Symbols);
Index.Refs = std::move(Refs);
update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage);
{
// Make sure hash for the main file is always updated even if there is no
// index data in it.

View File

@ -84,11 +84,10 @@ public:
LLVM_NODISCARD bool
blockUntilIdleForTest(llvm::Optional<double> TimeoutSeconds = 10);
using FileDigest = decltype(llvm::SHA1::hash({}));
private:
/// Given index results from a TU, only update files in \p FilesToUpdate.
void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
/// Also stores new index information on IndexStorage.
void update(llvm::StringRef MainFile, IndexFileIn Index,
const llvm::StringMap<FileDigest> &FilesToUpdate,
BackgroundIndexStorage *IndexStorage);

View File

@ -247,6 +247,31 @@ SymbolLocation readLocation(Reader &Data, ArrayRef<StringRef> Strings) {
return Loc;
}
IncludeGraphNode readIncludeGraphNode(Reader &Data,
llvm::ArrayRef<llvm::StringRef> Strings) {
IncludeGraphNode IGN;
IGN.IsTU = Data.consume8();
IGN.URI = Data.consumeString(Strings);
llvm::StringRef Digest = Data.consume(IGN.Digest.size());
std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
IGN.DirectIncludes.resize(Data.consumeVar());
for (llvm::StringRef &Include : IGN.DirectIncludes)
Include = Data.consumeString(Strings);
return IGN;
}
void writeIncludeGraphNode(const IncludeGraphNode &IGN,
const StringTableOut &Strings, raw_ostream &OS) {
OS.write(IGN.IsTU);
writeVar(Strings.index(IGN.URI), OS);
llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
IGN.Digest.size());
OS << Hash;
writeVar(IGN.DirectIncludes.size(), OS);
for (llvm::StringRef Include : IGN.DirectIncludes)
writeVar(Strings.index(Include), OS);
}
void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
raw_ostream &OS) {
OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
@ -333,7 +358,7 @@ std::pair<SymbolID, std::vector<Ref>> readRefs(Reader &Data,
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
// - meta: version number
// - srcs: checksum of the source file
// - srcs: information related to include graph
// - stri: string table
// - symb: symbols
// - refs: references to symbols
@ -367,10 +392,20 @@ Expected<IndexFileIn> readRIFF(StringRef Data) {
IndexFileIn Result;
if (Chunks.count("srcs")) {
Reader Hash(Chunks.lookup("srcs"));
Result.Digest.emplace();
llvm::StringRef Digest = Hash.consume(Result.Digest->size());
std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin());
Reader SrcsReader(Chunks.lookup("srcs"));
Result.Sources.emplace();
while (!SrcsReader.eof()) {
auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
auto Entry = Result.Sources->try_emplace(IGN.URI).first;
Entry->getValue() = std::move(IGN);
// We change all the strings inside the structure to point at the keys in
// the map, since it is the only copy of the string that's going to live.
Entry->getValue().URI = Entry->getKey();
for (auto &Include : Entry->getValue().DirectIncludes)
Include = Result.Sources->try_emplace(Include).first->getKey();
}
if (SrcsReader.err())
return makeError("malformed or truncated include uri");
}
if (Chunks.count("symb")) {
@ -397,6 +432,13 @@ Expected<IndexFileIn> readRIFF(StringRef Data) {
return std::move(Result);
}
template <class Callback>
void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
CB(IGN.URI);
for (llvm::StringRef &Include : IGN.DirectIncludes)
CB(Include);
}
void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
assert(Data.Symbols && "An index file without symbols makes no sense!");
riff::File RIFF;
@ -409,18 +451,19 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
}
RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
if (Data.Digest) {
llvm::StringRef Hash(reinterpret_cast<const char *>(Data.Digest->data()),
Data.Digest->size());
RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash});
}
StringTableOut Strings;
std::vector<Symbol> Symbols;
for (const auto &Sym : *Data.Symbols) {
Symbols.emplace_back(Sym);
visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
}
std::vector<IncludeGraphNode> Sources;
if (Data.Sources)
for (const auto &Source : *Data.Sources) {
Sources.push_back(Source.getValue());
visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); });
}
std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
if (Data.Refs) {
for (const auto &Sym : *Data.Refs) {
@ -458,6 +501,16 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
}
std::string SrcsSection;
{
{
raw_string_ostream SrcsOS(SrcsSection);
for (const auto &SF : Sources)
writeIncludeGraphNode(SF, Strings, SrcsOS);
}
RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
}
OS << RIFF;
}

View File

@ -24,6 +24,7 @@
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
#include "Headers.h"
#include "Index.h"
#include "llvm/Support/Error.h"
@ -37,11 +38,10 @@ enum class IndexFileFormat {
// Holds the contents of an index file that was read.
struct IndexFileIn {
using FileDigest = std::array<uint8_t, 20>;
llvm::Optional<SymbolSlab> Symbols;
llvm::Optional<RefSlab> Refs;
// Digest of the source file that generated the contents.
llvm::Optional<FileDigest> Digest;
// Keys are URIs of the source files.
llvm::Optional<IncludeGraph> Sources;
};
// Parse an index file. The input must be a RIFF or YAML file.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
@ -50,8 +50,8 @@ llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
struct IndexFileOut {
const SymbolSlab *Symbols = nullptr;
const RefSlab *Refs = nullptr;
// Digest of the source file that generated the contents.
const IndexFileIn::FileDigest *Digest = nullptr;
// Keys are URIs of the source files.
const IncludeGraph *Sources = nullptr;
// TODO: Support serializing Dex posting lists.
IndexFileFormat Format = IndexFileFormat::RIFF;

View File

@ -129,8 +129,6 @@ TEST_F(BackgroundIndexTest, ShardStorageWriteTest) {
)cpp";
std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }";
FS.Files[testPath("root/A.cc")] = A_CC;
auto Digest = llvm::SHA1::hash(
{reinterpret_cast<const uint8_t *>(A_CC.data()), A_CC.size()});
llvm::StringMap<std::string> Storage;
size_t CacheHits = 0;
@ -165,7 +163,6 @@ TEST_F(BackgroundIndexTest, ShardStorageWriteTest) {
EXPECT_NE(ShardSource, nullptr);
EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre());
EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")}));
EXPECT_EQ(*ShardSource->Digest, Digest);
}
} // namespace clangd

View File

@ -173,31 +173,44 @@ TEST(SerializationTest, BinaryConversions) {
UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
}
TEST(SerializationTest, HashTest) {
TEST(SerializationTest, SrcsTest) {
auto In = readIndexFile(YAML);
EXPECT_TRUE(bool(In)) << In.takeError();
std::string TestContent("TESTCONTENT");
auto Digest =
std::string TestContent("TestContent");
IncludeGraphNode IGN;
IGN.Digest =
llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
TestContent.size()});
IGN.DirectIncludes = {"inc1", "inc2"};
IGN.URI = "URI";
IGN.IsTU = true;
IncludeGraph Sources;
Sources[IGN.URI] = IGN;
// Write to binary format, and parse again.
IndexFileOut Out(*In);
Out.Format = IndexFileFormat::RIFF;
Out.Digest = &Digest;
Out.Sources = &Sources;
{
std::string Serialized = to_string(Out);
auto In2 = readIndexFile(Serialized);
ASSERT_TRUE(bool(In2)) << In.takeError();
ASSERT_EQ(In2->Digest, Digest);
ASSERT_TRUE(In2->Symbols);
ASSERT_TRUE(In2->Refs);
auto In = readIndexFile(Serialized);
ASSERT_TRUE(bool(In)) << In.takeError();
ASSERT_TRUE(In->Symbols);
ASSERT_TRUE(In->Refs);
ASSERT_TRUE(In->Sources);
ASSERT_TRUE(In->Sources->count(IGN.URI));
// Assert the YAML serializations match, for nice comparisons and diffs.
EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
EXPECT_THAT(YAMLFromRefs(*In2->Refs),
EXPECT_THAT(YAMLFromRefs(*In->Refs),
UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
auto IGNDeserialized = In->Sources->lookup(IGN.URI);
EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
EXPECT_EQ(IGNDeserialized.IsTU, IGN.IsTU);
}
}
} // namespace