[clangd] Serialization support for RelationSlab

Summary: This builds on D59407 to provide YAML and RIFF serialization support.

Reviewers: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62459

llvm-svn: 362353
This commit is contained in:
Nathan Ridge 2019-06-03 05:07:52 +00:00
parent 3fc299df3d
commit 92524f9bf8
4 changed files with 180 additions and 6 deletions

View File

@ -24,6 +24,29 @@ llvm::Error makeError(const llvm::Twine &Msg) {
return llvm::make_error<llvm::StringError>(Msg,
llvm::inconvertibleErrorCode());
}
} // namespace
// Translates the SymbolRole stored in a Relation into the RelationKind that
// is used for serialization.
// Only RelationBaseOf is recognised for now; any other role ends up in
// llvm_unreachable. Add further branches here when more relations are stored.
RelationKind symbolRoleToRelationKind(index::SymbolRole Role) {
  if (Role == index::SymbolRole::RelationBaseOf)
    return RelationKind::BaseOf;
  llvm_unreachable("Unsupported symbol role");
}
// Inverse of symbolRoleToRelationKind: turns a serialized RelationKind back
// into the SymbolRole used by Relation.
// The switch has no default label; a value that matches none of the
// enumerators leaves the switch and reaches llvm_unreachable.
index::SymbolRole relationKindToSymbolRole(RelationKind Kind) {
  switch (Kind) {
  case RelationKind::BaseOf: {
    index::SymbolRole Role = index::SymbolRole::RelationBaseOf;
    return Role;
  }
  }
  llvm_unreachable("Invalid relation kind");
}
namespace {
// IO PRIMITIVES
// We use little-endian 32 bit ints, sometimes with variable-length encoding.
@ -358,6 +381,28 @@ readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
return Result;
}
// RELATIONS ENCODING
// A relations section is a flat list of relations. Each relation has:
// - SymbolID (subject): 8 bytes
// - relation kind (predicate): 1 byte
// - SymbolID (object): 8 bytes
// In the future, we might prefer a packed representation if the need arises.
void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
OS << R.Subject.raw();
RelationKind Kind = symbolRoleToRelationKind(R.Predicate);
OS.write(static_cast<uint8_t>(Kind));
OS << R.Object.raw();
}
// Reads one relation from Data in the order it was written: subject ID,
// one-byte relation kind, object ID.
// NOTE(review): the kind byte is cast to RelationKind without validation, so
// a byte that is not a known enumerator would reach the llvm_unreachable in
// relationKindToSymbolRole - confirm whether inputs can be untrusted.
Relation readRelation(Reader &Data) {
  SymbolID Subject = Data.consumeID();
  auto Kind = static_cast<RelationKind>(Data.consume8());
  index::SymbolRole Predicate = relationKindToSymbolRole(Kind);
  SymbolID Object = Data.consumeID();
  return {Subject, Predicate, Object};
}
// FILE ENCODING
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
@ -434,6 +479,17 @@ llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
return makeError("malformed or truncated refs");
Result.Refs = std::move(Refs).build();
}
if (Chunks.count("rela")) {
Reader RelationsReader(Chunks.lookup("rela"));
RelationSlab::Builder Relations;
while (!RelationsReader.eof()) {
auto Relation = readRelation(RelationsReader);
Relations.insert(Relation);
}
if (RelationsReader.err())
return makeError("malformed or truncated relations");
Result.Relations = std::move(Relations).build();
}
return std::move(Result);
}
@ -483,6 +539,14 @@ void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
}
}
std::vector<Relation> Relations;
if (Data.Relations) {
for (const auto &Relation : *Data.Relations) {
Relations.emplace_back(Relation);
// No strings to be interned in relations.
}
}
std::string StringSection;
{
llvm::raw_string_ostream StringOS(StringSection);
@ -508,6 +572,16 @@ void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
}
std::string RelationSection;
if (Data.Relations) {
{
llvm::raw_string_ostream RelationOS{RelationSection};
for (const auto &Relation : Relations)
writeRelation(Relation, RelationOS);
}
RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
}
std::string SrcsSection;
{
{
@ -561,6 +635,7 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
SymbolSlab Symbols;
RefSlab Refs;
RelationSlab Relations;
{
trace::Span Tracer("ParseIndex");
if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
@ -568,6 +643,8 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
Symbols = std::move(*I->Symbols);
if (I->Refs)
Refs = std::move(*I->Refs);
if (I->Relations)
Relations = std::move(*I->Relations);
} else {
llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
return nullptr;
@ -576,15 +653,17 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
size_t NumSym = Symbols.size();
size_t NumRefs = Refs.numRefs();
size_t NumRelations = Relations.size();
trace::Span Tracer("BuildIndex");
auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs))
: MemIndex::build(std::move(Symbols), std::move(Refs));
// Log a summary of the loaded index. The placeholders are, in order: index
// implementation name, source file name, estimated memory usage, and the
// symbol, ref and relation counts captured before the slabs were moved.
vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
     " - number of symbols: {3}\n"
     " - number of refs: {4}\n"
     " - number of relations: {5}",
     UseDex ? "Dex" : "MemIndex", SymbolFilename,
     Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
return Index;
}

View File

@ -41,6 +41,7 @@ enum class IndexFileFormat {
// Holds the data parsed from an index file.
// Every member is optional: consumers test each one before using it, and a
// section that was not read leaves the corresponding member unset.
struct IndexFileIn {
// Symbols read from the file, if any.
llvm::Optional<SymbolSlab> Symbols;
// References read from the file, if any.
llvm::Optional<RefSlab> Refs;
// Relations between symbols read from the file, if any.
llvm::Optional<RelationSlab> Relations;
// Keys are URIs of the source files.
llvm::Optional<IncludeGraph> Sources;
};
@ -51,6 +52,7 @@ llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
struct IndexFileOut {
const SymbolSlab *Symbols = nullptr;
const RefSlab *Refs = nullptr;
const RelationSlab *Relations = nullptr;
// Keys are URIs of the source files.
const IncludeGraph *Sources = nullptr;
// TODO: Support serializing Dex posting lists.
@ -59,7 +61,8 @@ struct IndexFileOut {
IndexFileOut() = default;
IndexFileOut(const IndexFileIn &I)
: Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr),
Refs(I.Refs ? I.Refs.getPointer() : nullptr) {}
Refs(I.Refs ? I.Refs.getPointer() : nullptr),
Relations(I.Relations ? I.Relations.getPointer() : nullptr) {}
};
// Serializes an index file.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
@ -67,12 +70,18 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
// Convert a single symbol to YAML, a nice debug representation.
std::string toYAML(const Symbol &);
std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &);
std::string toYAML(const Relation &);
// Build an in-memory static index from an index file.
// The size should be relatively small, so data can be managed in memory.
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef Filename,
bool UseDex = true);
// Used for serializing SymbolRole as used in Relation.
// The enum is backed by a single byte; BaseOf is currently its only
// enumerator and therefore has the value 0.
enum class RelationKind : uint8_t { BaseOf };
// Converts the SymbolRole stored in a Relation to its serialized kind.
RelationKind symbolRoleToRelationKind(index::SymbolRole);
// Converts a serialized kind back to the SymbolRole stored in a Relation.
index::SymbolRole relationKindToSymbolRole(RelationKind);
} // namespace clangd
} // namespace clang

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "Index.h"
#include "Relation.h"
#include "Serialization.h"
#include "SymbolLocation.h"
#include "SymbolOrigin.h"
@ -35,10 +36,11 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Ref)
namespace {
using RefBundle =
std::pair<clang::clangd::SymbolID, std::vector<clang::clangd::Ref>>;
// This is a pale imitation of std::variant<Symbol, RefBundle>
// This is a pale imitation of std::variant<Symbol, RefBundle, Relation>
// One YAML document's worth of data: a symbol, a bundle of refs, or a
// relation. Each alternative is a separate optional; the YAML reader checks
// the members independently after a document has been parsed.
struct VariantEntry {
// Set when the entry carries a symbol.
llvm::Optional<clang::clangd::Symbol> Symbol;
// Set when the entry carries the refs of one symbol.
llvm::Optional<RefBundle> Refs;
// Set when the entry carries a relation.
llvm::Optional<clang::clangd::Relation> Relation;
};
// A class helps YAML to serialize the 32-bit encoded position (Line&Column),
// as YAMLIO can't directly map bitfields.
@ -53,6 +55,8 @@ namespace yaml {
using clang::clangd::Ref;
using clang::clangd::RefKind;
using clang::clangd::Relation;
using clang::clangd::RelationKind;
using clang::clangd::Symbol;
using clang::clangd::SymbolID;
using clang::clangd::SymbolLocation;
@ -60,6 +64,7 @@ using clang::clangd::SymbolOrigin;
using clang::index::SymbolInfo;
using clang::index::SymbolKind;
using clang::index::SymbolLanguage;
using clang::index::SymbolRole;
// Helper to (de)serialize the SymbolID. We serialize it as a hex string.
struct NormalizedSymbolID {
@ -275,6 +280,37 @@ template <> struct MappingTraits<Ref> {
}
};
// Helper to (de)serialize the predicate of a Relation. The SymbolRole is
// represented in YAML by the numeric value of the matching RelationKind.
struct NormalizedSymbolRole {
  // Used when reading: Kind keeps its default until the mapping fills it in.
  NormalizedSymbolRole(IO &) {}

  // Used when writing: stores the RelationKind value of the given role.
  NormalizedSymbolRole(IO &, SymbolRole Role)
      : Kind(static_cast<uint8_t>(
            clang::clangd::symbolRoleToRelationKind(Role))) {}

  // Converts the stored numeric kind back into a SymbolRole.
  SymbolRole denormalize(IO &) {
    auto AsKind = static_cast<RelationKind>(Kind);
    return clang::clangd::relationKindToSymbolRole(AsKind);
  }

  uint8_t Kind = 0;
};
template <> struct MappingTraits<SymbolID> {
static void mapping(IO &IO, SymbolID &ID) {
MappingNormalization<NormalizedSymbolID, SymbolID> NSymbolID(IO, ID);
IO.mapRequired("ID", NSymbolID->HexString);
}
};
template <> struct MappingTraits<Relation> {
static void mapping(IO &IO, Relation &Relation) {
MappingNormalization<NormalizedSymbolRole, SymbolRole> NRole(
IO, Relation.Predicate);
IO.mapRequired("Subject", Relation.Subject);
IO.mapRequired("Predicate", NRole->Kind);
IO.mapRequired("Object", Relation.Object);
}
};
template <> struct MappingTraits<VariantEntry> {
static void mapping(IO &IO, VariantEntry &Variant) {
if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) {
@ -285,6 +321,10 @@ template <> struct MappingTraits<VariantEntry> {
if (!IO.outputting())
Variant.Refs.emplace();
MappingTraits<RefBundle>::mapping(IO, *Variant.Refs);
} else if (IO.mapTag("!Relations", Variant.Relation.hasValue())) {
if (!IO.outputting())
Variant.Relation.emplace();
MappingTraits<Relation>::mapping(IO, *Variant.Relation);
}
}
};
@ -308,11 +348,18 @@ void writeYAML(const IndexFileOut &O, llvm::raw_ostream &OS) {
Entry.Refs = Sym;
Yout << Entry;
}
if (O.Relations)
for (auto &R : *O.Relations) {
VariantEntry Entry;
Entry.Relation = R;
Yout << Entry;
}
}
llvm::Expected<IndexFileIn> readYAML(llvm::StringRef Data) {
SymbolSlab::Builder Symbols;
RefSlab::Builder Refs;
RelationSlab::Builder Relations;
llvm::BumpPtrAllocator
Arena; // store the underlying data of Position::FileURI.
llvm::UniqueStringSaver Strings(Arena);
@ -329,12 +376,15 @@ llvm::Expected<IndexFileIn> readYAML(llvm::StringRef Data) {
if (Variant.Refs)
for (const auto &Ref : Variant.Refs->second)
Refs.insert(Variant.Refs->first, Ref);
if (Variant.Relation)
Relations.insert(*Variant.Relation);
Yin.nextDocument();
}
IndexFileIn Result;
Result.Symbols.emplace(std::move(Symbols).build());
Result.Refs.emplace(std::move(Refs).build());
Result.Relations.emplace(std::move(Relations).build());
return std::move(Result);
}
@ -360,5 +410,16 @@ std::string toYAML(const std::pair<SymbolID, llvm::ArrayRef<Ref>> &Data) {
return Buf;
}
std::string toYAML(const Relation &R) {
std::string Buf;
{
llvm::raw_string_ostream OS(Buf);
llvm::yaml::Output Yout(OS);
Relation Rel = R; // copy: Yout<< requires mutability.
Yout << Rel;
}
return Buf;
}
} // namespace clangd
} // namespace clang

View File

@ -82,6 +82,14 @@ References:
End:
Line: 5
Column: 8
...
--- !Relations
Subject:
ID: 6481EE7AF2841756
Predicate: 0
Object:
ID: 6512AEC512EA3A2D
...
)";
MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
@ -139,6 +147,13 @@ TEST(SerializationTest, YAMLConversions) {
auto Ref1 = ParsedYAML->Refs->begin()->second.front();
EXPECT_EQ(Ref1.Kind, RefKind::Reference);
EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
ASSERT_TRUE(bool(ParsedYAML->Relations));
EXPECT_THAT(*ParsedYAML->Relations,
UnorderedElementsAre(
Relation{Base, index::SymbolRole::RelationBaseOf, Derived}));
}
std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
@ -149,8 +164,15 @@ std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
}
// Converts every entry of the ref slab to its YAML text, one string per
// entry, in iteration order. Each entry is emitted exactly once.
std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
  std::vector<std::string> Result;
  for (const auto &Refs : Slab)
    Result.push_back(toYAML(Refs));
  return Result;
}
std::vector<std::string> YAMLFromRelations(const RelationSlab &Slab) {
std::vector<std::string> Result;
for (const auto &Rel : Slab)
Result.push_back(toYAML(Rel));
return Result;
}
@ -167,12 +189,15 @@ TEST(SerializationTest, BinaryConversions) {
ASSERT_TRUE(bool(In2)) << In.takeError();
ASSERT_TRUE(In2->Symbols);
ASSERT_TRUE(In2->Refs);
ASSERT_TRUE(In2->Relations);
// Assert the YAML serializations match, for nice comparisons and diffs.
EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
EXPECT_THAT(YAMLFromRefs(*In2->Refs),
UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
EXPECT_THAT(YAMLFromRelations(*In2->Relations),
UnorderedElementsAreArray(YAMLFromRelations(*In->Relations)));
}
TEST(SerializationTest, SrcsTest) {