[clangd] Replace StringRef in SymbolLocation with a char pointer.

Summary:
This would save us 8 bytes per ref, and buy us ~40MB in total
for llvm index (from ~300MB to ~260 MB).

The char pointer must be null-terminated, and llvm::StringSaver
guarantees it.

Reviewers: sammccall

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D53427

llvm-svn: 346852
This commit is contained in:
Haojian Wu 2018-11-14 11:55:45 +00:00
parent ee78ae6b86
commit ee54a2b501
14 changed files with 96 additions and 47 deletions

View File

@ -165,7 +165,8 @@ raw_ostream &operator<<(raw_ostream &OS, const Ref &R) {
void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) {
auto &M = Refs[ID];
M.push_back(S);
M.back().Location.FileURI = UniqueStrings.save(M.back().Location.FileURI);
M.back().Location.FileURI =
UniqueStrings.save(M.back().Location.FileURI).data();
}
RefSlab RefSlab::Builder::build() && {

View File

@ -56,14 +56,19 @@ struct SymbolLocation {
uint32_t Column : 12; // 0-based
};
// The URI of the source file where a symbol occurs.
llvm::StringRef FileURI;
/// The symbol range, using half-open range [Start, End).
Position Start;
Position End;
explicit operator bool() const { return !FileURI.empty(); }
explicit operator bool() const { return !StringRef(FileURI).empty(); }
// The URI of the source file where a symbol occurs.
// The string must be null-terminated.
//
// We avoid using llvm::StringRef here to save memory.
// WARNING: unless you know what you are doing, it is recommended to use it
// via llvm::StringRef.
const char *FileURI = "";
};
inline bool operator==(const SymbolLocation::Position &L,
const SymbolLocation::Position &R) {
@ -76,12 +81,16 @@ inline bool operator<(const SymbolLocation::Position &L,
std::make_tuple(R.line(), R.column());
}
inline bool operator==(const SymbolLocation &L, const SymbolLocation &R) {
return std::tie(L.FileURI, L.Start, L.End) ==
std::tie(R.FileURI, R.Start, R.End);
assert(L.FileURI && R.FileURI);
return !std::strcmp(L.FileURI, R.FileURI) &&
std::tie(L.Start, L.End) == std::tie(R.Start, R.End);
}
inline bool operator<(const SymbolLocation &L, const SymbolLocation &R) {
return std::tie(L.FileURI, L.Start, L.End) <
std::tie(R.FileURI, R.Start, R.End);
assert(L.FileURI && R.FileURI);
int Cmp = std::strcmp(L.FileURI, R.FileURI);
if (Cmp != 0)
return Cmp < 0;
return std::tie(L.Start, L.End) < std::tie(R.Start, R.End);
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
@ -288,12 +297,19 @@ raw_ostream &operator<<(raw_ostream &, Symbol::SymbolFlag);
template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
CB(S.Name);
CB(S.Scope);
CB(S.CanonicalDeclaration.FileURI);
CB(S.Definition.FileURI);
CB(S.Signature);
CB(S.CompletionSnippetSuffix);
CB(S.Documentation);
CB(S.ReturnType);
auto RawCharPointerCB = [&CB](const char *&P) {
llvm::StringRef S(P);
CB(S);
assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated");
P = S.data();
};
RawCharPointerCB(S.CanonicalDeclaration.FileURI);
RawCharPointerCB(S.Definition.FileURI);
for (auto &Include : S.IncludeHeaders)
CB(Include.IncludeHeader);
}

View File

@ -113,8 +113,9 @@ Symbol mergeSymbol(const Symbol &L, const Symbol &R) {
bool PreferR = R.Definition && !L.Definition;
// Merge include headers only if both have definitions or both have no
// definition; otherwise, only accumulate references of common includes.
assert(L.Definition.FileURI && R.Definition.FileURI);
bool MergeIncludes =
L.Definition.FileURI.empty() == R.Definition.FileURI.empty();
bool(*L.Definition.FileURI) == bool(*R.Definition.FileURI);
Symbol S = PreferR ? R : L; // The target symbol we're merging into.
const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol.

View File

@ -239,7 +239,7 @@ void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
SymbolLocation readLocation(Reader &Data, ArrayRef<StringRef> Strings) {
SymbolLocation Loc;
Loc.FileURI = Data.consumeString(Strings);
Loc.FileURI = Data.consumeString(Strings).data();
for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
Endpoint->setLine(Data.consumeVar());
Endpoint->setColumn(Data.consumeVar());
@ -409,8 +409,11 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
if (Data.Refs) {
for (const auto &Sym : *Data.Refs) {
Refs.emplace_back(Sym);
for (auto &Ref : Refs.back().second)
Strings.intern(Ref.Location.FileURI);
for (auto &Ref : Refs.back().second) {
StringRef File = Ref.Location.FileURI;
Strings.intern(File);
Ref.Location.FileURI = File.data();
}
}
}

View File

@ -225,7 +225,7 @@ Optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc,
return None;
FileURIStorage = std::move(*U);
SymbolLocation Result;
Result.FileURI = FileURIStorage;
Result.FileURI = FileURIStorage.c_str();
auto Range = getTokenRange(TokLoc, SM, LangOpts);
Result.Start = Range.first;
Result.End = Range.second;
@ -524,7 +524,7 @@ void SymbolCollector::finish() {
Ref R;
R.Location.Start = Range.first;
R.Location.End = Range.second;
R.Location.FileURI = *FileURI;
R.Location.FileURI = FileURI->c_str();
R.Kind = toRefKind(LocAndRole.second);
Refs.insert(*ID, R);
}

View File

@ -20,8 +20,10 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
@ -128,9 +130,26 @@ struct NormalizedPosition {
YPosition P;
};
struct NormalizedFileURI {
NormalizedFileURI(IO &) {}
NormalizedFileURI(IO &, const char *FileURI) { URI = FileURI; }
const char *denormalize(IO &IO) {
assert(IO.getContext() &&
"Expecting an UniqueStringSaver to allocate data");
return static_cast<llvm::UniqueStringSaver *>(IO.getContext())
->save(URI)
.data();
}
std::string URI;
};
template <> struct MappingTraits<SymbolLocation> {
static void mapping(IO &IO, SymbolLocation &Value) {
IO.mapRequired("FileURI", Value.FileURI);
MappingNormalization<NormalizedFileURI, const char *> NFile(IO,
Value.FileURI);
IO.mapRequired("FileURI", NFile->URI);
MappingNormalization<NormalizedPosition, SymbolLocation::Position> NStart(
IO, Value.Start);
IO.mapRequired("Start", NStart->P);
@ -292,7 +311,9 @@ void writeYAML(const IndexFileOut &O, raw_ostream &OS) {
Expected<IndexFileIn> readYAML(StringRef Data) {
SymbolSlab::Builder Symbols;
RefSlab::Builder Refs;
yaml::Input Yin(Data);
BumpPtrAllocator Arena; // store the underlying data of Position::FileURI.
UniqueStringSaver Strings(Arena);
yaml::Input Yin(Data, &Strings);
do {
VariantEntry Variant;
Yin >> Variant;

View File

@ -52,7 +52,7 @@ std::vector<Token> generateSearchTokens(const Symbol &Sym) {
std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
Result.emplace_back(Token::Kind::Scope, Sym.Scope);
// Skip token generation for symbols with unknown declaration location.
if (!Sym.CanonicalDeclaration.FileURI.empty())
if (!StringRef(Sym.CanonicalDeclaration.FileURI).empty())
for (const auto &ProximityURI :
generateProximityURIs(Sym.CanonicalDeclaration.FileURI))
Result.emplace_back(Token::Kind::ProximityURI, ProximityURI);

View File

@ -9,14 +9,16 @@ using testing::AllOf;
using testing::Not;
using testing::UnorderedElementsAre;
using namespace llvm;
namespace clang {
namespace clangd {
MATCHER_P(Named, N, "") { return arg.Name == N; }
MATCHER(Declared, "") { return !arg.CanonicalDeclaration.FileURI.empty(); }
MATCHER(Defined, "") { return !arg.Definition.FileURI.empty(); }
MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; }
MATCHER(Declared, "") {
return !StringRef(arg.CanonicalDeclaration.FileURI).empty();
}
MATCHER(Defined, "") { return !StringRef(arg.Definition.FileURI).empty(); }
MATCHER_P(FileURI, F, "") { return StringRef(arg.Location.FileURI) == F; }
testing::Matcher<const RefSlab &>
RefsAre(std::vector<testing::Matcher<Ref>> Matchers) {
return ElementsAre(testing::Pair(_, UnorderedElementsAreArray(Matchers)));

View File

@ -583,7 +583,7 @@ TEST(CompletionTest, IncludeInsertionPreprocessorIntegrationTests) {
ClangdServer Server(CDB, FS, DiagConsumer, ClangdServer::optsForTest());
auto BarURI = URI::createFile(BarHeader).toString();
Symbol Sym = cls("ns::X");
Sym.CanonicalDeclaration.FileURI = BarURI;
Sym.CanonicalDeclaration.FileURI = BarURI.c_str();
Sym.IncludeHeaders.emplace_back(BarURI, 1);
// Shoten include path based on search dirctory and insert.
auto Results = completions(Server,
@ -614,8 +614,8 @@ TEST(CompletionTest, NoIncludeInsertionWhenDeclFoundInFile) {
Symbol SymY = cls("ns::Y");
std::string BarHeader = testPath("bar.h");
auto BarURI = URI::createFile(BarHeader).toString();
SymX.CanonicalDeclaration.FileURI = BarURI;
SymY.CanonicalDeclaration.FileURI = BarURI;
SymX.CanonicalDeclaration.FileURI = BarURI.c_str();
SymY.CanonicalDeclaration.FileURI = BarURI.c_str();
SymX.IncludeHeaders.emplace_back("<bar>", 1);
SymY.IncludeHeaders.emplace_back("<bar>", 1);
// Shoten include path based on search dirctory and insert.
@ -1252,7 +1252,7 @@ TEST(CompletionTest, OverloadBundling) {
// Differences in header-to-insert suppress bundling.
std::string DeclFile = URI::createFile(testPath("foo")).toString();
NoArgsGFunc.CanonicalDeclaration.FileURI = DeclFile;
NoArgsGFunc.CanonicalDeclaration.FileURI = DeclFile.c_str();
NoArgsGFunc.IncludeHeaders.emplace_back("<foo>", 1);
EXPECT_THAT(
completions(Context + "int y = GFunc^", {NoArgsGFunc}, Opts).Completions,
@ -1959,7 +1959,7 @@ TEST(CompletionTest, EnableSpeculativeIndexRequest) {
TEST(CompletionTest, InsertTheMostPopularHeader) {
std::string DeclFile = URI::createFile(testPath("foo")).toString();
Symbol sym = func("Func");
sym.CanonicalDeclaration.FileURI = DeclFile;
sym.CanonicalDeclaration.FileURI = DeclFile.c_str();
sym.IncludeHeaders.emplace_back("\"foo.h\"", 2);
sym.IncludeHeaders.emplace_back("\"bar.h\"", 1000);
@ -1981,7 +1981,7 @@ TEST(CompletionTest, NoInsertIncludeIfOnePresent) {
std::string DeclFile = URI::createFile(testPath("foo")).toString();
Symbol sym = func("Func");
sym.CanonicalDeclaration.FileURI = DeclFile;
sym.CanonicalDeclaration.FileURI = DeclFile.c_str();
sym.IncludeHeaders.emplace_back("\"foo.h\"", 2);
sym.IncludeHeaders.emplace_back("\"bar.h\"", 1000);

View File

@ -666,7 +666,7 @@ TEST(DexTest, ProximityPathsBoosting) {
TEST(DexTests, Refs) {
DenseMap<SymbolID, std::vector<Ref>> Refs;
auto AddRef = [&](const Symbol& Sym, StringRef Filename, RefKind Kind) {
auto AddRef = [&](const Symbol &Sym, const char *Filename, RefKind Kind) {
auto& SymbolRefs = Refs[Sym.ID];
SymbolRefs.emplace_back();
SymbolRefs.back().Kind = Kind;

View File

@ -31,6 +31,7 @@ using testing::ElementsAre;
using testing::IsEmpty;
using testing::Pair;
using testing::UnorderedElementsAre;
using namespace llvm;
MATCHER_P(RefRange, Range, "") {
return std::make_tuple(arg.Location.Start.line(), arg.Location.Start.column(),
@ -38,12 +39,13 @@ MATCHER_P(RefRange, Range, "") {
std::make_tuple(Range.start.line, Range.start.character,
Range.end.line, Range.end.character);
}
MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; }
MATCHER_P(DeclURI, U, "") { return arg.CanonicalDeclaration.FileURI == U; }
MATCHER_P(DefURI, U, "") { return arg.Definition.FileURI == U; }
MATCHER_P(FileURI, F, "") { return StringRef(arg.Location.FileURI) == F; }
MATCHER_P(DeclURI, U, "") {
return StringRef(arg.CanonicalDeclaration.FileURI) == U;
}
MATCHER_P(DefURI, U, "") { return StringRef(arg.Definition.FileURI) == U; }
MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
using namespace llvm;
namespace clang {
namespace clangd {
namespace {
@ -66,7 +68,7 @@ std::unique_ptr<SymbolSlab> numSlab(int Begin, int End) {
return llvm::make_unique<SymbolSlab>(std::move(Slab).build());
}
std::unique_ptr<RefSlab> refSlab(const SymbolID &ID, StringRef Path) {
std::unique_ptr<RefSlab> refSlab(const SymbolID &ID, const char *Path) {
RefSlab::Builder Slab;
Ref R;
R.Location.FileURI = Path;

View File

@ -36,7 +36,7 @@ MATCHER_P(RefRange, Range, "") {
std::make_tuple(Range.start.line, Range.start.character,
Range.end.line, Range.end.character);
}
MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; }
MATCHER_P(FileURI, F, "") { return StringRef(arg.Location.FileURI) == F; }
TEST(SymbolLocation, Position) {
using Position = SymbolLocation::Position;
@ -222,7 +222,7 @@ TEST(MergeTest, Merge) {
Symbol M = mergeSymbol(L, R);
EXPECT_EQ(M.Name, "Foo");
EXPECT_EQ(M.CanonicalDeclaration.FileURI, "file:///left.h");
EXPECT_EQ(StringRef(M.CanonicalDeclaration.FileURI), "file:///left.h");
EXPECT_EQ(M.References, 3u);
EXPECT_EQ(M.Signature, "()");
EXPECT_EQ(M.CompletionSnippetSuffix, "{$1:0}");
@ -241,14 +241,14 @@ TEST(MergeTest, PreferSymbolWithDefn) {
R.Name = "right";
Symbol M = mergeSymbol(L, R);
EXPECT_EQ(M.CanonicalDeclaration.FileURI, "file:/left.h");
EXPECT_EQ(M.Definition.FileURI, "");
EXPECT_EQ(StringRef(M.CanonicalDeclaration.FileURI), "file:/left.h");
EXPECT_EQ(StringRef(M.Definition.FileURI), "");
EXPECT_EQ(M.Name, "left");
R.Definition.FileURI = "file:/right.cpp"; // Now right will be favored.
M = mergeSymbol(L, R);
EXPECT_EQ(M.CanonicalDeclaration.FileURI, "file:/right.h");
EXPECT_EQ(M.Definition.FileURI, "file:/right.cpp");
EXPECT_EQ(StringRef(M.CanonicalDeclaration.FileURI), "file:/right.h");
EXPECT_EQ(StringRef(M.Definition.FileURI), "file:/right.cpp");
EXPECT_EQ(M.Name, "right");
}

View File

@ -111,7 +111,7 @@ TEST(SerializationTest, YAMLConversions) {
EXPECT_EQ(Sym1.Signature, "");
EXPECT_EQ(Sym1.Documentation, "Foo doc");
EXPECT_EQ(Sym1.ReturnType, "int");
EXPECT_EQ(Sym1.CanonicalDeclaration.FileURI, "file:///path/foo.h");
EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static);
EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
@ -122,7 +122,8 @@ TEST(SerializationTest, YAMLConversions) {
EXPECT_THAT(Sym2, QName("clang::Foo2"));
EXPECT_EQ(Sym2.Signature, "-sig");
EXPECT_EQ(Sym2.ReturnType, "");
EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h");
EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
"file:///path/bar.h");
EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
@ -134,7 +135,7 @@ TEST(SerializationTest, YAMLConversions) {
testing::SizeIs(1))));
auto Ref1 = ParsedYAML->Refs->begin()->second.front();
EXPECT_EQ(Ref1.Kind, RefKind::Reference);
EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc");
EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
}
std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {

View File

@ -53,8 +53,10 @@ MATCHER_P(Snippet, S, "") {
return (arg.Name + arg.CompletionSnippetSuffix).str() == S;
}
MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
MATCHER_P(DeclURI, P, "") { return arg.CanonicalDeclaration.FileURI == P; }
MATCHER_P(DefURI, P, "") { return arg.Definition.FileURI == P; }
MATCHER_P(DeclURI, P, "") {
return StringRef(arg.CanonicalDeclaration.FileURI) == P;
}
MATCHER_P(DefURI, P, "") { return StringRef(arg.Definition.FileURI) == P; }
MATCHER_P(IncludeHeader, P, "") {
return (arg.IncludeHeaders.size() == 1) &&
(arg.IncludeHeaders.begin()->IncludeHeader == P);