[clangd] Incorporate transitive #includes into code complete proximity scoring.

Summary:
We now compute a distance from the main file to the symbol header, which
is a weighted count of:
 - some number of #include traversals from source file --> included file
 - some number of FS traversals from file --> parent directory
 - some number of FS traversals from parent directory --> child file/dir
This calculation is performed in the appropriate URI scheme.

This means we'll get some proximity boost from header files in main-file
contexts, even when these are in different directory trees.

This extended file proximity model is not yet incorporated in the index
interface/implementation.

Reviewers: ioeric

Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, cfe-commits

Differential Revision: https://reviews.llvm.org/D48441

llvm-svn: 336177
This commit is contained in:
Sam McCall 2018-07-03 08:09:29 +00:00
parent a0a52bf195
commit 3f0243fdaf
17 changed files with 653 additions and 259 deletions

View File

@ -19,6 +19,7 @@ add_clang_library(clangDaemon
Diagnostics.cpp
DraftStore.cpp
FindSymbols.cpp
FileDistance.cpp
FuzzyMatch.cpp
GlobalCompilationDatabase.cpp
Headers.cpp

View File

@ -167,7 +167,7 @@ void ClangdServer::codeComplete(PathRef File, Position Pos,
// both the old and the new version in case only one of them matches.
CodeCompleteResult Result = clangd::codeComplete(
File, IP->Command, PreambleData ? &PreambleData->Preamble : nullptr,
PreambleData ? PreambleData->Inclusions : std::vector<Inclusion>(),
PreambleData ? PreambleData->Includes : IncludeStructure(),
IP->Contents, Pos, FS, PCHs, CodeCompleteOpts);
CB(std::move(Result));
};

View File

@ -88,7 +88,7 @@ public:
CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback)
: File(File), ParsedCallback(ParsedCallback) {}
std::vector<Inclusion> takeInclusions() { return std::move(Inclusions); }
IncludeStructure takeIncludes() { return std::move(Includes); }
void AfterExecute(CompilerInstance &CI) override {
if (!ParsedCallback)
@ -103,15 +103,13 @@ public:
std::unique_ptr<PPCallbacks> createPPCallbacks() override {
assert(SourceMgr && "SourceMgr must be set at this point");
return collectInclusionsInMainFileCallback(
*SourceMgr,
[this](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); });
return collectIncludeStructureCallback(*SourceMgr, &Includes);
}
private:
PathRef File;
PreambleParsedCallback ParsedCallback;
std::vector<Inclusion> Inclusions;
IncludeStructure Includes;
SourceManager *SourceMgr = nullptr;
};
@ -153,15 +151,11 @@ ParsedAST::Build(std::unique_ptr<clang::CompilerInvocation> CI,
return llvm::None;
}
std::vector<Inclusion> Inclusions;
// Copy over the includes from the preamble, then combine with the
// non-preamble includes below.
if (Preamble)
Inclusions = Preamble->Inclusions;
Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
Clang->getSourceManager(),
[&Inclusions](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }));
auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
Clang->getPreprocessor().addPPCallbacks(
collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
if (!Action->Execute())
log("Execute() failed when building AST for " + MainInput.getFile());
@ -179,7 +173,7 @@ ParsedAST::Build(std::unique_ptr<clang::CompilerInvocation> CI,
Diags.insert(Diags.begin(), Preamble->Diags.begin(), Preamble->Diags.end());
return ParsedAST(std::move(Preamble), std::move(Clang), std::move(Action),
std::move(ParsedDecls), std::move(Diags),
std::move(Inclusions));
std::move(Includes));
}
ParsedAST::ParsedAST(ParsedAST &&Other) = default;
@ -246,25 +240,24 @@ std::size_t ParsedAST::getUsedBytes() const {
return Total;
}
const std::vector<Inclusion> &ParsedAST::getInclusions() const {
return Inclusions;
const IncludeStructure &ParsedAST::getIncludeStructure() const {
return Includes;
}
PreambleData::PreambleData(PrecompiledPreamble Preamble,
std::vector<Diag> Diags,
std::vector<Inclusion> Inclusions)
std::vector<Diag> Diags, IncludeStructure Includes)
: Preamble(std::move(Preamble)), Diags(std::move(Diags)),
Inclusions(std::move(Inclusions)) {}
Includes(std::move(Includes)) {}
ParsedAST::ParsedAST(std::shared_ptr<const PreambleData> Preamble,
std::unique_ptr<CompilerInstance> Clang,
std::unique_ptr<FrontendAction> Action,
std::vector<Decl *> LocalTopLevelDecls,
std::vector<Diag> Diags, std::vector<Inclusion> Inclusions)
std::vector<Diag> Diags, IncludeStructure Includes)
: Preamble(std::move(Preamble)), Clang(std::move(Clang)),
Action(std::move(Action)), Diags(std::move(Diags)),
LocalTopLevelDecls(std::move(LocalTopLevelDecls)),
Inclusions(std::move(Inclusions)) {
Includes(std::move(Includes)) {
assert(this->Clang);
assert(this->Action);
}
@ -350,7 +343,7 @@ std::shared_ptr<const PreambleData> clangd::buildPreamble(
" for file " + Twine(FileName));
return std::make_shared<PreambleData>(
std::move(*BuiltPreamble), PreambleDiagnostics.take(),
SerializedDeclsCollector.takeInclusions());
SerializedDeclsCollector.takeIncludes());
} else {
log("Could not build a preamble for file " + Twine(FileName));
return nullptr;

View File

@ -45,14 +45,14 @@ namespace clangd {
// Stores Preamble and associated data.
struct PreambleData {
PreambleData(PrecompiledPreamble Preamble, std::vector<Diag> Diags,
std::vector<Inclusion> Inclusions);
IncludeStructure Includes);
tooling::CompileCommand CompileCommand;
PrecompiledPreamble Preamble;
std::vector<Diag> Diags;
// Processes like code completions and go-to-definitions will need #include
// information, and their compile action skips preamble range.
std::vector<Inclusion> Inclusions;
IncludeStructure Includes;
};
/// Information required to run clang, e.g. to parse AST or do code completion.
@ -99,14 +99,14 @@ public:
/// Returns the estimated size of the AST and the accessory structures, in
/// bytes. Does not include the size of the preamble.
std::size_t getUsedBytes() const;
const std::vector<Inclusion> &getInclusions() const;
const IncludeStructure &getIncludeStructure() const;
private:
ParsedAST(std::shared_ptr<const PreambleData> Preamble,
std::unique_ptr<CompilerInstance> Clang,
std::unique_ptr<FrontendAction> Action,
std::vector<Decl *> LocalTopLevelDecls, std::vector<Diag> Diags,
std::vector<Inclusion> Inclusions);
IncludeStructure Includes);
// In-memory preambles must outlive the AST, it is important that this member
// goes before Clang and Action.
@ -124,7 +124,7 @@ private:
// Top-level decls inside the current file. Note that this does not include
// top-level decls from the preamble.
std::vector<Decl *> LocalTopLevelDecls;
std::vector<Inclusion> Inclusions;
IncludeStructure Includes;
};
using PreambleParsedCallback = std::function<void(

View File

@ -22,6 +22,7 @@
#include "AST.h"
#include "CodeCompletionStrings.h"
#include "Compiler.h"
#include "FileDistance.h"
#include "FuzzyMatch.h"
#include "Headers.h"
#include "Logger.h"
@ -763,7 +764,6 @@ struct SemaCompleteInput {
PathRef FileName;
const tooling::CompileCommand &Command;
PrecompiledPreamble const *Preamble;
const std::vector<Inclusion> &PreambleInclusions;
StringRef Contents;
Position Pos;
IntrusiveRefCntPtr<vfs::FileSystem> VFS;
@ -771,12 +771,11 @@ struct SemaCompleteInput {
};
// Invokes Sema code completion on a file.
// If \p Includes is set, it will be initialized after a compiler instance has
// been set up.
// If \p Includes is set, it will be updated based on the compiler invocation.
bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
const clang::CodeCompleteOptions &Options,
const SemaCompleteInput &Input,
std::unique_ptr<IncludeInserter> *Includes = nullptr) {
IncludeStructure *Includes = nullptr) {
trace::Span Tracer("Sema completion");
std::vector<const char *> ArgStrs;
for (const auto &S : Input.Command.CommandLine)
@ -837,29 +836,9 @@ bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
Input.FileName);
return false;
}
if (Includes) {
// Initialize Includes if provided.
// FIXME(ioeric): needs more consistent style support in clangd server.
auto Style = format::getStyle(format::DefaultFormatStyle, Input.FileName,
format::DefaultFallbackStyle, Input.Contents,
Input.VFS.get());
if (!Style) {
log("ERROR: failed to get FormatStyle for file " + Input.FileName +
". Fall back to use LLVM style. Error: " +
llvm::toString(Style.takeError()));
Style = format::getLLVMStyle();
}
*Includes = llvm::make_unique<IncludeInserter>(
Input.FileName, Input.Contents, *Style, Input.Command.Directory,
Clang->getPreprocessor().getHeaderSearchInfo());
for (const auto &Inc : Input.PreambleInclusions)
Includes->get()->addExisting(Inc);
Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
Clang->getSourceManager(), [Includes](Inclusion Inc) {
Includes->get()->addExisting(std::move(Inc));
}));
}
if (Includes)
Clang->getPreprocessor().addPPCallbacks(
collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
if (!Action.Execute()) {
log("Execute() failed when running codeComplete for " + Input.FileName);
return false;
@ -949,24 +928,23 @@ clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
// - TopN determines the results with the best score.
class CodeCompleteFlow {
PathRef FileName;
IncludeStructure Includes; // Complete once the compiler runs.
const CodeCompleteOptions &Opts;
// Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
CompletionRecorder *Recorder = nullptr;
int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
bool Incomplete = false; // Would more be available with a higher limit?
llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
std::unique_ptr<IncludeInserter> Includes; // Initialized once compiler runs.
FileProximityMatcher FileProximityMatch;
// Include-insertion and proximity scoring rely on the include structure.
// This is available after Sema has run.
llvm::Optional<IncludeInserter> Inserter; // Available during runWithSema.
llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
public:
// A CodeCompleteFlow object is only useful for calling run() exactly once.
CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts)
: FileName(FileName), Opts(Opts),
// FIXME: also use path of the main header corresponding to FileName to
// calculate the file proximity, which would capture include/ and src/
// project setup where headers and implementations are not in the same
// directory.
FileProximityMatch(ArrayRef<StringRef>({FileName})) {}
CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
const CodeCompleteOptions &Opts)
: FileName(FileName), Includes(Includes), Opts(Opts) {}
CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
trace::Span Tracer("CodeCompleteFlow");
@ -977,11 +955,45 @@ public:
CodeCompleteResult Output;
auto RecorderOwner = llvm::make_unique<CompletionRecorder>(Opts, [&]() {
assert(Recorder && "Recorder is not set");
assert(Includes && "Includes is not set");
// FIXME(ioeric): needs more consistent style support in clangd server.
auto Style =
format::getStyle("file", SemaCCInput.FileName, "LLVM",
SemaCCInput.Contents, SemaCCInput.VFS.get());
if (!Style) {
log("Failed to get FormatStyle for file" + SemaCCInput.FileName + ": " +
llvm::toString(Style.takeError()) + ". Fallback is LLVM style.");
Style = format::getLLVMStyle();
}
// If preprocessor was run, inclusions from preprocessor callback should
// already be added to Inclusions.
// already be added to Includes.
Inserter.emplace(
SemaCCInput.FileName, SemaCCInput.Contents, *Style,
SemaCCInput.Command.Directory,
Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
for (const auto &Inc : Includes.MainFileIncludes)
Inserter->addExisting(Inc);
// Most of the cost of file proximity is in initializing the FileDistance
// structures based on the observed includes, once per query. Conceptually
// that happens here (though the per-URI-scheme initialization is lazy).
// The per-result proximity scoring is (amortized) very cheap.
FileDistanceOptions ProxOpts{}; // Use defaults.
const auto &SM = Recorder->CCSema->getSourceManager();
llvm::StringMap<SourceParams> ProxSources;
for (auto &Entry : Includes.includeDepth(
SM.getFileEntryForID(SM.getMainFileID())->getName())) {
auto &Source = ProxSources[Entry.getKey()];
Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
// Symbols near our transitive includes are good, but only consider
// things in the same directory or below it. Otherwise there can be
// many false positives.
if (Entry.getValue() > 0)
Source.MaxUpTraversals = 1;
}
FileProximity.emplace(ProxSources, ProxOpts);
Output = runWithSema();
Includes.reset(); // Make sure this doesn't out-live Clang.
Inserter.reset(); // Make sure this doesn't out-live Clang.
SPAN_ATTACH(Tracer, "sema_completion_kind",
getCompletionKindString(Recorder->CCContext.getKind()));
});
@ -1044,6 +1056,7 @@ private:
Req.RestrictForCodeCompletion = true;
Req.Scopes = getQueryScopes(Recorder->CCContext,
Recorder->CCSema->getSourceManager());
// FIXME: we should send multiple weighted paths here.
Req.ProximityPaths.push_back(FileName);
log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])",
Req.Query,
@ -1124,7 +1137,7 @@ private:
SymbolQualitySignals Quality;
SymbolRelevanceSignals Relevance;
Relevance.Query = SymbolRelevanceSignals::CodeComplete;
Relevance.FileProximityMatch = &FileProximityMatch;
Relevance.FileProximityMatch = FileProximity.getPointer();
auto &First = Bundle.front();
if (auto FuzzyScore = fuzzyScore(First))
Relevance.NameMatch = *FuzzyScore;
@ -1174,7 +1187,7 @@ private:
: nullptr;
if (!Builder)
Builder.emplace(Recorder->CCSema->getASTContext(), Item, SemaCCS,
*Includes, FileName, Opts);
*Inserter, FileName, Opts);
else
Builder->add(Item, SemaCCS);
}
@ -1182,15 +1195,16 @@ private:
}
};
CodeCompleteResult codeComplete(
PathRef FileName, const tooling::CompileCommand &Command,
PrecompiledPreamble const *Preamble,
const std::vector<Inclusion> &PreambleInclusions, StringRef Contents,
Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
std::shared_ptr<PCHContainerOperations> PCHs, CodeCompleteOptions Opts) {
return CodeCompleteFlow(FileName, Opts)
.run({FileName, Command, Preamble, PreambleInclusions, Contents, Pos, VFS,
PCHs});
CodeCompleteResult codeComplete(PathRef FileName,
const tooling::CompileCommand &Command,
PrecompiledPreamble const *Preamble,
const IncludeStructure &PreambleInclusions,
StringRef Contents, Position Pos,
IntrusiveRefCntPtr<vfs::FileSystem> VFS,
std::shared_ptr<PCHContainerOperations> PCHs,
CodeCompleteOptions Opts) {
return CodeCompleteFlow(FileName, PreambleInclusions, Opts)
.run({FileName, Command, Preamble, Contents, Pos, VFS, PCHs});
}
SignatureHelp signatureHelp(PathRef FileName,
@ -1205,11 +1219,11 @@ SignatureHelp signatureHelp(PathRef FileName,
Options.IncludeMacros = false;
Options.IncludeCodePatterns = false;
Options.IncludeBriefComments = false;
std::vector<Inclusion> PreambleInclusions = {}; // Unused for signatureHelp
IncludeStructure PreambleInclusions; // Unused for signatureHelp
semaCodeComplete(llvm::make_unique<SignatureHelpCollector>(Options, Result),
Options,
{FileName, Command, Preamble, PreambleInclusions, Contents,
Pos, std::move(VFS), std::move(PCHs)});
{FileName, Command, Preamble, Contents, Pos, std::move(VFS),
std::move(PCHs)});
return Result;
}

View File

@ -144,12 +144,14 @@ struct CodeCompleteResult {
raw_ostream &operator<<(raw_ostream &, const CodeCompleteResult &);
/// Get code completions at a specified \p Pos in \p FileName.
CodeCompleteResult codeComplete(
PathRef FileName, const tooling::CompileCommand &Command,
PrecompiledPreamble const *Preamble,
const std::vector<Inclusion> &PreambleInclusions, StringRef Contents,
Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
std::shared_ptr<PCHContainerOperations> PCHs, CodeCompleteOptions Opts);
CodeCompleteResult codeComplete(PathRef FileName,
const tooling::CompileCommand &Command,
PrecompiledPreamble const *Preamble,
const IncludeStructure &PreambleInclusions,
StringRef Contents, Position Pos,
IntrusiveRefCntPtr<vfs::FileSystem> VFS,
std::shared_ptr<PCHContainerOperations> PCHs,
CodeCompleteOptions Opts);
/// Get signature help at a specified \p Pos in \p FileName.
SignatureHelp signatureHelp(PathRef FileName,

View File

@ -0,0 +1,173 @@
//===--- FileDistance.cpp - File proximity scoring --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The FileDistance structure allows calculating the minimum distance to paths
// in a single tree.
// We simply walk up the path's ancestors until we find a node whose cost is
// known, and add the cost of walking back down. Initialization ensures this
// gives the correct path to the roots.
// We cache the results, so that the runtime is O(|A|), where A is the set of
// all distinct ancestors of visited paths.
//
// Example after initialization with /=2, /bar=0, DownCost = 1:
// / = 2
// /bar = 0
//
// After querying /foo/bar and /bar/foo:
// / = 2
// /bar = 0
// /bar/foo = 1
// /foo = 3
// /foo/bar = 4
//
// URIDistance creates FileDistance lazily for each URI scheme encountered. In
// practice this is a small constant factor.
//
//===----------------------------------------------------------------------===//
#include "FileDistance.h"
#include "Logger.h"
#include "llvm/ADT/STLExtras.h"
#include <queue>
#define DEBUG_TYPE "FileDistance"
namespace clang {
namespace clangd {
using namespace llvm;
// Normalizes a path so that equivalent spellings produce the same string.
// The result is either "/" or a sequence of "/segment" components:
//   /foo/  --> /foo
//   a/b/c  --> /a/b/c
//   ""     --> /
// Trailing '/' characters are stripped first, then sys::path::native() is
// applied in posix style, and finally a leading '/' is added if missing.
// NOTE(review): backslash-separated inputs (e.g. C:\foo\bar) depend entirely
// on native() to rewrite separators; nothing here changes letter case or
// strips a trailing backslash - confirm the expected form for Windows paths.
static SmallString<128> canonicalize(StringRef Path) {
  SmallString<128> Canonical = Path.rtrim('/');
  native(Canonical, sys::path::Style::posix);
  const bool HasLeadingSlash = !Canonical.empty() && Canonical.front() == '/';
  if (!HasLeadingSlash)
    Canonical.insert(Canonical.begin(), '/');
  return Canonical;
}
// Out-of-line definition of the static constexpr member declared in the class.
const unsigned FileDistance::kUnreachable;

// Seeds the cache with the cost of every source and of the ancestors each
// source is allowed to reach, then pushes costs back down the directory tree
// wherever going "parent -> child" is cheaper than what is already known.
//
// Sources: path -> (base cost, maximum number of up-traversals).
// Opts:    per-step costs (UpCost / DownCost) used for the computation.
//
// Paths are identified by the hash of their canonical form only.
FileDistance::FileDistance(StringMap<SourceParams> Sources,
                           const FileDistanceOptions &Opts)
    : Opts(Opts) {
  llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
  // Compute the best distance following only up edges.
  // Keep track of down edges, in case we can use them to improve on this.
  for (const auto &S : Sources) {
    auto Canonical = canonicalize(S.getKey());
    LLVM_DEBUG(dbgs() << "Source " << Canonical << " = " << S.second.Cost
                      << ", MaxUp=" << S.second.MaxUpTraversals << "\n");
    // Walk up to ancestors of this source, assigning cost.
    StringRef Rest = Canonical;
    llvm::hash_code Hash = hash_value(Rest);
    for (unsigned I = 0; !Rest.empty(); ++I) {
      Rest = parent_path(Rest, sys::path::Style::posix);
      auto NextHash = hash_value(Rest);
      // Sources that share ancestors would otherwise record the same edge
      // repeatedly, and the BFS below would then revisit whole subtrees once
      // per duplicate.
      auto &Down = DownEdges[NextHash];
      if (!llvm::is_contained(Down, Hash))
        Down.push_back(Hash);
      // We can't just break after MaxUpTraversals, must still set DownEdges.
      if (I > S.getValue().MaxUpTraversals) {
        if (Cache.find(Hash) != Cache.end())
          break;
      } else {
        unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
        auto R = Cache.try_emplace(Hash, Cost);
        if (!R.second) {
          if (Cost < R.first->second) {
            R.first->second = Cost;
          } else {
            // If we're not the best way to get to this path, stop assigning.
            break;
          }
        }
      }
      Hash = NextHash;
    }
  }
  // Now propagate scores parent -> child if that's an improvement.
  // BFS ensures we propagate down chains (must visit parents before children).
  std::queue<hash_code> Next;
  for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef(""))))
    Next.push(Child);
  while (!Next.empty()) {
    auto Parent = Next.front();
    Next.pop();
    // A directory beyond every source's MaxUpTraversals has no cache entry.
    // It must count as unreachable: DenseMap::lookup() would report 0 for it
    // and make its children look adjacent to a source.
    auto ParentIt = Cache.find(Parent);
    const unsigned ParentCost =
        ParentIt == Cache.end() ? kUnreachable : ParentIt->second;
    for (auto Child : DownEdges.lookup(Parent)) {
      auto &ChildCost =
          Cache.try_emplace(Child, kUnreachable).first->getSecond();
      // Never add to kUnreachable: the unsigned sum would wrap around and
      // look like a tiny cost.
      if (ParentCost != kUnreachable &&
          ParentCost + Opts.DownCost < ChildCost)
        ChildCost = ParentCost + Opts.DownCost;
      // Keep descending even below unreachable parents; deeper nodes may have
      // costs of their own that need to flow further down.
      Next.push(Child);
    }
  }
}
// Returns the cached-or-derived distance to Path.
// The path is canonicalized, then its ancestors are walked upwards until one
// with a known cost is found; every uncached node on the way is then filled
// in from the top down, adding DownCost per step. If no known ancestor exists
// the result (and every node filled in) is kUnreachable.
unsigned FileDistance::distance(StringRef Path) {
  auto Canonical = canonicalize(Path);
  // Hashes of path components without a cached cost, deepest first.
  SmallVector<hash_code, 16> Uncached;
  unsigned Cost = kUnreachable;
  StringRef Current = Canonical;
  while (!Current.empty()) {
    auto Hash = hash_value(Current);
    auto Known = Cache.find(Hash);
    if (Known != Cache.end()) {
      Cost = Known->second;
      break;
    }
    Uncached.push_back(Hash);
    Current = parent_path(Current, sys::path::Style::posix);
  }
  // Memoize the nodes between the known ancestor (exclusive) and the queried
  // path (inclusive), shallowest first so the cost accumulates downwards.
  for (auto It = Uncached.rbegin(), End = Uncached.rend(); It != End; ++It) {
    if (Cost != kUnreachable)
      Cost += Opts.DownCost;
    Cache.try_emplace(*It, Cost);
  }
  LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n");
  return Cost;
}
// Returns the distance to the file named by a URI string.
// Results are memoized by the hash of the raw string, so a URI is parsed at
// most once. A URI that fails to parse is logged and stays at kUnreachable.
unsigned URIDistance::distance(llvm::StringRef URI) {
  auto Inserted =
      Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable);
  unsigned &Result = Inserted.first->second;
  if (!Inserted.second)
    return Result; // Seen before: reuse the stored answer.
  auto Parsed = clangd::URI::parse(URI);
  if (!Parsed) {
    log("URIDistance::distance() of unparseable " + URI + ": " +
        llvm::toString(Parsed.takeError()));
    return Result;
  }
  LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << Parsed->body()
                    << ")\n");
  // Delegate to the per-scheme FileDistance, comparing URI bodies.
  Result = forScheme(Parsed->scheme()).distance(Parsed->body());
  return Result;
}
// Returns the FileDistance used for URIs of the given scheme, building it on
// first use. Each source is converted to a URI in that scheme and its body
// becomes a FileDistance source; sources that cannot be converted are dropped.
FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
  auto &Slot = ByScheme[Scheme];
  if (Slot)
    return *Slot;
  llvm::StringMap<SourceParams> Translated;
  for (const auto &Source : Sources) {
    auto AsURI = clangd::URI::create(Source.getKey(), Scheme);
    if (!AsURI) {
      // Not representable in this scheme; discard the error and move on.
      consumeError(AsURI.takeError());
      continue;
    }
    Translated.try_emplace(AsURI->body(), Source.getValue());
  }
  LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": "
                    << Translated.size() << "/" << Sources.size()
                    << " sources\n");
  Slot.reset(new FileDistance(std::move(Translated), Opts));
  return *Slot;
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,109 @@
//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This library measures the distance between file paths.
// It's used for ranking symbols, e.g. in code completion.
// |foo/bar.h -> foo/bar.h| = 0.
// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
// This is an edit-distance, where edits go up or down the directory tree.
// It's not symmetrical, the costs of going up and down may not match.
//
// Dealing with multiple sources:
// In practice we care about the distance from a source file, but files near
// its main-header and #included files are considered "close".
// So we start with a set of (anchor, cost) pairs, and call the distance to a
// path the minimum of `cost + |source -> path|`.
//
// We allow each source to limit the number of up-traversals paths may start
// with. Up-traversals may reach things that are not "semantically near".
//
// Symbol URI schemes:
// Symbol locations may be represented by URIs rather than file paths directly.
// In this case we want to perform distance computations in URI space rather
// than in file-space, without performing redundant conversions.
// Therefore we have a lookup structure that accepts URIs, so that intermediate
// calculations for the same scheme can be reused.
//
// Caveats:
// Assuming up and down traversals each have uniform costs is simplistic.
// Often there are "semantic roots" whose children are almost unrelated.
// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
//
//===----------------------------------------------------------------------===//
#include "URI.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
#include <limits>
#include <memory>
#include <utility>
namespace clang {
namespace clangd {
// Per-step costs used when measuring the distance between files.
// The notation |a -> b| below means "the cost of moving from a to b".
struct FileDistanceOptions {
unsigned UpCost = 2; // File or directory to its parent: |foo/bar.h -> foo|
unsigned DownCost = 1; // Directory to one of its children: |foo -> foo/bar.h|
unsigned IncludeCost = 2; // One #include edge: |foo.cc -> included_header.h|
};
// Describes one starting point ("source") for distance computations.
struct SourceParams {
// Base cost for paths starting at this source.
unsigned Cost = 0;
// Limits the number of upwards traversals allowed from this source.
// The default (largest unsigned value) is effectively "no limit".
unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
};
// Supports lookups to find the minimum distance to a file from any source.
// This object should be reused, it memoizes intermediate computations.
class FileDistance {
public:
// Distance reported for paths that no source can reach; the largest unsigned
// value.
static constexpr unsigned kUnreachable = std::numeric_limits<unsigned>::max();
// Sources maps each source path to its base cost and up-traversal limit.
// Opts supplies the per-step costs; defaults are used when omitted.
FileDistance(llvm::StringMap<SourceParams> Sources,
const FileDistanceOptions &Opts = {});
// Computes the minimum distance from any source to the file path.
// Not const: the result and intermediate ancestors are added to the cache.
unsigned distance(llvm::StringRef Path);
private:
// Costs computed so far. Always contains sources and their ancestors.
// We store hash codes only. Collisions are rare and consequences aren't dire.
llvm::DenseMap<llvm::hash_code, unsigned> Cache;
// Per-step costs captured at construction.
FileDistanceOptions Opts;
};
// Supports lookups like FileDistance, but the lookup keys are URIs.
// We convert each of the sources to the scheme of the URI and do a FileDistance
// comparison on the bodies.
class URIDistance {
public:
  // Sources maps each source path to its base cost and up-traversal limit.
  // Opts supplies the per-step costs; defaults are used when omitted.
  // Sources is taken by value and moved into place, so callers that pass an
  // rvalue pay for no copy at all and others pay for exactly one.
  URIDistance(llvm::StringMap<SourceParams> Sources,
              const FileDistanceOptions &Opts = {})
      : Sources(std::move(Sources)), Opts(Opts) {}

  // Computes the minimum distance from any source to the URI.
  // Only sources that can be mapped into the URI's scheme are considered.
  unsigned distance(llvm::StringRef URI);

private:
  // Returns the FileDistance for a URI scheme, creating it if needed.
  FileDistance &forScheme(llvm::StringRef Scheme);

  // We cache the results using the original strings so we can skip URI parsing.
  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
  // The sources as given at construction, keyed by path.
  llvm::StringMap<SourceParams> Sources;
  // One lazily-built FileDistance per URI scheme encountered.
  llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
  // Per-step costs handed to every FileDistance that gets created.
  FileDistanceOptions Opts;
};
} // namespace clangd
} // namespace clang

View File

@ -23,9 +23,8 @@ namespace {
class RecordHeaders : public PPCallbacks {
public:
RecordHeaders(const SourceManager &SM,
std::function<void(Inclusion)> Callback)
: SM(SM), Callback(std::move(Callback)) {}
RecordHeaders(const SourceManager &SM, IncludeStructure *Out)
: SM(SM), Out(Out) {}
// Record existing #includes - both written and resolved paths. Only #includes
// in the main file are collected.
@ -36,21 +35,28 @@ public:
llvm::StringRef /*RelativePath*/,
const Module * /*Imported*/,
SrcMgr::CharacteristicKind /*FileType*/) override {
// Only inclusion directives in the main file make sense. The user cannot
// select directives not in the main file.
if (HashLoc.isInvalid() || !SM.isInMainFile(HashLoc))
return;
std::string Written =
(IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str();
std::string Resolved = (!File || File->tryGetRealPathName().empty())
? ""
: File->tryGetRealPathName();
Callback({halfOpenToRange(SM, FilenameRange), Written, Resolved});
if (SM.isInMainFile(HashLoc))
Out->MainFileIncludes.push_back({
halfOpenToRange(SM, FilenameRange),
(IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(),
File ? File->tryGetRealPathName() : "",
});
if (File) {
auto *IncludingFileEntry = SM.getFileEntryForID(SM.getFileID(HashLoc));
if (!IncludingFileEntry) {
assert(SM.getBufferName(HashLoc).startswith("<") &&
"Expected #include location to be a file or <built-in>");
// Treat as if included from the main file.
IncludingFileEntry = SM.getFileEntryForID(SM.getMainFileID());
}
Out->recordInclude(IncludingFileEntry->getName(), File->getName(),
File->tryGetRealPathName());
}
}
private:
const SourceManager &SM;
std::function<void(Inclusion)> Callback;
IncludeStructure *Out;
};
} // namespace
@ -65,9 +71,59 @@ bool HeaderFile::valid() const {
}
std::unique_ptr<PPCallbacks>
collectInclusionsInMainFileCallback(const SourceManager &SM,
std::function<void(Inclusion)> Callback) {
return llvm::make_unique<RecordHeaders>(SM, std::move(Callback));
collectIncludeStructureCallback(const SourceManager &SM,
IncludeStructure *Out) {
return llvm::make_unique<RecordHeaders>(SM, Out);
}
// Records one "IncludingName #includes IncludedName" edge.
// Both names are interned to file indexes (the included file first). The
// included file's real path is remembered the first time a non-empty one is
// seen; later values never overwrite it.
void IncludeStructure::recordInclude(llvm::StringRef IncludingName,
                                     llvm::StringRef IncludedName,
                                     llvm::StringRef IncludedRealName) {
  auto IncludedIdx = fileIndex(IncludedName);
  const bool HaveRealName = !IncludedRealName.empty();
  if (HaveRealName && RealPathNames[IncludedIdx].empty())
    RealPathNames[IncludedIdx] = IncludedRealName;
  auto IncludingIdx = fileIndex(IncludingName);
  IncludeChildren[IncludingIdx].push_back(IncludedIdx);
}
// Interns a file name, returning its index.
// A name seen for the first time gets the next free index and an (initially
// empty) slot in RealPathNames, keeping both containers the same length.
unsigned IncludeStructure::fileIndex(llvm::StringRef Name) {
  auto Inserted = NameToIndex.try_emplace(Name, RealPathNames.size());
  const bool IsNewName = Inserted.second;
  if (IsNewName)
    RealPathNames.emplace_back();
  return Inserted.first->getValue();
}
// Breadth-first walk of the include graph starting at Root.
// Returns a map from file to minimum include depth: Root itself (keyed by the
// name passed in) is 0, files it includes are 1, and so on. Files other than
// Root are keyed by their real path and omitted when no real path is known,
// although their own includes are still followed.
llvm::StringMap<unsigned>
IncludeStructure::includeDepth(llvm::StringRef Root) const {
  llvm::StringMap<unsigned> Depths;
  Depths[Root] = 0; // Depth 0 is the root file only.

  llvm::DenseSet<unsigned> Visited;
  std::vector<unsigned> Frontier;
  auto RootIt = NameToIndex.find(Root);
  if (RootIt != NameToIndex.end()) {
    Frontier.push_back(RootIt->second);
    Visited.insert(RootIt->second);
  }

  // Each pass expands the previous frontier into the next depth level.
  std::vector<unsigned> Expanding;
  unsigned Level = 1;
  while (!Frontier.empty()) {
    Expanding.clear();
    Expanding.swap(Frontier);
    for (const auto &Parent : Expanding) {
      for (const auto &Child : IncludeChildren.lookup(Parent)) {
        if (!Visited.insert(Child).second)
          continue; // Already reached at the same or a shallower depth.
        Frontier.push_back(Child);
        const auto &RealPath = RealPathNames[Child];
        // Files without a real path can't be reported.
        if (!RealPath.empty())
          Depths[RealPath] = Level;
      }
    }
    ++Level;
  }
  return Depths;
}
/// FIXME(ioeric): we might not want to insert an absolute include path if the

View File

@ -45,10 +45,47 @@ struct Inclusion {
Path Resolved; // Resolved path of included file. Empty if not resolved.
};
// Information captured about the inclusion graph in a translation unit.
// This includes detailed information about the direct #includes, and summary
// information about all transitive includes.
//
// It should be built incrementally with collectIncludeStructureCallback().
// When we build the preamble, we capture and store its include structure along
// with the preamble data. When we use the preamble, we can copy its
// IncludeStructure and use another collectIncludeStructureCallback() to fill
// in any non-preamble inclusions.
class IncludeStructure {
public:
// The #include directives written in the main file itself.
std::vector<Inclusion> MainFileIncludes;
// Returns all transitively reachable files (keyed by real path), with their
// minimum include depth.
// Root --> 0, #included file --> 1, etc.
// Root is clang's name for a file, which may not be absolute.
// Usually it should be SM.getFileEntryForID(SM.getMainFileID())->getName().
llvm::StringMap<unsigned> includeDepth(llvm::StringRef Root) const;
// This updates includeDepth(), but not MainFileIncludes.
// IncludedRealName may be empty if the real path is not known.
void recordInclude(llvm::StringRef IncludingName,
llvm::StringRef IncludedName,
llvm::StringRef IncludedRealName);
private:
// Identifying files in a way that persists from preamble build to subsequent
// builds is surprisingly hard. FileID is unavailable in InclusionDirective(),
// and RealPathName and UniqueID are not preserved in the preamble.
// We use the FileEntry::Name, which is stable, interned into a "file index".
// The paths we want to expose are the RealPathName, so store those too.
std::vector<std::string> RealPathNames; // In file index order.
// Returns the file index for Name, assigning a new one on first sight.
unsigned fileIndex(llvm::StringRef Name);
llvm::StringMap<unsigned> NameToIndex; // Values are file indexes.
// Maps a file's index to that of the files it includes.
llvm::DenseMap<unsigned, SmallVector<unsigned, 8>> IncludeChildren;
};
/// Returns a PPCallback that visits all inclusions in the main file.
std::unique_ptr<PPCallbacks>
collectInclusionsInMainFileCallback(const SourceManager &SM,
std::function<void(Inclusion)> Callback);
collectIncludeStructureCallback(const SourceManager &SM, IncludeStructure *Out);
// Calculates insertion edit for including a new header in a file.
class IncludeInserter {

View File

@ -7,17 +7,18 @@
//
//===---------------------------------------------------------------------===//
#include "Quality.h"
#include <cmath>
#include "FileDistance.h"
#include "URI.h"
#include "index/Index.h"
#include "clang/AST/ASTContext.h"
#include "clang/Basic/CharInfo.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
namespace clang {
namespace clangd {
@ -187,60 +188,6 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
return OS;
}
/// Computes a text-only proximity score between two URI strings \p From and
/// \p To that share a scheme. The URIs are never parsed: everything after the
/// first ':' is cut into '/'-separated chunks (empty chunks are dropped), and
/// every chunk counts as one file or directory.
/// For example, "uri:///a/b/c" will be treated as /a/b/c
static float uriProximity(StringRef From, StringRef To) {
  const auto FromParts = From.split(':');
  const auto ToParts = To.split(':');
  assert((FromParts.first == ToParts.first) &&
         "URIs must have the same scheme in order to compute proximity.");

  SmallVector<StringRef, 8> FromChunks;
  FromParts.second.split(FromChunks, '/', /*MaxSplit=*/-1,
                         /*KeepEmpty=*/false);
  SmallVector<StringRef, 8> ToChunks;
  ToParts.second.split(ToChunks, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false);

  // Number of leading chunks the two paths have in common.
  unsigned Shared = 0;
  while (Shared < FromChunks.size() && Shared < ToChunks.size() &&
         FromChunks[Shared] == ToChunks[Shared])
    ++Shared;

  // Going up from \p From and going down into \p To are both penalized, but
  // going down costs less because subprojects are more closely related than
  // superprojects. DownDist / 2 is integer division and rounds down.
  int UpDist = FromChunks.size() - Shared;
  int DownDist = ToChunks.size() - Shared;
  return std::pow(0.7, UpDist + DownDist / 2);
}
// Copies every entry of the ProximityPaths argument into the member of the
// same name, so the matcher does not keep referring to the caller's strings.
FileProximityMatcher::FileProximityMatcher(ArrayRef<StringRef> ProximityPaths)
: ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {}
// Returns the best proximity score between \p SymbolURI and any of the stored
// proximity paths, or 0 when there are no paths, the URI is empty, or no path
// can be expressed in the URI's scheme.
float FileProximityMatcher::uriProximity(StringRef SymbolURI) const {
  if (ProximityPaths.empty() || SymbolURI.empty())
    return 0;

  // Each path is re-encoded with the symbol's own scheme so the comparison can
  // stay purely text-based and skip expensive URI encoding/decoding.
  const StringRef Scheme = SymbolURI.split(':').first;
  float Best = 0;
  for (const auto &Path : ProximityPaths) {
    auto PathURI = URI::create(Path, Scheme);
    if (!PathURI) {
      // This path has no representation in the scheme; drop the error.
      llvm::consumeError(PathURI.takeError());
      continue;
    }
    Best = std::max(Best, clangd::uriProximity(PathURI->toString(), SymbolURI));
  }
  return Best;
}
// Prints the matcher's proximity paths as a comma-separated list.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const FileProximityMatcher &M) {
  const auto JoinedPaths =
      llvm::join(M.ProximityPaths.begin(), M.ProximityPaths.end(), ",");
  OS << formatv("File proximity matcher: ")
     << formatv("ProximityPaths[{0}]", JoinedPaths);
  return OS;
}
static SymbolRelevanceSignals::AccessibleScope
ComputeScope(const NamedDecl *D) {
// Injected "Foo" within the class "Foo" has file scope, not class scope.
@ -288,6 +235,15 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
Scope = std::min(Scope, ComputeScope(SemaCCResult.Declaration));
}
// Converts the distance that \p D reports for \p SymbolURI into a score.
// Returns {score, distance}; both are 0 when there is no distance source or
// the URI is empty. A distance of 0 yields a score of 1, and the score decays
// exponentially as the distance grows.
static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
                                                 URIDistance *D) {
  const bool CanScore = D != nullptr && !SymbolURI.empty();
  if (!CanScore)
    return std::make_pair(0.f, 0u);

  const unsigned Dist = D->distance(SymbolURI);
  // Assume approximately default options are used for sensible scoring.
  const float Exponent = Dist * -0.4f / FileDistanceOptions().UpCost;
  return std::make_pair(std::exp(Exponent), Dist);
}
float SymbolRelevanceSignals::evaluate() const {
float Score = 1;
@ -296,11 +252,10 @@ float SymbolRelevanceSignals::evaluate() const {
Score *= NameMatch;
float IndexProximityScore =
FileProximityMatch ? FileProximityMatch->uriProximity(SymbolURI) : 0;
// Proximity scores are [0,1] and we translate them into a multiplier in the
// range from 1 to 2.
Score *= 1 + std::max(IndexProximityScore, SemaProximityScore);
// range from 1 to 3.
Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
SemaProximityScore);
// Symbols like local variables may only be referenced within their scope.
// Conversely if we're in that scope, it's likely we'll reference them.
@ -331,9 +286,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
OS << formatv("\tForbidden: {0}\n", S.Forbidden);
OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
if (S.FileProximityMatch) {
OS << "\tIndex proximity: "
<< S.FileProximityMatch->uriProximity(S.SymbolURI) << " ("
<< *S.FileProximityMatch << ")\n";
auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
Score.second);
}
OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));

View File

@ -38,6 +38,7 @@ namespace clang {
class CodeCompletionResult;
namespace clangd {
struct Symbol;
class URIDistance;
// Signals structs are designed to be aggregated from 0 or more sources.
// A default instance has neutral signals, and sources are merged into it.
@ -69,15 +70,13 @@ struct SymbolQualitySignals {
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const SymbolQualitySignals &);
class FileProximityMatcher;
/// Attributes of a symbol-query pair that affect how much we like it.
struct SymbolRelevanceSignals {
/// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
float NameMatch = 1;
bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
const FileProximityMatcher *FileProximityMatch = nullptr;
URIDistance *FileProximityMatch = nullptr;
/// This is used to calculate proximity between the index symbol and the
/// query.
llvm::StringRef SymbolURI;
@ -111,25 +110,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &,
/// Combine symbol quality and relevance into a single score.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
/// Scores how close a symbol's declaring file is to a fixed set of proximity
/// paths.
class FileProximityMatcher {
public:
/// \p ProximityPaths are used to compute proximity scores from symbol's
/// declaring file. The best score will be used.
explicit FileProximityMatcher(
llvm::ArrayRef<llvm::StringRef> ProximityPaths);
/// Calculates the best proximity score from proximity paths to the symbol's
/// URI. Score is [0-1], 1 means \p SymbolURI exactly matches a proximity
/// path. When a path cannot be encoded into the same scheme as \p
/// SymbolURI, the proximity will be 0.
float uriProximity(llvm::StringRef SymbolURI) const;
private:
// The proximity paths, stored as owned strings.
llvm::SmallVector<std::string, 2> ProximityPaths;
// The stream operator reads ProximityPaths directly.
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const FileProximityMatcher &);
};
/// TopN<T> is a lossy container that preserves only the "best" N elements.
template <typename T, typename Compare = std::greater<T>> class TopN {
public:

View File

@ -230,13 +230,10 @@ llvm::Optional<SymbolID> getSymbolID(const Decl *D) {
std::vector<Location> findDefinitions(ParsedAST &AST, Position Pos,
const SymbolIndex *Index) {
const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
SourceLocation SourceLocationBeg =
getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
std::vector<Location> Result;
// Handle goto definition for #include.
for (auto &Inc : AST.getInclusions()) {
Position Pos = sourceLocToPosition(SourceMgr, SourceLocationBeg);
for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) {
if (!Inc.Resolved.empty() && Inc.R.contains(Pos))
Result.push_back(Location{URIForFile{Inc.Resolved}, {}});
}
@ -244,6 +241,8 @@ std::vector<Location> findDefinitions(ParsedAST &AST, Position Pos,
return Result;
// Identified symbols at a specific position.
SourceLocation SourceLocationBeg =
getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg);
for (auto Item : Symbols.Macros) {

View File

@ -17,6 +17,7 @@ add_extra_unittest(ClangdTests
ContextTests.cpp
DraftStoreTests.cpp
FileIndexTests.cpp
FileDistanceTests.cpp
FindSymbolsTests.cpp
FuzzyMatchTests.cpp
GlobalCompilationDatabaseTests.cpp

View File

@ -0,0 +1,93 @@
//===-- FileDistanceTests.cpp ------------------------*- C++ -*-----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "FileDistance.h"
#include "TestFS.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang {
namespace clangd {
namespace {
// Checks basic distances with UpCost=5 and DownCost=3 from three sources, one
// of which carries a base cost of 2.
TEST(FileDistanceTests, Distance) {
  FileDistanceOptions Opts;
  Opts.UpCost = 5;
  Opts.DownCost = 3;
  SourceParams CostTwo;
  CostTwo.Cost = 2;

  llvm::StringMap<SourceParams> Sources;
  Sources.try_emplace("tools/clang/lib/Format/FormatToken.cpp");
  Sources.try_emplace("tools/clang/include/clang/Format/FormatToken.h");
  Sources.try_emplace("include/llvm/ADT/StringRef.h", CostTwo);
  FileDistance D(Sources, Opts);

  // A source is at its own base cost.
  EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp"), 0u);
  EXPECT_EQ(D.distance("include/llvm/ADT/StringRef.h"), 2u);

  // One step up to the parent directory.
  EXPECT_EQ(D.distance("tools/clang/lib/Format/"), 5u);
  // One step down to a child.
  EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp/Oops"), 3u);
  // Ancestor: four steps up from the cost-2 source (2 + 4*5).
  EXPECT_EQ(D.distance("/"), 22u);
  // Sibling: up, then down.
  EXPECT_EQ(D.distance("tools/clang/lib/Format/AnotherFile.cpp"), 8u);
  // Cousin: up+up+down+down from the cost-2 source.
  EXPECT_EQ(D.distance("include/llvm/Support/Allocator.h"), 18u);
  // First cousin, once removed: up+up+up+down+down from the cost-2 source.
  EXPECT_EQ(D.distance("include/llvm-c/Core.h"), 23u);
}
// An expensive source must not be assumed to be the best route to the paths
// above or below it: here the cheap source "a" wins everywhere.
TEST(FileDistanceTests, BadSource) {
  FileDistanceOptions Opts;
  Opts.UpCost = 5;
  Opts.DownCost = 3;
  SourceParams CostLots;
  CostLots.Cost = 100;

  llvm::StringMap<SourceParams> Sources;
  Sources.try_emplace("a");
  Sources.try_emplace("b/b/b", CostLots);
  FileDistance D(Sources, Opts);

  // a+up+down, not b+up+up
  EXPECT_EQ(D.distance("b"), 8u);
  // a+up+down+down+down, not b
  EXPECT_EQ(D.distance("b/b/b"), 14u);
  // a+up+down+down+down+down, not b+down
  EXPECT_EQ(D.distance("b/b/b/c"), 17u);
}
// NOTE(review): presumably references the anchor so that the "unittest" URI
// scheme used by the URI test below gets linked into this binary - confirm
// against TestFS.
auto UseUnittestScheme = UnittestSchemeAnchorSource;
// URIDistance takes file paths as sources and URIs as queries; distances are
// reported for both the "file" and the "unittest" scheme.
TEST(FileDistanceTests, URI) {
  FileDistanceOptions Opts;
  Opts.UpCost = 5;
  Opts.DownCost = 3;
  SourceParams CostLots;
  CostLots.Cost = 1000;

  llvm::StringMap<SourceParams> Sources;
  Sources.try_emplace(testPath("foo"), CostLots);
  Sources.try_emplace("/not/a/testpath");
  URIDistance D(Sources, Opts);

  // One step down from the zero-cost source.
  EXPECT_EQ(D.distance("file:///not/a/testpath/either"), 3u);
  // The expensive source itself, then its sibling (up+down).
  EXPECT_EQ(D.distance("unittest:foo"), 1000u);
  EXPECT_EQ(D.distance("unittest:bar"), 1008u);
}
// A source with MaxUpTraversals=1 may climb only one directory level; paths
// that would need more up-steps must be reached from another source instead.
// The suite name matches the other tests in this file so that
// --gtest_filter=FileDistanceTests.* selects all of them.
TEST(FileDistanceTests, LimitUpTraversals) {
  FileDistanceOptions Opts;
  Opts.UpCost = Opts.DownCost = 1;
  SourceParams CheapButLimited, CostLots;
  CheapButLimited.MaxUpTraversals = 1;
  CostLots.Cost = 100;
  FileDistance D({{"/", CostLots}, {"/a/b/c", CheapButLimited}}, Opts);
  // "/a" is two levels above "/a/b/c", beyond its limit: reached from "/".
  EXPECT_EQ(D.distance("/a"), 101u);
  EXPECT_EQ(D.distance("/a/z"), 102u);
  // "/a/b" is one level above "/a/b/c", within its limit.
  EXPECT_EQ(D.distance("/a/b"), 1u);
  EXPECT_EQ(D.distance("/a/b/z"), 2u);
}
} // namespace
} // namespace clangd
} // namespace clang

View File

@ -64,18 +64,17 @@ private:
}
protected:
std::vector<Inclusion> collectIncludes() {
IncludeStructure collectIncludes() {
auto Clang = setupClang();
PreprocessOnlyAction Action;
EXPECT_TRUE(
Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]));
std::vector<Inclusion> Inclusions;
Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
Clang->getSourceManager(),
[&](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }));
IncludeStructure Includes;
Clang->getPreprocessor().addPPCallbacks(
collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
EXPECT_TRUE(Action.Execute());
Action.EndSourceFile();
return Inclusions;
return Includes;
}
// Calculates the include path, or returns "" on error or header should not be
@ -133,6 +132,14 @@ protected:
MATCHER_P(Written, Name, "") { return arg.Written == Name; }
MATCHER_P(Resolved, Name, "") { return arg.Resolved == Name; }
// Matches a string-map entry whose key equals File and whose value equals D.
// On a mismatch, the differing key and/or value is written to the listener.
MATCHER_P2(Distance, File, D, "") {
  const bool SameFile = arg.getKey() == File;
  const bool SameDepth = arg.getValue() == D;
  if (!SameFile)
    *result_listener << "file =" << arg.getKey().str();
  if (!SameDepth)
    *result_listener << "distance =" << arg.getValue();
  return SameFile && SameDepth;
}
TEST_F(HeadersTest, CollectRewrittenAndResolved) {
FS.Files[MainFile] = R"cpp(
#include "sub/bar.h" // not shortest
@ -140,9 +147,12 @@ TEST_F(HeadersTest, CollectRewrittenAndResolved) {
std::string BarHeader = testPath("sub/bar.h");
FS.Files[BarHeader] = "";
EXPECT_THAT(collectIncludes(),
EXPECT_THAT(collectIncludes().MainFileIncludes,
UnorderedElementsAre(
AllOf(Written("\"sub/bar.h\""), Resolved(BarHeader))));
EXPECT_THAT(collectIncludes().includeDepth(MainFile),
UnorderedElementsAre(Distance(MainFile, 0u),
Distance(testPath("sub/bar.h"), 1u)));
}
TEST_F(HeadersTest, OnlyCollectInclusionsInMain) {
@ -156,8 +166,16 @@ TEST_F(HeadersTest, OnlyCollectInclusionsInMain) {
#include "bar.h"
)cpp";
EXPECT_THAT(
collectIncludes(),
collectIncludes().MainFileIncludes,
UnorderedElementsAre(AllOf(Written("\"bar.h\""), Resolved(BarHeader))));
EXPECT_THAT(collectIncludes().includeDepth(MainFile),
UnorderedElementsAre(Distance(MainFile, 0u),
Distance(testPath("sub/bar.h"), 1u),
Distance(testPath("sub/baz.h"), 2u)));
// includeDepth() also works for non-main files.
EXPECT_THAT(collectIncludes().includeDepth(testPath("sub/bar.h")),
UnorderedElementsAre(Distance(testPath("sub/bar.h"), 0u),
Distance(testPath("sub/baz.h"), 1u)));
}
TEST_F(HeadersTest, UnResolvedInclusion) {
@ -165,8 +183,10 @@ TEST_F(HeadersTest, UnResolvedInclusion) {
#include "foo.h"
)cpp";
EXPECT_THAT(collectIncludes(),
EXPECT_THAT(collectIncludes().MainFileIncludes,
UnorderedElementsAre(AllOf(Written("\"foo.h\""), Resolved(""))));
EXPECT_THAT(collectIncludes().includeDepth(MainFile),
UnorderedElementsAre(Distance(MainFile, 0u)));
}
TEST_F(HeadersTest, InsertInclude) {

View File

@ -17,6 +17,7 @@
//
//===----------------------------------------------------------------------===//
#include "FileDistance.h"
#include "Quality.h"
#include "TestFS.h"
#include "TestTU.h"
@ -162,9 +163,22 @@ TEST(QualityTests, SymbolRelevanceSignalsSanity) {
PoorNameMatch.NameMatch = 0.2f;
EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate());
SymbolRelevanceSignals WithProximity;
WithProximity.SemaProximityScore = 0.2f;
EXPECT_GT(WithProximity.evaluate(), Default.evaluate());
SymbolRelevanceSignals WithSemaProximity;
WithSemaProximity.SemaProximityScore = 0.2f;
EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate());
SymbolRelevanceSignals IndexProximate;
IndexProximate.SymbolURI = "unittest:/foo/bar.h";
llvm::StringMap<SourceParams> ProxSources;
ProxSources.try_emplace(testPath("foo/baz.h"));
URIDistance Distance(ProxSources);
IndexProximate.FileProximityMatch = &Distance;
EXPECT_GT(IndexProximate.evaluate(), Default.evaluate());
SymbolRelevanceSignals IndexDistant = IndexProximate;
IndexDistant.SymbolURI = "unittest:/elsewhere/path.h";
EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate())
<< IndexProximate << IndexDistant;
EXPECT_GT(IndexDistant.evaluate(), Default.evaluate());
SymbolRelevanceSignals Scoped;
Scoped.Scope = SymbolRelevanceSignals::FileScope;
@ -185,59 +199,6 @@ TEST(QualityTests, SortText) {
EXPECT_LT(sortText(0, "a"), sortText(0, "z"));
}
// Joins Parts with the native path separator and roots the result in the test
// directory, e.g. {a,b,c} becomes /clangd-test/a/b/c
std::string joinPaths(llvm::ArrayRef<StringRef> Parts) {
  const auto Separator = llvm::sys::path::get_separator();
  const auto Relative = llvm::join(Parts.begin(), Parts.end(), Separator);
  return testPath(Relative);
}
static constexpr float ProximityBase = 0.7f;
// Calculates a proximity score for an index symbol with declaration file
// SymPath with the given URI scheme.
float URIProximity(const FileProximityMatcher &Matcher, StringRef SymPath,
StringRef Scheme = "file") {
auto U = URI::create(SymPath, Scheme);
EXPECT_TRUE(static_cast<bool>(U)) << llvm::toString(U.takeError());
return Matcher.uriProximity(U->toString());
}
// Scores symbols at various locations against a single proximity path,
// a/b/c/d/x under the test root, using the default "file" scheme.
TEST(QualityTests, URIProximityScores) {
  FileProximityMatcher Matcher(
      /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})});
  auto ProximityTo = [&Matcher](llvm::ArrayRef<StringRef> Parts) {
    return URIProximity(Matcher, joinPaths(Parts));
  };

  EXPECT_FLOAT_EQ(ProximityTo({"a", "b", "c", "d", "x"}), 1);
  EXPECT_FLOAT_EQ(ProximityTo({"a", "b", "c", "d", "y"}), ProximityBase);
  EXPECT_FLOAT_EQ(ProximityTo({"a", "y", "z"}), std::pow(ProximityBase, 5));
  EXPECT_FLOAT_EQ(ProximityTo({"a", "b", "c", "d", "e", "y"}),
                  std::pow(ProximityBase, 2));
  EXPECT_FLOAT_EQ(ProximityTo({"a", "b", "m", "n", "o", "y"}),
                  std::pow(ProximityBase, 5));
  EXPECT_FLOAT_EQ(ProximityTo({"a", "t", "m", "n", "o", "y"}),
                  std::pow(ProximityBase, 6));
  // Note the common directory is /clangd-test/
  EXPECT_FLOAT_EQ(ProximityTo({"m", "n", "o", "p", "y"}),
                  std::pow(ProximityBase, 7));
}
// Same scoring as above, but the symbol URIs use the "unittest" scheme.
TEST(QualityTests, URIProximityScoresWithTestURI) {
  FileProximityMatcher Matcher(
      /*ProximityPaths=*/{joinPaths({"b", "c", "x"})});
  auto ProximityTo = [&Matcher](llvm::ArrayRef<StringRef> Parts) {
    return URIProximity(Matcher, joinPaths(Parts), "unittest");
  };

  EXPECT_FLOAT_EQ(ProximityTo({"b", "c", "x"}), 1);
  EXPECT_FLOAT_EQ(ProximityTo({"b", "y"}), std::pow(ProximityBase, 2));
  // unittest:///b/c/x vs unittest:///m/n/y. No common directory.
  EXPECT_FLOAT_EQ(ProximityTo({"m", "n", "y"}), std::pow(ProximityBase, 4));
}
} // namespace
} // namespace clangd
} // namespace clang