[clangd] Introduce intermediate representation of formatted text

Summary: The representation can be rendered as markdown or plain text. It is used for findHover requests.

Reviewers: malaperle, sammccall, kadircet

Reviewed By: sammccall

Subscribers: mgorny, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D58547

llvm-svn: 360151
This commit is contained in:
Ilya Biryukov 2019-05-07 14:18:18 +00:00
parent 4b81e9f8d1
commit 5b0872fcfd
5 changed files with 388 additions and 0 deletions

View File

@ -50,6 +50,7 @@ add_clang_library(clangDaemon
FileDistance.cpp
FS.cpp
FSProvider.cpp
FormattedString.cpp
FuzzyMatch.cpp
GlobalCompilationDatabase.cpp
Headers.cpp

View File

@ -0,0 +1,173 @@
//===--- FormattedString.cpp --------------------------------*- C++-*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "FormattedString.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
#include <string>
namespace clang {
namespace clangd {
namespace {
/// Escapes \p Input for use as plain markdown text. Every ASCII punctuation
/// character is prefixed with a backslash, so the text cannot introduce any
/// markdown construct.
static std::string renderText(llvm::StringRef Input) {
  // The complete set of ASCII punctuation characters; all of them are escaped.
  constexpr llvm::StringLiteral Punctuation =
      R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
  std::string Escaped;
  for (char C : Input) {
    if (Punctuation.find(C) != llvm::StringRef::npos)
      Escaped.push_back('\\');
    Escaped.push_back(C);
  }
  return Escaped;
}
/// Produces a markdown inline code span for \p Input. The result is wrapped in
/// single backticks, backticks inside the input are doubled, and padding
/// spaces are inserted next to the delimiters when needed.
static std::string renderInlineBlock(llvm::StringRef Input) {
  // Emit two backticks for every one in the input so that a lone backtick
  // cannot close the span early.
  // NOTE(review): the doubled backticks presumably show up verbatim in the
  // rendered output; confirm against the markdown renderers in use.
  std::string Body;
  for (char C : Input) {
    if (C == '`')
      Body += "``";
    else
      Body.push_back(C);
  }
  const llvm::StringRef View(Body);
  // A backtick right next to a delimiter would merge with it, so the two are
  // separated with spaces; markdown renderers ignore that padding.
  const bool TouchesDelimiter = View.startswith("`") || View.endswith("`");
  // Renderers also drop the first and last space when both are present, so an
  // extra pair is added to keep the spaces the caller asked for.
  const bool SpacesWouldBeStripped =
      View.startswith(" ") && View.endswith(" ");
  if (TouchesDelimiter || SpacesWouldBeStripped)
    return "` " + std::move(Body) + " `";
  return "`" + std::move(Body) + "`";
}
/// Wraps \p Input in a fenced markdown code block tagged with \p Language.
/// The fence consists of at least three backticks and is always longer than
/// any run of backticks found in the input. Markdown also accepts '~' fences,
/// but those are never produced here.
static std::string renderCodeBlock(llvm::StringRef Input,
                                   llvm::StringRef Language) {
  // Find the longest run of consecutive backticks inside the input.
  unsigned LongestRun = 0;
  unsigned CurrentRun = 0;
  for (char C : Input) {
    CurrentRun = (C == '`') ? CurrentRun + 1 : 0;
    LongestRun = std::max(LongestRun, CurrentRun);
  }
  // The fence must be strictly longer than that run and no shorter than 3.
  const std::string Fence(/*Repeat=*/std::max(3u, LongestRun + 1), '`');
  std::string Block = Fence;
  Block += Language.str();
  Block += "\n";
  Block += Input.str();
  Block += "\n";
  Block += Fence;
  return Block;
}
} // namespace
void FormattedString::appendText(std::string Text) {
// We merge consecutive blocks of text to simplify the overall structure.
if (Chunks.empty() || Chunks.back().Kind != ChunkKind::PlainText) {
Chunk C;
C.Kind = ChunkKind::PlainText;
Chunks.push_back(C);
}
// FIXME: ensure there is a whitespace between the chunks.
Chunks.back().Contents += Text;
}
void FormattedString::appendCodeBlock(std::string Code, std::string Language) {
Chunk C;
C.Kind = ChunkKind::CodeBlock;
C.Contents = std::move(Code);
C.Language = std::move(Language);
Chunks.push_back(std::move(C));
}
void FormattedString::appendInlineCode(std::string Code) {
Chunk C;
C.Kind = ChunkKind::InlineCodeBlock;
C.Contents = std::move(Code);
Chunks.push_back(std::move(C));
}
/// Renders all chunks, in the order they were appended, as one markdown
/// string. Plain text is escaped, inline code is wrapped in backticks and code
/// blocks are emitted as fenced blocks, each followed by a newline.
std::string FormattedString::renderAsMarkdown() const {
  std::string R;
  for (const auto &C : Chunks) {
    switch (C.Kind) {
    case ChunkKind::PlainText:
      R += renderText(C.Contents);
      continue;
    case ChunkKind::InlineCodeBlock:
      // Make sure we don't glue two backticks together.
      if (llvm::StringRef(R).endswith("`"))
        R += " ";
      R += renderInlineBlock(C.Contents);
      continue;
    case ChunkKind::CodeBlock:
      // The opening fence is placed at the start of a line.
      if (!R.empty() && !llvm::StringRef(R).endswith("\n"))
        R += "\n";
      R += renderCodeBlock(C.Contents, C.Language);
      R += "\n";
      continue;
    }
    // Every case above leaves through `continue`, so this point is only
    // reached when Kind holds a value outside the enumeration.
    llvm_unreachable("unhandled ChunkKind");
  }
  return R;
}
/// Renders all chunks, in the order they were appended, as plain text without
/// any markup. Text and inline code are separated by whitespace, code blocks
/// are set apart by newlines, and trailing whitespace is removed from the
/// result.
std::string FormattedString::renderAsPlainText() const {
  std::string R;
  // Appends a single space unless R is empty or already ends in whitespace.
  auto EnsureWhitespace = [&R]() {
    if (!R.empty() && !isWhitespace(R.back()))
      R += " ";
  };
  for (const auto &C : Chunks) {
    switch (C.Kind) {
    case ChunkKind::PlainText:
    case ChunkKind::InlineCodeBlock:
      // Text and inline code are both emitted verbatim.
      EnsureWhitespace();
      R += C.Contents;
      continue;
    case ChunkKind::CodeBlock:
      // Separate the block from any preceding output with two newlines and
      // make sure it ends with a newline.
      if (!R.empty())
        R += "\n\n";
      R += C.Contents;
      if (!llvm::StringRef(C.Contents).endswith("\n"))
        R += "\n";
      continue;
    }
    // Every case above leaves through `continue`, so this point is only
    // reached when Kind holds a value outside the enumeration.
    llvm_unreachable("unhandled ChunkKind");
  }
  // Strip trailing whitespace, including the newline added after a final
  // code block.
  while (!R.empty() && isWhitespace(R.back()))
    R.pop_back();
  return R;
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,57 @@
//===--- FormattedString.h ----------------------------------*- C++-*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A simple intermediate representation of formatted text that could be
// converted to plaintext or markdown.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
#include <string>
#include <vector>
namespace clang {
namespace clangd {
/// A structured string representation that could be converted to markdown or
/// plaintext upon request.
///
/// The string is built by appending chunks of plain text, code blocks and
/// inline code, which are stored in the order they were appended.
class FormattedString {
public:
/// Append plain text to the end of the string. Text appended directly after
/// other plain text is merged into the same chunk.
void appendText(std::string Text);
/// Append a block of C++ code. This translates to a ``` block in markdown.
/// In a plain text representation, the code block will be surrounded by
/// newlines.
/// \p Language is written after the opening ``` in markdown.
void appendCodeBlock(std::string Code, std::string Language = "cpp");
/// Append an inline block of C++ code. This translates to the ` block in
/// markdown.
void appendInlineCode(std::string Code);
/// Render the string as markdown. Plain text chunks are escaped so that their
/// punctuation does not introduce markdown constructs.
std::string renderAsMarkdown() const;
/// Render the string as plain text without any markup. Trailing whitespace
/// is removed from the result.
std::string renderAsPlainText() const;
private:
/// The kind of content stored in a Chunk.
enum class ChunkKind {
PlainText, ///< A plain text paragraph.
CodeBlock, ///< A block of code.
InlineCodeBlock, ///< An inline block of code.
};
/// One piece of the string together with its kind.
struct Chunk {
ChunkKind Kind = ChunkKind::PlainText;
/// The raw, unescaped contents of the chunk.
std::string Contents;
/// Language for code block chunks. Ignored for other chunks.
std::string Language;
};
/// All chunks of the string, in the order they were appended.
std::vector<Chunk> Chunks;
};
} // namespace clangd
} // namespace clang
#endif

View File

@ -38,6 +38,7 @@ add_unittest(ClangdUnitTests ClangdTests
FileDistanceTests.cpp
FileIndexTests.cpp
FindSymbolsTests.cpp
FormattedStringTests.cpp
FSTests.cpp
FunctionTests.cpp
FuzzyMatchTests.cpp

View File

@ -0,0 +1,156 @@
//===-- FormattedStringTests.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "FormattedString.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang {
namespace clangd {
namespace {
// Checks the rendering of an empty string and of a single chunk of each kind.
TEST(FormattedString, Basic) {
FormattedString S;
// An empty string renders to nothing in both formats.
EXPECT_EQ(S.renderAsPlainText(), "");
EXPECT_EQ(S.renderAsMarkdown(), "");
// Text without punctuation is rendered verbatim in both formats.
S.appendText("foobar");
EXPECT_EQ(S.renderAsPlainText(), "foobar");
EXPECT_EQ(S.renderAsMarkdown(), "foobar");
// Inline code is wrapped in single backticks in markdown only.
S = FormattedString();
S.appendInlineCode("foobar");
EXPECT_EQ(S.renderAsPlainText(), "foobar");
EXPECT_EQ(S.renderAsMarkdown(), "`foobar`");
// A code block uses the default "cpp" language tag in markdown; in plain text
// the trailing newline is trimmed.
S = FormattedString();
S.appendCodeBlock("foobar");
EXPECT_EQ(S.renderAsPlainText(), "foobar");
EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
"foobar\n"
"```\n");
}
// Checks how several chunks are combined: consecutive code blocks, consecutive
// inline code, and inline code surrounded by text.
TEST(FormattedString, CodeBlocks) {
FormattedString S;
// Two code blocks with different languages.
S.appendCodeBlock("foobar");
S.appendCodeBlock("bazqux", "javascript");
// In plain text the first block ends with a newline and the second one is
// preceded by two more.
EXPECT_EQ(S.renderAsPlainText(), "foobar\n\n\nbazqux");
std::string ExpectedMarkdown = R"md(```cpp
foobar
```
```javascript
bazqux
```
)md";
EXPECT_EQ(S.renderAsMarkdown(), ExpectedMarkdown);
// Adjacent inline code chunks are separated by a space in both formats, so
// that their backticks do not get glued together in markdown.
S = FormattedString();
S.appendInlineCode("foobar");
S.appendInlineCode("bazqux");
EXPECT_EQ(S.renderAsPlainText(), "foobar bazqux");
EXPECT_EQ(S.renderAsMarkdown(), "`foobar` `bazqux`");
// Plain text rendering inserts whitespace between text and inline code;
// markdown rendering does not.
S = FormattedString();
S.appendText("foo");
S.appendInlineCode("bar");
S.appendText("baz");
EXPECT_EQ(S.renderAsPlainText(), "foo bar baz");
EXPECT_EQ(S.renderAsMarkdown(), "foo`bar`baz");
}
// Checks markdown escaping of plain text, inline code and code blocks.
TEST(FormattedString, Escaping) {
// Check some ASCII punctuation
FormattedString S;
S.appendText("*!`");
EXPECT_EQ(S.renderAsMarkdown(), "\\*\\!\\`");
// Check all ASCII punctuation.
S = FormattedString();
std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
// Same text, with each character escaped.
std::string EscapedPunctuation;
EscapedPunctuation.reserve(2 * Punctuation.size());
for (char C : Punctuation)
EscapedPunctuation += std::string("\\") + C;
S.appendText(Punctuation);
EXPECT_EQ(S.renderAsMarkdown(), EscapedPunctuation);
// In code blocks we don't need to escape ASCII punctuation.
S = FormattedString();
S.appendInlineCode("* foo !+ bar * baz");
EXPECT_EQ(S.renderAsMarkdown(), "`* foo !+ bar * baz`");
S = FormattedString();
S.appendCodeBlock("#define FOO\n* foo !+ bar * baz");
EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
"#define FOO\n* foo !+ bar * baz\n"
"```\n");
// But we have to escape the backticks.
// Inline code doubles every backtick; a code block keeps them as they are.
S = FormattedString();
S.appendInlineCode("foo`bar`baz");
EXPECT_EQ(S.renderAsMarkdown(), "`foo``bar``baz`");
S = FormattedString();
S.appendCodeBlock("foo`bar`baz");
EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
"foo`bar`baz\n"
"```\n");
// Inline code blocks starting or ending with backticks should add spaces.
S = FormattedString();
S.appendInlineCode("`foo");
EXPECT_EQ(S.renderAsMarkdown(), "` ``foo `");
S = FormattedString();
S.appendInlineCode("foo`");
EXPECT_EQ(S.renderAsMarkdown(), "` foo`` `");
S = FormattedString();
S.appendInlineCode("`foo`");
EXPECT_EQ(S.renderAsMarkdown(), "` ``foo`` `");
// Should also add extra spaces if the block starts and ends with spaces.
// A space on only one side does not trigger the extra padding.
S = FormattedString();
S.appendInlineCode(" foo ");
EXPECT_EQ(S.renderAsMarkdown(), "` foo `");
S = FormattedString();
S.appendInlineCode("foo ");
EXPECT_EQ(S.renderAsMarkdown(), "`foo `");
S = FormattedString();
S.appendInlineCode(" foo");
EXPECT_EQ(S.renderAsMarkdown(), "` foo`");
// Code blocks might need more than 3 backticks.
// The fence is one backtick longer than the longest run in the contents, but
// never shorter than three.
S = FormattedString();
S.appendCodeBlock("foobarbaz `\nqux");
EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
"foobarbaz `\nqux\n"
"```\n");
S = FormattedString();
S.appendCodeBlock("foobarbaz ``\nqux");
EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
"foobarbaz ``\nqux\n"
"```\n");
S = FormattedString();
S.appendCodeBlock("foobarbaz ```\nqux");
EXPECT_EQ(S.renderAsMarkdown(), "````cpp\n"
"foobarbaz ```\nqux\n"
"````\n");
S = FormattedString();
S.appendCodeBlock("foobarbaz ` `` ``` ```` `\nqux");
EXPECT_EQ(S.renderAsMarkdown(), "`````cpp\n"
"foobarbaz ` `` ``` ```` `\nqux\n"
"`````\n");
}
} // namespace
} // namespace clangd
} // namespace clang