[clang-format] Adds a canonical delimiter to raw string formatting

Summary:
This patch adds canonical delimiter support to the raw string formatting.
This allows matching delimiters to be updated to the canonical one.

Reviewers: bkramer

Reviewed By: bkramer

Subscribers: klimek, cfe-commits

Differential Revision: https://reviews.llvm.org/D42187

llvm-svn: 322956
This commit is contained in:
Krasimir Georgiev 2018-01-19 16:18:47 +00:00
parent 33cb84571f
commit 412ed095f7
6 changed files with 122 additions and 25 deletions

View File

@ -1590,6 +1590,9 @@ the configuration (without a prefix: ``Auto``).
precedence over a matching enclosing function name for determining the
language of the raw string contents.
If a canonical delimiter is specified, occurrences of other delimiters for
the same language will be updated to the canonical if possible.
There should be at most one specification per language and each delimiter
and enclosing function should not occur in multiple specifications.
@ -1610,6 +1613,7 @@ the configuration (without a prefix: ``Auto``).
- 'cc'
- 'cpp'
BasedOnStyle: llvm
CanonicalDelimiter: 'cc'
**ReflowComments** (``bool``)
If ``true``, clang-format will attempt to re-flow comments.

View File

@ -1369,6 +1369,8 @@ struct FormatStyle {
std::vector<std::string> Delimiters;
/// \brief A list of enclosing function names that match this language.
std::vector<std::string> EnclosingFunctions;
/// \brief The canonical delimiter for this language.
std::string CanonicalDelimiter;
/// \brief The style name on which this raw string format is based.
/// If not specified, the raw string format is based on the style that this
/// format is based on.
@ -1376,6 +1378,7 @@ struct FormatStyle {
bool operator==(const RawStringFormat &Other) const {
return Language == Other.Language && Delimiters == Other.Delimiters &&
EnclosingFunctions == Other.EnclosingFunctions &&
CanonicalDelimiter == Other.CanonicalDelimiter &&
BasedOnStyle == Other.BasedOnStyle;
}
};
@ -1392,6 +1395,9 @@ struct FormatStyle {
/// precedence over a matching enclosing function name for determining the
/// language of the raw string contents.
///
/// If a canonical delimiter is specified, occurrences of other delimiters for
/// the same language will be updated to the canonical if possible.
///
/// There should be at most one specification per language and each delimiter
/// and enclosing function should not occur in multiple specifications.
///
@ -1410,6 +1416,7 @@ struct FormatStyle {
/// - 'cc'
/// - 'cpp'
/// BasedOnStyle: llvm
/// CanonicalDelimiter: 'cc'
/// \endcode
std::vector<RawStringFormat> RawStringFormats;

View File

@ -102,6 +102,18 @@ static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
return Delimiter;
}
// Looks up the canonical raw string delimiter configured for \p Language.
//
// Scans \p Style.RawStringFormats in order and yields the CanonicalDelimiter
// of the first entry whose language equals \p Language. If no entry matches,
// the empty string is returned.
static StringRef
getCanonicalRawStringDelimiter(const FormatStyle &Style,
                               FormatStyle::LanguageKind Language) {
  for (const auto &Candidate : Style.RawStringFormats) {
    // Skip specifications that describe a different language.
    if (Candidate.Language != Language)
      continue;
    return StringRef(Candidate.CanonicalDelimiter);
  }
  return "";
}
RawStringFormatStyleManager::RawStringFormatStyleManager(
const FormatStyle &CodeStyle) {
for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
@ -1312,14 +1324,32 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
const FormatToken &Current, LineState &State,
const FormatStyle &RawStringStyle, bool DryRun) {
unsigned StartColumn = State.Column - Current.ColumnWidth;
auto Delimiter = *getRawStringDelimiter(Current.TokenText);
StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText);
StringRef NewDelimiter =
getCanonicalRawStringDelimiter(Style, RawStringStyle.Language);
if (NewDelimiter.empty() || OldDelimiter.empty())
NewDelimiter = OldDelimiter;
// The text of a raw string is between the leading 'R"delimiter(' and the
// trailing 'delimiter)"'.
unsigned PrefixSize = 3 + Delimiter.size();
unsigned SuffixSize = 2 + Delimiter.size();
unsigned OldPrefixSize = 3 + OldDelimiter.size();
unsigned OldSuffixSize = 2 + OldDelimiter.size();
// We create a virtual text environment which expects a null-terminated
// string, so we cannot use StringRef.
std::string RawText =
Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize);
if (NewDelimiter != OldDelimiter) {
// Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the
// raw string.
std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str();
if (StringRef(RawText).contains(CanonicalDelimiterSuffix))
NewDelimiter = OldDelimiter;
}
// The first start column is the column the raw text starts.
unsigned FirstStartColumn = StartColumn + PrefixSize;
unsigned NewPrefixSize = 3 + NewDelimiter.size();
unsigned NewSuffixSize = 2 + NewDelimiter.size();
// The first start column is the column the raw text starts after formatting.
unsigned FirstStartColumn = StartColumn + NewPrefixSize;
// The next start column is the intended indentation a line break inside
// the raw string at level 0. It is determined by the following rules:
@ -1330,7 +1360,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
// These rules have the advantage that the formatted content both does not
// violate the rectangle rule and visually flows within the surrounding
// source.
bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';
bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n';
unsigned NextStartColumn = ContentStartsOnNewline
? State.Stack.back().Indent + Style.IndentWidth
: FirstStartColumn;
@ -1344,12 +1374,9 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
// - if the raw string prefix does not start on a newline, it is the current
// indent.
unsigned LastStartColumn = Current.NewlinesBefore
? FirstStartColumn - PrefixSize
? FirstStartColumn - NewPrefixSize
: State.Stack.back().Indent;
std::string RawText =
Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);
std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
@ -1362,8 +1389,33 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
return 0;
}
if (!DryRun) {
if (NewDelimiter != OldDelimiter) {
// In 'R"delimiter(...', the delimiter starts 2 characters after the start
// of the token.
SourceLocation PrefixDelimiterStart =
Current.Tok.getLocation().getLocWithOffset(2);
auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement(
SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter));
if (PrefixErr) {
llvm::errs()
<< "Failed to update the prefix delimiter of a raw string: "
<< llvm::toString(std::move(PrefixErr)) << "\n";
}
// In 'R"delimiter(...)delimiter"', the suffix delimiter starts at
// position length - 1 - |delimiter|.
SourceLocation SuffixDelimiterStart =
Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() -
1 - OldDelimiter.size());
auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement(
SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter));
if (SuffixErr) {
llvm::errs()
<< "Failed to update the suffix delimiter of a raw string: "
<< llvm::toString(std::move(SuffixErr)) << "\n";
}
}
SourceLocation OriginLoc =
Current.Tok.getLocation().getLocWithOffset(PrefixSize);
Current.Tok.getLocation().getLocWithOffset(OldPrefixSize);
for (const tooling::Replacement &Fix : Fixes.first) {
auto Err = Whitespaces.addReplacement(tooling::Replacement(
SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
@ -1376,7 +1428,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
}
unsigned RawLastLineEndColumn = getLastLineEndColumn(
*NewCode, FirstStartColumn, Style.TabWidth, Encoding);
State.Column = RawLastLineEndColumn + SuffixSize;
State.Column = RawLastLineEndColumn + NewSuffixSize;
return Fixes.second;
}

View File

@ -459,6 +459,7 @@ template <> struct MappingTraits<FormatStyle::RawStringFormat> {
IO.mapOptional("Language", Format.Language);
IO.mapOptional("Delimiters", Format.Delimiters);
IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions);
IO.mapOptional("CanonicalDelimiter", Format.CanonicalDelimiter);
IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);
}
};
@ -713,6 +714,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
"PARSE_TEXT_PROTO",
"ParseTextProto",
},
/*CanonicalDelimiter=*/"",
/*BasedOnStyle=*/"google",
}};
GoogleStyle.SpacesBeforeTrailingComments = 2;

View File

@ -10429,13 +10429,15 @@ TEST_F(FormatTest, ParsesConfiguration) {
FormatStyle::LK_TextProto,
{"pb", "proto"},
{"PARSE_TEXT_PROTO"},
/*CanonicalDelimiter=*/"",
"llvm",
},
{
FormatStyle::LK_Cpp,
{"cc", "cpp"},
{"C_CODEBLOCK", "CPPEVAL"},
"",
/*CanonicalDelimiter=*/"cc",
/*BasedOnStyle=*/"",
},
};
@ -10453,7 +10455,8 @@ TEST_F(FormatTest, ParsesConfiguration) {
" - 'cpp'\n"
" EnclosingFunctions:\n"
" - 'C_CODEBLOCK'\n"
" - 'CPPEVAL'\n",
" - 'CPPEVAL'\n"
" CanonicalDelimiter: 'cc'",
RawStringFormats, ExpectedRawStringFormats);
}

View File

@ -66,10 +66,13 @@ protected:
FormatStyle Style = getLLVMStyle();
Style.ColumnLimit = ColumnLimit;
Style.RawStringFormats = {
{/*Language=*/FormatStyle::LK_TextProto,
/*Delimiters=*/{"pb"},
/*EnclosingFunctions=*/{},
/*BasedOnStyle=*/"google"},
{
/*Language=*/FormatStyle::LK_TextProto,
/*Delimiters=*/{"pb"},
/*EnclosingFunctions=*/{},
/*CanonicalDelimiter=*/"",
/*BasedOnStyle=*/"google",
},
};
return Style;
}
@ -77,9 +80,13 @@ protected:
FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) {
FormatStyle Style = getLLVMStyle();
Style.RawStringFormats = {
{/*Language=*/FormatStyle::LK_Cpp,
/*Delimiters=*/{"cpp"},
/*EnclosingFunctions=*/{}, BasedOnStyle},
{
/*Language=*/FormatStyle::LK_Cpp,
/*Delimiters=*/{"cpp"},
/*EnclosingFunctions=*/{},
/*CanonicalDelimiter=*/"",
BasedOnStyle,
},
};
return Style;
}
@ -87,9 +94,13 @@ protected:
FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) {
FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp);
Style.RawStringFormats = {
{/*Language=*/FormatStyle::LK_Cpp,
/*Delimiters=*/{"cpp"},
/*EnclosingFunctions=*/{}, BasedOnStyle},
{
/*Language=*/FormatStyle::LK_Cpp,
/*Delimiters=*/{"cpp"},
/*EnclosingFunctions=*/{},
/*CanonicalDelimiter=*/"",
BasedOnStyle,
},
};
return Style;
}
@ -131,7 +142,13 @@ TEST_F(FormatTestRawStrings, UsesConfigurationOverBaseStyle) {
EXPECT_EQ(0, parseConfiguration("---\n"
"Language: Cpp\n"
"BasedOnStyle: Google", &Style).value());
Style.RawStringFormats = {{FormatStyle::LK_Cpp, {"cpp"}, {}, "llvm"}};
Style.RawStringFormats = {{
FormatStyle::LK_Cpp,
{"cpp"},
{},
/*CanonicalDelimiter=*/"",
/*BasedOnStyle=*/"llvm",
}};
expect_eq(R"test(int* i = R"cpp(int* j = 0;)cpp";)test",
format(R"test(int * i = R"cpp(int * j = 0;)cpp";)test", Style));
}
@ -752,6 +769,18 @@ a = ParseTextProto<ProtoType>(R"(key:value)");)test",
Style));
}
// Checks that a raw string whose delimiter differs from the configured
// canonical delimiter is rewritten to use the canonical one, and that the
// rewrite is skipped when it would conflict with the raw string's contents.
TEST_F(FormatTestRawStrings, UpdatesToCanonicalDelimiters) {
FormatStyle Style = getRawStringPbStyleWithColumns(25);
// Make 'proto' the canonical delimiter of the first raw string format.
Style.RawStringFormats[0].CanonicalDelimiter = "proto";
// The 'pb' delimiter is expected to become 'proto' on both ends of the raw
// string, with the contents reformatted as well ('key:value' -> 'key: value').
expect_eq(R"test(a = R"proto(key: value)proto";)test",
format(R"test(a = R"pb(key:value)pb";)test", Style));
// Don't update to canonical delimiter if it occurs as a raw string suffix in
// the raw string content.
// Here the content contains ')proto"', so the 'pb' delimiter is expected to
// stay while the contents are still reformatted.
expect_eq(R"test(a = R"pb(key: ")proto")pb";)test",
format(R"test(a = R"pb(key:")proto")pb";)test", Style));
}
} // end namespace
} // end namespace format
} // end namespace clang