diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index f6bd614258ab..ec6a78ad67cd 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -1590,6 +1590,9 @@ the configuration (without a prefix: ``Auto``). precedence over a matching enclosing function name for determining the language of the raw string contents. + If a canonical delimiter is specified, occurrences of other delimiters for + the same language will be updated to the canonical if possible. + There should be at most one specification per language and each delimiter and enclosing function should not occur in multiple specifications. @@ -1610,6 +1613,7 @@ the configuration (without a prefix: ``Auto``). - 'cc' - 'cpp' BasedOnStyle: llvm + CanonicalDelimiter: 'cc' **ReflowComments** (``bool``) If ``true``, clang-format will attempt to re-flow comments. diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 43d76d238eaa..e5bf1f3ebe9b 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1369,6 +1369,8 @@ struct FormatStyle { std::vector Delimiters; /// \brief A list of enclosing function names that match this language. std::vector EnclosingFunctions; + /// \brief The canonical delimiter for this language. + std::string CanonicalDelimiter; /// \brief The style name on which this raw string format is based on. /// If not specified, the raw string format is based on the style that this /// format is based on. 
@@ -1376,6 +1378,7 @@ struct FormatStyle { bool operator==(const RawStringFormat &Other) const { return Language == Other.Language && Delimiters == Other.Delimiters && EnclosingFunctions == Other.EnclosingFunctions && + CanonicalDelimiter == Other.CanonicalDelimiter && BasedOnStyle == Other.BasedOnStyle; } }; @@ -1392,6 +1395,9 @@ struct FormatStyle { /// precedence over a matching enclosing function name for determining the /// language of the raw string contents. /// + /// If a canonical delimiter is specified, occurrences of other delimiters for + /// the same language will be updated to the canonical if possible. + /// /// There should be at most one specification per language and each delimiter /// and enclosing function should not occur in multiple specifications. /// @@ -1410,6 +1416,7 @@ struct FormatStyle { /// - 'cc' /// - 'cpp' /// BasedOnStyle: llvm + /// CanonicalDelimiter: 'cc' /// \endcode std::vector RawStringFormats; diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 520235a31296..f7472bcd083f 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -102,6 +102,18 @@ static llvm::Optional getRawStringDelimiter(StringRef TokenText) { return Delimiter; } +// Returns the canonical delimiter for \p Language, or the empty string if no +// canonical delimiter is specified. 
+static StringRef +getCanonicalRawStringDelimiter(const FormatStyle &Style, + FormatStyle::LanguageKind Language) { + for (const auto &Format : Style.RawStringFormats) { + if (Format.Language == Language) + return StringRef(Format.CanonicalDelimiter); + } + return ""; +} + RawStringFormatStyleManager::RawStringFormatStyleManager( const FormatStyle &CodeStyle) { for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { @@ -1312,14 +1324,32 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( const FormatToken &Current, LineState &State, const FormatStyle &RawStringStyle, bool DryRun) { unsigned StartColumn = State.Column - Current.ColumnWidth; - auto Delimiter = *getRawStringDelimiter(Current.TokenText); + StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText); + StringRef NewDelimiter = + getCanonicalRawStringDelimiter(Style, RawStringStyle.Language); + if (NewDelimiter.empty() || OldDelimiter.empty()) + NewDelimiter = OldDelimiter; // The text of a raw string is between the leading 'R"delimiter(' and the // trailing 'delimiter)"'. - unsigned PrefixSize = 3 + Delimiter.size(); - unsigned SuffixSize = 2 + Delimiter.size(); + unsigned OldPrefixSize = 3 + OldDelimiter.size(); + unsigned OldSuffixSize = 2 + OldDelimiter.size(); + // We create a virtual text environment which expects a null-terminated + // string, so we cannot use StringRef. + std::string RawText = + Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize); + if (NewDelimiter != OldDelimiter) { + // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the + // raw string. + std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str(); + if (StringRef(RawText).contains(CanonicalDelimiterSuffix)) + NewDelimiter = OldDelimiter; + } - // The first start column is the column the raw text starts. 
- unsigned FirstStartColumn = StartColumn + PrefixSize; + unsigned NewPrefixSize = 3 + NewDelimiter.size(); + unsigned NewSuffixSize = 2 + NewDelimiter.size(); + + // The first start column is the column the raw text starts after formatting. + unsigned FirstStartColumn = StartColumn + NewPrefixSize; // The next start column is the intended indentation a line break inside // the raw string at level 0. It is determined by the following rules: @@ -1330,7 +1360,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( // These rules have the advantage that the formatted content both does not // violate the rectangle rule and visually flows within the surrounding // source. - bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n'; + bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n'; unsigned NextStartColumn = ContentStartsOnNewline ? State.Stack.back().Indent + Style.IndentWidth : FirstStartColumn; @@ -1344,12 +1374,9 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( // - if the raw string prefix does not start on a newline, it is the current // indent. unsigned LastStartColumn = Current.NewlinesBefore - ? FirstStartColumn - PrefixSize + ? FirstStartColumn - NewPrefixSize : State.Stack.back().Indent; - std::string RawText = - Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); - std::pair Fixes = internal::reformat( RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, FirstStartColumn, NextStartColumn, LastStartColumn, "", @@ -1362,8 +1389,33 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( return 0; } if (!DryRun) { + if (NewDelimiter != OldDelimiter) { + // In 'R"delimiter(...', the delimiter starts 2 characters after the start + // of the token. 
+ SourceLocation PrefixDelimiterStart = + Current.Tok.getLocation().getLocWithOffset(2); + auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter)); + if (PrefixErr) { + llvm::errs() + << "Failed to update the prefix delimiter of a raw string: " + << llvm::toString(std::move(PrefixErr)) << "\n"; + } + // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at + // position length - 1 - |delimiter|. + SourceLocation SuffixDelimiterStart = + Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() - + 1 - OldDelimiter.size()); + auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter)); + if (SuffixErr) { + llvm::errs() + << "Failed to update the suffix delimiter of a raw string: " + << llvm::toString(std::move(SuffixErr)) << "\n"; + } + } SourceLocation OriginLoc = - Current.Tok.getLocation().getLocWithOffset(PrefixSize); + Current.Tok.getLocation().getLocWithOffset(OldPrefixSize); for (const tooling::Replacement &Fix : Fixes.first) { auto Err = Whitespaces.addReplacement(tooling::Replacement( SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), @@ -1376,7 +1428,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( } unsigned RawLastLineEndColumn = getLastLineEndColumn( *NewCode, FirstStartColumn, Style.TabWidth, Encoding); - State.Column = RawLastLineEndColumn + SuffixSize; + State.Column = RawLastLineEndColumn + NewSuffixSize; return Fixes.second; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 88c90a865a83..0da0cea708c3 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -459,6 +459,7 @@ template <> struct MappingTraits { IO.mapOptional("Language", Format.Language); IO.mapOptional("Delimiters", Format.Delimiters); IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions); + IO.mapOptional("CanonicalDelimiter", 
Format.CanonicalDelimiter); IO.mapOptional("BasedOnStyle", Format.BasedOnStyle); } }; @@ -713,6 +714,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { "PARSE_TEXT_PROTO", "ParseTextProto", }, + /*CanonicalDelimiter=*/"", /*BasedOnStyle=*/"google", }}; GoogleStyle.SpacesBeforeTrailingComments = 2; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index ac5184ef02bb..dcb1089a82cd 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -10429,13 +10429,15 @@ TEST_F(FormatTest, ParsesConfiguration) { FormatStyle::LK_TextProto, {"pb", "proto"}, {"PARSE_TEXT_PROTO"}, + /*CanonicalDelimiter=*/"", "llvm", }, { FormatStyle::LK_Cpp, {"cc", "cpp"}, {"C_CODEBLOCK", "CPPEVAL"}, - "", + /*CanonicalDelimiter=*/"cc", + /*BasedOnStyle=*/"", }, }; @@ -10453,7 +10455,8 @@ TEST_F(FormatTest, ParsesConfiguration) { " - 'cpp'\n" " EnclosingFunctions:\n" " - 'C_CODEBLOCK'\n" - " - 'CPPEVAL'\n", + " - 'CPPEVAL'\n" + " CanonicalDelimiter: 'cc'", RawStringFormats, ExpectedRawStringFormats); } diff --git a/clang/unittests/Format/FormatTestRawStrings.cpp b/clang/unittests/Format/FormatTestRawStrings.cpp index d5ba7923d5d1..941aa2ed7cec 100644 --- a/clang/unittests/Format/FormatTestRawStrings.cpp +++ b/clang/unittests/Format/FormatTestRawStrings.cpp @@ -66,10 +66,13 @@ protected: FormatStyle Style = getLLVMStyle(); Style.ColumnLimit = ColumnLimit; Style.RawStringFormats = { - {/*Language=*/FormatStyle::LK_TextProto, - /*Delimiters=*/{"pb"}, - /*EnclosingFunctions=*/{}, - /*BasedOnStyle=*/"google"}, + { + /*Language=*/FormatStyle::LK_TextProto, + /*Delimiters=*/{"pb"}, + /*EnclosingFunctions=*/{}, + /*CanonicalDelimiter=*/"", + /*BasedOnStyle=*/"google", + }, }; return Style; } @@ -77,9 +80,13 @@ protected: FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) { FormatStyle Style = getLLVMStyle(); Style.RawStringFormats = { - {/*Language=*/FormatStyle::LK_Cpp, - 
/*Delimiters=*/{"cpp"}, - /*EnclosingFunctions=*/{}, BasedOnStyle}, + { + /*Language=*/FormatStyle::LK_Cpp, + /*Delimiters=*/{"cpp"}, + /*EnclosingFunctions=*/{}, + /*CanonicalDelimiter=*/"", + BasedOnStyle, + }, }; return Style; } @@ -87,9 +94,13 @@ protected: FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) { FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp); Style.RawStringFormats = { - {/*Language=*/FormatStyle::LK_Cpp, - /*Delimiters=*/{"cpp"}, - /*EnclosingFunctions=*/{}, BasedOnStyle}, + { + /*Language=*/FormatStyle::LK_Cpp, + /*Delimiters=*/{"cpp"}, + /*EnclosingFunctions=*/{}, + /*CanonicalDelimiter=*/"", + BasedOnStyle, + }, }; return Style; } @@ -131,7 +142,13 @@ TEST_F(FormatTestRawStrings, UsesConfigurationOverBaseStyle) { EXPECT_EQ(0, parseConfiguration("---\n" "Language: Cpp\n" "BasedOnStyle: Google", &Style).value()); - Style.RawStringFormats = {{FormatStyle::LK_Cpp, {"cpp"}, {}, "llvm"}}; + Style.RawStringFormats = {{ + FormatStyle::LK_Cpp, + {"cpp"}, + {}, + /*CanonicalDelimiter=*/"", + /*BasedOnStyle=*/"llvm", + }}; expect_eq(R"test(int* i = R"cpp(int* j = 0;)cpp";)test", format(R"test(int * i = R"cpp(int * j = 0;)cpp";)test", Style)); } @@ -752,6 +769,18 @@ a = ParseTextProto(R"(key:value)");)test", Style)); } +TEST_F(FormatTestRawStrings, UpdatesToCanonicalDelimiters) { + FormatStyle Style = getRawStringPbStyleWithColumns(25); + Style.RawStringFormats[0].CanonicalDelimiter = "proto"; + expect_eq(R"test(a = R"proto(key: value)proto";)test", + format(R"test(a = R"pb(key:value)pb";)test", Style)); + + // Don't update to canonical delimiter if it occurs as a raw string suffix in + // the raw string content. + expect_eq(R"test(a = R"pb(key: ")proto")pb";)test", + format(R"test(a = R"pb(key:")proto")pb";)test", Style)); +} + } // end namespace } // end namespace format } // end namespace clang