From be633908be11c87e0fe7fc295fc617e54396ca9a Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Fri, 14 Jun 2013 11:46:10 +0000 Subject: [PATCH] Don't remove backslashes from block comments. Summary: Don't remove backslashes from block comments. Previously this /* \ \ \ \ \ \ */ would be turned to this: /* */ which spoils some kinds of ASCII-art, people use in their comments. The behavior was related to handling escaped newlines in block comments inside preprocessor directives. This patch makes handling it in a more civilized way. Reviewers: klimek Reviewed By: klimek CC: cfe-commits Differential Revision: http://llvm-reviews.chandlerc.com/D979 llvm-svn: 183978 --- clang/lib/Format/BreakableToken.cpp | 43 ++++++++++++++++----------- clang/lib/Format/BreakableToken.h | 26 ++++++++-------- clang/lib/Format/Format.cpp | 32 ++++++++------------ clang/unittests/Format/FormatTest.cpp | 17 +++++++++++ 4 files changed, 68 insertions(+), 50 deletions(-) diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index d915aef2f947..48c1dee5bd7f 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -119,13 +119,11 @@ unsigned BreakableSingleLineToken::getLineLengthAfterSplit( encoding::getCodePointCount(Line.substr(Offset, Length), Encoding); } -BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok, - unsigned StartColumn, - StringRef Prefix, - StringRef Postfix, - encoding::Encoding Encoding) - : BreakableToken(Tok, Encoding), StartColumn(StartColumn), Prefix(Prefix), - Postfix(Postfix) { +BreakableSingleLineToken::BreakableSingleLineToken( + const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding) + : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn), + Prefix(Prefix), Postfix(Postfix) { assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); @@ -133,8 +131,10 @@ BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok, BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, + bool InPPDirective, encoding::Encoding Encoding) - : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", Encoding) {} + : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective, + Encoding) {} BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, @@ -145,7 +145,6 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) { Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, @@ -162,10 +161,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) { BreakableLineComment::BreakableLineComment(const FormatToken &Token, unsigned StartColumn, + bool InPPDirective, encoding::Encoding Encoding) : BreakableSingleLineToken(Token, StartColumn, getLineCommentPrefix(Token.TokenText), "", - Encoding) { + InPPDirective, Encoding) { OriginalPrefix = Prefix; if (Token.TokenText.size() > Prefix.size() && isAlphanumeric(Token.TokenText[Prefix.size()])) { @@ -184,7 +184,7 @@ BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, } void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, bool InPPDirective, + Split Split, WhitespaceManager &Whitespaces) { Whitespaces.replaceWhitespaceInToken( Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, @@ -193,7 +193,6 @@ void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, void BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, - unsigned InPPDirective, WhitespaceManager &Whitespaces) { if (OriginalPrefix != Prefix) { Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", @@ -203,8 +202,9 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, BreakableBlockComment::BreakableBlockComment( const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn, - unsigned OriginalStartColumn, bool FirstInLine, encoding::Encoding Encoding) - : BreakableToken(Token, Encoding) { + unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding) + : BreakableToken(Token, InPPDirective, Encoding) { StringRef TokenText(Token.TokenText); assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); @@ -264,8 +264,18 @@ BreakableBlockComment::BreakableBlockComment( void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, unsigned LineIndex, int IndentDelta) { + // When in a preprocessor directive, the trailing backslash in a block comment + // is not needed, but can serve a purpose of uniformity with necessary escaped + // newlines outside the comment. In this case we remove it here before + // trimming the trailing whitespace. The backslash will be re-added later when + // inserting a line break. + size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); + if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) + --EndOfPreviousLine; + // Calculate the end of the non-whitespace text in the previous line. - size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t"); + EndOfPreviousLine = + Lines[LineIndex - 1].find_last_not_of(" \t", EndOfPreviousLine); if (EndOfPreviousLine == StringRef::npos) EndOfPreviousLine = 0; else @@ -313,7 +323,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, } void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, bool InPPDirective, + Split Split, WhitespaceManager &Whitespaces) { StringRef Text = Lines[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; @@ -334,7 +344,6 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, void BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, - unsigned InPPDirective, WhitespaceManager &Whitespaces) { if (LineIndex == 0) return; diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h index 4c8755b95b50..9dab473d8a84 100644 --- a/clang/lib/Format/BreakableToken.h +++ b/clang/lib/Format/BreakableToken.h @@ -59,20 +59,20 @@ public: /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) = 0; /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. virtual void replaceWhitespaceBefore(unsigned LineIndex, - unsigned InPPDirective, WhitespaceManager &Whitespaces) {} protected: - BreakableToken(const FormatToken &Tok, encoding::Encoding Encoding) - : Tok(Tok), Encoding(Encoding) {} + BreakableToken(const FormatToken &Tok, bool InPPDirective, + encoding::Encoding Encoding) + : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {} const FormatToken &Tok; - encoding::Encoding Encoding; + const bool InPPDirective; + const encoding::Encoding Encoding; }; /// \brief Base class for single line tokens that can be broken. @@ -88,7 +88,7 @@ public: protected: BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, - encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding); // The column in which the token starts. unsigned StartColumn; @@ -107,12 +107,11 @@ public: /// \p StartColumn specifies the column in which the token will start /// after formatting. BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, - encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces); }; @@ -123,14 +122,13 @@ public: /// \p StartColumn specifies the column in which the comment will start /// after formatting. BreakableLineComment(const FormatToken &Token, unsigned StartColumn, - encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces); + WhitespaceManager &Whitespaces); virtual void replaceWhitespaceBefore(unsigned LineIndex, - unsigned InPPDirective, WhitespaceManager &Whitespaces); private: @@ -148,7 +146,8 @@ public: /// If the comment starts a line after formatting, set \p FirstInLine to true. BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn, unsigned OriginaStartColumn, - bool FirstInLine, encoding::Encoding Encoding); + bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding); virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, @@ -157,9 +156,8 @@ public: virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces); + WhitespaceManager &Whitespaces); virtual void replaceWhitespaceBefore(unsigned LineIndex, - unsigned InPPDirective, WhitespaceManager &Whitespaces); private: diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index e0a05b64ecc6..92d58ba04c7b 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -804,7 +804,6 @@ private: /// cover the cost of the additional line breaks. unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { - unsigned UnbreakableTailLength = Current.UnbreakableTailLength; llvm::OwningPtr Token; unsigned StartColumn = State.Column - Current.CodePointCount; unsigned OriginalStartColumn = @@ -814,37 +813,34 @@ private: if (Current.is(tok::string_literal) && Current.Type != TT_ImplicitStringLiteral) { // Only break up default narrow strings. - const char *LiteralData = - SourceMgr.getCharacterData(Current.getStartOfNonWhitespace()); - if (!LiteralData || *LiteralData != '"') + if (!Current.TokenText.startswith("\"")) return 0; - Token.reset(new BreakableStringLiteral(Current, StartColumn, Encoding)); + Token.reset(new BreakableStringLiteral(Current, StartColumn, + Line.InPPDirective, Encoding)); } else if (Current.Type == TT_BlockComment) { - BreakableBlockComment *BBC = new BreakableBlockComment( + Token.reset(new BreakableBlockComment( Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, - Encoding); - Token.reset(BBC); + Line.InPPDirective, Encoding)); } else if (Current.Type == TT_LineComment && (Current.Previous == NULL || Current.Previous->Type != TT_ImplicitStringLiteral)) { - Token.reset(new BreakableLineComment(Current, StartColumn, Encoding)); + Token.reset(new BreakableLineComment(Current, StartColumn, + Line.InPPDirective, Encoding)); } else { return 0; } - if (UnbreakableTailLength >= getColumnLimit()) + if (Current.UnbreakableTailLength >= getColumnLimit()) return 0; - unsigned RemainingSpace = getColumnLimit() - UnbreakableTailLength; + unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength; bool BreakInserted = false; unsigned Penalty = 0; unsigned PositionAfterLastLineInToken = 0; for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { - if (!DryRun) { - Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective, - Whitespaces); - } + if (!DryRun) + Token->replaceWhitespaceBefore(LineIndex, Whitespaces); unsigned TailOffset = 0; unsigned RemainingTokenColumns = Token->getLineLengthAfterSplit( LineIndex, TailOffset, StringRef::npos); @@ -858,10 +854,8 @@ private: LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); assert(NewRemainingTokenColumns < RemainingTokenColumns); - if (!DryRun) { - Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective, - Whitespaces); - } + if (!DryRun) + Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString : Style.PenaltyBreakComment; unsigned ColumnsUsed = diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index c8bd945380a5..32f9d95d6680 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -1961,6 +1961,10 @@ TEST_F(FormatTest, EscapedNewlineAtStartOfToken) { EXPECT_EQ("template f();", format("\\\ntemplate f();")); } +TEST_F(FormatTest, NoEscapedNewlineHandlingInBlockComments) { + EXPECT_EQ("/* \\ \\ \\\n*/", format("\\\n/* \\ \\ \\\n*/")); +} + TEST_F(FormatTest, CalculateSpaceOnConsecutiveLinesInMacro) { verifyFormat("#define A \\\n" " int v( \\\n" @@ -4605,6 +4609,19 @@ TEST_F(FormatTest, BreakStringLiterals) { format("#define A \"some text other\";", AlignLeft)); } +TEST_F(FormatTest, SkipsUnknownStringLiterals) { + EXPECT_EQ("u8\"unsupported literal\";", + format("u8\"unsupported literal\";", getLLVMStyleWithColumns(15))); + EXPECT_EQ("u\"unsupported literal\";", + format("u\"unsupported literal\";", getLLVMStyleWithColumns(15))); + EXPECT_EQ("U\"unsupported literal\";", + format("U\"unsupported literal\";", getLLVMStyleWithColumns(15))); + EXPECT_EQ("L\"unsupported literal\";", + format("L\"unsupported literal\";", getLLVMStyleWithColumns(15))); + EXPECT_EQ("R\"x(raw literal)x\";", + format("R\"x(raw literal)x\";", getLLVMStyleWithColumns(15))); +} + TEST_F(FormatTest, BreakStringLiteralsBeforeUnbreakableTokenSequence) { EXPECT_EQ("someFunction(\"aaabbbcccd\"\n" " \"ddeeefff\");",