Don't remove backslashes from block comments.
Summary: Don't remove backslashes from block comments. Previously this /* \ \ \ \ \ \ */ would be turned to this: /* */ which spoils some kinds of ASCII-art, people use in their comments. The behavior was related to handling escaped newlines in block comments inside preprocessor directives. This patch makes handling it in a more civilized way. Reviewers: klimek Reviewed By: klimek CC: cfe-commits Differential Revision: http://llvm-reviews.chandlerc.com/D979 llvm-svn: 183978
This commit is contained in:
parent
7e325fb477
commit
be633908be
|
@ -119,13 +119,11 @@ unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
|
|||
encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
|
||||
}
|
||||
|
||||
BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
|
||||
unsigned StartColumn,
|
||||
StringRef Prefix,
|
||||
StringRef Postfix,
|
||||
encoding::Encoding Encoding)
|
||||
: BreakableToken(Tok, Encoding), StartColumn(StartColumn), Prefix(Prefix),
|
||||
Postfix(Postfix) {
|
||||
BreakableSingleLineToken::BreakableSingleLineToken(
|
||||
const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
|
||||
StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
|
||||
: BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
|
||||
Prefix(Prefix), Postfix(Postfix) {
|
||||
assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
|
||||
Line = Tok.TokenText.substr(
|
||||
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
|
||||
|
@ -133,8 +131,10 @@ BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
|
|||
|
||||
BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
|
||||
unsigned StartColumn,
|
||||
bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
: BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", Encoding) {}
|
||||
: BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
|
||||
Encoding) {}
|
||||
|
||||
BreakableToken::Split
|
||||
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
|
@ -145,7 +145,6 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
|
|||
|
||||
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
|
||||
unsigned TailOffset, Split Split,
|
||||
bool InPPDirective,
|
||||
WhitespaceManager &Whitespaces) {
|
||||
Whitespaces.replaceWhitespaceInToken(
|
||||
Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
|
||||
|
@ -162,10 +161,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) {
|
|||
|
||||
BreakableLineComment::BreakableLineComment(const FormatToken &Token,
|
||||
unsigned StartColumn,
|
||||
bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
: BreakableSingleLineToken(Token, StartColumn,
|
||||
getLineCommentPrefix(Token.TokenText), "",
|
||||
Encoding) {
|
||||
InPPDirective, Encoding) {
|
||||
OriginalPrefix = Prefix;
|
||||
if (Token.TokenText.size() > Prefix.size() &&
|
||||
isAlphanumeric(Token.TokenText[Prefix.size()])) {
|
||||
|
@ -184,7 +184,7 @@ BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
|
|||
}
|
||||
|
||||
void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
|
||||
Split Split, bool InPPDirective,
|
||||
Split Split,
|
||||
WhitespaceManager &Whitespaces) {
|
||||
Whitespaces.replaceWhitespaceInToken(
|
||||
Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
|
||||
|
@ -193,7 +193,6 @@ void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
|
|||
|
||||
void
|
||||
BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
|
||||
unsigned InPPDirective,
|
||||
WhitespaceManager &Whitespaces) {
|
||||
if (OriginalPrefix != Prefix) {
|
||||
Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
|
||||
|
@ -203,8 +202,9 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
|
|||
|
||||
BreakableBlockComment::BreakableBlockComment(
|
||||
const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
|
||||
unsigned OriginalStartColumn, bool FirstInLine, encoding::Encoding Encoding)
|
||||
: BreakableToken(Token, Encoding) {
|
||||
unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
: BreakableToken(Token, InPPDirective, Encoding) {
|
||||
StringRef TokenText(Token.TokenText);
|
||||
assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
|
||||
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
|
||||
|
@ -264,8 +264,18 @@ BreakableBlockComment::BreakableBlockComment(
|
|||
void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
|
||||
unsigned LineIndex,
|
||||
int IndentDelta) {
|
||||
// When in a preprocessor directive, the trailing backslash in a block comment
|
||||
// is not needed, but can serve a purpose of uniformity with necessary escaped
|
||||
// newlines outside the comment. In this case we remove it here before
|
||||
// trimming the trailing whitespace. The backslash will be re-added later when
|
||||
// inserting a line break.
|
||||
size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
|
||||
if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
|
||||
--EndOfPreviousLine;
|
||||
|
||||
// Calculate the end of the non-whitespace text in the previous line.
|
||||
size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
|
||||
EndOfPreviousLine =
|
||||
Lines[LineIndex - 1].find_last_not_of(" \t", EndOfPreviousLine);
|
||||
if (EndOfPreviousLine == StringRef::npos)
|
||||
EndOfPreviousLine = 0;
|
||||
else
|
||||
|
@ -313,7 +323,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
|
|||
}
|
||||
|
||||
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
|
||||
Split Split, bool InPPDirective,
|
||||
Split Split,
|
||||
WhitespaceManager &Whitespaces) {
|
||||
StringRef Text = Lines[LineIndex].substr(TailOffset);
|
||||
StringRef Prefix = Decoration;
|
||||
|
@ -334,7 +344,6 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
|
|||
|
||||
void
|
||||
BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
|
||||
unsigned InPPDirective,
|
||||
WhitespaceManager &Whitespaces) {
|
||||
if (LineIndex == 0)
|
||||
return;
|
||||
|
|
|
@ -59,20 +59,20 @@ public:
|
|||
|
||||
/// \brief Emits the previously retrieved \p Split via \p Whitespaces.
|
||||
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
|
||||
bool InPPDirective,
|
||||
WhitespaceManager &Whitespaces) = 0;
|
||||
|
||||
/// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
|
||||
virtual void replaceWhitespaceBefore(unsigned LineIndex,
|
||||
unsigned InPPDirective,
|
||||
WhitespaceManager &Whitespaces) {}
|
||||
|
||||
protected:
|
||||
BreakableToken(const FormatToken &Tok, encoding::Encoding Encoding)
|
||||
: Tok(Tok), Encoding(Encoding) {}
|
||||
BreakableToken(const FormatToken &Tok, bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
|
||||
|
||||
const FormatToken &Tok;
|
||||
encoding::Encoding Encoding;
|
||||
const bool InPPDirective;
|
||||
const encoding::Encoding Encoding;
|
||||
};
|
||||
|
||||
/// \brief Base class for single line tokens that can be broken.
|
||||
|
@ -88,7 +88,7 @@ public:
|
|||
protected:
|
||||
BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
|
||||
StringRef Prefix, StringRef Postfix,
|
||||
encoding::Encoding Encoding);
|
||||
bool InPPDirective, encoding::Encoding Encoding);
|
||||
|
||||
// The column in which the token starts.
|
||||
unsigned StartColumn;
|
||||
|
@ -107,12 +107,11 @@ public:
|
|||
/// \p StartColumn specifies the column in which the token will start
|
||||
/// after formatting.
|
||||
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
|
||||
encoding::Encoding Encoding);
|
||||
bool InPPDirective, encoding::Encoding Encoding);
|
||||
|
||||
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
unsigned ColumnLimit) const;
|
||||
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
|
||||
bool InPPDirective,
|
||||
WhitespaceManager &Whitespaces);
|
||||
};
|
||||
|
||||
|
@ -123,14 +122,13 @@ public:
|
|||
/// \p StartColumn specifies the column in which the comment will start
|
||||
/// after formatting.
|
||||
BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
|
||||
encoding::Encoding Encoding);
|
||||
bool InPPDirective, encoding::Encoding Encoding);
|
||||
|
||||
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
unsigned ColumnLimit) const;
|
||||
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
|
||||
bool InPPDirective, WhitespaceManager &Whitespaces);
|
||||
WhitespaceManager &Whitespaces);
|
||||
virtual void replaceWhitespaceBefore(unsigned LineIndex,
|
||||
unsigned InPPDirective,
|
||||
WhitespaceManager &Whitespaces);
|
||||
|
||||
private:
|
||||
|
@ -148,7 +146,8 @@ public:
|
|||
/// If the comment starts a line after formatting, set \p FirstInLine to true.
|
||||
BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
|
||||
unsigned StartColumn, unsigned OriginaStartColumn,
|
||||
bool FirstInLine, encoding::Encoding Encoding);
|
||||
bool FirstInLine, bool InPPDirective,
|
||||
encoding::Encoding Encoding);
|
||||
|
||||
virtual unsigned getLineCount() const;
|
||||
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
|
||||
|
@ -157,9 +156,8 @@ public:
|
|||
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
unsigned ColumnLimit) const;
|
||||
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
|
||||
bool InPPDirective, WhitespaceManager &Whitespaces);
|
||||
WhitespaceManager &Whitespaces);
|
||||
virtual void replaceWhitespaceBefore(unsigned LineIndex,
|
||||
unsigned InPPDirective,
|
||||
WhitespaceManager &Whitespaces);
|
||||
|
||||
private:
|
||||
|
|
|
@ -804,7 +804,6 @@ private:
|
|||
/// cover the cost of the additional line breaks.
|
||||
unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
|
||||
bool DryRun) {
|
||||
unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
|
||||
llvm::OwningPtr<BreakableToken> Token;
|
||||
unsigned StartColumn = State.Column - Current.CodePointCount;
|
||||
unsigned OriginalStartColumn =
|
||||
|
@ -814,37 +813,34 @@ private:
|
|||
if (Current.is(tok::string_literal) &&
|
||||
Current.Type != TT_ImplicitStringLiteral) {
|
||||
// Only break up default narrow strings.
|
||||
const char *LiteralData =
|
||||
SourceMgr.getCharacterData(Current.getStartOfNonWhitespace());
|
||||
if (!LiteralData || *LiteralData != '"')
|
||||
if (!Current.TokenText.startswith("\""))
|
||||
return 0;
|
||||
|
||||
Token.reset(new BreakableStringLiteral(Current, StartColumn, Encoding));
|
||||
Token.reset(new BreakableStringLiteral(Current, StartColumn,
|
||||
Line.InPPDirective, Encoding));
|
||||
} else if (Current.Type == TT_BlockComment) {
|
||||
BreakableBlockComment *BBC = new BreakableBlockComment(
|
||||
Token.reset(new BreakableBlockComment(
|
||||
Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
|
||||
Encoding);
|
||||
Token.reset(BBC);
|
||||
Line.InPPDirective, Encoding));
|
||||
} else if (Current.Type == TT_LineComment &&
|
||||
(Current.Previous == NULL ||
|
||||
Current.Previous->Type != TT_ImplicitStringLiteral)) {
|
||||
Token.reset(new BreakableLineComment(Current, StartColumn, Encoding));
|
||||
Token.reset(new BreakableLineComment(Current, StartColumn,
|
||||
Line.InPPDirective, Encoding));
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
if (UnbreakableTailLength >= getColumnLimit())
|
||||
if (Current.UnbreakableTailLength >= getColumnLimit())
|
||||
return 0;
|
||||
unsigned RemainingSpace = getColumnLimit() - UnbreakableTailLength;
|
||||
unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength;
|
||||
|
||||
bool BreakInserted = false;
|
||||
unsigned Penalty = 0;
|
||||
unsigned PositionAfterLastLineInToken = 0;
|
||||
for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
|
||||
LineIndex != EndIndex; ++LineIndex) {
|
||||
if (!DryRun) {
|
||||
Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective,
|
||||
Whitespaces);
|
||||
}
|
||||
if (!DryRun)
|
||||
Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
|
||||
unsigned TailOffset = 0;
|
||||
unsigned RemainingTokenColumns = Token->getLineLengthAfterSplit(
|
||||
LineIndex, TailOffset, StringRef::npos);
|
||||
|
@ -858,10 +854,8 @@ private:
|
|||
LineIndex, TailOffset + Split.first + Split.second,
|
||||
StringRef::npos);
|
||||
assert(NewRemainingTokenColumns < RemainingTokenColumns);
|
||||
if (!DryRun) {
|
||||
Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
|
||||
Whitespaces);
|
||||
}
|
||||
if (!DryRun)
|
||||
Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
|
||||
Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString
|
||||
: Style.PenaltyBreakComment;
|
||||
unsigned ColumnsUsed =
|
||||
|
|
|
@ -1961,6 +1961,10 @@ TEST_F(FormatTest, EscapedNewlineAtStartOfToken) {
|
|||
EXPECT_EQ("template <class T> f();", format("\\\ntemplate <class T> f();"));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, NoEscapedNewlineHandlingInBlockComments) {
|
||||
EXPECT_EQ("/* \\ \\ \\\n*/", format("\\\n/* \\ \\ \\\n*/"));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, CalculateSpaceOnConsecutiveLinesInMacro) {
|
||||
verifyFormat("#define A \\\n"
|
||||
" int v( \\\n"
|
||||
|
@ -4605,6 +4609,19 @@ TEST_F(FormatTest, BreakStringLiterals) {
|
|||
format("#define A \"some text other\";", AlignLeft));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, SkipsUnknownStringLiterals) {
|
||||
EXPECT_EQ("u8\"unsupported literal\";",
|
||||
format("u8\"unsupported literal\";", getLLVMStyleWithColumns(15)));
|
||||
EXPECT_EQ("u\"unsupported literal\";",
|
||||
format("u\"unsupported literal\";", getLLVMStyleWithColumns(15)));
|
||||
EXPECT_EQ("U\"unsupported literal\";",
|
||||
format("U\"unsupported literal\";", getLLVMStyleWithColumns(15)));
|
||||
EXPECT_EQ("L\"unsupported literal\";",
|
||||
format("L\"unsupported literal\";", getLLVMStyleWithColumns(15)));
|
||||
EXPECT_EQ("R\"x(raw literal)x\";",
|
||||
format("R\"x(raw literal)x\";", getLLVMStyleWithColumns(15)));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, BreakStringLiteralsBeforeUnbreakableTokenSequence) {
|
||||
EXPECT_EQ("someFunction(\"aaabbbcccd\"\n"
|
||||
" \"ddeeefff\");",
|
||||
|
|
Loading…
Reference in New Issue