Calculate and store ColumnWidth instead of CodePointCount in FormatTokens.
Summary: This fixes various issues with mixed tabs and spaces handling, e.g. when realigning block comments. Reviewers: klimek, djasper Reviewed By: djasper CC: cfe-commits Differential Revision: http://llvm-reviews.chandlerc.com/D1608 llvm-svn: 190395
This commit is contained in:
parent
3767ccf318
commit
39856b71a6
|
@ -337,8 +337,10 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
|
|||
// if leading tabs are intermixed with spaces, that is not a high priority.
|
||||
|
||||
// Adjust the start column uniformly accross all lines.
|
||||
StartOfLineColumn[LineIndex] =
|
||||
std::max<int>(0, Whitespace.size() + IndentDelta);
|
||||
StartOfLineColumn[LineIndex] = std::max<int>(
|
||||
0,
|
||||
encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
|
||||
IndentDelta);
|
||||
}
|
||||
|
||||
unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
|
||||
|
|
|
@ -201,7 +201,7 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
|
|||
State.NextToken->WhitespaceRange.getEnd()) -
|
||||
SourceMgr.getSpellingColumnNumber(
|
||||
State.NextToken->WhitespaceRange.getBegin());
|
||||
State.Column += WhitespaceLength + State.NextToken->CodePointCount;
|
||||
State.Column += WhitespaceLength + State.NextToken->ColumnWidth;
|
||||
State.NextToken = State.NextToken->Next;
|
||||
return 0;
|
||||
}
|
||||
|
@ -257,11 +257,11 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
|
|||
State.Line->StartsDefinition))) {
|
||||
State.Column = State.Stack.back().Indent;
|
||||
} else if (Current.Type == TT_ObjCSelectorName) {
|
||||
if (State.Stack.back().ColonPos > Current.CodePointCount) {
|
||||
State.Column = State.Stack.back().ColonPos - Current.CodePointCount;
|
||||
if (State.Stack.back().ColonPos > Current.ColumnWidth) {
|
||||
State.Column = State.Stack.back().ColonPos - Current.ColumnWidth;
|
||||
} else {
|
||||
State.Column = State.Stack.back().Indent;
|
||||
State.Stack.back().ColonPos = State.Column + Current.CodePointCount;
|
||||
State.Stack.back().ColonPos = State.Column + Current.ColumnWidth;
|
||||
}
|
||||
} else if (Current.is(tok::l_square) && Current.Type != TT_ObjCMethodExpr &&
|
||||
Current.Type != TT_LambdaLSquare) {
|
||||
|
@ -307,7 +307,7 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
|
|||
if (!Current.isTrailingComment())
|
||||
State.Stack.back().LastSpace = State.Column;
|
||||
if (Current.isMemberAccess())
|
||||
State.Stack.back().LastSpace += Current.CodePointCount;
|
||||
State.Stack.back().LastSpace += Current.ColumnWidth;
|
||||
State.StartOfLineLevel = State.ParenLevel;
|
||||
State.LowestLevelOnLine = State.ParenLevel;
|
||||
|
||||
|
@ -343,8 +343,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
|
|||
State.Stack.back().VariablePos = State.Column;
|
||||
// Move over * and & if they are bound to the variable name.
|
||||
const FormatToken *Tok = &Previous;
|
||||
while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) {
|
||||
State.Stack.back().VariablePos -= Tok->CodePointCount;
|
||||
while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) {
|
||||
State.Stack.back().VariablePos -= Tok->ColumnWidth;
|
||||
if (Tok->SpacesRequiredBefore != 0)
|
||||
break;
|
||||
Tok = Tok->Previous;
|
||||
|
@ -361,12 +361,12 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
|
|||
if (Current.Type == TT_ObjCSelectorName &&
|
||||
State.Stack.back().ColonPos == 0) {
|
||||
if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
|
||||
State.Column + Spaces + Current.CodePointCount)
|
||||
State.Column + Spaces + Current.ColumnWidth)
|
||||
State.Stack.back().ColonPos =
|
||||
State.Stack.back().Indent + Current.LongestObjCSelectorName;
|
||||
else
|
||||
State.Stack.back().ColonPos =
|
||||
State.Column + Spaces + Current.CodePointCount;
|
||||
State.Column + Spaces + Current.ColumnWidth;
|
||||
}
|
||||
|
||||
if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
|
||||
|
@ -436,7 +436,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
|
|||
std::min(State.LowestLevelOnLine, State.ParenLevel);
|
||||
if (Current.isMemberAccess())
|
||||
State.Stack.back().StartOfFunctionCall =
|
||||
Current.LastInChainOfCalls ? 0 : State.Column + Current.CodePointCount;
|
||||
Current.LastInChainOfCalls ? 0 : State.Column + Current.ColumnWidth;
|
||||
if (Current.Type == TT_CtorInitializerColon) {
|
||||
// Indent 2 from the column, so:
|
||||
// SomeClass::SomeClass()
|
||||
|
@ -592,7 +592,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
|
|||
State.StartOfStringLiteral = 0;
|
||||
}
|
||||
|
||||
State.Column += Current.CodePointCount;
|
||||
State.Column += Current.ColumnWidth;
|
||||
State.NextToken = State.NextToken->Next;
|
||||
unsigned Penalty = breakProtrudingToken(Current, State, DryRun);
|
||||
if (State.Column > getColumnLimit(State)) {
|
||||
|
@ -618,8 +618,7 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
|
|||
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
|
||||
State.Stack[i].BreakBeforeParameter = true;
|
||||
|
||||
unsigned ColumnsUsed =
|
||||
State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
|
||||
unsigned ColumnsUsed = State.Column;
|
||||
// We can only affect layout of the first and the last line, so the penalty
|
||||
// for all other lines is constant, and we ignore it.
|
||||
State.Column = Current.LastLineColumnWidth;
|
||||
|
@ -636,14 +635,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
return 0;
|
||||
|
||||
llvm::OwningPtr<BreakableToken> Token;
|
||||
unsigned StartColumn = State.Column - Current.CodePointCount;
|
||||
unsigned StartColumn = State.Column - Current.ColumnWidth;
|
||||
|
||||
if (Current.is(tok::string_literal) &&
|
||||
Current.Type != TT_ImplicitStringLiteral) {
|
||||
// Don't break string literals with (in case of non-raw strings, escaped)
|
||||
// newlines. As clang-format must not change the string's content, it is
|
||||
// unlikely that we'll end up with a better format.
|
||||
if (Current.isMultiline())
|
||||
if (Current.IsMultiline)
|
||||
return addMultilineStringLiteral(Current, State);
|
||||
|
||||
// Only break up default narrow strings.
|
||||
|
@ -657,11 +656,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
Token.reset(new BreakableStringLiteral(
|
||||
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
|
||||
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
|
||||
unsigned OriginalStartColumn =
|
||||
SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
|
||||
1;
|
||||
Token.reset(new BreakableBlockComment(
|
||||
Current, StartColumn, OriginalStartColumn, !Current.Previous,
|
||||
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
|
||||
State.Line->InPPDirective, Encoding, Style));
|
||||
} else if (Current.Type == TT_LineComment &&
|
||||
(Current.Previous == NULL ||
|
||||
|
@ -673,10 +669,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
// FIXME: If we want to handle them correctly, we'll need to adjust
|
||||
// leading whitespace in consecutive lines when changing indentation of
|
||||
// the first line similar to what we do with block comments.
|
||||
if (Current.isMultiline()) {
|
||||
State.Column = StartColumn + Current.FirstLineColumnWidth;
|
||||
if (Current.IsMultiline)
|
||||
return 0;
|
||||
}
|
||||
|
||||
Token.reset(new BreakableLineComment(
|
||||
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
|
||||
|
@ -759,12 +753,12 @@ bool ContinuationIndenter::NextIsMultilineString(const LineState &State) {
|
|||
// AlwaysBreakBeforeMultilineStrings implementation.
|
||||
if (Current.TokenText.startswith("R\""))
|
||||
return false;
|
||||
if (Current.isMultiline())
|
||||
if (Current.IsMultiline)
|
||||
return true;
|
||||
if (Current.getNextNonComment() &&
|
||||
Current.getNextNonComment()->is(tok::string_literal))
|
||||
return true; // Implicit concatenation.
|
||||
if (State.Column + Current.CodePointCount + Current.UnbreakableTailLength >
|
||||
if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
|
||||
Style.ColumnLimit)
|
||||
return true; // String will be split.
|
||||
return false;
|
||||
|
|
|
@ -610,7 +610,7 @@ private:
|
|||
FormatTok->WhitespaceRange =
|
||||
SourceRange(GreaterLocation, GreaterLocation);
|
||||
FormatTok->TokenText = ">";
|
||||
FormatTok->CodePointCount = 1;
|
||||
FormatTok->ColumnWidth = 1;
|
||||
GreaterStashed = false;
|
||||
return FormatTok;
|
||||
}
|
||||
|
@ -666,6 +666,10 @@ private:
|
|||
Column = 0;
|
||||
FormatTok->TokenText = FormatTok->TokenText.substr(2);
|
||||
}
|
||||
|
||||
FormatTok->WhitespaceRange = SourceRange(
|
||||
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
|
||||
|
||||
FormatTok->OriginalColumn = Column;
|
||||
|
||||
TrailingWhitespace = 0;
|
||||
|
@ -685,24 +689,29 @@ private:
|
|||
}
|
||||
|
||||
// Now FormatTok is the next non-whitespace token.
|
||||
FormatTok->CodePointCount =
|
||||
encoding::getCodePointCount(FormatTok->TokenText, Encoding);
|
||||
|
||||
if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
|
||||
StringRef Text = FormatTok->TokenText;
|
||||
size_t FirstNewlinePos = Text.find('\n');
|
||||
if (FirstNewlinePos != StringRef::npos) {
|
||||
// FIXME: Handle embedded tabs.
|
||||
FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
|
||||
Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
|
||||
FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
|
||||
Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
|
||||
Encoding);
|
||||
}
|
||||
StringRef Text = FormatTok->TokenText;
|
||||
size_t FirstNewlinePos = Text.find('\n');
|
||||
if (FirstNewlinePos != StringRef::npos) {
|
||||
FormatTok->IsMultiline = true;
|
||||
// The last line of the token always starts in column 0.
|
||||
// Thus, the length can be precomputed even in the presence of tabs.
|
||||
FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
|
||||
Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
|
||||
Encoding);
|
||||
Text = Text.substr(0, FirstNewlinePos);
|
||||
}
|
||||
// FIXME: Add the CodePointCount to Column.
|
||||
FormatTok->WhitespaceRange = SourceRange(
|
||||
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
|
||||
|
||||
// FIXME: ColumnWidth actually depends on the start column, we need to
|
||||
// take this into account when the token is moved.
|
||||
FormatTok->ColumnWidth =
|
||||
encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
|
||||
|
||||
// FIXME: For multi-line tokens this should be LastLineColumnWidth.
|
||||
// For line comments this should probably be zero. But before changing,
|
||||
// we need good tests for this.
|
||||
Column += FormatTok->ColumnWidth;
|
||||
|
||||
return FormatTok;
|
||||
}
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ unsigned CommaSeparatedList::format(LineState &State,
|
|||
// Calculate the number of code points we have to format this list. As the
|
||||
// first token is already placed, we have to subtract it.
|
||||
unsigned RemainingCodePoints = Style.ColumnLimit - State.Column +
|
||||
State.NextToken->Previous->CodePointCount;
|
||||
State.NextToken->Previous->ColumnWidth;
|
||||
|
||||
// Find the best ColumnFormat, i.e. the best number of columns to use.
|
||||
const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
|
||||
|
@ -82,7 +82,7 @@ unsigned CommaSeparatedList::format(LineState &State,
|
|||
static unsigned CodePointsBetween(const FormatToken *Begin,
|
||||
const FormatToken *End) {
|
||||
assert(End->TotalLength >= Begin->TotalLength);
|
||||
return End->TotalLength - Begin->TotalLength + Begin->CodePointCount;
|
||||
return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
|
||||
}
|
||||
|
||||
void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
|
||||
|
|
|
@ -83,7 +83,7 @@ class AnnotatedLine;
|
|||
struct FormatToken {
|
||||
FormatToken()
|
||||
: NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
|
||||
CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
|
||||
ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false),
|
||||
IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
|
||||
BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
|
||||
CanBreakBefore(false), ClosesTemplateDeclaration(false),
|
||||
|
@ -114,22 +114,17 @@ struct FormatToken {
|
|||
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
|
||||
unsigned LastNewlineOffset;
|
||||
|
||||
/// \brief The length of the non-whitespace parts of the token in CodePoints.
|
||||
/// \brief The width of the non-whitespace parts of the token (or its first
|
||||
/// line for multi-line tokens) in columns.
|
||||
/// We need this to correctly measure number of columns a token spans.
|
||||
unsigned CodePointCount;
|
||||
unsigned ColumnWidth;
|
||||
|
||||
/// \brief Contains the number of code points in the first line of a
|
||||
/// multi-line string literal or comment. Zero if there's no newline in the
|
||||
/// \brief Contains the width in columns of the last line of a multi-line
|
||||
/// token.
|
||||
unsigned FirstLineColumnWidth;
|
||||
|
||||
/// \brief Contains the number of code points in the last line of a
|
||||
/// multi-line string literal or comment. Can be zero for line comments.
|
||||
unsigned LastLineColumnWidth;
|
||||
|
||||
/// \brief Returns \c true if the token text contains newlines (escaped or
|
||||
/// not).
|
||||
bool isMultiline() const { return FirstLineColumnWidth != 0; }
|
||||
/// \brief Whether the token text contains newlines (escaped or not).
|
||||
bool IsMultiline;
|
||||
|
||||
/// \brief Indicates that this is the first token.
|
||||
bool IsFirst;
|
||||
|
@ -189,12 +184,8 @@ struct FormatToken {
|
|||
/// token.
|
||||
unsigned TotalLength;
|
||||
|
||||
/// \brief The original column of this token, including expanded tabs.
|
||||
/// The configured IndentWidth is used as tab width. Only tabs in whitespace
|
||||
/// are expanded.
|
||||
/// FIXME: This is currently only used on the first token of an unwrapped
|
||||
/// line, and the implementation is not correct for other tokens (see the
|
||||
/// FIXMEs in FormatTokenLexer::getNextToken()).
|
||||
/// \brief The original 0-based column of this token, including expanded tabs.
|
||||
/// The configured TabWidth is used as tab width.
|
||||
unsigned OriginalColumn;
|
||||
|
||||
/// \brief The length of following tokens until the next natural split point,
|
||||
|
|
|
@ -326,10 +326,10 @@ private:
|
|||
Line.First->Type == TT_ObjCMethodSpecifier) {
|
||||
Tok->Type = TT_ObjCMethodExpr;
|
||||
Tok->Previous->Type = TT_ObjCSelectorName;
|
||||
if (Tok->Previous->CodePointCount >
|
||||
if (Tok->Previous->ColumnWidth >
|
||||
Contexts.back().LongestObjCSelectorName) {
|
||||
Contexts.back().LongestObjCSelectorName =
|
||||
Tok->Previous->CodePointCount;
|
||||
Tok->Previous->ColumnWidth;
|
||||
}
|
||||
if (Contexts.back().FirstObjCSelectorName == NULL)
|
||||
Contexts.back().FirstObjCSelectorName = Tok->Previous;
|
||||
|
@ -1022,7 +1022,8 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
|
|||
}
|
||||
|
||||
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
|
||||
Line.First->TotalLength = Line.First->CodePointCount;
|
||||
Line.First->TotalLength =
|
||||
Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
|
||||
if (!Line.First->Next)
|
||||
return;
|
||||
FormatToken *Current = Line.First->Next;
|
||||
|
@ -1055,11 +1056,11 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
|
|||
Current->CanBreakBefore =
|
||||
Current->MustBreakBefore || canBreakBefore(Line, *Current);
|
||||
if (Current->MustBreakBefore || !Current->Children.empty() ||
|
||||
(Current->is(tok::string_literal) && Current->isMultiline()))
|
||||
Current->IsMultiline)
|
||||
Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
|
||||
else
|
||||
Current->TotalLength = Current->Previous->TotalLength +
|
||||
Current->CodePointCount +
|
||||
Current->ColumnWidth +
|
||||
Current->SpacesRequiredBefore;
|
||||
// FIXME: Only calculate this if CanBreakBefore is true once static
|
||||
// initializers etc. are sorted out.
|
||||
|
@ -1095,7 +1096,7 @@ void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
|
|||
UnbreakableTailLength = 0;
|
||||
} else {
|
||||
UnbreakableTailLength +=
|
||||
Current->CodePointCount + Current->SpacesRequiredBefore;
|
||||
Current->ColumnWidth + Current->SpacesRequiredBefore;
|
||||
}
|
||||
Current = Current->Previous;
|
||||
}
|
||||
|
|
|
@ -5738,33 +5738,48 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
|
|||
// FIXME: To correctly count mixed whitespace we need to
|
||||
// also correctly count mixed whitespace in front of the comment.
|
||||
|
||||
// Tab.TabWidth = 8;
|
||||
// Tab.IndentWidth = 8;
|
||||
// EXPECT_EQ("/*\n"
|
||||
// "\t a\t\tcomment\n"
|
||||
// "\t in multiple lines\n"
|
||||
// " */",
|
||||
// format(" /*\t \t \n"
|
||||
// " \t \t a\t\tcomment\t \t\n"
|
||||
// " \t \t in multiple lines\t\n"
|
||||
// " \t */",
|
||||
// Tab));
|
||||
// Tab.UseTab = false;
|
||||
// EXPECT_EQ("/*\n"
|
||||
// " a\t\tcomment\n"
|
||||
// " in multiple lines\n"
|
||||
// " */",
|
||||
// format(" /*\t \t \n"
|
||||
// " \t \t a\t\tcomment\t \t\n"
|
||||
// " \t \t in multiple lines\t\n"
|
||||
// " \t */",
|
||||
// Tab));
|
||||
// EXPECT_EQ("/* some\n"
|
||||
// " comment */",
|
||||
// format(" \t \t /* some\n"
|
||||
// " \t \t comment */",
|
||||
// Tab));
|
||||
Tab.TabWidth = 8;
|
||||
Tab.IndentWidth = 8;
|
||||
EXPECT_EQ("/*\n"
|
||||
"\t a\t\tcomment\n"
|
||||
"\t in multiple lines\n"
|
||||
" */",
|
||||
format(" /*\t \t \n"
|
||||
" \t \t a\t\tcomment\t \t\n"
|
||||
" \t \t in multiple lines\t\n"
|
||||
" \t */",
|
||||
Tab));
|
||||
Tab.UseTab = false;
|
||||
EXPECT_EQ("/*\n"
|
||||
" a\t\tcomment\n"
|
||||
" in multiple lines\n"
|
||||
" */",
|
||||
format(" /*\t \t \n"
|
||||
" \t \t a\t\tcomment\t \t\n"
|
||||
" \t \t in multiple lines\t\n"
|
||||
" \t */",
|
||||
Tab));
|
||||
EXPECT_EQ("/* some\n"
|
||||
" comment */",
|
||||
format(" \t \t /* some\n"
|
||||
" \t \t comment */",
|
||||
Tab));
|
||||
EXPECT_EQ("int a; /* some\n"
|
||||
" comment */",
|
||||
format(" \t \t int a; /* some\n"
|
||||
" \t \t comment */",
|
||||
Tab));
|
||||
|
||||
EXPECT_EQ("int a; /* some\n"
|
||||
"comment */",
|
||||
format(" \t \t int\ta; /* some\n"
|
||||
" \t \t comment */",
|
||||
Tab));
|
||||
EXPECT_EQ("f(\"\t\t\"); /* some\n"
|
||||
" comment */",
|
||||
format(" \t \t f(\"\t\t\"); /* some\n"
|
||||
" \t \t comment */",
|
||||
Tab));
|
||||
EXPECT_EQ("{\n"
|
||||
" /*\n"
|
||||
" * Comment\n"
|
||||
|
|
Loading…
Reference in New Issue