Calculate and store ColumnWidth instead of CodePointCount in FormatTokens.

Summary:
This fixes various issues with mixed tabs and spaces handling, e.g.
when realigning block comments.

Reviewers: klimek, djasper

Reviewed By: djasper

CC: cfe-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D1608

llvm-svn: 190395
This commit is contained in:
Alexander Kornienko 2013-09-10 09:38:25 +00:00
parent 3767ccf318
commit 39856b71a6
7 changed files with 107 additions and 95 deletions

View File

@ -337,8 +337,10 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
// if leading tabs are intermixed with spaces, that is not a high priority.
// Adjust the start column uniformly across all lines.
StartOfLineColumn[LineIndex] =
std::max<int>(0, Whitespace.size() + IndentDelta);
StartOfLineColumn[LineIndex] = std::max<int>(
0,
encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
IndentDelta);
}
// Returns the number of entries in Lines.
unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }

View File

@ -201,7 +201,7 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
State.NextToken->WhitespaceRange.getEnd()) -
SourceMgr.getSpellingColumnNumber(
State.NextToken->WhitespaceRange.getBegin());
State.Column += WhitespaceLength + State.NextToken->CodePointCount;
State.Column += WhitespaceLength + State.NextToken->ColumnWidth;
State.NextToken = State.NextToken->Next;
return 0;
}
@ -257,11 +257,11 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
State.Line->StartsDefinition))) {
State.Column = State.Stack.back().Indent;
} else if (Current.Type == TT_ObjCSelectorName) {
if (State.Stack.back().ColonPos > Current.CodePointCount) {
State.Column = State.Stack.back().ColonPos - Current.CodePointCount;
if (State.Stack.back().ColonPos > Current.ColumnWidth) {
State.Column = State.Stack.back().ColonPos - Current.ColumnWidth;
} else {
State.Column = State.Stack.back().Indent;
State.Stack.back().ColonPos = State.Column + Current.CodePointCount;
State.Stack.back().ColonPos = State.Column + Current.ColumnWidth;
}
} else if (Current.is(tok::l_square) && Current.Type != TT_ObjCMethodExpr &&
Current.Type != TT_LambdaLSquare) {
@ -307,7 +307,7 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
if (!Current.isTrailingComment())
State.Stack.back().LastSpace = State.Column;
if (Current.isMemberAccess())
State.Stack.back().LastSpace += Current.CodePointCount;
State.Stack.back().LastSpace += Current.ColumnWidth;
State.StartOfLineLevel = State.ParenLevel;
State.LowestLevelOnLine = State.ParenLevel;
@ -343,8 +343,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
State.Stack.back().VariablePos = State.Column;
// Move over * and & if they are bound to the variable name.
const FormatToken *Tok = &Previous;
while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) {
State.Stack.back().VariablePos -= Tok->CodePointCount;
while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) {
State.Stack.back().VariablePos -= Tok->ColumnWidth;
if (Tok->SpacesRequiredBefore != 0)
break;
Tok = Tok->Previous;
@ -361,12 +361,12 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
if (Current.Type == TT_ObjCSelectorName &&
State.Stack.back().ColonPos == 0) {
if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
State.Column + Spaces + Current.CodePointCount)
State.Column + Spaces + Current.ColumnWidth)
State.Stack.back().ColonPos =
State.Stack.back().Indent + Current.LongestObjCSelectorName;
else
State.Stack.back().ColonPos =
State.Column + Spaces + Current.CodePointCount;
State.Column + Spaces + Current.ColumnWidth;
}
if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
@ -436,7 +436,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
std::min(State.LowestLevelOnLine, State.ParenLevel);
if (Current.isMemberAccess())
State.Stack.back().StartOfFunctionCall =
Current.LastInChainOfCalls ? 0 : State.Column + Current.CodePointCount;
Current.LastInChainOfCalls ? 0 : State.Column + Current.ColumnWidth;
if (Current.Type == TT_CtorInitializerColon) {
// Indent 2 from the column, so:
// SomeClass::SomeClass()
@ -592,7 +592,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.StartOfStringLiteral = 0;
}
State.Column += Current.CodePointCount;
State.Column += Current.ColumnWidth;
State.NextToken = State.NextToken->Next;
unsigned Penalty = breakProtrudingToken(Current, State, DryRun);
if (State.Column > getColumnLimit(State)) {
@ -618,8 +618,7 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
State.Stack[i].BreakBeforeParameter = true;
unsigned ColumnsUsed =
State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
unsigned ColumnsUsed = State.Column;
// We can only affect layout of the first and the last line, so the penalty
// for all other lines is constant, and we ignore it.
State.Column = Current.LastLineColumnWidth;
@ -636,14 +635,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
return 0;
llvm::OwningPtr<BreakableToken> Token;
unsigned StartColumn = State.Column - Current.CodePointCount;
unsigned StartColumn = State.Column - Current.ColumnWidth;
if (Current.is(tok::string_literal) &&
Current.Type != TT_ImplicitStringLiteral) {
// Don't break string literals with (in case of non-raw strings, escaped)
// newlines. As clang-format must not change the string's content, it is
// unlikely that we'll end up with a better format.
if (Current.isMultiline())
if (Current.IsMultiline)
return addMultilineStringLiteral(Current, State);
// Only break up default narrow strings.
@ -657,11 +656,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
Token.reset(new BreakableStringLiteral(
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
unsigned OriginalStartColumn =
SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
1;
Token.reset(new BreakableBlockComment(
Current, StartColumn, OriginalStartColumn, !Current.Previous,
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
State.Line->InPPDirective, Encoding, Style));
} else if (Current.Type == TT_LineComment &&
(Current.Previous == NULL ||
@ -673,10 +669,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// FIXME: If we want to handle them correctly, we'll need to adjust
// leading whitespace in consecutive lines when changing indentation of
// the first line similar to what we do with block comments.
if (Current.isMultiline()) {
State.Column = StartColumn + Current.FirstLineColumnWidth;
if (Current.IsMultiline)
return 0;
}
Token.reset(new BreakableLineComment(
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
@ -759,12 +753,12 @@ bool ContinuationIndenter::NextIsMultilineString(const LineState &State) {
// AlwaysBreakBeforeMultilineStrings implementation.
if (Current.TokenText.startswith("R\""))
return false;
if (Current.isMultiline())
if (Current.IsMultiline)
return true;
if (Current.getNextNonComment() &&
Current.getNextNonComment()->is(tok::string_literal))
return true; // Implicit concatenation.
if (State.Column + Current.CodePointCount + Current.UnbreakableTailLength >
if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
Style.ColumnLimit)
return true; // String will be split.
return false;

View File

@ -610,7 +610,7 @@ private:
FormatTok->WhitespaceRange =
SourceRange(GreaterLocation, GreaterLocation);
FormatTok->TokenText = ">";
FormatTok->CodePointCount = 1;
FormatTok->ColumnWidth = 1;
GreaterStashed = false;
return FormatTok;
}
@ -666,6 +666,10 @@ private:
Column = 0;
FormatTok->TokenText = FormatTok->TokenText.substr(2);
}
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
FormatTok->OriginalColumn = Column;
TrailingWhitespace = 0;
@ -685,24 +689,29 @@ private:
}
// Now FormatTok is the next non-whitespace token.
FormatTok->CodePointCount =
encoding::getCodePointCount(FormatTok->TokenText, Encoding);
if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
StringRef Text = FormatTok->TokenText;
size_t FirstNewlinePos = Text.find('\n');
if (FirstNewlinePos != StringRef::npos) {
// FIXME: Handle embedded tabs.
FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
Encoding);
}
StringRef Text = FormatTok->TokenText;
size_t FirstNewlinePos = Text.find('\n');
if (FirstNewlinePos != StringRef::npos) {
FormatTok->IsMultiline = true;
// The last line of the token always starts in column 0.
// Thus, the length can be precomputed even in the presence of tabs.
FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
Encoding);
Text = Text.substr(0, FirstNewlinePos);
}
// FIXME: Add the CodePointCount to Column.
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
// FIXME: ColumnWidth actually depends on the start column, we need to
// take this into account when the token is moved.
FormatTok->ColumnWidth =
encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
// FIXME: For multi-line tokens this should be LastLineColumnWidth.
// For line comments this should probably be zero. But before changing,
// we need good tests for this.
Column += FormatTok->ColumnWidth;
return FormatTok;
}

View File

@ -42,7 +42,7 @@ unsigned CommaSeparatedList::format(LineState &State,
// Calculate the number of code points we have to format this list. As the
// first token is already placed, we have to subtract it.
unsigned RemainingCodePoints = Style.ColumnLimit - State.Column +
State.NextToken->Previous->CodePointCount;
State.NextToken->Previous->ColumnWidth;
// Find the best ColumnFormat, i.e. the best number of columns to use.
const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
@ -82,7 +82,7 @@ unsigned CommaSeparatedList::format(LineState &State,
static unsigned CodePointsBetween(const FormatToken *Begin,
const FormatToken *End) {
assert(End->TotalLength >= Begin->TotalLength);
return End->TotalLength - Begin->TotalLength + Begin->CodePointCount;
return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
}
void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {

View File

@ -83,7 +83,7 @@ class AnnotatedLine;
struct FormatToken {
FormatToken()
: NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false),
IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
CanBreakBefore(false), ClosesTemplateDeclaration(false),
@ -114,22 +114,17 @@ struct FormatToken {
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
unsigned LastNewlineOffset;
/// \brief The length of the non-whitespace parts of the token in CodePoints.
/// \brief The width of the non-whitespace parts of the token (or its first
/// line for multi-line tokens) in columns.
/// We need this to correctly measure number of columns a token spans.
unsigned CodePointCount;
unsigned ColumnWidth;
/// \brief Contains the number of code points in the first line of a
/// multi-line string literal or comment. Zero if there's no newline in the
/// \brief Contains the width in columns of the last line of a multi-line
/// token.
unsigned FirstLineColumnWidth;
/// \brief Contains the number of code points in the last line of a
/// multi-line string literal or comment. Can be zero for line comments.
unsigned LastLineColumnWidth;
/// \brief Returns \c true if the token text contains newlines (escaped or
/// not).
bool isMultiline() const { return FirstLineColumnWidth != 0; }
/// \brief Whether the token text contains newlines (escaped or not).
bool IsMultiline;
/// \brief Indicates that this is the first token.
bool IsFirst;
@ -189,12 +184,8 @@ struct FormatToken {
/// token.
unsigned TotalLength;
/// \brief The original column of this token, including expanded tabs.
/// The configured IndentWidth is used as tab width. Only tabs in whitespace
/// are expanded.
/// FIXME: This is currently only used on the first token of an unwrapped
/// line, and the implementation is not correct for other tokens (see the
/// FIXMEs in FormatTokenLexer::getNextToken()).
/// \brief The original 0-based column of this token, including expanded tabs.
/// The configured TabWidth is used as tab width.
unsigned OriginalColumn;
/// \brief The length of following tokens until the next natural split point,

View File

@ -326,10 +326,10 @@ private:
Line.First->Type == TT_ObjCMethodSpecifier) {
Tok->Type = TT_ObjCMethodExpr;
Tok->Previous->Type = TT_ObjCSelectorName;
if (Tok->Previous->CodePointCount >
if (Tok->Previous->ColumnWidth >
Contexts.back().LongestObjCSelectorName) {
Contexts.back().LongestObjCSelectorName =
Tok->Previous->CodePointCount;
Tok->Previous->ColumnWidth;
}
if (Contexts.back().FirstObjCSelectorName == NULL)
Contexts.back().FirstObjCSelectorName = Tok->Previous;
@ -1022,7 +1022,8 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
}
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
Line.First->TotalLength = Line.First->CodePointCount;
Line.First->TotalLength =
Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
if (!Line.First->Next)
return;
FormatToken *Current = Line.First->Next;
@ -1055,11 +1056,11 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
Current->CanBreakBefore =
Current->MustBreakBefore || canBreakBefore(Line, *Current);
if (Current->MustBreakBefore || !Current->Children.empty() ||
(Current->is(tok::string_literal) && Current->isMultiline()))
Current->IsMultiline)
Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
else
Current->TotalLength = Current->Previous->TotalLength +
Current->CodePointCount +
Current->ColumnWidth +
Current->SpacesRequiredBefore;
// FIXME: Only calculate this if CanBreakBefore is true once static
// initializers etc. are sorted out.
@ -1095,7 +1096,7 @@ void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
UnbreakableTailLength = 0;
} else {
UnbreakableTailLength +=
Current->CodePointCount + Current->SpacesRequiredBefore;
Current->ColumnWidth + Current->SpacesRequiredBefore;
}
Current = Current->Previous;
}

View File

@ -5738,33 +5738,48 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
// FIXME: To correctly count mixed whitespace we need to
// also correctly count mixed whitespace in front of the comment.
// Tab.TabWidth = 8;
// Tab.IndentWidth = 8;
// EXPECT_EQ("/*\n"
// "\t a\t\tcomment\n"
// "\t in multiple lines\n"
// " */",
// format(" /*\t \t \n"
// " \t \t a\t\tcomment\t \t\n"
// " \t \t in multiple lines\t\n"
// " \t */",
// Tab));
// Tab.UseTab = false;
// EXPECT_EQ("/*\n"
// " a\t\tcomment\n"
// " in multiple lines\n"
// " */",
// format(" /*\t \t \n"
// " \t \t a\t\tcomment\t \t\n"
// " \t \t in multiple lines\t\n"
// " \t */",
// Tab));
// EXPECT_EQ("/* some\n"
// " comment */",
// format(" \t \t /* some\n"
// " \t \t comment */",
// Tab));
Tab.TabWidth = 8;
Tab.IndentWidth = 8;
EXPECT_EQ("/*\n"
"\t a\t\tcomment\n"
"\t in multiple lines\n"
" */",
format(" /*\t \t \n"
" \t \t a\t\tcomment\t \t\n"
" \t \t in multiple lines\t\n"
" \t */",
Tab));
Tab.UseTab = false;
EXPECT_EQ("/*\n"
" a\t\tcomment\n"
" in multiple lines\n"
" */",
format(" /*\t \t \n"
" \t \t a\t\tcomment\t \t\n"
" \t \t in multiple lines\t\n"
" \t */",
Tab));
EXPECT_EQ("/* some\n"
" comment */",
format(" \t \t /* some\n"
" \t \t comment */",
Tab));
EXPECT_EQ("int a; /* some\n"
" comment */",
format(" \t \t int a; /* some\n"
" \t \t comment */",
Tab));
EXPECT_EQ("int a; /* some\n"
"comment */",
format(" \t \t int\ta; /* some\n"
" \t \t comment */",
Tab));
EXPECT_EQ("f(\"\t\t\"); /* some\n"
" comment */",
format(" \t \t f(\"\t\t\"); /* some\n"
" \t \t comment */",
Tab));
EXPECT_EQ("{\n"
" /*\n"
" * Comment\n"