diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 3e2e0ce7cf3dff36d970df2d0cd7808a2fcdbbe7..320913c2d46f096c02c9ac107d5746844d710a6f 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -56,13 +56,10 @@ void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset, AdditionalPrefix = ""; } - unsigned WhitespaceStartColumn = - getContentStartColumn(LineIndex, TailOffset) + Split.first; unsigned BreakOffset = Text.data() - TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix, - InPPDirective, IndentAtLineBreak, - WhitespaceStartColumn); + InPPDirective, IndentAtLineBreak); } BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr, @@ -144,16 +141,25 @@ void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset, if (LineIndex == Lines.size() - 1) return; StringRef Text = Lines[LineIndex].substr(TailOffset); + + // FIXME: The algorithm for trimming a line should naturally yield a + // non-change if there is nothing to trim; removing this line breaks the + // algorithm; investigate the root cause, and make sure to either document + // why exactly this is needed for remove it. if (!Text.endswith(" ") && !InPPDirective) return; StringRef TrimmedLine = Text.rtrim(); - unsigned WhitespaceStartColumn = - getLineLengthAfterSplit(LineIndex, TailOffset); unsigned BreakOffset = TrimmedLine.end() - TokenText.data(); unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1; + // FIXME: It seems like we're misusing the call to breakToken to remove + // whitespace instead of breaking a token. We should make this an explicit + // call option to the WhitespaceManager, or handle trimming and alignment + // of comments completely within in the WhitespaceManger. Passing '0' here + // and relying on this not breaking assumptions of the WhitespaceManager seems + // like a bad idea. Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective, - 0, WhitespaceStartColumn); + 0); } BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr, diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index c1303183d31246e17a3ad9098cb79081b89a1237..c7ba044cf1e06708763d98bba61993fd22b6ca7e 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -78,7 +78,10 @@ public: if (ColumnLimit <= getDecorationLength()) return Split(StringRef::npos, 0); unsigned MaxSplit = ColumnLimit - getDecorationLength(); - assert(MaxSplit < Text.size()); + // FIXME: Reduce unit test case. + if (Text.empty()) + return Split(StringRef::npos, 0); + MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1); StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); if (SpaceOffset != StringRef::npos && SpaceOffset != 0) return Split(SpaceOffset + 1, 0); @@ -94,10 +97,8 @@ public: virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, bool InPPDirective, WhitespaceManager &Whitespaces) { - unsigned WhitespaceStartColumn = StartColumn + Split.first + 2; Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second, - "\"", "\"", InPPDirective, StartColumn, - WhitespaceStartColumn); + "\"", "\"", InPPDirective, StartColumn); } private: diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 1f36f9ab64c0212301da9c10bbeb3b98da30eddc..6e81e44fe0335587fec9c9d632053a7314d9b012 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -242,10 +242,7 @@ public: Whitespaces(Whitespaces), Count(0) {} /// \brief Formats an \c UnwrappedLine. - /// - /// \returns The column after the last token in the last line of the - /// \c UnwrappedLine. - unsigned format(const AnnotatedLine *NextLine) { + void format(const AnnotatedLine *NextLine) { // Initialize state dependent on indent. LineState State; State.Column = FirstIndent; @@ -271,7 +268,6 @@ public: while (State.NextToken != NULL) { addTokenToState(false, false, State); } - return State.Column; } // If the ObjC method declaration does not fit on a line, we should format @@ -280,7 +276,7 @@ public: State.Stack.back().BreakBeforeParameter = true; // Find best solution in solution space. - return analyzeSolutionSpace(State); + analyzeSolutionSpace(State); } private: @@ -483,7 +479,6 @@ private: unsigned ContinuationIndent = std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4; if (Newline) { - unsigned WhitespaceStartColumn = State.Column; if (Current.is(tok::r_brace)) { State.Column = Line.Level * Style.IndentWidth; } else if (Current.is(tok::string_literal) && @@ -545,12 +540,8 @@ private: NewLines = std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); - if (!Line.InPPDirective) - Whitespaces.replaceWhitespace(Current, NewLines, State.Column, - WhitespaceStartColumn); - else - Whitespaces.replacePPWhitespace(Current, NewLines, State.Column, - WhitespaceStartColumn); + Whitespaces.replaceWhitespace(Current, NewLines, State.Column, + State.Column, Line.InPPDirective); } State.Stack.back().LastSpace = State.Column; @@ -603,7 +594,8 @@ private: unsigned Spaces = State.NextToken->SpacesRequiredBefore; if (!DryRun) - Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column); + Whitespaces.replaceWhitespace(Current, 0, Spaces, + State.Column + Spaces); if (Current.Type == TT_ObjCSelectorName && State.Stack.back().ColonPos == 0) { @@ -786,11 +778,10 @@ private: /// already handled in \c addNextStateToQueue; the returned penalty will only /// cover the cost of the additional line breaks. unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, - bool DryRun, - unsigned UnbreakableTailLength = 0) { + bool DryRun) { + unsigned UnbreakableTailLength = Current.UnbreakableTailLength; llvm::OwningPtr<BreakableToken> Token; - unsigned StartColumn = State.Column - Current.FormatTok.TokenLength - - UnbreakableTailLength; + unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; if (Current.is(tok::string_literal) && Current.Type != TT_ImplicitStringLiteral) { // Only break up default narrow strings. @@ -812,15 +803,7 @@ private: Current.Parent->Type != TT_ImplicitStringLiteral)) { Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn)); } else { - // If a token that we cannot breaks protrudes, it means we were unable to - // break a sequence of tokens due to disallowed breaks between the tokens. - // Thus, we recursively search backwards to try to find a breakable token. - if (State.Column <= getColumnLimit() || - Current.CanBreakBefore || !Current.Parent) - return 0; - return breakProtrudingToken( - *Current.Parent, State, DryRun, - UnbreakableTailLength + Current.FormatTok.TokenLength); + return 0; } if (UnbreakableTailLength >= getColumnLimit()) return 0; @@ -836,7 +819,7 @@ private: Token->getLineLengthAfterSplit(LineIndex, TailOffset); while (RemainingTokenLength > RemainingSpace) { BreakableToken::Split Split = - Token->getSplit(LineIndex, TailOffset, RemainingSpace); + Token->getSplit(LineIndex, TailOffset, getColumnLimit()); if (Split.first == StringRef::npos) break; assert(Split.first != 0); @@ -860,7 +843,7 @@ private: } if (BreakInserted) { - State.Column = PositionAfterLastLineInToken + UnbreakableTailLength; + State.Column = PositionAfterLastLineInToken; for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; State.Stack.back().LastSpace = StartColumn; @@ -904,7 +887,7 @@ private: /// the solution space (\c LineStates are the nodes). The algorithm tries to /// find the shortest path (the one with lowest penalty) from \p InitialState /// to a state where all tokens are placed. - unsigned analyzeSolutionSpace(LineState &InitialState) { + void analyzeSolutionSpace(LineState &InitialState) { std::set<LineState> Seen; // Insert start element into queue. @@ -939,15 +922,12 @@ private: if (Queue.empty()) // We were unable to find a solution, do nothing. // FIXME: Add diagnostic? - return 0; + return; // Reconstruct the solution. reconstructPath(InitialState, Queue.top().second); DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); DEBUG(llvm::dbgs() << "---\n"); - - // Return the column after the last token of the solution. - return Queue.top().second->State.Column; } void reconstructPath(LineState &State, StateNode *Current) { @@ -1191,7 +1171,6 @@ public: LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); UnwrappedLineParser Parser(Style, Tokens, *this); bool StructuralError = Parser.parse(); - unsigned PreviousEndOfLineColumn = 0; TokenAnnotator Annotator(Style, SourceMgr, Lex, Tokens.getIdentTable().get("in")); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { @@ -1247,7 +1226,7 @@ public: if (PreviousLineWasTouched) { unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u); Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0, - /*WhitespaceStartColumn*/ 0); + /*TargetColumn*/ 0); } } else if (TheLine.Type != LT_Invalid && (WasMoved || FormatPPDirective || touchesLine(TheLine))) { @@ -1257,46 +1236,49 @@ public: // we break apart a line consisting of multiple unwrapped lines. (FirstTok.NewlinesBefore == 0 || !StructuralError)) { formatFirstToken(TheLine.First, PreviousLineLastToken, Indent, - TheLine.InPPDirective, PreviousEndOfLineColumn); + TheLine.InPPDirective); } else { Indent = LevelIndent = SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; } UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, TheLine.First, Whitespaces); - PreviousEndOfLineColumn = - Formatter.format(I + 1 != E ? &*(I + 1) : NULL); + Formatter.format(I + 1 != E ? &*(I + 1) : NULL); IndentForLevel[TheLine.Level] = LevelIndent; PreviousLineWasTouched = true; } else { - if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) { - unsigned LevelIndent = - SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; - // Remove trailing whitespace of the previous line if it was touched. - if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) - formatFirstToken(TheLine.First, PreviousLineLastToken, LevelIndent, - TheLine.InPPDirective, PreviousEndOfLineColumn); - - if (static_cast<int>(LevelIndent) - Offset >= 0) - LevelIndent -= Offset; - if (TheLine.First.isNot(tok::comment)) - IndentForLevel[TheLine.Level] = LevelIndent; + // Format the first token if necessary, and notify the WhitespaceManager + // about the unchanged whitespace. + for (const AnnotatedToken *Tok = &TheLine.First; Tok != NULL; + Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]) { + if (Tok == &TheLine.First && + (Tok->FormatTok.NewlinesBefore > 0 || Tok->FormatTok.IsFirst)) { + unsigned LevelIndent = SourceMgr.getSpellingColumnNumber( + Tok->FormatTok.Tok.getLocation()) - + 1; + // Remove trailing whitespace of the previous line if it was + // touched. + if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { + formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent, + TheLine.InPPDirective); + } else { + Whitespaces.addUntouchableToken(Tok->FormatTok, + TheLine.InPPDirective); + } + + if (static_cast<int>(LevelIndent) - Offset >= 0) + LevelIndent -= Offset; + if (Tok->isNot(tok::comment)) + IndentForLevel[TheLine.Level] = LevelIndent; + } else { + Whitespaces.addUntouchableToken(Tok->FormatTok, + TheLine.InPPDirective); + } } // If we did not reformat this unwrapped line, the column at the end of // the last token is unchanged - thus, we can calculate the end of the // last token. - SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation(); - PreviousEndOfLineColumn = - SourceMgr.getSpellingColumnNumber(LastLoc) + - Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1; PreviousLineWasTouched = false; - if (TheLine.Last->is(tok::comment)) - Whitespaces.addUntouchableComment( - SourceMgr.getSpellingColumnNumber( - TheLine.Last->FormatTok.Tok.getLocation()) - - 1); - else - Whitespaces.alignComments(); } PreviousLineLastToken = I->Last; } @@ -1556,7 +1538,7 @@ private: /// Returns the indent level of the \c UnwrappedLine. void formatFirstToken(const AnnotatedToken &RootToken, const AnnotatedToken *PreviousToken, unsigned Indent, - bool InPPDirective, unsigned PreviousEndOfLineColumn) { + bool InPPDirective) { const FormatToken &Tok = RootToken.FormatTok; unsigned Newlines = @@ -1564,17 +1546,13 @@ private: if (Newlines == 0 && !Tok.IsFirst) Newlines = 1; - if (!InPPDirective || Tok.HasUnescapedNewline) { - // Insert extra new line before access specifiers. - if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && - RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1) - ++Newlines; + // Insert extra new line before access specifiers. + if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && + RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1) + ++Newlines; - Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0); - } else { - Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, - PreviousEndOfLineColumn); - } + Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, Indent, + InPPDirective && !Tok.HasUnescapedNewline); } FormatStyle Style; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 4541c6b3c4bbc95c023c39d998e55387a3c6abc3..c914d4beab254c393840ac1f829c0c015ce5e0da 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -643,9 +643,9 @@ private: else Current.Type = TT_BlockComment; } else if (Current.is(tok::r_paren)) { - bool ParensNotExpr = !Current.Parent || - Current.Parent->Type == TT_PointerOrReference || - Current.Parent->Type == TT_TemplateCloser; + bool ParensNotExpr = + !Current.Parent || Current.Parent->Type == TT_PointerOrReference || + Current.Parent->Type == TT_TemplateCloser; bool ParensCouldEndDecl = !Current.Children.empty() && Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace); @@ -920,11 +920,28 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current = Current->Children.empty() ? NULL : &Current->Children[0]; } + calculateUnbreakableTailLengths(Line); DEBUG({ printDebugInfo(Line); }); } +void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { + unsigned UnbreakableTailLength = 0; + AnnotatedToken *Current = Line.Last; + while (Current != NULL) { + Current->UnbreakableTailLength = UnbreakableTailLength; + if (Current->CanBreakBefore || + Current->isOneOf(tok::comment, tok::string_literal)) { + UnbreakableTailLength = 0; + } else { + UnbreakableTailLength += + Current->FormatTok.TokenLength + Current->SpacesRequiredBefore; + } + Current = Current->Parent; + } +} + unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok) { const AnnotatedToken &Left = *Tok.Parent; diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index be4390a59260e6264ed47216534d828cc03407e8..c72730e30c374bae7eda8e24d838d08f6e137593 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -76,9 +76,10 @@ public: CanBreakBefore(false), MustBreakBefore(false), ClosesTemplateDeclaration(false), MatchingParen(NULL), ParameterCount(0), TotalLength(FormatTok.TokenLength), - BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0), - DefinesFunctionType(false), Parent(NULL), FakeRParens(0), - LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false) {} + UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0), + LongestObjCSelectorName(0), DefinesFunctionType(false), Parent(NULL), + FakeRParens(0), LastInChainOfCalls(false), + PartOfMultiVariableDeclStmt(false) {} bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } @@ -153,6 +154,10 @@ public: /// \brief The total length of the line up to and including this token. unsigned TotalLength; + /// \brief The length of following tokens until the next natural split point, + /// or the next token that can be broken. + unsigned UnbreakableTailLength; + // FIXME: Come up with a 'cleaner' concept. /// \brief The binding strength of a token. This is a combined value of /// operator precedence, parenthesis nesting, etc. @@ -268,6 +273,8 @@ private: void printDebugInfo(const AnnotatedLine &Line); + void calculateUnbreakableTailLengths(AnnotatedLine &Line); + const FormatStyle &Style; SourceManager &SourceMgr; Lexer &Lex; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 7ffebacba58262478bc5ab9ee26607ee96a2f14e..d4953b4a784c09ee6aaae04f428fc6ecfec1039d 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -18,18 +18,41 @@ namespace clang { namespace format { -void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok, - unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn) { - if (NewLines > 0) - alignEscapedNewlines(); +bool +WhitespaceManager::Change::IsBeforeInFile::operator()(const Change &C1, + const Change &C2) const { + return SourceMgr.isBeforeInTranslationUnit( + C1.OriginalWhitespaceRange.getBegin(), + C2.OriginalWhitespaceRange.getBegin()); +} - // 2+ newlines mean an empty line separating logic scopes. - if (NewLines >= 2) - alignComments(); +WhitespaceManager::Change::Change( + bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, + unsigned Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, + StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, + tok::TokenKind Kind, bool ContinuesPPDirective) + : CreateReplacement(CreateReplacement), + OriginalWhitespaceRange(OriginalWhitespaceRange), + StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), + PreviousLinePostfix(PreviousLinePostfix), + CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), + ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces) {} + +void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok, + unsigned Newlines, unsigned Spaces, + unsigned StartOfTokenColumn, + bool InPPDirective) { + Changes.push_back(Change( + true, SourceRange(Tok.FormatTok.WhiteSpaceStart, + Tok.FormatTok.WhiteSpaceStart.getLocWithOffset( + Tok.FormatTok.WhiteSpaceLength)), + Spaces, StartOfTokenColumn, Newlines, "", "", Tok.FormatTok.Tok.getKind(), + InPPDirective && !Tok.FormatTok.IsFirst)); // Align line comments if they are trailing or if they continue other // trailing comments. + // FIXME: Pull this out and generalize so it works the same way in broken + // comments and unbroken comments with trailing whitespace. if (Tok.isTrailingComment()) { SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace() .getLocWithOffset(Tok.FormatTok.TokenLength); @@ -37,87 +60,187 @@ void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok, if (Tok.FormatTok.TrailingWhiteSpaceLength != 0) Replaces.insert(tooling::Replacement( SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, "")); - - bool LineExceedsColumnLimit = - Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength > - Style.ColumnLimit; - // Align comment with other comments. - if ((Tok.Parent != NULL || !Comments.empty()) && - !LineExceedsColumnLimit) { - unsigned MinColumn = - NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces; - unsigned MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength; - Comments.push_back(StoredToken( - Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, - MinColumn, MaxColumn, NewLines, Spaces)); - return; - } } +} - // If this line does not have a trailing comment, align the stored comments. - if (Tok.Children.empty() && !Tok.isTrailingComment()) - alignComments(); - - storeReplacement(Tok.FormatTok.WhiteSpaceStart, - Tok.FormatTok.WhiteSpaceLength, - getNewLineText(NewLines, Spaces)); -} - -void WhitespaceManager::replacePPWhitespace(const AnnotatedToken &Tok, - unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn) { - if (NewLines == 0) { - replaceWhitespace(Tok, NewLines, Spaces, WhitespaceStartColumn); - } else { - // The earliest position for "\" is 2 after the last token. - unsigned MinColumn = WhitespaceStartColumn + 2; - unsigned MaxColumn = Style.ColumnLimit; - EscapedNewlines.push_back(StoredToken( - Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength, - MinColumn, MaxColumn, NewLines, Spaces)); - } +void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, + bool InPPDirective) { + Changes.push_back(Change( + false, + SourceRange(Tok.WhiteSpaceStart, + Tok.WhiteSpaceStart.getLocWithOffset(Tok.WhiteSpaceLength)), + Tok.WhiteSpaceLength - Tok.NewlinesBefore, + SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1, + Tok.NewlinesBefore, "", "", Tok.Tok.getKind(), + InPPDirective && !Tok.IsFirst)); } void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset, - unsigned ReplaceChars, StringRef Prefix, - StringRef Postfix, bool InPPDirective, - unsigned Spaces, - unsigned WhitespaceStartColumn) { - SourceLocation Location = - Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); - if (InPPDirective) { - // The earliest position for "\" is 2 after the last token. - unsigned MinColumn = WhitespaceStartColumn + 2; - unsigned MaxColumn = Style.ColumnLimit; - StoredToken StoredTok = StoredToken(Location, ReplaceChars, MinColumn, - MaxColumn, /*NewLines=*/ 1, Spaces); - StoredTok.Prefix = Prefix; - StoredTok.Postfix = Postfix; - EscapedNewlines.push_back(StoredTok); - } else { - std::string ReplacementText = - (Prefix + getNewLineText(1, Spaces) + Postfix).str(); - Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars, - ReplacementText)); - } + unsigned ReplaceChars, + StringRef PreviousPostfix, + StringRef CurrentPrefix, bool InPPDirective, + unsigned Spaces) { + Changes.push_back(Change( + true, SourceRange(Tok.getStartOfNonWhitespace().getLocWithOffset(Offset), + Tok.getStartOfNonWhitespace().getLocWithOffset( + Offset + ReplaceChars)), + Spaces, Spaces, 1, PreviousPostfix, CurrentPrefix, + // FIXME: Unify token adjustment, so we don't split it between + // BreakableToken and the WhitespaceManager. That would also allow us to + // correctly store a tok::TokenKind instead of rolling our own enum. + tok::unknown, InPPDirective && !Tok.IsFirst)); +} + +void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc, + unsigned ReplaceChars, StringRef Text) { + Replaces.insert( + tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { - alignComments(); + if (Changes.empty()) + return Replaces; + + std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); + calculateLineBreakInformation(); + alignTrailingComments(); alignEscapedNewlines(); + generateChanges(); + return Replaces; } -void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc, - unsigned ReplaceChars, StringRef Text) { - Replaces.insert( - tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text)); +void WhitespaceManager::calculateLineBreakInformation() { + Changes[0].PreviousEndOfTokenColumn = 0; + for (unsigned i = 1, e = Changes.size(); i != e; ++i) { + unsigned OriginalWhitespaceStart = + SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin()); + unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset( + Changes[i - 1].OriginalWhitespaceRange.getEnd()); + Changes[i - 1].TokenLength = + OriginalWhitespaceStart - PreviousOriginalWhitespaceEnd + + Changes[i].PreviousLinePostfix.size() + + Changes[i - 1].CurrentLinePrefix.size(); + + Changes[i].PreviousEndOfTokenColumn = + Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; + + Changes[i - 1].IsTrailingComment = + (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) && + Changes[i - 1].Kind == tok::comment; + } + Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment; +} + +void WhitespaceManager::alignTrailingComments() { + unsigned MinColumn = 0; + unsigned MaxColumn = UINT_MAX; + unsigned StartOfSequence = 0; + bool BreakBeforeNext = false; + unsigned Newlines = 0; + for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; + // FIXME: Correctly handle ChangeMaxColumn in PP directives. + unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; + Newlines += Changes[i].NewlinesBefore; + if (Changes[i].IsTrailingComment) { + if (BreakBeforeNext || Newlines > 1 || + (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) || + // Break the comment sequence if the previous line did not end + // in a trailing comment. + (Changes[i].NewlinesBefore == 1 && i > 0 && + !Changes[i - 1].IsTrailingComment)) { + alignTrailingComments(StartOfSequence, i, MinColumn); + MinColumn = ChangeMinColumn; + MaxColumn = ChangeMaxColumn; + StartOfSequence = i; + } else { + MinColumn = std::max(MinColumn, ChangeMinColumn); + MaxColumn = std::min(MaxColumn, ChangeMaxColumn); + } + BreakBeforeNext = + (i == 0) || (Changes[i].NewlinesBefore > 1) || + (Changes[i].NewlinesBefore == 1 && !Changes[i - 1].IsTrailingComment); + Newlines = 0; + } + } + alignTrailingComments(StartOfSequence, Changes.size(), MinColumn); } -void WhitespaceManager::addUntouchableComment(unsigned Column) { - StoredToken Tok = StoredToken(SourceLocation(), 0, Column, Column, 0, 0); - Tok.Untouchable = true; - Comments.push_back(Tok); +void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, + unsigned Column) { + for (unsigned i = Start; i != End; ++i) { + if (Changes[i].IsTrailingComment) { + assert(Column >= Changes[i].StartOfTokenColumn); + Changes[i].Spaces += Column - Changes[i].StartOfTokenColumn; + Changes[i].StartOfTokenColumn = Column; + } + } +} + +void WhitespaceManager::alignEscapedNewlines() { + unsigned MaxEndOfLine = 0; + unsigned StartOfMacro = 0; + for (unsigned i = 1, e = Changes.size(); i < e; ++i) { + Change &C = Changes[i]; + if (C.NewlinesBefore > 0) { + if (C.ContinuesPPDirective) { + if (Style.AlignEscapedNewlinesLeft) + MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine); + else + MaxEndOfLine = Style.ColumnLimit; + } else { + alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine); + MaxEndOfLine = 0; + StartOfMacro = i; + } + } + } + alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine); +} + +void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End, + unsigned Column) { + for (unsigned i = Start; i < End; ++i) { + Change &C = Changes[i]; + if (C.NewlinesBefore > 0) { + assert(C.ContinuesPPDirective); + if (C.PreviousEndOfTokenColumn + 1 > Column) + C.EscapedNewlineColumn = 0; + else + C.EscapedNewlineColumn = Column; + } + } +} + +void WhitespaceManager::generateChanges() { + for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + const Change &C = Changes[i]; + if (C.CreateReplacement) { + std::string ReplacementText = + C.PreviousLinePostfix + + (C.ContinuesPPDirective + ? getNewLineText(C.NewlinesBefore, C.Spaces, + C.PreviousEndOfTokenColumn, + C.EscapedNewlineColumn) + : getNewLineText(C.NewlinesBefore, C.Spaces)) + + C.CurrentLinePrefix; + storeReplacement(C.OriginalWhitespaceRange, ReplacementText); + } + } +} + +void WhitespaceManager::storeReplacement(const SourceRange &Range, + StringRef Text) { + unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) - + SourceMgr.getFileOffset(Range.getBegin()); + // Don't create a replacement, if it does not change anything. + if (StringRef(SourceMgr.getCharacterData(Range.getBegin()), + WhitespaceLength) == + Text) + return; + Replaces.insert(tooling::Replacement( + SourceMgr, CharSourceRange::getCharRange(Range), Text)); } std::string WhitespaceManager::getNewLineText(unsigned NewLines, @@ -127,12 +250,12 @@ std::string WhitespaceManager::getNewLineText(unsigned NewLines, std::string WhitespaceManager::getNewLineText(unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn, + unsigned PreviousEndOfTokenColumn, unsigned EscapedNewlineColumn) { std::string NewLineText; if (NewLines > 0) { unsigned Offset = - std::min<int>(EscapedNewlineColumn - 1, WhitespaceStartColumn); + std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn); for (unsigned i = 0; i < NewLines; ++i) { NewLineText += std::string(EscapedNewlineColumn - Offset - 1, ' '); NewLineText += "\\\n"; @@ -150,70 +273,5 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) { std::string(Spaces % Style.IndentWidth, ' '); } -void WhitespaceManager::alignComments() { - unsigned MinColumn = 0; - unsigned MaxColumn = UINT_MAX; - token_iterator Start = Comments.begin(); - for (token_iterator I = Start, E = Comments.end(); I != E; ++I) { - if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { - alignComments(Start, I, MinColumn); - MinColumn = I->MinColumn; - MaxColumn = I->MaxColumn; - Start = I; - } else { - MinColumn = std::max(MinColumn, I->MinColumn); - MaxColumn = std::min(MaxColumn, I->MaxColumn); - } - } - alignComments(Start, Comments.end(), MinColumn); - Comments.clear(); -} - -void WhitespaceManager::alignComments(token_iterator I, token_iterator E, - unsigned Column) { - while (I != E) { - if (!I->Untouchable) { - unsigned Spaces = I->Spaces + Column - I->MinColumn; - storeReplacement(I->ReplacementLoc, I->ReplacementLength, - getNewLineText(I->NewLines, Spaces)); - } - ++I; - } -} - -void WhitespaceManager::alignEscapedNewlines() { - unsigned MinColumn; - if (Style.AlignEscapedNewlinesLeft) { - MinColumn = 0; - for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end(); - I != E; ++I) { - if (I->MinColumn > MinColumn) - MinColumn = I->MinColumn; - } - } else { - MinColumn = Style.ColumnLimit; - } - - for (token_iterator I = EscapedNewlines.begin(), E = EscapedNewlines.end(); - I != E; ++I) { - // I->MinColumn - 2 is the end of the previous token (i.e. the - // WhitespaceStartColumn). - storeReplacement( - I->ReplacementLoc, I->ReplacementLength, - I->Prefix + getNewLineText(I->NewLines, I->Spaces, I->MinColumn - 2, - MinColumn) + I->Postfix); - - } - EscapedNewlines.clear(); -} - -void WhitespaceManager::storeReplacement(SourceLocation Loc, unsigned Length, - const std::string Text) { - // Don't create a replacement, if it does not change anything. - if (StringRef(SourceMgr.getCharacterData(Loc), Length) == Text) - return; - Replaces.insert(tooling::Replacement(SourceMgr, Loc, Length, Text)); -} - } // namespace format } // namespace clang diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 1b24b3f6d79d8b3eb79223198ab1c45425807fd5..701ecff4e055311da8adce6255ec833c1df2af21 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -28,6 +28,13 @@ namespace format { /// /// This includes special handling for certain constructs, e.g. the alignment of /// trailing line comments. +/// +/// To guarantee correctness of alignment operations, the \c WhitespaceManager +/// must be informed about every token in the source file; for each token, there +/// must be exactly one call to either \c replaceWhitespace or +/// \c addUntouchableToken. +/// +/// There may be multiple calls to \c breakToken for a given token. class WhitespaceManager { public: WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style) @@ -35,81 +42,130 @@ public: /// \brief Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. - void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn); + void replaceWhitespace(const AnnotatedToken &Tok, unsigned Newlines, + unsigned Spaces, unsigned StartOfTokenColumn, + bool InPPDirective = false); - /// \brief Like \c replaceWhitespace, but additionally adds right-aligned - /// backslashes to escape newlines inside a preprocessor directive. + /// \brief Adds information about an unchangable token's whitespace. /// - /// This function and \c replaceWhitespace have the same behavior if - /// \c Newlines == 0. - void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, - unsigned Spaces, unsigned WhitespaceStartColumn); + /// Needs to be called for every token for which \c replaceWhitespace + /// was not called. + void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); /// \brief Inserts a line break into the middle of a token. /// - /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line - /// break and \p Postfix before the rest of the token starts in the next line. + /// Will break at \p Offset inside \p Tok, putting \p PreviousPostfix before + /// the line break and \p CurrentPrefix before the rest of the token starts in + /// the next line. /// - /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are - /// used to generate the correct line break. + /// \p InPPDirective and \p Spaces are used to generate the correct line + /// break. void breakToken(const FormatToken &Tok, unsigned Offset, - unsigned ReplaceChars, StringRef Prefix, StringRef Postfix, - bool InPPDirective, unsigned Spaces, - unsigned WhitespaceStartColumn); + unsigned ReplaceChars, StringRef PreviousPostfix, + StringRef CurrentPrefix, bool InPPDirective, unsigned Spaces); /// \brief Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); + /// \brief Replaces \p ReplaceChars after \p SourceLoc with \p Text. + /// + /// FIXME: This is currently used to align comments outside of the \c + /// WhitespaceManager. Once this has been moved inside, get rid of this + /// method. void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars, StringRef Text); - void addUntouchableComment(unsigned Column); +private: + /// \brief Represents a change before a token, a break inside a token, + /// or the layout of an unchanged token (or whitespace within). + struct Change { + /// \brief Functor to sort changes in original source order. + class IsBeforeInFile { + public: + IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} + bool operator()(const Change &C1, const Change &C2) const; + + private: + const SourceManager &SourceMgr; + }; + + Change() {} + + /// \brief Creates a \c Change. + /// + /// The generated \c Change will replace the characters at + /// \p OriginalWhitespaceRange with a concatenation of + /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces + /// and \p CurrentLinePrefix. + /// + /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out + /// trailing comments and escaped newlines. + Change(bool CreateReplacement, const SourceRange &OriginalWhitespaceRange, + unsigned Spaces, unsigned StartOfTokenColumn, + unsigned NewlinesBefore, StringRef PreviousLinePostfix, + StringRef CurrentLinePrefix, tok::TokenKind Kind, + bool ContinuesPPDirective); + + bool CreateReplacement; + // Changes might be in the middle of a token, so we cannot just keep the + // FormatToken around to query its information. + SourceRange OriginalWhitespaceRange; + unsigned StartOfTokenColumn; + unsigned NewlinesBefore; + std::string PreviousLinePostfix; + std::string CurrentLinePrefix; + // The kind of the token whose whitespace this change replaces, or in which + // this change inserts whitespace. + // FIXME: Currently this is not set correctly for breaks inside comments, as + // the \c BreakableToken is still doing its own alignment. + tok::TokenKind Kind; + bool ContinuesPPDirective; + + // The number of spaces in front of the token or broken part of the token. + // This will be adapted when aligning tokens. + unsigned Spaces; - /// \brief Try to align all stashed comments. - void alignComments(); - /// \brief Try to align all stashed escaped newlines. - void alignEscapedNewlines(); + // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and + // \c EscapedNewlineColumn will be calculated in + // \c calculateLineBreakInformation. + bool IsTrailingComment; + unsigned TokenLength; + unsigned PreviousEndOfTokenColumn; + unsigned EscapedNewlineColumn; -private: - std::string getNewLineText(unsigned NewLines, unsigned Spaces); + }; - std::string getNewLineText(unsigned NewLines, unsigned Spaces, - unsigned WhitespaceStartColumn, - unsigned EscapedNewlineColumn); + /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens + /// or token parts in a line and \c PreviousEndOfTokenColumn and + /// \c EscapedNewlineColumn for the first tokens or token parts in a line. + void calculateLineBreakInformation(); - std::string getIndentText(unsigned Spaces); + /// \brief Align trailing comments over all \c Changes. + void alignTrailingComments(); - /// \brief Structure to store tokens for later layout and alignment. - struct StoredToken { - StoredToken(SourceLocation ReplacementLoc, unsigned ReplacementLength, - unsigned MinColumn, unsigned MaxColumn, unsigned NewLines, - unsigned Spaces) - : ReplacementLoc(ReplacementLoc), ReplacementLength(ReplacementLength), - MinColumn(MinColumn), MaxColumn(MaxColumn), NewLines(NewLines), - Spaces(Spaces), Untouchable(false) {} - SourceLocation ReplacementLoc; - unsigned ReplacementLength; - unsigned MinColumn; - unsigned MaxColumn; - unsigned NewLines; - unsigned Spaces; - bool Untouchable; - std::string Prefix; - std::string Postfix; - }; - SmallVector<StoredToken, 16> Comments; - SmallVector<StoredToken, 16> EscapedNewlines; - typedef SmallVector<StoredToken, 16>::iterator token_iterator; + /// \brief Align trailing comments from change \p Start to change \p End at + /// the specified \p Column. + void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); + + /// \brief Align escaped newlines over all \c Changes. + void alignEscapedNewlines(); - /// \brief Put all the comments between \p I and \p E into \p Column. - void alignComments(token_iterator I, token_iterator E, unsigned Column); + /// \brief Align escaped newlines from change \p Start to change \p End at + /// the specified \p Column. + void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); - /// \brief Stores \p Text as the replacement for the whitespace in front of - /// \p Tok. - void storeReplacement(SourceLocation Loc, unsigned Length, - const std::string Text); + /// \brief Fill \c Replaces with the replacements for all effective changes. + void generateChanges(); + + /// \brief Stores \p Text as the replacement for the whitespace in \p Range. + void storeReplacement(const SourceRange &Range, StringRef Text); + std::string getNewLineText(unsigned NewLines, unsigned Spaces); + std::string getNewLineText(unsigned NewLines, unsigned Spaces, + unsigned PreviousEndOfTokenColumn, + unsigned EscapedNewlineColumn); + std::string getIndentText(unsigned Spaces); + SmallVector<Change, 16> Changes; SourceManager &SourceMgr; tooling::Replacements Replaces; const FormatStyle &Style; diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 5ea531f350e63a4720f62479258a02a2f03f6958..6530c30d7d7ce4977272256956242232fcb4421b 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -1528,6 +1528,13 @@ TEST_F(FormatTest, MacroCallsWithoutTrailingSemicolon) { "};")); } +TEST_F(FormatTest, LayoutMacroDefinitionsStatementsSpanningBlocks) { + verifyFormat("#define A \\\n" + " f({ \\\n" + " g(); \\\n" + " });", getLLVMStyleWithColumns(11)); +} + TEST_F(FormatTest, IndentPreprocessorDirectivesAtZero) { EXPECT_EQ("{\n {\n#define A\n }\n}", format("{{\n#define A\n}}")); }