diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 95ae7b56dc4146e8539755f739d9dd1990668336..b4eae19d47d376808dcfc706022abde458398ed2 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -1157,7 +1157,8 @@ public: encoding::Encoding Encoding) : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style), - IdentTable(getFormattingLangOpts()), Encoding(Encoding) { + IdentTable(getFormattingLangOpts()), Encoding(Encoding), + FirstInLineIndex(0) { Lex.SetKeepWhitespaceMode(true); for (const std::string& ForEachMacro : Style.ForEachMacros) @@ -1167,9 +1168,12 @@ public: ArrayRef<FormatToken *> lex() { assert(Tokens.empty()); + assert(FirstInLineIndex == 0); do { Tokens.push_back(getNextToken()); tryMergePreviousTokens(); + if (Tokens.back()->NewlinesBefore > 0) + FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); return Tokens; } @@ -1180,6 +1184,8 @@ private: void tryMergePreviousTokens() { if (tryMerge_TMacro()) return; + if (tryMergeConflictMarkers()) + return; if (Style.Language == FormatStyle::LK_JavaScript) { static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; @@ -1254,6 +1260,68 @@ private: return true; } + bool tryMergeConflictMarkers() { + if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) + return false; + + // Conflict lines look like: + // <marker> <text from the vcs> + // For example: + // >>>>>>> /file/in/file/system at revision 1234 + // + // We merge all tokens in a line that starts with a conflict marker + // into a single token with a special token type that the unwrapped line + // parser will use to correctly rebuild the underlying code. + + FileID ID; + // Get the position of the first token in the line. + unsigned FirstInLineOffset; + std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( + Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); + StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); + // Calculate the offset of the start of the current line. + auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); + if (LineOffset == StringRef::npos) { + LineOffset = 0; + } else { + ++LineOffset; + } + + auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); + StringRef LineStart; + if (FirstSpace == StringRef::npos) { + LineStart = Buffer.substr(LineOffset); + } else { + LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); + } + + TokenType Type = TT_Unknown; + if (LineStart == "<<<<<<<" || LineStart == ">>>>") { + Type = TT_ConflictStart; + } else if (LineStart == "|||||||" || LineStart == "=======" || + LineStart == "====") { + Type = TT_ConflictAlternative; + } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { + Type = TT_ConflictEnd; + } + + if (Type != TT_Unknown) { + FormatToken *Next = Tokens.back(); + + Tokens.resize(FirstInLineIndex + 1); + // We do not need to build a complete token here, as we will skip it + // during parsing anyway (as we must not touch whitespace around conflict + // markers). + Tokens.back()->Type = Type; + Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); + + Tokens.push_back(Next); + return true; + } + + return false; + } + FormatToken *getNextToken() { if (GreaterStashed) { // Create a synthesized second '>' token. @@ -1401,6 +1469,8 @@ private: IdentifierTable IdentTable; encoding::Encoding Encoding; llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; + // Index (in 'Tokens') of the last token that starts a new line. + unsigned FirstInLineIndex; SmallVector<FormatToken *, 16> Tokens; SmallVector<IdentifierInfo*, 8> ForEachMacros; diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index a5aaa6f6de7b365791f5ddb580c3a7bba53db08e..249eecc0ba15a6304a0a8cdbc2f52e18436db28e 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -33,19 +33,22 @@ enum TokenType { TT_BlockComment, TT_CastRParen, TT_ConditionalExpr, + TT_ConflictAlternative, + TT_ConflictEnd, + TT_ConflictStart, TT_CtorInitializerColon, TT_CtorInitializerComma, TT_DesignatedInitializerPeriod, TT_DictLiteral, - TT_ImplicitStringLiteral, - TT_InlineASMColon, - TT_InheritanceColon, TT_FunctionLBrace, TT_FunctionTypeLParen, + TT_ImplicitStringLiteral, + TT_InheritanceColon, + TT_InlineASMColon, TT_LambdaLSquare, TT_LineComment, - TT_ObjCBlockLParen, TT_ObjCBlockLBrace, + TT_ObjCBlockLParen, TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 004c836483095f54ebeef018ac48f4761f6b2942..d63b38bcd1ac12023112e16c88323b6dfbd912b3 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -465,14 +465,14 @@ void UnwrappedLineParser::parsePPDirective() { } } -void UnwrappedLineParser::pushPPConditional() { - if (!PPStack.empty() && PPStack.back() == PP_Unreachable) +void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { + if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) PPStack.push_back(PP_Unreachable); else PPStack.push_back(PP_Conditional); } -void UnwrappedLineParser::parsePPIf(bool IfDef) { +void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { ++PPBranchLevel; assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { @@ -480,37 +480,22 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) { PPLevelBranchCount.push_back(0); } PPChainBranchIndex.push(0); - nextToken(); - bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && - StringRef(FormatTok->Tok.getLiteralData(), - FormatTok->Tok.getLength()) == "0") || - FormatTok->Tok.is(tok::kw_false); - if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) { - PPStack.push_back(PP_Unreachable); - } else { - pushPPConditional(); - } - parsePPUnknown(); + bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; + conditionalCompilationCondition(Unreachable || Skip); } -void UnwrappedLineParser::parsePPElse() { +void UnwrappedLineParser::conditionalCompilationAlternative() { if (!PPStack.empty()) PPStack.pop_back(); assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (!PPChainBranchIndex.empty()) ++PPChainBranchIndex.top(); - if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && - PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) { - PPStack.push_back(PP_Unreachable); - } else { - pushPPConditional(); - } - parsePPUnknown(); + conditionalCompilationCondition( + PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && + PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); } -void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } - -void UnwrappedLineParser::parsePPEndIf() { +void UnwrappedLineParser::conditionalCompilationEnd() { assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { @@ -524,6 +509,27 @@ void UnwrappedLineParser::parsePPEndIf() { PPChainBranchIndex.pop(); if (!PPStack.empty()) PPStack.pop_back(); +} + +void UnwrappedLineParser::parsePPIf(bool IfDef) { + nextToken(); + bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && + StringRef(FormatTok->Tok.getLiteralData(), + FormatTok->Tok.getLength()) == "0") || + FormatTok->Tok.is(tok::kw_false); + conditionalCompilationStart(!IfDef && IsLiteralFalse); + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElse() { + conditionalCompilationAlternative(); + parsePPUnknown(); +} + +void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } + +void UnwrappedLineParser::parsePPEndIf() { + conditionalCompilationEnd(); parsePPUnknown(); } @@ -1406,6 +1412,19 @@ void UnwrappedLineParser::readToken() { flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); } + while (FormatTok->Type == TT_ConflictStart || + FormatTok->Type == TT_ConflictEnd || + FormatTok->Type == TT_ConflictAlternative) { + if (FormatTok->Type == TT_ConflictStart) { + conditionalCompilationStart(/*Unreachable=*/false); + } else if (FormatTok->Type == TT_ConflictAlternative) { + conditionalCompilationAlternative(); + } else if(FormatTok->Type == TT_ConflictEnd) { + conditionalCompilationEnd(); + } + FormatTok = Tokens->getNextToken(); + FormatTok->MustBreakBefore = true; + } if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && !Line->InPPDirective) { diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 6eaa415b6b81a6748d850274f113707ec5d78b46..8f0c5a3ef41e4957fcf7331cb7a280c87624a5a9 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -107,7 +107,16 @@ private: void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); void calculateBraceTypes(); - void pushPPConditional(); + + // Marks a conditional compilation edge (for example, an '#if', '#ifdef', + // '#else' or merge conflict marker). If 'Unreachable' is true, assumes + // this branch either cannot be taken (for example '#if false'), or should + // not be taken in this round. + void conditionalCompilationCondition(bool Unreachable); + void conditionalCompilationStart(bool Unreachable); + void conditionalCompilationAlternative(); + void conditionalCompilationEnd(); + bool isOnNewLine(const FormatToken& FormatTok); // FIXME: We are constantly running into bugs where Line.Level is incorrectly diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 382276abb6027120ffecc50694dc7f7d128085fe..fb33bd685bec1ba54c8219d80127c18a7e316f90 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -8402,5 +8402,97 @@ TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) { EXPECT_EQ(code, format(code)); } +TEST_F(FormatTest, HandleConflictMarkers) { + // Git/SVN conflict markers. + EXPECT_EQ("int a;\n" + "void f() {\n" + " callme(some(parameter1,\n" + "<<<<<<< text by the vcs\n" + " parameter2),\n" + "||||||| text by the vcs\n" + " parameter2),\n" + " parameter3,\n" + "======= text by the vcs\n" + " parameter2, parameter3),\n" + ">>>>>>> text by the vcs\n" + " otherparameter);\n", + format("int a;\n" + "void f() {\n" + " callme(some(parameter1,\n" + "<<<<<<< text by the vcs\n" + " parameter2),\n" + "||||||| text by the vcs\n" + " parameter2),\n" + " parameter3,\n" + "======= text by the vcs\n" + " parameter2,\n" + " parameter3),\n" + ">>>>>>> text by the vcs\n" + " otherparameter);\n")); + + // Perforce markers. + EXPECT_EQ("void f() {\n" + " function(\n" + ">>>> text by the vcs\n" + " parameter,\n" + "==== text by the vcs\n" + " parameter,\n" + "==== text by the vcs\n" + " parameter,\n" + "<<<< text by the vcs\n" + " parameter);\n", + format("void f() {\n" + " function(\n" + ">>>> text by the vcs\n" + " parameter,\n" + "==== text by the vcs\n" + " parameter,\n" + "==== text by the vcs\n" + " parameter,\n" + "<<<< text by the vcs\n" + " parameter);\n")); + + EXPECT_EQ("<<<<<<<\n" + "|||||||\n" + "=======\n" + ">>>>>>>", + format("<<<<<<<\n" + "|||||||\n" + "=======\n" + ">>>>>>>")); + + EXPECT_EQ("<<<<<<<\n" + "|||||||\n" + "int i;\n" + "=======\n" + ">>>>>>>", + format("<<<<<<<\n" + "|||||||\n" + "int i;\n" + "=======\n" + ">>>>>>>")); + + // FIXME: Handle parsing of macros around conflict markers correctly: + EXPECT_EQ("#define Macro \\\n" + "<<<<<<<\n" + "Something \\\n" + "|||||||\n" + "Else \\\n" + "=======\n" + "Other \\\n" + ">>>>>>>\n" + "End int i;\n", + format("#define Macro \\\n" + "<<<<<<<\n" + " Something \\\n" + "|||||||\n" + " Else \\\n" + "=======\n" + " Other \\\n" + ">>>>>>>\n" + " End\n" + "int i;\n")); +} + } // end namespace tooling } // end namespace clang