From 54b4e4468ec2bcd381ede70e1391bcdb59b8fd1a Mon Sep 17 00:00:00 2001
From: Daniel Jasper <djasper@google.com>
Date: Wed, 22 May 2013 05:27:42 +0000
Subject: [PATCH] Cut-off clang-format analysis.

If clang-format is confronted with long and deeply nested lines (e.g.
complex static initializers or function calls), it can currently try too
hard to find the optimal solution and never finish. The reason is that
the memoization does not work effectively for deeply nested lines.

This patch removes an earlier workaround and instead opts for
accepting a non-optimal solution in rare cases. However, it does so
only in cases where it would have to analyze an excessive number of
states (currently set to 10000 - the most complex line in Format.cpp
requires ~800 states) so this should not change the behavior in a
relevant way.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@182449 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Format/Format.cpp           | 35 ++++++++++++++++++++++-----------
 lib/Format/TokenAnnotator.cpp   |  2 --
 lib/Format/TokenAnnotator.h     | 16 +--------------
 unittests/Format/FormatTest.cpp | 20 +++++++++++++++++++
 4 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 9be917c799f..dd0ece50642 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -257,6 +257,7 @@ public:
     State.ParenLevel = 0;
     State.StartOfStringLiteral = 0;
     State.StartOfLineLevel = State.ParenLevel;
+    State.IgnoreStackForComparison = false;
 
     // The first token has already been indented and thus consumed.
     moveStateToNextToken(State, /*DryRun=*/ false);
@@ -420,6 +421,21 @@ private:
     /// levels.
     std::vector<ParenState> Stack;
 
+    /// \brief Ignore the stack of \c ParenStates for state comparison.
+    ///
+    /// In long and deeply nested unwrapped lines, the current algorithm can
+    /// be insufficient for finding the best formatting with a reasonable amount
+    /// of time and memory. Setting this flag will effectively lead to the
+    /// algorithm not analyzing some combinations. However, these combinations
+    /// rarely contain the optimal solution: In short, accepting a higher
+    /// penalty early would need to lead to different values in the \c
+    /// ParenState stack (in an otherwise identical state) and these different
+    /// values would need to lead to a significant amount of avoided penalty
+    /// later.
+    ///
+    /// FIXME: Come up with a better algorithm instead.
+    bool IgnoreStackForComparison;
+
     /// \brief Comparison operator to be able to used \c LineState in \c map.
     bool operator<(const LineState &Other) const {
       if (NextToken != Other.NextToken)
@@ -435,6 +451,8 @@ private:
         return StartOfLineLevel < Other.StartOfLineLevel;
       if (StartOfStringLiteral != Other.StartOfStringLiteral)
         return StartOfStringLiteral < Other.StartOfStringLiteral;
+      if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
+        return false;
       return Stack < Other.Stack;
     }
   };
@@ -713,18 +731,6 @@ private:
         AvoidBinPacking = !Style.BinPackParameters;
       }
 
-      if (Current.NoMoreTokensOnLevel && Current.FakeLParens.empty()) {
-        // This parenthesis was the last token possibly making use of Indent and
-        // LastSpace of the next higher ParenLevel. Thus, erase them to achieve
-        // better memoization results.
-        for (unsigned i = State.Stack.size() - 1; i > 0; --i) {
-          State.Stack[i].Indent = 0;
-          State.Stack[i].LastSpace = 0;
-          if (!State.Stack[i].ForFakeParenthesis)
-            break;
-        }
-      }
-
       State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking,
                                        State.Stack.back().NoLineBreak));
       ++State.ParenLevel;
@@ -917,6 +923,11 @@ private:
       }
       Queue.pop();
 
+      // Cut off the analysis of certain solutions if the analysis gets too
+      // complex. See description of IgnoreStackForComparison.
+      if (Count > 10000)
+        Node->State.IgnoreStackForComparison = true;
+
       if (!Seen.insert(Node->State).second)
         // State already examined with lower penalty.
         continue;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 43ed6c31a63..c8cdac9ae32 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -160,8 +160,6 @@ private:
         if (CurrentToken->Children.empty() ||
             !CurrentToken->Children[0].isOneOf(tok::l_paren, tok::l_square))
           Left->DefinesFunctionType = false;
-        if (CurrentToken->Parent->closesScope())
-          CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true;
         Left->MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = Left;
 
diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
index 227dd523960..be4390a5926 100644
--- a/lib/Format/TokenAnnotator.h
+++ b/lib/Format/TokenAnnotator.h
@@ -78,8 +78,7 @@ public:
         ParameterCount(0), TotalLength(FormatTok.TokenLength),
         BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
         DefinesFunctionType(false), Parent(NULL), FakeRParens(0),
-        LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false),
-        NoMoreTokensOnLevel(false) {}
+        LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false) {}
 
   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
 
@@ -189,19 +188,6 @@ public:
   /// Only set if \c Type == \c TT_StartOfName.
   bool PartOfMultiVariableDeclStmt;
 
-  /// \brief Set to \c true for "("-tokens if this is the last token other than
-  /// ")" in the next higher parenthesis level.
-  ///
-  /// If this is \c true, no more formatting decisions have to be made on the
-  /// next higher parenthesis level, enabling optimizations.
-  ///
-  /// Example:
-  /// \code
-  /// aaaaaa(aaaaaa());
-  ///              ^  // Set to true for this parenthesis.
-  /// \endcode
-  bool NoMoreTokensOnLevel;
-
   /// \brief Returns the previous token ignoring comments.
   AnnotatedToken *getPreviousNoneComment() const;
 
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index 329faf70429..29bb527740d 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -1826,6 +1826,26 @@ TEST_F(FormatTest, MemoizationTests) {
       "                                                aaaaa,\n"
       "                                                aaaaa))))))))))));",
       getLLVMStyleWithColumns(65));
+  verifyFormat(
+      "a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(), a), a), a), a),\n"
+      "                                  a),\n"
+      "                                a),\n"
+      "                              a),\n"
+      "                            a),\n"
+      "                          a),\n"
+      "                        a),\n"
+      "                      a),\n"
+      "                    a),\n"
+      "                  a),\n"
+      "                a),\n"
+      "              a),\n"
+      "            a),\n"
+      "          a),\n"
+      "        a),\n"
+      "      a),\n"
+      "    a),\n"
+      "  a)",
+      getLLVMStyleWithColumns(65));
 
   // This test takes VERY long when memoization is broken.
   FormatStyle OnePerLine = getLLVMStyle();
-- 
GitLab