From 8c6f4375dc7607ec15e8b961d009729dbd27c91c Mon Sep 17 00:00:00 2001
From: Daniel Jasper <djasper@google.com>
Date: Mon, 20 Jul 2015 23:28:07 +0000
Subject: [PATCH] clang-format: Fix crash when a UTF-8 character is found in
 an escape sequence. Discovered by the fuzzer.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@242738 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Format/Encoding.h           | 2 +-
 unittests/Format/FormatTest.cpp | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h
index 766d29274ce..592d7201a8a 100644
--- a/lib/Format/Encoding.h
+++ b/lib/Format/Encoding.h
@@ -135,7 +135,7 @@ inline unsigned getEscapeSequenceLength(StringRef Text) {
         ++I;
       return I;
     }
-    return 2;
+    return 1 + getNumBytesForUTF8(Text[1]);
   }
 }
 
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index b2c5307341d..252ae4db3f8 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -9555,6 +9555,11 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
             "\"八九十\tqq\"",
             format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
                    getLLVMStyleWithColumns(11)));
+
+  // A multi-byte UTF-8 character following a backslash must stay intact.
+  EXPECT_EQ("\"aaaaaa\"\n"
+            "\"\\\xC2\x8D\"",
+            format("\"aaaaaa\\\xC2\x8D\"", getLLVMStyleWithColumns(10)));
 }
 
 TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) {
-- 
GitLab