From f5c6ef2efb15d1bab9c64df5038895e6f13c9c71 Mon Sep 17 00:00:00 2001
From: Paul Robinson <paul_robinson@playstation.sony.com>
Date: Wed, 13 May 2015 21:18:15 +0000
Subject: [PATCH] Fix dependency file escaping.

When writing a dependency (.d) file, if space or # is immediately
preceded by one or more backslashes, escape the backslashes as well as
the space or # character. Otherwise leave backslash alone.
This straddles the fence between BSD Make (which does no escaping at
all, and does not support space or # in filespecs) and GNU Make (which
does support escaping, but will fall back to the filespec as-written
if the escaping doesn't match an existing file).

Differential Revision: http://reviews.llvm.org/D9208


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@237296 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Frontend/DependencyFile.cpp         | 62 +++++++++++++++++++++++--
 test/Frontend/dependency-gen-escaping.c |  8 ++++
 2 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/lib/Frontend/DependencyFile.cpp b/lib/Frontend/DependencyFile.cpp
index 6bea22ed592..1bbe5eaa8c4 100644
--- a/lib/Frontend/DependencyFile.cpp
+++ b/lib/Frontend/DependencyFile.cpp
@@ -292,9 +292,58 @@ void DFGImpl::AddFilename(StringRef Filename) {
     Files.push_back(Filename);
 }
 
-/// PrintFilename - GCC escapes spaces, # and $, but apparently not ' or " or
-/// other scary characters. NMake/Jom has a different set of scary characters,
-/// but wraps filespecs in double-quotes to avoid misinterpreting them;
+/// Print the filename, with escaping or quoting that accommodates the three
+/// most likely tools that use dependency files: GNU Make, BSD Make, and
+/// NMake/Jom.
+///
+/// BSD Make is the simplest case: It does no escaping at all.  This means
+/// characters that are normally delimiters, i.e. space and # (the comment
+/// character) simply aren't supported in filenames.
+///
+/// GNU Make does allow space and # in filenames, but to avoid being treated
+/// as a delimiter or comment, these must be escaped with a backslash. Because
+/// backslash is itself the escape character, if a backslash appears in a
+/// filename, it should be escaped as well.  (As a special case, $ is escaped
+/// as $$, which is the normal Make way to handle the $ character.)
+/// For compatibility with BSD Make and historical practice, if GNU Make
+/// un-escapes characters in a filename but doesn't find a match, it will
+/// retry with the unmodified original string.
+///
+/// GCC tries to accommodate both Make formats by escaping any space or #
+/// characters in the original filename, but not escaping any backslash
+/// characters.  That way, filenames with backslashes will be handled
+/// correctly by BSD Make, and by GNU Make in its fallback mode of using the
+/// unmodified original string; filenames with # or space characters aren't
+/// supported by BSD Make at all, but will be handled correctly by GNU Make
+/// due to the escaping.
+///
+/// A corner case that GCC does not handle is when the original filename has
+/// a backslash immediately followed by # or space. It will therefore take a
+/// dependency from a directive such as
+///     #include "a\#b.h"
+/// and emit it as
+///     a\\#b.h
+/// which GNU Make will interpret as
+///     a\
+/// followed by a comment. Failing to find this file, it will fall back to the
+/// original string, and look for
+///     a\\#b.h
+/// which probably doesn't exist either; in any case it won't find
+///     a\#b.h
+/// which is the actual filename specified by the include directive.
+///
+/// Clang escapes space, # and $ like GCC does, but also handles the case of
+/// backslash immediately preceding space or # by doubling those backslashes.
+/// This means Clang will emit the dependency from
+///     #include "a\#b.h"
+/// as
+///     a\\\#b.h
+/// which GNU Make will un-escape into
+///     a\#b.h
+/// which is the correct original filename.
+///
+/// NMake/Jom has a different set of scary characters, but wraps filespecs in
+/// double-quotes to avoid misinterpreting them; see
 /// https://msdn.microsoft.com/en-us/library/dd9y37ha.aspx for NMake info,
 /// https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
 /// for Windows file-naming info.
@@ -311,9 +360,12 @@ static void PrintFilename(raw_ostream &OS, StringRef Filename,
     return;
   }
   for (unsigned i = 0, e = Filename.size(); i != e; ++i) {
-    if (Filename[i] == ' ' || Filename[i] == '#')
+    if (Filename[i] == ' ' || Filename[i] == '#') {
       OS << '\\';
-    else if (Filename[i] == '$') // $ is escaped by $$.
+      unsigned j = i;
+      while (j > 0 && Filename[--j] == '\\')
+        OS << '\\';
+    } else if (Filename[i] == '$') // $ is escaped by $$.
       OS << '$';
     OS << Filename[i];
   }
diff --git a/test/Frontend/dependency-gen-escaping.c b/test/Frontend/dependency-gen-escaping.c
index a6da66a169a..efa463a6eed 100644
--- a/test/Frontend/dependency-gen-escaping.c
+++ b/test/Frontend/dependency-gen-escaping.c
@@ -16,3 +16,11 @@
 #include "$$.h"
 #include "##.h"
 #include "normal.h"
+
+// Clang handles backslash followed by # or space differently from GCC,
+// because GCC doesn't emit this obscure corner case the way GNU Make wants it.
+// CHECK: a\b\\\#c\\\ d.h
+// NMake output is only double-quoted, so these characters pass through as-is.
+// NMAKE: "a\b\#c\ d.h"
+
+#include "a\b\#c\ d.h"
-- 
GitLab