Skip to content
Snippets Groups Projects
Commit b306e0b5 authored by Reid Kleckner
Browse files

-frewrite-includes: Normalize line endings to match the main source file

It is very common to include headers with DOS-style line endings, such
as windows.h, from source files with Unix-style line endings.
Previously, we would end up with mixed line endings and #endifs that
appeared to be on the same line:

  #if 0 /* expanded by -frewrite-includes */
  #include <windows.h>^M#endif /* expanded by -frewrite-includes */

Clang treats either of \r or \n as a line ending character, so this is
purely a cosmetic issue.

This has no automated test because most Unix tools on Windows will
implicitly convert CRLF to LF when reading files, making it very hard to
detect line ending mismatches. FileCheck doesn't understand {{\r}}
either.

Fixes PR20552.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@217259 91177308-0d34-0410-b5e6-96231b3b80d8
parent 3e4e598f
No related branches found
No related tags found
No related merge requests found
...@@ -40,6 +40,7 @@ class InclusionRewriter : public PPCallbacks { ...@@ -40,6 +40,7 @@ class InclusionRewriter : public PPCallbacks {
Preprocessor &PP; ///< Used to find inclusion directives. Preprocessor &PP; ///< Used to find inclusion directives.
SourceManager &SM; ///< Used to read and manage source files. SourceManager &SM; ///< Used to read and manage source files.
raw_ostream &OS; ///< The destination stream for rewritten contents. raw_ostream &OS; ///< The destination stream for rewritten contents.
StringRef MainEOL; ///< The line ending marker to use.
const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines. const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
bool ShowLineMarkers; ///< Show #line markers. bool ShowLineMarkers; ///< Show #line markers.
bool UseLineDirective; ///< Use of line directives or line markers. bool UseLineDirective; ///< Use of line directives or line markers.
...@@ -54,6 +55,7 @@ public: ...@@ -54,6 +55,7 @@ public:
void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) { void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
PredefinesBuffer = Buf; PredefinesBuffer = Buf;
} }
void detectMainFileEOL();
private: private:
void FileChanged(SourceLocation Loc, FileChangeReason Reason, void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType, SrcMgr::CharacteristicKind FileType,
...@@ -67,8 +69,8 @@ private: ...@@ -67,8 +69,8 @@ private:
const Module *Imported) override; const Module *Imported) override;
void WriteLineInfo(const char *Filename, int Line, void WriteLineInfo(const char *Filename, int Line,
SrcMgr::CharacteristicKind FileType, SrcMgr::CharacteristicKind FileType,
StringRef EOL, StringRef Extra = StringRef()); StringRef Extra = StringRef());
void WriteImplicitModuleImport(const Module *Mod, StringRef EOL); void WriteImplicitModuleImport(const Module *Mod);
void OutputContentUpTo(const MemoryBuffer &FromFile, void OutputContentUpTo(const MemoryBuffer &FromFile,
unsigned &WriteFrom, unsigned WriteTo, unsigned &WriteFrom, unsigned WriteTo,
StringRef EOL, int &lines, StringRef EOL, int &lines,
...@@ -88,9 +90,9 @@ private: ...@@ -88,9 +90,9 @@ private:
/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
bool ShowLineMarkers) bool ShowLineMarkers)
: PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(nullptr), : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
ShowLineMarkers(ShowLineMarkers), PredefinesBuffer(nullptr), ShowLineMarkers(ShowLineMarkers),
LastInsertedFileChange(FileChanges.end()) { LastInsertedFileChange(FileChanges.end()) {
// If we're in microsoft mode, use normal #line instead of line markers. // If we're in microsoft mode, use normal #line instead of line markers.
UseLineDirective = PP.getLangOpts().MicrosoftExt; UseLineDirective = PP.getLangOpts().MicrosoftExt;
} }
...@@ -101,7 +103,7 @@ InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, ...@@ -101,7 +103,7 @@ InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
/// any \p Extra context specifiers in GNU line directives. /// any \p Extra context specifiers in GNU line directives.
void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
SrcMgr::CharacteristicKind FileType, SrcMgr::CharacteristicKind FileType,
StringRef EOL, StringRef Extra) { StringRef Extra) {
if (!ShowLineMarkers) if (!ShowLineMarkers)
return; return;
if (UseLineDirective) { if (UseLineDirective) {
...@@ -125,13 +127,12 @@ void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, ...@@ -125,13 +127,12 @@ void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
// should be treated as being wrapped in an implicit extern "C" block." // should be treated as being wrapped in an implicit extern "C" block."
OS << " 3 4"; OS << " 3 4";
} }
OS << EOL; OS << MainEOL;
} }
void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod, void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
StringRef EOL) {
OS << "@import " << Mod->getFullModuleName() << ";" OS << "@import " << Mod->getFullModuleName() << ";"
<< " /* clang -frewrite-includes: implicit import */" << EOL; << " /* clang -frewrite-includes: implicit import */" << MainEOL;
} }
/// FileChanged - Whenever the preprocessor enters or exits a #include file /// FileChanged - Whenever the preprocessor enters or exits a #include file
...@@ -197,23 +198,33 @@ InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { ...@@ -197,23 +198,33 @@ InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
/// Detect the likely line ending style of \p FromFile by examining the first /// Detect the likely line ending style of \p FromFile by examining the first
/// newline found within it. /// newline found within it.
static StringRef DetectEOL(const MemoryBuffer &FromFile) { static StringRef DetectEOL(const MemoryBuffer &FromFile) {
// detect what line endings the file uses, so that added content does not mix // Detect what line endings the file uses, so that added content does not mix
// the style // the style. We need to check for "\r\n" first because "\n\r" will match
// "\r\n\r\n".
const char *Pos = strchr(FromFile.getBufferStart(), '\n'); const char *Pos = strchr(FromFile.getBufferStart(), '\n');
if (!Pos) if (!Pos)
return "\n"; return "\n";
if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
return "\n\r";
if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
return "\r\n"; return "\r\n";
if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
return "\n\r";
return "\n"; return "\n";
} }
/// Detects the line ending style of the main source file and records it in
/// MainEOL. If the main file's buffer is reported as invalid, MainEOL is left
/// at whatever value it already holds.
void InclusionRewriter::detectMainFileEOL() {
bool Invalid;
// Fetch the buffer for the main file; getBuffer reports failure via Invalid.
const MemoryBuffer &FromFile = *SM.getBuffer(SM.getMainFileID(), &Invalid);
assert(!Invalid);
if (Invalid)
return; // Should never happen; when the assert is compiled out, keep the current MainEOL.
// Reuse the same per-file detection helper that is applied to included files.
MainEOL = DetectEOL(FromFile);
}
/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
/// \p WriteTo - 1. /// \p WriteTo - 1.
void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
unsigned &WriteFrom, unsigned WriteTo, unsigned &WriteFrom, unsigned WriteTo,
StringRef EOL, int &Line, StringRef LocalEOL, int &Line,
bool EnsureNewline) { bool EnsureNewline) {
if (WriteTo <= WriteFrom) if (WriteTo <= WriteFrom)
return; return;
...@@ -222,14 +233,37 @@ void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, ...@@ -222,14 +233,37 @@ void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
WriteFrom = WriteTo; WriteFrom = WriteTo;
return; return;
} }
OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
// count lines manually, it's faster than getPresumedLoc() // If we would output half of a line ending, advance one character to output
Line += std::count(FromFile.getBufferStart() + WriteFrom, // the whole line ending. All buffers are null terminated, so looking ahead
FromFile.getBufferStart() + WriteTo, '\n'); // one byte is safe.
if (EnsureNewline) { if (LocalEOL.size() == 2 &&
char LastChar = FromFile.getBufferStart()[WriteTo - 1]; LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&
if (LastChar != '\n' && LastChar != '\r') LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])
OS << EOL; WriteTo++;
StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,
WriteTo - WriteFrom);
if (MainEOL == LocalEOL) {
OS << TextToWrite;
// count lines manually, it's faster than getPresumedLoc()
Line += TextToWrite.count(LocalEOL);
if (EnsureNewline && !TextToWrite.endswith(LocalEOL))
OS << MainEOL;
} else {
// Output the file one line at a time, rewriting the line endings as we go.
StringRef Rest = TextToWrite;
while (!Rest.empty()) {
StringRef LineText;
std::tie(LineText, Rest) = Rest.split(LocalEOL);
OS << LineText;
Line++;
if (!Rest.empty())
OS << MainEOL;
}
if (TextToWrite.endswith(LocalEOL) || EnsureNewline)
OS << MainEOL;
} }
WriteFrom = WriteTo; WriteFrom = WriteTo;
} }
...@@ -242,10 +276,11 @@ void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, ...@@ -242,10 +276,11 @@ void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
const Token &StartToken, const Token &StartToken,
const MemoryBuffer &FromFile, const MemoryBuffer &FromFile,
StringRef EOL, StringRef LocalEOL,
unsigned &NextToWrite, int &Line) { unsigned &NextToWrite, int &Line) {
OutputContentUpTo(FromFile, NextToWrite, OutputContentUpTo(FromFile, NextToWrite,
SM.getFileOffset(StartToken.getLocation()), EOL, Line, false); SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,
false);
Token DirectiveToken; Token DirectiveToken;
do { do {
DirectiveLex.LexFromRawLexer(DirectiveToken); DirectiveLex.LexFromRawLexer(DirectiveToken);
...@@ -254,11 +289,12 @@ void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, ...@@ -254,11 +289,12 @@ void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
// OutputContentUpTo() would not output anything anyway. // OutputContentUpTo() would not output anything anyway.
return; return;
} }
OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; OS << "#if 0 /* expanded by -frewrite-includes */" << MainEOL;
OutputContentUpTo(FromFile, NextToWrite, OutputContentUpTo(FromFile, NextToWrite,
SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), SM.getFileOffset(DirectiveToken.getLocation()) +
EOL, Line, true); DirectiveToken.getLength(),
OS << "#endif /* expanded by -frewrite-includes */" << EOL; LocalEOL, Line, true);
OS << "#endif /* expanded by -frewrite-includes */" << MainEOL;
} }
/// Find the next identifier in the pragma directive specified by \p RawToken. /// Find the next identifier in the pragma directive specified by \p RawToken.
...@@ -358,13 +394,13 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -358,13 +394,13 @@ bool InclusionRewriter::Process(FileID FileId,
Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
RawLex.SetCommentRetentionState(false); RawLex.SetCommentRetentionState(false);
StringRef EOL = DetectEOL(FromFile); StringRef LocalEOL = DetectEOL(FromFile);
// Per the GNU docs: "1" indicates entering a new file. // Per the GNU docs: "1" indicates entering a new file.
if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID()) if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
WriteLineInfo(FileName, 1, FileType, EOL, ""); WriteLineInfo(FileName, 1, FileType, "");
else else
WriteLineInfo(FileName, 1, FileType, EOL, " 1"); WriteLineInfo(FileName, 1, FileType, " 1");
if (SM.getFileIDSize(FileId) == 0) if (SM.getFileIDSize(FileId) == 0)
return false; return false;
...@@ -392,15 +428,15 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -392,15 +428,15 @@ bool InclusionRewriter::Process(FileID FileId,
case tok::pp_include: case tok::pp_include:
case tok::pp_include_next: case tok::pp_include_next:
case tok::pp_import: { case tok::pp_import: {
CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, NextToWrite,
Line); Line);
if (FileId != PP.getPredefinesFileID()) if (FileId != PP.getPredefinesFileID())
WriteLineInfo(FileName, Line - 1, FileType, EOL, ""); WriteLineInfo(FileName, Line - 1, FileType, "");
StringRef LineInfoExtra; StringRef LineInfoExtra;
if (const FileChange *Change = FindFileChangeLocation( if (const FileChange *Change = FindFileChangeLocation(
HashToken.getLocation())) { HashToken.getLocation())) {
if (Change->Mod) { if (Change->Mod) {
WriteImplicitModuleImport(Change->Mod, EOL); WriteImplicitModuleImport(Change->Mod);
// else now include and recursively process the file // else now include and recursively process the file
} else if (Process(Change->Id, Change->FileType)) { } else if (Process(Change->Id, Change->FileType)) {
...@@ -413,7 +449,7 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -413,7 +449,7 @@ bool InclusionRewriter::Process(FileID FileId,
} }
// fix up lineinfo (since commented out directive changed line // fix up lineinfo (since commented out directive changed line
// numbers) for inclusions that were skipped due to header guards // numbers) for inclusions that were skipped due to header guards
WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra); WriteLineInfo(FileName, Line, FileType, LineInfoExtra);
break; break;
} }
case tok::pp_pragma: { case tok::pp_pragma: {
...@@ -421,17 +457,17 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -421,17 +457,17 @@ bool InclusionRewriter::Process(FileID FileId,
if (Identifier == "clang" || Identifier == "GCC") { if (Identifier == "clang" || Identifier == "GCC") {
if (NextIdentifierName(RawLex, RawToken) == "system_header") { if (NextIdentifierName(RawLex, RawToken) == "system_header") {
// keep the directive in, commented out // keep the directive in, commented out
CommentOutDirective(RawLex, HashToken, FromFile, EOL, CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
NextToWrite, Line); NextToWrite, Line);
// update our own type // update our own type
FileType = SM.getFileCharacteristic(RawToken.getLocation()); FileType = SM.getFileCharacteristic(RawToken.getLocation());
WriteLineInfo(FileName, Line, FileType, EOL); WriteLineInfo(FileName, Line, FileType);
} }
} else if (Identifier == "once") { } else if (Identifier == "once") {
// keep the directive in, commented out // keep the directive in, commented out
CommentOutDirective(RawLex, HashToken, FromFile, EOL, CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
NextToWrite, Line); NextToWrite, Line);
WriteLineInfo(FileName, Line, FileType, EOL); WriteLineInfo(FileName, Line, FileType);
} }
break; break;
} }
...@@ -471,12 +507,12 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -471,12 +507,12 @@ bool InclusionRewriter::Process(FileID FileId,
// Replace the macro with (0) or (1), followed by the commented // Replace the macro with (0) or (1), followed by the commented
// out macro for reference. // out macro for reference.
OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc), OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
EOL, Line, false); LocalEOL, Line, false);
OS << '(' << (int) HasFile << ")/*"; OS << '(' << (int) HasFile << ")/*";
OutputContentUpTo(FromFile, NextToWrite, OutputContentUpTo(FromFile, NextToWrite,
SM.getFileOffset(RawToken.getLocation()) + SM.getFileOffset(RawToken.getLocation()) +
RawToken.getLength(), RawToken.getLength(),
EOL, Line, false); LocalEOL, Line, false);
OS << "*/"; OS << "*/";
} }
} while (RawToken.isNot(tok::eod)); } while (RawToken.isNot(tok::eod));
...@@ -484,8 +520,8 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -484,8 +520,8 @@ bool InclusionRewriter::Process(FileID FileId,
OutputContentUpTo(FromFile, NextToWrite, OutputContentUpTo(FromFile, NextToWrite,
SM.getFileOffset(RawToken.getLocation()) + SM.getFileOffset(RawToken.getLocation()) +
RawToken.getLength(), RawToken.getLength(),
EOL, Line, /*EnsureNewLine*/ true); LocalEOL, Line, /*EnsureNewline=*/ true);
WriteLineInfo(FileName, Line, FileType, EOL); WriteLineInfo(FileName, Line, FileType);
} }
break; break;
} }
...@@ -500,11 +536,11 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -500,11 +536,11 @@ bool InclusionRewriter::Process(FileID FileId,
do { do {
RawLex.LexFromRawLexer(RawToken); RawLex.LexFromRawLexer(RawToken);
} while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof)); } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
OutputContentUpTo( OutputContentUpTo(FromFile, NextToWrite,
FromFile, NextToWrite, SM.getFileOffset(RawToken.getLocation()) +
SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(), RawToken.getLength(),
EOL, Line, /*EnsureNewLine*/ true); LocalEOL, Line, /*EnsureNewline=*/ true);
WriteLineInfo(FileName, Line, FileType, EOL); WriteLineInfo(FileName, Line, FileType);
RawLex.SetKeepWhitespaceMode(false); RawLex.SetKeepWhitespaceMode(false);
} }
default: default:
...@@ -516,8 +552,8 @@ bool InclusionRewriter::Process(FileID FileId, ...@@ -516,8 +552,8 @@ bool InclusionRewriter::Process(FileID FileId,
RawLex.LexFromRawLexer(RawToken); RawLex.LexFromRawLexer(RawToken);
} }
OutputContentUpTo(FromFile, NextToWrite, OutputContentUpTo(FromFile, NextToWrite,
SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line, SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,
/*EnsureNewline*/true); Line, /*EnsureNewline=*/true);
return true; return true;
} }
...@@ -527,6 +563,8 @@ void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, ...@@ -527,6 +563,8 @@ void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
SourceManager &SM = PP.getSourceManager(); SourceManager &SM = PP.getSourceManager();
InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
Opts.ShowLineMarkers); Opts.ShowLineMarkers);
Rewrite->detectMainFileEOL();
PP.addPPCallbacks(Rewrite); PP.addPPCallbacks(Rewrite);
PP.IgnorePragmas(); PP.IgnorePragmas();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment