diff --git a/Lex/Preprocessor.cpp b/Lex/Preprocessor.cpp index 8376b9f8a30b39ca5f3a88c27c39890c5150b556..93017937d532cde6aabb7454e5ab05a79e578c4a 100644 --- a/Lex/Preprocessor.cpp +++ b/Lex/Preprocessor.cpp @@ -36,6 +36,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MemoryBuffer.h" #include <iostream> using namespace clang; @@ -253,6 +254,42 @@ CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { } +/// AdvanceToTokenCharacter - Given a location that specifies the start of a +/// token, return a new location that specifies a character within the token. +SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, + unsigned CharNo) { + // If they request the first char of the token, we're trivially done. + if (CharNo == 0) return TokStart; + + // Figure out how many physical characters away the specified logical + // character is. This needs to take into consideration newlines and + // trigraphs. + const char *TokStartPtr = SourceMgr.getCharacterData(TokStart); + const char *TokPtr = TokStartPtr; + + // The usual case is that tokens don't contain anything interesting. Skip + // over the uninteresting characters. If a token only consists of simple + // chars, this method is extremely fast. + while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr)) + ++TokPtr, --CharNo; + + // If we have a character that may be a trigraph or escaped newline, create a + // lexer to parse it correctly. + unsigned FileID = TokStart.getFileID(); + const llvm::MemoryBuffer *SrcBuf = SourceMgr.getBuffer(FileID); + if (CharNo != 0) { + // Create a lexer starting at this token position. + Lexer TheLexer(SrcBuf, FileID, *this, TokPtr); + LexerToken Tok; + // Skip over the remaining characters. 
+ for (; CharNo; --CharNo) + TheLexer.getAndAdvanceChar(TokPtr, Tok); + } + return SourceLocation(FileID, TokPtr-SrcBuf->getBufferStart()); +} + + + //===----------------------------------------------------------------------===// // Source File Location Methods. //===----------------------------------------------------------------------===// diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 4a8965efa2a486c4eed62ebb0f7ccf22b589cd69..173e72f3f1c1400629f396fe7e3a410cf0a8d88b 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -200,6 +200,7 @@ private: //===--------------------------------------------------------------------===// // Lexer character reading interfaces. +public: // This lexer is built on two interfaces for reading characters, both of which // automatically provide phase 1/2 translation. getAndAdvanceChar is used @@ -239,6 +240,7 @@ private: return C; } +private: /// ConsumeChar - When a character (identified by PeekCharAndSize) is consumed /// and added to a given token, check to see if there are diagnostics that /// need to be emitted or flags that need to be set on the token. If so, do diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 4b6ea1f436db07ecf3b3dc05c7c1b9feb8295072..d1fa97872da8e625535b628d4582e585a5eb5469 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -279,6 +279,10 @@ public: void DumpToken(const LexerToken &Tok, bool DumpFlags = false) const; void DumpMacro(const MacroInfo &MI) const; + /// AdvanceToTokenCharacter - Given a location that specifies the start of a + /// token, return a new location that specifies a character within the token. + SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char); + /// IncrementPasteCounter - Increment the counters for the number of token /// paste operations performed. If fast was specified, this is a 'fast paste' /// case we handled.