From c059f89d888cd214e18ed09a7b47339201526381 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis <akyrtzi@gmail.com> Date: Mon, 7 Jan 2013 19:16:30 +0000 Subject: [PATCH] [libclang] Simplify annotation of preprocessing tokens and remove the AnnotateTokensData DenseMap and the lookups associated with it. Instead of lexing for preprocessor tokens, associating the annotation with the location in a map, and later lookup in the map in order to adjust the cursor annotation, just annotate the cursor while lexing for preprocessor tokens. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@171775 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Index/annotate-tokens-pp.c | 2 +- tools/libclang/CIndex.cpp | 145 ++++++++++++++------------------ 2 files changed, 64 insertions(+), 83 deletions(-) diff --git a/test/Index/annotate-tokens-pp.c b/test/Index/annotate-tokens-pp.c index b7c0a8030b6..2ebc58fc60b 100644 --- a/test/Index/annotate-tokens-pp.c +++ b/test/Index/annotate-tokens-pp.c @@ -192,7 +192,7 @@ struct A // CHECK: Identifier: "x" [25:25 - 25:26] DeclRefExpr=x:24:7 // CHECK: Punctuation: "," [25:26 - 25:27] // CHECK: Punctuation: "{" [25:28 - 25:29] CompoundStmt= -// CHECK: Keyword: "int" [25:30 - 25:33] DeclStmt= +// CHECK: Keyword: "int" [25:30 - 25:33] VarDecl=z:25:34 (Definition) // CHECK: Identifier: "z" [25:34 - 25:35] VarDecl=z:25:34 (Definition) // CHECK: Punctuation: "=" [25:36 - 25:37] VarDecl=z:25:34 (Definition) // CHECK: Identifier: "x" [25:38 - 25:39] DeclRefExpr=x:24:7 diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index 51ed343f528..797344eada1 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -4875,7 +4875,6 @@ void clang_disposeTokens(CXTranslationUnit TU, // Token annotation APIs. //===----------------------------------------------------------------------===// -typedef llvm::DenseMap<unsigned, CXCursor> AnnotateTokensData; static enum CXChildVisitResult AnnotateTokensVisitor(CXCursor cursor, CXCursor parent, CXClientData client_data); @@ -4884,7 +4883,6 @@ static bool AnnotateTokensPostChildrenVisitor(CXCursor cursor, namespace { class AnnotateTokensWorker { - AnnotateTokensData &Annotated; CXToken *Tokens; CXCursor *Cursors; unsigned NumTokens; @@ -4919,10 +4917,9 @@ class AnnotateTokensWorker { SourceRange); public: - AnnotateTokensWorker(AnnotateTokensData &annotated, - CXToken *tokens, CXCursor *cursors, unsigned numTokens, + AnnotateTokensWorker(CXToken *tokens, CXCursor *cursors, unsigned numTokens, CXTranslationUnit tu, SourceRange RegionOfInterest) - : Annotated(annotated), Tokens(tokens), Cursors(cursors), + : Tokens(tokens), Cursors(cursors), NumTokens(numTokens), TokIdx(0), PreprocessingTokIdx(0), AnnotateVis(tu, AnnotateTokensVisitor, this, @@ -4955,25 +4952,13 @@ void AnnotateTokensWorker::AnnotateTokens() { // Walk the AST within the region of interest, annotating tokens // along the way. AnnotateVis.visitFileRegion(); +} - for (unsigned I = 0 ; I < TokIdx ; ++I) { - AnnotateTokensData::iterator Pos = Annotated.find(Tokens[I].int_data[1]); - if (Pos != Annotated.end() && !clang_isPreprocessing(Cursors[I].kind)) - Cursors[I] = Pos->second; - } - - // Finish up annotating any tokens left. - if (!MoreTokens()) +static inline void updateCursorAnnotation(CXCursor &Cursor, + const CXCursor &updateC) { + if (clang_isInvalid(updateC.kind) || clang_isPreprocessing(Cursor.kind)) return; - - const CXCursor &C = clang_getNullCursor(); - for (unsigned I = TokIdx ; I < NumTokens ; ++I) { - if (I < PreprocessingTokIdx && clang_isPreprocessing(Cursors[I].kind)) - continue; - - AnnotateTokensData::iterator Pos = Annotated.find(Tokens[I].int_data[1]); - Cursors[I] = (Pos == Annotated.end()) ? C : Pos->second; - } + Cursor = updateC; } /// \brief It annotates and advances tokens with a cursor until the comparison @@ -4992,7 +4977,7 @@ void AnnotateTokensWorker::annotateAndAdvanceTokens(CXCursor updateC, SourceLocation TokLoc = GetTokenLoc(I); if (LocationCompare(SrcMgr, TokLoc, range) == compResult) { - Cursors[I] = updateC; + updateCursorAnnotation(Cursors[I], updateC); AdvanceToken(); continue; } @@ -5037,7 +5022,6 @@ void AnnotateTokensWorker::annotateAndAdvanceFunctionMacroTokens( enum CXChildVisitResult AnnotateTokensWorker::Visit(CXCursor cursor, CXCursor parent) { - CXSourceLocation Loc = clang_getCursorLocation(cursor); SourceRange cursorRange = getRawCursorExtent(cursor); if (cursorRange.isInvalid()) return CXChildVisit_Recurse; @@ -5119,7 +5103,9 @@ AnnotateTokensWorker::Visit(CXCursor cursor, CXCursor parent) { case RangeAfter: break; case RangeOverlap: - Cursors[I] = cursor; + // We may have already annotated macro names inside macro definitions. + if (Cursors[I].kind != CXCursor_MacroExpansion) + Cursors[I] = cursor; AdvanceToken(); // For macro expansions, just note where the beginning of the macro // expansion occurs. @@ -5140,27 +5126,7 @@ AnnotateTokensWorker::Visit(CXCursor cursor, CXCursor parent) { if (cursorRange.isInvalid()) return CXChildVisit_Continue; - SourceLocation L = SourceLocation::getFromRawEncoding(Loc.int_data); - const enum CXCursorKind cursorK = clang_getCursorKind(cursor); - - // If the location of the cursor occurs within a macro instantiation, record - // the spelling location of the cursor in our annotation map. We can then - // paper over the token labelings during a post-processing step to try and - // get cursor mappings for tokens that are the *arguments* of a macro - // instantiation. - if (L.isMacroID()) { - unsigned rawEncoding = SrcMgr.getSpellingLoc(L).getRawEncoding(); - // Only invalidate the old annotation if it isn't part of a preprocessing - // directive. Here we assume that the default construction of CXCursor - // results in CXCursor.kind being an initialized value (i.e., 0). If - // this isn't the case, we can fix by doing lookup + insertion. - - CXCursor &oldC = Annotated[rawEncoding]; - if (!clang_isPreprocessing(oldC.kind)) - oldC = cursor; - } - const enum CXCursorKind K = clang_getCursorKind(parent); const CXCursor updateC = (clang_isInvalid(K) || K == CXCursor_TranslationUnit) @@ -5180,7 +5146,7 @@ AnnotateTokensWorker::Visit(CXCursor cursor, CXCursor parent) { if (E->getLocStart().isValid() && D->getLocation().isValid() && E->getLocStart() == D->getLocation() && E->getLocStart() == GetTokenLoc(I)) { - Cursors[I] = updateC; + updateCursorAnnotation(Cursors[I], updateC); AdvanceToken(); } } @@ -5319,9 +5285,26 @@ namespace { }; } +/// \brief Used by \c annotatePreprocessorTokens. +/// \returns true if lexing was finished, false otherwise. +static bool lexNext(Lexer &Lex, Token &Tok, + unsigned &NextIdx, unsigned NumTokens) { + if (NextIdx >= NumTokens) + return true; + + ++NextIdx; + Lex.LexFromRawLexer(Tok); + if (Tok.is(tok::eof)) + return true; + + return false; +} + static void annotatePreprocessorTokens(CXTranslationUnit TU, SourceRange RegionOfInterest, - AnnotateTokensData &Annotated) { + CXCursor *Cursors, + CXToken *Tokens, + unsigned NumTokens) { ASTUnit *CXXUnit = static_cast<ASTUnit *>(TU->TUData); SourceManager &SourceMgr = CXXUnit->getSourceManager(); @@ -5345,44 +5328,48 @@ static void annotatePreprocessorTokens(CXTranslationUnit TU, Buffer.end()); Lex.SetCommentRetentionState(true); + unsigned NextIdx = 0; // Lex tokens in raw mode until we hit the end of the range, to avoid // entering #includes or expanding macros. while (true) { Token Tok; - Lex.LexFromRawLexer(Tok); + if (lexNext(Lex, Tok, NextIdx, NumTokens)) + break; + unsigned TokIdx = NextIdx-1; + assert(Tok.getLocation() == + SourceLocation::getFromRawEncoding(Tokens[TokIdx].int_data[1])); reprocess: if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) { - // We have found a preprocessing directive. Gobble it up so that we - // don't see it while preprocessing these tokens later, but keep track - // of all of the token locations inside this preprocessing directive so - // that we can annotate them appropriately. + // We have found a preprocessing directive. Annotate the tokens + // appropriately. // // FIXME: Some simple tests here could identify macro definitions and // #undefs, to provide specific cursor kinds for those. - SmallVector<SourceLocation, 32> Locations; + + SourceLocation BeginLoc = Tok.getLocation(); + bool finished = false; do { - Locations.push_back(Tok.getLocation()); - Lex.LexFromRawLexer(Tok); - } while (!Tok.isAtStartOfLine() && !Tok.is(tok::eof)); - - using namespace cxcursor; - CXCursor Cursor - = MakePreprocessingDirectiveCursor(SourceRange(Locations.front(), - Locations.back()), - TU); - for (unsigned I = 0, N = Locations.size(); I != N; ++I) { - Annotated[Locations[I].getRawEncoding()] = Cursor; - } - - if (Tok.isAtStartOfLine()) - goto reprocess; + if (lexNext(Lex, Tok, NextIdx, NumTokens)) { + finished = true; + break; + } + } while (!Tok.isAtStartOfLine()); + + unsigned LastIdx = finished ? NextIdx-1 : NextIdx-2; + assert(TokIdx <= LastIdx); + SourceLocation EndLoc = + SourceLocation::getFromRawEncoding(Tokens[LastIdx].int_data[1]); + CXCursor Cursor = + MakePreprocessingDirectiveCursor(SourceRange(BeginLoc, EndLoc), TU); + + for (; TokIdx <= LastIdx; ++TokIdx) + Cursors[TokIdx] = Cursor; - continue; + if (finished) + break; + goto reprocess; } - - if (Tok.is(tok::eof)) - break; } } @@ -5406,13 +5393,9 @@ static void clang_annotateTokensImpl(void *UserData) { cxloc::translateSourceLocation(clang_getTokenLocation(TU, Tokens[NumTokens-1]))); - // A mapping from the source locations found when re-lexing or traversing the - // region of interest to the corresponding cursors. - AnnotateTokensData Annotated; - // Relex the tokens within the source range to look for preprocessing // directives. - annotatePreprocessorTokens(TU, RegionOfInterest, Annotated); + annotatePreprocessorTokens(TU, RegionOfInterest, Cursors, Tokens, NumTokens); if (CXXUnit->getPreprocessor().getPreprocessingRecord()) { // Search and mark tokens that are macro argument expansions. @@ -5428,8 +5411,7 @@ static void clang_annotateTokensImpl(void *UserData) { // Annotate all of the source locations in the region of interest that map to // a specific cursor. - AnnotateTokensWorker W(Annotated, Tokens, Cursors, NumTokens, - TU, RegionOfInterest); + AnnotateTokensWorker W(Tokens, Cursors, NumTokens, TU, RegionOfInterest); // FIXME: We use a ridiculous stack size here because the data-recursion // algorithm uses a large stack frame than the non-data recursive version, @@ -6170,12 +6152,11 @@ MacroInfo *cxindex::getMacroInfo(const IdentifierInfo &II, CXTranslationUnit TU){ if (MacroDefLoc.isInvalid() || !TU) return 0; - - ASTUnit *Unit = static_cast<ASTUnit *>(TU->TUData); - Preprocessor &PP = Unit->getPreprocessor(); if (!II.hadMacroDefinition()) return 0; + ASTUnit *Unit = static_cast<ASTUnit *>(TU->TUData); + Preprocessor &PP = Unit->getPreprocessor(); MacroInfo *MI = PP.getMacroInfoHistory(const_cast<IdentifierInfo*>(&II)); while (MI) { if (MacroDefLoc == MI->getDefinitionLoc()) -- GitLab