diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp index aef59cbe352be1252665b29a2dcd3548816b4445..5409a64e8bcd24774104091e54bc93fe3cf86e50 100644 --- a/Driver/CacheTokens.cpp +++ b/Driver/CacheTokens.cpp @@ -30,33 +30,34 @@ using namespace clang; typedef uint32_t Offset; -typedef std::vector<std::pair<Offset, llvm::StringMapEntry<Offset>*> > - SpellMapTy; - namespace { class VISIBILITY_HIDDEN PCHEntry { Offset TokenData, PPCondData; - union { Offset SpellingOff; SpellMapTy* Spellings; }; public: PCHEntry() {} - PCHEntry(Offset td, Offset ppcd, SpellMapTy* sp) - : TokenData(td), PPCondData(ppcd), Spellings(sp) {} + PCHEntry(Offset td, Offset ppcd) + : TokenData(td), PPCondData(ppcd) {} - Offset getTokenOffset() const { return TokenData; } + Offset getTokenOffset() const { return TokenData; } Offset getPPCondTableOffset() const { return PPCondData; } - SpellMapTy& getSpellings() const { return *Spellings; } - - void setSpellingTableOffset(Offset off) { SpellingOff = off; } - Offset getSpellingTableOffset() const { return SpellingOff; } +}; +class OffsetOpt { + bool valid; + Offset off; +public: + OffsetOpt() : valid(false) {} + bool hasOffset() const { return valid; } + Offset getOffset() const { assert(valid); return off; } + void setOffset(Offset o) { off = o; valid = true; } }; } // end anonymous namespace typedef llvm::DenseMap<const FileEntry*, PCHEntry> PCHMap; typedef llvm::DenseMap<const IdentifierInfo*,uint32_t> IDMap; -typedef llvm::StringMap<Offset, llvm::BumpPtrAllocator> CachedStrsTy; +typedef llvm::StringMap<OffsetOpt, llvm::BumpPtrAllocator> CachedStrsTy; namespace { class VISIBILITY_HIDDEN PTHWriter { @@ -66,8 +67,8 @@ class VISIBILITY_HIDDEN PTHWriter { uint32_t idcount; PCHMap PM; CachedStrsTy CachedStrs; - - SpellMapTy* CurSpellMap; + Offset CurStrOffset; + std::vector<llvm::StringMapEntry<OffsetOpt>*> StrEntries; //// Get the persistent id for the given IdentifierInfo*. uint32_t ResolveID(const IdentifierInfo* II); @@ -106,11 +107,11 @@ class VISIBILITY_HIDDEN PTHWriter { std::pair<Offset,std::pair<Offset, Offset> > EmitIdentifierTable(); Offset EmitFileTable(); PCHEntry LexTokens(Lexer& L); - void EmitCachedSpellings(); + Offset EmitCachedSpellings(); public: PTHWriter(llvm::raw_fd_ostream& out, Preprocessor& pp) - : Out(out), PP(pp), idcount(0) {} + : Out(out), PP(pp), idcount(0), CurStrOffset(0) {} void GeneratePTH(); }; @@ -132,14 +133,10 @@ uint32_t PTHWriter::ResolveID(const IdentifierInfo* II) { } void PTHWriter::EmitToken(const Token& T) { - uint32_t fpos = PP.getSourceManager().getFullFilePos(T.getLocation()); - Emit32(((uint32_t) T.getKind()) | (((uint32_t) T.getFlags()) << 8) | (((uint32_t) T.getLength()) << 16)); - Emit32(ResolveID(T.getIdentifierInfo())); - Emit32(fpos); - + // Literals (strings, numbers, characters) get cached spellings. if (T.isLiteral()) { // FIXME: This uses the slow getSpelling(). Perhaps we do better @@ -148,12 +145,21 @@ void PTHWriter::EmitToken(const Token& T) { const char* s = spelling.c_str(); // Get the string entry. - llvm::StringMapEntry<Offset> *E = - &CachedStrs.GetOrCreateValue(s, s+spelling.size()); - - // Store the address of the string entry in our spelling map. - CurSpellMap->push_back(std::make_pair(fpos, E)); + llvm::StringMapEntry<OffsetOpt> *E = + &CachedStrs.GetOrCreateValue(s, s+spelling.size()); + + if (!E->getValue().hasOffset()) { + E->getValue().setOffset(CurStrOffset); + StrEntries.push_back(E); + CurStrOffset += spelling.size() + 1; + } + + Emit32(E->getValue().getOffset()); } + else + Emit32(ResolveID(T.getIdentifierInfo())); + + Emit32(PP.getSourceManager().getFullFilePos(T.getLocation())); } namespace { @@ -251,7 +257,6 @@ Offset PTHWriter::EmitFileTable() { EmitBuf(Name, Name+size); Emit32(I->second.getTokenOffset()); Emit32(I->second.getPPCondTableOffset()); - Emit32(I->second.getSpellingTableOffset()); } return off; @@ -268,11 +273,6 @@ PCHEntry PTHWriter::LexTokens(Lexer& L) { PPCondTable PPCond; std::vector<unsigned> PPStartCond; bool ParsingPreprocessorDirective = false; - - // Allocate a spelling map for this source file. - llvm::OwningPtr<SpellMapTy> Spellings(new SpellMapTy()); - CurSpellMap = Spellings.get(); - Token Tok; do { @@ -401,56 +401,22 @@ PCHEntry PTHWriter::LexTokens(Lexer& L) { Emit32(x == i ? 0 : x); } - return PCHEntry(off, PPCondOff, Spellings.take()); + return PCHEntry(off, PPCondOff); } -void PTHWriter::EmitCachedSpellings() { - // Write each cached string to the PTH file and update the - // the string map entry to contain the relevant offset. - // - // FIXME: We can write the strings out in order of their frequency. This - // may result in better locality. - // - for (CachedStrsTy::iterator I = CachedStrs.begin(), E = CachedStrs.end(); - I!=E; ++I) { - - Offset off = Out.tell(); - - // Write out the length of the string before the string itself. - unsigned len = I->getKeyLength(); - Emit16(len); +Offset PTHWriter::EmitCachedSpellings() { + // Write each cached strings to the PTH file. + Offset SpellingsOff = Out.tell(); + + for (std::vector<llvm::StringMapEntry<OffsetOpt>*>::iterator + I = StrEntries.begin(), E = StrEntries.end(); I!=E; ++I) { - // Write out the string data. - const char* data = I->getKeyData(); - EmitBuf(data, data+len); - - // Write out a single blank character. - Emit8(' '); - - // Now patch the offset of the string in the PTH file into the string map. - I->setValue(off); + const char* data = (*I)->getKeyData(); + EmitBuf(data, data + (*I)->getKeyLength()); + Emit8('\0'); } - // Now emit the spelling tables. - for (PCHMap::iterator I=PM.begin(), E=PM.end(); I!=E; ++I) { - SpellMapTy& spellings = I->second.getSpellings(); - I->second.setSpellingTableOffset(Out.tell()); - - // Write out the number of spellings. - unsigned n = spellings.size(); - Emit32(n); - - for (unsigned i = 0; i < n; ++i) { - // Write out the offset of the token within the source file. - Emit32(spellings[i].first); - - // Write out the offset of the spelling data within the PTH file. - Emit32(spellings[i].second->getValue()); - } - - // Delete the spelling map for this source file. - delete &spellings; - } + return SpellingsOff; } void PTHWriter::GeneratePTH() { @@ -490,7 +456,7 @@ void PTHWriter::GeneratePTH() { = EmitIdentifierTable(); // Write out the cached strings table. - EmitCachedSpellings(); + Offset SpellingOff = EmitCachedSpellings(); // Write out the file table. Offset FileTableOff = EmitFileTable(); @@ -501,6 +467,7 @@ void PTHWriter::GeneratePTH() { Emit32(IdTableOff.second.first); Emit32(IdTableOff.second.second); Emit32(FileTableOff); + Emit32(SpellingOff); // Now write the offset in the prologue. Out.seek(JumpOffset); diff --git a/include/clang/Lex/PTHLexer.h b/include/clang/Lex/PTHLexer.h index c6837d2a8b3a6fc4b7d9ec63953c0c679add974b..369b818a1fc9ea117e5e1eb1b81e938e78a7fe98 100644 --- a/include/clang/Lex/PTHLexer.h +++ b/include/clang/Lex/PTHLexer.h @@ -45,10 +45,6 @@ class PTHLexer : public PreprocessorLexer { /// to process when doing quick skipping of preprocessor blocks. const unsigned char* CurPPCondPtr; - /// MySpellingMgr - Reference to the spelling manager used to get spellings - /// for the source file indicated by \c FileID. - PTHSpellingSearch& MySpellingSrch; - PTHLexer(const PTHLexer&); // DO NOT IMPLEMENT void operator=(const PTHLexer&); // DO NOT IMPLEMENT @@ -65,8 +61,7 @@ protected: /// Create a PTHLexer for the specified token stream. PTHLexer(Preprocessor& pp, FileID FID, const unsigned char *D, - const unsigned char* ppcond, - PTHSpellingSearch& mySpellingSrch, PTHManager &PM); + const unsigned char* ppcond, PTHManager &PM); public: ~PTHLexer() {} @@ -95,16 +90,6 @@ public: /// IndirectLex - An indirect call to 'Lex' that can be invoked via /// the PreprocessorLexer interface. void IndirectLex(Token &Result) { Lex(Result); } - - /// Returns the cached spelling of a token. - /// \param[in] sloc The SourceLocation of the token. - /// \param[out] Buffer If a token's spelling is found in the PTH file then - /// upon exit from this method \c Buffer will be set to the address of - /// the character array representing that spelling. No characters - /// are copied. - /// \returns The number of characters for the spelling of the token. This - /// value is 0 if the spelling could not be found in the PTH file. - unsigned getSpelling(SourceLocation sloc, const char *&Buffer); /// getSourceLocation - Return a source location for the token in /// the current file. diff --git a/include/clang/Lex/PTHManager.h b/include/clang/Lex/PTHManager.h index 31cf78f03635e52af90dd12061d8eae035c7f601..b77cda1f0b7dde735791efac64bb6430454dfc05 100644 --- a/include/clang/Lex/PTHManager.h +++ b/include/clang/Lex/PTHManager.h @@ -29,42 +29,13 @@ namespace clang { class FileEntry; class PTHLexer; -class PTHManager; - -class PTHSpellingSearch { - PTHManager& PTHMgr; - - const unsigned char* const TableBeg; - const unsigned char* const TableEnd; - - const unsigned NumSpellings; - const unsigned char* LinearItr; - -public: - enum { SpellingEntrySize = 4*2 }; - - unsigned getSpellingBinarySearch(unsigned fpos, const char *&Buffer); - unsigned getSpellingLinearSearch(unsigned fpos, const char *&Buffer); - - PTHSpellingSearch(PTHManager& pm, unsigned numSpellings, - const unsigned char* tableBeg) - : PTHMgr(pm), - TableBeg(tableBeg), - TableEnd(tableBeg + numSpellings*SpellingEntrySize), - NumSpellings(numSpellings), - LinearItr(tableBeg) {} -}; class PTHManager : public IdentifierInfoLookup { friend class PTHLexer; - friend class PTHSpellingSearch; /// The memory mapped PTH file. const llvm::MemoryBuffer* Buf; - - /// A map from FileIDs to SpellingSearch objects. - llvm::DenseMap<FileID, PTHSpellingSearch*> SpellingMap; - + /// Alloc - Allocator used for IdentifierInfo objects. llvm::BumpPtrAllocator Alloc; @@ -84,7 +55,7 @@ class PTHManager : public IdentifierInfoLookup { /// SortedIdTable - Array ordering persistent identifier IDs by the lexical /// order of their corresponding strings. This is used by get(). const unsigned char* const SortedIdTable; - + /// NumIds - The number of identifiers in the PTH file. const unsigned NumIds; @@ -92,11 +63,16 @@ class PTHManager : public IdentifierInfoLookup { /// PTHLexer objects. Preprocessor* PP; + /// SpellingBase - The base offset within the PTH memory buffer that + /// contains the cached spellings for literals. + const unsigned char* const SpellingBase; + /// This constructor is intended to only be called by the static 'Create' /// method. PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, const unsigned char* idDataTable, IdentifierInfo** perIDCache, - const unsigned char* sortedIdTable, unsigned numIds); + const unsigned char* sortedIdTable, unsigned numIds, + const unsigned char* spellingBase); // Do not implement. PTHManager(); @@ -119,7 +95,7 @@ class PTHManager : public IdentifierInfoLookup { public: // The current PTH version. - enum { Version = 0 }; + enum { Version = 1 }; ~PTHManager(); @@ -138,11 +114,7 @@ public: /// CreateLexer - Return a PTHLexer that "lexes" the cached tokens for the /// specified file. This method returns NULL if no cached tokens exist. /// It is the responsibility of the caller to 'delete' the returned object. - PTHLexer *CreateLexer(FileID FID); - - unsigned getSpelling(SourceLocation Loc, const char *&Buffer); -private: - unsigned getSpelling(FileID FID, unsigned fpos, const char *& Buffer); + PTHLexer *CreateLexer(FileID FID); }; } // end namespace clang diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 095c1db1de3c3213e6ec156ec0dec0617dc04304..4e57069385d08bc1095fb7e25e0cb1980a53290a 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -461,12 +461,6 @@ public: // If the token is carrying a literal data pointer, just use it. if (const char *D = Tok.getLiteralData()) return *D; - - if (PTH) { - const char *Data; - if (PTH->getSpelling(Tok.getLocation(), Data)) - return *Data; - } // Otherwise, fall back on getCharacterData, which is slower, but always // works. diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index d94ab7052564791ea37a5e5a0ffb7285e1934b4e..f9f2b21061203e497c44f1af2e44c81a83d0f595 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -56,11 +56,9 @@ static inline uint32_t ReadLE32(const unsigned char *&Data) { //===----------------------------------------------------------------------===// PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, - const unsigned char *ppcond, - PTHSpellingSearch &mySpellingSrch, PTHManager &PM) + const unsigned char *ppcond, PTHManager &PM) : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0), - PPCond(ppcond), CurPPCondPtr(ppcond), MySpellingSrch(mySpellingSrch), - PTHMgr(PM) { + PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) { FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); } @@ -98,7 +96,10 @@ LexNextToken: Tok.setLength(Len); // Handle identifiers. - if (IdentifierID) { + if (Tok.isLiteral()) { + Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID)); + } + else if (IdentifierID) { MIOpt.ReadToken(); IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1); @@ -304,119 +305,6 @@ SourceLocation PTHLexer::getSourceLocation() { return FileStartLoc.getFileLocWithOffset(Offset); } -//===----------------------------------------------------------------------===// -// getSpelling() - Use cached data in PTH files for getSpelling(). -//===----------------------------------------------------------------------===// - -unsigned PTHManager::getSpelling(FileID FID, unsigned FPos, - const char *&Buffer) { - llvm::DenseMap<FileID, PTHSpellingSearch*>::iterator I =SpellingMap.find(FID); - - if (I == SpellingMap.end()) - return 0; - - return I->second->getSpellingBinarySearch(FPos, Buffer); -} - -unsigned PTHManager::getSpelling(SourceLocation Loc, const char *&Buffer) { - SourceManager &SM = PP->getSourceManager(); - Loc = SM.getSpellingLoc(Loc); - std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); - return getSpelling(LocInfo.first, LocInfo.second, Buffer); -} - -unsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset, - const char *&Buffer) { - assert(PTHOffset < Buf->getBufferSize()); - const unsigned char* Ptr = - (const unsigned char*)Buf->getBufferStart() + PTHOffset; - - // The string is prefixed by 16 bits for its length, followed by the string - // itself. - unsigned Len = ReadUnalignedLE16(Ptr); - Buffer = (const char *)Ptr; - return Len; -} - -unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned FPos, - const char *&Buffer) { - const unsigned char *Ptr = LinearItr; - unsigned Len = 0; - - if (Ptr == TableEnd) - return getSpellingBinarySearch(FPos, Buffer); - - do { - uint32_t TokOffset = ReadLE32(Ptr); - - if (TokOffset > FPos) - return getSpellingBinarySearch(FPos, Buffer); - - // Did we find a matching token offset for this spelling? - if (TokOffset == FPos) { - uint32_t SpellingPTHOffset = ReadLE32(Ptr); - Len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer); - break; - } - } while (Ptr != TableEnd); - - LinearItr = Ptr; - return Len; -} - - -unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned FPos, - const char *&Buffer) { - - assert((TableEnd - TableBeg) % SpellingEntrySize == 0); - assert(TableEnd >= TableBeg); - - if (TableEnd == TableBeg) - return 0; - - unsigned min = 0; - const unsigned char *tb = TableBeg; - unsigned max = NumSpellings; - - do { - unsigned i = (max - min) / 2 + min; - const unsigned char *Ptr = tb + (i * SpellingEntrySize); - - uint32_t TokOffset = ReadLE32(Ptr); - if (TokOffset > FPos) { - max = i; - assert(!(max == min) || (min == i)); - continue; - } - - if (TokOffset < FPos) { - if (i == min) - break; - - min = i; - continue; - } - - uint32_t SpellingPTHOffset = ReadLE32(Ptr); - return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer); - } - while (min != max); - - return 0; -} - -unsigned PTHLexer::getSpelling(SourceLocation Loc, const char *&Buffer) { - SourceManager &SM = PP->getSourceManager(); - std::pair<FileID, unsigned> LocInfo = SM.getDecomposedSpellingLoc(Loc); - - FileID FID = LocInfo.first; - unsigned FPos = LocInfo.second; - - if (FID == getFileID()) - return MySpellingSrch.getSpellingLinearSearch(FPos, Buffer); - return PTHMgr.getSpelling(FID, FPos, Buffer); -} - //===----------------------------------------------------------------------===// // Internal Data Structures for PTH file lookup and resolving identifiers. //===----------------------------------------------------------------------===// @@ -431,11 +319,10 @@ public: class Val { uint32_t TokenOff; uint32_t PPCondOff; - uint32_t SpellingOff; public: Val() : TokenOff(~0) {} - Val(uint32_t toff, uint32_t poff, uint32_t soff) - : TokenOff(toff), PPCondOff(poff), SpellingOff(soff) {} + Val(uint32_t toff, uint32_t poff) + : TokenOff(toff), PPCondOff(poff) {} bool isValid() const { return TokenOff != ~((uint32_t)0); } @@ -447,12 +334,7 @@ public: uint32_t getPPCondOffset() const { assert(isValid() && "PTHFileLookup entry initialized."); return PPCondOff; - } - - uint32_t getSpellingOffset() const { - assert(isValid() && "PTHFileLookup entry initialized."); - return SpellingOff; - } + } }; private: @@ -481,10 +363,9 @@ public: uint32_t TokenOff = ReadLE32(D); uint32_t PPCondOff = ReadLE32(D); - uint32_t SpellingOff = ReadLE32(D); FileMap.GetOrCreateValue(s, s+Len).getValue() = - Val(TokenOff, PPCondOff, SpellingOff); + Val(TokenOff, PPCondOff); } } }; @@ -497,10 +378,11 @@ public: PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, const unsigned char* idDataTable, IdentifierInfo** perIDCache, - const unsigned char* sortedIdTable, unsigned numIds) + const unsigned char* sortedIdTable, unsigned numIds, + const unsigned char* spellingBase) : Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), IdDataTable(idDataTable), SortedIdTable(sortedIdTable), - NumIds(numIds), PP(0) {} + NumIds(numIds), PP(0), SpellingBase(spellingBase) {} PTHManager::~PTHManager() { delete Buf; @@ -573,6 +455,14 @@ PTHManager* PTHManager::Create(const std::string& file) { return 0; // FIXME: Proper error diagnostic? } + // Get the location of the spelling cache. + const unsigned char* spellingBaseOffset = EndTable + sizeof(uint32_t)*4; + const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset); + if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) { + assert(false && "Invalid PTH file."); + return 0; + } + // Get the number of IdentifierInfos and pre-allocate the identifier cache. uint32_t NumIds = ReadLE32(IData); @@ -591,7 +481,7 @@ PTHManager* PTHManager::Create(const std::string& file) { // Create the new PTHManager. return new PTHManager(File.take(), FL.take(), IData, PerIDCache, - SortedIdTable, NumIds); + SortedIdTable, NumIds, spellingBase); } IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { // Look in the PTH file for the string data for the IdentifierInfo object. @@ -678,18 +568,6 @@ PTHLexer *PTHManager::CreateLexer(FileID FID) { uint32_t Len = ReadLE32(ppcond); if (Len == 0) ppcond = 0; - // Get the location of the spelling table. - const unsigned char* spellingTable = BufStart + FileData.getSpellingOffset(); - - Len = ReadLE32(spellingTable); - if (Len == 0) spellingTable = 0; - - assert(data < (const unsigned char*)Buf->getBufferEnd()); - - // Create the SpellingSearch object for this FileID. - PTHSpellingSearch* ss = new PTHSpellingSearch(*this, Len, spellingTable); - SpellingMap[FID] = ss; - assert(PP && "No preprocessor set yet!"); - return new PTHLexer(*PP, FID, data, ppcond, *ss, *this); + return new PTHLexer(*PP, FID, data, ppcond, *this); } diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index cb0c850e7ead302ade47cab936acc939a256ffbf..e6bf1777a891a030b83d1392047a7afb4c89536d 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -198,17 +198,9 @@ void Preprocessor::PrintStats() { /// UCNs, etc. std::string Preprocessor::getSpelling(const Token &Tok) const { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); - const char* TokStart; - - if (PTH) { - if (unsigned Len = PTH->getSpelling(Tok.getLocation(), TokStart)) { - assert(!Tok.needsCleaning()); - return std::string(TokStart, TokStart+Len); - } - } - + // If this token contains nothing interesting, return it directly. - TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation()); if (!Tok.needsCleaning()) return std::string(TokStart, TokStart+Tok.getLength()); @@ -248,23 +240,6 @@ unsigned Preprocessor::getSpelling(const Token &Tok, return II->getLength(); } - // If using PTH, try and get the spelling from the PTH file. - if (PTH) { - unsigned Len; - - if (CurPTHLexer) { - Len = CurPTHLexer.get()->getSpelling(Tok.getLocation(), Buffer); - } else { - Len = PTH->getSpelling(Tok.getLocation(), Buffer); - } - - // Did we find a spelling? If so return its length. Otherwise fall - // back to the default behavior for getting the spelling by looking at - // at the source code. - if (Len) - return Len; - } - // Otherwise, compute the start of the token in the input lexer buffer. const char *TokStart = 0;