From b0b4f74b6b088f4319f2941b8e74ff41e1aa0f10 Mon Sep 17 00:00:00 2001 From: Ted Kremenek Date: Tue, 13 Jan 2009 22:05:50 +0000 Subject: [PATCH] PTH: Fix remaining cases where the spelling cache in the PTH file was being missed when it shouldn't. This shaves another 7% off PTH time for -Eonly on Cocoa.h llvm-svn: 62186 --- clang/include/clang/Lex/PTHManager.h | 16 +++++----- clang/lib/Lex/PTHLexer.cpp | 35 ++++++++++---------- clang/lib/Lex/Preprocessor.cpp | 48 ++++++++++++++++++++-------- 3 files changed, 62 insertions(+), 37 deletions(-) diff --git a/clang/include/clang/Lex/PTHManager.h b/clang/include/clang/Lex/PTHManager.h index eefe574a54b8..60beee364503 100644 --- a/clang/include/clang/Lex/PTHManager.h +++ b/clang/include/clang/Lex/PTHManager.h @@ -34,10 +34,10 @@ class PTHManager; class PTHSpellingSearch { PTHManager& PTHMgr; - const char* TableBeg; - const char* TableEnd; + const char* const TableBeg; + const char* const TableEnd; - unsigned SpellingsLeft; + const unsigned NumSpellings; const char* LinearItr; public: @@ -50,7 +50,7 @@ public: : PTHMgr(pm), TableBeg(tableBeg), TableEnd(tableBeg + numSpellings*SpellingEntrySize), - SpellingsLeft(numSpellings), + NumSpellings(numSpellings), LinearItr(tableBeg) {} }; @@ -101,8 +101,6 @@ class PTHManager { /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached /// spelling for a token. unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer); - - unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer); public: @@ -111,11 +109,13 @@ public: /// Create - This method creates PTHManager objects. The 'file' argument /// is the name of the PTH file. This method returns NULL upon failure. static PTHManager* Create(const std::string& file, Preprocessor& PP); - + /// CreateLexer - Return a PTHLexer that "lexes" the cached tokens for the /// specified file. This method returns NULL if no cached tokens exist. 
/// It is the responsibility of the caller to 'delete' the returned object. - PTHLexer* CreateLexer(unsigned FileID, const FileEntry* FE); + PTHLexer* CreateLexer(unsigned FileID, const FileEntry* FE); + + unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer); }; } // end namespace clang diff --git a/clang/lib/Lex/PTHLexer.cpp b/clang/lib/Lex/PTHLexer.cpp index a9eb88a19dbc..4a84f9b3afa1 100644 --- a/clang/lib/Lex/PTHLexer.cpp +++ b/clang/lib/Lex/PTHLexer.cpp @@ -23,7 +23,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Support/Streams.h" using namespace clang; @@ -311,8 +310,8 @@ unsigned PTHManager::getSpelling(unsigned FileID, unsigned fpos, if (I == SpellingMap.end()) return 0; - - return I->second->getSpellingBinarySearch(fpos, Buffer); + + return I->second->getSpellingBinarySearch(fpos, Buffer); } unsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset, @@ -335,7 +334,7 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos, const char* p = LinearItr; unsigned len = 0; - if (!SpellingsLeft) + if (p == TableEnd) return getSpellingBinarySearch(fpos, Buffer); do { @@ -348,8 +347,6 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos, if (TokOffset > fpos) return getSpellingBinarySearch(fpos, Buffer); - --SpellingsLeft; - // Did we find a matching token offset for this spelling? if (TokOffset == fpos) { uint32_t SpellingPTHOffset = @@ -358,14 +355,15 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos, | (((uint32_t) ((uint8_t) p[6])) << 16) | (((uint32_t) ((uint8_t) p[7])) << 24); + p += SpellingEntrySize; len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer); break; } // No match. Keep on looking. 
- p += sizeof(uint32_t)*2; + p += SpellingEntrySize; } - while (SpellingsLeft); + while (p != TableEnd); LinearItr = p; return len; @@ -374,13 +372,18 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos, unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos, const char *& Buffer) { - assert ((TableEnd - TableBeg) % SpellingEntrySize == 0); + assert((TableEnd - TableBeg) % SpellingEntrySize == 0); + + if (TableEnd == TableBeg) + return 0; + + assert(TableEnd > TableBeg); unsigned min = 0; const char* tb = TableBeg; - unsigned max = (TableEnd - tb) / SpellingEntrySize; + unsigned max = NumSpellings; - while (min != max) { + do { unsigned i = (max - min) / 2 + min; const char* p = tb + (i * SpellingEntrySize); @@ -392,6 +395,7 @@ unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos, if (TokOffset > fpos) { max = i; + assert(!(max == min) || (min == i)); continue; } @@ -408,6 +412,7 @@ unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos, return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer); } + while (min != max); return 0; } @@ -415,13 +420,11 @@ unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos, unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) { SourceManager& SM = PP->getSourceManager(); sloc = SM.getPhysicalLoc(sloc); - unsigned fid = SM.getCanonicalFileID(sloc); + unsigned fid = sloc.getFileID(); unsigned fpos = SM.getFullFilePos(sloc); - if (fid == FileID) - return MySpellingSrch.getSpellingLinearSearch(fpos, Buffer); - - return PTHMgr.getSpelling(fid, fpos, Buffer); + return (fid == FileID ) ? 
MySpellingSrch.getSpellingLinearSearch(fpos, Buffer) + : PTHMgr.getSpelling(fid, fpos, Buffer); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index ee6b0f888cf1..a815265e7c9a 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -195,9 +195,20 @@ void Preprocessor::PrintStats() { /// UCNs, etc. std::string Preprocessor::getSpelling(const Token &Tok) const { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + const char* TokStart; + + if (PTH) { + SourceLocation sloc = SourceMgr.getPhysicalLoc(Tok.getLocation()); + unsigned fid = sloc.getFileID(); + unsigned fpos = SourceMgr.getFullFilePos(sloc); + if (unsigned len = PTH->getSpelling(fid, fpos, TokStart)) { + assert(!Tok.needsCleaning()); + return std::string(TokStart, TokStart+len); + } + } // If this token contains nothing interesting, return it directly. - const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + TokStart = SourceMgr.getCharacterData(Tok.getLocation()); if (!Tok.needsCleaning()) return std::string(TokStart, TokStart+Tok.getLength()); @@ -238,21 +249,32 @@ unsigned Preprocessor::getSpelling(const Token &Tok, } // If using PTH, try and get the spelling from the PTH file. - if (CurPTHLexer) { - // We perform the const_cast<> here because we will only have a PTHLexer - // when grabbing a stream of tokens from the PTH file (and thus the - // Preprocessor state is allowed to change). The PTHLexer can assume we are - // getting token spellings in the order of tokens, and thus can update - // its internal state so that it can quickly fetch spellings from the PTH - // file. 
- unsigned len = - const_cast<PTHLexer*>(CurPTHLexer.get())->getSpelling(Tok.getLocation(), - Buffer); + if (PTH) { + unsigned len; + if (CurPTHLexer) { + // We perform the const_cast<> here because we will only have a PTHLexer + // when grabbing a stream of tokens from the PTH file (and thus the + // Preprocessor state is allowed to change). The PTHLexer can assume we are + // getting token spellings in the order of tokens, and thus can update + // its internal state so that it can quickly fetch spellings from the PTH + // file. + len = + const_cast<PTHLexer*>(CurPTHLexer.get())->getSpelling(Tok.getLocation(), + Buffer); + } + else { + SourceLocation sloc = SourceMgr.getPhysicalLoc(Tok.getLocation()); + unsigned fid = sloc.getFileID(); + unsigned fpos = SourceMgr.getFullFilePos(sloc); + len = PTH->getSpelling(fid, fpos, Buffer); + } + // Did we find a spelling? If so return its length. Otherwise fall // back to the default behavior for getting the spelling by looking at - // at the source code. - if (len) return len; + // at the source code. + if (len) + return len; } // Otherwise, compute the start of the token in the input lexer buffer.