PTH: Fix the remaining cases where the spelling cache in the PTH file was being missed when it shouldn't have been. This shaves another 7% off PTH time for -Eonly on Cocoa.h.

llvm-svn: 62186
2009-01-13 22:05:50 +00:00 · 2009-01-13 22:05:50 +00:00 · b0b4f74b6b
parent dd25a9d0aa
commit b0b4f74b6b
3 changed files with 62 additions and 37 deletions
--- a/clang/include/clang/Lex/PTHManager.h
+++ b/clang/include/clang/Lex/PTHManager.h
@ -34,10 +34,10 @@ class PTHManager;
 class PTHSpellingSearch {
  PTHManager& PTHMgr;
  
-  const char* TableBeg;
-  const char* TableEnd;
+  const char* const TableBeg;
+  const char* const TableEnd;
  
-  unsigned SpellingsLeft;
+  const unsigned NumSpellings;
  const char* LinearItr;
  
 public:
@ -50,7 +50,7 @@ public:
    : PTHMgr(pm),
      TableBeg(tableBeg),
      TableEnd(tableBeg + numSpellings*SpellingEntrySize),
-      SpellingsLeft(numSpellings),
+      NumSpellings(numSpellings),
      LinearItr(tableBeg) {}
 };  
  
@ -101,8 +101,6 @@ class PTHManager {
  /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached 
  ///  spelling for a token.
  unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer);
-
-  unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer);
  
 public:
  
@ -111,11 +109,13 @@ public:
  /// Create - This method creates PTHManager objects.  The 'file' argument
  ///  is the name of the PTH file.  This method returns NULL upon failure.
  static PTHManager* Create(const std::string& file, Preprocessor& PP);
-  
+
  /// CreateLexer - Return a PTHLexer that "lexes" the cached tokens for the
  ///  specified file.  This method returns NULL if no cached tokens exist.
  ///  It is the responsibility of the caller to 'delete' the returned object.
-  PTHLexer* CreateLexer(unsigned FileID, const FileEntry* FE);  
+  PTHLexer* CreateLexer(unsigned FileID, const FileEntry* FE);
+  
+  unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer);
 };
  
 }  // end namespace clang
--- a/clang/lib/Lex/PTHLexer.cpp
+++ b/clang/lib/Lex/PTHLexer.cpp
@ -23,7 +23,6 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/Streams.h"

 using namespace clang;

@ -311,8 +310,8 @@ unsigned PTHManager::getSpelling(unsigned FileID, unsigned fpos,

  if (I == SpellingMap.end())
      return 0;
-    
-  return I->second->getSpellingBinarySearch(fpos, Buffer);  
+
+  return I->second->getSpellingBinarySearch(fpos, Buffer);
 }

 unsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset,
@ -335,7 +334,7 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos,
  const char* p = LinearItr;
  unsigned len = 0;
  
-  if (!SpellingsLeft)
+  if (p == TableEnd)
    return getSpellingBinarySearch(fpos, Buffer);
  
  do {
@ -348,8 +347,6 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos,
    if (TokOffset > fpos)
      return getSpellingBinarySearch(fpos, Buffer);
    
-    --SpellingsLeft;
-    
    // Did we find a matching token offset for this spelling?
    if (TokOffset == fpos) {
      uint32_t SpellingPTHOffset = 
@ -358,14 +355,15 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos,
        | (((uint32_t) ((uint8_t) p[6])) << 16)
        | (((uint32_t) ((uint8_t) p[7])) << 24);
      
+      p += SpellingEntrySize;
      len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
      break;
    }

    // No match.  Keep on looking.
-    p += sizeof(uint32_t)*2;
+    p += SpellingEntrySize;
  }
-  while (SpellingsLeft);
+  while (p != TableEnd);

  LinearItr = p;
  return len;
@ -374,13 +372,18 @@ unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos,
 unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
                                                    const char *& Buffer) {
  
-  assert ((TableEnd - TableBeg) % SpellingEntrySize == 0);
+  assert((TableEnd - TableBeg) % SpellingEntrySize == 0);
+  
+  if (TableEnd == TableBeg)
+    return 0;
+  
+  assert(TableEnd > TableBeg);
  
  unsigned min = 0;
  const char* tb = TableBeg;
-  unsigned max = (TableEnd - tb) / SpellingEntrySize;
+  unsigned max = NumSpellings;

-  while (min != max) {
+  do {
    unsigned i = (max - min) / 2 + min;
    const char* p = tb + (i * SpellingEntrySize);
    
@ -392,6 +395,7 @@ unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
    
    if (TokOffset > fpos) {
      max = i;
+      assert(!(max == min) || (min == i));
      continue;
    }
    
@ -408,6 +412,7 @@ unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
    
    return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
  }
+  while (min != max);
  
  return 0;
 }
@ -415,13 +420,11 @@ unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
 unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
  SourceManager& SM = PP->getSourceManager();
  sloc = SM.getPhysicalLoc(sloc);
-  unsigned fid = SM.getCanonicalFileID(sloc);
+  unsigned fid = sloc.getFileID();
  unsigned fpos = SM.getFullFilePos(sloc);
  
-  if (fid == FileID)
-    return MySpellingSrch.getSpellingLinearSearch(fpos, Buffer);
-
-  return PTHMgr.getSpelling(fid, fpos, Buffer);
+  return (fid == FileID ) ? MySpellingSrch.getSpellingLinearSearch(fpos, Buffer)
+                          : PTHMgr.getSpelling(fid, fpos, Buffer);  
 }

 //===----------------------------------------------------------------------===//
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@ -195,9 +195,20 @@ void Preprocessor::PrintStats() {
 /// UCNs, etc.
 std::string Preprocessor::getSpelling(const Token &Tok) const {
  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  const char* TokStart;
+  
+  if (PTH) {
+    SourceLocation sloc = SourceMgr.getPhysicalLoc(Tok.getLocation());
+    unsigned fid = sloc.getFileID();
+    unsigned fpos = SourceMgr.getFullFilePos(sloc);
+    if (unsigned len = PTH->getSpelling(fid, fpos, TokStart)) {
+      assert(!Tok.needsCleaning());
+      return std::string(TokStart, TokStart+len);
+    }
+  }
  
  // If this token contains nothing interesting, return it directly.
-  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  TokStart = SourceMgr.getCharacterData(Tok.getLocation());
  if (!Tok.needsCleaning())
    return std::string(TokStart, TokStart+Tok.getLength());
  
@ -238,21 +249,32 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
  }

  // If using PTH, try and get the spelling from the PTH file.
-  if (CurPTHLexer) {
-    // We perform the const_cast<> here because we will only have a PTHLexer 
-    // when grabbing a stream of tokens from the PTH file (and thus the
-    // Preprocessor state is allowed to change).  The PTHLexer can assume we are
-    // getting token spellings in the order of tokens, and thus can update
-    // its internal state so that it can quickly fetch spellings from the PTH
-    // file.
-    unsigned len =
-      const_cast<PTHLexer*>(CurPTHLexer.get())->getSpelling(Tok.getLocation(),
-                                                            Buffer);
+  if (PTH) {
+    unsigned len;
    
+    if (CurPTHLexer) {
+      // We perform the const_cast<> here because we will only have a PTHLexer 
+      // when grabbing a stream of tokens from the PTH file (and thus the
+      // Preprocessor state is allowed to change).  The PTHLexer can assume we are
+      // getting token spellings in the order of tokens, and thus can update
+      // its internal state so that it can quickly fetch spellings from the PTH
+      // file.
+      len =
+        const_cast<PTHLexer*>(CurPTHLexer.get())->getSpelling(Tok.getLocation(),
+                                                              Buffer);      
+    }
+    else {
+      SourceLocation sloc = SourceMgr.getPhysicalLoc(Tok.getLocation());
+      unsigned fid = sloc.getFileID();
+      unsigned fpos = SourceMgr.getFullFilePos(sloc);      
+      len = PTH->getSpelling(fid, fpos, Buffer);      
+    }
+
    // Did we find a spelling?  If so return its length.  Otherwise fall
    // back to the default behavior for getting the spelling by looking at
-    // at the source code.
-    if (len) return len;
+    // the source code.
+    if (len)
+      return len;
  }

  // Otherwise, compute the start of the token in the input lexer buffer.