Enhance PTH 'getSpelling' caching:

- Refactor caching logic into a helper class PTHSpellingSearch
- Allow "random accesses" in the spelling cache, thus catching the remaining
  cases where 'getSpelling' wasn't hitting the PTH cache

For -Eonly, PTH, Cocoa.h:
- This reduces wall time by 3% (user time unchanged, sys time reduced)
- This reduces the amount of paged source by 1112K.
  The remaining 1112K still being paged in is from somewhere else
  (investigating).

llvm-svn: 62009
2009-01-09 22:05:30 +00:00 · 2009-01-09 22:05:30 +00:00 · 47b8cf6deb
parent 020cddcfee
commit 47b8cf6deb
3 changed files with 154 additions and 30 deletions
--- a/clang/include/clang/Lex/PTHLexer.h
+++ b/clang/include/clang/Lex/PTHLexer.h
@ -20,8 +20,13 @@
 namespace clang {
  
 class PTHManager;
+class PTHSpellingSearch;
  
 class PTHLexer : public PreprocessorLexer {
+private:
+  /// FileID - The SourceManager FileID for the original source file.
+  unsigned FileID;
+  
  /// TokBuf - Buffer from PTH file containing raw token data.
  const char* TokBuf;
  
@ -41,13 +46,10 @@ class PTHLexer : public PreprocessorLexer {
  /// CurPPCondPtr - Pointer inside PPCond that refers to the next entry
  ///  to process when doing quick skipping of preprocessor blocks.
  const char* CurPPCondPtr;
-  
-  /// Pointer to a side table containing offsets in the PTH file
-  ///  for token spellings.
-  const char* SpellingTable;
-  
-  /// Number of cached spellings left in the cached source file.
-  unsigned SpellingsLeft;
+
+  /// MySpellingSrch - Reference to the spelling search object used to get
+  ///  spellings for the source file indicated by \c FileID.
+  PTHSpellingSearch& MySpellingSrch;  

  PTHLexer(const PTHLexer&);  // DO NOT IMPLEMENT
  void operator=(const PTHLexer&); // DO NOT IMPLEMENT
@ -59,14 +61,17 @@ class PTHLexer : public PreprocessorLexer {
  PTHManager& PTHMgr;
  
  Token EofToken;
-  
-public:  
+
+protected:
+  friend class PTHManager;

  /// Create a PTHLexer for the specified token stream.
  PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 
-           const char* ppcond, const char* spellingTable, unsigned numSpellings,
+           const char* ppcond,
+           PTHSpellingSearch& mySpellingSrch,
           PTHManager& PM);
-  
+public:  
+
  ~PTHLexer() {}
    
  /// Lex - Return the next token.
--- a/clang/include/clang/Lex/PTHManager.h
+++ b/clang/include/clang/Lex/PTHManager.h
@ -16,6 +16,7 @@

 #include "clang/Lex/PTHLexer.h"
 #include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/DenseMap.h"
 #include <string>

 namespace llvm {
@ -28,14 +29,41 @@ class FileEntry;
 class IdentifierInfo;
 class IdentifierTable;
 class PTHLexer;
+class PTHManager;
+
+class PTHSpellingSearch {
+  PTHManager& PTHMgr;
+  // [TableBeg, TableEnd): numSpellings entries of SpellingEntrySize bytes each.
+  const char* TableBeg;
+  const char* TableEnd;
+  // Cursor state for getSpellingLinearSearch: entries left, resume position.
+  unsigned SpellingsLeft;
+  const char* LinearItr;
+  
+public:
+  enum { SpellingEntrySize = 4*2 };
+  // Each lookup takes a file position; a result of 0 means no spelling found.
+  unsigned getSpellingBinarySearch(unsigned fpos, const char *&Buffer);
+  unsigned getSpellingLinearSearch(unsigned fpos, const char *&Buffer);
+  // The linear cursor starts at the first entry with all entries remaining.
+  PTHSpellingSearch(PTHManager& pm, unsigned numSpellings, const char* tableBeg)
+    : PTHMgr(pm),
+      TableBeg(tableBeg),
+      TableEnd(tableBeg + numSpellings*SpellingEntrySize),
+      SpellingsLeft(numSpellings),
+      LinearItr(tableBeg) {}
+};  
  
 class PTHManager {
-  
  friend class PTHLexer;
+  friend class PTHSpellingSearch;
  
  /// The memory mapped PTH file.
  const llvm::MemoryBuffer* Buf;
  
+  /// A map from FileIDs to SpellingSearch objects.
+  llvm::DenseMap<unsigned,PTHSpellingSearch*> SpellingMap;
+  
  /// IdMap - A lazily generated cache mapping from persistent identifiers to
  ///  IdentifierInfo*.
  IdentifierInfo** PerIDCache;
@ -70,10 +98,12 @@ class PTHManager {
  ///  objects from the PTH file.
  IdentifierInfo* GetIdentifierInfo(unsigned);
  
-  /// GetSpelling - Used by PTHLexer classes to get the cached spelling
-  ///  for a token.
-  unsigned GetSpelling(unsigned PTHOffset, const char*& Buffer);
+  /// getSpellingAtPTHOffset - Used by PTHLexer classes to get the cached 
+  ///  spelling for a token.
+  unsigned getSpellingAtPTHOffset(unsigned PTHOffset, const char*& Buffer);

+  unsigned getSpelling(unsigned FileID, unsigned fpos, const char *& Buffer);
+  
 public:
  
  ~PTHManager();
--- a/clang/lib/Lex/PTHLexer.cpp
+++ b/clang/lib/Lex/PTHLexer.cpp
@ -23,6 +23,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/Streams.h"

 using namespace clang;

@ -50,12 +51,14 @@ static inline uint32_t Read32(const char*& data) {

 PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
                   const char* ppcond,
-                   const char* spellingTable, unsigned NumSpellings,
+                   PTHSpellingSearch& mySpellingSrch,
                   PTHManager& PM)
  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
-    PPCond(ppcond), CurPPCondPtr(ppcond), 
-    SpellingTable(spellingTable), SpellingsLeft(NumSpellings),
-    PTHMgr(PM) {}
+    PPCond(ppcond), CurPPCondPtr(ppcond), MySpellingSrch(mySpellingSrch),
+    PTHMgr(PM)
+{      
+  FileID = fileloc.getFileID();
+}

 void PTHLexer::Lex(Token& Tok) {
 LexNextToken:
@ -100,6 +103,13 @@ LexNextToken:
  //===--------------------------------------==//
  // Process the token.
  //===--------------------------------------==//
+#if 0  
+  SourceManager& SM = PP->getSourceManager();
+  llvm::cerr << SM.getFileEntryForID(FileID)->getName()
+    << ':' << SM.getLogicalLineNumber(Tok.getLocation())
+    << ':' << SM.getLogicalColumnNumber(Tok.getLocation())
+    << '\n';
+#endif  

  if (k == tok::identifier) {
    MIOpt.ReadToken();
@ -289,7 +299,25 @@ SourceLocation PTHLexer::getSourceLocation() {
  return SourceLocation::getFileLoc(FileID, offset);
 }

-unsigned PTHManager::GetSpelling(unsigned PTHOffset, const char *& Buffer) {
+//===----------------------------------------------------------------------===//
+// getSpelling() - Use cached data in PTH files for getSpelling().
+//===----------------------------------------------------------------------===//
+
+unsigned PTHManager::getSpelling(unsigned FileID, unsigned fpos,
+                                 const char *& Buffer) {
+  // Find the spelling search object registered for FileID, if there is one.
+  llvm::DenseMap<unsigned,PTHSpellingSearch*>::iterator I =
+    SpellingMap.find(FileID);
+
+  if (I == SpellingMap.end())
+      return 0;
+  // Delegate to that file's binary search over its spelling table.
+  return I->second->getSpellingBinarySearch(fpos, Buffer);  
+}
+
+unsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset,
+                                            const char *& Buffer) {
+
  const char* p = Buf->getBufferStart() + PTHOffset;
  assert(p < Buf->getBufferEnd());
  
@ -302,13 +330,15 @@ unsigned PTHManager::GetSpelling(unsigned PTHOffset, const char *& Buffer) {
  return len;
 }

-unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
-  const char* p = SpellingTable;
-  SourceManager& SM = PP->getSourceManager();
-  unsigned fpos = SM.getFullFilePos(SM.getPhysicalLoc(sloc));
+unsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos,
+                                                    const char *&Buffer) {
+  const char* p = LinearItr;
  unsigned len = 0;
-
-  while (SpellingsLeft) {
+  
+  if (!SpellingsLeft)
+    return getSpellingBinarySearch(fpos, Buffer);
+  
+  do {
    uint32_t TokOffset = 
      ((uint32_t) ((uint8_t) p[0]))
      | (((uint32_t) ((uint8_t) p[1])) << 8)
@ -316,7 +346,7 @@ unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
      | (((uint32_t) ((uint8_t) p[3])) << 24);
    
    if (TokOffset > fpos)
-      break;
+      return getSpellingBinarySearch(fpos, Buffer);
    
    --SpellingsLeft;
    
@ -328,18 +358,72 @@ unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
        | (((uint32_t) ((uint8_t) p[6])) << 16)
        | (((uint32_t) ((uint8_t) p[7])) << 24);
      
-      len = PTHMgr.GetSpelling(SpellingPTHOffset, Buffer);
+      len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
      break;
    }

    // No match.  Keep on looking.
    p += sizeof(uint32_t)*2;
  }
+  while (SpellingsLeft);

-  SpellingTable = p;
+  LinearItr = p;
  return len;
 }

+unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
+                                                    const char *& Buffer) {
+  // Binary-search the table of 8-byte entries (token file offset, then PTH
+  // spelling offset; both 32-bit little-endian) for an entry equal to 'fpos'.
+  assert ((TableEnd - TableBeg) % SpellingEntrySize == 0);
+  unsigned min = 0;
+  const char* tb = TableBeg;
+  unsigned max = (TableEnd - tb) / SpellingEntrySize;
+  // Candidates are the half-open range [min, max); assumes sorted offsets.
+  while (min != max) {
+    unsigned i = (max - min) / 2 + min;
+    const char* p = tb + (i * SpellingEntrySize);
+    
+    uint32_t TokOffset = 
+      ((uint32_t) ((uint8_t) p[0]))
+      | (((uint32_t) ((uint8_t) p[1])) << 8)
+      | (((uint32_t) ((uint8_t) p[2])) << 16)
+      | (((uint32_t) ((uint8_t) p[3])) << 24);
+    
+    if (TokOffset > fpos) {
+      max = i;
+      continue;
+    }
+    // Skip past entry i; 'min = i' would loop forever once max == min + 1.
+    if (TokOffset < fpos) {
+      min = i + 1;
+      continue;
+    }
+    
+    uint32_t SpellingPTHOffset = 
+        ((uint32_t) ((uint8_t) p[4]))
+        | (((uint32_t) ((uint8_t) p[5])) << 8)
+        | (((uint32_t) ((uint8_t) p[6])) << 16)
+        | (((uint32_t) ((uint8_t) p[7])) << 24);
+    
+    return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
+  }
+  // No entry has a token offset equal to 'fpos'.
+  return 0;
+}
+
+unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
+  SourceManager& SM = PP->getSourceManager();
+  sloc = SM.getPhysicalLoc(sloc);
+  unsigned fid = SM.getCanonicalFileID(sloc);
+  unsigned fpos = SM.getFullFilePos(sloc);
+  // Locations in this lexer's own file go through its linear-search cursor;
+  if (fid == FileID)
+    return MySpellingSrch.getSpellingLinearSearch(fpos, Buffer);
+  // any other file is looked up through the PTHManager by its file ID.
+  return PTHMgr.getSpelling(fid, fpos, Buffer);
+}
+
 //===----------------------------------------------------------------------===//
 // Internal Data Structures for PTH file lookup and resolving identifiers.
 //===----------------------------------------------------------------------===//
@ -538,6 +622,11 @@ PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
  if (len == 0) spellingTable = 0;

  assert(data < Buf->getBufferEnd());
+  
+  // Create the SpellingSearch object for this FileID.
+  PTHSpellingSearch* ss = new PTHSpellingSearch(*this, len, spellingTable);
+  SpellingMap[FileID] = ss;
+  
  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
-                      spellingTable, len, *this); 
+                      *ss, *this); 
 }