Lazy deserialization of macro definitions for precompiled headers.

This optimization improves performance on the Carbon-prefixed "Hello,
World!" example by 57%. For reference, we're now about 2.25x faster
than GCC PCH. We're also pulling in far less of the PCH file:

*** PCH Statistics:
  411/20693 types read (1.986179%)
  2553/59230 declarations read (4.310316%)
  1093/44646 identifiers read (2.448148%)
  1/32954 statements read (0.003035%)
  21/6187 macros read (0.339421%)

llvm-svn: 69755
This commit is contained in:
Douglas Gregor 2009-04-21 23:56:24 +00:00
parent fe95afd0bc
commit c3366a555b
5 changed files with 192 additions and 86 deletions

View File

@ -198,7 +198,6 @@ namespace clang {
/// [PP_COUNTER_VALUE, Val] /// [PP_COUNTER_VALUE, Val]
PP_COUNTER_VALUE = 4 PP_COUNTER_VALUE = 4
}; };
/// \defgroup PCHAST Precompiled header AST constants /// \defgroup PCHAST Precompiled header AST constants
/// ///

View File

@ -171,6 +171,11 @@ private:
/// in the PCH file. /// in the PCH file.
unsigned TotalNumStatements; unsigned TotalNumStatements;
/// \brief The number of macros de-serialized from the PCH file.
unsigned NumMacrosRead;
/// \brief The total number of macros stored in the PCH file.
unsigned TotalNumMacros;
/// \brief FIXME: document! /// \brief FIXME: document!
llvm::SmallVector<uint64_t, 4> SpecialTypes; llvm::SmallVector<uint64_t, 4> SpecialTypes;
@ -311,6 +316,9 @@ public:
/// \brief Reads a statement from the current stream position. /// \brief Reads a statement from the current stream position.
Stmt *ReadStmt(); Stmt *ReadStmt();
/// \brief Reads the macro record located at the given offset.
void ReadMacroRecord(uint64_t Offset);
/// \brief Retrieve the AST context that this PCH reader /// \brief Retrieve the AST context that this PCH reader
/// supplements. /// supplements.
ASTContext &getContext() { return Context; } ASTContext &getContext() { return Context; }

View File

@ -98,6 +98,14 @@ private:
/// table, shifted left by one bit with the low bit set. /// table, shifted left by one bit with the low bit set.
llvm::SmallVector<uint64_t, 16> IdentifierOffsets; llvm::SmallVector<uint64_t, 16> IdentifierOffsets;
/// \brief Offsets of each of the macro identifiers into the
/// bitstream.
///
/// For each identifier that is associated with a macro, this map
/// provides the offset into the bitstream where that macro is
/// defined.
llvm::DenseMap<const IdentifierInfo *, uint64_t> MacroOffsets;
/// \brief Declarations encountered that might be external /// \brief Declarations encountered that might be external
/// definitions. /// definitions.
/// ///
@ -125,6 +133,9 @@ private:
/// \brief The number of statements written to the PCH file. /// \brief The number of statements written to the PCH file.
unsigned NumStatements; unsigned NumStatements;
/// \brief The number of macros written to the PCH file.
unsigned NumMacros;
void WriteTargetTriple(const TargetInfo &Target); void WriteTargetTriple(const TargetInfo &Target);
void WriteLanguageOptions(const LangOptions &LangOpts); void WriteLanguageOptions(const LangOptions &LangOpts);
void WriteSourceManagerBlock(SourceManager &SourceMgr); void WriteSourceManagerBlock(SourceManager &SourceMgr);
@ -134,7 +145,7 @@ private:
uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC); uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC);
uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC); uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
void WriteDeclsBlock(ASTContext &Context); void WriteDeclsBlock(ASTContext &Context);
void WriteIdentifierTable(); void WriteIdentifierTable(Preprocessor &PP);
void WriteAttributeRecord(const Attr *Attr); void WriteAttributeRecord(const Attr *Attr);
public: public:
@ -160,6 +171,16 @@ public:
/// \brief Emit a reference to an identifier /// \brief Emit a reference to an identifier
void AddIdentifierRef(const IdentifierInfo *II, RecordData &Record); void AddIdentifierRef(const IdentifierInfo *II, RecordData &Record);
/// \brief Look up the bitstream offset recorded for the macro named by
/// the given identifier.
///
/// The identifier must already have an entry in MacroOffsets, i.e. it
/// must refer to a macro; this precondition is checked by an assertion.
uint64_t getMacroOffset(const IdentifierInfo *II) {
  assert(MacroOffsets.count(II) != 0 &&
         "Identifier does not name a macro");
  return MacroOffsets[II];
}
/// \brief Emit a reference to a type. /// \brief Emit a reference to a type.
void AddTypeRef(QualType T, RecordData &Record); void AddTypeRef(QualType T, RecordData &Record);

View File

@ -36,6 +36,23 @@
using namespace clang; using namespace clang;
namespace {
/// \brief Helper class that saves the current stream position and
/// then restores it when destroyed.
struct VISIBILITY_HIDDEN SavedStreamPosition {
explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
: Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
~SavedStreamPosition() {
Stream.JumpToBit(Offset);
}
private:
llvm::BitstreamReader &Stream;
uint64_t Offset;
};
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Declaration deserialization // Declaration deserialization
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -1053,6 +1070,8 @@ public:
using namespace clang::io; using namespace clang::io;
uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these? uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these?
(void)Bits; (void)Bits;
bool hasMacroDefinition = (Bits >> 3) & 0x01;
pch::IdentID ID = ReadUnalignedLE32(d); pch::IdentID ID = ReadUnalignedLE32(d);
DataLen -= 8; DataLen -= 8;
@ -1064,8 +1083,13 @@ public:
k.first, k.first + k.second); k.first, k.first + k.second);
Reader.SetIdentifierInfo(ID, II); Reader.SetIdentifierInfo(ID, II);
// FIXME: If this identifier is a macro, deserialize the macro // If this identifier is a macro, deserialize the macro
// definition now. // definition.
if (hasMacroDefinition) {
  // The macro's bitstream offset is stored as a 64-bit little-endian
  // value. Keep all 64 bits: holding it in a 32-bit variable would
  // silently truncate large offsets before they reach
  // ReadMacroRecord(uint64_t).
  uint64_t Offset = ReadUnalignedLE64(d);
  Reader.ReadMacroRecord(Offset);
  DataLen -= 8;
}
// Read all of the declarations visible at global scope with this // Read all of the declarations visible at global scope with this
// name. // name.
@ -1323,14 +1347,116 @@ PCHReader::PCHReadResult PCHReader::ReadSourceManagerBlock() {
} }
} }
void PCHReader::ReadMacroRecord(uint64_t Offset) {
// Keep track of where we are in the stream, then jump back there
// after reading this macro.
SavedStreamPosition SavedPosition(Stream);
Stream.JumpToBit(Offset);
RecordData Record;
llvm::SmallVector<IdentifierInfo*, 16> MacroArgs;
MacroInfo *Macro = 0;
while (true) {
unsigned Code = Stream.ReadCode();
switch (Code) {
case llvm::bitc::END_BLOCK:
return;
case llvm::bitc::ENTER_SUBBLOCK:
// No known subblocks, always skip them.
Stream.ReadSubBlockID();
if (Stream.SkipBlock()) {
Error("Malformed block record");
return;
}
continue;
case llvm::bitc::DEFINE_ABBREV:
Stream.ReadAbbrevRecord();
continue;
default: break;
}
// Read a record.
Record.clear();
pch::PreprocessorRecordTypes RecType =
(pch::PreprocessorRecordTypes)Stream.ReadRecord(Code, Record);
switch (RecType) {
case pch::PP_COUNTER_VALUE:
// Skip this record.
break;
case pch::PP_MACRO_OBJECT_LIKE:
case pch::PP_MACRO_FUNCTION_LIKE: {
// If we already have a macro, that means that we've hit the end
// of the definition of the macro we were looking for. We're
// done.
if (Macro)
return;
IdentifierInfo *II = DecodeIdentifierInfo(Record[0]);
if (II == 0) {
Error("Macro must have a name");
return;
}
SourceLocation Loc = SourceLocation::getFromRawEncoding(Record[1]);
bool isUsed = Record[2];
MacroInfo *MI = PP.AllocateMacroInfo(Loc);
MI->setIsUsed(isUsed);
if (RecType == pch::PP_MACRO_FUNCTION_LIKE) {
// Decode function-like macro info.
bool isC99VarArgs = Record[3];
bool isGNUVarArgs = Record[4];
MacroArgs.clear();
unsigned NumArgs = Record[5];
for (unsigned i = 0; i != NumArgs; ++i)
MacroArgs.push_back(DecodeIdentifierInfo(Record[6+i]));
// Install function-like macro info.
MI->setIsFunctionLike();
if (isC99VarArgs) MI->setIsC99Varargs();
if (isGNUVarArgs) MI->setIsGNUVarargs();
MI->setArgumentList(&MacroArgs[0], MacroArgs.size(),
PP.getPreprocessorAllocator());
}
// Finally, install the macro.
PP.setMacroInfo(II, MI);
// Remember that we saw this macro last so that we add the tokens that
// form its body to it.
Macro = MI;
++NumMacrosRead;
break;
}
case pch::PP_TOKEN: {
// If we see a TOKEN before a PP_MACRO_*, then the file is
// erroneous, just pretend we didn't see this.
if (Macro == 0) break;
Token Tok;
Tok.startToken();
Tok.setLocation(SourceLocation::getFromRawEncoding(Record[0]));
Tok.setLength(Record[1]);
if (IdentifierInfo *II = DecodeIdentifierInfo(Record[2]))
Tok.setIdentifierInfo(II);
Tok.setKind((tok::TokenKind)Record[3]);
Tok.setFlag((Token::TokenFlags)Record[4]);
Macro->AddTokenToBody(Tok);
break;
}
}
}
}
bool PCHReader::ReadPreprocessorBlock() { bool PCHReader::ReadPreprocessorBlock() {
if (Stream.EnterSubBlock(pch::PREPROCESSOR_BLOCK_ID)) if (Stream.EnterSubBlock(pch::PREPROCESSOR_BLOCK_ID))
return Error("Malformed preprocessor block record"); return Error("Malformed preprocessor block record");
RecordData Record; RecordData Record;
llvm::SmallVector<IdentifierInfo*, 16> MacroArgs;
MacroInfo *LastMacro = 0;
while (true) { while (true) {
unsigned Code = Stream.ReadCode(); unsigned Code = Stream.ReadCode();
switch (Code) { switch (Code) {
@ -1365,58 +1491,10 @@ bool PCHReader::ReadPreprocessorBlock() {
break; break;
case pch::PP_MACRO_OBJECT_LIKE: case pch::PP_MACRO_OBJECT_LIKE:
case pch::PP_MACRO_FUNCTION_LIKE: { case pch::PP_MACRO_FUNCTION_LIKE:
IdentifierInfo *II = DecodeIdentifierInfo(Record[0]); case pch::PP_TOKEN:
if (II == 0) // Once we've hit a macro definition or a token, we're done.
return Error("Macro must have a name"); return false;
SourceLocation Loc = SourceLocation::getFromRawEncoding(Record[1]);
bool isUsed = Record[2];
MacroInfo *MI = PP.AllocateMacroInfo(Loc);
MI->setIsUsed(isUsed);
if (RecType == pch::PP_MACRO_FUNCTION_LIKE) {
// Decode function-like macro info.
bool isC99VarArgs = Record[3];
bool isGNUVarArgs = Record[4];
MacroArgs.clear();
unsigned NumArgs = Record[5];
for (unsigned i = 0; i != NumArgs; ++i)
MacroArgs.push_back(DecodeIdentifierInfo(Record[6+i]));
// Install function-like macro info.
MI->setIsFunctionLike();
if (isC99VarArgs) MI->setIsC99Varargs();
if (isGNUVarArgs) MI->setIsGNUVarargs();
MI->setArgumentList(&MacroArgs[0], MacroArgs.size(),
PP.getPreprocessorAllocator());
}
// Finally, install the macro.
PP.setMacroInfo(II, MI);
// Remember that we saw this macro last so that we add the tokens that
// form its body to it.
LastMacro = MI;
break;
}
case pch::PP_TOKEN: {
// If we see a TOKEN before a PP_MACRO_*, then the file is eroneous, just
// pretend we didn't see this.
if (LastMacro == 0) break;
Token Tok;
Tok.startToken();
Tok.setLocation(SourceLocation::getFromRawEncoding(Record[0]));
Tok.setLength(Record[1]);
if (IdentifierInfo *II = DecodeIdentifierInfo(Record[2]))
Tok.setIdentifierInfo(II);
Tok.setKind((tok::TokenKind)Record[3]);
Tok.setFlag((Token::TokenFlags)Record[4]);
LastMacro->AddTokenToBody(Tok);
break;
}
} }
} }
} }
@ -1573,6 +1651,7 @@ PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) {
case pch::STATISTICS: case pch::STATISTICS:
TotalNumStatements = Record[0]; TotalNumStatements = Record[0];
TotalNumMacros = Record[1];
break; break;
} }
@ -1582,23 +1661,6 @@ PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) {
return Failure; return Failure;
} }
namespace {
/// \brief Helper class that saves the current stream position and
/// then restores it when destroyed.
struct VISIBILITY_HIDDEN SavedStreamPosition {
explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
: Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
~SavedStreamPosition() {
Stream.JumpToBit(Offset);
}
private:
llvm::BitstreamReader &Stream;
uint64_t Offset;
};
}
PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) { PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
// Set the PCH file name. // Set the PCH file name.
this->FileName = FileName; this->FileName = FileName;
@ -2364,6 +2426,9 @@ void PCHReader::PrintStats() {
std::fprintf(stderr, " %u/%u statements read (%f%%)\n", std::fprintf(stderr, " %u/%u statements read (%f%%)\n",
NumStatementsRead, TotalNumStatements, NumStatementsRead, TotalNumStatements,
((float)NumStatementsRead/TotalNumStatements * 100)); ((float)NumStatementsRead/TotalNumStatements * 100));
std::fprintf(stderr, " %u/%u macros read (%f%%)\n",
NumMacrosRead, TotalNumMacros,
((float)NumMacrosRead/TotalNumMacros * 100));
std::fprintf(stderr, "\n"); std::fprintf(stderr, "\n");
} }

View File

@ -1439,8 +1439,6 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
// Loop over all the macro definitions that are live at the end of the file, // Loop over all the macro definitions that are live at the end of the file,
// emitting each to the PP section. // emitting each to the PP section.
// FIXME: Eventually we want to emit an index so that we can lazily load
// macros.
for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end(); for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
I != E; ++I) { I != E; ++I) {
// FIXME: This emits macros in hash table order, we should do it in a stable // FIXME: This emits macros in hash table order, we should do it in a stable
@ -1452,7 +1450,9 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
if (MI->isBuiltinMacro()) if (MI->isBuiltinMacro())
continue; continue;
// FIXME: Remove this identifier reference?
AddIdentifierRef(I->first, Record); AddIdentifierRef(I->first, Record);
MacroOffsets[I->first] = Stream.GetCurrentBitNo();
Record.push_back(MI->getDefinitionLoc().getRawEncoding()); Record.push_back(MI->getDefinitionLoc().getRawEncoding());
Record.push_back(MI->isUsed()); Record.push_back(MI->isUsed());
@ -1494,7 +1494,7 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
Stream.EmitRecord(pch::PP_TOKEN, Record); Stream.EmitRecord(pch::PP_TOKEN, Record);
Record.clear(); Record.clear();
} }
++NumMacros;
} }
Stream.ExitBlock(); Stream.ExitBlock();
@ -1715,6 +1715,7 @@ void PCHWriter::WriteDeclsBlock(ASTContext &Context) {
namespace { namespace {
class VISIBILITY_HIDDEN PCHIdentifierTableTrait { class VISIBILITY_HIDDEN PCHIdentifierTableTrait {
PCHWriter &Writer; PCHWriter &Writer;
Preprocessor &PP;
public: public:
typedef const IdentifierInfo* key_type; typedef const IdentifierInfo* key_type;
@ -1723,19 +1724,23 @@ public:
typedef pch::IdentID data_type; typedef pch::IdentID data_type;
typedef data_type data_type_ref; typedef data_type data_type_ref;
PCHIdentifierTableTrait(PCHWriter &Writer) : Writer(Writer) { } PCHIdentifierTableTrait(PCHWriter &Writer, Preprocessor &PP)
: Writer(Writer), PP(PP) { }
static unsigned ComputeHash(const IdentifierInfo* II) { static unsigned ComputeHash(const IdentifierInfo* II) {
return clang::BernsteinHash(II->getName()); return clang::BernsteinHash(II->getName());
} }
static std::pair<unsigned,unsigned> std::pair<unsigned,unsigned>
EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II, EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II,
pch::IdentID ID) { pch::IdentID ID) {
unsigned KeyLen = strlen(II->getName()) + 1; unsigned KeyLen = strlen(II->getName()) + 1;
clang::io::Emit16(Out, KeyLen); clang::io::Emit16(Out, KeyLen);
unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
// 4 bytes for the persistent ID // 4 bytes for the persistent ID
if (II->hasMacroDefinition() &&
!PP.getMacroInfo(const_cast<IdentifierInfo *>(II))->isBuiltinMacro())
DataLen += 8;
for (IdentifierResolver::iterator D = IdentifierResolver::begin(II), for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
DEnd = IdentifierResolver::end(); DEnd = IdentifierResolver::end();
D != DEnd; ++D) D != DEnd; ++D)
@ -1755,15 +1760,21 @@ public:
void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II, void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II,
pch::IdentID ID, unsigned) { pch::IdentID ID, unsigned) {
uint32_t Bits = 0; uint32_t Bits = 0;
bool hasMacroDefinition =
II->hasMacroDefinition() &&
!PP.getMacroInfo(const_cast<IdentifierInfo *>(II))->isBuiltinMacro();
Bits = Bits | (uint32_t)II->getTokenID(); Bits = Bits | (uint32_t)II->getTokenID();
Bits = (Bits << 8) | (uint32_t)II->getObjCOrBuiltinID(); Bits = (Bits << 8) | (uint32_t)II->getObjCOrBuiltinID();
Bits = (Bits << 10) | II->hasMacroDefinition(); Bits = (Bits << 10) | hasMacroDefinition;
Bits = (Bits << 1) | II->isExtensionToken(); Bits = (Bits << 1) | II->isExtensionToken();
Bits = (Bits << 1) | II->isPoisoned(); Bits = (Bits << 1) | II->isPoisoned();
Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword(); Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword();
clang::io::Emit32(Out, Bits); clang::io::Emit32(Out, Bits);
clang::io::Emit32(Out, ID); clang::io::Emit32(Out, ID);
if (hasMacroDefinition)
clang::io::Emit64(Out, Writer.getMacroOffset(II));
// Emit the declaration IDs in reverse order, because the // Emit the declaration IDs in reverse order, because the
// IdentifierResolver provides the declarations as they would be // IdentifierResolver provides the declarations as they would be
// visible (e.g., the function "stat" would come before the struct // visible (e.g., the function "stat" would come before the struct
@ -1785,7 +1796,7 @@ public:
/// The identifier table consists of a blob containing string data /// The identifier table consists of a blob containing string data
/// (the actual identifiers themselves) and a separate "offsets" index /// (the actual identifiers themselves) and a separate "offsets" index
/// that maps identifier IDs to locations within the blob. /// that maps identifier IDs to locations within the blob.
void PCHWriter::WriteIdentifierTable() { void PCHWriter::WriteIdentifierTable(Preprocessor &PP) {
using namespace llvm; using namespace llvm;
// Create and write out the blob that contains the identifier // Create and write out the blob that contains the identifier
@ -1806,7 +1817,7 @@ void PCHWriter::WriteIdentifierTable() {
llvm::SmallVector<char, 4096> IdentifierTable; llvm::SmallVector<char, 4096> IdentifierTable;
uint32_t BucketOffset; uint32_t BucketOffset;
{ {
PCHIdentifierTableTrait Trait(*this); PCHIdentifierTableTrait Trait(*this, PP);
llvm::raw_svector_ostream Out(IdentifierTable); llvm::raw_svector_ostream Out(IdentifierTable);
BucketOffset = Generator.Emit(Out, Trait); BucketOffset = Generator.Emit(Out, Trait);
} }
@ -1964,7 +1975,8 @@ void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
} }
PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream) PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream)
: Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), NumStatements(0) { } : Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS),
NumStatements(0), NumMacros(0) { }
void PCHWriter::WritePCH(Sema &SemaRef) { void PCHWriter::WritePCH(Sema &SemaRef) {
ASTContext &Context = SemaRef.Context; ASTContext &Context = SemaRef.Context;
@ -1989,7 +2001,7 @@ void PCHWriter::WritePCH(Sema &SemaRef) {
WritePreprocessor(PP); WritePreprocessor(PP);
WriteTypesBlock(Context); WriteTypesBlock(Context);
WriteDeclsBlock(Context); WriteDeclsBlock(Context);
WriteIdentifierTable(); WriteIdentifierTable(PP);
Stream.EmitRecord(pch::TYPE_OFFSET, TypeOffsets); Stream.EmitRecord(pch::TYPE_OFFSET, TypeOffsets);
Stream.EmitRecord(pch::DECL_OFFSET, DeclOffsets); Stream.EmitRecord(pch::DECL_OFFSET, DeclOffsets);
@ -2004,6 +2016,7 @@ void PCHWriter::WritePCH(Sema &SemaRef) {
// Some simple statistics // Some simple statistics
Record.clear(); Record.clear();
Record.push_back(NumStatements); Record.push_back(NumStatements);
Record.push_back(NumMacros);
Stream.EmitRecord(pch::STATISTICS, Record); Stream.EmitRecord(pch::STATISTICS, Record);
Stream.ExitBlock(); Stream.ExitBlock();
} }