Lazy deserialization of macro definitions for precompiled headers.

This optimization improves performance on the Carbon-prefixed "Hello,
World!" example by 57%. For reference, we're now about 2.25x faster
than GCC PCH. We're also pulling in far less of the PCH file:

*** PCH Statistics:
  411/20693 types read (1.986179%)
  2553/59230 declarations read (4.310316%)
  1093/44646 identifiers read (2.448148%)
  1/32954 statements read (0.003035%)
  21/6187 macros read (0.339421%)

llvm-svn: 69755
This commit is contained in:
Douglas Gregor 2009-04-21 23:56:24 +00:00
parent fe95afd0bc
commit c3366a555b
5 changed files with 192 additions and 86 deletions

View File

@ -198,7 +198,6 @@ namespace clang {
/// [PP_COUNTER_VALUE, Val]
PP_COUNTER_VALUE = 4
};
/// \defgroup PCHAST Precompiled header AST constants
///

View File

@ -171,6 +171,11 @@ private:
/// in the PCH file.
unsigned TotalNumStatements;
/// \brief The number of macros de-serialized from the PCH file.
unsigned NumMacrosRead;
/// \brief The total number of macros stored in the PCH file.
unsigned TotalNumMacros;
/// \brief FIXME: document!
llvm::SmallVector<uint64_t, 4> SpecialTypes;
@ -311,6 +316,9 @@ public:
/// \brief Reads a statement from the current stream position.
Stmt *ReadStmt();
/// \brief Reads the macro record located at the given offset.
void ReadMacroRecord(uint64_t Offset);
/// \brief Retrieve the AST context that this PCH reader
/// supplements.
///
/// \returns a non-const reference to \c Context, so callers may modify
/// the context through it.
ASTContext &getContext() { return Context; }

View File

@ -98,6 +98,14 @@ private:
/// table, shifted left by one bit with the low bit set.
llvm::SmallVector<uint64_t, 16> IdentifierOffsets;
/// \brief Offsets of each of the macro identifiers into the
/// bitstream.
///
/// For each identifier that is associated with a macro, this map
/// provides the offset into the bitstream where that macro is
/// defined.
llvm::DenseMap<const IdentifierInfo *, uint64_t> MacroOffsets;
/// \brief Declarations encountered that might be external
/// definitions.
///
@ -125,6 +133,9 @@ private:
/// \brief The number of statements written to the PCH file.
unsigned NumStatements;
/// \brief The number of macros written to the PCH file.
unsigned NumMacros;
void WriteTargetTriple(const TargetInfo &Target);
void WriteLanguageOptions(const LangOptions &LangOpts);
void WriteSourceManagerBlock(SourceManager &SourceMgr);
@ -134,7 +145,7 @@ private:
uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC);
uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
void WriteDeclsBlock(ASTContext &Context);
void WriteIdentifierTable();
void WriteIdentifierTable(Preprocessor &PP);
void WriteAttributeRecord(const Attr *Attr);
public:
@ -160,6 +171,16 @@ public:
/// \brief Emit a reference to an identifier
void AddIdentifierRef(const IdentifierInfo *II, RecordData &Record);
/// \brief Look up the bitstream offset recorded for a macro definition.
///
/// \param II the identifier naming the macro. It is expected to already
/// have an entry in \c MacroOffsets; an assertion checks this.
///
/// \returns the value stored in \c MacroOffsets for \p II.
uint64_t getMacroOffset(const IdentifierInfo *II) {
  assert(MacroOffsets.count(II) && "Identifier does not name a macro");
  return MacroOffsets[II];
}
/// \brief Emit a reference to a type.
void AddTypeRef(QualType T, RecordData &Record);

View File

@ -36,6 +36,23 @@
using namespace clang;
namespace {
/// \brief Helper class that saves the current stream position and
/// then restores it when destroyed.
struct VISIBILITY_HIDDEN SavedStreamPosition {
explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
: Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
~SavedStreamPosition() {
Stream.JumpToBit(Offset);
}
private:
llvm::BitstreamReader &Stream;
uint64_t Offset;
};
}
//===----------------------------------------------------------------------===//
// Declaration deserialization
//===----------------------------------------------------------------------===//
@ -1053,6 +1070,8 @@ public:
using namespace clang::io;
uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these?
(void)Bits;
bool hasMacroDefinition = (Bits >> 3) & 0x01;
pch::IdentID ID = ReadUnalignedLE32(d);
DataLen -= 8;
@ -1064,8 +1083,13 @@ public:
k.first, k.first + k.second);
Reader.SetIdentifierInfo(ID, II);
// FIXME: If this identifier is a macro, deserialize the macro
// definition now.
// If this identifier is a macro, deserialize the macro
// definition.
if (hasMacroDefinition) {
  // The offset occupies 8 bytes in the identifier data and is read as a
  // 64-bit value; keep it in a 64-bit variable so that large bit
  // offsets are not truncated before being passed to ReadMacroRecord,
  // which takes a uint64_t.
  uint64_t Offset = ReadUnalignedLE64(d);
  Reader.ReadMacroRecord(Offset);
  DataLen -= 8;
}
// Read all of the declarations visible at global scope with this
// name.
@ -1323,14 +1347,116 @@ PCHReader::PCHReadResult PCHReader::ReadSourceManagerBlock() {
}
}
/// \brief Read the macro definition stored at the given bit offset.
///
/// Jumps the stream to \p Offset and reads records until a single macro
/// has been processed: one PP_MACRO_OBJECT_LIKE or
/// PP_MACRO_FUNCTION_LIKE record, followed by the PP_TOKEN records that
/// form its body. Reading stops at the next macro record or at the end
/// of the enclosing block. The macro is installed in the preprocessor
/// and NumMacrosRead is incremented. The stream position in effect on
/// entry is restored when the function returns.
///
/// \param Offset bit offset in the stream at which the macro's record
/// begins.
void PCHReader::ReadMacroRecord(uint64_t Offset) {
  // Keep track of where we are in the stream, then jump back there
  // after reading this macro.
  SavedStreamPosition SavedPosition(Stream);

  Stream.JumpToBit(Offset);
  RecordData Record;
  llvm::SmallVector<IdentifierInfo*, 16> MacroArgs;
  MacroInfo *Macro = 0;

  while (true) {
    unsigned Code = Stream.ReadCode();
    switch (Code) {
    case llvm::bitc::END_BLOCK:
      return;

    case llvm::bitc::ENTER_SUBBLOCK:
      // No known subblocks, always skip them.
      Stream.ReadSubBlockID();
      if (Stream.SkipBlock()) {
        Error("Malformed block record");
        return;
      }
      continue;

    case llvm::bitc::DEFINE_ABBREV:
      Stream.ReadAbbrevRecord();
      continue;
    default: break;
    }

    // Read a record.
    Record.clear();
    pch::PreprocessorRecordTypes RecType =
      (pch::PreprocessorRecordTypes)Stream.ReadRecord(Code, Record);
    switch (RecType) {
    case pch::PP_COUNTER_VALUE:
      // Skip this record.
      break;

    case pch::PP_MACRO_OBJECT_LIKE:
    case pch::PP_MACRO_FUNCTION_LIKE: {
      // If we already have a macro, that means that we've hit the end
      // of the definition of the macro we were looking for. We're
      // done.
      if (Macro)
        return;

      IdentifierInfo *II = DecodeIdentifierInfo(Record[0]);
      if (II == 0) {
        Error("Macro must have a name");
        return;
      }
      SourceLocation Loc = SourceLocation::getFromRawEncoding(Record[1]);
      bool isUsed = Record[2];

      MacroInfo *MI = PP.AllocateMacroInfo(Loc);
      MI->setIsUsed(isUsed);

      if (RecType == pch::PP_MACRO_FUNCTION_LIKE) {
        // Decode function-like macro info.
        bool isC99VarArgs = Record[3];
        bool isGNUVarArgs = Record[4];
        MacroArgs.clear();
        unsigned NumArgs = Record[5];
        for (unsigned i = 0; i != NumArgs; ++i)
          MacroArgs.push_back(DecodeIdentifierInfo(Record[6+i]));

        // Install function-like macro info.
        MI->setIsFunctionLike();
        if (isC99VarArgs) MI->setIsC99Varargs();
        if (isGNUVarArgs) MI->setIsGNUVarargs();
        // Pass begin() rather than &MacroArgs[0]: a function-like macro
        // may have zero parameters, and indexing element 0 of an empty
        // vector is invalid.
        MI->setArgumentList(MacroArgs.begin(), MacroArgs.size(),
                            PP.getPreprocessorAllocator());
      }

      // Finally, install the macro.
      PP.setMacroInfo(II, MI);

      // Remember that we saw this macro last so that we add the tokens that
      // form its body to it.
      Macro = MI;
      ++NumMacrosRead;
      break;
    }

    case pch::PP_TOKEN: {
      // If we see a TOKEN before a PP_MACRO_*, then the file is
      // erroneous, just pretend we didn't see this.
      if (Macro == 0) break;

      Token Tok;
      Tok.startToken();
      Tok.setLocation(SourceLocation::getFromRawEncoding(Record[0]));
      Tok.setLength(Record[1]);
      if (IdentifierInfo *II = DecodeIdentifierInfo(Record[2]))
        Tok.setIdentifierInfo(II);
      Tok.setKind((tok::TokenKind)Record[3]);
      Tok.setFlag((Token::TokenFlags)Record[4]);
      Macro->AddTokenToBody(Tok);
      break;
    }
    }
  }
}
bool PCHReader::ReadPreprocessorBlock() {
if (Stream.EnterSubBlock(pch::PREPROCESSOR_BLOCK_ID))
return Error("Malformed preprocessor block record");
RecordData Record;
llvm::SmallVector<IdentifierInfo*, 16> MacroArgs;
MacroInfo *LastMacro = 0;
while (true) {
unsigned Code = Stream.ReadCode();
switch (Code) {
@ -1365,58 +1491,10 @@ bool PCHReader::ReadPreprocessorBlock() {
break;
case pch::PP_MACRO_OBJECT_LIKE:
case pch::PP_MACRO_FUNCTION_LIKE: {
IdentifierInfo *II = DecodeIdentifierInfo(Record[0]);
if (II == 0)
return Error("Macro must have a name");
SourceLocation Loc = SourceLocation::getFromRawEncoding(Record[1]);
bool isUsed = Record[2];
MacroInfo *MI = PP.AllocateMacroInfo(Loc);
MI->setIsUsed(isUsed);
if (RecType == pch::PP_MACRO_FUNCTION_LIKE) {
// Decode function-like macro info.
bool isC99VarArgs = Record[3];
bool isGNUVarArgs = Record[4];
MacroArgs.clear();
unsigned NumArgs = Record[5];
for (unsigned i = 0; i != NumArgs; ++i)
MacroArgs.push_back(DecodeIdentifierInfo(Record[6+i]));
// Install function-like macro info.
MI->setIsFunctionLike();
if (isC99VarArgs) MI->setIsC99Varargs();
if (isGNUVarArgs) MI->setIsGNUVarargs();
MI->setArgumentList(&MacroArgs[0], MacroArgs.size(),
PP.getPreprocessorAllocator());
}
// Finally, install the macro.
PP.setMacroInfo(II, MI);
// Remember that we saw this macro last so that we add the tokens that
// form its body to it.
LastMacro = MI;
break;
}
case pch::PP_TOKEN: {
// If we see a TOKEN before a PP_MACRO_*, then the file is erroneous, just
// pretend we didn't see this.
if (LastMacro == 0) break;
Token Tok;
Tok.startToken();
Tok.setLocation(SourceLocation::getFromRawEncoding(Record[0]));
Tok.setLength(Record[1]);
if (IdentifierInfo *II = DecodeIdentifierInfo(Record[2]))
Tok.setIdentifierInfo(II);
Tok.setKind((tok::TokenKind)Record[3]);
Tok.setFlag((Token::TokenFlags)Record[4]);
LastMacro->AddTokenToBody(Tok);
break;
}
case pch::PP_MACRO_FUNCTION_LIKE:
case pch::PP_TOKEN:
// Once we've hit a macro definition or a token, we're done.
return false;
}
}
}
@ -1573,6 +1651,7 @@ PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) {
case pch::STATISTICS:
TotalNumStatements = Record[0];
TotalNumMacros = Record[1];
break;
}
@ -1582,23 +1661,6 @@ PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) {
return Failure;
}
namespace {
/// \brief Helper class that saves the current stream position and
/// then restores it when destroyed.
///
/// The constructor records Stream.GetCurrentBitNo(); the destructor
/// passes that recorded value back to Stream.JumpToBit().
struct VISIBILITY_HIDDEN SavedStreamPosition {
explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
: Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
~SavedStreamPosition() {
Stream.JumpToBit(Offset);
}
private:
/// The stream whose position is saved and restored.
llvm::BitstreamReader &Stream;
/// The bit position recorded at construction time.
uint64_t Offset;
};
}
PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
// Set the PCH file name.
this->FileName = FileName;
@ -2364,6 +2426,9 @@ void PCHReader::PrintStats() {
std::fprintf(stderr, " %u/%u statements read (%f%%)\n",
NumStatementsRead, TotalNumStatements,
((float)NumStatementsRead/TotalNumStatements * 100));
std::fprintf(stderr, " %u/%u macros read (%f%%)\n",
NumMacrosRead, TotalNumMacros,
((float)NumMacrosRead/TotalNumMacros * 100));
std::fprintf(stderr, "\n");
}

View File

@ -1439,8 +1439,6 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
// Loop over all the macro definitions that are live at the end of the file,
// emitting each to the PP section.
// FIXME: Eventually we want to emit an index so that we can lazily load
// macros.
for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
I != E; ++I) {
// FIXME: This emits macros in hash table order, we should do it in a stable
@ -1452,7 +1450,9 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
if (MI->isBuiltinMacro())
continue;
// FIXME: Remove this identifier reference?
AddIdentifierRef(I->first, Record);
MacroOffsets[I->first] = Stream.GetCurrentBitNo();
Record.push_back(MI->getDefinitionLoc().getRawEncoding());
Record.push_back(MI->isUsed());
@ -1494,7 +1494,7 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
Stream.EmitRecord(pch::PP_TOKEN, Record);
Record.clear();
}
++NumMacros;
}
Stream.ExitBlock();
@ -1715,6 +1715,7 @@ void PCHWriter::WriteDeclsBlock(ASTContext &Context) {
namespace {
class VISIBILITY_HIDDEN PCHIdentifierTableTrait {
PCHWriter &Writer;
Preprocessor &PP;
public:
typedef const IdentifierInfo* key_type;
@ -1723,19 +1724,23 @@ public:
typedef pch::IdentID data_type;
typedef data_type data_type_ref;
PCHIdentifierTableTrait(PCHWriter &Writer) : Writer(Writer) { }
PCHIdentifierTableTrait(PCHWriter &Writer, Preprocessor &PP)
: Writer(Writer), PP(PP) { }
static unsigned ComputeHash(const IdentifierInfo* II) {
return clang::BernsteinHash(II->getName());
}
static std::pair<unsigned,unsigned>
std::pair<unsigned,unsigned>
EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II,
pch::IdentID ID) {
unsigned KeyLen = strlen(II->getName()) + 1;
clang::io::Emit16(Out, KeyLen);
unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
// 4 bytes for the persistent ID
if (II->hasMacroDefinition() &&
!PP.getMacroInfo(const_cast<IdentifierInfo *>(II))->isBuiltinMacro())
DataLen += 8;
for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
DEnd = IdentifierResolver::end();
D != DEnd; ++D)
@ -1755,15 +1760,21 @@ public:
void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II,
pch::IdentID ID, unsigned) {
uint32_t Bits = 0;
bool hasMacroDefinition =
II->hasMacroDefinition() &&
!PP.getMacroInfo(const_cast<IdentifierInfo *>(II))->isBuiltinMacro();
Bits = Bits | (uint32_t)II->getTokenID();
Bits = (Bits << 8) | (uint32_t)II->getObjCOrBuiltinID();
Bits = (Bits << 10) | II->hasMacroDefinition();
Bits = (Bits << 10) | hasMacroDefinition;
Bits = (Bits << 1) | II->isExtensionToken();
Bits = (Bits << 1) | II->isPoisoned();
Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword();
clang::io::Emit32(Out, Bits);
clang::io::Emit32(Out, ID);
if (hasMacroDefinition)
clang::io::Emit64(Out, Writer.getMacroOffset(II));
// Emit the declaration IDs in reverse order, because the
// IdentifierResolver provides the declarations as they would be
// visible (e.g., the function "stat" would come before the struct
@ -1785,7 +1796,7 @@ public:
/// The identifier table consists of a blob containing string data
/// (the actual identifiers themselves) and a separate "offsets" index
/// that maps identifier IDs to locations within the blob.
void PCHWriter::WriteIdentifierTable() {
void PCHWriter::WriteIdentifierTable(Preprocessor &PP) {
using namespace llvm;
// Create and write out the blob that contains the identifier
@ -1806,7 +1817,7 @@ void PCHWriter::WriteIdentifierTable() {
llvm::SmallVector<char, 4096> IdentifierTable;
uint32_t BucketOffset;
{
PCHIdentifierTableTrait Trait(*this);
PCHIdentifierTableTrait Trait(*this, PP);
llvm::raw_svector_ostream Out(IdentifierTable);
BucketOffset = Generator.Emit(Out, Trait);
}
@ -1964,7 +1975,8 @@ void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
}
PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream)
: Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), NumStatements(0) { }
: Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS),
NumStatements(0), NumMacros(0) { }
void PCHWriter::WritePCH(Sema &SemaRef) {
ASTContext &Context = SemaRef.Context;
@ -1989,7 +2001,7 @@ void PCHWriter::WritePCH(Sema &SemaRef) {
WritePreprocessor(PP);
WriteTypesBlock(Context);
WriteDeclsBlock(Context);
WriteIdentifierTable();
WriteIdentifierTable(PP);
Stream.EmitRecord(pch::TYPE_OFFSET, TypeOffsets);
Stream.EmitRecord(pch::DECL_OFFSET, DeclOffsets);
@ -2004,6 +2016,7 @@ void PCHWriter::WritePCH(Sema &SemaRef) {
// Some simple statistics
Record.clear();
Record.push_back(NumStatements);
Record.push_back(NumMacros);
Stream.EmitRecord(pch::STATISTICS, Record);
Stream.ExitBlock();
}