From c3366a555b382af03e207ce72a0ab14f3a0f6988 Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Tue, 21 Apr 2009 23:56:24 +0000 Subject: [PATCH] Lazy deserialization of macro definitions for precompiled headers. This optimization improves performance on the Carbon-prefixed "Hello, World!" example by 57%. For reference, we're now about 2.25x faster than GCC PCH. We're also pulling in far less of the PCH file: *** PCH Statistics: 411/20693 types read (1.986179%) 2553/59230 declarations read (4.310316%) 1093/44646 identifiers read (2.448148%) 1/32954 statements read (0.003035%) 21/6187 macros read (0.339421%) llvm-svn: 69755 --- clang/include/clang/Frontend/PCHBitCodes.h | 1 - clang/include/clang/Frontend/PCHReader.h | 8 + clang/include/clang/Frontend/PCHWriter.h | 23 ++- clang/lib/Frontend/PCHReader.cpp | 213 ++++++++++++++------- clang/lib/Frontend/PCHWriter.cpp | 33 +++- 5 files changed, 192 insertions(+), 86 deletions(-) diff --git a/clang/include/clang/Frontend/PCHBitCodes.h b/clang/include/clang/Frontend/PCHBitCodes.h index 32f94852b241..d21e10cd6974 100644 --- a/clang/include/clang/Frontend/PCHBitCodes.h +++ b/clang/include/clang/Frontend/PCHBitCodes.h @@ -198,7 +198,6 @@ namespace clang { /// [PP_COUNTER_VALUE, Val] PP_COUNTER_VALUE = 4 }; - /// \defgroup PCHAST Precompiled header AST constants /// diff --git a/clang/include/clang/Frontend/PCHReader.h b/clang/include/clang/Frontend/PCHReader.h index 3af147f7a9b9..d5157bc3ce4e 100644 --- a/clang/include/clang/Frontend/PCHReader.h +++ b/clang/include/clang/Frontend/PCHReader.h @@ -171,6 +171,11 @@ private: /// in the PCH file. unsigned TotalNumStatements; + /// \brief The number of macros de-serialized from the PCH file. + unsigned NumMacrosRead; + /// \brief The total number of macros stored in the PCH file. + unsigned TotalNumMacros; + /// \brief FIXME: document! llvm::SmallVector SpecialTypes; @@ -311,6 +316,9 @@ public: /// \brief Reads a statement from the current stream position. 
Stmt *ReadStmt(); + /// \brief Reads the macro record located at the given offset. + void ReadMacroRecord(uint64_t Offset); + /// \brief Retrieve the AST context that this PCH reader /// supplements. ASTContext &getContext() { return Context; } diff --git a/clang/include/clang/Frontend/PCHWriter.h b/clang/include/clang/Frontend/PCHWriter.h index 68003ef9717c..2db253e7ddbb 100644 --- a/clang/include/clang/Frontend/PCHWriter.h +++ b/clang/include/clang/Frontend/PCHWriter.h @@ -98,6 +98,14 @@ private: /// table, shifted left by one bit with the low bit set. llvm::SmallVector IdentifierOffsets; + /// \brief Offsets of each of the macro identifiers into the + /// bitstream. + /// + /// For each identifier that is associated with a macro, this map + /// provides the offset into the bitstream where that macro is + /// defined. + llvm::DenseMap MacroOffsets; + /// \brief Declarations encountered that might be external /// definitions. /// @@ -125,6 +133,9 @@ private: /// \brief The number of statements written to the PCH file. unsigned NumStatements; + /// \brief The number of macros written to the PCH file. + unsigned NumMacros; + void WriteTargetTriple(const TargetInfo &Target); void WriteLanguageOptions(const LangOptions &LangOpts); void WriteSourceManagerBlock(SourceManager &SourceMgr); @@ -134,7 +145,7 @@ private: uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC); uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC); void WriteDeclsBlock(ASTContext &Context); - void WriteIdentifierTable(); + void WriteIdentifierTable(Preprocessor &PP); void WriteAttributeRecord(const Attr *Attr); public: @@ -160,6 +171,16 @@ public: /// \brief Emit a reference to an identifier void AddIdentifierRef(const IdentifierInfo *II, RecordData &Record); + /// \brief Retrieve the offset of the macro definition for the given + /// identifier. + /// + /// The identifier must refer to a macro. 
+ uint64_t getMacroOffset(const IdentifierInfo *II) { + assert(MacroOffsets.find(II) != MacroOffsets.end() && + "Identifier does not name a macro"); + return MacroOffsets[II]; + } + /// \brief Emit a reference to a type. void AddTypeRef(QualType T, RecordData &Record); diff --git a/clang/lib/Frontend/PCHReader.cpp b/clang/lib/Frontend/PCHReader.cpp index 00075c88b670..6a6cd641202f 100644 --- a/clang/lib/Frontend/PCHReader.cpp +++ b/clang/lib/Frontend/PCHReader.cpp @@ -36,6 +36,23 @@ using namespace clang; +namespace { + /// \brief Helper class that saves the current stream position and + /// then restores it when destroyed. + struct VISIBILITY_HIDDEN SavedStreamPosition { + explicit SavedStreamPosition(llvm::BitstreamReader &Stream) + : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { } + + ~SavedStreamPosition() { + Stream.JumpToBit(Offset); + } + + private: + llvm::BitstreamReader &Stream; + uint64_t Offset; + }; +} + //===----------------------------------------------------------------------===// // Declaration deserialization //===----------------------------------------------------------------------===// @@ -1053,6 +1070,8 @@ public: using namespace clang::io; uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these? (void)Bits; + bool hasMacroDefinition = (Bits >> 3) & 0x01; + pch::IdentID ID = ReadUnalignedLE32(d); DataLen -= 8; @@ -1064,8 +1083,13 @@ public: k.first, k.first + k.second); Reader.SetIdentifierInfo(ID, II); - // FIXME: If this identifier is a macro, deserialize the macro - // definition now. + // If this identifier is a macro, an 8-byte bitstream offset follows; + // read all 64 bits of it and deserialize the macro definition. + if (hasMacroDefinition) { + uint64_t Offset = ReadUnalignedLE64(d); + Reader.ReadMacroRecord(Offset); + DataLen -= 8; + } // Read all of the declarations visible at global scope with this // name.
@@ -1323,14 +1347,116 @@ PCHReader::PCHReadResult PCHReader::ReadSourceManagerBlock() { } } +void PCHReader::ReadMacroRecord(uint64_t Offset) { + // Keep track of where we are in the stream, then jump back there + // after reading this macro. + SavedStreamPosition SavedPosition(Stream); + + Stream.JumpToBit(Offset); + RecordData Record; + llvm::SmallVector MacroArgs; + MacroInfo *Macro = 0; + while (true) { + unsigned Code = Stream.ReadCode(); + switch (Code) { + case llvm::bitc::END_BLOCK: + return; + + case llvm::bitc::ENTER_SUBBLOCK: + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) { + Error("Malformed block record"); + return; + } + continue; + + case llvm::bitc::DEFINE_ABBREV: + Stream.ReadAbbrevRecord(); + continue; + default: break; + } + + // Read a record. + Record.clear(); + pch::PreprocessorRecordTypes RecType = + (pch::PreprocessorRecordTypes)Stream.ReadRecord(Code, Record); + switch (RecType) { + case pch::PP_COUNTER_VALUE: + // Skip this record. + break; + + case pch::PP_MACRO_OBJECT_LIKE: + case pch::PP_MACRO_FUNCTION_LIKE: { + // If we already have a macro, that means that we've hit the end + // of the definition of the macro we were looking for. We're + // done. + if (Macro) + return; + + IdentifierInfo *II = DecodeIdentifierInfo(Record[0]); + if (II == 0) { + Error("Macro must have a name"); + return; + } + SourceLocation Loc = SourceLocation::getFromRawEncoding(Record[1]); + bool isUsed = Record[2]; + + MacroInfo *MI = PP.AllocateMacroInfo(Loc); + MI->setIsUsed(isUsed); + + if (RecType == pch::PP_MACRO_FUNCTION_LIKE) { + // Decode function-like macro info. + bool isC99VarArgs = Record[3]; + bool isGNUVarArgs = Record[4]; + MacroArgs.clear(); + unsigned NumArgs = Record[5]; + for (unsigned i = 0; i != NumArgs; ++i) + MacroArgs.push_back(DecodeIdentifierInfo(Record[6+i])); + + // Install function-like macro info. 
+ MI->setIsFunctionLike(); + if (isC99VarArgs) MI->setIsC99Varargs(); + if (isGNUVarArgs) MI->setIsGNUVarargs(); + MI->setArgumentList(&MacroArgs[0], MacroArgs.size(), + PP.getPreprocessorAllocator()); + } + + // Finally, install the macro. + PP.setMacroInfo(II, MI); + + // Remember that we saw this macro last so that we add the tokens that + // form its body to it. + Macro = MI; + ++NumMacrosRead; + break; + } + + case pch::PP_TOKEN: { + // If we see a TOKEN before a PP_MACRO_*, then the file is + // erroneous, just pretend we didn't see this. + if (Macro == 0) break; + + Token Tok; + Tok.startToken(); + Tok.setLocation(SourceLocation::getFromRawEncoding(Record[0])); + Tok.setLength(Record[1]); + if (IdentifierInfo *II = DecodeIdentifierInfo(Record[2])) + Tok.setIdentifierInfo(II); + Tok.setKind((tok::TokenKind)Record[3]); + Tok.setFlag((Token::TokenFlags)Record[4]); + Macro->AddTokenToBody(Tok); + break; + } + } + } +} + bool PCHReader::ReadPreprocessorBlock() { if (Stream.EnterSubBlock(pch::PREPROCESSOR_BLOCK_ID)) return Error("Malformed preprocessor block record"); RecordData Record; - llvm::SmallVector MacroArgs; - MacroInfo *LastMacro = 0; - while (true) { unsigned Code = Stream.ReadCode(); switch (Code) { @@ -1365,58 +1491,10 @@ bool PCHReader::ReadPreprocessorBlock() { break; case pch::PP_MACRO_OBJECT_LIKE: - case pch::PP_MACRO_FUNCTION_LIKE: { - IdentifierInfo *II = DecodeIdentifierInfo(Record[0]); - if (II == 0) - return Error("Macro must have a name"); - SourceLocation Loc = SourceLocation::getFromRawEncoding(Record[1]); - bool isUsed = Record[2]; - - MacroInfo *MI = PP.AllocateMacroInfo(Loc); - MI->setIsUsed(isUsed); - - if (RecType == pch::PP_MACRO_FUNCTION_LIKE) { - // Decode function-like macro info. 
- bool isC99VarArgs = Record[3]; - bool isGNUVarArgs = Record[4]; - MacroArgs.clear(); - unsigned NumArgs = Record[5]; - for (unsigned i = 0; i != NumArgs; ++i) - MacroArgs.push_back(DecodeIdentifierInfo(Record[6+i])); - - // Install function-like macro info. - MI->setIsFunctionLike(); - if (isC99VarArgs) MI->setIsC99Varargs(); - if (isGNUVarArgs) MI->setIsGNUVarargs(); - MI->setArgumentList(&MacroArgs[0], MacroArgs.size(), - PP.getPreprocessorAllocator()); - } - - // Finally, install the macro. - PP.setMacroInfo(II, MI); - - // Remember that we saw this macro last so that we add the tokens that - // form its body to it. - LastMacro = MI; - break; - } - - case pch::PP_TOKEN: { - // If we see a TOKEN before a PP_MACRO_*, then the file is eroneous, just - // pretend we didn't see this. - if (LastMacro == 0) break; - - Token Tok; - Tok.startToken(); - Tok.setLocation(SourceLocation::getFromRawEncoding(Record[0])); - Tok.setLength(Record[1]); - if (IdentifierInfo *II = DecodeIdentifierInfo(Record[2])) - Tok.setIdentifierInfo(II); - Tok.setKind((tok::TokenKind)Record[3]); - Tok.setFlag((Token::TokenFlags)Record[4]); - LastMacro->AddTokenToBody(Tok); - break; - } + case pch::PP_MACRO_FUNCTION_LIKE: + case pch::PP_TOKEN: + // Once we've hit a macro definition or a token, we're done. + return false; } } } @@ -1573,6 +1651,7 @@ PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) { case pch::STATISTICS: TotalNumStatements = Record[0]; + TotalNumMacros = Record[1]; break; } @@ -1582,23 +1661,6 @@ PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) { return Failure; } -namespace { - /// \brief Helper class that saves the current stream position and - /// then restores it when destroyed. 
- struct VISIBILITY_HIDDEN SavedStreamPosition { - explicit SavedStreamPosition(llvm::BitstreamReader &Stream) - : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { } - - ~SavedStreamPosition() { - Stream.JumpToBit(Offset); - } - - private: - llvm::BitstreamReader &Stream; - uint64_t Offset; - }; -} - PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) { // Set the PCH file name. this->FileName = FileName; @@ -2364,6 +2426,9 @@ void PCHReader::PrintStats() { std::fprintf(stderr, " %u/%u statements read (%f%%)\n", NumStatementsRead, TotalNumStatements, ((float)NumStatementsRead/TotalNumStatements * 100)); + std::fprintf(stderr, " %u/%u macros read (%f%%)\n", + NumMacrosRead, TotalNumMacros, + ((float)NumMacrosRead/TotalNumMacros * 100)); std::fprintf(stderr, "\n"); } diff --git a/clang/lib/Frontend/PCHWriter.cpp b/clang/lib/Frontend/PCHWriter.cpp index 116725062430..bdcd3593142e 100644 --- a/clang/lib/Frontend/PCHWriter.cpp +++ b/clang/lib/Frontend/PCHWriter.cpp @@ -1439,8 +1439,6 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) { // Loop over all the macro definitions that are live at the end of the file, // emitting each to the PP section. - // FIXME: Eventually we want to emit an index so that we can lazily load - // macros. for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end(); I != E; ++I) { // FIXME: This emits macros in hash table order, we should do it in a stable @@ -1452,7 +1450,9 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) { if (MI->isBuiltinMacro()) continue; + // FIXME: Remove this identifier reference? 
AddIdentifierRef(I->first, Record); + MacroOffsets[I->first] = Stream.GetCurrentBitNo(); Record.push_back(MI->getDefinitionLoc().getRawEncoding()); Record.push_back(MI->isUsed()); @@ -1494,7 +1494,7 @@ void PCHWriter::WritePreprocessor(const Preprocessor &PP) { Stream.EmitRecord(pch::PP_TOKEN, Record); Record.clear(); } - + ++NumMacros; } Stream.ExitBlock(); @@ -1715,6 +1715,7 @@ void PCHWriter::WriteDeclsBlock(ASTContext &Context) { namespace { class VISIBILITY_HIDDEN PCHIdentifierTableTrait { PCHWriter &Writer; + Preprocessor &PP; public: typedef const IdentifierInfo* key_type; @@ -1723,19 +1724,23 @@ public: typedef pch::IdentID data_type; typedef data_type data_type_ref; - PCHIdentifierTableTrait(PCHWriter &Writer) : Writer(Writer) { } + PCHIdentifierTableTrait(PCHWriter &Writer, Preprocessor &PP) + : Writer(Writer), PP(PP) { } static unsigned ComputeHash(const IdentifierInfo* II) { return clang::BernsteinHash(II->getName()); } - static std::pair + std::pair EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II, pch::IdentID ID) { unsigned KeyLen = strlen(II->getName()) + 1; clang::io::Emit16(Out, KeyLen); unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags // 4 bytes for the persistent ID + if (II->hasMacroDefinition() && + !PP.getMacroInfo(const_cast(II))->isBuiltinMacro()) + DataLen += 8; for (IdentifierResolver::iterator D = IdentifierResolver::begin(II), DEnd = IdentifierResolver::end(); D != DEnd; ++D) @@ -1755,15 +1760,21 @@ public: void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II, pch::IdentID ID, unsigned) { uint32_t Bits = 0; + bool hasMacroDefinition = + II->hasMacroDefinition() && + !PP.getMacroInfo(const_cast(II))->isBuiltinMacro(); Bits = Bits | (uint32_t)II->getTokenID(); Bits = (Bits << 8) | (uint32_t)II->getObjCOrBuiltinID(); - Bits = (Bits << 10) | II->hasMacroDefinition(); + Bits = (Bits << 10) | hasMacroDefinition; Bits = (Bits << 1) | II->isExtensionToken(); Bits = (Bits << 1) | II->isPoisoned(); 
Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword(); clang::io::Emit32(Out, Bits); clang::io::Emit32(Out, ID); + if (hasMacroDefinition) + clang::io::Emit64(Out, Writer.getMacroOffset(II)); + // Emit the declaration IDs in reverse order, because the // IdentifierResolver provides the declarations as they would be // visible (e.g., the function "stat" would come before the struct @@ -1785,7 +1796,7 @@ public: /// The identifier table consists of a blob containing string data /// (the actual identifiers themselves) and a separate "offsets" index /// that maps identifier IDs to locations within the blob. -void PCHWriter::WriteIdentifierTable() { +void PCHWriter::WriteIdentifierTable(Preprocessor &PP) { using namespace llvm; // Create and write out the blob that contains the identifier @@ -1806,7 +1817,7 @@ void PCHWriter::WriteIdentifierTable() { llvm::SmallVector IdentifierTable; uint32_t BucketOffset; { - PCHIdentifierTableTrait Trait(*this); + PCHIdentifierTableTrait Trait(*this, PP); llvm::raw_svector_ostream Out(IdentifierTable); BucketOffset = Generator.Emit(Out, Trait); } @@ -1964,7 +1975,8 @@ void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) { } PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream) - : Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), NumStatements(0) { } + : Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), + NumStatements(0), NumMacros(0) { } void PCHWriter::WritePCH(Sema &SemaRef) { ASTContext &Context = SemaRef.Context; @@ -1989,7 +2001,7 @@ void PCHWriter::WritePCH(Sema &SemaRef) { WritePreprocessor(PP); WriteTypesBlock(Context); WriteDeclsBlock(Context); - WriteIdentifierTable(); + WriteIdentifierTable(PP); Stream.EmitRecord(pch::TYPE_OFFSET, TypeOffsets); Stream.EmitRecord(pch::DECL_OFFSET, DeclOffsets); @@ -2004,6 +2016,7 @@ void PCHWriter::WritePCH(Sema &SemaRef) { // Some simple statistics Record.clear(); Record.push_back(NumStatements); + Record.push_back(NumMacros); 
Stream.EmitRecord(pch::STATISTICS, Record); Stream.ExitBlock(); }