[PCH] When serializing Stmts, keep track of when sub statements are referenced again and

in such a case just write out a reference of a previously serialized Stmt, instead
of serializing it all over again.

This saves memory + space + [de]serializing time, and avoids blowing up memory
with pathological cases. rdar://10293911

llvm-svn: 142696
This commit is contained in:
Argyrios Kyrtzidis 2011-10-21 23:02:28 +00:00
parent d4590a5d5f
commit 6a59897d50
6 changed files with 113 additions and 5 deletions

View File

@ -875,6 +875,8 @@ namespace clang {
STMT_STOP = 100,
/// \brief A NULL expression.
STMT_NULL_PTR,
/// \brief A reference to a previously [de]serialized Stmt record.
STMT_REF_PTR,
/// \brief A NullStmt record.
STMT_NULL,
/// \brief A CompoundStmt record.

View File

@ -24,6 +24,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include <map>
#include <queue>
@ -331,7 +332,9 @@ private:
SmallVector<QueuedCXXBaseSpecifiers, 2> CXXBaseSpecifiersToWrite;
/// \brief Write the given subexpression to the bitstream.
void WriteSubStmt(Stmt *S);
void WriteSubStmt(Stmt *S,
llvm::DenseMap<Stmt *, uint64_t> &SubStmtEntries,
llvm::DenseSet<Stmt *> &ParentStmts);
void WriteBlockInfoBlock();
void WriteMetadata(ASTContext &Context, StringRef isysroot,

View File

@ -1445,6 +1445,10 @@ Stmt *ASTReader::ReadStmtFromStream(Module &F) {
ReadingKindTracker ReadingKind(Read_Stmt, *this);
llvm::BitstreamCursor &Cursor = F.DeclsCursor;
// Map of offset to previously deserialized stmt. The offset points
/// just after the stmt record.
llvm::DenseMap<uint64_t, Stmt *> StmtEntries;
#ifndef NDEBUG
unsigned PrevNumStmts = StmtStack.size();
#endif
@ -1483,11 +1487,19 @@ Stmt *ASTReader::ReadStmtFromStream(Module &F) {
Idx = 0;
Record.clear();
bool Finished = false;
bool IsStmtReference = false;
switch ((StmtCode)Cursor.ReadRecord(Code, Record)) {
case STMT_STOP:
Finished = true;
break;
case STMT_REF_PTR:
IsStmtReference = true;
assert(StmtEntries.find(Record[0]) != StmtEntries.end() &&
"No stmt was recorded for this offset reference!");
S = StmtEntries[Record[Idx++]];
break;
case STMT_NULL_PTR:
S = 0;
break;
@ -2041,8 +2053,11 @@ Stmt *ASTReader::ReadStmtFromStream(Module &F) {
++NumStatementsRead;
if (S)
if (S && !IsStmtReference) {
Reader.Visit(S);
StmtEntries[Cursor.GetCurrentBitNo()] = S;
}
assert(Idx == Record.size() && "Invalid deserialization of statement");
StmtStack.push_back(S);

View File

@ -1443,7 +1443,9 @@ unsigned ASTWriter::getOpaqueValueID(OpaqueValueExpr *e) {
/// \brief Write the given substatement or subexpression to the
/// bitstream.
void ASTWriter::WriteSubStmt(Stmt *S) {
void ASTWriter::WriteSubStmt(Stmt *S,
llvm::DenseMap<Stmt *, uint64_t> &SubStmtEntries,
llvm::DenseSet<Stmt *> &ParentStmts) {
RecordData Record;
ASTStmtWriter Writer(*this, Record);
++NumStatements;
@ -1453,6 +1455,32 @@ void ASTWriter::WriteSubStmt(Stmt *S) {
return;
}
llvm::DenseMap<Stmt *, uint64_t>::iterator I = SubStmtEntries.find(S);
if (I != SubStmtEntries.end()) {
Record.push_back(I->second);
Stream.EmitRecord(serialization::STMT_REF_PTR, Record);
return;
}
#ifndef NDEBUG
assert(!ParentStmts.count(S) && "There is a Stmt cycle!");
struct ParentStmtInserterRAII {
Stmt *S;
llvm::DenseSet<Stmt *> &ParentStmts;
ParentStmtInserterRAII(Stmt *S, llvm::DenseSet<Stmt *> &ParentStmts)
: S(S), ParentStmts(ParentStmts) {
ParentStmts.insert(S);
}
~ParentStmtInserterRAII() {
ParentStmts.erase(S);
}
};
ParentStmtInserterRAII ParentStmtInserter(S, ParentStmts);
#endif
// Redirect ASTWriter::AddStmt to collect sub stmts.
SmallVector<Stmt *, 16> SubStmts;
CollectedStmts = &SubStmts;
@ -1478,9 +1506,11 @@ void ASTWriter::WriteSubStmt(Stmt *S) {
// This simplifies reading and allows to store a variable number of sub stmts
// without knowing it in advance.
while (!SubStmts.empty())
WriteSubStmt(SubStmts.pop_back_val());
WriteSubStmt(SubStmts.pop_back_val(), SubStmtEntries, ParentStmts);
Stream.EmitRecord(Writer.Code, Record, Writer.AbbrevToUse);
SubStmtEntries[S] = Stream.GetCurrentBitNo();
}
/// \brief Flush all of the statements that have been added to the
@ -1488,8 +1518,14 @@ void ASTWriter::WriteSubStmt(Stmt *S) {
void ASTWriter::FlushStmts() {
RecordData Record;
/// \brief Set of parent Stmts for the currently serializing sub stmt.
llvm::DenseSet<Stmt *> ParentStmts;
/// \brief Offsets of sub stmts already serialized. The offset points
/// just after the stmt record.
llvm::DenseMap<Stmt *, uint64_t> SubStmtEntries;
for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) {
WriteSubStmt(StmtsToEmit[I]);
WriteSubStmt(StmtsToEmit[I], SubStmtEntries, ParentStmts);
assert(N == StmtsToEmit.size() &&
"Substatement written via AddStmt rather than WriteSubStmt!");
@ -1498,6 +1534,9 @@ void ASTWriter::FlushStmts() {
// expression records that follow this one are part of a different
// expression.
Stream.EmitRecord(serialization::STMT_STOP, Record);
SubStmtEntries.clear();
ParentStmts.clear();
}
StmtsToEmit.clear();

View File

@ -0,0 +1,7 @@
// Test this without pch.
// RUN: %clang_cc1 %s -include %s.h -emit-llvm -o %t.withoutpch.ll
// Test with pch.
// RUN: %clang_cc1 %s.h -emit-pch -o %t.pch
// RUN: %clang_cc1 %s -include-pch %t.pch -emit-llvm -o %t.withpch.ll
// RUN: diff %t.withoutpch.ll %t.withpch.ll

View File

@ -0,0 +1,42 @@
static void *FooToken = &FooToken;
static void *FooTable[256] = {
[0x3] = (void *[256]) { // 1
[0x5b] = (void *[256]) { // 2
[0x81] = (void *[256]) { // 3
[0x42] = (void *[256]) { // 4
[0xa2] = (void *[256]) { // 5
[0xe] = (void *[256]) { // 6
[0x20] = (void *[256]) { // 7
[0xd7] = (void *[256]) { // 8
[0x39] = (void *[256]) { // 9
[0xf1] = (void *[256]) { // 10
[0xa4] = (void *[256]) { // 11
[0xa8] = (void *[256]) { // 12
[0x21] = (void *[256]) { // 13
[0x86] = (void *[256]) { // 14
[0x1d] = (void *[256]) { // 15
[0xdc] = (void *[256]) { // 16
[0xa5] = (void *[256]) { // 17
[0xef] = (void *[256]) { // 18
[0x9] = (void *[256]) { // 19
[0x34] = &FooToken,
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
}
};