[modules] Compress files embedded into a .pcm file, to reduce the disk usage of -fmodules-embed-all-files mode.

llvm-svn: 259976
This commit is contained in:
Richard Smith 2016-02-06 02:06:43 +00:00
parent a40030f308
commit aada85c5f7
6 changed files with 103 additions and 38 deletions

View File

@ -591,9 +591,12 @@ namespace clang {
/// SM_SLOC_BUFFER_ENTRY record or a SM_SLOC_FILE_ENTRY with an
/// overridden buffer.
SM_SLOC_BUFFER_BLOB = 3,
/// \brief Describes a zlib-compressed blob that contains the data for
/// a buffer entry.
SM_SLOC_BUFFER_BLOB_COMPRESSED = 4,
/// \brief Describes a source location entry (SLocEntry) for a
/// macro expansion.
SM_SLOC_EXPANSION_ENTRY = 4
SM_SLOC_EXPANSION_ENTRY = 5
};
/// \brief Record types used within a preprocessor block.

View File

@ -48,6 +48,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@ -1203,6 +1204,32 @@ bool ASTReader::ReadSLocEntry(int ID) {
return true;
}
// Local helper to read the (possibly-compressed) buffer data following the
// entry record.
//
// Reads the next record from \p SLocEntryCursor and turns its blob into a
// memory buffer named \p Name:
//  - SM_SLOC_BUFFER_BLOB_COMPRESSED: the blob is zlib-compressed and the
//    first record operand is the uncompressed size; the data is inflated
//    and copied into a newly created buffer.
//  - SM_SLOC_BUFFER_BLOB: the blob is stored verbatim with one extra
//    trailing byte, which is dropped before wrapping the data.
// Any other record code, a compressed record that lacks its size operand,
// or a decompression failure is reported through Error() and yields nullptr.
auto ReadBuffer = [this](
    BitstreamCursor &SLocEntryCursor,
    StringRef Name) -> std::unique_ptr<llvm::MemoryBuffer> {
  RecordData Record;
  StringRef Blob;
  unsigned Code = SLocEntryCursor.ReadCode();
  unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);

  if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) {
    // A well-formed compressed record always carries the uncompressed size
    // as its first operand. Guard against a malformed file so that we never
    // index into an empty record.
    SmallString<0> Uncompressed;
    if (Record.empty() ||
        llvm::zlib::uncompress(Blob, Uncompressed, Record[0]) !=
            llvm::zlib::StatusOK) {
      Error("could not decompress embedded file contents");
      return nullptr;
    }
    // The inflated data lives in a local, so the buffer must own a copy.
    return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name);
  }

  if (RecCode == SM_SLOC_BUFFER_BLOB) {
    // The uncompressed blob is stored with one extra trailing byte; strip it
    // from the buffer's contents.
    return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true);
  }

  Error("AST record has invalid code");
  return nullptr;
};
ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second;
F->SLocEntryCursor.JumpToBit(F->SLocEntryOffsets[ID - F->SLocEntryBaseID]);
BitstreamCursor &SLocEntryCursor = F->SLocEntryCursor;
@ -1258,24 +1285,16 @@ bool ASTReader::ReadSLocEntry(int ID) {
FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl,
NumFileDecls));
}
const SrcMgr::ContentCache *ContentCache
= SourceMgr.getOrCreateContentCache(File,
/*isSystemFile=*/FileCharacter != SrcMgr::C_User);
if (OverriddenBuffer && !ContentCache->BufferOverridden &&
ContentCache->ContentsEntry == ContentCache->OrigEntry &&
!ContentCache->getRawBuffer()) {
unsigned Code = SLocEntryCursor.ReadCode();
Record.clear();
unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);
if (RecCode != SM_SLOC_BUFFER_BLOB) {
Error("AST record has invalid code");
auto Buffer = ReadBuffer(SLocEntryCursor, File->getName());
if (!Buffer)
return true;
}
std::unique_ptr<llvm::MemoryBuffer> Buffer
= llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), File->getName());
SourceMgr.overrideFileContents(File, std::move(Buffer));
}
@ -1292,18 +1311,10 @@ bool ASTReader::ReadSLocEntry(int ID) {
(F->Kind == MK_ImplicitModule || F->Kind == MK_ExplicitModule)) {
IncludeLoc = getImportLocation(F);
}
unsigned Code = SLocEntryCursor.ReadCode();
Record.clear();
unsigned RecCode
= SLocEntryCursor.readRecord(Code, Record, &Blob);
if (RecCode != SM_SLOC_BUFFER_BLOB) {
Error("AST record has invalid code");
auto Buffer = ReadBuffer(SLocEntryCursor, Name);
if (!Buffer)
return true;
}
std::unique_ptr<llvm::MemoryBuffer> Buffer =
llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name);
SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
BaseOffset + Offset, IncludeLoc);
break;

View File

@ -50,6 +50,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@ -962,6 +963,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(SM_SLOC_FILE_ENTRY);
RECORD(SM_SLOC_BUFFER_ENTRY);
RECORD(SM_SLOC_BUFFER_BLOB);
RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED);
RECORD(SM_SLOC_EXPANSION_ENTRY);
// Preprocessor Block.
@ -1631,11 +1633,15 @@ static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) {
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// buffer's blob.
///
/// When \p Compressed is true, the abbreviation describes a
/// SM_SLOC_BUFFER_BLOB_COMPRESSED record whose blob is preceded by a VBR8
/// operand holding the uncompressed size. Otherwise it describes a
/// SM_SLOC_BUFFER_BLOB record that consists of the blob alone.
///
/// \returns the value produced by Stream.EmitAbbrev() for the new
/// abbreviation.
static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream) {
static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
bool Compressed) {
using namespace llvm;
auto *Abbrev = new BitCodeAbbrev();
Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_BLOB));
Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED
: SM_SLOC_BUFFER_BLOB));
if (Compressed)
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob
return Stream.EmitAbbrev(Abbrev);
}
@ -1857,12 +1863,14 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
RecordData Record;
// Enter the source manager block.
Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 3);
Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4);
// Abbreviations for the various kinds of source-location entries.
unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream);
unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream);
unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream);
unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false);
unsigned SLocBufferBlobCompressedAbbrv =
CreateSLocBufferBlobAbbrev(Stream, true);
unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream);
// Write out the source location entry table. We skip the first
@ -1902,6 +1910,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
Record.push_back(File.hasLineDirectives());
const SrcMgr::ContentCache *Content = File.getContentCache();
bool EmitBlob = false;
if (Content->OrigEntry) {
assert(Content->OrigEntry == Content->ContentsEntry &&
"Writing to AST an overridden file is not supported");
@ -1923,14 +1932,8 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record);
if (Content->BufferOverridden || Content->IsTransient) {
RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
const llvm::MemoryBuffer *Buffer
= Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
StringRef(Buffer->getBufferStart(),
Buffer->getBufferSize() + 1));
}
if (Content->BufferOverridden || Content->IsTransient)
EmitBlob = true;
} else {
// The source location entry is a buffer. The blob associated
// with this entry contains the contents of the buffer.
@ -1943,15 +1946,34 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
const char *Name = Buffer->getBufferIdentifier();
Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record,
StringRef(Name, strlen(Name) + 1));
RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
StringRef(Buffer->getBufferStart(),
Buffer->getBufferSize() + 1));
EmitBlob = true;
if (strcmp(Name, "<built-in>") == 0) {
PreloadSLocs.push_back(SLocEntryOffsets.size());
}
}
if (EmitBlob) {
// Include the implicit terminating null character in the on-disk buffer
// if we're writing it uncompressed.
const llvm::MemoryBuffer *Buffer =
Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1);
// Compress the buffer if possible. We expect that almost all PCM
// consumers will not want its contents.
SmallString<0> CompressedBuffer;
if (llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer) ==
llvm::zlib::StatusOK) {
RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED,
Blob.size() - 1};
Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record,
CompressedBuffer);
} else {
RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob);
}
}
} else {
// The source location entry is a macro expansion.
const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion();

View File

@ -0,0 +1,23 @@
// REQUIRES: zlib
// REQUIRES: shell
//
// RUN: rm -rf %t
// RUN: mkdir %t
// RUN: echo '//////////////////////////////////////////////////////////////////////' > %t/a.h
// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
// RUN: echo 'module a { header "a.h" }' > %t/modulemap
//
// RUN: %clang_cc1 -fmodules -I%t -fmodules-cache-path=%t -fmodule-name=a -emit-module %t/modulemap -fmodules-embed-all-files -o %t/a.pcm
//
// The above embeds ~4.5MB of highly-predictable /s and \ns into the pcm file.
// Check that the resulting file is under 40KB:
//
// RUN: wc -c %t/a.pcm | FileCheck --check-prefix=CHECK-SIZE %s
// CHECK-SIZE: {{(^|[^0-9])[123][0-9][0-9][0-9][0-9]($|[^0-9])}}

View File

@ -467,6 +467,11 @@ else:
if config.enable_backtrace == "1":
config.available_features.add("backtrace")
if config.have_zlib == "1":
config.available_features.add("zlib")
else:
config.available_features.add("nozlib")
# Check if we should run long running tests.
if lit_config.params.get("run_long_tests", None) == "true":
config.available_features.add("long_tests")

View File

@ -14,6 +14,7 @@ config.clang_tools_dir = "@CLANG_TOOLS_DIR@"
config.host_triple = "@LLVM_HOST_TRIPLE@"
config.target_triple = "@TARGET_TRIPLE@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.have_zlib = "@HAVE_LIBZ@"
config.clang_arcmt = @ENABLE_CLANG_ARCMT@
config.clang_staticanalyzer = @ENABLE_CLANG_STATIC_ANALYZER@
config.clang_examples = @ENABLE_CLANG_EXAMPLES@