diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 32c33388d1e1..5ee73f168896 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -591,9 +591,12 @@ namespace clang { /// SM_SLOC_BUFFER_ENTRY record or a SM_SLOC_FILE_ENTRY with an /// overridden buffer. SM_SLOC_BUFFER_BLOB = 3, + /// \brief Describes a zlib-compressed blob that contains the data for + /// a buffer entry. + SM_SLOC_BUFFER_BLOB_COMPRESSED = 4, /// \brief Describes a source location entry (SLocEntry) for a /// macro expansion. - SM_SLOC_EXPANSION_ENTRY = 4 + SM_SLOC_EXPANSION_ENTRY = 5 }; /// \brief Record types used within a preprocessor block. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index dbfc5c31db13..1c62b4e57b0b 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -48,6 +48,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitstreamReader.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -1203,6 +1204,32 @@ bool ASTReader::ReadSLocEntry(int ID) { return true; } + // Local helper to read the (possibly-compressed) buffer data following the + // entry record. + auto ReadBuffer = [this]( + BitstreamCursor &SLocEntryCursor, + StringRef Name) -> std::unique_ptr { + RecordData Record; + StringRef Blob; + unsigned Code = SLocEntryCursor.ReadCode(); + unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob); + + if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) { + SmallString<0> Uncompressed; + if (llvm::zlib::uncompress(Blob, Uncompressed, Record[0]) != + llvm::zlib::StatusOK) { + Error("could not decompress embedded file contents"); + return nullptr; + } + return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name); + } else if (RecCode == SM_SLOC_BUFFER_BLOB) { + return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true); + } else { + Error("AST record has invalid code"); + return nullptr; + } + }; + ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; F->SLocEntryCursor.JumpToBit(F->SLocEntryOffsets[ID - F->SLocEntryBaseID]); BitstreamCursor &SLocEntryCursor = F->SLocEntryCursor; @@ -1258,24 +1285,16 @@ bool ASTReader::ReadSLocEntry(int ID) { FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl, NumFileDecls)); } - + const SrcMgr::ContentCache *ContentCache = SourceMgr.getOrCreateContentCache(File, /*isSystemFile=*/FileCharacter != SrcMgr::C_User); if (OverriddenBuffer && !ContentCache->BufferOverridden && ContentCache->ContentsEntry == ContentCache->OrigEntry && !ContentCache->getRawBuffer()) { - unsigned Code = SLocEntryCursor.ReadCode(); - Record.clear(); - unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob); - - if (RecCode != SM_SLOC_BUFFER_BLOB) { - Error("AST record has invalid code"); + auto Buffer = ReadBuffer(SLocEntryCursor, File->getName()); + if (!Buffer) return true; - } - - std::unique_ptr Buffer - = llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), File->getName()); SourceMgr.overrideFileContents(File, std::move(Buffer)); } @@ -1292,18 +1311,10 @@ bool ASTReader::ReadSLocEntry(int ID) { (F->Kind == MK_ImplicitModule || F->Kind == MK_ExplicitModule)) { IncludeLoc = getImportLocation(F); } - unsigned Code = SLocEntryCursor.ReadCode(); - Record.clear(); - unsigned RecCode - = SLocEntryCursor.readRecord(Code, Record, &Blob); - if (RecCode != SM_SLOC_BUFFER_BLOB) { - Error("AST record has invalid code"); + auto Buffer = ReadBuffer(SLocEntryCursor, Name); + if (!Buffer) return true; - } - - std::unique_ptr Buffer = - llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name); SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID, BaseOffset + Offset, IncludeLoc); break; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3e153327923b..985bcb05b4d0 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -50,6 +50,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitstreamWriter.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -962,6 +963,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(SM_SLOC_FILE_ENTRY); RECORD(SM_SLOC_BUFFER_ENTRY); RECORD(SM_SLOC_BUFFER_BLOB); + RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED); RECORD(SM_SLOC_EXPANSION_ENTRY); // Preprocessor Block. @@ -1631,11 +1633,15 @@ static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) { /// \brief Create an abbreviation for the SLocEntry that refers to a /// buffer's blob. -static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream) { +static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream, + bool Compressed) { using namespace llvm; auto *Abbrev = new BitCodeAbbrev(); - Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_BLOB)); + Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED + : SM_SLOC_BUFFER_BLOB)); + if (Compressed) + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob return Stream.EmitAbbrev(Abbrev); } @@ -1857,12 +1863,14 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, RecordData Record; // Enter the source manager block. - Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 3); + Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4); // Abbreviations for the various kinds of source-location entries. unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream); unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream); - unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream); + unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false); + unsigned SLocBufferBlobCompressedAbbrv = + CreateSLocBufferBlobAbbrev(Stream, true); unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream); // Write out the source location entry table. We skip the first @@ -1902,6 +1910,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Record.push_back(File.hasLineDirectives()); const SrcMgr::ContentCache *Content = File.getContentCache(); + bool EmitBlob = false; if (Content->OrigEntry) { assert(Content->OrigEntry == Content->ContentsEntry && "Writing to AST an overridden file is not supported"); @@ -1923,14 +1932,8 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record); - if (Content->BufferOverridden || Content->IsTransient) { - RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB}; - const llvm::MemoryBuffer *Buffer - = Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager()); - Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, - StringRef(Buffer->getBufferStart(), - Buffer->getBufferSize() + 1)); - } + if (Content->BufferOverridden || Content->IsTransient) + EmitBlob = true; } else { // The source location entry is a buffer. The blob associated // with this entry contains the contents of the buffer. @@ -1943,15 +1946,34 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, const char *Name = Buffer->getBufferIdentifier(); Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record, StringRef(Name, strlen(Name) + 1)); - RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB}; - Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, - StringRef(Buffer->getBufferStart(), - Buffer->getBufferSize() + 1)); + EmitBlob = true; if (strcmp(Name, "") == 0) { PreloadSLocs.push_back(SLocEntryOffsets.size()); } } + + if (EmitBlob) { + // Include the implicit terminating null character in the on-disk buffer + // if we're writing it uncompressed. + const llvm::MemoryBuffer *Buffer = + Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager()); + StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1); + + // Compress the buffer if possible. We expect that almost all PCM + // consumers will not want its contents. + SmallString<0> CompressedBuffer; + if (llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer) == + llvm::zlib::StatusOK) { + RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED, + Blob.size() - 1}; + Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record, + CompressedBuffer); + } else { + RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB}; + Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob); + } + } } else { // The source location entry is a macro expansion. const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion(); diff --git a/clang/test/Modules/embed-files-compressed.cpp b/clang/test/Modules/embed-files-compressed.cpp new file mode 100644 index 000000000000..cf33a662f91f --- /dev/null +++ b/clang/test/Modules/embed-files-compressed.cpp @@ -0,0 +1,23 @@ +// REQUIRES: zlib +// REQUIRES: shell +// +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo '//////////////////////////////////////////////////////////////////////' > %t/a.h +// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h +// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h +// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h +// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h +// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h +// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h +// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h +// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h +// RUN: echo 'module a { header "a.h" }' > %t/modulemap +// +// RUN: %clang_cc1 -fmodules -I%t -fmodules-cache-path=%t -fmodule-name=a -emit-module %t/modulemap -fmodules-embed-all-files -o %t/a.pcm +// +// The above embeds ~4.5MB of highly-predictable /s and \ns into the pcm file. +// Check that the resulting file is under 40KB: +// +// RUN: wc -c %t/a.pcm | FileCheck --check-prefix=CHECK-SIZE %s +// CHECK-SIZE: {{(^|[^0-9])[123][0-9][0-9][0-9][0-9]($|[^0-9])}} diff --git a/clang/test/lit.cfg b/clang/test/lit.cfg index d73274c03f2f..e2166342685f 100644 --- a/clang/test/lit.cfg +++ b/clang/test/lit.cfg @@ -467,6 +467,11 @@ else: if config.enable_backtrace == "1": config.available_features.add("backtrace") +if config.have_zlib == "1": + config.available_features.add("zlib") +else: + config.available_features.add("nozlib") + # Check if we should run long running tests. if lit_config.params.get("run_long_tests", None) == "true": config.available_features.add("long_tests") diff --git a/clang/test/lit.site.cfg.in b/clang/test/lit.site.cfg.in index 332bcec14874..9dced0740e07 100644 --- a/clang/test/lit.site.cfg.in +++ b/clang/test/lit.site.cfg.in @@ -14,6 +14,7 @@ config.clang_tools_dir = "@CLANG_TOOLS_DIR@" config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" +config.have_zlib = "@HAVE_LIBZ@" config.clang_arcmt = @ENABLE_CLANG_ARCMT@ config.clang_staticanalyzer = @ENABLE_CLANG_STATIC_ANALYZER@ config.clang_examples = @ENABLE_CLANG_EXAMPLES@