Pre-compute the tail of the archive

An archive looks like

<header>
<symbol table>
<tail>

The symbol table refers to offsets in the tail. A complication is that
we would like to support symbol tables that use 64-bit offsets if it
turns out that any of the offsets is too big to fit in 32 bits.

This patch changes the archive writer to first compute the tail. We
cannot just compute one big StringRef since that would require reading
every member upfront, but we can represent it as a series of
StringRefs.

Having done that, it is much easier to compute the symbol table, and all
offsets are computed before it is written. With this, if there is an
accounting problem, it will show up with a regular symbol table, not
just when a 64-bit one is needed.

llvm-svn: 314844
This commit is contained in:
Rafael Espindola 2017-10-03 20:59:43 +00:00
parent a952b44ed5
commit 476a7f9293
1 changed file with 184 additions and 177 deletions

View File

@ -167,11 +167,11 @@ printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name,
} }
static void static void
printBSDMemberHeader(raw_ostream &Out, StringRef Name, printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name,
const sys::TimePoint<std::chrono::seconds> &ModTime, const sys::TimePoint<std::chrono::seconds> &ModTime,
unsigned UID, unsigned GID, unsigned Perms, unsigned UID, unsigned GID, unsigned Perms,
unsigned Size) { unsigned Size) {
uint64_t PosAfterHeader = Out.tell() + 60 + Name.size(); uint64_t PosAfterHeader = Pos + 60 + Name.size();
// Pad so that even 64 bit object files are aligned. // Pad so that even 64 bit object files are aligned.
unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); unsigned Pad = OffsetToAlignment(PosAfterHeader, 8);
unsigned NameWithPadding = Name.size() + Pad; unsigned NameWithPadding = Name.size() + Pad;
@ -179,7 +179,6 @@ printBSDMemberHeader(raw_ostream &Out, StringRef Name,
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, printRestOfMemberHeader(Out, ModTime, UID, GID, Perms,
NameWithPadding + Size); NameWithPadding + Size);
Out << Name; Out << Name;
assert(PosAfterHeader == Out.tell());
while (Pad--) while (Pad--)
Out.write(uint8_t(0)); Out.write(uint8_t(0));
} }
@ -188,21 +187,6 @@ static bool useStringTable(bool Thin, StringRef Name) {
return Thin || Name.size() >= 16 || Name.contains('/'); return Thin || Name.size() >= 16 || Name.contains('/');
} }
static void
printMemberHeader(raw_ostream &Out, object::Archive::Kind Kind, bool Thin,
StringRef Name,
std::vector<unsigned>::iterator &StringMapIndexIter,
const sys::TimePoint<std::chrono::seconds> &ModTime,
unsigned UID, unsigned GID, unsigned Perms, unsigned Size) {
if (isBSDLike(Kind))
return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
if (!useStringTable(Thin, Name))
return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
Out << '/';
printWithSpacePadding(Out, *StringMapIndexIter++, 15);
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
}
// Compute the relative path from From to To. // Compute the relative path from From to To.
static std::string computeRelativePath(StringRef From, StringRef To) { static std::string computeRelativePath(StringRef From, StringRef To) {
if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
@ -232,41 +216,56 @@ static std::string computeRelativePath(StringRef From, StringRef To) {
return Relative.str(); return Relative.str();
} }
static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, static void addToStringTable(raw_ostream &Out, StringRef ArcName,
ArrayRef<NewArchiveMember> Members, const NewArchiveMember &M, bool Thin) {
std::vector<unsigned> &StringMapIndexes, StringRef ID = M.Buf->getBufferIdentifier();
bool Thin) { if (Thin) {
unsigned StartOffset = 0; if (M.IsNew)
for (const NewArchiveMember &M : Members) { Out << computeRelativePath(ArcName, ID);
StringRef Path = M.Buf->getBufferIdentifier(); else
StringRef Name = M.MemberName; Out << ID;
if (!useStringTable(Thin, Name)) } else
continue; Out << M.MemberName;
if (StartOffset == 0) { Out << "/\n";
printWithSpacePadding(Out, "//", 58); }
Out << "`\n";
StartOffset = Out.tell();
}
StringMapIndexes.push_back(Out.tell() - StartOffset);
if (Thin) { static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
if (M.IsNew) raw_ostream &StringTable,
Out << computeRelativePath(ArcName, Path); object::Archive::Kind Kind, bool Thin,
else StringRef ArcName, const NewArchiveMember &M,
Out << M.Buf->getBufferIdentifier(); unsigned Size) {
} else if (isBSDLike(Kind))
Out << Name; return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID,
M.Perms, Size);
if (!useStringTable(Thin, M.MemberName))
return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID,
M.Perms, Size);
Out << '/';
uint64_t NamePos = StringTable.tell();
addToStringTable(StringTable, ArcName, M, Thin);
printWithSpacePadding(Out, NamePos, 15);
printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size);
}
Out << "/\n"; namespace {
} struct MemberData {
if (StartOffset == 0) std::vector<unsigned> Symbols;
return; std::string Header;
if (Out.tell() % 2) StringRef Data;
Out << '\n'; StringRef Padding;
int Pos = Out.tell(); };
Out.seek(StartOffset - 12); } // namespace
printWithSpacePadding(Out, Pos - StartOffset, 10);
Out.seek(Pos); static MemberData computeStringTable(StringRef Names) {
unsigned Size = Names.size();
unsigned Pad = OffsetToAlignment(Size, 2);
std::string Header;
raw_string_ostream Out(Header);
printWithSpacePadding(Out, "//", 48);
printWithSpacePadding(Out, Size + Pad, 10);
Out << "`\n";
Out.flush();
return {{}, std::move(Header), Names, Pad ? "\n" : ""};
} }
static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) {
@ -289,97 +288,137 @@ static bool isArchiveSymbol(const object::BasicSymbolRef &S) {
return true; return true;
} }
// Returns the offset of the first reference to a member offset. static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
static Expected<unsigned> bool Deterministic, ArrayRef<MemberData> Members,
writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, StringRef StringTable) {
ArrayRef<NewArchiveMember> Members, if (StringTable.empty())
std::vector<unsigned> &MemberOffsetRefs, bool Deterministic) { return;
unsigned HeaderStartOffset = 0;
unsigned BodyStartOffset = 0;
SmallString<128> NameBuf;
raw_svector_ostream NameOS(NameBuf);
LLVMContext Context;
for (unsigned MemberNum = 0, N = Members.size(); MemberNum < N; ++MemberNum) {
MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef();
Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
object::SymbolicFile::createSymbolicFile(
MemberBuffer, llvm::file_magic::unknown, &Context);
if (!ObjOrErr) {
// FIXME: check only for "not an object file" errors.
consumeError(ObjOrErr.takeError());
continue;
}
object::SymbolicFile &Obj = *ObjOrErr.get();
if (!HeaderStartOffset) { unsigned NumSyms = 0;
HeaderStartOffset = Out.tell(); for (const MemberData &M : Members)
if (isBSDLike(Kind)) NumSyms += M.Symbols.size();
printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0);
else
printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0);
BodyStartOffset = Out.tell();
print32(Out, Kind, 0); // number of entries or bytes
}
for (const object::BasicSymbolRef &S : Obj.symbols()) { unsigned Size = 0;
if (!isArchiveSymbol(S)) Size += 4; // Number of entries
continue;
unsigned NameOffset = NameOS.tell();
if (std::error_code EC = S.printName(NameOS))
return errorCodeToError(EC);
NameOS << '\0';
MemberOffsetRefs.push_back(MemberNum);
if (isBSDLike(Kind))
print32(Out, Kind, NameOffset);
print32(Out, Kind, 0); // member offset
}
}
if (HeaderStartOffset == 0)
return 0;
// ld64 prefers the cctools type archive which pads its string table to a
// boundary of sizeof(int32_t).
if (isBSDLike(Kind)) if (isBSDLike(Kind))
for (unsigned P = OffsetToAlignment(NameOS.tell(), sizeof(int32_t)); P--;) Size += NumSyms * 8; // Table
NameOS << '\0'; else
Size += NumSyms * 4; // Table
StringRef StringTable = NameOS.str();
if (isBSDLike(Kind)) if (isBSDLike(Kind))
print32(Out, Kind, StringTable.size()); // byte count of the string table Size += 4; // byte count
Out << StringTable; Size += StringTable.size();
// If there are no symbols, emit an empty symbol table, to satisfy Solaris
// tools, older versions of which expect a symbol table in a non-empty
// archive, regardless of whether there are any symbols in it.
if (StringTable.size() == 0)
print32(Out, Kind, 0);
// ld64 expects the members to be 8-byte aligned for 64-bit content and at // ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding // least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly. // uniformly.
// We do this for all bsd formats because it simplifies aligning members. // We do this for all bsd formats because it simplifies aligning members.
unsigned Alignment = isBSDLike(Kind) ? 8 : 2; unsigned Alignment = isBSDLike(Kind) ? 8 : 2;
unsigned Pad = OffsetToAlignment(Out.tell(), Alignment); unsigned Pad = OffsetToAlignment(Size, Alignment);
while (Pad--) Size += Pad;
Out.write(uint8_t(0));
// Patch up the size of the symbol table now that we know how big it is. if (isBSDLike(Kind))
unsigned Pos = Out.tell(); printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0,
const unsigned MemberHeaderSize = 60; 0, Size);
Out.seek(HeaderStartOffset + 48); // offset of the size field. else
printWithSpacePadding(Out, Pos - MemberHeaderSize - HeaderStartOffset, 10); printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size);
uint64_t Pos = Out.tell() + Size;
// Patch up the number of symbols.
Out.seek(BodyStartOffset);
unsigned NumSyms = MemberOffsetRefs.size();
if (isBSDLike(Kind)) if (isBSDLike(Kind))
print32(Out, Kind, NumSyms * 8); print32(Out, Kind, NumSyms * 8);
else else
print32(Out, Kind, NumSyms); print32(Out, Kind, NumSyms);
Out.seek(Pos); for (const MemberData &M : Members) {
return BodyStartOffset + 4; for (unsigned StringOffset : M.Symbols) {
if (isBSDLike(Kind))
print32(Out, Kind, StringOffset);
print32(Out, Kind, Pos); // member offset
}
Pos += M.Header.size() + M.Data.size() + M.Padding.size();
}
if (isBSDLike(Kind))
print32(Out, Kind, StringTable.size()); // byte count of the string table
Out << StringTable;
while (Pad--)
Out.write(uint8_t(0));
}
static Expected<std::vector<unsigned>>
getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
std::vector<unsigned> Ret;
LLVMContext Context;
Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown,
&Context);
if (!ObjOrErr) {
// FIXME: check only for "not an object file" errors.
consumeError(ObjOrErr.takeError());
return Ret;
}
HasObject = true;
object::SymbolicFile &Obj = *ObjOrErr.get();
for (const object::BasicSymbolRef &S : Obj.symbols()) {
if (!isArchiveSymbol(S))
continue;
Ret.push_back(SymNames.tell());
if (auto EC = S.printName(SymNames))
return errorCodeToError(EC);
SymNames << '\0';
}
return Ret;
}
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, StringRef ArcName,
ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
// This ignores the symbol table, but we only need the value mod 8 and the
// symbol table is aligned to be a multiple of 8 bytes
uint64_t Pos = 0;
std::vector<MemberData> Ret;
bool HasObject = false;
for (const NewArchiveMember &M : NewMembers) {
std::string Header;
raw_string_ostream Out(Header);
MemoryBufferRef Buf = M.Buf->getMemBufferRef();
StringRef Data = Thin ? "" : Buf.getBuffer();
// ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly. This matches the behaviour with cctools and ensures that ld64
// is happy with archives that we generate.
unsigned MemberPadding = Kind == object::Archive::K_DARWIN
? OffsetToAlignment(Data.size(), 8)
: 0;
unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2);
StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding);
printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M,
Buf.getBufferSize() + MemberPadding);
Out.flush();
Expected<std::vector<unsigned>> Symbols =
getSymbols(Buf, SymNames, HasObject);
if (auto E = Symbols.takeError())
return std::move(E);
Pos += Header.size() + Data.size() + Padding.size();
Ret.push_back({std::move(*Symbols), std::move(Header), Data, Padding});
}
// If there are no symbols, emit an empty symbol table, to satisfy Solaris
// tools, older versions of which expect a symbol table in a non-empty
// archive, regardless of whether there are any symbols in it.
if (HasObject && SymNames.tell() == 0)
SymNames << '\0' << '\0' << '\0';
return Ret;
} }
Error llvm::writeArchive(StringRef ArcName, Error llvm::writeArchive(StringRef ArcName,
@ -388,6 +427,21 @@ Error llvm::writeArchive(StringRef ArcName,
bool Deterministic, bool Thin, bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf) { std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
SmallString<0> SymNamesBuf;
raw_svector_ostream SymNames(SymNamesBuf);
SmallString<0> StringTableBuf;
raw_svector_ostream StringTable(StringTableBuf);
Expected<std::vector<MemberData>> DataOrErr =
computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
if (!StringTableBuf.empty())
Data.insert(Data.begin(), computeStringTable(StringTableBuf));
SmallString<128> TmpArchive; SmallString<128> TmpArchive;
int TmpArchiveFD; int TmpArchiveFD;
if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a",
@ -401,58 +455,11 @@ Error llvm::writeArchive(StringRef ArcName,
else else
Out << "!<arch>\n"; Out << "!<arch>\n";
std::vector<unsigned> MemberOffsetRefs; if (WriteSymtab)
writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf);
unsigned MemberReferenceOffset = 0; for (const MemberData &M : Data)
if (WriteSymtab) { Out << M.Header << M.Data << M.Padding;
Expected<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable(
Out, Kind, NewMembers, MemberOffsetRefs, Deterministic);
if (auto E = MemberReferenceOffsetOrErr.takeError())
return E;
MemberReferenceOffset = MemberReferenceOffsetOrErr.get();
}
std::vector<unsigned> StringMapIndexes;
if (!isBSDLike(Kind))
writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin);
std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin();
std::vector<unsigned> MemberOffset;
for (const NewArchiveMember &M : NewMembers) {
MemoryBufferRef File = M.Buf->getMemBufferRef();
unsigned Padding = 0;
unsigned Pos = Out.tell();
MemberOffset.push_back(Pos);
// ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly. This matches the behaviour with cctools and ensures that ld64
// is happy with archives that we generate.
if (Kind == object::Archive::K_DARWIN)
Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8);
printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter,
M.ModTime, M.UID, M.GID, M.Perms,
M.Buf->getBufferSize() + Padding);
if (!Thin)
Out << File.getBuffer();
while (Padding--)
Out << '\n';
if (Out.tell() % 2)
Out << '\n';
}
if (MemberReferenceOffset) {
Out.seek(MemberReferenceOffset);
for (unsigned MemberNum : MemberOffsetRefs) {
if (isBSDLike(Kind))
Out.seek(Out.tell() + 4); // skip over the string offset
print32(Out, Kind, MemberOffset[MemberNum]);
}
}
Output.keep(); Output.keep();
Out.close(); Out.close();