hanchenye-llvm-project/lld/COFF/InputFiles.cpp

362 lines
11 KiB
C++
Raw Normal View History

//===- InputFiles.cpp -----------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support::endian;
using llvm::RoundUpToAlignment;
using llvm::Triple;
using llvm::support::ulittle32_t;
using llvm::sys::fs::file_magic;
using llvm::sys::fs::identify_magic;
namespace lld {
namespace coff {
int InputFile::NextIndex = 0;
// Returns the last element of a path, which is supposed to be a filename.
static StringRef getBasename(StringRef Path) {
size_t Pos = Path.find_last_of("\\/");
if (Pos == StringRef::npos)
return Path;
return Path.substr(Pos + 1);
}
// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
std::string InputFile::getShortName() {
if (ParentName == "")
return getName().lower();
std::string Res = (getBasename(ParentName) + "(" +
getBasename(getName()) + ")").str();
return StringRef(Res).lower();
}
void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
auto ArchiveOrErr = Archive::create(MB);
error(ArchiveOrErr, "Failed to parse static library");
File = std::move(*ArchiveOrErr);
// Allocate a buffer for Lazy objects.
size_t NumSyms = File->getNumberOfSymbols();
size_t BufSize = NumSyms * sizeof(Lazy);
Lazy *Buf = (Lazy *)Alloc.Allocate(BufSize, llvm::alignOf<Lazy>());
COFF: Change the order of adding symbols to the symbol table. Previously, the order of adding symbols to the symbol table was simple. We have a list of all input files. We read each file from beginning of the list and add all symbols in it to the symbol table. This patch changes that order. Now all archive files are added to the symbol table first, and then all the other object files are added. This shouldn't change the behavior in single-threading, and make room to parallelize in multi-threading. In the first step, only lazy symbols are added to the symbol table because archives contain only Lazy symbols. Member object files found to be necessary are queued. In the second step, defined and undefined symbols are added from object files. Adding an undefined symbol to the symbol table may cause more member files to be added to the queue. We simply continue reading all object files until the queue is empty. Finally, new archive or object files may be added to the queues by object files' directive sections (which contain new command line options). The above process is repeated until we get no new files. Symbols defined both in object files and in archives can make results undeterministic. If an archive is read before an object, a new member file gets linked, while in the other way, no new file would be added. That is the most popular cause of an undeterministic result or linking failure as I observed. Separating phases of adding lazy symbols and undefined symbols makes that deterministic. Adding symbols in each phase should be parallelizable. llvm-svn: 241107
2015-07-01 03:35:21 +08:00
LazySymbols.reserve(NumSyms);
// Read the symbol table to construct Lazy objects.
for (const Archive::Symbol &Sym : File->symbols())
LazySymbols.push_back(new (Buf++) Lazy(this, Sym));
// Seen is a map from member files to boolean values. Initially
// all members are mapped to false, which indicates all these files
// are not read yet.
for (const Archive::Child &Child : File->children())
Seen[Child.getChildOffset()].clear();
}
// Returns a buffer pointing to a member file containing a given symbol.
// This function is thread-safe.
MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) {
auto ItOrErr = Sym->getMember();
error(ItOrErr,
Twine("Could not get the member for symbol ") + Sym->getName());
Archive::child_iterator It = *ItOrErr;
// Return an empty buffer if we have already returned the same buffer.
if (Seen[It->getChildOffset()].test_and_set())
return MemoryBufferRef();
ErrorOr<MemoryBufferRef> Ret = It->getMemoryBufferRef();
error(Ret, Twine("Could not get the buffer for the member defining symbol ") +
Sym->getName());
return *Ret;
}
void ObjectFile::parse() {
// Parse a memory buffer as a COFF file.
auto BinOrErr = createBinary(MB);
error(BinOrErr, "Failed to parse object file");
std::unique_ptr<Binary> Bin = std::move(*BinOrErr);
if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
Bin.release();
COFFObj.reset(Obj);
} else {
error(Twine(getName()) + " is not a COFF file.");
}
// Read section and symbol tables.
initializeChunks();
initializeSymbols();
initializeSEH();
}
void ObjectFile::initializeChunks() {
uint32_t NumSections = COFFObj->getNumberOfSections();
Chunks.reserve(NumSections);
SparseChunks.resize(NumSections + 1);
for (uint32_t I = 1; I < NumSections + 1; ++I) {
const coff_section *Sec;
StringRef Name;
std::error_code EC = COFFObj->getSection(I, Sec);
error(EC, Twine("getSection failed: ") + Name);
EC = COFFObj->getSectionName(Sec, Name);
error(EC, Twine("getSectionName failed: ") + Name);
if (Name == ".sxdata") {
SXData = Sec;
continue;
}
if (Name == ".drectve") {
ArrayRef<uint8_t> Data;
COFFObj->getSectionContents(Sec, Data);
Directives = std::string((const char *)Data.data(), Data.size());
continue;
}
// Skip non-DWARF debug info. MSVC linker converts the sections into
// a PDB file, but we don't support that.
if (Name == ".debug" || Name.startswith(".debug$"))
continue;
// We want to preserve DWARF debug sections only when /debug is on.
if (!Config->Debug && Name.startswith(".debug"))
continue;
if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
continue;
auto *C = new (Alloc) SectionChunk(this, Sec);
Chunks.push_back(C);
SparseChunks[I] = C;
}
}
void ObjectFile::initializeSymbols() {
uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
SymbolBodies.reserve(NumSymbols);
SparseSymbolBodies.resize(NumSymbols);
llvm::SmallVector<Undefined *, 8> WeakAliases;
int32_t LastSectionNumber = 0;
for (uint32_t I = 0; I < NumSymbols; ++I) {
// Get a COFFSymbolRef object.
auto SymOrErr = COFFObj->getSymbol(I);
error(SymOrErr, Twine("broken object file: ") + getName());
COFFSymbolRef Sym = *SymOrErr;
const void *AuxP = nullptr;
if (Sym.getNumberOfAuxSymbols())
AuxP = COFFObj->getSymbol(I + 1)->getRawPtr();
bool IsFirst = (LastSectionNumber != Sym.getSectionNumber());
SymbolBody *Body = nullptr;
if (Sym.isUndefined()) {
Body = createUndefined(Sym);
} else if (Sym.isWeakExternal()) {
Body = createWeakExternal(Sym, AuxP);
WeakAliases.push_back((Undefined *)Body);
} else {
Body = createDefined(Sym, AuxP, IsFirst);
}
if (Body) {
SymbolBodies.push_back(Body);
SparseSymbolBodies[I] = Body;
}
I += Sym.getNumberOfAuxSymbols();
LastSectionNumber = Sym.getSectionNumber();
}
for (Undefined *U : WeakAliases)
U->WeakAlias = SparseSymbolBodies[(uintptr_t)U->WeakAlias];
}
Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) {
StringRef Name;
COFFObj->getSymbolName(Sym, Name);
return new (Alloc) Undefined(Name);
}
Undefined *ObjectFile::createWeakExternal(COFFSymbolRef Sym, const void *AuxP) {
StringRef Name;
COFFObj->getSymbolName(Sym, Name);
auto *U = new (Alloc) Undefined(Name);
auto *Aux = (const coff_aux_weak_external *)AuxP;
U->WeakAlias = (Undefined *)(uintptr_t)Aux->TagIndex;
return U;
}
Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
bool IsFirst) {
StringRef Name;
if (Sym.isCommon()) {
auto *C = new (Alloc) CommonChunk(Sym);
Chunks.push_back(C);
return new (Alloc) DefinedCommon(this, Sym, C);
}
if (Sym.isAbsolute()) {
COFFObj->getSymbolName(Sym, Name);
// Skip special symbols.
if (Name == "@comp.id")
return nullptr;
// COFF spec 5.10.1. The .sxdata section.
if (Name == "@feat.00") {
if (Sym.getValue() & 1)
SEHCompat = true;
return nullptr;
}
return new (Alloc) DefinedAbsolute(Name, Sym);
}
if (Sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_DEBUG)
return nullptr;
// Nothing else to do without a section chunk.
auto *SC = cast_or_null<SectionChunk>(SparseChunks[Sym.getSectionNumber()]);
if (!SC)
return nullptr;
// Handle associative sections
if (IsFirst && AuxP) {
auto *Aux = reinterpret_cast<const coff_aux_section_definition *>(AuxP);
if (Aux->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
if (auto *ParentSC = cast_or_null<SectionChunk>(
SparseChunks[Aux->getNumber(Sym.isBigObj())]))
ParentSC->addAssociative(SC);
}
auto *B = new (Alloc) DefinedRegular(this, Sym, SC);
if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP)
SC->setSymbol(B);
return B;
}
void ObjectFile::initializeSEH() {
if (!SEHCompat || !SXData)
return;
ArrayRef<uint8_t> A;
COFFObj->getSectionContents(SXData, A);
if (A.size() % 4 != 0)
error(".sxdata must be an array of symbol table indices");
auto *I = reinterpret_cast<const ulittle32_t *>(A.data());
auto *E = reinterpret_cast<const ulittle32_t *>(A.data() + A.size());
for (; I != E; ++I)
SEHandlers.insert(SparseSymbolBodies[*I]);
}
MachineTypes ObjectFile::getMachineType() {
if (COFFObj)
return static_cast<MachineTypes>(COFFObj->getMachine());
return IMAGE_FILE_MACHINE_UNKNOWN;
}
StringRef ltrim1(StringRef S, const char *Chars) {
if (!S.empty() && strchr(Chars, S[0]))
return S.substr(1);
return S;
}
void ImportFile::parse() {
const char *Buf = MB.getBufferStart();
const char *End = MB.getBufferEnd();
const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);
// Check if the total size is valid.
if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
error("broken import library");
// Read names and create an __imp_ symbol.
StringRef Name = StringAlloc.save(StringRef(Buf + sizeof(*Hdr)));
StringRef ImpName = StringAlloc.save(Twine("__imp_") + Name);
const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
DLLName = StringRef(NameStart).lower();
StringRef ExtName;
switch (Hdr->getNameType()) {
case IMPORT_ORDINAL:
ExtName = "";
break;
case IMPORT_NAME:
ExtName = Name;
break;
case IMPORT_NAME_NOPREFIX:
ExtName = ltrim1(Name, "?@_");
break;
case IMPORT_NAME_UNDECORATE:
ExtName = ltrim1(Name, "?@_");
ExtName = ExtName.substr(0, ExtName.find('@'));
break;
}
ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, ExtName, Hdr);
SymbolBodies.push_back(ImpSym);
// If type is function, we need to create a thunk which jump to an
// address pointed by the __imp_ symbol. (This allows you to call
// DLL functions just like regular non-DLL functions.)
if (Hdr->getType() != llvm::COFF::IMPORT_CODE)
return;
ThunkSym = new (Alloc) DefinedImportThunk(Name, ImpSym, Hdr->Machine);
SymbolBodies.push_back(ThunkSym);
}
void BitcodeFile::parse() {
std::string Err;
M.reset(LTOModule::createFromBuffer(MB.getBufferStart(),
MB.getBufferSize(),
llvm::TargetOptions(), Err));
if (!Err.empty())
error(Err);
llvm::StringSaver Saver(Alloc);
for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) {
lto_symbol_attributes Attrs = M->getSymbolAttributes(I);
if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL)
continue;
StringRef SymName = Saver.save(M->getSymbolName(I));
int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK;
if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) {
SymbolBodies.push_back(new (Alloc) Undefined(SymName));
} else {
bool Replaceable =
(SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common
(Attrs & LTO_SYMBOL_COMDAT) || // comdat
(SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external
(Attrs & LTO_SYMBOL_ALIAS)));
SymbolBodies.push_back(new (Alloc) DefinedBitcode(this, SymName,
Replaceable));
}
}
Directives = M->getLinkerOpts();
}
MachineTypes BitcodeFile::getMachineType() {
if (!M)
return IMAGE_FILE_MACHINE_UNKNOWN;
switch (Triple(M->getTargetTriple()).getArch()) {
case Triple::x86_64:
return AMD64;
case Triple::x86:
return I386;
case Triple::arm:
return ARMNT;
default:
return IMAGE_FILE_MACHINE_UNKNOWN;
}
}
} // namespace coff
} // namespace lld