hanchenye-llvm-project/lld/COFF/InputFiles.h

200 lines
6.2 KiB
C
Raw Normal View History

//===- InputFiles.h -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_INPUT_FILES_H
#define LLD_COFF_INPUT_FILES_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <memory>
#include <set>
#include <vector>
namespace lld {
namespace coff {
using llvm::LTOModule;
using llvm::object::Archive;
using llvm::object::COFFObjectFile;
using llvm::object::COFFSymbolRef;
class Chunk;
class Defined;
COFF: Change the order of adding symbols to the symbol table. Previously, the order of adding symbols to the symbol table was simple. We have a list of all input files. We read each file from beginning of the list and add all symbols in it to the symbol table. This patch changes that order. Now all archive files are added to the symbol table first, and then all the other object files are added. This shouldn't change the behavior in single-threading, and make room to parallelize in multi-threading. In the first step, only lazy symbols are added to the symbol table because archives contain only Lazy symbols. Member object files found to be necessary are queued. In the second step, defined and undefined symbols are added from object files. Adding an undefined symbol to the symbol table may cause more member files to be added to the queue. We simply continue reading all object files until the queue is empty. Finally, new archive or object files may be added to the queues by object files' directive sections (which contain new command line options). The above process is repeated until we get no new files. Symbols defined both in object files and in archives can make results undeterministic. If an archive is read before an object, a new member file gets linked, while in the other way, no new file would be added. That is the most popular cause of an undeterministic result or linking failure as I observed. Separating phases of adding lazy symbols and undefined symbols makes that deterministic. Adding symbols in each phase should be parallelizable. llvm-svn: 241107
2015-07-01 03:35:21 +08:00
class Lazy;
class SymbolBody;
class Undefined;
// The root class of input files.
class InputFile {
public:
enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind };
Kind kind() const { return FileKind; }
virtual ~InputFile() {}
// Returns the filename.
StringRef getName() { return MB.getBufferIdentifier(); }
// Returns symbols defined by this file.
virtual std::vector<SymbolBody *> &getSymbols() = 0;
// Reads a file (constructors don't do that). Returns an error if a
// file is broken.
virtual std::error_code parse() = 0;
// Returns a short, human-friendly filename. If this is a member of
// an archive file, a returned value includes parent's filename.
// Used for logging or debugging.
std::string getShortName();
// Sets a parent filename if this file is created from an archive.
void setParentName(StringRef N) { ParentName = N; }
// Returns .drectve section contents if exist.
StringRef getDirectives() { return StringRef(Directives).trim(); }
// Each file has a unique index. The index number is used to
// resolve ties in symbol resolution.
int Index;
static int NextIndex;
protected:
InputFile(Kind K, MemoryBufferRef M)
: Index(NextIndex++), MB(M), FileKind(K) {}
MemoryBufferRef MB;
std::string Directives;
private:
const Kind FileKind;
StringRef ParentName;
};
// .lib or .a file.
class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
std::error_code parse() override;
// Returns a memory buffer for a given symbol. An empty memory buffer
// is returned if we have already returned the same memory buffer.
// (So that we don't instantiate same members more than once.)
ErrorOr<MemoryBufferRef> getMember(const Archive::Symbol *Sym);
COFF: Change the order of adding symbols to the symbol table. Previously, the order of adding symbols to the symbol table was simple. We have a list of all input files. We read each file from beginning of the list and add all symbols in it to the symbol table. This patch changes that order. Now all archive files are added to the symbol table first, and then all the other object files are added. This shouldn't change the behavior in single-threading, and make room to parallelize in multi-threading. In the first step, only lazy symbols are added to the symbol table because archives contain only Lazy symbols. Member object files found to be necessary are queued. In the second step, defined and undefined symbols are added from object files. Adding an undefined symbol to the symbol table may cause more member files to be added to the queue. We simply continue reading all object files until the queue is empty. Finally, new archive or object files may be added to the queues by object files' directive sections (which contain new command line options). The above process is repeated until we get no new files. Symbols defined both in object files and in archives can make results undeterministic. If an archive is read before an object, a new member file gets linked, while in the other way, no new file would be added. That is the most popular cause of an undeterministic result or linking failure as I observed. Separating phases of adding lazy symbols and undefined symbols makes that deterministic. Adding symbols in each phase should be parallelizable. llvm-svn: 241107
2015-07-01 03:35:21 +08:00
std::vector<Lazy *> &getLazySymbols() { return LazySymbols; }
// All symbols returned by ArchiveFiles are of Lazy type.
std::vector<SymbolBody *> &getSymbols() override {
llvm_unreachable("internal error");
}
private:
std::unique_ptr<Archive> File;
std::string Filename;
COFF: Change the order of adding symbols to the symbol table. Previously, the order of adding symbols to the symbol table was simple. We have a list of all input files. We read each file from beginning of the list and add all symbols in it to the symbol table. This patch changes that order. Now all archive files are added to the symbol table first, and then all the other object files are added. This shouldn't change the behavior in single-threading, and make room to parallelize in multi-threading. In the first step, only lazy symbols are added to the symbol table because archives contain only Lazy symbols. Member object files found to be necessary are queued. In the second step, defined and undefined symbols are added from object files. Adding an undefined symbol to the symbol table may cause more member files to be added to the queue. We simply continue reading all object files until the queue is empty. Finally, new archive or object files may be added to the queues by object files' directive sections (which contain new command line options). The above process is repeated until we get no new files. Symbols defined both in object files and in archives can make results undeterministic. If an archive is read before an object, a new member file gets linked, while in the other way, no new file would be added. That is the most popular cause of an undeterministic result or linking failure as I observed. Separating phases of adding lazy symbols and undefined symbols makes that deterministic. Adding symbols in each phase should be parallelizable. llvm-svn: 241107
2015-07-01 03:35:21 +08:00
std::vector<Lazy *> LazySymbols;
std::map<const char *, std::atomic_flag> Seen;
llvm::MallocAllocator Alloc;
};
// .obj or .o file. This may be a member of an archive file.
class ObjectFile : public InputFile {
public:
explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
std::error_code parse() override;
std::vector<Chunk *> &getChunks() { return Chunks; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
// Returns a SymbolBody object for the SymbolIndex'th symbol in the
// underlying object file.
SymbolBody *getSymbolBody(uint32_t SymbolIndex) {
return SparseSymbolBodies[SymbolIndex];
}
// Returns the underying COFF file.
COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
private:
std::error_code initializeChunks();
std::error_code initializeSymbols();
Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst);
Undefined *createUndefined(COFFSymbolRef Sym);
Undefined *createWeakExternal(COFFSymbolRef Sym, const void *Aux);
std::unique_ptr<COFFObjectFile> COFFObj;
llvm::BumpPtrAllocator Alloc;
// List of all chunks defined by this file. This includes both section
// chunks and non-section chunks for common symbols.
std::vector<Chunk *> Chunks;
// This vector contains the same chunks as Chunks, but they are
// indexed such that you can get a SectionChunk by section index.
// Nonexistent section indices are filled with null pointers.
// (Because section number is 1-based, the first slot is always a
// null pointer.)
std::vector<Chunk *> SparseChunks;
// List of all symbols referenced or defined by this file.
std::vector<SymbolBody *> SymbolBodies;
// This vector contains the same symbols as SymbolBodies, but they
// are indexed such that you can get a SymbolBody by symbol
// index. Nonexistent indices (which are occupied by auxiliary
// symbols in the real symbol table) are filled with null pointers.
std::vector<SymbolBody *> SparseSymbolBodies;
};
// This type represents import library members that contain DLL names
// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
// for details about the format.
class ImportFile : public InputFile {
public:
explicit ImportFile(MemoryBufferRef M)
: InputFile(ImportKind, M), StringAlloc(StringAllocAux) {}
static bool classof(const InputFile *F) { return F->kind() == ImportKind; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
private:
std::error_code parse() override;
std::vector<SymbolBody *> SymbolBodies;
llvm::BumpPtrAllocator Alloc;
llvm::BumpPtrAllocator StringAllocAux;
llvm::BumpPtrStringSaver StringAlloc;
};
// Used for LTO.
class BitcodeFile : public InputFile {
public:
explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
LTOModule *getModule() const { return M.get(); }
LTOModule *releaseModule() { return M.release(); }
private:
std::error_code parse() override;
std::vector<SymbolBody *> SymbolBodies;
llvm::BumpPtrAllocator Alloc;
std::unique_ptr<LTOModule> M;
};
} // namespace coff
} // namespace lld
#endif