[LLD] [COFF] Support linking directly against DLLs in MinGW mode

GNU ld.bfd supports linking directly against DLLs without using an
import library, and some projects have picked up on this habit.
(There's no one single unsurmountable issue with using import
libraries, but this is a regularly surfacing missing feature.)

As long as one is linking by name (instead of by ordinal), the DLL
export table contains most of the information needed. (One can
inspect what section a symbol points at, to see if it's a function
or data symbol. The practical implementation of this loops over all
sections for each symbol, but as long as they're not very many, that
should hopefully be tolerable performance wise.)

One exception where the information in the DLL isn't entirely enough
is on i386 with stdcall functions; depending on how they're done,
the exported function name can be a plain undecorated name, while
the import library would contain the full decorated symbol name. This
issue is addressed separately in a different patch.

This is implemented mimicing the structure of a regular import library,
with one InputFile corresponding to the static archive that just adds
lazy symbols, which then are fetched when they are needed. When such
a symbol is fetched, we synthesize a coff_import_header structure
in memory and create a regular ImportFile out of it.

The implementation could be even smaller by just creating ImportFiles
for every symbol available immediately, but that would have the
drawback of actually ending up importing all symbols unless running
with GC enabled (and mingw mode defaults to having it disabled for
historical reasons).

Differential Revision: https://reviews.llvm.org/D104530
This commit is contained in:
Martin Storsjö 2021-06-16 16:59:46 +03:00
parent 86c5afa6e6
commit a9ff1ce1b9
10 changed files with 292 additions and 2 deletions

View File

@ -234,6 +234,10 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
error(filename + ": is not a native COFF file. Recompile without /GL");
break;
case file_magic::pecoff_executable:
if (config->mingw) {
symtab->addFile(make<DLLFile>(mbref));
break;
}
if (filename.endswith_insensitive(".dll")) {
error(filename + ": bad file type. Did you specify a DLL instead of an "
"import library?");

View File

@ -1138,3 +1138,93 @@ std::string lld::coff::replaceThinLTOSuffix(StringRef path) {
return (path + repl).str();
return std::string(path);
}
static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
const coff_section *sec = CHECK(coffObj->getSection(i), file);
if (rva >= sec->VirtualAddress &&
rva <= sec->VirtualAddress + sec->VirtualSize) {
return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
}
}
return false;
}
void DLLFile::parse() {
// Parse a memory buffer as a PE-COFF executable.
std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
if (auto *obj = dyn_cast<COFFObjectFile>(bin.get())) {
bin.release();
coffObj.reset(obj);
} else {
error(toString(this) + " is not a COFF file");
return;
}
if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
error(toString(this) + " is not a PE-COFF executable");
return;
}
for (const auto &exp : coffObj->export_directories()) {
StringRef dllName, symbolName;
uint32_t exportRVA;
checkError(exp.getDllName(dllName));
checkError(exp.getSymbolName(symbolName));
checkError(exp.getExportRVA(exportRVA));
if (symbolName.empty())
continue;
bool code = isRVACode(coffObj.get(), exportRVA, this);
Symbol *s = make<Symbol>();
s->dllName = dllName;
s->symbolName = symbolName;
s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
s->nameType = ImportNameType::IMPORT_NAME;
if (coffObj->getMachine() == I386) {
s->symbolName = symbolName = saver.save("_" + symbolName);
s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
}
StringRef impName = saver.save("__imp_" + symbolName);
symtab->addLazyDLLSymbol(this, s, impName);
if (code)
symtab->addLazyDLLSymbol(this, s, symbolName);
}
}
MachineTypes DLLFile::getMachineType() {
if (coffObj)
return static_cast<MachineTypes>(coffObj->getMachine());
return IMAGE_FILE_MACHINE_UNKNOWN;
}
void DLLFile::makeImport(DLLFile::Symbol *s) {
if (!seen.insert(s->symbolName).second)
return;
size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
size_t size = sizeof(coff_import_header) + impSize;
char *buf = bAlloc.Allocate<char>(size);
memset(buf, 0, size);
char *p = buf;
auto *imp = reinterpret_cast<coff_import_header *>(p);
p += sizeof(*imp);
imp->Sig2 = 0xFFFF;
imp->Machine = coffObj->getMachine();
imp->SizeOfData = impSize;
imp->OrdinalHint = 0; // Only linking by name
imp->TypeInfo = (s->nameType << 2) | s->importType;
// Write symbol name and DLL name.
memcpy(p, s->symbolName.data(), s->symbolName.size());
p += s->symbolName.size() + 1;
memcpy(p, s->dllName.data(), s->dllName.size());
MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
ImportFile *impFile = make<ImportFile>(mbref);
symtab->addFile(impFile);
}

View File

@ -14,6 +14,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
@ -67,7 +68,8 @@ public:
LazyObjectKind,
PDBKind,
ImportKind,
BitcodeKind
BitcodeKind,
DLLKind
};
Kind kind() const { return fileKind; }
virtual ~InputFile() {}
@ -393,6 +395,28 @@ private:
std::vector<Symbol *> symbols;
};
// .dll file.
class DLLFile : public InputFile {
public:
explicit DLLFile(MemoryBufferRef m) : InputFile(DLLKind, m) {}
static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
void parse() override;
MachineTypes getMachineType() override;
struct Symbol {
StringRef dllName;
StringRef symbolName;
llvm::COFF::ImportNameType nameType;
llvm::COFF::ImportType importType;
};
void makeImport(Symbol *s);
private:
std::unique_ptr<COFFObjectFile> coffObj;
llvm::StringSet<> seen;
};
inline bool isBitcode(MemoryBufferRef mb) {
return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
}

View File

@ -75,6 +75,11 @@ static void forceLazy(Symbol *s) {
case Symbol::Kind::LazyObjectKind:
cast<LazyObject>(s)->file->fetch();
break;
case Symbol::Kind::LazyDLLSymbolKind: {
auto *l = cast<LazyDLLSymbol>(s);
l->file->makeImport(l->sym);
break;
}
default:
llvm_unreachable(
"symbol passed to forceLazy is not a LazyArchive or LazyObject");
@ -540,6 +545,22 @@ void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) {
f->fetch();
}
void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
StringRef n) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(n);
if (wasInserted) {
replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
return;
}
auto *u = dyn_cast<Undefined>(s);
if (!u || u->weakAlias || s->pendingArchiveLoad)
return;
s->pendingArchiveLoad = true;
f->makeImport(sym);
}
static std::string getSourceLocationBitcode(BitcodeFile *file) {
std::string res("\n>>> defined at ");
StringRef source = file->obj->getSourceFileName();

View File

@ -87,6 +87,7 @@ public:
Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias);
void addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym);
void addLazyObject(LazyObjFile *f, StringRef n);
void addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, StringRef n);
Symbol *addAbsolute(StringRef n, COFFSymbolRef s);
Symbol *addRegular(InputFile *f, StringRef n,
const llvm::object::coff_symbol_generic *s = nullptr,

View File

@ -70,6 +70,8 @@ InputFile *Symbol::getFile() {
return sym->file;
if (auto *sym = dyn_cast<LazyObject>(this))
return sym->file;
if (auto *sym = dyn_cast<LazyDLLSymbol>(this))
return sym->file;
return nullptr;
}

View File

@ -61,6 +61,7 @@ public:
UndefinedKind,
LazyArchiveKind,
LazyObjectKind,
LazyDLLSymbolKind,
LastDefinedCOFFKind = DefinedCommonKind,
LastDefinedKind = DefinedSyntheticKind,
@ -92,7 +93,8 @@ public:
bool isLive() const;
bool isLazy() const {
return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind ||
symbolKind == LazyDLLSymbolKind;
}
private:
@ -309,6 +311,18 @@ public:
LazyObjFile *file;
};
class LazyDLLSymbol : public Symbol {
public:
LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n)
: Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {}
static bool classof(const Symbol *s) {
return s->kind() == LazyDLLSymbolKind;
}
DLLFile *file;
DLLFile::Symbol *sym;
};
// Undefined symbols.
class Undefined : public Symbol {
public:
@ -423,6 +437,7 @@ inline uint64_t Defined::getRVA() {
return cast<DefinedRegular>(this)->getRVA();
case LazyArchiveKind:
case LazyObjectKind:
case LazyDLLSymbolKind:
case UndefinedKind:
llvm_unreachable("Cannot get the address for an undefined symbol.");
}
@ -447,6 +462,7 @@ inline Chunk *Defined::getChunk() {
return cast<DefinedCommon>(this)->getChunk();
case LazyArchiveKind:
case LazyObjectKind:
case LazyDLLSymbolKind:
case UndefinedKind:
llvm_unreachable("Cannot get the chunk of an undefined symbol.");
}
@ -467,6 +483,7 @@ union SymbolUnion {
alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)];
alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)];
alignas(LazyObject) char j[sizeof(LazyObject)];
alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)];
};
template <typename T, typename... ArgT>

View File

@ -1583,6 +1583,7 @@ static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms,
break;
case Symbol::LazyArchiveKind:
case Symbol::LazyObjectKind:
case Symbol::LazyDLLSymbolKind:
case Symbol::UndefinedKind:
// Undefined symbols resolve to zero, so they don't have an RVA. Lazy
// symbols shouldn't have relocations.

View File

@ -0,0 +1,64 @@
# REQUIRES: x86
## Test creating a DLL and linking against the DLL without using an import
## library.
## Test on i386 with cdecl decorated symbols.
## Linking the executable with -opt:noref, to make sure that we don't
## pull in more import entries than what's needed, even if not running GC.
# RUN: split-file %s %t.dir
# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/lib.s -o %t.lib.o
# RUN: lld-link -safeseh:no -noentry -dll -def:%t.dir/lib.def %t.lib.o -out:%t.lib.dll -implib:%t.implib.lib
# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/main.s -o %t.main.o
# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -verbose 2>&1 | FileCheck --check-prefix=LOG %s
# RUN: llvm-readobj --coff-imports %t.main.exe | FileCheck %s
#--- lib.s
.text
.global _func1
_func1:
ret
.global _func2
_func2:
ret
.global _func3
_func3:
ret
.data
.global _variable
_variable:
.int 42
#--- lib.def
EXPORTS
func1
func2
func3
variable
#--- main.s
.text
.global _mainCRTStartup
_mainCRTStartup:
call _func2
movl .refptr._variable, %eax
movl (%eax), %eax
ret
.section .rdata$.refptr._variable,"dr",discard,.refptr._variable
.globl .refptr._variable
.refptr._variable:
.long _variable
# CHECK: Import {
# CHECK-NEXT: Name: link-dll-i386.s.tmp.lib.dll
# CHECK-NEXT: ImportLookupTableRVA:
# CHECK-NEXT: ImportAddressTableRVA
# CHECK-NEXT: Symbol: func2
# CHECK-NEXT: Symbol: variable
# CHECK-NEXT: }
# LOG: Automatically importing _variable from link-dll-i386.s.tmp.lib.dll

66
lld/test/COFF/link-dll.s Normal file
View File

@ -0,0 +1,66 @@
# REQUIRES: x86
## Test creating a DLL and linking against the DLL without using an import
## library.
## Explicitly creating an import library but naming it differently than the
## DLL, to avoid any risk of implicitly referencing it instead of the DLL
## itself.
## Linking the executable with -opt:noref, to make sure that we don't
## pull in more import entries than what's needed, even if not running GC.
# RUN: split-file %s %t.dir
# RUN: llvm-mc -filetype=obj -triple=x86_64-windows-gnu %t.dir/lib.s -o %t.lib.o
# RUN: lld-link -noentry -dll -def:%t.dir/lib.def %t.lib.o -out:%t.lib.dll -implib:%t.implib.lib
# RUN: llvm-mc -filetype=obj -triple=x86_64-windows-gnu %t.dir/main.s -o %t.main.o
# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -verbose 2>&1 | FileCheck --check-prefix=LOG %s
# RUN: llvm-readobj --coff-imports %t.main.exe | FileCheck %s
#--- lib.s
.text
.global func1
func1:
ret
.global func2
func2:
ret
.global func3
func3:
ret
.data
.global variable
variable:
.int 42
#--- lib.def
EXPORTS
func1
func2
func3
variable
#--- main.s
.text
.global mainCRTStartup
mainCRTStartup:
call func2
movq .refptr.variable(%rip), %rax
movl (%rax), %eax
ret
.section .rdata$.refptr.variable,"dr",discard,.refptr.variable
.globl .refptr.variable
.refptr.variable:
.quad variable
# CHECK: Import {
# CHECK-NEXT: Name: link-dll.s.tmp.lib.dll
# CHECK-NEXT: ImportLookupTableRVA:
# CHECK-NEXT: ImportAddressTableRVA
# CHECK-NEXT: Symbol: func2
# CHECK-NEXT: Symbol: variable
# CHECK-NEXT: }
# LOG: Automatically importing variable from link-dll.s.tmp.lib.dll