[lld][WebAssembly] Implement --unresolved-symbols

This is a more full featured version of ``--allow-undefined``.
The semantics of the different methods are as follows:

report-all:

   Report all unresolved symbols.  This is the default.  Normally the
   linker will generate an error message for each reported unresolved
   symbol but the option ``--warn-unresolved-symbols`` can change this
   to a warning.

ignore-all:

   Resolve all undefined symbols to zero.  For data and function
   addresses this is trivial.  For direct function calls, the linker
   will generate a trapping stub function in place of the undefined
   function.

import-functions:

   Generate WebAssembly imports for any undefined functions.  Undefined
   data symbols are resolved to zero as in `ignore-all`.  This
   corresponds to the legacy ``--allow-undefined`` flag.

The plan is to follow up with a new mode called `import-dynamic` which
allows statically linked binaries to refer to both data and
function symbols from the embedder.

Differential Revision: https://reviews.llvm.org/D79248
This commit is contained in:
Sam Clegg 2020-05-01 09:14:59 -07:00
parent e5d2409689
commit 206884bf90
16 changed files with 289 additions and 55 deletions

View File

@ -71,7 +71,31 @@ WebAssembly-specific options:
.. option:: --allow-undefined
Allow undefined symbols in linked binary.
Allow undefined symbols in linked binary. This is the legacy
flag which corresponds to ``--unresolved-symbols=import-functions``.
.. option:: --unresolved-symbols=<method>
This is a more full featured version of ``--allow-undefined``.
The semantics of the different methods are as follows:
report-all:
Report all unresolved symbols. This is the default. Normally the linker
will generate an error message for each reported unresolved symbol but the
option ``--warn-unresolved-symbols`` can change this to a warning.
ignore-all:
Resolve all undefined symbols to zero. For data and function addresses
this is trivial. For direct function calls, the linker will generate a
trapping stub function in place of the undefined function.
import-functions:
Generate WebAssembly imports for any undefined functions. Undefined data
symbols are resolved to zero as in ``ignore-all``. This corresponds to
the legacy ``--allow-undefined`` flag.
.. option:: --import-memory

View File

@ -39,5 +39,5 @@ if.end:
; CHECK-NOT: Type: DATA
; CHECK-DATA: Type: DATA
; CHECK: Name: 'undefined:ret32'
; CHECK: Name: 'undefined_weak:ret32'
; CHECK-NOT: Name: ret32

View File

@ -47,8 +47,8 @@ define void @_start() {
; CHECK-NEXT: Name: name
; CHECK-NEXT: FunctionNames:
; CHECK-NEXT: - Index: 0
; DEMANGLE-NEXT: Name: 'undefined:bar(int)'
; MANGLE-NEXT: Name: 'undefined:_Z3bari'
; DEMANGLE-NEXT: Name: 'undefined_weak:bar(int)'
; MANGLE-NEXT: Name: 'undefined_weak:_Z3bari'
; CHECK-NEXT: - Index: 1
; DEMANGLE-NEXT: Name: 'foo(int)'
; MANGLE-NEXT: Name: _Z3fooi

View File

@ -17,4 +17,4 @@ entry:
ret void
}
; CHECK: Name: 'undefined:foo'
; CHECK: Name: 'undefined_weak:foo'

View File

@ -89,11 +89,11 @@ define i32 @callWeakFuncs() {
; CHECK-NEXT: Name: name
; CHECK-NEXT: FunctionNames:
; CHECK-NEXT: - Index: 0
; CHECK-NEXT: Name: 'undefined:weakFunc1'
; CHECK-NEXT: Name: 'undefined_weak:weakFunc1'
; CHECK-NEXT: - Index: 1
; CHECK-NEXT: Name: 'undefined:weakFunc2'
; CHECK-NEXT: Name: 'undefined_weak:weakFunc2'
; CHECK-NEXT: - Index: 2
; CHECK-NEXT: Name: 'undefined:weakFunc3'
; CHECK-NEXT: Name: 'undefined_weak:weakFunc3'
; CHECK-NEXT: - Index: 3
; CHECK-NEXT: Name: callWeakFuncs
; CHECK-NEXT: ...

View File

@ -0,0 +1,94 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t1.o
## Check that %t1.o contains undefined symbol undef.
# RUN: not wasm-ld %t1.o -o /dev/null 2>&1 | \
# RUN: FileCheck -check-prefix=ERRUND %s
# ERRUND: error: {{.*}}1.o: undefined symbol: undef
## report-all is the default one. Check that we get the same error
# RUN: not wasm-ld %t1.o -o /dev/null --unresolved-symbols=report-all 2>&1 | \
# RUN: FileCheck -check-prefix=ERRUND %s
## Error out if unknown option value was set.
# RUN: not wasm-ld %t1.o -o /dev/null --unresolved-symbols=xxx 2>&1 | \
# RUN: FileCheck -check-prefix=ERR1 %s
# ERR1: unknown --unresolved-symbols value: xxx
## Check alias.
# RUN: not wasm-ld %t1.o -o /dev/null --unresolved-symbols xxx 2>&1 | \
# RUN: FileCheck -check-prefix=ERR1 %s
## Ignore all should not produce error and should not produce
# any imports. It should create a stub function in the place of the missing
# function symbol.
# RUN: wasm-ld %t1.o -o %t2.wasm --unresolved-symbols=ignore-all
# RUN: obj2yaml %t2.wasm | FileCheck -check-prefix=IGNORE %s
# IGNORE-NOT: - Type: IMPORT
# IGNORE-NOT: - Type: ELEM
#
# IGNORE: - Type: CODE
# IGNORE-NEXT: Functions:
# IGNORE-NEXT: - Index: 0
# IGNORE-NEXT: Locals: []
# IGNORE-NEXT: Body: 000B
# IGNORE-NEXT: - Index: 1
# IGNORE-NEXT: Locals: []
# IGNORE-NEXT: Body: 1080808080001082808080001083808080000B
# IGNORE-NEXT: - Index: 2
# IGNORE-NEXT: Locals: []
# IGNORE-NEXT: Body: 4180808080000F0B
# IGNORE-NEXT: - Index: 3
# IGNORE-NEXT: Locals: []
# IGNORE-NEXT: Body: 4180808080000F0B
#
# IGNORE: - Type: CUSTOM
# IGNORE-NEXT: Name: name
# IGNORE-NEXT: FunctionNames:
# IGNORE-NEXT: - Index: 0
# IGNORE-NEXT: Name: undefined
# IGNORE-NEXT: - Index: 1
# IGNORE-NEXT: Name: _start
# IGNORE-NEXT: - Index: 2
# IGNORE-NEXT: Name: get_data_addr
# IGNORE-NEXT: - Index: 3
# IGNORE-NEXT: Name: get_func_addr
## import-functions should not produce errors and should result in
# imports for the missing functions but not the missing data symbols.
# `--allow-undefined` should behave exactly the same.
# RUN: wasm-ld %t1.o -o %t3.wasm --unresolved-symbols=import-functions
# RUN: obj2yaml %t3.wasm | FileCheck -check-prefix=IMPORT %s
# IMPORT: - Type: IMPORT
# IMPORT-NEXT: Imports:
# IMPORT-NEXT: - Module: env
# IMPORT-NEXT: Field: undef
# IMPORT-NEXT: Kind: FUNCTION
# IMPORT-NEXT: SigIndex: 0
# IMPORT-NEXT: - Type: FUNCTION
## Do not report undefines if linking relocatable.
# RUN: wasm-ld -r %t1.o -o %t4.wasm --unresolved-symbols=report-all
# RUN: llvm-readobj %t4.wasm > /dev/null 2>&1
.globl _start
_start:
.functype _start () -> ()
call undef
call get_data_addr
call get_func_addr
end_function
.globl get_data_addr
get_data_addr:
.functype get_data_addr () -> (i32)
i32.const undef_data
return
end_function
.globl get_func_addr
get_func_addr:
.functype get_func_addr () -> (i32)
i32.const undef
return
end_function
.functype undef () -> ()

View File

@ -24,6 +24,7 @@ define void @_start() #0 {
entry:
%call1 = call i32* @get_address_of_global_var()
%call2 = call i8* @get_address_of_foo()
call i32 @foo()
ret void
}
@ -41,7 +42,7 @@ entry:
; CHECK-NEXT: ParamTypes: []
; CHECK-NEXT: ReturnTypes: []
; CHECK-NEXT: - Type: FUNCTION
; CHECK-NEXT: FunctionTypes: [ 0, 0, 1 ]
; CHECK-NEXT: FunctionTypes: [ 0, 0, 0, 1 ]
; CHECK-NEXT: - Type: TABLE
; CHECK-NEXT: Tables:
; CHECK-NEXT: - Index: 0
@ -68,16 +69,19 @@ entry:
; CHECK-NEXT: Index: 0
; CHECK-NEXT: - Name: _start
; CHECK-NEXT: Kind: FUNCTION
; CHECK-NEXT: Index: 2
; CHECK-NEXT: Index: 3
; CHECK-NEXT: - Type: CODE
; CHECK-NEXT: Functions:
; CHECK-NEXT: - Index: 0
; CHECK-NEXT: Locals:
; CHECK-NEXT: Body: 4180808080000B
; CHECK-NEXT: Body: 000B
; CHECK-NEXT: - Index: 1
; CHECK-NEXT: Locals:
; CHECK-NEXT: Body: 4180808080000B
; CHECK-NEXT: - Index: 2
; CHECK-NEXT: Locals:
; CHECK-NEXT: Body: 1081808080001A1080808080001A0B
; CHECK-NEXT: Body: 4180808080000B
; CHECK-NEXT: - Index: 3
; CHECK-NEXT: Locals:
; CHECK-NEXT: Body: 1082808080001A1081808080001A1080808080001A0B
; CHECK-NEXT: ...

View File

@ -17,12 +17,17 @@
namespace lld {
namespace wasm {
// For --unresolved-symbols.
// The `ImportFuncs` mode is an additional mode that corresponds to the
// --allow-undefined flag which turns undefined functions into imports
// as opposed to Ignore or Warn which turn them into unreachables.
enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportFuncs };
// This struct contains the global configuration for the linker.
// Most fields are direct mapping from the command line options
// and such fields have the same name as the corresponding options.
// Most fields are initialized by the driver.
struct Configuration {
bool allowUndefined;
bool bsymbolic;
bool checkFeatures;
bool compressRelocations;
@ -57,6 +62,7 @@ struct Configuration {
unsigned ltoo;
unsigned optimize;
llvm::StringRef thinLTOJobs;
UnresolvedPolicy unresolvedSymbols;
llvm::StringRef entry;
llvm::StringRef mapFile;

View File

@ -331,9 +331,35 @@ static StringRef getEntry(opt::InputArgList &args) {
return arg->getValue();
}
// Computes the policy applied to symbols that are still unresolved after
// name resolution. The result is derived from --unresolved-symbols,
// --error-unresolved-symbols / --warn-unresolved-symbols and the legacy
// --allow-undefined flag.
static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &args) {
  // Policy used whenever unresolved symbols are to be reported: an error by
  // default, or a warning when --warn-unresolved-symbols selects that.
  const bool reportAsError = args.hasFlag(OPT_error_unresolved_symbols,
                                          OPT_warn_unresolved_symbols, true);
  const UnresolvedPolicy reportPolicy =
      reportAsError ? UnresolvedPolicy::ReportError : UnresolvedPolicy::Warn;

  if (auto *arg = args.getLastArg(OPT_unresolved_symbols)) {
    StringRef method = arg->getValue();
    if (method == "report-all")
      return reportPolicy;
    if (method == "ignore-all")
      return UnresolvedPolicy::Ignore;
    if (method == "import-functions")
      return UnresolvedPolicy::ImportFuncs;
    // Unrecognized value: report it, then fall through to the checks below.
    error("unknown --unresolved-symbols value: " + method);
  }

  // The legacy --allow-undefined flag is equivalent to
  // --unresolved-symbols=import-functions.
  return args.hasArg(OPT_allow_undefined) ? UnresolvedPolicy::ImportFuncs
                                          : reportPolicy;
}
// Initializes Config members by the command line options.
static void readConfigs(opt::InputArgList &args) {
config->allowUndefined = args.hasArg(OPT_allow_undefined);
config->bsymbolic = args.hasArg(OPT_Bsymbolic);
config->checkFeatures =
args.hasFlag(OPT_check_features, OPT_no_check_features, true);
@ -376,6 +402,7 @@ static void readConfigs(opt::InputArgList &args) {
config->thinLTOCachePolicy = CHECK(
parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)),
"--thinlto-cache-policy: invalid cache policy");
config->unresolvedSymbols = getUnresolvedSymbolPolicy(args);
errorHandler().verbose = args.hasArg(OPT_verbose);
LLVM_DEBUG(errorHandler().verbose = true);
@ -440,7 +467,7 @@ static void setConfigs() {
if (config->shared) {
config->importMemory = true;
config->allowUndefined = true;
config->unresolvedSymbols = UnresolvedPolicy::ImportFuncs;
}
}
@ -939,9 +966,11 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
Symbol *sym = symtab->find(arg->getValue());
if (sym && sym->isDefined())
sym->forceExport = true;
else if (!config->allowUndefined)
else if (config->unresolvedSymbols == UnresolvedPolicy::ReportError)
error(Twine("symbol exported via --export not found: ") +
arg->getValue());
else if (config->unresolvedSymbols == UnresolvedPolicy::Warn)
warn(Twine("symbol exported via --export not found: ") + arg->getValue());
}
if (!config->relocatable) {

View File

@ -36,6 +36,9 @@ defm demangle: B<"demangle",
def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">;
def error_unresolved_symbols: F<"error-unresolved-symbols">,
HelpText<"Report unresolved symbols as errors">;
defm export_dynamic: B<"export-dynamic",
"Put symbols in the dynamic symbol table",
"Do not put symbols in the dynamic symbol table (default)">;
@ -112,18 +115,24 @@ defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">;
defm undefined: Eq<"undefined", "Force undefined symbol during linking">;
defm unresolved_symbols:
Eq<"unresolved-symbols", "Determine how to handle unresolved symbols">;
def v: Flag<["-"], "v">, HelpText<"Display the version number">;
def verbose: F<"verbose">, HelpText<"Verbose mode">;
def version: F<"version">, HelpText<"Display the version number and exit">;
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
HelpText<"Linker option extensions">;
def warn_unresolved_symbols: F<"warn-unresolved-symbols">,
HelpText<"Report unresolved symbols as warnings">;
defm wrap: Eq<"wrap", "Use wrapper functions for symbol">,
MetaVarName<"<symbol>=<symbol>">;
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
HelpText<"Linker option extensions">;
// The following flags are unique to wasm
def allow_undefined: F<"allow-undefined">,

View File

@ -10,6 +10,7 @@
#include "InputChunks.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "SyntheticSections.h"
using namespace llvm;
@ -39,15 +40,36 @@ static bool allowUndefined(const Symbol* sym) {
if (auto *g = dyn_cast<UndefinedGlobal>(sym))
if (g->importName)
return true;
return (config->allowUndefined ||
config->allowUndefinedSymbols.count(sym->getName()) != 0);
if (auto *g = dyn_cast<UndefinedGlobal>(sym))
if (g->importName)
return true;
return config->allowUndefinedSymbols.count(sym->getName()) != 0;
}
static void reportUndefined(const Symbol* sym) {
assert(sym->isUndefined());
assert(!sym->isWeak());
if (!allowUndefined(sym))
error(toString(sym->getFile()) + ": undefined symbol: " + toString(*sym));
// Applies the configured --unresolved-symbols policy to `sym`, unless the
// symbol is individually allowed to stay undefined (see allowUndefined).
// The visible caller in scanRelocations only passes undefined, non-weak
// symbols and only when not producing relocatable output.
static void reportUndefined(Symbol *sym) {
  if (allowUndefined(sym))
    return;

  // Diagnostic text shared by the error and warning policies; built lazily so
  // the other policies do not pay for it.
  auto describe = [sym]() -> std::string {
    return toString(sym->getFile()) + ": undefined symbol: " + toString(*sym);
  };

  switch (config->unresolvedSymbols) {
  case UnresolvedPolicy::ReportError:
    error(describe());
    break;
  case UnresolvedPolicy::Warn:
    warn(describe());
    break;
  case UnresolvedPolicy::Ignore: {
    // Only functions get a stub; other symbol kinds need no work here.
    auto *undefFunc = dyn_cast<UndefinedFunction>(sym);
    if (!undefFunc || undefFunc->stubFunction)
      break; // Not a function, or its stub was already attached.
    LLVM_DEBUG(dbgs()
               << "ignoring undefined symbol: " + toString(*sym) + "\n");
    undefFunc->stubFunction =
        symtab->createUndefinedStub(*undefFunc->getSignature());
    undefFunc->stubFunction->markLive();
    // NOTE(review): table index 0 presumably makes the function's address
    // resolve to zero, matching the documented ignore-all semantics - confirm.
    undefFunc->setTableIndex(0);
    break;
  }
  case UnresolvedPolicy::ImportFuncs:
    // Nothing to do while scanning relocations for this policy.
    break;
  }
}
static void addGOTEntry(Symbol *sym) {
@ -131,7 +153,6 @@ void scanRelocations(InputChunk *chunk) {
if (sym->isUndefined() && !config->relocatable && !sym->isWeak())
reportUndefined(sym);
}
}
}

View File

@ -673,40 +673,58 @@ InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
// to be exported outside the object file.
replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL,
nullptr, func);
// Ensure it compares equal to the null pointer, and so that table relocs
// don't pull in the stub body (only call-operand relocs should do that).
func->setTableIndex(0);
return func;
}
// Replaces a weak undefined function symbol with a synthetic dummy function
// named "undefined_weak:<symbol>". These dummies will be GC'd if not used as
// the target of any "call" instructions.
void SymbolTable::replaceWithUndefined(Symbol *sym) {
  const WasmSignature &signature = *sym->getSignature();
  StringRef stubName = saver.save("undefined_weak:" + toString(*sym));
  replaceWithUnreachable(sym, signature, stubName);

  // Keep the dummy hidden so that it is never exported.
  sym->setHidden(true);
}
// For weak undefined functions, there may be "call" instructions that reference
// the symbol. In this case, we need to synthesise a dummy/stub function that
// will abort at runtime, so that relocations can still provide an operand to
// the call instruction that passes Wasm validation.
void SymbolTable::handleWeakUndefines() {
for (Symbol *sym : getSymbols()) {
if (!sym->isUndefWeak())
continue;
const WasmSignature *sig = sym->getSignature();
if (!sig) {
// It is possible for undefined functions not to have a signature (eg. if
// added via "--undefined"), but weak undefined ones do have a signature.
// Lazy symbols may not be functions and therefore Sig can still be null
// in some circumstance.
assert(!isa<FunctionSymbol>(sym));
continue;
if (sym->isUndefWeak()) {
if (sym->getSignature()) {
replaceWithUndefined(sym);
} else {
// It is possible for undefined functions not to have a signature (eg.
// if added via "--undefined"), but weak undefined ones do have a
// signature. Lazy symbols may not be functions and therefore Sig can
// still be null in some circumstance.
assert(!isa<FunctionSymbol>(sym));
}
}
// Add a synthetic dummy for weak undefined functions. These dummies will
// be GC'd if not used as the target of any "call" instructions.
StringRef debugName = saver.save("undefined:" + toString(*sym));
InputFunction* func = replaceWithUnreachable(sym, *sig, debugName);
// Ensure it compares equal to the null pointer, and so that table relocs
// don't pull in the stub body (only call-operand relocs should do that).
func->setTableIndex(0);
// Hide our dummy to prevent export.
sym->setHidden(true);
}
}
// Returns the "undefined_stub" function for signature `sig`. Stubs are cached
// in `stubFunctions`, so each distinct signature gets a single stub that is
// created on first request; its body comes from replaceWithUnreachable.
DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
  auto cached = stubFunctions.find(sig);
  if (cached != stubFunctions.end())
    return cached->second;

  LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");

  // Allocate raw symbol storage and fill in the common symbol state before
  // the storage is turned into a DefinedFunction below.
  auto *stub = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
  stub->signature = &sig;
  stub->isUsedInRegularObj = true;
  stub->canInline = true;
  stub->traced = false;
  stub->forceExport = false;

  replaceSymbol<DefinedFunction>(stub, "undefined_stub",
                                 WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr,
                                 nullptr);
  replaceWithUnreachable(stub, sig, "undefined_stub");

  stubFunctions[sig] = stub;
  return stub;
}
static void reportFunctionSignatureMismatch(StringRef symName,
FunctionSymbol *a,
FunctionSymbol *b, bool isError) {

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/WasmTraits.h"
namespace lld {
namespace wasm {
@ -88,6 +89,7 @@ public:
void handleSymbolVariants();
void handleWeakUndefines();
DefinedFunction *createUndefinedStub(const WasmSignature &sig);
std::vector<ObjFile *> objectFiles;
std::vector<SharedFile *> sharedFiles;
@ -103,6 +105,7 @@ private:
const InputFile *file, Symbol **out);
InputFunction *replaceWithUnreachable(Symbol *sym, const WasmSignature &sig,
StringRef debugName);
void replaceWithUndefined(Symbol *sym);
// Maps symbol names to index into the symVector. -1 means that symbols
// is to not yet in the vector but it should have tracing enabled if it is
@ -113,6 +116,7 @@ private:
// For certain symbols types, e.g. function symbols, we allow for multiple
// variants of the same symbol with different signatures.
llvm::DenseMap<llvm::CachedHashStringRef, std::vector<Symbol *>> symVariants;
llvm::DenseMap<WasmSignature, DefinedFunction *> stubFunctions;
// Comdat groups define "link once" sections. If two comdat groups have the
// same name, only one of them is linked, and the other is ignored. This set

View File

@ -109,6 +109,9 @@ const WasmSignature *Symbol::getSignature() const {
// Returns the input chunk backing this symbol: the function of a defined
// function, the function of the stub attached to an undefined function (if
// any), or the segment of defined data. Returns nullptr otherwise.
InputChunk *Symbol::getChunk() const {
  if (auto *definedFunc = dyn_cast<DefinedFunction>(this))
    return definedFunc->function;

  auto *undefFunc = dyn_cast<UndefinedFunction>(this);
  if (undefFunc && undefFunc->stubFunction)
    return undefFunc->stubFunction->function;

  if (auto *definedData = dyn_cast<DefinedData>(this))
    return definedData->segment;

  return nullptr;
}
uint32_t FunctionSymbol::getFunctionIndex() const {
if (auto *f = dyn_cast<DefinedFunction>(this))
return f->function->getFunctionIndex();
if (const auto *u = dyn_cast<UndefinedFunction>(this)) {
if (u->stubFunction) {
return u->stubFunction->getFunctionIndex();
}
}
assert(functionIndex != INVALID_INDEX);
return functionIndex;
}

View File

@ -217,6 +217,7 @@ public:
llvm::Optional<StringRef> importName;
llvm::Optional<StringRef> importModule;
DefinedFunction *stubFunction = nullptr;
bool isCalledDirectly;
};
@ -516,7 +517,7 @@ union SymbolUnion {
// It is important to keep the size of SymbolUnion small for performance and
// memory usage reasons. 96 bytes is a soft limit based on the size of
// UndefinedFunction on a 64-bit system.
static_assert(sizeof(SymbolUnion) <= 112, "SymbolUnion too large");
static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large");
void printTraceSymbol(Symbol *sym);
void printTraceSymbolUndefined(StringRef name, const InputFile* file);

View File

@ -539,6 +539,25 @@ void Writer::populateTargetFeatures() {
}
}
// Decides whether an import should be generated for `sym`.
static bool shouldImport(Symbol *sym) {
  // Data symbols never get imports of their own; they can however be imported
  // as GOT entries.
  if (isa<DataSymbol>(sym))
    return false;

  // Relocatable output and the import-functions policy import everything
  // that is left.
  if (config->relocatable)
    return true;
  if (config->unresolvedSymbols == UnresolvedPolicy::ImportFuncs)
    return true;

  // Symbols named in the allowUndefinedSymbols set are imported as well.
  if (config->allowUndefinedSymbols.count(sym->getName()))
    return true;

  // Otherwise import only symbols that carry an explicit import name.
  if (auto *undefGlobal = dyn_cast<UndefinedGlobal>(sym))
    return undefGlobal->importName.hasValue();
  if (auto *undefFunc = dyn_cast<UndefinedFunction>(sym))
    return undefFunc->importName.hasValue();

  return false;
}
void Writer::calculateImports() {
for (Symbol *sym : symtab->getSymbols()) {
if (!sym->isUndefined())
@ -549,13 +568,10 @@ void Writer::calculateImports() {
continue;
if (!sym->isUsedInRegularObj)
continue;
// We don't generate imports for data symbols. They however can be imported
// as GOT entries.
if (isa<DataSymbol>(sym))
continue;
LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n");
out.importSec->addImport(sym);
if (shouldImport(sym)) {
LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n");
out.importSec->addImport(sym);
}
}
}