[lld][WebAssembly] Take advantage of extended const expressions when available

In particular we use these in two places:

1. When building PIC code we no longer need to combine output segments
   into a single segment that can be initialized at `__memory_base`.
   Instead each segment can encode its offset from `__memory_base` in
   its initializer.  e.g.

```
(i32.add (global.get __memory_base) (i32.const offset))
```

2. When building PIC code we no longer need to relocate internalized
   global addresses.  We can just initialize them with their correct
   offsets.

Differential Revision: https://reviews.llvm.org/D121420
This commit is contained in:
Sam Clegg 2022-03-07 15:50:30 -08:00
parent 384e890dd3
commit 4690bf2ed3
7 changed files with 146 additions and 32 deletions

View File

@ -92,6 +92,51 @@ declare void @external_func()
; DISASSEM-NEXT: call 2
; DISASSEM-NEXT: end
; Run the same test with extended-const support. When this is available
; we don't need __wasm_apply_global_relocs and instead rely on the add
; instruction in the InitExpr. We also, therefore, do not need these globals
; to be mutable.
; RUN: llc -relocation-model=pic -mattr=+extended-const,+mutable-globals,+atomics,+bulk-memory -filetype=obj %s -o %t.extended.o
; RUN: wasm-ld --no-gc-sections --allow-undefined --experimental-pic -pie -o %t.extended.wasm %t.extended.o
; RUN: obj2yaml %t.extended.wasm | FileCheck %s --check-prefix=EXTENDED-CONST
; EXTENDED-CONST-NOT: __wasm_apply_global_relocs
; EXTENDED-CONST: - Type: GLOBAL
; EXTENDED-CONST-NEXT: Globals:
; EXTENDED-CONST-NEXT: - Index: 4
; EXTENDED-CONST-NEXT: Type: I32
; EXTENDED-CONST-NEXT: Mutable: false
; EXTENDED-CONST-NEXT: InitExpr:
; EXTENDED-CONST-NEXT: Opcode: GLOBAL_GET
; EXTENDED-CONST-NEXT: Index: 1
; EXTENDED-CONST-NEXT: - Index: 5
; EXTENDED-CONST-NEXT: Type: I32
; EXTENDED-CONST-NEXT: Mutable: false
; EXTENDED-CONST-NEXT: InitExpr:
; EXTENDED-CONST-NEXT: Extended: true
; EXTENDED-CONST-NEXT: Body: 230141046A0B
; EXTENDED-CONST-NEXT: - Index: 6
; EXTENDED-CONST-NEXT: Type: I32
; EXTENDED-CONST-NEXT: Mutable: false
; EXTENDED-CONST-NEXT: InitExpr:
; EXTENDED-CONST-NEXT: Extended: true
; This instruction sequence decodes to:
; (global.get[0x23] 0x1 i32.const[0x41] 0x0C i32.add[0x6A] end[0x0b])
; EXTENDED-CONST-NEXT: Body: 2301410C6A0B
; EXTENDED-CONST: - Type: START
; EXTENDED-CONST-NEXT: StartFunction: 2
; EXTENDED-CONST: FunctionNames:
; EXTENDED-CONST-NEXT: - Index: 0
; EXTENDED-CONST-NEXT: Name: external_func
; EXTENDED-CONST-NEXT: - Index: 1
; EXTENDED-CONST-NEXT: Name: __wasm_call_ctors
; EXTENDED-CONST-NEXT: - Index: 2
; EXTENDED-CONST-NEXT: Name: __wasm_apply_data_relocs
; Run the same test with threading support. In this mode
; we expect __wasm_init_memory and __wasm_apply_data_relocs
; to be generated along with __wasm_start as the start
@ -100,7 +145,7 @@ declare void @external_func()
; RUN: llc -relocation-model=pic -mattr=+mutable-globals,+atomics,+bulk-memory -filetype=obj %s -o %t.shmem.o
; RUN: wasm-ld --no-gc-sections --shared-memory --allow-undefined --experimental-pic -pie -o %t.shmem.wasm %t.shmem.o
; RUN: obj2yaml %t.shmem.wasm | FileCheck %s --check-prefix=SHMEM
; RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.shmem.wasm | FileCheck %s --check-prefixes DISASSEM-SHMEM
; RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.shmem.wasm | FileCheck %s --check-prefix DISASSEM-SHMEM
; SHMEM: - Type: START
; SHMEM-NEXT: StartFunction: 6
@ -132,4 +177,3 @@ declare void @external_func()
; SHMEM-NEXT: Name: get_data_address
; SHMEM-NEXT: - Index: 9
; SHMEM-NEXT: Name: _start

View File

@ -51,6 +51,9 @@ tls1:
# RUN: wasm-ld --experimental-pic --no-gc-sections --no-entry -pie -o %t-pie.wasm %t.o
# RUN: obj2yaml %t-pie.wasm | FileCheck %s --check-prefixes=PIE,PIC
# RUN: wasm-ld --experimental-pic --features=atomics,bulk-memory,extended-const --no-gc-sections --no-entry -pie -o %t-extended-const.wasm %t.o
# RUN: obj2yaml %t-extended-const.wasm | FileCheck %s --check-prefixes=EXT-CONST
# CHECK: - Type: GLOBAL
# __stack_pointer
# CHECK-NEXT: Globals:
@ -136,3 +139,24 @@ tls1:
# PIC-NEXT: Index: {{\d*}}
# PIC-NEXT: Content: 2B0000002A000000
# PIC-NEXT: - Type: CUSTOM
# Unless we have extended-const, in which case the merging is not needed.
# The first segment is placed directly at `__memory_base` and the second
# one is offset from `__memory_base` using `i32.add` and a constant.
# EXT-CONST: - Type: DATA
# EXT-CONST-NEXT: Segments:
# EXT-CONST-NEXT: - SectionOffset: 6
# EXT-CONST-NEXT: InitFlags: 0
# EXT-CONST-NEXT: Offset:
# EXT-CONST-NEXT: Opcode: GLOBAL_GET
# EXT-CONST-NEXT: Index: 1
# EXT-CONST-NEXT: Content: 2B000000
# EXT-CONST-NEXT: - SectionOffset: 18
# EXT-CONST-NEXT: InitFlags: 0
# EXT-CONST-NEXT: Offset:
# EXT-CONST-NEXT: Extended: true
# This instruction sequence decodes to:
# (global.get[0x23] 0x1 i32.const[0x41] 0x04 i32.add[0x6A] end[0x0b])
# EXT-CONST-NEXT: Body: 230141046A0B
# EXT-CONST-NEXT: Content: 2A000000

View File

@ -35,6 +35,7 @@ struct Configuration {
bool exportAll;
bool exportDynamic;
bool exportTable;
bool extendedConst;
bool growableTable;
bool gcSections;
bool importMemory;

View File

@ -143,12 +143,14 @@ void DataSection::finalizeContents() {
});
#endif
assert((config->sharedMemory || !config->isPic || activeCount <= 1) &&
assert((config->sharedMemory || !config->isPic || config->extendedConst ||
activeCount <= 1) &&
"output segments should have been combined by now");
writeUleb128(os, segmentCount, "data segment count");
os.flush();
bodySize = dataSectionHeader.size();
bool is64 = config->is64.getValueOr(false);
for (OutputSegment *segment : segments) {
if (!segment->requiredInBinary())
@ -158,15 +160,27 @@ void DataSection::finalizeContents() {
if (segment->initFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX)
writeUleb128(os, 0, "memory index");
if ((segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
WasmInitExpr initExpr;
initExpr.Extended = false;
if (config->isPic) {
initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET;
initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex();
if (config->isPic && config->extendedConst) {
writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get");
writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(),
"literal (global index)");
if (segment->startVA) {
writePtrConst(os, segment->startVA, is64, "offset");
writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add");
}
writeU8(os, WASM_OPCODE_END, "opcode:end");
} else {
initExpr = intConst(segment->startVA, config->is64.getValueOr(false));
WasmInitExpr initExpr;
initExpr.Extended = false;
if (config->isPic) {
assert(segment->startVA == 0);
initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET;
initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex();
} else {
initExpr = intConst(segment->startVA, is64);
}
writeInitExpr(os, initExpr);
}
writeInitExpr(os, initExpr);
}
writeUleb128(os, segment->size, "segment size");
os.flush();

View File

@ -414,6 +414,7 @@ void GlobalSection::addInternalGOTEntry(Symbol *sym) {
}
void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const {
assert(!config->extendedConst);
bool is64 = config->is64.getValueOr(false);
unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
: WASM_OPCODE_I32_CONST;
@ -469,10 +470,10 @@ void GlobalSection::writeBody() {
for (const Symbol *sym : internalGotSymbols) {
bool mutable_ = false;
if (!sym->isStub) {
// In the case of dynamic linking, these global must to be mutable since
// they get updated to the correct runtime value during
// `__wasm_apply_global_relocs`.
if (config->isPic && !sym->isTLS())
// In the case of dynamic linking, unless we have 'extended-const'
// available, these globals must be mutable since they get updated to
// the correct runtime value during `__wasm_apply_global_relocs`.
if (!config->extendedConst && config->isPic && !sym->isTLS())
mutable_ = true;
// With multi-threading any TLS globals must be mutable since they get
// set during `__wasm_apply_global_tls_relocs`
@ -480,17 +481,33 @@ void GlobalSection::writeBody() {
mutable_ = true;
}
WasmGlobalType type{itype, mutable_};
WasmInitExpr initExpr;
if (auto *d = dyn_cast<DefinedData>(sym))
initExpr = intConst(d->getVA(), is64);
else if (auto *f = dyn_cast<FunctionSymbol>(sym))
initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
else {
assert(isa<UndefinedData>(sym));
initExpr = intConst(0, is64);
}
writeGlobalType(os, type);
writeInitExpr(os, initExpr);
if (config->extendedConst && config->isPic && !sym->isTLS() &&
isa<DefinedData>(sym)) {
// We can use an extended init expression to add a constant
// offset of __memory_base.
auto *d = cast<DefinedData>(sym);
writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get");
writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(),
"literal (global index)");
if (d->getVA()) {
writePtrConst(os, d->getVA(), is64, "offset");
writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add");
}
writeU8(os, WASM_OPCODE_END, "opcode:end");
} else {
WasmInitExpr initExpr;
if (auto *d = dyn_cast<DefinedData>(sym))
initExpr = intConst(d->getVA(), is64);
else if (auto *f = dyn_cast<FunctionSymbol>(sym))
initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
else {
assert(isa<UndefinedData>(sym));
initExpr = intConst(0, is64);
}
writeInitExpr(os, initExpr);
}
}
for (const DefinedData *sym : dataAddressGlobals) {
WasmGlobalType type{itype, false};

View File

@ -288,6 +288,8 @@ public:
// transform a `global.get` to an `i32.const`.
void addInternalGOTEntry(Symbol *sym);
bool needsRelocations() {
if (config->extendedConst)
return false;
return llvm::find_if(internalGotSymbols, [=](Symbol *sym) {
return !sym->isTLS();
}) != internalGotSymbols.end();

View File

@ -450,7 +450,7 @@ void Writer::populateTargetFeatures() {
auto &explicitFeatures = config->features.getValue();
allowed.insert(explicitFeatures.begin(), explicitFeatures.end());
if (!config->checkFeatures)
return;
goto done;
}
// Find the sets of used, required, and disallowed features
@ -486,7 +486,7 @@ void Writer::populateTargetFeatures() {
allowed.insert(std::string(key));
if (!config->checkFeatures)
return;
goto done;
if (config->sharedMemory) {
if (disallowed.count("shared-mem"))
@ -537,12 +537,19 @@ void Writer::populateTargetFeatures() {
}
}
done:
// Normally we don't include bss segments in the binary. In particular if
// memory is not being imported then we can assume it's zero-initialized.
// In the case the memory is imported, and we can use the memory.fill
// instruction, then we can also avoid including the segments.
if (config->importMemory && !allowed.count("bulk-memory"))
config->emitBssSegments = true;
if (allowed.count("extended-const"))
config->extendedConst = true;
for (auto &feature : allowed)
log("Allowed feature: " + feature);
}
void Writer::checkImportExportTargetFeatures() {
@ -921,9 +928,9 @@ void Writer::combineOutputSegments() {
// With PIC code we currently only support a single active data segment since
// we only have a single __memory_base to use as our base address. This pass
// combines all data segments into a single .data segment.
// This restructions can be relaxed once we have extended constant
// expressions available:
// https://github.com/WebAssembly/extended-const
// This restriction does not apply when the extended const extension is
// available: https://github.com/WebAssembly/extended-const
assert(!config->extendedConst);
assert(config->isPic && !config->sharedMemory);
if (segments.size() <= 1)
return;
@ -1555,7 +1562,14 @@ void Writer::run() {
}
}
if (config->isPic && !config->sharedMemory) {
log("-- populateTargetFeatures");
populateTargetFeatures();
// When outputting PIC code each segment lives at a fixed offset from the
// `__memory_base` import. Unless we support the extended const expression we
// can't do addition inside the constant expression, so we must combine the
// segments into a single one that can live at `__memory_base`.
if (config->isPic && !config->extendedConst && !config->sharedMemory) {
// In shared memory mode all data segments are passive and initialized
// via __wasm_init_memory.
log("-- combineOutputSegments");
@ -1572,8 +1586,6 @@ void Writer::run() {
scanRelocations();
log("-- finalizeIndirectFunctionTable");
finalizeIndirectFunctionTable();
log("-- populateTargetFeatures");
populateTargetFeatures();
log("-- createSyntheticInitFunctions");
createSyntheticInitFunctions();
log("-- assignIndexes");