From 72dc195d7898f660c04a93b0c4d5bce1b3370c14 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 17 Mar 2017 13:05:04 +0000 Subject: [PATCH] Change our linker script expr representation. This fixes pr32031 by representing the expressions results as a SectionBase and offset. This allows us to use an input section directly instead of getting lost trying to compute an offset in an outputsection when not all the information is available yet. This also creates a struct to represent the *value* of and expression, allowing the expression itself to be a simple typedef. I think this is easier to read and will make it easier to extend the expression computation to handle more complicated cases. llvm-svn: 298079 --- lld/ELF/LinkerScript.cpp | 198 +++++++++++-------- lld/ELF/LinkerScript.h | 45 ++--- lld/test/ELF/linkerscript/obj-symbol-value.s | 19 ++ 3 files changed, 153 insertions(+), 109 deletions(-) create mode 100644 lld/test/ELF/linkerscript/obj-symbol-value.s diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 8731b378f52b..b05d55492b96 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -53,6 +53,60 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; +uint64_t ExprValue::getValue() const { + if (Sec) + return Sec->getOffset(Val) + Sec->getOutputSection()->Addr; + return Val; +} + +static ExprValue add(ExprValue A, ExprValue B) { + return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; +} +static ExprValue sub(ExprValue A, ExprValue B) { + return {A.Sec, A.Val - B.getValue()}; +} +static ExprValue mul(ExprValue A, ExprValue B) { + return A.getValue() * B.getValue(); +} +static ExprValue div(ExprValue A, ExprValue B) { + if (uint64_t BV = B.getValue()) + return A.getValue() / BV; + error("division by zero"); + return 0; +} +static ExprValue leftShift(ExprValue A, ExprValue B) { + return A.getValue() << B.getValue(); +} +static ExprValue rightShift(ExprValue A, ExprValue B) { + return A.getValue() >> B.getValue(); +} +static ExprValue lessThan(ExprValue A, ExprValue B) { + return A.getValue() < B.getValue(); +} +static ExprValue greaterThan(ExprValue A, ExprValue B) { + return A.getValue() > B.getValue(); +} +static ExprValue greaterThanOrEqual(ExprValue A, ExprValue B) { + return A.getValue() >= B.getValue(); +} +static ExprValue lessThanOrEqual(ExprValue A, ExprValue B) { + return A.getValue() <= B.getValue(); +} +static ExprValue equal(ExprValue A, ExprValue B) { + return A.getValue() == B.getValue(); +} +static ExprValue notEqual(ExprValue A, ExprValue B) { + return A.getValue() != B.getValue(); +} +static ExprValue bitAnd(ExprValue A, ExprValue B) { + return A.getValue() & B.getValue(); +} +static ExprValue bitOr(ExprValue A, ExprValue B) { + return A.getValue() | B.getValue(); +} +static ExprValue bitNot(ExprValue A) { return ~A.getValue(); } +static ExprValue minus(ExprValue A) { return -A.getValue(); } + LinkerScriptBase *elf::ScriptBase; ScriptConfiguration *elf::ScriptConfig; @@ -63,8 +117,8 @@ template static SymbolBody *addRegular(SymbolAssignment *Cmd) { Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, /*File*/ nullptr); Sym->Binding = STB_GLOBAL; - SectionBase *Sec = - Cmd->Expression.IsAbsolute() ? nullptr : Cmd->Expression.Section(); + ExprValue Value = Cmd->Expression(); + SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; replaceBody(Sym, Cmd->Name, /*IsLocal=*/false, Visibility, STT_NOTYPE, 0, 0, Sec, nullptr); return Sym->body(); @@ -87,7 +141,8 @@ OutputSection *LinkerScriptBase::getOutputSection(const Twine &Loc, if (Sec->Name == Name) return Sec; - error(Loc + ": undefined section " + Name); + if (ErrorOnMissingSection) + error(Loc + ": undefined section " + Name); return &FakeSec; } @@ -105,7 +160,7 @@ uint64_t LinkerScriptBase::getOutputSectionSize(StringRef Name) { } void LinkerScriptBase::setDot(Expr E, const Twine &Loc, bool InSec) { - uint64_t Val = E(); + uint64_t Val = E().getValue(); if (Val < Dot) { if (InSec) error(Loc + ": unable to move location counter backward for: " + @@ -132,12 +187,15 @@ void LinkerScriptBase::assignSymbol(SymbolAssignment *Cmd, bool InSec) { return; auto *Sym = cast(Cmd->Sym); - Sym->Value = Cmd->Expression(); - if (!Cmd->Expression.IsAbsolute()) { - Sym->Section = Cmd->Expression.Section(); - if (auto *Sec = dyn_cast_or_null(Sym->Section)) - if (Sec->Flags & SHF_ALLOC) - Sym->Value -= Sec->Addr; + ExprValue V = Cmd->Expression(); + if (V.isAbsolute()) { + Sym->Value = V.getValue(); + } else { + Sym->Section = V.Sec; + if (Sym->Section->Flags & SHF_ALLOC) + Sym->Value = V.Val; + else + Sym->Value = V.getValue(); } } @@ -373,7 +431,7 @@ void LinkerScript::processCommands(OutputSectionFactory &Factory) { // is given, input sections are aligned to that value, whether the // given value is larger or smaller than the original section alignment. if (Cmd->SubalignExpr) { - uint32_t Subalign = Cmd->SubalignExpr(); + uint32_t Subalign = Cmd->SubalignExpr().getValue(); for (InputSectionBase *S : V) S->Alignment = Subalign; } @@ -557,12 +615,12 @@ void LinkerScriptBase::assignOffsets(OutputSectionCommand *Cmd) { if (Cmd->LMAExpr) { uint64_t D = Dot; - LMAOffset = [=] { return Cmd->LMAExpr() - D; }; + LMAOffset = [=] { return Cmd->LMAExpr().getValue() - D; }; } // Handle align (e.g. ".foo : ALIGN(16) { ... }"). if (Cmd->AlignExpr) - Sec->updateAlignment(Cmd->AlignExpr()); + Sec->updateAlignment(Cmd->AlignExpr().getValue()); // Try and find an appropriate memory region to assign offsets in. CurMemRegion = findMemoryRegion(Cmd, Sec); @@ -776,6 +834,7 @@ void LinkerScriptBase::processNonSectionCommands() { void LinkerScriptBase::assignAddresses(std::vector &Phdrs) { // Assign addresses as instructed by linker script SECTIONS sub-commands. Dot = 0; + ErrorOnMissingSection = true; switchTo(Aether); for (const std::unique_ptr &Base : Opt.Commands) { @@ -820,7 +879,7 @@ std::vector LinkerScriptBase::createPhdrs() { Phdr.add(Out::ProgramHeaders); if (Cmd.LMAExpr) { - Phdr.p_paddr = Cmd.LMAExpr(); + Phdr.p_paddr = Cmd.LMAExpr().getValue(); Phdr.HasLMA = true; } } @@ -888,7 +947,8 @@ void LinkerScript::writeDataBytes(StringRef Name, uint8_t *Buf) { auto *Cmd = dyn_cast(Opt.Commands[I].get()); for (const std::unique_ptr &Base : Cmd->Commands) if (auto *Data = dyn_cast(Base.get())) - writeInt(Buf + Data->Offset, Data->Expression(), Data->Size); + writeInt(Buf + Data->Offset, Data->Expression().getValue(), + Data->Size); } bool LinkerScriptBase::hasLMA(StringRef Name) { @@ -912,11 +972,15 @@ int LinkerScriptBase::getSectionIndex(StringRef Name) { } template -uint64_t LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) { +ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) { if (S == ".") - return Dot; - if (SymbolBody *B = Symtab::X->find(S)) - return B->getVA(); + return {CurOutSec, Dot - CurOutSec->Addr}; + if (SymbolBody *B = Symtab::X->find(S)) { + if (auto *D = dyn_cast(B)) + return {D->Section, D->Value}; + auto *C = cast(B); + return {In::Common, C->Offset}; + } error(Loc + ": symbol not found: " + S); return 0; } @@ -925,24 +989,6 @@ template bool LinkerScript::isDefined(StringRef S) { return Symtab::X->find(S) != nullptr; } -template bool LinkerScript::isAbsolute(StringRef S) { - if (S == ".") - return false; - SymbolBody *Sym = Symtab::X->find(S); - auto *DR = dyn_cast_or_null(Sym); - return DR && !DR->Section; -} - -// Gets section symbol belongs to. Symbol "." doesn't belong to any -// specific section but isn't absolute at the same time, so we try -// to find suitable section for it as well. -template -OutputSection *LinkerScript::getSymbolSection(StringRef S) { - if (SymbolBody *Sym = Symtab::X->find(S)) - return Sym->getOutputSection(); - return CurOutSec; -} - // Returns indices of ELF headers containing specific section, identified // by Name. Each index is a zero based number of ELF header listed within // PHDRS {} script block. @@ -1256,7 +1302,7 @@ void ScriptParser::readPhdrs() { expect("("); // Passing 0 for the value of dot is a bit of a hack. It means that // we accept expressions like ".|1". - PhdrCmd.Flags = readExpr()(); + PhdrCmd.Flags = readExpr()().getValue(); expect(")"); } else setError("unexpected header attribute: " + Tok); @@ -1422,7 +1468,7 @@ Expr ScriptParser::readAssert() { StringRef Msg = unquote(next()); expect(")"); return [=] { - if (!E()) + if (!E().getValue()) error(Msg); return ScriptBase->getDot(); }; @@ -1554,7 +1600,7 @@ SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { Expr E = readExpr(); if (Op == "+=") { std::string Loc = getCurrentLocation(); - E = [=] { return ScriptBase->getSymbolValue(Loc, Name) + E(); }; + E = [=] { return add(ScriptBase->getSymbolValue(Loc, Name), E()); }; } return new SymbolAssignment(Name, E, getCurrentLocation()); } @@ -1572,48 +1618,35 @@ Expr ScriptParser::readExpr() { } static Expr combine(StringRef Op, Expr L, Expr R) { - auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); }; - auto GetOutSec = [=] { - SectionBase *S = L.Section(); - return S ? S : R.Section(); - }; - if (Op == "*") - return [=] { return L() * R(); }; + return [=] { return mul(L(), R()); }; if (Op == "/") { - return [=]() -> uint64_t { - uint64_t RHS = R(); - if (RHS == 0) { - error("division by zero"); - return 0; - } - return L() / RHS; - }; + return [=] { return div(L(), R()); }; } if (Op == "+") - return {[=] { return L() + R(); }, IsAbs, GetOutSec}; + return [=] { return add(L(), R()); }; if (Op == "-") - return {[=] { return L() - R(); }, IsAbs, GetOutSec}; + return [=] { return sub(L(), R()); }; if (Op == "<<") - return [=] { return L() << R(); }; + return [=] { return leftShift(L(), R()); }; if (Op == ">>") - return [=] { return L() >> R(); }; + return [=] { return rightShift(L(), R()); }; if (Op == "<") - return [=] { return L() < R(); }; + return [=] { return lessThan(L(), R()); }; if (Op == ">") - return [=] { return L() > R(); }; + return [=] { return greaterThan(L(), R()); }; if (Op == ">=") - return [=] { return L() >= R(); }; + return [=] { return greaterThanOrEqual(L(), R()); }; if (Op == "<=") - return [=] { return L() <= R(); }; + return [=] { return lessThanOrEqual(L(), R()); }; if (Op == "==") - return [=] { return L() == R(); }; + return [=] { return ::equal(L(), R()); }; if (Op == "!=") - return [=] { return L() != R(); }; + return [=] { return notEqual(L(), R()); }; if (Op == "&") - return [=] { return L() & R(); }; + return [=] { return bitAnd(L(), R()); }; if (Op == "|") - return [=] { return L() | R(); }; + return [=] { return bitOr(L(), R()); }; llvm_unreachable("invalid operator"); } @@ -1718,25 +1751,28 @@ Expr ScriptParser::readPrimary() { if (Tok == "~") { Expr E = readPrimary(); - return [=] { return ~E(); }; + return [=] { return bitNot(E()); }; } if (Tok == "-") { Expr E = readPrimary(); - return [=] { return -E(); }; + return [=] { return minus(E()); }; } // Built-in functions are parsed here. // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. if (Tok == "ABSOLUTE") { - Expr E = readParenExpr(); - E.IsAbsolute = [] { return true; }; - return E; + Expr Inner = readParenExpr(); + return [=] { + ExprValue I = Inner(); + I.ForceAbsolute = true; + return I; + }; } if (Tok == "ADDR") { StringRef Name = readParenLiteral(); - return {[=] { return ScriptBase->getOutputSection(Location, Name)->Addr; }, - [=] { return false; }, - [=] { return ScriptBase->getOutputSection(Location, Name); }}; + return [=]() -> ExprValue { + return {ScriptBase->getOutputSection(Location, Name), 0}; + }; } if (Tok == "LOADADDR") { StringRef Name = readParenLiteral(); @@ -1751,10 +1787,10 @@ Expr ScriptParser::readPrimary() { if (consume(",")) { Expr E2 = readExpr(); expect(")"); - return [=] { return alignTo(E(), E2()); }; + return [=] { return alignTo(E().getValue(), E2().getValue()); }; } expect(")"); - return [=] { return alignTo(ScriptBase->getDot(), E()); }; + return [=] { return alignTo(ScriptBase->getDot(), E().getValue()); }; } if (Tok == "CONSTANT") { StringRef Name = readParenLiteral(); @@ -1778,7 +1814,7 @@ Expr ScriptParser::readPrimary() { expect(","); readExpr(); expect(")"); - return [=] { return alignTo(ScriptBase->getDot(), E()); }; + return [=] { return alignTo(ScriptBase->getDot(), E().getValue()); }; } if (Tok == "DATA_SEGMENT_END") { expect("("); @@ -1817,16 +1853,14 @@ Expr ScriptParser::readPrimary() { // Tok is a symbol name. if (Tok != "." && !isValidCIdentifier(Tok)) setError("malformed number: " + Tok); - return {[=] { return ScriptBase->getSymbolValue(Location, Tok); }, - [=] { return ScriptBase->isAbsolute(Tok); }, - [=] { return ScriptBase->getSymbolSection(Tok); }}; + return [=] { return ScriptBase->getSymbolValue(Location, Tok); }; } Expr ScriptParser::readTernary(Expr Cond) { Expr L = readExpr(); expect(":"); Expr R = readExpr(); - return [=] { return Cond() ? L() : R(); }; + return [=] { return Cond().getValue() ? L() : R(); }; } Expr ScriptParser::readParenExpr() { diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 5b6b09c3a4eb..1dc2e049df49 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -38,28 +38,23 @@ class OutputSectionFactory; class InputSectionBase; class SectionBase; +struct ExprValue { + SectionBase *Sec; + uint64_t Val; + bool ForceAbsolute; + + ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val) + : Sec(Sec), Val(Val), ForceAbsolute(ForceAbsolute) {} + ExprValue(SectionBase *Sec, uint64_t Val) : ExprValue(Sec, false, Val) {} + ExprValue(uint64_t Val) : ExprValue(nullptr, Val) {} + bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; } + uint64_t getValue() const; +}; + // This represents an expression in the linker script. // ScriptParser::readExpr reads an expression and returns an Expr. -// Later, we evaluate the expression by calling the function -// with the value of special context variable ".". -struct Expr { - std::function Val; - std::function IsAbsolute; - - // If expression is section-relative the function below is used - // to get the output section pointer. - std::function Section; - - uint64_t operator()() const { return Val(); } - operator bool() const { return (bool)Val; } - - Expr(std::function Val, std::function IsAbsolute, - std::function Section) - : Val(Val), IsAbsolute(IsAbsolute), Section(Section) {} - template - Expr(T V) : Expr(V, [] { return true; }, [] { return nullptr; }) {} - Expr() : Expr(nullptr) {} -}; +// Later, we evaluate the expression by calling the function. +typedef std::function Expr; // Parses a linker script. Calling this function updates // Config and ScriptConfig. @@ -205,7 +200,6 @@ struct MemoryRegion { uint32_t NegFlags; }; - // ScriptConfiguration holds linker script parse results. struct ScriptConfiguration { // Used to assign addresses to sections. @@ -248,6 +242,7 @@ protected: void process(BaseCommand &Base); OutputSection *Aether; + bool ErrorOnMissingSection = false; // "ScriptConfig" is a bit too long, so define a short name for it. ScriptConfiguration &Opt = *ScriptConfig; @@ -269,10 +264,8 @@ public: uint64_t getOutputSectionSize(StringRef S); void discard(ArrayRef V); - virtual uint64_t getSymbolValue(const Twine &Loc, StringRef S) = 0; + virtual ExprValue getSymbolValue(const Twine &Loc, StringRef S) = 0; virtual bool isDefined(StringRef S) = 0; - virtual bool isAbsolute(StringRef S) = 0; - virtual OutputSection *getSymbolSection(StringRef S) = 0; std::vector *OutputSections; void addOrphanSections(OutputSectionFactory &Factory); @@ -303,10 +296,8 @@ public: void addSymbol(SymbolAssignment *Cmd); void processCommands(OutputSectionFactory &Factory); - uint64_t getSymbolValue(const Twine &Loc, StringRef S) override; + ExprValue getSymbolValue(const Twine &Loc, StringRef S) override; bool isDefined(StringRef S) override; - bool isAbsolute(StringRef S) override; - OutputSection *getSymbolSection(StringRef S) override; }; // Variable template is a C++14 feature, so we can't template diff --git a/lld/test/ELF/linkerscript/obj-symbol-value.s b/lld/test/ELF/linkerscript/obj-symbol-value.s new file mode 100644 index 000000000000..1c0b1190e815 --- /dev/null +++ b/lld/test/ELF/linkerscript/obj-symbol-value.s @@ -0,0 +1,19 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +# RUN: echo "SECTIONS { foo = bar; .bar : { *(.bar*) } }" > %t.script +# RUN: ld.lld %t.o --script %t.script -o %t.so -shared +# RUN: llvm-readobj -t %t.so | FileCheck %s + +# CHECK: Symbol { +# CHECK: Name: bar +# CHECK-NEXT: Value: 0x[[VAL:.*]] +# CHECK: Name: foo +# CHECK-NEXT: Value: 0x[[VAL]] + +.section .bar.1, "a" +.quad 0 + +.section .bar.2, "a" +.quad 0 +.global bar +bar: