[ELF] - Linkerscript: implemented BYTE/SHORT/LONG/QUAD commands.

The BYTE, SHORT, LONG, and QUAD commands store one, two, four, and eight bytes (respectively). 
After storing the bytes, the location counter is incremented by the number of bytes
stored.

Previously we handled these commands incorrectly. For example:
SECTIONS  {
  .foo : {
 *(.foo.1)
 BYTE(0x11)
...
We accepted the script above, treating BYTE as an input section description.
These commands are used in the wild though.

Differential revision: https://reviews.llvm.org/D24830

llvm-svn: 282429
This commit is contained in:
George Rimar 2016-09-26 19:22:50 +00:00
parent 595307a468
commit e38cbab5a4
4 changed files with 131 additions and 2 deletions

View File

@ -38,6 +38,7 @@
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;
@ -94,6 +95,10 @@ bool AssertCommand::classof(const BaseCommand *C) {
return C->Kind == AssertKind;
}
// Type-query hook used by isa<>/cast<>/dyn_cast<>: reports whether the
// command pointed to by C carries the BytesDataKind tag.
bool BytesDataCommand::classof(const BaseCommand *C) {
  const bool IsBytesData = (BytesDataKind == C->Kind);
  return IsBytesData;
}
// Returns true when S is a null pointer or when the section's Live flag is
// not set.
template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
  if (S == nullptr)
    return true;
  return !S->Live;
}
@ -408,6 +413,7 @@ void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) {
}
template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
// This handles the assignments to symbol or to a location counter (.)
if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
if (AssignCmd->Name == ".") {
// Update to location counter means update to section size.
@ -418,6 +424,18 @@ template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA());
return;
}
// Handle BYTE(), SHORT(), LONG(), or QUAD().
if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) {
DataCmd->Offset = Dot - CurOutSec->getVA();
Dot += DataCmd->Size;
CurOutSec->setSize(Dot - CurOutSec->getVA());
return;
}
// It handles single input section description command,
// calculates and assigns the offsets for each section and also
// updates the output section size.
auto &ICmd = cast<InputSectionDescription>(Base);
for (InputSectionData *ID : ICmd.Sections) {
auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
@ -689,6 +707,41 @@ ArrayRef<uint8_t> LinkerScript<ELFT>::getFiller(StringRef Name) {
return {};
}
template <class ELFT>
static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
const endianness E = ELFT::TargetEndianness;
switch (Size) {
case 1:
*Buf = (uint8_t)Data;
break;
case 2:
write16<E>(Buf, Data);
break;
case 4:
write32<E>(Buf, Data);
break;
case 8:
write64<E>(Buf, Data);
break;
default:
llvm_unreachable("unsupported Size argument");
}
}
// Writes the value of every BYTE(), SHORT(), LONG() and QUAD() command that
// belongs to the output section command found for Name into Buf. Each value
// is stored at &Buf[Offset], where Offset is the position recorded on the
// command, using the target's byte order (see writeInt).
//
// Returns without writing anything when getSectionIndex(Name) yields INT_MAX
// (presumably "no such section in the script" -- confirm against
// getSectionIndex) or when the command at that index is not an
// OutputSectionCommand.
template <class ELFT>
void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) {
  int I = getSectionIndex(Name);
  if (I == INT_MAX)
    return;

  // dyn_cast returns null on a kind mismatch; do not dereference it blindly.
  auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get());
  if (!Cmd)
    return;

  for (const std::unique_ptr<BaseCommand> &Base2 : Cmd->Commands)
    if (auto *DataCmd = dyn_cast<BytesDataCommand>(Base2.get()))
      writeInt<ELFT>(&Buf[DataCmd->Offset], DataCmd->Data, DataCmd->Size);
}
template <class ELFT> Expr LinkerScript<ELFT>::getLma(StringRef Name) {
for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands)
if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()))
@ -815,6 +868,7 @@ private:
void readVersionScriptCommand();
SymbolAssignment *readAssignment(StringRef Name);
BytesDataCommand *readBytesDataCommand(StringRef Tok);
std::vector<uint8_t> readFill();
OutputSectionCommand *readOutputSectionDescription(StringRef OutSec);
std::vector<uint8_t> readOutputSectionFiller(StringRef Tok);
@ -1254,6 +1308,8 @@ ScriptParser::readOutputSectionDescription(StringRef OutSec) {
StringRef Tok = next();
if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok, false))
Cmd->Commands.emplace_back(Assignment);
else if (BytesDataCommand *Data = readBytesDataCommand(Tok))
Cmd->Commands.emplace_back(Data);
else if (Tok == "FILL")
Cmd->Filler = readFill();
else if (Tok == "SORT")
@ -1451,6 +1507,25 @@ static bool readInteger(StringRef Tok, uint64_t &Result) {
return true;
}
// Parses a BYTE(<int>), SHORT(<int>), LONG(<int>) or QUAD(<int>) command.
//
// Tok is the token that was already consumed by the caller. If it is not one
// of the four keywords, nothing further is consumed and nullptr is returned
// so the caller can try other command kinds. Otherwise the parenthesized
// integer is read and a newly allocated BytesDataCommand holding the value
// and the byte width (1, 2, 4 or 8) is returned; the caller takes ownership.
//
// A non-integer argument is reported through setError(); parsing still
// continues and a command with value 0 is returned in that case.
BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) {
  // Use a signed result type so the -1 sentinel round-trips exactly instead
  // of depending on an unsigned-to-int conversion.
  int Size = StringSwitch<int>(Tok)
                 .Case("BYTE", 1)
                 .Case("SHORT", 2)
                 .Case("LONG", 4)
                 .Case("QUAD", 8)
                 .Default(-1);
  if (Size == -1)
    return nullptr;

  expect("(");
  uint64_t Val = 0;
  StringRef S = next();
  if (!readInteger(S, Val))
    setError("unexpected value: " + S);
  expect(")");
  return new BytesDataCommand(Val, Size);
}
Expr ScriptParser::readPrimary() {
if (peek() == "(")
return readParenExpr();

View File

@ -44,10 +44,11 @@ void readVersionScript(MemoryBufferRef MB);
// This enum is used to implement linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
// Tag values stored in BaseCommand::Kind and tested by the classof()
// functions of the individual command structs.
enum SectionsCommandKind {
  AssignmentKind, // . = expr or <sym> = expr
  OutputSectionKind,
  InputSectionKind,
  AssertKind,   // ASSERT(expr)
  BytesDataKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
};
struct BaseCommand {
@ -138,6 +139,15 @@ struct AssertCommand : BaseCommand {
Expr Expression;
};
// A BYTE(), SHORT(), LONG() or QUAD() command: a fixed-width integer that is
// written into the output at the position where the command appears.
struct BytesDataCommand : BaseCommand {
  BytesDataCommand(uint64_t Data, unsigned Size)
      : BaseCommand(BytesDataKind), Data(Data), Size(Size) {}
  static bool classof(const BaseCommand *C);

  // Value to store; only the low Size bytes are written.
  uint64_t Data;
  // Byte offset at which the value is written. It is not known at parse time,
  // so it starts at 0 and is filled in later when offsets are assigned.
  unsigned Offset = 0;
  // Width of the stored value in bytes (1, 2, 4 or 8).
  unsigned Size;
};
struct PhdrsCommand {
StringRef Name;
unsigned Type;
@ -194,6 +204,7 @@ public:
bool ignoreInterpSection();
ArrayRef<uint8_t> getFiller(StringRef Name);
void writeDataBytes(StringRef Name, uint8_t *Buf);
Expr getLma(StringRef Name);
bool shouldKeep(InputSectionBase<ELFT> *S);
void assignOffsets(OutputSectionCommand *Cmd);

View File

@ -1005,6 +1005,9 @@ template <class ELFT> void OutputSection<ELFT>::writeTo(uint8_t *Buf) {
for (InputSection<ELFT> *C : Sections)
C->writeTo(Buf);
}
// Linker scripts may have BYTE()-family commands with which you
// can write arbitrary bytes to the output. Process them if any.
Script<ELFT>::X->writeDataBytes(this->Name, Buf);
}
template <class ELFT>

View File

@ -0,0 +1,40 @@
# REQUIRES: x86,mips
# Verifies that the BYTE, SHORT, LONG and QUAD linker script commands emit
# 1, 2, 4 and 8 bytes respectively, placed between the input sections they
# are interleaved with, and that the bytes follow the target byte order.
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
# RUN: echo "SECTIONS \
# RUN: { \
# RUN: .foo : { \
# RUN: *(.foo.1) \
# RUN: BYTE(0x11) \
# RUN: *(.foo.2) \
# RUN: SHORT(0x1122) \
# RUN: *(.foo.3) \
# RUN: LONG(0x11223344) \
# RUN: *(.foo.4) \
# RUN: QUAD(0x1122334455667788) \
# RUN: } \
# RUN: }" > %t.script
# RUN: ld.lld -o %t --script %t.script %t.o
# RUN: llvm-objdump -s %t | FileCheck %s
# x86_64 output: multi-byte values appear least significant byte first.
# CHECK: Contents of section .foo:
# CHECK-NEXT: ff11ff22 11ff4433 2211ff88 77665544
# CHECK-NEXT: 332211
# Same source and script linked for mips64: values appear most significant
# byte first.
# RUN: llvm-mc -filetype=obj -triple=mips64-unknown-linux %s -o %tmips64be
# RUN: ld.lld --script %t.script %tmips64be -o %t2
# RUN: llvm-objdump -s %t2 | FileCheck %s --check-prefix=BE
# BE: Contents of section .foo:
# BE-NEXT: ff11ff11 22ff1122 3344ff11 22334455
# BE-NEXT: 667788
# Each input section contributes a single 0xFF byte, which shows up in the
# dumps above as the separator in front of every data command.
.section .foo.1, "a"
.byte 0xFF
.section .foo.2, "a"
.byte 0xFF
.section .foo.3, "a"
.byte 0xFF
.section .foo.4, "a"
.byte 0xFF