Implement the ldr-pseudo opcode for ARM assembly

The ldr-pseudo opcode is a convenience for loading 32-bit constants.
It is converted into a pc-relative load from a constant pool. For
example,

  ldr r0, =0x10001
  ldr r1, =bar

will generate this output in the final assembly

  ldr r0, .Ltmp0
  ldr r1, .Ltmp1
  ...
  .Ltmp0: .long 0x10001
  .Ltmp1: .long bar

Sketch of the LDR pseudo implementation:
  Keep a map from Section => ConstantPool

  When parsing ldr r0, =val
    parse val as an MCExpr
    get ConstantPool for current Section
    Label = CreateTempSymbol()
    remember val in ConstantPool at next free slot
    add operand to ldr that is MCSymbolRef of Label

  On finishParse() callback
    Write out all non-empty constant pools
    for each Entry in ConstantPool
      Emit Entry.Label
      Emit Entry.Value

Possible improvements to be added in a later patch:
  1. Does not convert load of small constants to mov
     (e.g. ldr r0, =0x1 => mov r0, #0x1)
  2. Does not reuse constant pool entries for the same constant

The implementation was tested for ARM, Thumb1, and Thumb2 targets on
linux and darwin.

llvm-svn: 197708
This commit is contained in:
David Peixotto 2013-12-19 18:12:36 +00:00
parent e615393356
commit e407d093e8
5 changed files with 582 additions and 1 deletions

View File

@ -36,6 +36,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
@ -50,11 +51,69 @@ class ARMOperand;
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
// A class to keep track of assembler-generated constant pools that are use to
// implement the ldr-pseudo.
class ConstantPool {
typedef SmallVector<std::pair<MCSymbol *, const MCExpr *>, 4> EntryVecTy;
EntryVecTy Entries;
public:
// Initialize a new empty constant pool
ConstantPool() { }
// Add a new entry to the constant pool in the next slot.
// \param Value is the new entry to put in the constant pool.
//
// \returns a MCExpr that references the newly inserted value
const MCExpr *addEntry(const MCExpr *Value, MCContext &Context) {
MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
Entries.push_back(std::make_pair(CPEntryLabel, Value));
return MCSymbolRefExpr::Create(CPEntryLabel, Context);
}
// Emit the contents of the constant pool using the provided streamer.
void emitEntries(MCStreamer &Streamer) const {
Streamer.EmitCodeAlignment(4); // align to 4-byte address
Streamer.EmitDataRegion(MCDR_DataRegion);
for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end();
I != E; ++I) {
Streamer.EmitLabel(I->first);
Streamer.EmitValue(I->second, 4);
}
Streamer.EmitDataRegion(MCDR_DataRegionEnd);
}
};
// Map type used to keep track of per-Section constant pools used by the
// ldr-pseudo opcode. The map associates a section to its constant pool. The
// constant pool is a vector of (label, value) pairs. When the ldr
// pseudo is parsed we insert a new (label, value) pair into the constant pool
// for the current section and add MCSymbolRefExpr to the new label as
// an operand to the ldr. After we have parsed all the user input we
// output the (label, value) pairs in each constant pool at the end of the
// section.
//
// NOTE(review): the key is a section pointer, so iterating this std::map
// visits the pools in pointer order rather than in the order the sections
// were first used. finishParse() emits the pools in iteration order; confirm
// that this yields stable output, or use an insertion-ordered container.
typedef std::map<const MCSection *, ConstantPool> ConstantPoolMapTy;
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
ConstantPoolMapTy ConstantPools;
// Assembler created constant pools for ldr pseudo
// Look up the constant pool that already exists for \p Section.
// \returns a pointer to that pool, or 0 when no pool has been created for the
// section yet. The map is never modified by this lookup.
ConstantPool *getConstantPool(const MCSection *Section) {
  ConstantPoolMapTy::iterator Pos = ConstantPools.find(Section);
  return Pos != ConstantPools.end() ? &Pos->second : 0;
}
// Fetch the constant pool for \p Section. If the section has no pool yet, an
// empty one is inserted into the map first and that new pool is returned.
ConstantPool &getOrCreateConstantPool(const MCSection *Section) {
  ConstantPool &Pool = ConstantPools[Section];
  return Pool;
}
ARMTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = getParser().getStreamer().getTargetStreamer();
@ -296,7 +355,7 @@ public:
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm);
void onLabelParsed(MCSymbol *Symbol);
void finishParse();
};
} // end anonymous namespace
@ -4656,6 +4715,24 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
return false;
}
// An '=' token starts the ldr-pseudo operand (ldr rX, =expr). The expression
// is stored in the constant pool of the current section and the operand that
// is pushed is an immediate referring to the new pool label.
case AsmToken::Equal: {
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
return Error(Parser.getTok().getLoc(), "unexpected token in operand");
// The pool is looked up by the section that is current at parse time.
const MCSection *Section =
getParser().getStreamer().getCurrentSection().first;
assert(Section);
Parser.Lex(); // Eat '='
const MCExpr *SubExprVal;
// parseExpression returns true on failure; propagate the error.
if (getParser().parseExpression(SubExprVal))
return true;
// End location: one character before the token that follows the expression.
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
// NOTE(review): the pool entry is added here, while parsing the operand.
// Nothing visible in this hunk removes it again if the instruction later
// fails to match; confirm whether that can leave an unused entry behind.
const MCExpr *CPLoc =
getOrCreateConstantPool(Section).addEntry(SubExprVal, getContext());
Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E));
return false;
}
}
}
@ -8396,3 +8473,18 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
}
return Match_InvalidOperand;
}
void ARMAsmParser::finishParse() {
// Dump contents of assembler constant pools.
MCStreamer &Streamer = getParser().getStreamer();
for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(),
CPE = ConstantPools.end();
CPI != CPE; ++CPI) {
const MCSection *Section = CPI->first;
ConstantPool &CP = CPI->second;
// Dump assembler constant pools at the end of the section.
Streamer.SwitchSection(Section);
CP.emitEntries(Streamer);
}
}

View File

@ -0,0 +1,241 @@
@ This test has a partner (ldr-pseudo.s) that contains matching
@ tests for the ldr-pseudo on linux targets. We need separate files
@ because the syntax for switching sections and temporary labels differs
@ between darwin and linux. Any tests added here should have a matching
@ test added there.
@
@ The same input is assembled for ARM (armv7), Thumb1 (thumbv5) and Thumb2
@ (thumbv7). The expected labels Ltmp0, Ltmp1, ... follow the order in which
@ the ldr-pseudo uses appear in this file, so inserting a new use in the
@ middle renumbers every later label in the expectations.
@RUN: llvm-mc -triple armv7-apple-darwin %s | FileCheck %s
@RUN: llvm-mc -triple thumbv5-apple-darwin %s | FileCheck %s
@RUN: llvm-mc -triple thumbv7-apple-darwin %s | FileCheck %s
@
@ Check that large constants are converted to ldr from constant pool
@
@ simple test
.section __TEXT,b,regular,pure_instructions
@ CHECK-LABEL: f3:
f3:
ldr r0, =0x10001
@ CHECK: ldr r0, Ltmp0
@ loading multiple constants
.section __TEXT,c,regular,pure_instructions
@ CHECK-LABEL: f4:
f4:
ldr r0, =0x10002
@ CHECK: ldr r0, Ltmp1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =0x10003
@ CHECK: ldr r0, Ltmp2
adds r0, r0, #1
adds r0, r0, #1
@ TODO: the same constants should have the same constant pool location
@ (for now each use gets its own entry: Ltmp3 and Ltmp4 hold the same value)
.section __TEXT,d,regular,pure_instructions
@ CHECK-LABEL: f5:
f5:
ldr r0, =0x10004
@ CHECK: ldr r0, Ltmp3
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =0x10004
@ CHECK: ldr r0, Ltmp4
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
@ a section defined in multiple pieces should be merged and use a single constant pool
.section __TEXT,e,regular,pure_instructions
@ CHECK-LABEL: f6:
f6:
ldr r0, =0x10006
@ CHECK: ldr r0, Ltmp5
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
@ section f uses no ldr-pseudo, so no constant pool is expected for it
.section __TEXT,f,regular,pure_instructions
@ CHECK-LABEL: f7:
f7:
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
@ second piece of section e; its constant joins the pool started by f6
.section __TEXT,e,regular,pure_instructions
@ CHECK-LABEL: f8:
f8:
adds r0, r0, #1
ldr r0, =0x10007
@ CHECK: ldr r0, Ltmp6
adds r0, r0, #1
adds r0, r0, #1
@
@ Check that symbols can be loaded using ldr pseudo
@
@ load an undefined symbol
.section __TEXT,g,regular,pure_instructions
@ CHECK-LABEL: f9:
f9:
ldr r0, =foo
@ CHECK: ldr r0, Ltmp7
@ load a symbol from another section
.section __TEXT,h,regular,pure_instructions
@ CHECK-LABEL: f10:
f10:
ldr r0, =f5
@ CHECK: ldr r0, Ltmp8
@ load a symbol from the same section
.section __TEXT,i,regular,pure_instructions
@ CHECK-LABEL: f11:
f11:
ldr r0, =f12
@ CHECK: ldr r0, Ltmp9
@ CHECK-LABEL: f12:
f12:
adds r0, r0, #1
adds r0, r0, #1
.section __TEXT,j,regular,pure_instructions
@ mix of symbols and constants
@ CHECK-LABEL: f13:
f13:
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =0x101
@ CHECK: ldr r0, Ltmp10
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =bar
@ CHECK: ldr r0, Ltmp11
adds r0, r0, #1
adds r0, r0, #1
@
@ Check for correct usage in other contexts
@
@ usage in macro
@ (the macro body holds two ldr-pseudo uses, so one expansion takes two labels)
.macro useit_in_a_macro
ldr r0, =0x10008
ldr r0, =baz
.endm
.section __TEXT,k,regular,pure_instructions
@ CHECK-LABEL: f14:
f14:
useit_in_a_macro
@ CHECK: ldr r0, Ltmp12
@ CHECK: ldr r0, Ltmp13
@ usage with expressions
@ (the pool is expected to hold 65545 for 0x10001+8 and the symbolic bar+4)
.section __TEXT,l,regular,pure_instructions
@ CHECK-LABEL: f15:
f15:
ldr r0, =0x10001+8
@ CHECK: ldr r0, Ltmp14
adds r0, r0, #1
ldr r0, =bar+4
@ CHECK: ldr r0, Ltmp15
adds r0, r0, #1
@
@ Constant Pools
@
@ The pools are expected after all of the code above, one per section that
@ used the ldr-pseudo. Each pool is 4-byte aligned (.align 2) and, on darwin,
@ wrapped in .data_region / .end_data_region. Values are printed in decimal
@ (for example 0x10001 is 65537).
@ NOTE(review): the directives below require the pools in this section order.
@ The parser walks a std::map keyed by section pointer when emitting them, so
@ confirm that this order is stable.
@ CHECK: .section __TEXT,b,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp0:
@ CHECK: .long 65537
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,c,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp1:
@ CHECK: .long 65538
@ CHECK-LABEL: Ltmp2:
@ CHECK: .long 65539
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,d,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp3:
@ CHECK: .long 65540
@ CHECK-LABEL: Ltmp4:
@ CHECK: .long 65540
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,e,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp5:
@ CHECK: .long 65542
@ CHECK-LABEL: Ltmp6:
@ CHECK: .long 65543
@ CHECK: .end_data_region
@ Should not switch to section because it has no constant pool
@ CHECK-NOT: .section __TEXT,f,regular,pure_instructions
@ CHECK: .section __TEXT,g,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp7:
@ CHECK: .long foo
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,h,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp8:
@ CHECK: .long f5
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,i,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp9:
@ CHECK: .long f12
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,j,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp10:
@ CHECK: .long 257
@ CHECK-LABEL: Ltmp11:
@ CHECK: .long bar
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,k,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp12:
@ CHECK: .long 65544
@ CHECK-LABEL: Ltmp13:
@ CHECK: .long baz
@ CHECK: .end_data_region
@ CHECK: .section __TEXT,l,regular,pure_instructions
@ CHECK: .align 2
@ CHECK: .data_region
@ CHECK-LABEL: Ltmp14:
@ CHECK: .long 65545
@ CHECK-LABEL: Ltmp15:
@ CHECK: .long bar+4
@ CHECK: .end_data_region

View File

@ -0,0 +1,17 @@
@RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi -filetype=obj %s -o %t1 2> %t2
@RUN: cat %t2 | FileCheck %s
@RUN: not llvm-mc -triple=armv7-apple-darwin -filetype=obj %s -o %t1_darwin 2> %t2_darwin
@RUN: cat %t2_darwin | FileCheck %s
@These tests look for errors that should be reported for invalid object layout
@with the ldr pseudo. They are tested separately from parse errors because they
@only trigger when the file has successfully parsed and the object file is about
@to be written out.
@
@The darwin triple is spelled arch-vendor-os (armv7-apple-darwin), matching the
@other ldr-pseudo tests.
.text
foo:
ldr r0, =0x101
@ The constant pool is emitted at the end of the section, so this padding
@ places it 8000 bytes after the load, which the pc-relative ldr cannot reach.
.space 8000
@ CHECK: error: out of range pc-relative fixup value
@ CHECK: ldr r0, =0x101
@ CHECK: ^

View File

@ -0,0 +1,10 @@
@RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi < %s 2>&1 | FileCheck %s
@RUN: not llvm-mc -triple=armv7-apple-darwin < %s 2>&1 | FileCheck %s
@ The '=' operand form is only accepted for the ldr mnemonic. Using it with
@ any other mnemonic (mov here) must be rejected while parsing the operand,
@ and the diagnostic must point at the offending line.
.text
bar:
mov r0, =0x101
@ CHECK: error: unexpected token in operand
@ CHECK: mov r0, =0x101
@ CHECK: ^

View File

@ -0,0 +1,221 @@
@ This test has a partner (ldr-pseudo-darwin.s) that contains matching
@ tests for the ldr-pseudo on darwin targets. We need separate files
@ because the syntax for switching sections and temporary labels differs
@ between darwin and linux. Any tests added here should have a matching
@ test added there.
@
@ The same input is assembled for ARM (armv7), Thumb1 (thumbv5) and Thumb2
@ (thumbv7). The expected labels .Ltmp0, .Ltmp1, ... follow the order in which
@ the ldr-pseudo uses appear in this file, so inserting a new use in the
@ middle renumbers every later label in the expectations.
@RUN: llvm-mc -triple armv7-unknown-linux-gnueabi %s | FileCheck %s
@RUN: llvm-mc -triple thumbv5-unknown-linux-gnueabi %s | FileCheck %s
@RUN: llvm-mc -triple thumbv7-unknown-linux-gnueabi %s | FileCheck %s
@
@ Check that large constants are converted to ldr from constant pool
@
@ simple test
.section b,"ax",%progbits
@ CHECK-LABEL: f3:
f3:
ldr r0, =0x10001
@ CHECK: ldr r0, .Ltmp0
@ loading multiple constants
.section c,"ax",%progbits
@ CHECK-LABEL: f4:
f4:
ldr r0, =0x10002
@ CHECK: ldr r0, .Ltmp1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =0x10003
@ CHECK: ldr r0, .Ltmp2
adds r0, r0, #1
adds r0, r0, #1
@ TODO: the same constants should have the same constant pool location
@ (for now each use gets its own entry: .Ltmp3 and .Ltmp4 hold the same value)
.section d,"ax",%progbits
@ CHECK-LABEL: f5:
f5:
ldr r0, =0x10004
@ CHECK: ldr r0, .Ltmp3
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =0x10004
@ CHECK: ldr r0, .Ltmp4
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
@ a section defined in multiple pieces should be merged and use a single constant pool
.section e,"ax",%progbits
@ CHECK-LABEL: f6:
f6:
ldr r0, =0x10006
@ CHECK: ldr r0, .Ltmp5
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
@ section f uses no ldr-pseudo, so no constant pool is expected for it
.section f, "ax", %progbits
@ CHECK-LABEL: f7:
f7:
adds r0, r0, #1
adds r0, r0, #1
adds r0, r0, #1
@ second piece of section e; its constant joins the pool started by f6
.section e, "ax", %progbits
@ CHECK-LABEL: f8:
f8:
adds r0, r0, #1
ldr r0, =0x10007
@ CHECK: ldr r0, .Ltmp6
adds r0, r0, #1
adds r0, r0, #1
@
@ Check that symbols can be loaded using ldr pseudo
@
@ load an undefined symbol
.section g,"ax",%progbits
@ CHECK-LABEL: f9:
f9:
ldr r0, =foo
@ CHECK: ldr r0, .Ltmp7
@ load a symbol from another section
.section h,"ax",%progbits
@ CHECK-LABEL: f10:
f10:
ldr r0, =f5
@ CHECK: ldr r0, .Ltmp8
@ load a symbol from the same section
.section i,"ax",%progbits
@ CHECK-LABEL: f11:
f11:
ldr r0, =f12
@ CHECK: ldr r0, .Ltmp9
@ CHECK-LABEL: f12:
f12:
adds r0, r0, #1
adds r0, r0, #1
.section j,"ax",%progbits
@ mix of symbols and constants
@ CHECK-LABEL: f13:
f13:
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =0x101
@ CHECK: ldr r0, .Ltmp10
adds r0, r0, #1
adds r0, r0, #1
ldr r0, =bar
@ CHECK: ldr r0, .Ltmp11
adds r0, r0, #1
adds r0, r0, #1
@
@ Check for correct usage in other contexts
@
@ usage in macro
@ (the macro body holds two ldr-pseudo uses, so one expansion takes two labels)
.macro useit_in_a_macro
ldr r0, =0x10008
ldr r0, =baz
.endm
.section k,"ax",%progbits
@ CHECK-LABEL: f14:
f14:
useit_in_a_macro
@ CHECK: ldr r0, .Ltmp12
@ CHECK: ldr r0, .Ltmp13
@ usage with expressions
@ (the pool is expected to hold 65545 for 0x10001+8 and the symbolic bar+4)
.section l, "ax", %progbits
@ CHECK-LABEL: f15:
f15:
ldr r0, =0x10001+8
@ CHECK: ldr r0, .Ltmp14
adds r0, r0, #1
ldr r0, =bar+4
@ CHECK: ldr r0, .Ltmp15
adds r0, r0, #1
@
@ Constant Pools
@
@ The pools are expected after all of the code above, one per section that
@ used the ldr-pseudo. Each pool is 4-byte aligned (.align 2). Unlike the
@ darwin partner test, no data-region markers are expected here. Values are
@ printed in decimal (for example 0x10001 is 65537).
@ NOTE(review): the directives below require the pools in this section order.
@ The parser walks a std::map keyed by section pointer when emitting them, so
@ confirm that this order is stable.
@ CHECK: .section b,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp0:
@ CHECK: .long 65537
@ CHECK: .section c,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp1:
@ CHECK: .long 65538
@ CHECK-LABEL: .Ltmp2:
@ CHECK: .long 65539
@ CHECK: .section d,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp3:
@ CHECK: .long 65540
@ CHECK-LABEL: .Ltmp4:
@ CHECK: .long 65540
@ CHECK: .section e,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp5:
@ CHECK: .long 65542
@ CHECK-LABEL: .Ltmp6:
@ CHECK: .long 65543
@ Should not switch to section because it has no constant pool
@ CHECK-NOT: .section f,"ax",%progbits
@ CHECK: .section g,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp7:
@ CHECK: .long foo
@ CHECK: .section h,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp8:
@ CHECK: .long f5
@ CHECK: .section i,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp9:
@ CHECK: .long f12
@ CHECK: .section j,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp10:
@ CHECK: .long 257
@ CHECK-LABEL: .Ltmp11:
@ CHECK: .long bar
@ CHECK: .section k,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp12:
@ CHECK: .long 65544
@ CHECK-LABEL: .Ltmp13:
@ CHECK: .long baz
@ CHECK: .section l,"ax",%progbits
@ CHECK: .align 2
@ CHECK-LABEL: .Ltmp14:
@ CHECK: .long 65545
@ CHECK-LABEL: .Ltmp15:
@ CHECK: .long bar+4