AArch64: implement large code model access to global variables.

The MOVZ/MOVK instruction sequence may not be the most efficient (a
literal-pool load could be better) but adding that would require
reinstating the ConstantIslands pass.

For now the sequence is correct, and that's enough. Beware: as of this
commit, GNU ld does not appear to support the relocations needed for this.
The feature's primary purpose (for now) is to support JITed code, since in
that case there is no guarantee of where the code will end up in memory
relative to the external symbols it references.
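
To make the scheme concrete: the large code model splits the 64-bit absolute
address into four 16-bit granules and rebuilds it in a register with one MOVZ
and three MOVKs. Below is a minimal standalone sketch of those semantics; it
is not code from this commit, and the address is made up.

#include <cassert>
#include <cstdint>

// movz xd, #imm16, lsl #shift: clear the register, then write one granule.
static uint64_t movz(uint64_t imm16, unsigned shift) {
  return imm16 << shift;
}

// movk xd, #imm16, lsl #shift: write one granule, preserve everything else.
static uint64_t movk(uint64_t reg, uint64_t imm16, unsigned shift) {
  return (reg & ~(0xffffULL << shift)) | (imm16 << shift);
}

int main() {
  const uint64_t sym = 0x0000007fb7e41234ULL; // hypothetical global's address

  // The granules that the abs_g3/g2_nc/g1_nc/g0_nc relocations fill in.
  const uint64_t g3 = (sym >> 48) & 0xffff, g2 = (sym >> 32) & 0xffff,
                 g1 = (sym >> 16) & 0xffff, g0 = sym & 0xffff;

  uint64_t x0 = movz(g3, 48);  // movz x0, #:abs_g3:sym
  x0 = movk(x0, g2, 32);       // movk x0, #:abs_g2_nc:sym
  x0 = movk(x0, g1, 16);       // movk x0, #:abs_g1_nc:sym
  x0 = movk(x0, g0, 0);        // movk x0, #:abs_g0_nc:sym

  assert(x0 == sym); // any 64-bit address can be rebuilt this way
  return 0;
}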

llvm-svn: 181117
Tim Northover 2013-05-04 16:53:46 +00:00
parent df9e574105
commit 2dbef3452c
9 changed files with 206 additions and 14 deletions

View File

@@ -70,6 +70,15 @@ public:
return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
}
/// Used for pre-lowered address-reference nodes, so we already know
/// the fields match. This operand's job is simply to add an
/// appropriate shift operand (i.e. 0) to the MOVZ/MOVK instruction.
bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
Imm = N;
Shift = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,

View File

@@ -781,6 +781,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
default: return NULL;
@@ -1845,12 +1846,33 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
}
SDValue
AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
// TableGen doesn't have easy access to the CodeModel or RelocationModel, so
// we make that distinction here.
AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
SelectionDAG &DAG) const {
assert(getTargetMachine().getCodeModel() == CodeModel::Large);
assert(getTargetMachine().getRelocationModel() == Reloc::Static);
// We support the small memory model for now.
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
SDValue GlobalAddr = DAG.getNode(
AArch64ISD::WrapperLarge, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
DAG.getConstant(GN->getOffset(), PtrVT));
return GlobalAddr;
}
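
A note on the offset path above: a constant offset on the GlobalAddressSDNode
is not folded into the movz/movk operands; the symbol is materialised first
and the offset applied with a separate ISD::ADD. A small sketch checking that
this is equivalent to folding the offset first (values are made up, and the
granule helper is illustrative, not the backend's code):

#include <cassert>
#include <cstdint>

static uint64_t granule(uint64_t v, unsigned lsb) { return (v >> lsb) & 0xffff; }

int main() {
  const uint64_t sym = 0x0000007fb7e40000ULL; // hypothetical symbol address
  const int64_t  off = 20;                    // constant offset on the node

  // Rebuild sym from its four granules (what WrapperLarge selects to)...
  uint64_t x0 = (granule(sym, 48) << 48) | (granule(sym, 32) << 32) |
                (granule(sym, 16) << 16) |  granule(sym, 0);
  // ...then apply the offset as a separate add, mirroring the code above.
  x0 += off;

  assert(x0 == sym + off); // same as folding the offset before splitting
  return 0;
}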
SDValue
AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
SelectionDAG &DAG) const {
assert(getTargetMachine().getCodeModel() == CodeModel::Small);
EVT PtrVT = getPointerTy();
@@ -1929,6 +1951,22 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
return GlobalRef;
}
SDValue
AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
// TableGen doesn't have easy access to the CodeModel or RelocationModel, so
// we make those distinctions here.
switch (getTargetMachine().getCodeModel()) {
case CodeModel::Small:
return LowerGlobalAddressELFSmall(Op, DAG);
case CodeModel::Large:
return LowerGlobalAddressELFLarge(Op, DAG);
default:
llvm_unreachable("Only small and large code models supported now");
}
}
SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
SDValue DescAddr,
DebugLoc DL,

View File

@@ -103,7 +103,12 @@ namespace AArch64ISD {
UBFX,
// Wraps an address which the ISelLowering phase has decided should be
// created using the small absolute memory model: i.e. adrp/add or
// created using the large memory model style: i.e. a sequence of four
// movz/movk instructions.
WrapperLarge,
// Wraps an address which the ISelLowering phase has decided should be
// created using the small memory model style: i.e. adrp/add or
// adrp/mem-op. This exists to prevent bare TargetAddresses which may never
// get selected.
WrapperSmall
@@ -206,7 +211,11 @@ public:
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;

View File

@@ -70,12 +70,20 @@ def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
// made for a variable/address at ISelLowering.
// + The output of ISelLowering should be selectable (hence the Wrapper,
// rather than a bare target opcode)
def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, i32>,
SDTCisPtrTy<0>]>;
def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisSameAs<0, 4>,
SDTCisPtrTy<0>]>;
def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>;
def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, i32>,
SDTCisPtrTy<0>]>;
def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>;
def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
@@ -3871,7 +3879,7 @@ multiclass movw_operands<string prefix, string instname, int width> {
let DiagnosticType = "MOVWUImm16";
}
def _imm : Operand<i32> {
def _imm : Operand<i64> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
let PrintMethod = "printMoveWideImmOperand";
let EncoderMethod = "getMoveWideImmOpValue";
@@ -3942,7 +3950,7 @@ multiclass movalias_operand<string prefix, string basename,
# "A64Imms::" # immpredicate # ">";
}
def _movimm : Operand<i32> {
def _movimm : Operand<i64> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
@@ -3966,6 +3974,15 @@ def : movalias<MOVZxii, GPR64, movz64_movimm>;
def : movalias<MOVNwii, GPR32, movn32_movimm>;
def : movalias<MOVNxii, GPR64, movn64_movimm>;
def movw_addressref : ComplexPattern<i64, 2, "SelectMOVWAddressRef">;
def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2,
movw_addressref:$G1, movw_addressref:$G0),
(MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3),
movw_addressref:$G2),
movw_addressref:$G1),
movw_addressref:$G0)>;
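
Reading the nested Pat inside-out gives the emission order: the innermost
MOVZxii is issued first and zeroes the whole register, after which each
MOVKxii owns exactly one 16-bit field. A standalone sketch of why that
ordering is the only constraint (the MOVKs commute among themselves; the
address is made up):

#include <cassert>
#include <cstdint>

static uint64_t movz(uint64_t imm16, unsigned shift) { return imm16 << shift; }
static uint64_t movk(uint64_t reg, uint64_t imm16, unsigned shift) {
  return (reg & ~(0xffffULL << shift)) | (imm16 << shift);
}

int main() {
  const uint64_t addr = 0x123456789abcdef0ULL; // made-up address
  const uint64_t g3 = (addr >> 48) & 0xffff, g2 = (addr >> 32) & 0xffff,
                 g1 = (addr >> 16) & 0xffff, g0 = addr & 0xffff;

  // The pattern's order: MOVZ(G3) innermost, then G2, G1, G0.
  uint64_t a = movk(movk(movk(movz(g3, 48), g2, 32), g1, 16), g0, 0);
  // Reordering the MOVKs changes nothing; only the MOVZ must come first.
  uint64_t b = movk(movk(movk(movz(g3, 48), g0, 0), g1, 16), g2, 32);

  assert(a == addr && b == addr);
  return 0;
}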
//===----------------------------------------------------------------------===//
// PC-relative addressing instructions
//===----------------------------------------------------------------------===//

View File

@@ -68,6 +68,18 @@ AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
case AArch64II::MO_TPREL_G0_NC:
Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
break;
case AArch64II::MO_ABS_G3:
Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext);
break;
case AArch64II::MO_ABS_G2_NC:
Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext);
break;
case AArch64II::MO_ABS_G1_NC:
Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext);
break;
case AArch64II::MO_ABS_G0_NC:
Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext);
break;
case AArch64II::MO_NO_FLAG:
// Expr is already correct
break;

View File

@@ -133,6 +133,26 @@ public:
return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
}
static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr,
MCContext &Ctx) {
return Create(VK_AARCH64_ABS_G3, Expr, Ctx);
}
static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr,
MCContext &Ctx) {
return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx);
}
static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr,
MCContext &Ctx) {
return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx);
}
static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr,
MCContext &Ctx) {
return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx);
}
/// @}
/// @name Accessors
/// @{

View File

@@ -1037,7 +1037,14 @@ namespace AArch64II {
// MO_LO12 - On a symbol operand, this represents a relocation containing
// lower 12 bits of the address. Used in add/sub/ldr/str.
MO_LO12
MO_LO12,
// MO_ABS_G* - Represent the 16-bit granules of an absolute reference using
// movz/movk instructions.
MO_ABS_G3,
MO_ABS_G2_NC,
MO_ABS_G1_NC,
MO_ABS_G0_NC
};
}
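
The _NC suffix reads as "no check": the relocation fills its granule without
verifying that the value fits. That matters here because a full 64-bit
address legitimately has significant bits above G0, G1, and G2, which the
checking variants would reject; G3 covers bits [63:48], so it cannot overflow
and needs no _NC form. A sketch of the distinction, modelled on the ELF
MOVW_UABS family rather than the backend's code:

#include <cassert>
#include <cstdint>

// A checked granule requires the whole value to fit below the granule's top
// bit; an _NC granule just extracts its 16 bits unconditionally.
static bool applyGranule(uint64_t value, unsigned lsb, bool noCheck,
                         uint16_t &imm16) {
  if (!noCheck && lsb + 16 < 64 && (value >> (lsb + 16)) != 0)
    return false; // overflow: significant bits above this granule
  imm16 = static_cast<uint16_t>((value >> lsb) & 0xffff);
  return true;
}

int main() {
  const uint64_t addr = 0x0000007fb7e41234ULL; // hypothetical 64-bit address
  uint16_t imm;

  assert(!applyGranule(addr, 0, /*noCheck=*/false, imm));  // checked G0 fails
  assert( applyGranule(addr, 0, /*noCheck=*/true,  imm));  // G0_NC is fine
  assert( applyGranule(addr, 48, /*noCheck=*/false, imm)); // G3 never overflows
  return 0;
}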

View File

@@ -0,0 +1,61 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large < %s | FileCheck %s
@var8 = global i8 0
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0
define i8* @global_addr() {
; CHECK: global_addr:
ret i8* @var8
; The movz/movk calculation should end up returned directly in x0.
; CHECK: movz x0, #:abs_g3:var8
; CHECK: movk x0, #:abs_g2_nc:var8
; CHECK: movk x0, #:abs_g1_nc:var8
; CHECK: movk x0, #:abs_g0_nc:var8
; CHECK-NEXT: ret
}
define i8 @global_i8() {
; CHECK: global_i8:
%val = load i8* @var8
ret i8 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var8
; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var8
; CHECK: ldrb w0, [x[[ADDR_REG]]]
}
define i16 @global_i16() {
; CHECK: global_i16:
%val = load i16* @var16
ret i16 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var16
; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var16
; CHECK: ldrh w0, [x[[ADDR_REG]]]
}
define i32 @global_i32() {
; CHECK: global_i32:
%val = load i32* @var32
ret i32 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var32
; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var32
; CHECK: ldr w0, [x[[ADDR_REG]]]
}
define i64 @global_i64() {
; CHECK: global_i64:
%val = load i64* @var64
ret i64 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var64
; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var64
; CHECK: ldr x0, [x[[ADDR_REG]]]
}

View File

@@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s
declare extern_weak i32 @var()
@@ -11,6 +12,12 @@ define i32()* @foo() {
; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0]
; In the large model, the usual relocations are absolute and can
; materialise 0.
; CHECK-LARGE: movz x0, #:abs_g3:var
; CHECK-LARGE: movk x0, #:abs_g2_nc:var
; CHECK-LARGE: movk x0, #:abs_g1_nc:var
; CHECK-LARGE: movk x0, #:abs_g0_nc:var
}
@@ -24,6 +31,13 @@ define i32* @bar() {
; CHECK: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0]
; CHECK: add x0, [[BASE]], #20
ret i32* %addr
; In the large model, the usual relocations are absolute and can
; materialise 0.
; CHECK-LARGE: movz x0, #:abs_g3:arr_var
; CHECK-LARGE: movk x0, #:abs_g2_nc:arr_var
; CHECK-LARGE: movk x0, #:abs_g1_nc:arr_var
; CHECK-LARGE: movk x0, #:abs_g0_nc:arr_var
}
@defined_weak_var = internal unnamed_addr global i32 0
@@ -32,4 +46,9 @@ define i32* @wibble() {
ret i32* @defined_weak_var
; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
; CHECK: add x0, [[BASE]], #:lo12:defined_weak_var
; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g0_nc:defined_weak_var
}