[ARM] Eliminate redundant "mov rN, sp" instructions in Thumb1.

This takes sequences like "mov r4, sp; str r0, [r4]", and optimizes them
to something like "str r0, [sp]".

For regular stack variables, this optimization was already implemented:
we lower loads and stores using frame indexes, which are expanded later.
However, when constructing a call frame for a call with more than four
arguments, the existing optimization doesn't apply.  We need to use
stores which are actually relative to the current value of sp, and don't
have an associated frame index.

This patch adds a special case to handle that construct.  At the DAG
level, this is an ISD::STORE whose address is a CopyFromReg from SP,
optionally plus a small constant offset (a multiple of 4 that fits the
Thumb1 sp-relative imm8 * 4 addressing mode).

This applies only to Thumb1: in Thumb2 or ARM mode, a regular store
instruction can access SP directly, so the COPY gets eliminated by
existing code.

The change to ARMDAGToDAGISel::SelectThumbAddrModeSP is a related
cleanup: we shouldn't pretend that it can select anything other than
frame indexes.

Differential Revision: https://reviews.llvm.org/D59568

llvm-svn: 356601
This commit is contained in:
Eli Friedman 2019-03-20 19:40:45 +00:00
parent 8ca6ab33b7
commit 638be660d7
6 changed files with 75 additions and 46 deletions

View File

@ -1141,23 +1141,19 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
if (!CurDAG->isBaseWithConstantOffset(N))
return false;
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
(LHSR && LHSR->getReg() == ARM::SP)) {
if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
// If the RHS is + imm8 * scale, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// For LHS+RHS to result in an offset that's a multiple of 4 the object
// indexed by the LHS must be 4-byte aligned.
MachineFrameInfo &MFI = MF->getFrameInfo();
if (MFI.getObjectAlignment(FI) < 4)
MFI.setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// For LHS+RHS to result in an offset that's a multiple of 4 the object
// indexed by the LHS must be 4-byte aligned.
MachineFrameInfo &MFI = MF->getFrameInfo();
if (MFI.getObjectAlignment(FI) < 4)
MFI.setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
}
@ -2601,6 +2597,44 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::STORE: {
// For Thumb1, match an sp-relative store in C++. This is a little
// unfortunate, but I don't think I can make the chain check work
// otherwise. (The chain of the store has to be the same as the chain
// of the CopyFromReg, or else we can't replace the CopyFromReg with
// a direct reference to "SP".)
//
// This is only necessary on Thumb1 because Thumb1 sp-relative stores use
// a different addressing mode from other four-byte stores.
//
// This pattern usually comes up with call arguments.
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Ptr = ST->getBasePtr();
if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
int RHSC = 0;
if (Ptr.getOpcode() == ISD::ADD &&
isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
Ptr = Ptr.getOperand(0);
if (Ptr.getOpcode() == ISD::CopyFromReg &&
cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
Ptr.getOperand(0) == ST->getChain()) {
SDValue Ops[] = {ST->getValue(),
CurDAG->getRegister(ARM::SP, MVT::i32),
CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
ST->getChain()};
MachineSDNode *ResNode =
CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
MachineMemOperand *MemOp = ST->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
ReplaceNode(N, ResNode);
return;
}
}
break;
}
case ISD::WRITE_REGISTER:
if (tryWriteRegister(N))
return;

View File

@ -254,8 +254,8 @@ declare void @_ZSt9terminatev()
; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -12
; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -16
; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -20
; CHECK-THUMB-FP-ELIM: sub sp, #60
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 80
; CHECK-THUMB-FP-ELIM: sub sp, #52
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 72
; CHECK-THUMB-FP-ELIM: .cfi_endproc
; CHECK-THUMB-V7-FP-LABEL: _Z4testiiiiiddddd:

View File

@ -36,11 +36,10 @@ entry:
}
; CHECK-LABEL: test_args_sp
; Load `e`
; CHECK: ldr r0, [sp, #40]
; CHECK-NEXT: mov r5, sp
; CHECK-NEXT: str r3, [r5]
; CHECK: ldr r0, [sp, #32]
; CHECK-NEXT: str r3, [sp]
; Pass `e` on stack
; CHECK-NEXT: str r0, [r5, #4]
; CHECK-NEXT: str r0, [sp, #4]
; CHECK: bl g
; int test_varargs_sp(int a, ...) {
@ -92,10 +91,9 @@ entry:
; CHECK-NEXT: mov sp, r4
; Load `e` via FP
; CHECK: ldr r0, [r7, #8]
; CHECK-NEXT: mov r5, sp
; CHECK-NEXT: str r3, [r5]
; CHECK-NEXT: str r3, [sp]
; Pass `e` as argument
; CHECK-NEXT: str r0, [r5, #4]
; CHECK-NEXT: str r0, [sp, #4]
; CHECK: bl g
; int test_varargs_realign(int a, ...) {
@ -147,9 +145,9 @@ entry:
; CHECK: sub sp, #4
; Load `e` via FP
; CHECK: ldr r5, [r7, #8]
; CHECK-NEXT: mov r0, sp
; Pass `d` and `e` as arguments
; CHECK-NEXT: stm r0!, {r3, r5}
; CHECK-NEXT: str r3, [sp]
; CHECK-NEXT: str r5, [sp, #4]
; CHECK: bl g
; int test_varargs_vla(int a, ...) {

View File

@ -45,13 +45,12 @@ entry:
; CHECK: adcs r3, r4
; CHECK: adds r4, r2, r7
; CHECK: adcs r1, r6
; CHECK: mov r2, sp
; CHECK: str r4, [r2]
; CHECK: str r1, [r2, #4]
; CHECK: ldr r6, [r0, #16]
; CHECK: ldr r7, [r0, #24]
; CHECK: adcs r7, r6
; CHECK: str r7, [r2, #8]
; CHECK: ldr r6, [r0, #20]
; CHECK: str r4, [sp]
; CHECK: str r1, [sp, #4]
; CHECK: ldr r2, [r0, #16]
; CHECK: ldr r6, [r0, #24]
; CHECK: adcs r6, r2
; CHECK: str r6, [sp, #8]
; CHECK: ldr r2, [r0, #20]
; CHECK: ldr r0, [r0, #28]
; CHECK: adcs r0, r6
; CHECK: adcs r0, r2

View File

@ -501,10 +501,9 @@ if.end: ; preds = %for.body, %if.else
; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
;
; Setup of the varargs.
; CHECK: mov [[TMP_SP:r[0-9]+]], sp
; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]]]
; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #4]
; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #8]
; CHECK: str r1, [sp]
; CHECK-NEXT: str r1, [sp, #4]
; CHECK-NEXT: str r1, [sp, #8]
; CHECK: movs r0, r1
; CHECK-NEXT: movs r2, r1
; CHECK-NEXT: movs r3, r1

View File

@ -12,15 +12,14 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: mov r7, r2
; THUMBV6-NEXT: mov r4, r0
; THUMBV6-NEXT: movs r5, #0
; THUMBV6-NEXT: mov r0, sp
; THUMBV6-NEXT: str r5, [r0, #12]
; THUMBV6-NEXT: str r5, [r0, #8]
; THUMBV6-NEXT: ldr r1, [sp, #116]
; THUMBV6-NEXT: str r1, [sp, #72] @ 4-byte Spill
; THUMBV6-NEXT: str r1, [r0, #4]
; THUMBV6-NEXT: ldr r1, [sp, #112]
; THUMBV6-NEXT: str r1, [sp, #44] @ 4-byte Spill
; THUMBV6-NEXT: str r1, [r0]
; THUMBV6-NEXT: str r5, [sp, #12]
; THUMBV6-NEXT: str r5, [sp, #8]
; THUMBV6-NEXT: ldr r0, [sp, #116]
; THUMBV6-NEXT: str r0, [sp, #72] @ 4-byte Spill
; THUMBV6-NEXT: str r0, [sp, #4]
; THUMBV6-NEXT: ldr r0, [sp, #112]
; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
; THUMBV6-NEXT: str r0, [sp]
; THUMBV6-NEXT: mov r0, r2
; THUMBV6-NEXT: mov r1, r3
; THUMBV6-NEXT: mov r2, r5