From 638be660d732b0913869b0ede3f18e4d97e59dd1 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 20 Mar 2019 19:40:45 +0000 Subject: [PATCH] [ARM] Eliminate redundant "mov rN, sp" instructions in Thumb1. This takes sequences like "mov r4, sp; str r0, [r4]", and optimizes them to something like "str r0, [sp]". For regular stack variables, this optimization was already implemented: we lower loads and stores using frame indexes, which are expanded later. However, when constructing a call frame for a call with more than four arguments, the existing optimization doesn't apply. We need to use stores which are actually relative to the current value of sp, and don't have an associated frame index. This patch adds a special case to handle that construct. At the DAG level, this is an ISD::STORE where the address is a CopyFromReg from SP (plus a small constant offset). This applies only to Thumb1: in Thumb2 or ARM mode, a regular store instruction can access SP directly, so the COPY gets eliminated by existing code. The change to ARMDAGToDAGISel::SelectThumbAddrModeSP is a related cleanup: we shouldn't pretend that it can select anything other than frame indexes. 
Differential Revision: https://reviews.llvm.org/D59568 llvm-svn: 356601 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 60 +++++++++++++++---- llvm/test/CodeGen/ARM/debug-frame.ll | 4 +- llvm/test/CodeGen/Thumb/frame-access.ll | 16 +++-- llvm/test/CodeGen/Thumb/pr35836_2.ll | 17 +++--- .../CodeGen/Thumb/thumb-shrink-wrapping.ll | 7 +-- .../Thumb/umulo-128-legalisation-lowering.ll | 17 +++--- 6 files changed, 75 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 18a123375be9..5facc8d6b111 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1141,23 +1141,19 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, if (!CurDAG->isBaseWithConstantOffset(N)) return false; - RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); - if (N.getOperand(0).getOpcode() == ISD::FrameIndex || - (LHSR && LHSR->getReg() == ARM::SP)) { + if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { // If the RHS is + imm8 * scale, fold into addr mode. int RHSC; if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - // For LHS+RHS to result in an offset that's a multiple of 4 the object - // indexed by the LHS must be 4-byte aligned. - MachineFrameInfo &MFI = MF->getFrameInfo(); - if (MFI.getObjectAlignment(FI) < 4) - MFI.setObjectAlignment(FI, 4); - Base = CurDAG->getTargetFrameIndex( - FI, TLI->getPointerTy(CurDAG->getDataLayout())); - } + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + // For LHS+RHS to result in an offset that's a multiple of 4 the object + // indexed by the LHS must be 4-byte aligned. 
+ MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.getObjectAlignment(FI) < 4) + MFI.setObjectAlignment(FI, 4); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; } @@ -2601,6 +2597,44 @@ void ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::STORE: { + // For Thumb1, match an sp-relative store in C++. This is a little + // unfortunate, but I don't think I can make the chain check work + // otherwise. (The chain of the store has to be the same as the chain + // of the CopyFromReg, or else we can't replace the CopyFromReg with + // a direct reference to "SP".) + // + // This is only necessary on Thumb1 because Thumb1 sp-relative stores use + // a different addressing mode from other four-byte stores. + // + // This pattern usually comes up with call arguments. + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Ptr = ST->getBasePtr(); + if (Subtarget->isThumb1Only() && ST->isUnindexed()) { + int RHSC = 0; + if (Ptr.getOpcode() == ISD::ADD && + isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) + Ptr = Ptr.getOperand(0); + + if (Ptr.getOpcode() == ISD::CopyFromReg && + cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && + Ptr.getOperand(0) == ST->getChain()) { + SDValue Ops[] = {ST->getValue(), + CurDAG->getRegister(ARM::SP, MVT::i32), + CurDAG->getTargetConstant(RHSC, dl, MVT::i32), + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32), + ST->getChain()}; + MachineSDNode *ResNode = + CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); + MachineMemOperand *MemOp = ST->getMemOperand(); + CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); + ReplaceNode(N, ResNode); + return; + } + } + break; + } case ISD::WRITE_REGISTER: if (tryWriteRegister(N)) return; diff --git a/llvm/test/CodeGen/ARM/debug-frame.ll b/llvm/test/CodeGen/ARM/debug-frame.ll index 6efe58afb38a..b561be465d25 100644 --- 
a/llvm/test/CodeGen/ARM/debug-frame.ll +++ b/llvm/test/CodeGen/ARM/debug-frame.ll @@ -254,8 +254,8 @@ declare void @_ZSt9terminatev() ; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -12 ; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -16 ; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -20 -; CHECK-THUMB-FP-ELIM: sub sp, #60 -; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 80 +; CHECK-THUMB-FP-ELIM: sub sp, #52 +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 72 ; CHECK-THUMB-FP-ELIM: .cfi_endproc ; CHECK-THUMB-V7-FP-LABEL: _Z4testiiiiiddddd: diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll index a9d2999c050b..b472d235d39d 100644 --- a/llvm/test/CodeGen/Thumb/frame-access.ll +++ b/llvm/test/CodeGen/Thumb/frame-access.ll @@ -36,11 +36,10 @@ entry: } ; CHECK-LABEL: test_args_sp ; Load `e` -; CHECK: ldr r0, [sp, #40] -; CHECK-NEXT: mov r5, sp -; CHECK-NEXT: str r3, [r5] +; CHECK: ldr r0, [sp, #32] +; CHECK-NEXT: str r3, [sp] ; Pass `e` on stack -; CHECK-NEXT: str r0, [r5, #4] +; CHECK-NEXT: str r0, [sp, #4] ; CHECK: bl g ; int test_varargs_sp(int a, ...) { @@ -92,10 +91,9 @@ entry: ; CHECK-NEXT: mov sp, r4 ; Load `e` via FP ; CHECK: ldr r0, [r7, #8] -; CHECK-NEXT: mov r5, sp -; CHECK-NEXT: str r3, [r5] +; CHECK-NEXT: str r3, [sp] ; Pass `e` as argument -; CHECK-NEXT: str r0, [r5, #4] +; CHECK-NEXT: str r0, [sp, #4] ; CHECK: bl g ; int test_varargs_realign(int a, ...) { @@ -147,9 +145,9 @@ entry: ; CHECK: sub sp, #4 ; Load `e` via FP ; CHECK: ldr r5, [r7, #8] -; CHECK-NEXT: mov r0, sp ; Pass `d` and `e` as arguments -; CHECK-NEXT: stm r0!, {r3, r5} +; CHECK-NEXT: str r3, [sp] +; CHECK-NEXT: str r5, [sp, #4] ; CHECK: bl g ; int test_varargs_vla(int a, ...) 
{ diff --git a/llvm/test/CodeGen/Thumb/pr35836_2.ll b/llvm/test/CodeGen/Thumb/pr35836_2.ll index af115e8ce21a..26e27293dc78 100644 --- a/llvm/test/CodeGen/Thumb/pr35836_2.ll +++ b/llvm/test/CodeGen/Thumb/pr35836_2.ll @@ -45,13 +45,12 @@ entry: ; CHECK: adcs r3, r4 ; CHECK: adds r4, r2, r7 ; CHECK: adcs r1, r6 -; CHECK: mov r2, sp -; CHECK: str r4, [r2] -; CHECK: str r1, [r2, #4] -; CHECK: ldr r6, [r0, #16] -; CHECK: ldr r7, [r0, #24] -; CHECK: adcs r7, r6 -; CHECK: str r7, [r2, #8] -; CHECK: ldr r6, [r0, #20] +; CHECK: str r4, [sp] +; CHECK: str r1, [sp, #4] +; CHECK: ldr r2, [r0, #16] +; CHECK: ldr r6, [r0, #24] +; CHECK: adcs r6, r2 +; CHECK: str r6, [sp, #8] +; CHECK: ldr r2, [r0, #20] ; CHECK: ldr r0, [r0, #28] -; CHECK: adcs r0, r6 +; CHECK: adcs r0, r2 diff --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll index d334706c8e84..f22f1288d834 100644 --- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -501,10 +501,9 @@ if.end: ; preds = %for.body, %if.else ; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] ; ; Setup of the varags. 
-; CHECK: mov [[TMP_SP:r[0-9]+]], sp -; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]]] -; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #4] -; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #8] +; CHECK: str r1, [sp] +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: str r1, [sp, #8] ; CHECK: movs r0, r1 ; CHECK-NEXT: movs r2, r1 ; CHECK-NEXT: movs r3, r1 diff --git a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll index 35cee36f3838..73c61bb199dc 100644 --- a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll @@ -12,15 +12,14 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: mov r7, r2 ; THUMBV6-NEXT: mov r4, r0 ; THUMBV6-NEXT: movs r5, #0 -; THUMBV6-NEXT: mov r0, sp -; THUMBV6-NEXT: str r5, [r0, #12] -; THUMBV6-NEXT: str r5, [r0, #8] -; THUMBV6-NEXT: ldr r1, [sp, #116] -; THUMBV6-NEXT: str r1, [sp, #72] @ 4-byte Spill -; THUMBV6-NEXT: str r1, [r0, #4] -; THUMBV6-NEXT: ldr r1, [sp, #112] -; THUMBV6-NEXT: str r1, [sp, #44] @ 4-byte Spill -; THUMBV6-NEXT: str r1, [r0] +; THUMBV6-NEXT: str r5, [sp, #12] +; THUMBV6-NEXT: str r5, [sp, #8] +; THUMBV6-NEXT: ldr r0, [sp, #116] +; THUMBV6-NEXT: str r0, [sp, #72] @ 4-byte Spill +; THUMBV6-NEXT: str r0, [sp, #4] +; THUMBV6-NEXT: ldr r0, [sp, #112] +; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill +; THUMBV6-NEXT: str r0, [sp] ; THUMBV6-NEXT: mov r0, r2 ; THUMBV6-NEXT: mov r1, r3 ; THUMBV6-NEXT: mov r2, r5