diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ca2fcfa036f3..11d41775421a 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -707,12 +707,12 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
       .addReg(ScratchExecCopy);
   }
 
-  if (hasFP(MF)) {
-    const MachineFrameInfo &MFI = MF.getFrameInfo();
-    uint32_t NumBytes = MFI.getStackSize();
-    uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
-      NumBytes + MFI.getMaxAlignment() : NumBytes;
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  uint32_t NumBytes = MFI.getStackSize();
+  uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
+    NumBytes + MFI.getMaxAlignment() : NumBytes;
 
+  if (RoundedSize != 0 && hasFP(MF)) {
     const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
       .addReg(StackPtrReg)
@@ -863,14 +863,10 @@ bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
     // API SP if there are calls.
     if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
       return true;
-
-    // Retain behavior of always omitting the FP for leaf functions when
-    // possible.
-    if (MF.getTarget().Options.DisableFramePointerElim(MF))
-      return true;
   }
 
   return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
     MFI.hasStackMap() || MFI.hasPatchPoint() ||
-    MF.getSubtarget().getRegisterInfo()->needsStackRealignment(MF);
+    MF.getSubtarget().getRegisterInfo()->needsStackRealignment(MF) ||
+    MF.getTarget().Options.DisableFramePointerElim(MF);
 }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index f1d10aca515a..69c8b53f55cc 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -9,15 +9,22 @@ define void @callee_no_stack() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim:
+; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_mov_b32 s5, s32
 ; GCN-NEXT: s_setpc_b64
-define void @callee_no_stack_no_fp_elim() #1 {
+define void @callee_no_stack_no_fp_elim_all() #1 {
   ret void
 }
 
-; Requires frame pointer for access to local regular object.
+; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
+  ret void
+}
 
 ; GCN-LABEL: {{^}}callee_with_stack:
 ; GCN: ; %bb.0:
@@ -32,6 +39,35 @@ define void @callee_with_stack() #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_mov_b32 s5, s32
+; GCN-NEXT: s_add_u32 s32, s32, 0x200
+; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
+; GCN-NEXT: s_sub_u32 s32, s32, 0x200
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @callee_with_stack_no_fp_elim_all() #1 {
+  %alloca = alloca i32, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  ret void
+}
+
+; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
+  %alloca = alloca i32, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  ret void
+}
+
 ; GCN-LABEL: {{^}}callee_with_stack_and_call:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt
@@ -151,4 +187,5 @@ define void @spill_only_csr_sgpr() {
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind "no-frame-pointer-elim"="true" }
+attributes #1 = { nounwind "frame-pointer"="all" }
+attributes #2 = { nounwind "frame-pointer"="non-leaf" }