AMDGPU: Don't enable all lanes with non-CSR VGPR spills

If the only VGPRs used for SGPR spilling were not CSRs, this was
enabling all lanes and immediately restoring exec. This is the usual
situation in leaf functions.

llvm-svn: 361848
This commit is contained in:
Matt Arsenault 2019-05-28 16:46:02 +00:00
parent 7166843f1e
commit 24e80b8d04
2 changed files with 63 additions and 37 deletions

View File

@@ -613,30 +613,36 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
}
// To avoid clobbering VGPRs in lanes that weren't active on function entry,
// turn on all lanes before doing the spill to memory.
unsigned ScratchExecCopy
= findScratchNonCalleeSaveRegister(MF, LiveRegs,
AMDGPU::SReg_64_XEXECRegClass);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
.addImm(-1);
unsigned ScratchExecCopy = AMDGPU::NoRegister;
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
: FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
continue;
if (ScratchExecCopy == AMDGPU::NoRegister) {
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
}
ScratchExecCopy
= findScratchNonCalleeSaveRegister(MF, LiveRegs,
AMDGPU::SReg_64_XEXECRegClass);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
ScratchExecCopy)
.addImm(-1);
}
TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
&TII->getRegisterInfo());
}
if (ScratchExecCopy != AMDGPU::NoRegister) {
// FIXME: Split block and make terminator.
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
.addReg(ScratchExecCopy);
@@ -654,27 +660,31 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc DL;
if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
unsigned ScratchExecCopy = AMDGPU::NoRegister;
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
: FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
continue;
if (ScratchExecCopy == AMDGPU::NoRegister) {
// See emitPrologue
LivePhysRegs LiveRegs(*ST.getRegisterInfo());
LiveRegs.addLiveIns(MBB);
unsigned ScratchExecCopy
ScratchExecCopy
= findScratchNonCalleeSaveRegister(MF, LiveRegs,
AMDGPU::SReg_64_XEXECRegClass);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
.addImm(-1);
}
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
: FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
continue;
TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
&TII->getRegisterInfo());
}
if (ScratchExecCopy != AMDGPU::NoRegister) {
// FIXME: Split block and make terminator.
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
.addReg(ScratchExecCopy);

View File

@@ -135,5 +135,21 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
ret void
}
; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
; enable all lanes and restore.
; GCN-LABEL: {{^}}spill_only_csr_sgpr:
; GCN: s_waitcnt
; GCN-NEXT: v_writelane_b32 v0, s42, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; clobber s42
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s42, v0, 0
; GCN-NEXT: s_setpc_b64
; Leaf function whose only spill is the CSR SGPR s42, held in a VGPR lane
; (v_writelane/v_readlane in the checks above) rather than spilled to memory.
; Since no SGPR-spill VGPR has a stack frame index here, the prologue/epilogue
; must not emit the s_or_saveexec / exec-restore pair — the GCN-NEXT lines
; above enforce that nothing appears between s_waitcnt and the lane writes.
define void @spill_only_csr_sgpr() {
call void asm sideeffect "; clobber s42", "~{s42}"()
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind "no-frame-pointer-elim"="true" }