[AMDGPU] Turn s_and_saveexec_b64 into s_and_b64 if result is unused
With SI_END_CF elimination, for some nested control flow we can now eliminate the saved exec register completely by turning the saveexec version of an instruction into a plain logical instruction. Differential Revision: https://reviews.llvm.org/D36007 llvm-svn: 309766
This commit is contained in:
parent
4f345060dd
commit
da0edef1bd
|
@ -87,6 +87,30 @@ static unsigned isCopyToExec(const MachineInstr &MI) {
|
|||
return AMDGPU::NoRegister;
|
||||
}
|
||||
|
||||
/// If \p MI is a logical operation on an exec value,
|
||||
/// return the register copied to.
|
||||
static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::S_AND_B64:
|
||||
case AMDGPU::S_OR_B64:
|
||||
case AMDGPU::S_XOR_B64:
|
||||
case AMDGPU::S_ANDN2_B64:
|
||||
case AMDGPU::S_ORN2_B64:
|
||||
case AMDGPU::S_NAND_B64:
|
||||
case AMDGPU::S_NOR_B64:
|
||||
case AMDGPU::S_XNOR_B64: {
|
||||
const MachineOperand &Src1 = MI.getOperand(1);
|
||||
if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC)
|
||||
return MI.getOperand(0).getReg();
|
||||
const MachineOperand &Src2 = MI.getOperand(2);
|
||||
if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
|
||||
return MI.getOperand(0).getReg();
|
||||
}
|
||||
}
|
||||
|
||||
return AMDGPU::NoRegister;
|
||||
}
|
||||
|
||||
static unsigned getSaveExecOp(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case AMDGPU::S_AND_B64:
|
||||
|
@ -209,8 +233,24 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
|
|||
// Scan backwards to find the def.
|
||||
auto CopyToExecInst = &*I;
|
||||
auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
|
||||
if (CopyFromExecInst == E)
|
||||
if (CopyFromExecInst == E) {
|
||||
auto PrepareExecInst = std::next(I);
|
||||
if (PrepareExecInst == E)
|
||||
continue;
|
||||
// Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
|
||||
if (CopyToExecInst->getOperand(1).isKill() &&
|
||||
isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
|
||||
DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
|
||||
|
||||
PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
|
||||
|
||||
DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
|
||||
|
||||
CopyToExecInst->eraseFromParent();
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isLiveOut(MBB, CopyToExec)) {
|
||||
// The copied register is live out and has a second use in another block.
|
||||
|
|
|
@ -147,6 +147,30 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
|
||||
Changed = true;
|
||||
|
||||
// If the only use of saved exec in the removed instruction is S_AND_B64
|
||||
// fold the copy now.
|
||||
auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
|
||||
if (!SaveExec || !SaveExec->isFullCopy())
|
||||
continue;
|
||||
|
||||
unsigned SavedExec = SaveExec->getOperand(0).getReg();
|
||||
bool SafeToReplace = true;
|
||||
for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
|
||||
if (U.getParent() != SaveExec->getParent()) {
|
||||
SafeToReplace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
|
||||
}
|
||||
|
||||
if (SafeToReplace) {
|
||||
LIS->RemoveMachineInstrFromMaps(*SaveExec);
|
||||
SaveExec->eraseFromParent();
|
||||
MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
|
||||
LIS->removeInterval(SavedExec);
|
||||
}
|
||||
}
|
||||
|
||||
if (Changed) {
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: s_and_b64 exec, exec, vcc
|
||||
; GCN-NEXT: ; mask branch [[ENDIF]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN: store_dword
|
||||
|
|
|
@ -0,0 +1,147 @@
|
|||
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
# GCN-LABEL: name: reduce_and_saveexec
|
||||
# GCN: %exec = S_AND_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_and_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_and_saveexec_commuted
|
||||
# GCN: %exec = S_AND_B64 killed %vcc, %exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_and_saveexec_commuted
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_AND_B64 killed %vcc, %exec, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_and_saveexec_liveout
|
||||
# GCN: %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: %exec = COPY
|
||||
name: reduce_and_saveexec_liveout
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_saveexec
|
||||
# GCN: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: and_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = COPY %exec
|
||||
%sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
|
||||
%exec = S_MOV_B64_term %sgpr2_sgpr3
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_or_saveexec
|
||||
# GCN: %exec = S_OR_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_or_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_xor_saveexec
|
||||
# GCN: %exec = S_XOR_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_xor_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_XOR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_andn2_saveexec
|
||||
# GCN: %exec = S_ANDN2_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_andn2_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_ANDN2_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_orn2_saveexec
|
||||
# GCN: %exec = S_ORN2_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_orn2_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_ORN2_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_nand_saveexec
|
||||
# GCN: %exec = S_NAND_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_nand_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_NAND_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_nor_saveexec
|
||||
# GCN: %exec = S_NOR_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_nor_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_NOR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: reduce_xnor_saveexec
|
||||
# GCN: %exec = S_XNOR_B64 %exec, killed %vcc
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: reduce_xnor_saveexec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_XNOR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%exec = COPY killed %sgpr0_sgpr1
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
Loading…
Reference in New Issue