AMDGPU: Remove spurious out branches after a kill
The sequence like this: v_cmpx_le_f32_e32 vcc, 0, v0 s_branch BB0_30 s_cbranch_execnz BB0_30 ; BB#29: exp null off, off, off, off done vm s_endpgm BB0_30: ; %endif110 is likely wrong. The s_branch instruction will unconditionally jump to BB0_30 and the skip block (exp done + endpgm) inserted for performing the kill instruction will never be executed. This results in a GPU hang with Star Ruler 2. The s_branch instruction is added during the "Control Flow Optimizer" pass which seems to re-organize the basic blocks, and we assume that SI_KILL_TERMINATOR is always the last instruction inside a basic block. Thus, after inserting a skip block we just go to the next BB without looking at the subsequent instructions after the kill, and the s_branch op is never removed. Instead, we should remove the unconditional out branches and let skip the two instructions if the exec mask is non-zero. This patch fixes the GPU hang and doesn't introduce any regressions with "make check". Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99019 Patch by Samuel Pitoiset <samuel.pitoiset@gmail.com> llvm-svn: 292985
This commit is contained in:
parent
f1cf0278e8
commit
bf67cf7e4b
|
@ -263,6 +263,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
|||
BI != BE; BI = NextBB) {
|
||||
NextBB = std::next(BI);
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
bool HaveSkipBlock = false;
|
||||
|
||||
if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
|
||||
// Reached convergence point for last divergent branch.
|
||||
|
@ -290,8 +291,14 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
|||
case AMDGPU::S_BRANCH:
|
||||
// Optimize out branches to the next block.
|
||||
// FIXME: Shouldn't this be handled by BranchFolding?
|
||||
if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
|
||||
if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
|
||||
MI.eraseFromParent();
|
||||
} else if (HaveSkipBlock) {
|
||||
// Remove the given unconditional branch when a skip block has been
|
||||
// inserted after the current one and let skip the two instructions
|
||||
// performing the kill if the exec mask is non-zero.
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_KILL_TERMINATOR:
|
||||
|
@ -300,9 +307,9 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
if (ExecBranchStack.empty()) {
|
||||
if (skipIfDead(MI, *NextBB)) {
|
||||
HaveSkipBlock = true;
|
||||
NextBB = std::next(BI);
|
||||
BE = MF.end();
|
||||
Next = MBB.end();
|
||||
}
|
||||
} else {
|
||||
HaveKill = true;
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 %s -o - | FileCheck %s
|
||||
# https://bugs.freedesktop.org/show_bug.cgi?id=99019
|
||||
--- |
|
||||
define amdgpu_ps void @kill_uncond_branch() {
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
|
||||
# CHECK-LABEL: name: kill_uncond_branch
|
||||
|
||||
# CHECK: bb.0:
|
||||
# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit %vcc
|
||||
|
||||
# CHECK: bb.1:
|
||||
# CHECK: V_CMPX_LE_F32_e32
|
||||
# CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit %exec
|
||||
|
||||
# CHECK: bb.3:
|
||||
# CHECK-NEXT: EXP_DONE
|
||||
# CHECK: S_ENDPGM
|
||||
|
||||
# CHECK: bb.2:
|
||||
# CHECK: S_ENDPGM
|
||||
|
||||
name: kill_uncond_branch
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit %vcc
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
%vgpr0 = V_MOV_B32_e32 0, implicit %exec
|
||||
SI_KILL_TERMINATOR %vgpr0, implicit-def %exec, implicit-def %vcc, implicit %exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM
|
Loading…
Reference in New Issue