[AMDGPU] Eliminate no effect instructions before s_endpgm
Differential Revision: https://reviews.llvm.org/D36585
llvm-svn: 310987
This commit is contained in:
parent
0c6374e513
commit
a9487d92d7
|
@ -111,9 +111,62 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
|
|||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
|
||||
DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
|
||||
bool Changed = false;
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
|
||||
// Try to remove unneeded instructions before s_endpgm.
|
||||
if (MBB.succ_empty()) {
|
||||
if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
|
||||
continue;
|
||||
|
||||
SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
|
||||
|
||||
while (!Blocks.empty()) {
|
||||
auto CurBB = Blocks.pop_back_val();
|
||||
auto I = CurBB->rbegin(), E = CurBB->rend();
|
||||
if (I != E) {
|
||||
if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
|
||||
++I;
|
||||
else if (I->isBranch())
|
||||
continue;
|
||||
}
|
||||
|
||||
// Scan the block from the bottom up and delete instructions until one is
// found that stores, is a barrier or call, has unmodeled side effects, or
// has an ordered memory reference. On exit, I == E means the whole block
// was consumed; otherwise I points at the instruction that stopped the scan.
while (I != E) {
  // Debug values are left in place, but the iterator must step past them.
  // A bare `continue` here re-tests the same instruction forever and hangs
  // the pass on any block that has a DBG_VALUE ahead of the terminator.
  if (I->isDebugValue()) {
    ++I;
    continue;
  }

  if (I->mayStore() || I->isBarrier() || I->isCall() ||
      I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
    break;

  DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n');

  // Record every register operand of the instruction being removed; the
  // RecalcRegs set is consumed after the main loop to rebuild liveness.
  for (auto &Op : I->operands())
    if (Op.isReg())
      RecalcRegs.insert(Op.getReg());

  // Capture the successor before erasing so the iterator stays valid.
  auto Next = std::next(I);
  LIS->RemoveMachineInstrFromMaps(*I);
  I->eraseFromParent();
  I = Next;

  Changed = true;
}
|
||||
|
||||
if (I != E)
|
||||
continue;
|
||||
|
||||
// Try to ascend predecessors.
|
||||
for (auto *Pred : CurBB->predecessors()) {
|
||||
if (Pred->succ_size() == 1)
|
||||
Blocks.push_back(Pred);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try to collapse adjacent endifs.
|
||||
auto Lead = MBB.begin(), E = MBB.end();
|
||||
if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
|
||||
continue;
|
||||
|
@ -174,9 +227,16 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
|
||||
if (Changed) {
|
||||
// Recompute liveness for both reg units of exec.
|
||||
LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC_LO, TRI));
|
||||
LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC_HI, TRI));
|
||||
for (auto Reg : RecalcRegs) {
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
|
||||
LIS->removeInterval(Reg);
|
||||
if (!MRI.reg_empty(Reg))
|
||||
LIS->createAndComputeVirtRegInterval(Reg);
|
||||
} else {
|
||||
for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
|
||||
LIS->removeRegUnit(*U);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
; GCN: ds_write_b32
|
||||
|
||||
; GCN: [[BB5]]
|
||||
; GCN: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN-NEXT: .Lfunc_end
|
||||
define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
|
@ -45,7 +44,6 @@ bb.outer.end: ; preds = %bb.outer.then, %bb.
|
|||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
|
@ -90,7 +88,6 @@ bb.outer.end: ; preds = %bb.inner.then, %bb
|
|||
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
|
@ -141,13 +138,10 @@ bb.outer.end: ; preds = %bb, %bb.then, %b
|
|||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF_INNER_OUTER_THEN:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF_INNER_OUTER_THEN]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_THEN]]
|
||||
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
|
@ -183,6 +177,33 @@ bb.outer.end:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
|
||||
; GCN: s_barrier
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = icmp ugt i32 %tmp, 1
|
||||
br i1 %tmp1, label %bb.then, label %bb.end
|
||||
|
||||
bb.then: ; preds = %bb
|
||||
%tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
|
||||
store i32 0, i32 addrspace(1)* %tmp4, align 4
|
||||
br label %bb.end
|
||||
|
||||
bb.end: ; preds = %bb.then, %bb
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable }
|
||||
attributes #1 = { nounwind convergent }
|
||||
|
|
|
@ -0,0 +1,297 @@
|
|||
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: kill_all
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: kill_all
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: sgpr_32 }
|
||||
- { id: 4, class: sgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%0 = IMPLICIT_DEF
|
||||
%3 = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
|
||||
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
|
||||
%4 = S_ADD_U32 %3, 1, implicit-def %scc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: load_without_memoperand
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: dead %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: load_without_memoperand
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: sgpr_32 }
|
||||
- { id: 4, class: sgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%0 = IMPLICIT_DEF
|
||||
%3 = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr
|
||||
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
|
||||
%4 = S_ADD_U32 %3, 1, implicit-def %scc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: load_volatile
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: dead %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile load 4)
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: load_volatile
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: sgpr_32 }
|
||||
- { id: 4, class: sgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%0 = IMPLICIT_DEF
|
||||
%3 = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile load 4)
|
||||
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
|
||||
%4 = S_ADD_U32 %3, 1, implicit-def %scc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: store
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: store
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: barrier
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: S_BARRIER
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: barrier
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
S_BARRIER
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: call
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: %sgpr4_sgpr5 = S_SWAPPC_B64 %sgpr2_sgpr3
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: call
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
%sgpr4_sgpr5 = S_SWAPPC_B64 %sgpr2_sgpr3
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: exp
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: EXP 32, undef %0, undef %1, %2, undef %3, 0, 0, 15, implicit %exec
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: exp
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%2 = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
EXP 32, undef %0, undef %1, killed %2, undef %3, 0, 0, 15, implicit %exec
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: return_to_epilog
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: SI_RETURN_TO_EPILOG killed %vgpr0
|
||||
name: return_to_epilog
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%vgpr0 = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
SI_RETURN_TO_EPILOG killed %vgpr0
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: split_block
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: successors: %bb.1
|
||||
# GCN-NOT: S_OR_B64
|
||||
# GCN: bb.1:
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: split_block
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: sgpr_32 }
|
||||
- { id: 3, class: sgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
|
||||
bb.1:
|
||||
%0 = IMPLICIT_DEF
|
||||
%2 = IMPLICIT_DEF
|
||||
%1 = V_ADD_F32_e64 0, killed %0, 0, 1, 0, 0, implicit %exec
|
||||
%3 = S_ADD_U32 %2, 1, implicit-def %scc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: split_block_empty_block
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: successors: %bb.1
|
||||
# GCN-NOT: S_OR_B64
|
||||
# GCN: bb.1:
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: split_block_empty_block
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
|
||||
bb.1:
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: split_block_uncond_branch
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: successors: %bb.1
|
||||
# GCN: S_BRANCH %bb.1
|
||||
# GCN-NOT: S_OR_B64
|
||||
# GCN: bb.1:
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: split_block_uncond_branch
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: split_block_cond_branch
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, %vcc, implicit-def %scc
|
||||
# GCN: S_CBRANCH_VCCNZ %bb.2, implicit undef %vcc
|
||||
# GCN: bb.1:
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: split_block_cond_branch
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, %vcc, implicit-def %scc
|
||||
S_CBRANCH_VCCNZ %bb.2, implicit undef %vcc
|
||||
|
||||
bb.1:
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: two_preds_both_dead
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: successors: %bb.2
|
||||
# GCN-NOT: S_OR
|
||||
# GCN: S_BRANCH %bb.2
|
||||
# GCN: bb.1:
|
||||
# GCN-NEXT: successors: %bb.2
|
||||
# GCN-NOT: S_AND
|
||||
# GCN: S_BRANCH %bb.2
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: two_preds_both_dead
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: two_preds_one_dead
|
||||
# GCN: bb.0:
|
||||
# GCN-NEXT: successors: %bb.2
|
||||
# GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
# GCN-NEXT: S_BARRIER
|
||||
# GCN-NEXT: S_BRANCH %bb.2
|
||||
# GCN: bb.1:
|
||||
# GCN-NEXT: successors: %bb.2
|
||||
# GCN-NOT: S_AND
|
||||
# GCN: S_BRANCH %bb.2
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: two_preds_one_dead
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
|
||||
S_BARRIER
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
%vcc = IMPLICIT_DEF
|
||||
%sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM
|
||||
...
|
|
@ -42,7 +42,6 @@
|
|||
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]
|
||||
|
||||
; GCN: ; BB#4: ; %bb9
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
|
||||
bb:
|
||||
|
|
|
@ -86,7 +86,6 @@
|
|||
; GCN: buffer_store_dword
|
||||
|
||||
; GCN: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
|
||||
entry:
|
||||
|
|
|
@ -21,7 +21,6 @@ body: |
|
|||
%2 = IMPLICIT_DEF
|
||||
%3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
|
||||
%4, %5 = V_SUBBREV_U32_e64 0, %0, %3, implicit %exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
||||
|
@ -46,7 +45,6 @@ body: |
|
|||
%2 = IMPLICIT_DEF
|
||||
%3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
|
||||
%4, %5 = V_SUBB_U32_e64 %0, 0, %3, implicit %exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
||||
|
@ -71,7 +69,6 @@ body: |
|
|||
%2 = IMPLICIT_DEF
|
||||
%3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
|
||||
%4, %5 = V_ADDC_U32_e64 0, %0, %3, implicit %exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
||||
|
@ -96,6 +93,5 @@ body: |
|
|||
%2 = IMPLICIT_DEF
|
||||
%3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
|
||||
%4, %5 = V_ADDC_U32_e64 %0, 0, %3, implicit %exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
|
|
@ -42,7 +42,6 @@ bb5: ; preds = %bb3, %bb1
|
|||
; GCN: s_and_saveexec_b64
|
||||
; GCN: ; mask branch [[UNIFIED_RET:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: [[UNIFIED_RET]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN: .Lfunc_end
|
||||
define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
; GCN: ; divergent unreachable
|
||||
|
||||
; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN: s_endpgm
|
||||
|
||||
define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
|
||||
|
@ -37,7 +36,6 @@ ret:
|
|||
; GCN: ; divergent unreachable
|
||||
|
||||
; GCN: [[RETURN]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
|
||||
bb:
|
||||
|
|
|
@ -354,7 +354,6 @@ bb7: ; preds = %bb4
|
|||
; CHECK: buffer_store_dword
|
||||
|
||||
; CHECK: [[END]]:
|
||||
; CHECK: s_or_b64 exec, exec
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
|
||||
bb:
|
||||
|
|
|
@ -35,7 +35,6 @@ body: |
|
|||
S_NOP 0, implicit %3.sub1
|
||||
S_NOP 0, implicit %0.sub1
|
||||
S_NOP 0, implicit undef %0.sub0
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
||||
|
|
|
@ -330,12 +330,14 @@ endif:
|
|||
|
||||
; GCN-LABEL: {{^}}divergent_inside_uniform:
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: [[IF_LABEL]]:
|
||||
; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
|
||||
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN: ; mask branch [[ENDIF_LABEL]]
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
; GCN: [[ENDIF_LABEL]]:
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
|
||||
entry:
|
||||
%u_cmp = icmp eq i32 %cond, 0
|
||||
|
|
|
@ -71,7 +71,6 @@ end:
|
|||
; SI: buffer_store_dword
|
||||
|
||||
; SI-NEXT: {{^}}[[EXIT]]:
|
||||
; SI: s_or_b64 exec, exec, [[BR_SREG]]
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
@ -98,7 +97,6 @@ exit:
|
|||
; SI: buffer_store_dword
|
||||
|
||||
; SI-NEXT: {{^}}[[EXIT]]:
|
||||
; SI: s_or_b64 exec, exec, [[BR_SREG]]
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @simple_test_v_if_ret_else_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -137,7 +135,6 @@ exit:
|
|||
; SI-NEXT: buffer_store_dword
|
||||
|
||||
; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock
|
||||
; SI: s_or_b64 exec, exec
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -230,9 +227,6 @@ exit:
|
|||
; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
|
||||
; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
|
||||
|
||||
; SI: BB#5
|
||||
; SI: s_or_b64 exec, exec, [[COND_STATE]]
|
||||
|
||||
; SI: [[LABEL_EXIT]]:
|
||||
; SI-NOT: [[COND_STATE]]
|
||||
; SI: s_endpgm
|
||||
|
|
|
@ -8,15 +8,17 @@
|
|||
|
||||
declare void @llvm.write_register.i32(metadata, i32) #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
declare void @llvm.amdgcn.wave.barrier() #2
|
||||
|
||||
define amdgpu_kernel void @write_vgpr_into_sgpr() {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
call void @llvm.write_register.i32(metadata !0, i32 %tid)
|
||||
call void @llvm.amdgcn.wave.barrier() #2
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
!0 = !{!"exec_lo"}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare void @llvm.write_register.i32(metadata, i32) #0
|
||||
declare void @llvm.write_register.i64(metadata, i64) #0
|
||||
|
@ -8,6 +8,7 @@ define amdgpu_kernel void @test_write_m0(i32 %val) #0 {
|
|||
call void @llvm.write_register.i32(metadata !0, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !0, i32 -1)
|
||||
call void @llvm.write_register.i32(metadata !0, i32 %val)
|
||||
call void @llvm.amdgcn.wave.barrier() #1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -19,6 +20,7 @@ define amdgpu_kernel void @test_write_exec(i64 %val) #0 {
|
|||
call void @llvm.write_register.i64(metadata !1, i64 0)
|
||||
call void @llvm.write_register.i64(metadata !1, i64 -1)
|
||||
call void @llvm.write_register.i64(metadata !1, i64 %val)
|
||||
call void @llvm.amdgcn.wave.barrier() #1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -30,6 +32,7 @@ define amdgpu_kernel void @test_write_flat_scratch(i64 %val) #0 {
|
|||
call void @llvm.write_register.i64(metadata !2, i64 0)
|
||||
call void @llvm.write_register.i64(metadata !2, i64 -1)
|
||||
call void @llvm.write_register.i64(metadata !2, i64 %val)
|
||||
call void @llvm.amdgcn.wave.barrier() #1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -39,6 +42,7 @@ define amdgpu_kernel void @test_write_flat_scratch(i64 %val) #0 {
|
|||
define amdgpu_kernel void @test_write_flat_scratch_lo(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !3, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !3, i32 %val)
|
||||
call void @llvm.amdgcn.wave.barrier() #1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -48,6 +52,7 @@ define amdgpu_kernel void @test_write_flat_scratch_lo(i32 %val) #0 {
|
|||
define amdgpu_kernel void @test_write_flat_scratch_hi(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !4, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !4, i32 %val)
|
||||
call void @llvm.amdgcn.wave.barrier() #1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -57,6 +62,7 @@ define amdgpu_kernel void @test_write_flat_scratch_hi(i32 %val) #0 {
|
|||
define amdgpu_kernel void @test_write_exec_lo(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !5, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !5, i32 %val)
|
||||
call void @llvm.amdgcn.wave.barrier() #1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -66,10 +72,14 @@ define amdgpu_kernel void @test_write_exec_lo(i32 %val) #0 {
|
|||
define amdgpu_kernel void @test_write_exec_hi(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !6, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !6, i32 %val)
|
||||
call void @llvm.amdgcn.wave.barrier() #1
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.wave.barrier() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { convergent nounwind }
|
||||
|
||||
!0 = !{!"m0"}
|
||||
!1 = !{!"exec"}
|
||||
|
|
Loading…
Reference in New Issue