AMDGPU: Add implicit def of SCC to kill and indirect pseudos
Summary: Kill instructions sometimes do clobber SCC in unusual circumstances, when v_cmpx cannot be used due to the operands that are involved. Additionally, even if SCC was never defined by the expansion, kill pseudos could previously occur between an s_cmp and an s_cbranch_scc, which breaks the SCC liveness tracking when the pseudo is expanded to split the basic block. While it would be possible to explicitly mark SCC as live-in for the successor basic block, it's simpler to just mark the pseudo as defining SCC, so that such a sequence is never emitted by instruction selection in the first place. A similar issue affects indirect source/dest pseudos in principle, although I haven't been able to come up with a test case where it actually matters (this affects instruction selection, so a MIR test can't be used). Fixes: dEQP-GLES3.functional.shaders.discard.dynamic_loop_always Change-Id: Ica8d82ecff1a763b892a1112cf1b06c948863a4f Reviewers: arsenm, rampitec Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D47761 llvm-svn: 335223
This commit is contained in:
parent
f267431901
commit
b3a9b68513
|
@ -292,14 +292,21 @@ def SI_ELSE_BREAK : CFPseudoInstSI <
|
||||||
let isReMaterializable = 1;
|
let isReMaterializable = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Uses = [EXEC], Defs = [EXEC,VCC] in {
|
let Uses = [EXEC] in {
|
||||||
|
|
||||||
multiclass PseudoInstKill <dag ins> {
|
multiclass PseudoInstKill <dag ins> {
|
||||||
|
// Even though this pseudo can usually be expanded without an SCC def, we
|
||||||
|
// conservatively assume that it has an SCC def, both because it is sometimes
|
||||||
|
// required in degenerate cases (when V_CMPX cannot be used due to constant
|
||||||
|
// bus limitations) and because it allows us to avoid having to track SCC
|
||||||
|
// liveness across basic blocks.
|
||||||
|
let Defs = [EXEC,VCC,SCC] in
|
||||||
def _PSEUDO : PseudoInstSI <(outs), ins> {
|
def _PSEUDO : PseudoInstSI <(outs), ins> {
|
||||||
let isConvergent = 1;
|
let isConvergent = 1;
|
||||||
let usesCustomInserter = 1;
|
let usesCustomInserter = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let Defs = [EXEC,VCC,SCC] in
|
||||||
def _TERMINATOR : SPseudoInstSI <(outs), ins> {
|
def _TERMINATOR : SPseudoInstSI <(outs), ins> {
|
||||||
let isTerminator = 1;
|
let isTerminator = 1;
|
||||||
}
|
}
|
||||||
|
@ -308,6 +315,7 @@ multiclass PseudoInstKill <dag ins> {
|
||||||
defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
|
defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
|
||||||
defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
|
defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
|
||||||
|
|
||||||
|
let Defs = [EXEC,VCC] in
|
||||||
def SI_ILLEGAL_COPY : SPseudoInstSI <
|
def SI_ILLEGAL_COPY : SPseudoInstSI <
|
||||||
(outs unknown:$dst), (ins unknown:$src),
|
(outs unknown:$dst), (ins unknown:$src),
|
||||||
[], " ; illegal copy $src to $dst">;
|
[], " ; illegal copy $src to $dst">;
|
||||||
|
@ -445,7 +453,7 @@ def ADJCALLSTACKDOWN : SPseudoInstSI<
|
||||||
let usesCustomInserter = 1;
|
let usesCustomInserter = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Defs = [M0, EXEC],
|
let Defs = [M0, EXEC, SCC],
|
||||||
UseNamedOperandTable = 1 in {
|
UseNamedOperandTable = 1 in {
|
||||||
|
|
||||||
class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
|
class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
|
||||||
|
|
|
@ -33,7 +33,7 @@ body: |
|
||||||
bb.1:
|
bb.1:
|
||||||
successors: %bb.2
|
successors: %bb.2
|
||||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||||
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit $exec
|
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
|
||||||
S_BRANCH %bb.2
|
S_BRANCH %bb.2
|
||||||
|
|
||||||
bb.2:
|
bb.2:
|
||||||
|
|
|
@ -251,6 +251,26 @@ define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: {{^}}test_scc_liveness:
|
||||||
|
; SI: v_cmp
|
||||||
|
; SI: s_and_b64 exec
|
||||||
|
; SI: s_cmp
|
||||||
|
; SI: s_cbranch_scc
|
||||||
|
define amdgpu_ps void @test_scc_liveness() #0 {
|
||||||
|
main_body:
|
||||||
|
br label %loop3
|
||||||
|
|
||||||
|
loop3: ; preds = %loop3, %main_body
|
||||||
|
%tmp = phi i32 [ 0, %main_body ], [ %tmp5, %loop3 ]
|
||||||
|
%tmp1 = icmp sgt i32 %tmp, 0
|
||||||
|
call void @llvm.amdgcn.kill(i1 %tmp1) #1
|
||||||
|
%tmp5 = add i32 %tmp, 1
|
||||||
|
br i1 %tmp1, label %endloop15, label %loop3
|
||||||
|
|
||||||
|
endloop15: ; preds = %loop3
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare void @llvm.amdgcn.kill(i1) #0
|
declare void @llvm.amdgcn.kill(i1) #0
|
||||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||||
declare i1 @llvm.amdgcn.wqm.vote(i1)
|
declare i1 @llvm.amdgcn.wqm.vote(i1)
|
||||||
|
|
Loading…
Reference in New Issue