[AMDGPU] Remove getBidirectionalReasonRank

This method negates the Reason field of a scheduling candidate, but only
for RegCritical and RegExcess. That orders RegCritical and RegExcess
correctly relative to each other, but every other comparison is broken.
In fact, it can prefer a weaker reason such as Weak over RegCritical
because Weak > -RegCritical.

The CandReason enum is already properly sorted, so just remove the
artificial ranking and compare the Reason values directly.

Differential Revision: https://reviews.llvm.org/D30557

llvm-svn: 297536
This commit is contained in:
Stanislav Mekhanoshin 2017-03-11 00:29:27 +00:00
parent ee8a4f51c4
commit 79da2a7698
15 changed files with 52 additions and 63 deletions

View File

@ -179,16 +179,6 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
}
}
// Maps a scheduling candidate's Reason to the integer rank that
// pickNodeBidirectional() compares; there, TopCand is chosen only when its
// rank is strictly greater than BotCand's.
// RegCritical and RegExcess are returned negated; every other reason is
// returned as its raw enum value.
// NOTE(review): per the commit message, mixing negated and raw values breaks
// comparisons across the two groups (e.g. Weak > -RegCritical, so a weaker
// reason can win), which is why this helper is removed in favor of comparing
// Reason directly.
static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
switch (Reason) {
default:
// Returned unchanged (implicitly converted to int).
return Reason;
case GenericSchedulerBase::RegCritical:
case GenericSchedulerBase::RegExcess:
// Both reasons share the negated path.
return -Reason;
}
}
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
@ -261,9 +251,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
} else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
Cand = BotCand;
} else {
int TopRank = getBidirectionalReasonRank(TopCand.Reason);
int BotRank = getBidirectionalReasonRank(BotCand.Reason);
if (TopRank > BotRank) {
if (BotCand.Reason > TopCand.Reason) {
Cand = TopCand;
} else {
Cand = BotCand;

View File

@ -120,15 +120,16 @@ ret:
; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_bfe_u32 s0, s0, 0xc0004
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc1
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: s_and_b32 s0, s0, 0xff
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff
; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: s_and_b32 s0, s0, 0x7f
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f
; GCN: BB2_3:
; GCN: buffer_store_short

View File

@ -51,8 +51,8 @@ define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float ad
}
; GCN-LABEL: {{^}}v_clamp_negzero_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()

View File

@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {

View File

@ -10,11 +10,11 @@ declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)
declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
; GCN-LABEL: {{^}}test_copysign_f16:
; SI: buffer_load_ushort v[[MAG:[0-9]+]]
; SI: buffer_load_ushort v[[SIGN:[0-9]+]]
; SI: buffer_load_ushort v[[MAG:[0-9]+]]
; SI: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_F32]]
; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]]
; VI: buffer_load_ushort v[[SIGN:[0-9]+]]

View File

@ -532,7 +532,7 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; GCN: flat_atomic_umin v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define void @atomic_umin_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:

View File

@ -189,7 +189,7 @@ define void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> a
}
; GCN-LABEL: {{^}}v_insertelement_v2i16_0:
; GCN: flat_load_dword [[VEC:v[0-9]+]]
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]]
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e7, [[ELT1]]
@ -258,11 +258,11 @@ define void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace(1)* %out, <2
; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0
; GCN-LABEL: {{^}}v_insertelement_v2i16_1:
; GCN: flat_load_dword [[VEC:v[0-9]+]]
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3e7
; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
@ -295,13 +295,13 @@ define void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2
}
; GCN-LABEL: {{^}}v_insertelement_v2f16_0:
; GCN: flat_load_dword [[VEC:v[0-9]+]]
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]]
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x4500, [[ELT1]]
; GFX9: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0x4500{{$}}
; GFX9: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]]
; GFX9-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0x4500{{$}}
; GFX9-DAG: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]]
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[ELT1]], 16, [[ELT0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
@ -337,11 +337,11 @@ define void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspace(1)* %out, <2
}
; GCN-LABEL: {{^}}v_insertelement_v2f16_1:
; GCN: flat_load_dword [[VEC:v[0-9]+]]
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4500
; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]

View File

@ -3,9 +3,9 @@
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: s_load_dword [[Y:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]]
; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]]
define void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {

View File

@ -111,11 +111,11 @@ define void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #
}
; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo:
; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}}
; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]]
; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}}
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]]
; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}}
; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}}
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
; GFX9: ; use [[PACKED]]
define void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
@ -133,10 +133,10 @@ define void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
}
; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo:
; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]]
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 4.0, [[VAL1]]
; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}}
; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}}
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
; GFX9: ; use [[PACKED]]
@ -155,11 +155,11 @@ define void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
}
; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi:
; GFX9: flat_load_dword [[VAL0:v[0-9]+]]
; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234
; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]]
; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]]
; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x1234
; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234
; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]
@ -179,10 +179,10 @@ define void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 {
}
; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi:
; GFX9: flat_load_dword [[VAL:v[0-9]+]]
; GFX9-DAG: flat_load_dword [[VAL:v[0-9]+]]
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 1.0
; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x3c00
; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00
; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]]
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]

View File

@ -103,11 +103,11 @@ define void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #
}
; GCN-LABEL: {{^}}v_pack_v2i16_imm_lo:
; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]]
; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]]
; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}}
; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}}
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
; GFX9: ; use [[PACKED]]
@ -144,10 +144,10 @@ define void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
}
; GCN-LABEL: {{^}}v_pack_v2i16_imm_hi:
; GFX9: flat_load_dword [[VAL0:v[0-9]+]]
; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]]
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]]
; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]]
; GFX9: ; use [[PACKED]]

View File

@ -55,11 +55,11 @@ done: ; preds = %loop
; GCN-LABEL: {{^}}smrd_valu:
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0
; SI: s_mov_b32
; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
; SI: s_nop 3
; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]]
; SI: s_mov_b32
; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]

View File

@ -372,9 +372,9 @@ define void @add_select_fneg_negk_f32(i32 %c) #0 {
}
; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
@ -390,9 +390,9 @@ define void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
}
; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc

View File

@ -166,7 +166,7 @@ endif:
; GCN-LABEL: {{^}}uniform_if_else_ret:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]

View File

@ -4,8 +4,8 @@
declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 v{{[0-9]}}, -1, v{{[0-9]+}}, vcc
; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]}}, -1, v{{[0-9]+}}, [[COND]]
; GCN-DAG: v{{[0-9]}}
; All nan values are converted to 0xffffffff
; GCN: s_endpgm
@ -105,8 +105,8 @@ define void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
; GCN-DAG: s_load_dword [[X:s[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[Z]], vcc
; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
define void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
@ -122,8 +122,8 @@ define void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
; GCN-DAG: s_load_dword [[X:s[0-9]+]]
; GCN: v_cmp_nlg_f32_e64 vcc, [[X]], 0
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
; GCN: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
define void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64

View File

@ -156,7 +156,7 @@ exit:
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
; SI: buffer_store_dword
; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
; SI: v_cmp_ge_i64_e{{32|64}} [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
; SI: [[LABEL_FLOW]]: