AMDGPU: Fix packing undef parts of build_vector

llvm-svn: 339511
This commit is contained in:
Matt Arsenault 2018-08-12 08:42:46 +00:00
parent a5cf8da145
commit 3ead7d7389
5 changed files with 420 additions and 10 deletions

View File

@ -4287,21 +4287,30 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
} }
assert(VT == MVT::v2f16 || VT == MVT::v2i16); assert(VT == MVT::v2f16 || VT == MVT::v2i16);
assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
SDValue Lo = Op.getOperand(0); SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1); SDValue Hi = Op.getOperand(1);
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); // Avoid adding defined bits with the zero_extend.
Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi); if (Hi.isUndef()) {
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
}
Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo); Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi); Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi, SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
DAG.getConstant(16, SL, MVT::i32)); DAG.getConstant(16, SL, MVT::i32));
if (Lo.isUndef())
return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi); SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
return DAG.getNode(ISD::BITCAST, SL, VT, Or); return DAG.getNode(ISD::BITCAST, SL, VT, Or);
} }

View File

@ -1461,13 +1461,32 @@ class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPa
def : ExpPattern<AMDGPUexport, i32, EXP>; def : ExpPattern<AMDGPUexport, i32, EXP>;
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>; def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
// COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs // COPY is workaround tablegen bug from multiple outputs
// from S_LSHL_B32's multiple outputs from implicit scc def. // from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat < def : GCNPat <
(v2i16 (build_vector (i16 0), i16:$src1)), (v2i16 (build_vector (i16 0), i16:$src1)),
(v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0)) (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
>; >;
def : GCNPat <
(v2i16 (build_vector i16:$src0, (i16 undef))),
(v2i16 (COPY $src0))
>;
def : GCNPat <
(v2f16 (build_vector f16:$src0, (f16 undef))),
(v2f16 (COPY $src0))
>;
def : GCNPat <
(v2i16 (build_vector (i16 undef), i16:$src1)),
(v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
>;
def : GCNPat <
(v2f16 (build_vector (f16 undef), f16:$src1)),
(v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
>;
let SubtargetPredicate = HasVOP3PInsts in { let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat < def : GCNPat <

View File

@ -0,0 +1,380 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; Element 1 of the <2 x i16> is %arg0; element 0 is left undef. The checks
; expect both targets to build the packed value with a single 16-bit left shift
; of v0 and no further instructions for the undef low element.
define void @undef_lo_v2i16(i16 %arg0) {
; GFX9-LABEL: undef_lo_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo);
ret void
}
; Half-precision variant: element 1 of the <2 x half> is %arg0 and element 0 is
; left undef. The checks expect only a 16-bit left shift of v0 on both targets.
define void @undef_lo_v2f16(half %arg0) {
; GFX9-LABEL: undef_lo_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo);
ret void
}
; Builds a <2 x half> with %arg0 in element 1 and an undef element 0, then adds
; 1.0 to both lanes before handing the result to inline asm.
; GFX9 is expected to shift %arg0 into the high half and use one packed add.
; GFX8 is expected to add with an SDWA instruction that writes WORD_1 and then
; OR the constant 0x7e00 into the result.
; NOTE(review): 0x7e00 presumably stands in for the low lane computed from
; undef -- confirm.
define void @undef_lo_op_v2f16(half %arg0) {
; GFX9-LABEL: undef_lo_op_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo_op_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
; GFX8-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, 0x7e00, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x half> undef, half %arg0, i32 1
%op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0>
call void asm sideeffect "; use $0", "v"(<2 x half> %op);
ret void
}
; Builds a <2 x i16> with %arg0 in element 1 and an undef element 0, then adds
; 99 (0x63) to both lanes before handing the result to inline asm.
; GFX9 is expected to shift %arg0 into the high half and use one packed add;
; GFX8 is expected to use a single SDWA add that writes WORD_1, with no extra
; instruction for the undef low lane.
define void @undef_lo_op_v2i16(i16 %arg0) {
; GFX9-LABEL: undef_lo_op_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_movk_i32 s6, 0x63
; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo_op_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 0x63
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1
%op = add <2 x i16> %undef.lo, <i16 99, i16 99>
call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
ret void
}
; Only element 1 of the <4 x i16> is defined (%arg0); elements 0, 2 and 3 stay
; undef. The checks expect a single 16-bit left shift of v0 and nothing else
; before the vector is consumed as v[0:1].
define void @undef_lo3_v4i16(i16 %arg0) {
; GFX9-LABEL: undef_lo3_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo3_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <4 x i16> undef, i16 %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
ret void
}
; Half-precision variant: only element 1 of the <4 x half> is defined (%arg0);
; elements 0, 2 and 3 stay undef. The checks expect a single 16-bit left shift
; of v0 before the vector is consumed as v[0:1].
define void @undef_lo3_v4f16(half %arg0) {
; GFX9-LABEL: undef_lo3_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo3_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <4 x half> undef, half %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
ret void
}
; Shuffles %arg0 with an undef vector using mask <1, 1, 2, 3>: result elements
; 0 and 1 are both %arg0[1], and elements 2 and 3 select from the undef operand.
; The checks expect the first register to be assembled from the high half of v0
; placed in both 16-bit halves.
; NOTE(review): despite the "undef_lo2" name, this mask leaves the *high* two
; elements undef and defines the low two -- confirm this is the intended case.
define void @undef_lo2_v4i16(<2 x i16> %arg0) {
; GFX9-LABEL: undef_lo2_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo2_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
ret void
}
; Half-precision variant of the <1, 1, 2, 3> shuffle: result elements 0 and 1
; are both %arg0[1], and elements 2 and 3 select from the undef operand.
; The checks expect the first register to be assembled from the high half of v0
; placed in both 16-bit halves (GFX9 via v_lshl_or_b32, GFX8 via shift + OR).
; NOTE(review): despite the "undef_lo2" name, this mask leaves the *high* two
; elements undef and defines the low two -- confirm this is the intended case.
define void @undef_lo2_v4f16(<2 x half> %arg0) {
; GFX9-LABEL: undef_lo2_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_lo2_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
ret void
}
; Element 0 of the <2 x i16> is %arg0; element 1 is left undef. The checks
; expect no instructions between the entry s_waitcnt and the inline asm, i.e.
; v0 is used as-is without masking or extending the undef high half.
define void @undef_hi_v2i16(i16 %arg0) {
; GFX9-LABEL: undef_hi_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi);
ret void
}
; Half-precision variant: element 0 of the <2 x half> is %arg0 and element 1 is
; left undef. The checks expect no instructions between the entry s_waitcnt and
; the inline asm on either target.
define void @undef_hi_v2f16(half %arg0) {
; GFX9-LABEL: undef_hi_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi);
ret void
}
; Builds a <2 x half> with %arg0 in element 0 and an undef element 1, then adds
; 1.0 to both lanes before handing the result to inline asm.
; GFX9 is expected to feed v0 straight into one packed add.
; GFX8 is expected to do a scalar f16 add and then OR the constant 0x7e000000
; into the result.
; NOTE(review): 0x7e000000 presumably stands in for the high lane computed from
; undef -- confirm.
define void @undef_hi_op_v2f16(half %arg0) {
; GFX9-LABEL: undef_hi_op_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi_op_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <2 x half> undef, half %arg0, i32 0
%op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0>
call void asm sideeffect "; use $0", "v"(<2 x half> %op);
ret void
}
; Builds a <2 x i16> with %arg0 in element 0 and an undef element 1, then adds
; 99 (0x63) to both lanes before handing the result to inline asm.
; GFX9 is expected to feed v0 straight into one packed add; GFX8 is expected to
; use a single 16-bit add with no extra instruction for the undef high lane.
define void @undef_hi_op_v2i16(i16 %arg0) {
; GFX9-LABEL: undef_hi_op_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 s6, 0x63
; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi_op_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0
%op = add <2 x i16> %undef.hi, <i16 99, i16 99>
call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
ret void
}
; Only element 0 of the <4 x i16> is defined (%arg0); elements 1, 2 and 3 stay
; undef. The checks expect no instructions between the entry s_waitcnt and the
; inline asm that consumes v[0:1].
define void @undef_hi3_v4i16(i16 %arg0) {
; GFX9-LABEL: undef_hi3_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi3_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <4 x i16> undef, i16 %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
ret void
}
; Half-precision variant: only element 0 of the <4 x half> is defined (%arg0);
; elements 1, 2 and 3 stay undef. The checks expect no instructions between the
; entry s_waitcnt and the inline asm that consumes v[0:1].
define void @undef_hi3_v4f16(half %arg0) {
; GFX9-LABEL: undef_hi3_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi3_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = insertelement <4 x half> undef, half %arg0, i32 0
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
}
; Widens %arg0 to <4 x i16> with mask <0, 1, 2, 3>: elements 0 and 1 are %arg0
; unchanged, and elements 2 and 3 select from the undef operand. The checks
; expect no instructions between the entry s_waitcnt and the inline asm that
; consumes v[0:1].
define void @undef_hi2_v4i16(<2 x i16> %arg0) {
; GFX9-LABEL: undef_hi2_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi2_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
ret void
}
; Half-precision variant: widens %arg0 to <4 x half> with mask <0, 1, 2, 3>, so
; elements 0 and 1 are %arg0 unchanged and elements 2 and 3 select from the
; undef operand. The checks expect no instructions between the entry s_waitcnt
; and the inline asm that consumes v[0:1].
define void @undef_hi2_v4f16(<2 x half> %arg0) {
; GFX9-LABEL: undef_hi2_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v[0:1]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: undef_hi2_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
}

View File

@ -402,9 +402,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; FIXME: materialize constant directly in VGPR ; FIXME: materialize constant directly in VGPR
; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm: ; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001 ; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001
; GFX9-DAG: s_pack_ll_b32_b16 [[K23:s[0-9]+]], 3, s{{[0-9]+}} ; GFX9-DAG: s_mov_b32 [[K2:s[0-9]+]], 3
; GFX9: v_mov_b32_e32 v0, [[K01]] ; GFX9: v_mov_b32_e32 v0, [[K01]]
; GFX9: v_mov_b32_e32 v1, [[K23]] ; GFX9: v_mov_b32_e32 v1, [[K2]]
; GFX9: s_swappc_b64 ; GFX9: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>) call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)

View File

@ -83,8 +83,10 @@ define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src
} }
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}} ; GCN: s_waitcnt
; GFX9: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 { define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
%src0.ext = fpext half %src0 to float %src0.ext = fpext half %src0 to float
%src1.ext = fpext half %src1 to float %src1.ext = fpext half %src1 to float