[SelectionDAG] Add [us]itofp(undef) --> 0 constant fold (PR39205)

The DAG was missing this fold. The logic is copied directly from llvm::ConstantFoldCastInstruction: [us]itofp(undef) folds to 0, because the result value is bounded.

Differential Revision: https://reviews.llvm.org/D62807

llvm-svn: 362397
This commit is contained in:
Simon Pilgrim 2019-06-03 13:02:07 +00:00
parent 74467814f2
commit cb7e4e8193
6 changed files with 140 additions and 218 deletions

View File

@ -12440,6 +12440,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// [us]itofp(undef) = 0, because the result value is bounded.
if (N0.isUndef())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@ -12497,6 +12501,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// [us]itofp(undef) = 0, because the result value is bounded.
if (N0.isUndef())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values

View File

@ -4440,6 +4440,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (Operand.isUndef())
return getUNDEF(VT);
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// [us]itofp(undef) = 0, because the result value is bounded.
if (Operand.isUndef())
return getConstantFP(0.0, DL, VT);
break;
case ISD::SIGN_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");

View File

@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64> %a) {
}
define <2 x float> @sltof2f32(<2 x i64> %a) {
; NODQ-LABEL: sltof2f32:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; NODQ-NEXT: retq
; NOVLDQ-LABEL: sltof2f32:
; NOVLDQ: # %bb.0:
; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax
; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NOVLDQ-NEXT: vmovq %xmm0, %rax
; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sltof2f32:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VLNODQ-NEXT: vmovq %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; VLNODQ-NEXT: retq
;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0

View File

@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64> %a) {
}
define <2 x float> @sltof2f32(<2 x i64> %a) {
; NODQ-LABEL: sltof2f32:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; NODQ-NEXT: retq
; NOVLDQ-LABEL: sltof2f32:
; NOVLDQ: # %bb.0:
; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax
; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NOVLDQ-NEXT: vmovq %xmm0, %rax
; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sltof2f32:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VLNODQ-NEXT: vmovq %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; VLNODQ-NEXT: retq
;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0

View File

@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: js .LBB39_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: jmp .LBB39_6
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
; VEX-NEXT: .LBB39_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB39_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: testq %rax, %rax
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB39_8
; VEX-NEXT: # %bb.7:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: .LBB39_8:
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i64_to_4f32:
@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_1
; SSE2-NEXT: # %bb.2:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_3
; SSE2-NEXT: .LBB41_1:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_3:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB41_3:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_6:
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: js .LBB41_8
; SSE2-NEXT: # %bb.7:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: .LBB41_8:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE41-NEXT: # %bb.5:
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: jmp .LBB41_6
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
; SSE41-NEXT: .LBB41_4:
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq %rcx
@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: addss %xmm0, %xmm0
; SSE41-NEXT: .LBB41_6:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT: testq %rax, %rax
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: js .LBB41_8
; SSE41-NEXT: # %bb.7:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-NEXT: .LBB41_8:
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: js .LBB41_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: jmp .LBB41_6
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
; VEX-NEXT: .LBB41_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB41_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: testq %rax, %rax
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB41_8
; VEX-NEXT: # %bb.7:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: .LBB41_8:
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:

View File

@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: js .LBB39_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: jmp .LBB39_6
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
; VEX-NEXT: .LBB39_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB39_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: testq %rax, %rax
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB39_8
; VEX-NEXT: # %bb.7:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: .LBB39_8:
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i64_to_4f32:
@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_1
; SSE2-NEXT: # %bb.2:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_3
; SSE2-NEXT: .LBB41_1:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_3:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB41_3:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_6:
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: js .LBB41_8
; SSE2-NEXT: # %bb.7:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: .LBB41_8:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE41-NEXT: # %bb.5:
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: jmp .LBB41_6
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
; SSE41-NEXT: .LBB41_4:
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq %rcx
@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: addss %xmm0, %xmm0
; SSE41-NEXT: .LBB41_6:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT: testq %rax, %rax
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: js .LBB41_8
; SSE41-NEXT: # %bb.7:
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-NEXT: .LBB41_8:
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: js .LBB41_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: jmp .LBB41_6
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
; VEX-NEXT: .LBB41_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB41_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: testq %rax, %rax
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB41_8
; VEX-NEXT: # %bb.7:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; VEX-NEXT: .LBB41_8:
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef: