From 332bbb0fea827139879ae09f78406ace3d25a94a Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 16 May 2018 17:58:50 +0000
Subject: [PATCH] [x86] preserve test intent by removing undef

We need to clean up the DAG floating-point undef logic. This process
is similar to how we handled integer undef logic in D43141.

And as we did there, I'm trying to reduce the patch by changing tests
that would probably become meaningless once we make those fixes.

llvm-svn: 332501
---
 llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll | 61 ++++++++++++----------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 475c80ce3ede..e7d83dbdcddc 100644
--- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -420,50 +420,53 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
 define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
 ; SSE-LABEL: test16:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    subss %xmm0, %xmm2
-; SSE-NEXT:    movaps %xmm0, %xmm3
-; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
-; SSE-NEXT:    movaps %xmm1, %xmm4
-; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
-; SSE-NEXT:    subss %xmm4, %xmm3
-; SSE-NEXT:    movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; SSE-NEXT:    addss %xmm0, %xmm4
+; SSE-NEXT:    subss %xmm3, %xmm2
+; SSE-NEXT:    movaps %xmm0, %xmm4
+; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE-NEXT:    movaps %xmm1, %xmm5
+; SSE-NEXT:    movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE-NEXT:    subss %xmm5, %xmm4
+; SSE-NEXT:    movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE-NEXT:    addss %xmm3, %xmm5
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE-NEXT:    addss %xmm0, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vsubss %xmm0, %xmm0, %xmm2
-; AVX1-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX1-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
-; AVX1-NEXT:    vsubss %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX1-NEXT:    vaddss %xmm0, %xmm4, %xmm4
+; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vsubss %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
+; AVX1-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; AVX1-NEXT:    vsubss %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; AVX1-NEXT:    vaddss %xmm2, %xmm5, %xmm2
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm4[0],xmm2[2,3]
-; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0],xmm2[0],xmm3[2,3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm4[0],xmm1[3]
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: test16:
 ; AVX512:      # %bb.0:
-; AVX512-NEXT:    vsubss %xmm0, %xmm0, %xmm2
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
-; AVX512-NEXT:    vsubss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vaddss %xmm0, %xmm4, %xmm4
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3]
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT:    vsubss %xmm2, %xmm0, %xmm3
+; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
+; AVX512-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; AVX512-NEXT:    vsubss %xmm5, %xmm4, %xmm4
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; AVX512-NEXT:    vaddss %xmm2, %xmm5, %xmm2
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
@@ -471,13 +474,13 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
 ; AVX512-NEXT:    retq
   %1 = extractelement <4 x float> %A, i32 0
   %2 = extractelement <4 x float> %B, i32 0
-  %sub = fsub float %1, undef
+  %sub = fsub float %1, 42.0
   %3 = extractelement <4 x float> %A, i32 2
   %4 = extractelement <4 x float> %B, i32 2
   %sub2 = fsub float %3, %4
   %5 = extractelement <4 x float> %A, i32 1
   %6 = extractelement <4 x float> %B, i32 1
-  %add = fadd float %5, undef
+  %add = fadd float %5, 42.0
   %7 = extractelement <4 x float> %A, i32 3
   %8 = extractelement <4 x float> %B, i32 3
   %add2 = fadd float %7, %8
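
For context, a minimal sketch of the hazard this change avoids (the
function name and folding behavior below are illustrative assumptions,
not part of the patch): once FP undef handling mirrors the integer
logic from D43141, an FP op with an undef operand can legitimately fold
away before instruction selection, so CHECK lines built on it would no
longer exercise the addsub lowering the test was written for. Pinning
the operand to a concrete constant keeps the scalar op live:

  define float @undef_fold_sketch(float %x) {
    ; Assumption: with D43141-style folding extended to FP, this fsub
    ; may simplify to undef and never reach the x86 backend at all.
    %dead = fsub float %x, undef
    ; A real constant leaves a live subtract for the backend to lower.
    %live = fsub float %x, 42.0
    ret float %live
  }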