From 66f286845cad73a280617a606e29c9009bb4da87 Mon Sep 17 00:00:00 2001
From: Cameron McInally
Date: Thu, 6 Jun 2019 21:49:59 +0000
Subject: [PATCH] [NFC][CodeGen] Add unary fneg tests to
 X86/fma4-intrinsics-x86.ll

llvm-svn: 362752
---
 llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll | 221 +++++++++++++++++++
 1 file changed, 221 insertions(+)

diff --git a/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll b/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll
index 96e285bc25f3..da56ece57710 100644
--- a/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/fma4-intrinsics-x86.ll
@@ -88,6 +88,16 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
   ret <4 x float> %2
 }
 
+define <4 x float> @test_x86_fma_vfmsub_ps_unary_fneg(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_ps_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <4 x float> %a2
+  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
+  ret <4 x float> %2
+}
+
 define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmsub_pd:
 ; CHECK:       # %bb.0:
@@ -98,6 +108,16 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
   ret <2 x double> %2
 }
 
+define <2 x double> @test_x86_fma_vfmsub_pd_unary_fneg(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_pd_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <2 x double> %a2
+  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
+  ret <2 x double> %2
+}
+
 define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
 ; CHECK:       # %bb.0:
@@ -108,6 +128,16 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
   ret <8 x float> %2
 }
 
+define <8 x float> @test_x86_fma_vfmsub_ps_256_unary_fneg(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_ps_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <8 x float> %a2
+  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
+  ret <8 x float> %2
+}
+
 define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
 ; CHECK:       # %bb.0:
@@ -118,6 +148,16 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
   ret <4 x double> %2
 }
 
+define <4 x double> @test_x86_fma_vfmsub_pd_256_unary_fneg(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_pd_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <4 x double> %a2
+  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
+  ret <4 x double> %2
+}
+
 ; VFNMADD
 define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
@@ -129,6 +169,16 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
   ret <4 x float> %2
 }
 
+define <4 x float> @test_x86_fma_vfnmadd_ps_unary_fneg(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_ps_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <4 x float> %a0
+  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %2
+}
+
 define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
 ; CHECK:       # %bb.0:
@@ -139,6 +189,16 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
   ret <2 x double> %2
 }
 
+define <2 x double> @test_x86_fma_vfnmadd_pd_unary_fneg(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_pd_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <2 x double> %a0
+  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %2
+}
+
 define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
 ; CHECK:       # %bb.0:
@@ -149,6 +209,16 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
   ret <8 x float> %2
 }
 
+define <8 x float> @test_x86_fma_vfnmadd_ps_256_unary_fneg(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <8 x float> %a0
+  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %2
+}
+
 define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
 ; CHECK:       # %bb.0:
@@ -159,6 +229,16 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
   ret <4 x double> %2
 }
 
+define <4 x double> @test_x86_fma_vfnmadd_pd_256_unary_fneg(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <4 x double> %a0
+  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %2
+}
+
 ; VFNMSUB
 define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
@@ -171,6 +251,17 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
   ret <4 x float> %3
 }
 
+define <4 x float> @test_x86_fma_vfnmsub_ps_unary_fneg(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_ps_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <4 x float> %a0
+  %2 = fneg <4 x float> %a2
+  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
+  ret <4 x float> %3
+}
+
 define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
 ; CHECK:       # %bb.0:
@@ -182,6 +273,17 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
   ret <2 x double> %3
 }
 
+define <2 x double> @test_x86_fma_vfnmsub_pd_unary_fneg(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_pd_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <2 x double> %a0
+  %2 = fneg <2 x double> %a2
+  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
+  ret <2 x double> %3
+}
+
 define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
 ; CHECK:       # %bb.0:
@@ -193,6 +295,17 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
   ret <8 x float> %3
 }
 
+define <8 x float> @test_x86_fma_vfnmsub_ps_256_unary_fneg(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <8 x float> %a0
+  %2 = fneg <8 x float> %a2
+  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
+  ret <8 x float> %3
+}
+
 define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
 ; CHECK:       # %bb.0:
@@ -204,6 +317,17 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double>
   ret <4 x double> %3
 }
 
+define <4 x double> @test_x86_fma_vfnmsub_pd_256_unary_fneg(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = fneg <4 x double> %a0
+  %2 = fneg <4 x double> %a2
+  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
+  ret <4 x double> %3
+}
+
 ; VFMADDSUB
 define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
@@ -217,6 +341,18 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
   ret <4 x float> %4
 }
 
+define <4 x float> @test_x86_fma_vfmaddsub_ps_unary_fneg(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  %2 = fneg <4 x float> %a2
+  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
+  %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %4
+}
+
 define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
 ; CHECK:       # %bb.0:
@@ -229,6 +365,18 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a
   ret <2 x double> %4
 }
 
+define <2 x double> @test_x86_fma_vfmaddsub_pd_unary_fneg(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  %2 = fneg <2 x double> %a2
+  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
+  %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %4
+}
+
 define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
 ; CHECK:       # %bb.0:
@@ -241,6 +389,18 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %
   ret <8 x float> %4
 }
 
+define <8 x float> @test_x86_fma_vfmaddsub_ps_256_unary_fneg(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  %2 = fneg <8 x float> %a2
+  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
+  %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x float> %4
+}
+
 define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
 ; CHECK:       # %bb.0:
@@ -253,6 +413,18 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double
   ret <4 x double> %4
 }
 
+define <4 x double> @test_x86_fma_vfmaddsub_pd_256_unary_fneg(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  %2 = fneg <4 x double> %a2
+  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
+  %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %4
+}
+
 ; VFMSUBADD
 define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
@@ -266,6 +438,18 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
   ret <4 x float> %4
 }
 
+define <4 x float> @test_x86_fma_vfmsubadd_ps_unary_fneg(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  %2 = fneg <4 x float> %a2
+  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
+  %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %4
+}
+
 define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
 ; CHECK:       # %bb.0:
@@ -278,6 +462,19 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a
   ret <2 x double> %4
 }
 
+define <2 x double> @test_x86_fma_vfmsubadd_pd_unary_fneg(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  %2 = fneg <2 x double> %a2
+  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
+  %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %4
+}
+
+
 define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
 ; CHECK:       # %bb.0:
@@ -290,6 +487,18 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %
   ret <8 x float> %4
 }
 
+define <8 x float> @test_x86_fma_vfmsubadd_ps_256_unary_fneg(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  %2 = fneg <8 x float> %a2
+  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
+  %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x float> %4
+}
+
 define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
 ; CHECK:       # %bb.0:
@@ -302,6 +511,18 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double
   ret <4 x double> %4
 }
 
+define <4 x double> @test_x86_fma_vfmsubadd_pd_256_unary_fneg(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256_unary_fneg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  %2 = fneg <4 x double> %a2
+  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
+  %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %4
+}
+
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #2
 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #2