[X86] Use EVEX encoded intrinsics for legacy FMA intrinsics when possible.

llvm-svn: 317454
This commit is contained in:
Craig Topper 2017-11-06 05:48:26 +00:00
parent 07dac55d95
commit 70eaeae7f0
3 changed files with 65 additions and 55 deletions

View File

@ -290,8 +290,7 @@ multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
SDNode OpNode> {
string OpStr, SDNode OpNodeIntrin, SDNode OpNode> {
let ExeDomain = SSEPackedSingle in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
FR32, f32mem>,
@ -309,43 +308,44 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// This is because src1 is tied to dest, and the scalar intrinsics
// require the pass-through values to come from the first source
// operand, not the second.
// TODO: Use AVX512 instructions when possible.
let Predicates = [HasFMA] in {
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
let Predicates = [HasFMA, NoAVX512] in {
def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)),
(!cast<Instruction>(NAME#"213SSr_Int")
VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)),
(!cast<Instruction>(NAME#"213SDr_Int")
VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(IntF32 VR128:$src1, VR128:$src2, sse_load_f32:$src3),
def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2,
sse_load_f32:$src3)),
(!cast<Instruction>(NAME#"213SSm_Int")
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
def : Pat<(IntF64 VR128:$src1, VR128:$src2, sse_load_f64:$src3),
def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2,
sse_load_f64:$src3)),
(!cast<Instruction>(NAME#"213SDm_Int")
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
def : Pat<(IntF32 VR128:$src1, sse_load_f32:$src3, VR128:$src2),
def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, sse_load_f32:$src3,
VR128:$src2)),
(!cast<Instruction>(NAME#"132SSm_Int")
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
def : Pat<(IntF64 VR128:$src1, sse_load_f64:$src3, VR128:$src2),
def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, sse_load_f64:$src3,
VR128:$src2)),
(!cast<Instruction>(NAME#"132SDm_Int")
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
}
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub>, VEX_LIG;
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd>,
VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub>,
VEX_LIG;
//===----------------------------------------------------------------------===//
@ -385,26 +385,28 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ComplexPattern mem_cpat, Intrinsic Int> {
ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> {
let isCodeGenOnly = 1 in {
def rr_Int : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG;
(VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W,
VEX_LIG;
def rm_Int : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
mem_cpat:$src3))]>, VEX_W, VEX_LIG;
[(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2,
mem_cpat:$src3)))]>, VEX_W, VEX_LIG;
def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG;
(VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>,
VEX_LIG;
let hasSideEffects = 0 in
def rr_Int_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
@ -475,19 +477,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
int_x86_fma_vfmadd_ss>;
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32,
X86Fmadds1>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
int_x86_fma_vfmsub_ss>;
fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32,
X86Fmsubs1>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86Fnmadd, loadf32>,
fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
int_x86_fma_vfnmadd_ss>;
fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32,
X86Fnmadds1>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86Fnmsub, loadf32>,
fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
int_x86_fma_vfnmsub_ss>;
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32,
X86Fnmsubs1>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32>;
@ -506,19 +508,19 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
int_x86_fma_vfmadd_sd>;
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64,
X86Fmadds1>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
int_x86_fma_vfmsub_sd>;
fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64,
X86Fmsubs1>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86Fnmadd, loadf64>,
fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
int_x86_fma_vfnmadd_sd>;
fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64,
X86Fnmadds1>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86Fnmsub, loadf64>,
fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
int_x86_fma_vfnmsub_sd>;
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64,
X86Fnmsubs1>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64>;

View File

@ -1468,6 +1468,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0),
X86_INTRINSIC_DATA(fma_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADDS1, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADDS1, 0),
X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
@ -1476,6 +1478,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_vfmsub_sd, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
X86_INTRINSIC_DATA(fma_vfmsub_ss, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
@ -1484,10 +1488,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_vfnmadd_sd, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
X86_INTRINSIC_DATA(fma_vfnmadd_ss, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),

View File

@ -13,7 +13,7 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
@ -40,7 +40,7 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1,
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
@ -68,7 +68,7 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1,
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
@ -95,7 +95,7 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
@ -232,7 +232,7 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
@ -259,7 +259,7 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1,
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
@ -287,7 +287,7 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1,
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
@ -314,7 +314,7 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
@ -451,7 +451,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
@ -478,7 +478,7 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
@ -506,7 +506,7 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1,
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
@ -533,7 +533,7 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double>
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
@ -670,7 +670,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss:
@ -697,7 +697,7 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca]
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca]
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
@ -725,7 +725,7 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1,
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd:
@ -752,7 +752,7 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double>
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-AVX512VL: # BB#0:
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;