[X86] Use EVEX encoded intrinsics for legacy FMA intrinsics when possible.
llvm-svn: 317454
This commit is contained in:
parent
07dac55d95
commit
70eaeae7f0
|
@ -290,8 +290,7 @@ multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
|
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
|
||||||
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
|
string OpStr, SDNode OpNodeIntrin, SDNode OpNode> {
|
||||||
SDNode OpNode> {
|
|
||||||
let ExeDomain = SSEPackedSingle in
|
let ExeDomain = SSEPackedSingle in
|
||||||
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
|
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
|
||||||
FR32, f32mem>,
|
FR32, f32mem>,
|
||||||
|
@ -309,43 +308,44 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
|
||||||
// This is because src1 is tied to dest, and the scalar intrinsics
|
// This is because src1 is tied to dest, and the scalar intrinsics
|
||||||
// require the pass-through values to come from the first source
|
// require the pass-through values to come from the first source
|
||||||
// operand, not the second.
|
// operand, not the second.
|
||||||
// TODO: Use AVX512 instructions when possible.
|
let Predicates = [HasFMA, NoAVX512] in {
|
||||||
let Predicates = [HasFMA] in {
|
def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)),
|
||||||
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
|
|
||||||
(!cast<Instruction>(NAME#"213SSr_Int")
|
(!cast<Instruction>(NAME#"213SSr_Int")
|
||||||
VR128:$src1, VR128:$src2, VR128:$src3)>;
|
VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||||
|
|
||||||
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
|
def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)),
|
||||||
(!cast<Instruction>(NAME#"213SDr_Int")
|
(!cast<Instruction>(NAME#"213SDr_Int")
|
||||||
VR128:$src1, VR128:$src2, VR128:$src3)>;
|
VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||||
|
|
||||||
def : Pat<(IntF32 VR128:$src1, VR128:$src2, sse_load_f32:$src3),
|
def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2,
|
||||||
|
sse_load_f32:$src3)),
|
||||||
(!cast<Instruction>(NAME#"213SSm_Int")
|
(!cast<Instruction>(NAME#"213SSm_Int")
|
||||||
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
|
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
|
||||||
|
|
||||||
def : Pat<(IntF64 VR128:$src1, VR128:$src2, sse_load_f64:$src3),
|
def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2,
|
||||||
|
sse_load_f64:$src3)),
|
||||||
(!cast<Instruction>(NAME#"213SDm_Int")
|
(!cast<Instruction>(NAME#"213SDm_Int")
|
||||||
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
|
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
|
||||||
|
|
||||||
def : Pat<(IntF32 VR128:$src1, sse_load_f32:$src3, VR128:$src2),
|
def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, sse_load_f32:$src3,
|
||||||
|
VR128:$src2)),
|
||||||
(!cast<Instruction>(NAME#"132SSm_Int")
|
(!cast<Instruction>(NAME#"132SSm_Int")
|
||||||
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
|
VR128:$src1, VR128:$src2, sse_load_f32:$src3)>;
|
||||||
|
|
||||||
def : Pat<(IntF64 VR128:$src1, sse_load_f64:$src3, VR128:$src2),
|
def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, sse_load_f64:$src3,
|
||||||
|
VR128:$src2)),
|
||||||
(!cast<Instruction>(NAME#"132SDm_Int")
|
(!cast<Instruction>(NAME#"132SDm_Int")
|
||||||
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
|
VR128:$src1, VR128:$src2, sse_load_f64:$src3)>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
|
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd>, VEX_LIG;
|
||||||
int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
|
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub>, VEX_LIG;
|
||||||
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
|
|
||||||
int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
|
|
||||||
|
|
||||||
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
|
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd>,
|
||||||
int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
|
VEX_LIG;
|
||||||
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
|
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub>,
|
||||||
int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
|
VEX_LIG;
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -385,26 +385,28 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
|
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
|
||||||
ComplexPattern mem_cpat, Intrinsic Int> {
|
ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> {
|
||||||
let isCodeGenOnly = 1 in {
|
let isCodeGenOnly = 1 in {
|
||||||
def rr_Int : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
|
def rr_Int : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG;
|
(VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W,
|
||||||
|
VEX_LIG;
|
||||||
def rm_Int : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
|
def rm_Int : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, memop:$src3),
|
(ins VR128:$src1, VR128:$src2, memop:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||||
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
|
[(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2,
|
||||||
mem_cpat:$src3))]>, VEX_W, VEX_LIG;
|
mem_cpat:$src3)))]>, VEX_W, VEX_LIG;
|
||||||
def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
|
def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, memop:$src2, VR128:$src3),
|
(ins VR128:$src1, memop:$src2, VR128:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG;
|
(VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>,
|
||||||
|
VEX_LIG;
|
||||||
let hasSideEffects = 0 in
|
let hasSideEffects = 0 in
|
||||||
def rr_Int_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr_Int_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||||
|
@ -475,19 +477,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
|
||||||
let ExeDomain = SSEPackedSingle in {
|
let ExeDomain = SSEPackedSingle in {
|
||||||
// Scalar Instructions
|
// Scalar Instructions
|
||||||
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
|
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
|
||||||
fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
|
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32,
|
||||||
int_x86_fma_vfmadd_ss>;
|
X86Fmadds1>;
|
||||||
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
|
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
|
||||||
fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
|
fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32,
|
||||||
int_x86_fma_vfmsub_ss>;
|
X86Fmsubs1>;
|
||||||
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
|
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
|
||||||
X86Fnmadd, loadf32>,
|
X86Fnmadd, loadf32>,
|
||||||
fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
|
fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32,
|
||||||
int_x86_fma_vfnmadd_ss>;
|
X86Fnmadds1>;
|
||||||
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
|
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
|
||||||
X86Fnmsub, loadf32>,
|
X86Fnmsub, loadf32>,
|
||||||
fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
|
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32,
|
||||||
int_x86_fma_vfnmsub_ss>;
|
X86Fnmsubs1>;
|
||||||
// Packed Instructions
|
// Packed Instructions
|
||||||
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
|
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
|
||||||
loadv4f32, loadv8f32>;
|
loadv4f32, loadv8f32>;
|
||||||
|
@ -506,19 +508,19 @@ let ExeDomain = SSEPackedSingle in {
|
||||||
let ExeDomain = SSEPackedDouble in {
|
let ExeDomain = SSEPackedDouble in {
|
||||||
// Scalar Instructions
|
// Scalar Instructions
|
||||||
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
|
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
|
||||||
fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
|
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64,
|
||||||
int_x86_fma_vfmadd_sd>;
|
X86Fmadds1>;
|
||||||
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
|
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
|
||||||
fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
|
fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64,
|
||||||
int_x86_fma_vfmsub_sd>;
|
X86Fmsubs1>;
|
||||||
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
|
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
|
||||||
X86Fnmadd, loadf64>,
|
X86Fnmadd, loadf64>,
|
||||||
fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
|
fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64,
|
||||||
int_x86_fma_vfnmadd_sd>;
|
X86Fnmadds1>;
|
||||||
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
|
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
|
||||||
X86Fnmsub, loadf64>,
|
X86Fnmsub, loadf64>,
|
||||||
fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
|
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64,
|
||||||
int_x86_fma_vfnmsub_sd>;
|
X86Fnmsubs1>;
|
||||||
// Packed Instructions
|
// Packed Instructions
|
||||||
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
|
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
|
||||||
loadv2f64, loadv4f64>;
|
loadv2f64, loadv4f64>;
|
||||||
|
|
|
@ -1468,6 +1468,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||||
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0),
|
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0),
|
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0),
|
X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADDS1, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADDS1, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||||
|
@ -1476,6 +1478,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||||
X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfmsub_sd, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfmsub_ss, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
||||||
|
@ -1484,10 +1488,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||||
X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfnmadd_sd, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfnmadd_ss, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||||
X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
|
||||||
|
X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
|
||||||
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
|
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
|
||||||
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
|
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
|
||||||
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
|
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
|
||||||
|
|
|
@ -13,7 +13,7 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
|
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
|
||||||
|
@ -40,7 +40,7 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1,
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
|
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
@ -68,7 +68,7 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1,
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
|
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
|
||||||
|
@ -95,7 +95,7 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
|
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
@ -232,7 +232,7 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
|
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
|
||||||
|
@ -259,7 +259,7 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1,
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
|
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
@ -287,7 +287,7 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1,
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
|
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
|
||||||
|
@ -314,7 +314,7 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
|
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
@ -451,7 +451,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
|
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
|
||||||
|
@ -478,7 +478,7 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
|
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
@ -506,7 +506,7 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1,
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
|
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
|
||||||
|
@ -533,7 +533,7 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double>
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
|
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
@ -670,7 +670,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
|
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss:
|
||||||
|
@ -697,7 +697,7 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca]
|
; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
@ -725,7 +725,7 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1,
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
|
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd:
|
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd:
|
||||||
|
@ -752,7 +752,7 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double>
|
||||||
;
|
;
|
||||||
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd:
|
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd:
|
||||||
; CHECK-AVX512VL: # BB#0:
|
; CHECK-AVX512VL: # BB#0:
|
||||||
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
|
; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
|
||||||
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
|
||||||
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue