[X86] Add vector masked load/store scheduler classes (PR32857)

Split off from existing vector load/store classes to remove InstRW overrides.

llvm-svn: 331760
This commit is contained in:
Simon Pilgrim 2018-05-08 12:17:55 +00:00
parent 3cd0aa3b7e
commit b0a3be04ec
12 changed files with 171 additions and 255 deletions

View File

@ -7102,22 +7102,22 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
VEX_4V, Sched<[WriteFLoad]>;
VEX_4V, Sched<[WriteFMaskedLoad]>;
def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
VEX_4V, VEX_L, Sched<[WriteFLoad]>;
VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>;
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
VEX_4V, Sched<[WriteFStore]>;
VEX_4V, Sched<[WriteFMaskedStore]>;
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
VEX_4V, VEX_L, Sched<[WriteFStore]>;
VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>;
}
let ExeDomain = SSEPackedSingle in
@ -7729,22 +7729,22 @@ multiclass avx2_pmovmask<string OpcodeStr,
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
VEX_4V, Sched<[WriteVecLoad]>;
VEX_4V, Sched<[WriteVecMaskedLoad]>;
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
VEX_4V, VEX_L, Sched<[WriteVecLoad]>;
VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
def mr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
VEX_4V, Sched<[WriteVecStore]>;
VEX_4V, Sched<[WriteVecMaskedStore]>;
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
VEX_4V, VEX_L, Sched<[WriteVecStore]>;
VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
}
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",

View File

@ -151,9 +151,13 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : BWWriteResPair<WriteJump, [BWPort06], 1>;
// Floating point. This covers both scalar and vector operations.
def : WriteRes<WriteFLoad, [BWPort23]> { let Latency = 5; }
def : WriteRes<WriteFStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteFMove, [BWPort5]>;
defm : X86WriteRes<WriteFLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
@ -241,10 +245,14 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer operations.
def : WriteRes<WriteVecLoad, [BWPort23]> { let Latency = 5; }
def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteVecMove, [BWPort015]>;
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
@ -899,16 +907,6 @@ def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> {
let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPD(Y?)mr",
"VMASKMOVPS(Y?)mr",
"VPMASKMOVD(Y?)mr",
"VPMASKMOVQ(Y?)mr")>;
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@ -1107,11 +1105,7 @@ def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
}
def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
"MMX_PACKSSWBirm",
"MMX_PACKUSWBirm",
"VMASKMOVPDrm",
"VMASKMOVPSrm",
"VPMASKMOVDrm",
"VPMASKMOVQrm")>;
"MMX_PACKUSWBirm")>;
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 7;
@ -1212,16 +1206,6 @@ def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSXBDYrm",
"VPMOVSXWQYrm",
"VPMOVZXWDYrm")>;
def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 8;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm",
"VPMASKMOVDYrm",
"VPMASKMOVQYrm")>;
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;

View File

@ -143,11 +143,16 @@ defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
// This is quite rough, latency depends on the dividend.
defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
// Scalar and vector floating point.
def : WriteRes<WriteFStore, [HWPort237, HWPort4]>;
def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; }
def : WriteRes<WriteFMove, [HWPort5]>;
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
defm : X86WriteRes<WriteFLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>;
@ -235,9 +240,13 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
}
// Vector integer operations.
def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>;
def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; }
def : WriteRes<WriteVecMove, [HWPort015]>;
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
@ -1156,26 +1165,6 @@ def: InstRW<[HWWriteResGroup35], (instregex "ADC(8|16|32|64)ri",
"SBB(8|16|32|64)i",
"SET(A|BE)r")>;
def HWWriteResGroup36 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 8;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPDrm",
"VMASKMOVPSrm",
"VPMASKMOVDrm",
"VPMASKMOVQrm")>;
def HWWriteResGroup36_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm",
"VPMASKMOVDYrm",
"VPMASKMOVQYrm")>;
def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
@ -1579,16 +1568,6 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
def HWWriteResGroup84 : SchedWriteRes<[HWPort0,HWPort4,HWPort237,HWPort15]> {
let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup84], (instregex "VMASKMOVPD(Y?)mr",
"VMASKMOVPS(Y?)mr",
"VPMASKMOVD(Y?)mr",
"VPMASKMOVQ(Y?)mr")>;
def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
let Latency = 10;
let NumMicroOps = 4;

View File

@ -134,10 +134,14 @@ defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
// Scalar and vector floating point.
def : WriteRes<WriteFStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; }
def : WriteRes<WriteFMove, [SBPort5]>;
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
defm : X86WriteRes<WriteFLoad, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
@ -213,9 +217,13 @@ defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
// Vector integer operations.
def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteVecLoad, [SBPort23]> { let Latency = 6; }
def : WriteRes<WriteVecMove, [SBPort05]>;
defm : X86WriteRes<WriteVecLoad, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
@ -786,14 +794,6 @@ def: InstRW<[SBWriteResGroup36], (instregex "CALL64pcrel32",
"CALL(16|32|64)r",
"(V?)EXTRACTPSmr")>;
def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
let Latency = 5;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr",
"VMASKMOVPS(Y?)mr")>;
def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@ -1060,14 +1060,6 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>;
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm",
"VMASKMOVPSrm")>;
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
let NumMicroOps = 3;
@ -1169,14 +1161,6 @@ def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm",
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm")>;
def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm")>;
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;

View File

@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>;
defm : SKLWriteResPair<WriteJump, [SKLPort06], 1>;
// Floating point. This covers both scalar and vector operations.
def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; }
def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteFMove, [SKLPort015]>;
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : X86WriteRes<WriteFLoad, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM).
@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SKLPort23]> { let Latency = 6; }
def : WriteRes<WriteVecStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteVecMove, [SKLPort015]>;
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
@ -646,16 +654,6 @@ def: InstRW<[SKLWriteResGroup17], (instrs LFENCE,
WAIT,
XGETBV)>;
def SKLWriteResGroup18 : SchedWriteRes<[SKLPort0,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPD(Y?)mr",
"VMASKMOVPS(Y?)mr",
"VPMASKMOVD(Y?)mr",
"VPMASKMOVQ(Y?)mr")>;
def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@ -1246,15 +1244,11 @@ def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
}
def: InstRW<[SKLWriteResGroup91], (instregex "(V?)INSERTF128rm",
"(V?)INSERTI128rm",
"(V?)MASKMOVPDrm",
"(V?)MASKMOVPSrm",
"(V?)PADDBrm",
"(V?)PADDDrm",
"(V?)PADDQrm",
"(V?)PADDWrm",
"(V?)PBLENDDrmi",
"(V?)PMASKMOVDrm",
"(V?)PMASKMOVQrm",
"(V?)PSUBBrm",
"(V?)PSUBDrm",
"(V?)PSUBQrm",
@ -1382,15 +1376,11 @@ def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm",
"VPADDBYrm",
def: InstRW<[SKLWriteResGroup110], (instregex "VPADDBYrm",
"VPADDDYrm",
"VPADDQYrm",
"VPADDWYrm",
"VPBLENDDYrmi",
"VPMASKMOVDYrm",
"VPMASKMOVQYrm",
"VPSUBBYrm",
"VPSUBDYrm",
"VPSUBQYrm",

View File

@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>;
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;
// Floating point. This covers both scalar and vector operations.
def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; }
def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteFMove, [SKXPort015]>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM).
@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SKXPort23]> { let Latency = 5; }
def : WriteRes<WriteVecStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteVecMove, [SKXPort015]>;
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
@ -845,20 +853,6 @@ def: InstRW<[SKXWriteResGroup17], (instrs LFENCE,
WAIT,
XGETBV)>;
def SKXWriteResGroup18 : SchedWriteRes<[SKXPort0,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPDYmr",
"VMASKMOVPDmr",
"VMASKMOVPSYmr",
"VMASKMOVPSmr",
"VPMASKMOVDYmr",
"VPMASKMOVDmr",
"VPMASKMOVQYmr",
"VPMASKMOVQmr")>;
def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@ -1936,8 +1930,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
"VBROADCASTSSZ128m(b?)",
"VINSERTF128rm",
"VINSERTI128rm",
"VMASKMOVPDrm",
"VMASKMOVPSrm",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
"VMOVDDUPZ128rm(b?)",
@ -1967,8 +1959,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
"VPBLENDMWZ128rm(b?)",
"VPBROADCASTDZ128m(b?)",
"VPBROADCASTQZ128m(b?)",
"VPMASKMOVDrm",
"VPMASKMOVQrm",
"VPSUBBZ128rm(b?)",
"(V?)PSUBBrm",
"VPSUBDZ128rm(b?)",
@ -2226,8 +2216,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
"VINSERTI64x2Z256rm(b?)",
"VINSERTI64x2Zrm(b?)",
"VINSERTI64x4Zrm(b?)",
"VMASKMOVPDYrm",
"VMASKMOVPSYrm",
"VMOVAPDZ256rm(b?)",
"VMOVAPDZrm(b?)",
"VMOVAPSZ256rm(b?)",
@ -2280,8 +2268,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
"VPBROADCASTDZm(b?)",
"VPBROADCASTQZ256m(b?)",
"VPBROADCASTQZm(b?)",
"VPMASKMOVDYrm",
"VPMASKMOVQYrm",
"VPSUBBYrm",
"VPSUBBZ256rm(b?)",
"VPSUBBZrm(b?)",

View File

@ -105,9 +105,13 @@ def WriteZero : SchedWrite;
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
def WriteFLoad : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFLoad : SchedWrite;
def WriteFMaskedLoad : SchedWrite;
def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
@ -183,9 +187,13 @@ defm WritePHAdd : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecLoad : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).

View File

@ -198,8 +198,14 @@ def : WriteRes<WriteNop, [AtomPort01]>;
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFMaskedLoad, [AtomPort0]>;
def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
def : WriteRes<WriteFMove, [AtomPort01]>;
defm : X86WriteRes<WriteEMMS,[AtomPort01], 5, [5], 1>;
@ -282,8 +288,14 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
def : WriteRes<WriteVecMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;

View File

@ -311,8 +311,14 @@ def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX]> { let Latency = 5; }
def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
def : WriteRes<WriteFMove, [JFPU01, JFPX]>;
def : WriteRes<WriteEMMS, [JFPU01, JFPX]> { let Latency = 2; }
@ -434,8 +440,14 @@ def : InstRW<[JWriteCVTSI2FLd], (instregex "(V)?CVTSI(64)?2S(D|S)rm")>;
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU]> { let Latency = 5; }
def : WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC]>;
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
@ -622,32 +634,6 @@ def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm)>;
def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 1, 2];
}
def : InstRW<[JWriteVMaskMovLd], (instrs VMASKMOVPDrm, VMASKMOVPSrm)>;
def JWriteVMaskMovYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVMaskMovYLd], (instrs VMASKMOVPDYrm, VMASKMOVPSYrm)>;
def JWriteVMaskMovSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> {
let Latency = 6;
let ResourceCycles = [1, 4, 1];
}
def : InstRW<[JWriteVMaskMovSt], (instrs VMASKMOVPDmr, VMASKMOVPSmr)>;
def JWriteVMaskMovYSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> {
let Latency = 6;
let ResourceCycles = [2, 4, 2];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>;
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
let NumMicroOps = 73;

View File

@ -125,10 +125,14 @@ defm : SLMWriteResPair<WriteBZHI, [SLM_IEC_RSV0], 1>;
defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
@ -193,9 +197,13 @@ defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
// Vector integer operations.
def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;

View File

@ -188,9 +188,13 @@ def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
}
// Floating point operations
def : WriteRes<WriteFStore, [ZnAGU]>;
def : WriteRes<WriteFMove, [ZnFPU]>;
def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
@ -260,10 +264,14 @@ defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
// Vector integer operations which uses FPU units
def : WriteRes<WriteVecStore, [ZnAGU]>;
def : WriteRes<WriteVecMove, [ZnFPU]>;
def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; }
def : WriteRes<WriteEMMS, [ZnFPU]> { let Latency = 2; }
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
@ -1030,11 +1038,8 @@ def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
// MASKMOVDQU.
def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
// VPMASKMOVQ.
// VPMASKMOVD.
// ymm
def : InstRW<[ZnWriteFPU01Op2],(instregex "VPMASKMOVQrm")>;
def : InstRW<[ZnWriteFPU01Op2Y],(instregex "VPMASKMOVQYrm")>;
def : InstRW<[WriteMicrocoded],
(instregex "VPMASKMOVD(Y?)rm")>;
// m, v,v.
@ -1168,32 +1173,6 @@ def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> {
def : InstRW<[ZnWriteVINSERT128r], (instregex "VINSERTF128rr")>;
def : InstRW<[ZnWriteVINSERT128Ld], (instregex "VINSERTF128rm")>;
// VMASKMOVP S/D.
// x,x,m.
def ZnWriteVMASKMOVPLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let Latency = 8;
}
// y,y,m.
def ZnWriteVMASKMOVPLdY : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
def ZnWriteVMASKMOVPm : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let Latency = 4;
}
def : InstRW<[ZnWriteVMASKMOVPLd], (instregex "VMASKMOVP(S|D)rm")>;
def : InstRW<[ZnWriteVMASKMOVPLdY], (instregex "VMASKMOVP(S|D)Yrm")>;
def : InstRW<[ZnWriteVMASKMOVPm], (instregex "VMASKMOVP(S|D)mr")>;
// m256,y,y.
def ZnWriteVMASKMOVPYmr : SchedWriteRes<[ZnAGU,ZnFPU01]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
// VGATHERDPS.
// x.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSrm")>;

View File

@ -3384,8 +3384,8 @@ declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readn
define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; GENERIC-LABEL: test_pmaskmovd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [6:0.50]
; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -3433,8 +3433,8 @@ declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
; GENERIC-LABEL: test_pmaskmovd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [6:0.50]
; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -3482,8 +3482,8 @@ declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
; GENERIC-LABEL: test_pmaskmovq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [6:0.50]
; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -3531,8 +3531,8 @@ declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
; GENERIC-LABEL: test_pmaskmovq_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [6:0.50]
; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;