[X86] Add vector masked load/store scheduler classes (PR32857)
Split off from existing vector load/store classes to remove InstRW overrides. llvm-svn: 331760
This commit is contained in:
parent
3cd0aa3b7e
commit
b0a3be04ec
|
@ -7102,22 +7102,22 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
|
|||
(ins VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
|
||||
VEX_4V, Sched<[WriteFLoad]>;
|
||||
VEX_4V, Sched<[WriteFMaskedLoad]>;
|
||||
def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, f256mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
|
||||
VEX_4V, VEX_L, Sched<[WriteFLoad]>;
|
||||
VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>;
|
||||
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
|
||||
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
|
||||
VEX_4V, Sched<[WriteFStore]>;
|
||||
VEX_4V, Sched<[WriteFMaskedStore]>;
|
||||
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
|
||||
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
|
||||
VEX_4V, VEX_L, Sched<[WriteFStore]>;
|
||||
VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
|
@ -7729,22 +7729,22 @@ multiclass avx2_pmovmask<string OpcodeStr,
|
|||
(ins VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
|
||||
VEX_4V, Sched<[WriteVecLoad]>;
|
||||
VEX_4V, Sched<[WriteVecMaskedLoad]>;
|
||||
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, i256mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
|
||||
VEX_4V, VEX_L, Sched<[WriteVecLoad]>;
|
||||
VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
|
||||
def mr : AVX28I<0x8e, MRMDestMem, (outs),
|
||||
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
|
||||
VEX_4V, Sched<[WriteVecStore]>;
|
||||
VEX_4V, Sched<[WriteVecMaskedStore]>;
|
||||
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
|
||||
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
|
||||
VEX_4V, VEX_L, Sched<[WriteVecStore]>;
|
||||
VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
|
||||
}
|
||||
|
||||
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
|
||||
|
|
|
@ -151,9 +151,13 @@ def : InstRW<[WriteMove], (instrs COPY)>;
|
|||
defm : BWWriteResPair<WriteJump, [BWPort06], 1>;
|
||||
|
||||
// Floating point. This covers both scalar and vector operations.
|
||||
def : WriteRes<WriteFLoad, [BWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteFStore, [BWPort237, BWPort4]>;
|
||||
def : WriteRes<WriteFMove, [BWPort5]>;
|
||||
defm : X86WriteRes<WriteFLoad, [BWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
|
||||
|
||||
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
|
||||
defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
|
||||
|
@ -241,10 +245,14 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
|
|||
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||
|
||||
// Vector integer operations.
|
||||
def : WriteRes<WriteVecLoad, [BWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
|
||||
def : WriteRes<WriteVecMove, [BWPort015]>;
|
||||
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
|
||||
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
|
||||
|
||||
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
|
||||
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
|
||||
|
@ -899,16 +907,6 @@ def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
|
|||
}
|
||||
def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
|
||||
|
||||
def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [1,1,1,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPD(Y?)mr",
|
||||
"VMASKMOVPS(Y?)mr",
|
||||
"VPMASKMOVD(Y?)mr",
|
||||
"VPMASKMOVQ(Y?)mr")>;
|
||||
|
||||
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 5;
|
||||
|
@ -1107,11 +1105,7 @@ def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
|
|||
}
|
||||
def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
|
||||
"MMX_PACKSSWBirm",
|
||||
"MMX_PACKUSWBirm",
|
||||
"VMASKMOVPDrm",
|
||||
"VMASKMOVPSrm",
|
||||
"VPMASKMOVDrm",
|
||||
"VPMASKMOVQrm")>;
|
||||
"MMX_PACKUSWBirm")>;
|
||||
|
||||
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
|
||||
let Latency = 7;
|
||||
|
@ -1212,16 +1206,6 @@ def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSXBDYrm",
|
|||
"VPMOVSXWQYrm",
|
||||
"VPMOVZXWDYrm")>;
|
||||
|
||||
def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm",
|
||||
"VMASKMOVPSYrm",
|
||||
"VPMASKMOVDYrm",
|
||||
"VPMASKMOVQYrm")>;
|
||||
|
||||
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 5;
|
||||
|
|
|
@ -143,11 +143,16 @@ defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
|
|||
|
||||
// This is quite rough, latency depends on the dividend.
|
||||
defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
|
||||
|
||||
// Scalar and vector floating point.
|
||||
def : WriteRes<WriteFStore, [HWPort237, HWPort4]>;
|
||||
def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteFMove, [HWPort5]>;
|
||||
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
|
||||
defm : X86WriteRes<WriteFLoad, [HWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
|
||||
|
||||
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>;
|
||||
|
@ -235,9 +240,13 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
|
|||
}
|
||||
|
||||
// Vector integer operations.
|
||||
def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>;
|
||||
def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteVecMove, [HWPort015]>;
|
||||
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
|
||||
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
|
||||
|
||||
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
|
||||
|
@ -1156,26 +1165,6 @@ def: InstRW<[HWWriteResGroup35], (instregex "ADC(8|16|32|64)ri",
|
|||
"SBB(8|16|32|64)i",
|
||||
"SET(A|BE)r")>;
|
||||
|
||||
def HWWriteResGroup36 : SchedWriteRes<[HWPort5,HWPort23]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPDrm",
|
||||
"VMASKMOVPSrm",
|
||||
"VPMASKMOVDrm",
|
||||
"VPMASKMOVQrm")>;
|
||||
|
||||
def HWWriteResGroup36_1 : SchedWriteRes<[HWPort5,HWPort23]> {
|
||||
let Latency = 9;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm",
|
||||
"VMASKMOVPSYrm",
|
||||
"VPMASKMOVDYrm",
|
||||
"VPMASKMOVQYrm")>;
|
||||
|
||||
def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1579,16 +1568,6 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> {
|
|||
}
|
||||
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
|
||||
|
||||
def HWWriteResGroup84 : SchedWriteRes<[HWPort0,HWPort4,HWPort237,HWPort15]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [1,1,1,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup84], (instregex "VMASKMOVPD(Y?)mr",
|
||||
"VMASKMOVPS(Y?)mr",
|
||||
"VPMASKMOVD(Y?)mr",
|
||||
"VPMASKMOVQ(Y?)mr")>;
|
||||
|
||||
def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
|
||||
let Latency = 10;
|
||||
let NumMicroOps = 4;
|
||||
|
|
|
@ -134,10 +134,14 @@ defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
|
|||
defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
|
||||
|
||||
// Scalar and vector floating point.
|
||||
def : WriteRes<WriteFStore, [SBPort23, SBPort4]>;
|
||||
def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFMove, [SBPort5]>;
|
||||
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
|
||||
defm : X86WriteRes<WriteFLoad, [SBPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
|
||||
|
||||
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
|
||||
|
@ -213,9 +217,13 @@ defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
|
|||
def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
|
||||
|
||||
// Vector integer operations.
|
||||
def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>;
|
||||
def : WriteRes<WriteVecLoad, [SBPort23]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVecMove, [SBPort05]>;
|
||||
defm : X86WriteRes<WriteVecLoad, [SBPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
|
||||
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
|
||||
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
|
||||
|
||||
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
|
||||
|
@ -786,14 +794,6 @@ def: InstRW<[SBWriteResGroup36], (instregex "CALL64pcrel32",
|
|||
"CALL(16|32|64)r",
|
||||
"(V?)EXTRACTPSmr")>;
|
||||
|
||||
def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr",
|
||||
"VMASKMOVPS(Y?)mr")>;
|
||||
|
||||
def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1060,14 +1060,6 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
|
|||
}
|
||||
def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>;
|
||||
|
||||
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm",
|
||||
"VMASKMOVPSrm")>;
|
||||
|
||||
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1169,14 +1161,6 @@ def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm",
|
|||
"(V?)CVTPS2DQrm",
|
||||
"(V?)CVTTPS2DQrm")>;
|
||||
|
||||
def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
|
||||
let Latency = 9;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm",
|
||||
"VMASKMOVPSYrm")>;
|
||||
|
||||
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
|
||||
let Latency = 9;
|
||||
let NumMicroOps = 3;
|
||||
|
|
|
@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>;
|
|||
defm : SKLWriteResPair<WriteJump, [SKLPort06], 1>;
|
||||
|
||||
// Floating point. This covers both scalar and vector operations.
|
||||
def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
|
||||
def : WriteRes<WriteFMove, [SKLPort015]>;
|
||||
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
|
||||
defm : X86WriteRes<WriteFLoad, [SKLPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
|
||||
|
||||
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
|
||||
defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM).
|
||||
|
@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
|
|||
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||
|
||||
// Vector integer operations.
|
||||
def : WriteRes<WriteVecLoad, [SKLPort23]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVecStore, [SKLPort237, SKLPort4]>;
|
||||
def : WriteRes<WriteVecMove, [SKLPort015]>;
|
||||
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
|
||||
|
||||
defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
|
||||
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
|
||||
|
@ -646,16 +654,6 @@ def: InstRW<[SKLWriteResGroup17], (instrs LFENCE,
|
|||
WAIT,
|
||||
XGETBV)>;
|
||||
|
||||
def SKLWriteResGroup18 : SchedWriteRes<[SKLPort0,SKLPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPD(Y?)mr",
|
||||
"VMASKMOVPS(Y?)mr",
|
||||
"VPMASKMOVD(Y?)mr",
|
||||
"VPMASKMOVQ(Y?)mr")>;
|
||||
|
||||
def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -1246,15 +1244,11 @@ def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
|
|||
}
|
||||
def: InstRW<[SKLWriteResGroup91], (instregex "(V?)INSERTF128rm",
|
||||
"(V?)INSERTI128rm",
|
||||
"(V?)MASKMOVPDrm",
|
||||
"(V?)MASKMOVPSrm",
|
||||
"(V?)PADDBrm",
|
||||
"(V?)PADDDrm",
|
||||
"(V?)PADDQrm",
|
||||
"(V?)PADDWrm",
|
||||
"(V?)PBLENDDrmi",
|
||||
"(V?)PMASKMOVDrm",
|
||||
"(V?)PMASKMOVQrm",
|
||||
"(V?)PSUBBrm",
|
||||
"(V?)PSUBDrm",
|
||||
"(V?)PSUBQrm",
|
||||
|
@ -1382,15 +1376,11 @@ def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm",
|
||||
"VMASKMOVPSYrm",
|
||||
"VPADDBYrm",
|
||||
def: InstRW<[SKLWriteResGroup110], (instregex "VPADDBYrm",
|
||||
"VPADDDYrm",
|
||||
"VPADDQYrm",
|
||||
"VPADDWYrm",
|
||||
"VPBLENDDYrmi",
|
||||
"VPMASKMOVDYrm",
|
||||
"VPMASKMOVQYrm",
|
||||
"VPSUBBYrm",
|
||||
"VPSUBDYrm",
|
||||
"VPSUBQYrm",
|
||||
|
|
|
@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>;
|
|||
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;
|
||||
|
||||
// Floating point. This covers both scalar and vector operations.
|
||||
def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>;
|
||||
def : WriteRes<WriteFMove, [SKXPort015]>;
|
||||
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
|
||||
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
|
||||
|
||||
defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub.
|
||||
defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM).
|
||||
|
@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
|
|||
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||
|
||||
// Vector integer operations.
|
||||
def : WriteRes<WriteVecLoad, [SKXPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteVecStore, [SKXPort237, SKXPort4]>;
|
||||
def : WriteRes<WriteVecMove, [SKXPort015]>;
|
||||
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
|
||||
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
|
||||
|
||||
defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
|
||||
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
|
||||
|
@ -845,20 +853,6 @@ def: InstRW<[SKXWriteResGroup17], (instrs LFENCE,
|
|||
WAIT,
|
||||
XGETBV)>;
|
||||
|
||||
def SKXWriteResGroup18 : SchedWriteRes<[SKXPort0,SKXPort237]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPDYmr",
|
||||
"VMASKMOVPDmr",
|
||||
"VMASKMOVPSYmr",
|
||||
"VMASKMOVPSmr",
|
||||
"VPMASKMOVDYmr",
|
||||
"VPMASKMOVDmr",
|
||||
"VPMASKMOVQYmr",
|
||||
"VPMASKMOVQmr")>;
|
||||
|
||||
def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -1936,8 +1930,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
|
|||
"VBROADCASTSSZ128m(b?)",
|
||||
"VINSERTF128rm",
|
||||
"VINSERTI128rm",
|
||||
"VMASKMOVPDrm",
|
||||
"VMASKMOVPSrm",
|
||||
"VMOVAPDZ128rm(b?)",
|
||||
"VMOVAPSZ128rm(b?)",
|
||||
"VMOVDDUPZ128rm(b?)",
|
||||
|
@ -1967,8 +1959,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
|
|||
"VPBLENDMWZ128rm(b?)",
|
||||
"VPBROADCASTDZ128m(b?)",
|
||||
"VPBROADCASTQZ128m(b?)",
|
||||
"VPMASKMOVDrm",
|
||||
"VPMASKMOVQrm",
|
||||
"VPSUBBZ128rm(b?)",
|
||||
"(V?)PSUBBrm",
|
||||
"VPSUBDZ128rm(b?)",
|
||||
|
@ -2226,8 +2216,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
|
|||
"VINSERTI64x2Z256rm(b?)",
|
||||
"VINSERTI64x2Zrm(b?)",
|
||||
"VINSERTI64x4Zrm(b?)",
|
||||
"VMASKMOVPDYrm",
|
||||
"VMASKMOVPSYrm",
|
||||
"VMOVAPDZ256rm(b?)",
|
||||
"VMOVAPDZrm(b?)",
|
||||
"VMOVAPSZ256rm(b?)",
|
||||
|
@ -2280,8 +2268,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
|
|||
"VPBROADCASTDZm(b?)",
|
||||
"VPBROADCASTQZ256m(b?)",
|
||||
"VPBROADCASTQZm(b?)",
|
||||
"VPMASKMOVDYrm",
|
||||
"VPMASKMOVQYrm",
|
||||
"VPSUBBYrm",
|
||||
"VPSUBBZ256rm(b?)",
|
||||
"VPSUBBZrm(b?)",
|
||||
|
|
|
@ -105,9 +105,13 @@ def WriteZero : SchedWrite;
|
|||
defm WriteJump : X86SchedWritePair;
|
||||
|
||||
// Floating point. This covers both scalar and vector operations.
|
||||
def WriteFLoad : SchedWrite;
|
||||
def WriteFStore : SchedWrite;
|
||||
def WriteFMove : SchedWrite;
|
||||
def WriteFLoad : SchedWrite;
|
||||
def WriteFMaskedLoad : SchedWrite;
|
||||
def WriteFMaskedLoadY : SchedWrite;
|
||||
def WriteFStore : SchedWrite;
|
||||
def WriteFMaskedStore : SchedWrite;
|
||||
def WriteFMaskedStoreY : SchedWrite;
|
||||
def WriteFMove : SchedWrite;
|
||||
|
||||
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
|
||||
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
|
||||
|
@ -183,9 +187,13 @@ defm WritePHAdd : X86SchedWritePair;
|
|||
defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
|
||||
|
||||
// Vector integer operations.
|
||||
def WriteVecLoad : SchedWrite;
|
||||
def WriteVecStore : SchedWrite;
|
||||
def WriteVecMove : SchedWrite;
|
||||
def WriteVecLoad : SchedWrite;
|
||||
def WriteVecMaskedLoad : SchedWrite;
|
||||
def WriteVecMaskedLoadY : SchedWrite;
|
||||
def WriteVecStore : SchedWrite;
|
||||
def WriteVecMaskedStore : SchedWrite;
|
||||
def WriteVecMaskedStoreY : SchedWrite;
|
||||
def WriteVecMove : SchedWrite;
|
||||
|
||||
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
|
||||
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
|
||||
|
|
|
@ -198,8 +198,14 @@ def : WriteRes<WriteNop, [AtomPort01]>;
|
|||
// Floating point. This covers both scalar and vector operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def : WriteRes<WriteFLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteFStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteFLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteFMaskedLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>;
|
||||
|
||||
def : WriteRes<WriteFStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
|
||||
|
||||
def : WriteRes<WriteFMove, [AtomPort01]>;
|
||||
defm : X86WriteRes<WriteEMMS,[AtomPort01], 5, [5], 1>;
|
||||
|
||||
|
@ -282,8 +288,14 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
|||
// Vector integer operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def : WriteRes<WriteVecLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
|
||||
|
||||
def : WriteRes<WriteVecStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
|
||||
|
||||
def : WriteRes<WriteVecMove, [AtomPort01]>;
|
||||
|
||||
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
|
||||
|
|
|
@ -311,8 +311,14 @@ def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
|
|||
// Floating point. This covers both scalar and vector operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def : WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX]> { let Latency = 5; }
|
||||
def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>;
|
||||
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
|
||||
|
||||
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
|
||||
|
||||
def : WriteRes<WriteFMove, [JFPU01, JFPX]>;
|
||||
def : WriteRes<WriteEMMS, [JFPU01, JFPX]> { let Latency = 2; }
|
||||
|
||||
|
@ -434,8 +440,14 @@ def : InstRW<[JWriteCVTSI2FLd], (instregex "(V)?CVTSI(64)?2S(D|S)rm")>;
|
|||
// Vector integer operations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def : WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU]> { let Latency = 5; }
|
||||
def : WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC]>;
|
||||
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
|
||||
|
||||
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
|
||||
|
||||
def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
|
||||
|
||||
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
|
||||
|
@ -622,32 +634,6 @@ def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
|||
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
|
||||
VBROADCASTSSYrm)>;
|
||||
|
||||
def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 1, 2];
|
||||
}
|
||||
def : InstRW<[JWriteVMaskMovLd], (instrs VMASKMOVPDrm, VMASKMOVPSrm)>;
|
||||
|
||||
def JWriteVMaskMovYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [2, 2, 4];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVMaskMovYLd], (instrs VMASKMOVPDYrm, VMASKMOVPSYrm)>;
|
||||
|
||||
def JWriteVMaskMovSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 4, 1];
|
||||
}
|
||||
def : InstRW<[JWriteVMaskMovSt], (instrs VMASKMOVPDmr, VMASKMOVPSmr)>;
|
||||
|
||||
def JWriteVMaskMovYSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [2, 4, 2];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>;
|
||||
|
||||
def JWriteJVZEROALL: SchedWriteRes<[]> {
|
||||
let Latency = 90;
|
||||
let NumMicroOps = 73;
|
||||
|
|
|
@ -125,10 +125,14 @@ defm : SLMWriteResPair<WriteBZHI, [SLM_IEC_RSV0], 1>;
|
|||
defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
|
||||
// Scalar and vector floating point.
|
||||
def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
|
||||
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
|
||||
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
|
||||
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
|
||||
|
||||
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
|
||||
|
@ -193,9 +197,13 @@ defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
|
|||
def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
|
||||
// Vector integer operations.
|
||||
def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
|
||||
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
|
||||
|
||||
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
|
||||
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
|
||||
|
|
|
@ -188,9 +188,13 @@ def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
|
|||
}
|
||||
|
||||
// Floating point operations
|
||||
def : WriteRes<WriteFStore, [ZnAGU]>;
|
||||
def : WriteRes<WriteFMove, [ZnFPU]>;
|
||||
def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
|
||||
defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
|
||||
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
|
||||
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
|
||||
|
||||
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
|
||||
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
|
||||
|
@ -260,10 +264,14 @@ defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
|
|||
def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
|
||||
|
||||
// Vector integer operations which uses FPU units
|
||||
def : WriteRes<WriteVecStore, [ZnAGU]>;
|
||||
def : WriteRes<WriteVecMove, [ZnFPU]>;
|
||||
def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; }
|
||||
def : WriteRes<WriteEMMS, [ZnFPU]> { let Latency = 2; }
|
||||
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
|
||||
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
|
||||
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
|
||||
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
|
||||
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
|
||||
|
||||
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
|
||||
|
@ -1030,11 +1038,8 @@ def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
|
|||
// MASKMOVDQU.
|
||||
def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
|
||||
|
||||
// VPMASKMOVQ.
|
||||
// VPMASKMOVD.
|
||||
// ymm
|
||||
def : InstRW<[ZnWriteFPU01Op2],(instregex "VPMASKMOVQrm")>;
|
||||
def : InstRW<[ZnWriteFPU01Op2Y],(instregex "VPMASKMOVQYrm")>;
|
||||
|
||||
def : InstRW<[WriteMicrocoded],
|
||||
(instregex "VPMASKMOVD(Y?)rm")>;
|
||||
// m, v,v.
|
||||
|
@ -1168,32 +1173,6 @@ def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> {
|
|||
def : InstRW<[ZnWriteVINSERT128r], (instregex "VINSERTF128rr")>;
|
||||
def : InstRW<[ZnWriteVINSERT128Ld], (instregex "VINSERTF128rm")>;
|
||||
|
||||
// VMASKMOVP S/D.
|
||||
// x,x,m.
|
||||
def ZnWriteVMASKMOVPLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
|
||||
let Latency = 8;
|
||||
}
|
||||
// y,y,m.
|
||||
def ZnWriteVMASKMOVPLdY : SchedWriteRes<[ZnAGU, ZnFPU01]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 2];
|
||||
}
|
||||
def ZnWriteVMASKMOVPm : SchedWriteRes<[ZnAGU, ZnFPU01]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
def : InstRW<[ZnWriteVMASKMOVPLd], (instregex "VMASKMOVP(S|D)rm")>;
|
||||
def : InstRW<[ZnWriteVMASKMOVPLdY], (instregex "VMASKMOVP(S|D)Yrm")>;
|
||||
def : InstRW<[ZnWriteVMASKMOVPm], (instregex "VMASKMOVP(S|D)mr")>;
|
||||
|
||||
// m256,y,y.
|
||||
def ZnWriteVMASKMOVPYmr : SchedWriteRes<[ZnAGU,ZnFPU01]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1, 2];
|
||||
}
|
||||
def : InstRW<[ZnWriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
|
||||
|
||||
// VGATHERDPS.
|
||||
// x.
|
||||
def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSrm")>;
|
||||
|
|
|
@ -3384,8 +3384,8 @@ declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readn
|
|||
define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; GENERIC-LABEL: test_pmaskmovd:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [6:0.50]
|
||||
; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||
; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -3433,8 +3433,8 @@ declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
|
|||
define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; GENERIC-LABEL: test_pmaskmovd_ymm:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [6:0.50]
|
||||
; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
|
||||
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -3482,8 +3482,8 @@ declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
|
|||
define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
|
||||
; GENERIC-LABEL: test_pmaskmovq:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [6:0.50]
|
||||
; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||
; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -3531,8 +3531,8 @@ declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
|
|||
define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
|
||||
; GENERIC-LABEL: test_pmaskmovq_ymm:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [6:0.50]
|
||||
; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
|
||||
; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
|
||||
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue