[X86] Add SchedWriteFRnd fp rounding scheduler classes

Split off from SchedWriteFAdd for fp rounding/bit-manipulation instructions.

Fixes an issue on btver2 where only the ymm versions used the JSTC pipe; the xmm and scalar versions incorrectly used JFPA.

llvm-svn: 331515
This commit is contained in:
Simon Pilgrim 2018-05-04 12:59:24 +00:00
parent 07e8daa66b
commit be51b20127
15 changed files with 85 additions and 182 deletions

View File

@ -7990,7 +7990,7 @@ let Predicates = [HasERI] in {
}
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
SchedWriteFAdd.Scl>, T8PD, EVEX_4V;
SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@ -8057,9 +8057,9 @@ let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFAdd>,
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
SchedWriteFAdd>, EVEX;
SchedWriteFRnd>, EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
@ -8274,12 +8274,12 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless",
SchedWriteFAdd.Scl, f32x_info>,
SchedWriteFRnd.Scl, f32x_info>,
AVX512AIi8Base, EVEX_4V,
EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd",
SchedWriteFAdd.Scl, f64x_info>,
SchedWriteFRnd.Scl, f64x_info>,
VEX_W, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<64, CD8VT1>;
@ -9381,13 +9381,13 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, X86VReduceRnd, SchedWriteFAdd, HasDQI>,
X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86VRndScale, X86VRndScaleRnd, SchedWriteFAdd, HasAVX512>,
X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMantRnd, SchedWriteFAdd, HasAVX512>,
X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
@ -9407,17 +9407,17 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
let Predicates = [HasAVX512] in {

View File

@ -5529,27 +5529,27 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
loadv4f32, X86VRndScale, SchedWriteFAdd.XMM>,
loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
loadv8f32, X86VRndScale, SchedWriteFAdd.YMM>,
loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
loadv2f64, X86VRndScale, SchedWriteFAdd.XMM>,
loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
loadv4f64, X86VRndScale, SchedWriteFAdd.YMM>,
loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
}
let Predicates = [HasAVX, NoAVX512] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl,
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
VEX_4V, VEX_LIG, VEX_WIG;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl>,
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
VEX_4V, VEX_LIG, VEX_WIG;
}
@ -5624,15 +5624,15 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
memopv4f32, X86VRndScale, SchedWriteFAdd.XMM>;
memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
memopv2f64, X86VRndScale, SchedWriteFAdd.XMM>;
memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl,
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {

View File

@ -76,20 +76,20 @@ multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFAdd.XMM>;
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
SchedWriteFAdd.XMM>;
SchedWriteFRnd.XMM>;
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
SchedWriteFAdd.YMM>;
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFAdd.XMM>;
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
SchedWriteFAdd.XMM>;
SchedWriteFRnd.XMM>;
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
SchedWriteFAdd.YMM>;
SchedWriteFRnd.YMM>;
}
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,

View File

@ -176,7 +176,11 @@ defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding.
defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM/ZMM).
defm : X86WriteRes<WriteFRndLd, [BWPort1,BWPort23], 11, [2,1], 3>;
defm : X86WriteRes<WriteFRndYLd, [BWPort1,BWPort23], 12, [2,1], 3>;
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
@ -926,11 +930,7 @@ def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m",
"VMOVUPDYrm",
"VMOVUPSYrm",
"VPBROADCASTDYrm",
"VPBROADCASTQYrm",
"(V?)ROUNDPD(Y?)r",
"(V?)ROUNDPS(Y?)r",
"(V?)ROUNDSDr",
"(V?)ROUNDSSr")>;
"VPBROADCASTQYrm")>;
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 6;
@ -1405,16 +1405,6 @@ def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr",
"VRSQRTPSYr")>;
def BWWriteResGroup127 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[BWWriteResGroup127], (instregex "(V?)ROUNDPDm",
"(V?)ROUNDPSm",
"(V?)ROUNDSDm",
"(V?)ROUNDSSm")>;
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
@ -1458,9 +1448,7 @@ def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
"VROUNDPDYm",
"VROUNDPSYm")>;
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def BWWriteResGroup137 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;

View File

@ -173,6 +173,10 @@ defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndLd, [HWPort1,HWPort23], 12, [2,1], 3>;
defm : X86WriteRes<WriteFRndYLd, [HWPort1,HWPort23], 13, [2,1], 3>;
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
@ -645,11 +649,7 @@ def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm",
"(V?)MOVUPDrm",
"(V?)MOVUPSrm",
"VPBROADCASTDrm",
"VPBROADCASTQrm",
"(V?)ROUNDPD(Y?)r",
"(V?)ROUNDPS(Y?)r",
"(V?)ROUNDSDr",
"(V?)ROUNDSSr")>;
"VPBROADCASTQrm")>;
def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
let Latency = 7;
@ -1760,19 +1760,7 @@ def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
"VROUNDPDYm",
"VROUNDPSYm")>;
def HWWriteResGroup103_1 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 12;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup103_1], (instregex "(V?)ROUNDPDm",
"(V?)ROUNDPSm",
"(V?)ROUNDSDm",
"(V?)ROUNDSSm")>;
def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 12;

View File

@ -160,6 +160,8 @@ defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
@ -1157,11 +1159,7 @@ def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm",
"MMX_CVTTPS2PIirm",
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm",
"(V?)ROUNDPDm",
"(V?)ROUNDPSm",
"(V?)ROUNDSDm",
"(V?)ROUNDSSm")>;
"(V?)CVTTPS2DQrm")>;
def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 9;

View File

@ -173,6 +173,8 @@ defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // F
defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
@ -1335,16 +1337,6 @@ def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort015
}
def: InstRW<[SKLWriteResGroup103], (instrs LOOP)>;
def SKLWriteResGroup105 : SchedWriteRes<[SKLPort01]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def: InstRW<[SKLWriteResGroup105], (instregex "(V?)ROUNDPD(Y?)r",
"(V?)ROUNDPS(Y?)r",
"(V?)ROUNDSDr",
"(V?)ROUNDSSr")>;
def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@ -1796,16 +1788,6 @@ def SKLWriteResGroup166_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
}
def: InstRW<[SKLWriteResGroup166_1], (instregex "VDIVPDYrr")>;
def SKLWriteResGroup168 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 14;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDPDm")>;
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDPSm")>;
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSDm")>;
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSSm")>;
def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 14;
let NumMicroOps = 3;
@ -1829,14 +1811,6 @@ def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FPrST0",
"DIVR_FST0r",
"DIVR_FrST0")>;
def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 15;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
"VROUNDPSYm")>;
def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 15;
let NumMicroOps = 10;

View File

@ -173,6 +173,8 @@ defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Fl
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFRnd, [SKXPort015], 8, [2], 2, 6>; // Floating point rounding.
defm : SKXWriteResPair<WriteFRndY, [SKXPort015], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
@ -2127,24 +2129,6 @@ def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,S
}
def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
def SKXWriteResGroup116 : SchedWriteRes<[SKXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ128rri",
"VRNDSCALEPDZ256rri",
"VRNDSCALEPDZrri",
"VRNDSCALEPSZ128rri",
"VRNDSCALEPSZ256rri",
"VRNDSCALEPSZrri",
"VRNDSCALESDr",
"VRNDSCALESSr",
"(V?)ROUNDPD(Y?)r",
"(V?)ROUNDPS(Y?)r",
"(V?)ROUNDSDr",
"(V?)ROUNDSSr")>;
def SKXWriteResGroup117 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@ -3007,20 +2991,6 @@ def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
}
def: InstRW<[SKXWriteResGroup184_1], (instregex "VDIVPD(Y|Z256)rr")>;
def SKXWriteResGroup186 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 14;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPDZ128rm(b?)i",
"VRNDSCALEPSZ128rm(b?)i",
"VRNDSCALESDm(b?)",
"VRNDSCALESSm(b?)",
"(V?)ROUNDPDm",
"(V?)ROUNDPSm",
"(V?)ROUNDSDm",
"(V?)ROUNDSSm")>;
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
@ -3067,18 +3037,6 @@ def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FPrST0",
"DIVR_FST0r",
"DIVR_FrST0")>;
def SKXWriteResGroup192 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i",
"VRNDSCALEPDZrm(b?)i",
"VRNDSCALEPSZ256rm(b?)i",
"VRNDSCALEPSZrm(b?)i",
"VROUNDPDYm",
"VROUNDPSYm")>;
def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 8;

View File

@ -123,6 +123,8 @@ defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM/ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
@ -258,6 +260,8 @@ def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcpY, WriteFRcpY>;
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrtY, WriteFRsqrtY>;
def SchedWriteFRnd
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;

View File

@ -218,6 +218,8 @@ defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 34, 34,
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFRndY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;

View File

@ -337,6 +337,8 @@ defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
@ -563,8 +565,7 @@ def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
VCVTPS2DQYrr, VCVTTPS2DQYrr,
VROUNDPDYr, VROUNDPSYr)>;
VCVTPS2DQYrr, VCVTTPS2DQYrr)>;
def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
let Latency = 8;
@ -572,8 +573,7 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm,
VROUNDPDYm, VROUNDPSYm)>;
VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
let Latency = 2;

View File

@ -151,6 +151,8 @@ defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;

View File

@ -120,7 +120,8 @@ multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
// This multiclass is for folded loads for floating point units.
multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
int Lat, list<int> Res = [], int UOps = 1,
int LoadLat = 7, int LoadUOps = 0> {
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@ -129,11 +130,11 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
}
// Memory variant also uses a cycle on ZnAGU
// adds 7 cycles to the latency.
// adds LoadLat cycles to the latency (default = 7).
def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
let Latency = !add(Lat, 7);
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
let NumMicroOps = !add(UOps, LoadUOps);
}
}
@ -208,6 +209,8 @@ defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
@ -1524,20 +1527,6 @@ def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
}
def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm")>;
// ROUND SS/SD PS/PD.
// v,v,i.
def ZnWriteROUNDr : SchedWriteRes<[ZnFPU3]> {
let Latency = 4;
}
def : InstRW<[ZnWriteROUNDr], (instregex "(V?)ROUND(S|P)(S|D)(Y?)r")>;
// v,m,i.
def ZnWriteROUNDm : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
let NumMicroOps = 2;
}
def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(S|P)(S|D)(Y?)m")>;
// DPPS.
// x,x,i / v,v,v,i.
def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>;

View File

@ -1720,7 +1720,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 48.00 2.00 - 355.50 907.50 402.00 398.00 381.00 - 43.00 114.00 117.50 117.50 38.00
# CHECK-NEXT: 48.00 2.00 - 347.50 907.50 394.00 406.00 381.00 - 43.00 122.00 117.50 117.50 38.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@ -2318,18 +2318,18 @@ vzeroupper
# CHECK-NEXT: - - - - 2.00 - 2.00 2.00 - - - - - - vrcpps (%rax), %ymm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundps $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundps $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundps $1, (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vrsqrtps %ymm0, %ymm2

View File

@ -270,7 +270,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: 6.00 - - 37.00 23.00 57.50 42.50 44.00 - 5.00 5.00 32.50 32.50 10.00
# CHECK-NEXT: 6.00 - - 29.00 23.00 49.50 50.50 44.00 - 5.00 13.00 32.50 32.50 10.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@ -362,12 +362,12 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - 2.50 0.50 1.00 - - - 0.50 0.50 2.00 pmulld (%rax), %xmm2
# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - ptest %xmm0, %xmm1
# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - 1.00 - - - - - - ptest (%rax), %xmm1
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundps $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundsd $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundss $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundps $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundsd $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundss $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundss $1, (%rax), %xmm2