[X86] Add SchedWriteFRnd fp rounding scheduler classes
Split the fp rounding/bit-manipulation instructions off from SchedWriteFAdd into their own scheduler class. This fixes an issue on btver2, where only the ymm versions used the JSTC pipe while the xmm/scalar versions were still modelled on JFPA. llvm-svn: 331515
This commit is contained in:
parent
07e8daa66b
commit
be51b20127
|
@ -7990,7 +7990,7 @@ let Predicates = [HasERI] in {
|
|||
}
|
||||
|
||||
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
|
||||
SchedWriteFAdd.Scl>, T8PD, EVEX_4V;
|
||||
SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
|
||||
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
|
||||
|
||||
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
|
@ -8057,9 +8057,9 @@ let Predicates = [HasERI] in {
|
|||
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
|
||||
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
|
||||
}
|
||||
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFAdd>,
|
||||
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
|
||||
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
|
||||
SchedWriteFAdd>, EVEX;
|
||||
SchedWriteFRnd>, EVEX;
|
||||
|
||||
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
|
||||
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
|
||||
|
@ -8274,12 +8274,12 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless",
|
||||
SchedWriteFAdd.Scl, f32x_info>,
|
||||
SchedWriteFRnd.Scl, f32x_info>,
|
||||
AVX512AIi8Base, EVEX_4V,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd",
|
||||
SchedWriteFAdd.Scl, f64x_info>,
|
||||
SchedWriteFRnd.Scl, f64x_info>,
|
||||
VEX_W, AVX512AIi8Base, EVEX_4V,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
|
@ -9381,13 +9381,13 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
|
|||
}
|
||||
|
||||
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
|
||||
X86VReduce, X86VReduceRnd, SchedWriteFAdd, HasDQI>,
|
||||
X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
|
||||
X86VRndScale, X86VRndScaleRnd, SchedWriteFAdd, HasAVX512>,
|
||||
X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
|
||||
X86VGetMant, X86VGetMantRnd, SchedWriteFAdd, HasAVX512>,
|
||||
X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
|
||||
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
|
||||
|
@ -9407,17 +9407,17 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
|
|||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
|
||||
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
|
||||
0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
|
||||
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
|
||||
0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
|
||||
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
|
||||
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
|
||||
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
|
||||
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
|
|
|
@ -5529,27 +5529,27 @@ let Predicates = [HasAVX, NoVLX] in {
|
|||
let ExeDomain = SSEPackedSingle in {
|
||||
// Intrinsic form
|
||||
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
|
||||
loadv4f32, X86VRndScale, SchedWriteFAdd.XMM>,
|
||||
loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
|
||||
loadv8f32, X86VRndScale, SchedWriteFAdd.YMM>,
|
||||
loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
|
||||
loadv2f64, X86VRndScale, SchedWriteFAdd.XMM>,
|
||||
loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
|
||||
loadv4f64, X86VRndScale, SchedWriteFAdd.YMM>,
|
||||
loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
}
|
||||
let Predicates = [HasAVX, NoAVX512] in {
|
||||
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl,
|
||||
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
|
||||
v4f32, v2f64, X86RndScales, 0>,
|
||||
VEX_4V, VEX_LIG, VEX_WIG;
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl>,
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
|
||||
VEX_4V, VEX_LIG, VEX_WIG;
|
||||
}
|
||||
|
||||
|
@ -5624,15 +5624,15 @@ let Predicates = [HasAVX, NoVLX] in {
|
|||
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
|
||||
memopv4f32, X86VRndScale, SchedWriteFAdd.XMM>;
|
||||
memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
|
||||
memopv2f64, X86VRndScale, SchedWriteFAdd.XMM>;
|
||||
memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
|
||||
|
||||
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl>;
|
||||
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl,
|
||||
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
|
||||
v4f32, v2f64, X86RndScales>;
|
||||
|
||||
let Predicates = [UseSSE41] in {
|
||||
|
|
|
@ -76,20 +76,20 @@ multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
|||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
|
||||
ssmem, sse_load_f32, SchedWriteFAdd.XMM>;
|
||||
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
|
||||
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
|
||||
SchedWriteFAdd.XMM>;
|
||||
SchedWriteFRnd.XMM>;
|
||||
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
|
||||
SchedWriteFAdd.YMM>;
|
||||
SchedWriteFRnd.YMM>;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
|
||||
sdmem, sse_load_f64, SchedWriteFAdd.XMM>;
|
||||
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
|
||||
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
|
||||
SchedWriteFAdd.XMM>;
|
||||
SchedWriteFRnd.XMM>;
|
||||
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
|
||||
SchedWriteFAdd.YMM>;
|
||||
SchedWriteFRnd.YMM>;
|
||||
}
|
||||
|
||||
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
|
|
@ -176,7 +176,11 @@ defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply
|
|||
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
|
||||
defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
|
||||
defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
|
||||
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
|
||||
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
|
||||
defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding.
|
||||
defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM/ZMM).
|
||||
defm : X86WriteRes<WriteFRndLd, [BWPort1,BWPort23], 11, [2,1], 3>;
|
||||
defm : X86WriteRes<WriteFRndYLd, [BWPort1,BWPort23], 12, [2,1], 3>;
|
||||
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
|
||||
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
|
||||
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
|
||||
|
@ -926,11 +930,7 @@ def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m",
|
|||
"VMOVUPDYrm",
|
||||
"VMOVUPSYrm",
|
||||
"VPBROADCASTDYrm",
|
||||
"VPBROADCASTQYrm",
|
||||
"(V?)ROUNDPD(Y?)r",
|
||||
"(V?)ROUNDPS(Y?)r",
|
||||
"(V?)ROUNDSDr",
|
||||
"(V?)ROUNDSSr")>;
|
||||
"VPBROADCASTQYrm")>;
|
||||
|
||||
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
|
||||
let Latency = 6;
|
||||
|
@ -1405,16 +1405,6 @@ def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
|
|||
def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr",
|
||||
"VRSQRTPSYr")>;
|
||||
|
||||
def BWWriteResGroup127 : SchedWriteRes<[BWPort1,BWPort23]> {
|
||||
let Latency = 11;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup127], (instregex "(V?)ROUNDPDm",
|
||||
"(V?)ROUNDPSm",
|
||||
"(V?)ROUNDSDm",
|
||||
"(V?)ROUNDSSm")>;
|
||||
|
||||
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
|
||||
let Latency = 11;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1458,9 +1448,7 @@ def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> {
|
|||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
|
||||
"VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
|
||||
|
||||
def BWWriteResGroup137 : SchedWriteRes<[BWPort0,BWFPDivider]> {
|
||||
let Latency = 11;
|
||||
|
|
|
@ -173,6 +173,10 @@ defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
|
|||
defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
|
||||
defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
|
||||
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
|
||||
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
|
||||
defm : X86WriteRes<WriteFRndLd, [HWPort1,HWPort23], 12, [2,1], 3>;
|
||||
defm : X86WriteRes<WriteFRndYLd, [HWPort1,HWPort23], 13, [2,1], 3>;
|
||||
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
|
||||
|
@ -645,11 +649,7 @@ def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm",
|
|||
"(V?)MOVUPDrm",
|
||||
"(V?)MOVUPSrm",
|
||||
"VPBROADCASTDrm",
|
||||
"VPBROADCASTQrm",
|
||||
"(V?)ROUNDPD(Y?)r",
|
||||
"(V?)ROUNDPS(Y?)r",
|
||||
"(V?)ROUNDSDr",
|
||||
"(V?)ROUNDSSr")>;
|
||||
"VPBROADCASTQrm")>;
|
||||
|
||||
def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
|
||||
let Latency = 7;
|
||||
|
@ -1760,19 +1760,7 @@ def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
|
|||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
|
||||
"VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
|
||||
def HWWriteResGroup103_1 : SchedWriteRes<[HWPort1,HWPort23]> {
|
||||
let Latency = 12;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup103_1], (instregex "(V?)ROUNDPDm",
|
||||
"(V?)ROUNDPSm",
|
||||
"(V?)ROUNDSDm",
|
||||
"(V?)ROUNDSSm")>;
|
||||
def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
|
||||
|
||||
def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
|
||||
let Latency = 12;
|
||||
|
|
|
@ -160,6 +160,8 @@ defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
|
|||
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
|
||||
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
|
||||
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
|
||||
|
@ -1157,11 +1159,7 @@ def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
|
|||
def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm",
|
||||
"MMX_CVTTPS2PIirm",
|
||||
"(V?)CVTPS2DQrm",
|
||||
"(V?)CVTTPS2DQrm",
|
||||
"(V?)ROUNDPDm",
|
||||
"(V?)ROUNDPSm",
|
||||
"(V?)ROUNDSDm",
|
||||
"(V?)ROUNDSSm")>;
|
||||
"(V?)CVTTPS2DQrm")>;
|
||||
|
||||
def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
|
||||
let Latency = 9;
|
||||
|
|
|
@ -173,6 +173,8 @@ defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // F
|
|||
defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product.
|
||||
defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
|
||||
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
|
||||
defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
|
||||
defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
|
||||
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
|
||||
|
@ -1335,16 +1337,6 @@ def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort015
|
|||
}
|
||||
def: InstRW<[SKLWriteResGroup103], (instrs LOOP)>;
|
||||
|
||||
def SKLWriteResGroup105 : SchedWriteRes<[SKLPort01]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup105], (instregex "(V?)ROUNDPD(Y?)r",
|
||||
"(V?)ROUNDPS(Y?)r",
|
||||
"(V?)ROUNDSDr",
|
||||
"(V?)ROUNDSSr")>;
|
||||
|
||||
def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -1796,16 +1788,6 @@ def SKLWriteResGroup166_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
|
|||
}
|
||||
def: InstRW<[SKLWriteResGroup166_1], (instregex "VDIVPDYrr")>;
|
||||
|
||||
def SKLWriteResGroup168 : SchedWriteRes<[SKLPort23,SKLPort01]> {
|
||||
let Latency = 14;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDPDm")>;
|
||||
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDPSm")>;
|
||||
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSDm")>;
|
||||
def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSSm")>;
|
||||
|
||||
def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
|
||||
let Latency = 14;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -1829,14 +1811,6 @@ def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FPrST0",
|
|||
"DIVR_FST0r",
|
||||
"DIVR_FrST0")>;
|
||||
|
||||
def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
|
||||
let Latency = 15;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
|
||||
def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
|
||||
let Latency = 15;
|
||||
let NumMicroOps = 10;
|
||||
|
|
|
@ -173,6 +173,8 @@ defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Fl
|
|||
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product.
|
||||
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
|
||||
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
|
||||
defm : SKXWriteResPair<WriteFRnd, [SKXPort015], 8, [2], 2, 6>; // Floating point rounding.
|
||||
defm : SKXWriteResPair<WriteFRndY, [SKXPort015], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
|
||||
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
|
||||
|
@ -2127,24 +2129,6 @@ def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,S
|
|||
}
|
||||
def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
|
||||
|
||||
def SKXWriteResGroup116 : SchedWriteRes<[SKXPort015]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ128rri",
|
||||
"VRNDSCALEPDZ256rri",
|
||||
"VRNDSCALEPDZrri",
|
||||
"VRNDSCALEPSZ128rri",
|
||||
"VRNDSCALEPSZ256rri",
|
||||
"VRNDSCALEPSZrri",
|
||||
"VRNDSCALESDr",
|
||||
"VRNDSCALESSr",
|
||||
"(V?)ROUNDPD(Y?)r",
|
||||
"(V?)ROUNDPS(Y?)r",
|
||||
"(V?)ROUNDSDr",
|
||||
"(V?)ROUNDSSr")>;
|
||||
|
||||
def SKXWriteResGroup117 : SchedWriteRes<[SKXPort0,SKXPort23]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -3007,20 +2991,6 @@ def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
|
|||
}
|
||||
def: InstRW<[SKXWriteResGroup184_1], (instregex "VDIVPD(Y|Z256)rr")>;
|
||||
|
||||
def SKXWriteResGroup186 : SchedWriteRes<[SKXPort23,SKXPort015]> {
|
||||
let Latency = 14;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPDZ128rm(b?)i",
|
||||
"VRNDSCALEPSZ128rm(b?)i",
|
||||
"VRNDSCALESDm(b?)",
|
||||
"VRNDSCALESSm(b?)",
|
||||
"(V?)ROUNDPDm",
|
||||
"(V?)ROUNDPSm",
|
||||
"(V?)ROUNDSDm",
|
||||
"(V?)ROUNDSSm")>;
|
||||
|
||||
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
|
||||
let Latency = 14;
|
||||
let NumMicroOps = 3;
|
||||
|
@ -3067,18 +3037,6 @@ def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FPrST0",
|
|||
"DIVR_FST0r",
|
||||
"DIVR_FrST0")>;
|
||||
|
||||
def SKXWriteResGroup192 : SchedWriteRes<[SKXPort23,SKXPort015]> {
|
||||
let Latency = 15;
|
||||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i",
|
||||
"VRNDSCALEPDZrm(b?)i",
|
||||
"VRNDSCALEPSZ256rm(b?)i",
|
||||
"VRNDSCALEPSZrm(b?)i",
|
||||
"VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
|
||||
def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
|
||||
let Latency = 15;
|
||||
let NumMicroOps = 8;
|
||||
|
|
|
@ -123,6 +123,8 @@ defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
|
|||
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
|
||||
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
|
||||
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
|
||||
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
|
||||
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM/ZMM).
|
||||
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
|
||||
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
|
||||
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
|
||||
|
@ -258,6 +260,8 @@ def SchedWriteFRcp
|
|||
: X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcpY, WriteFRcpY>;
|
||||
def SchedWriteFRsqrt
|
||||
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrtY, WriteFRsqrtY>;
|
||||
def SchedWriteFRnd
|
||||
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>;
|
||||
def SchedWriteFLogic
|
||||
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;
|
||||
|
||||
|
|
|
@ -218,6 +218,8 @@ defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 34, 34,
|
|||
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
|
||||
defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
|
||||
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
|
||||
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
|
||||
defm : AtomWriteResPair<WriteFRndY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
|
||||
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
|
||||
defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
||||
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
|
||||
|
|
|
@ -337,6 +337,8 @@ defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
|
|||
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
|
||||
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
|
||||
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
|
||||
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
|
||||
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
|
||||
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
|
||||
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
|
||||
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
|
||||
|
@ -563,8 +565,7 @@ def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
|
|||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
|
||||
VCVTPS2DQYrr, VCVTTPS2DQYrr,
|
||||
VROUNDPDYr, VROUNDPSYr)>;
|
||||
VCVTPS2DQYrr, VCVTTPS2DQYrr)>;
|
||||
|
||||
def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
|
||||
let Latency = 8;
|
||||
|
@ -572,8 +573,7 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
|
|||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
|
||||
VCVTPS2DQYrm, VCVTTPS2DQYrm,
|
||||
VROUNDPDYm, VROUNDPSYm)>;
|
||||
VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
|
||||
|
||||
def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
||||
let Latency = 2;
|
||||
|
|
|
@ -151,6 +151,8 @@ defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
|
|||
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
|
||||
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
|
||||
|
|
|
@ -120,7 +120,8 @@ multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
|
|||
// This multiclass is for folded loads for floating point units.
|
||||
multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
|
||||
list<ProcResourceKind> ExePorts,
|
||||
int Lat, list<int> Res = [], int UOps = 1> {
|
||||
int Lat, list<int> Res = [], int UOps = 1,
|
||||
int LoadLat = 7, int LoadUOps = 0> {
|
||||
// Register variant takes 1-cycle on Execution Port.
|
||||
def : WriteRes<SchedRW, ExePorts> {
|
||||
let Latency = Lat;
|
||||
|
@ -129,11 +130,11 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
|
|||
}
|
||||
|
||||
// Memory variant also uses a cycle on ZnAGU
|
||||
// adds 7 cycles to the latency.
|
||||
// adds LoadLat cycles to the latency (default = 7).
|
||||
def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
|
||||
let Latency = !add(Lat, 7);
|
||||
let Latency = !add(Lat, LoadLat);
|
||||
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
|
||||
let NumMicroOps = UOps;
|
||||
let NumMicroOps = !add(UOps, LoadUOps);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -208,6 +209,8 @@ defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
|
|||
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
|
||||
defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
|
||||
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
|
||||
defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
|
||||
defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
|
||||
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
|
||||
|
@ -1524,20 +1527,6 @@ def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
|
|||
}
|
||||
def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm")>;
|
||||
|
||||
// ROUND SS/SD PS/PD.
|
||||
// v,v,i.
|
||||
def ZnWriteROUNDr : SchedWriteRes<[ZnFPU3]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
def : InstRW<[ZnWriteROUNDr], (instregex "(V?)ROUND(S|P)(S|D)(Y?)r")>;
|
||||
|
||||
// v,m,i.
|
||||
def ZnWriteROUNDm : SchedWriteRes<[ZnAGU, ZnFPU3]> {
|
||||
let Latency = 11;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(S|P)(S|D)(Y?)m")>;
|
||||
|
||||
// DPPS.
|
||||
// x,x,i / v,v,v,i.
|
||||
def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>;
|
||||
|
|
|
@ -1720,7 +1720,7 @@ vzeroupper
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 48.00 2.00 - 355.50 907.50 402.00 398.00 381.00 - 43.00 114.00 117.50 117.50 38.00
|
||||
# CHECK-NEXT: 48.00 2.00 - 347.50 907.50 394.00 406.00 381.00 - 43.00 122.00 117.50 117.50 38.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
|
@ -2318,18 +2318,18 @@ vzeroupper
|
|||
# CHECK-NEXT: - - - - 2.00 - 2.00 2.00 - - - - - - vrcpps (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrcpss (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundpd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundpd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundpd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundpd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundpd $1, %ymm0, %ymm2
|
||||
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundpd $1, (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundps $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundps $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundps $1, %ymm0, %ymm2
|
||||
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundps $1, (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundsd $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundss $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundsd $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundsd $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundss $1, %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundss $1, (%rax), %xmm1, %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vrsqrtps %ymm0, %ymm2
|
||||
|
|
|
@ -270,7 +270,7 @@ roundss $1, (%rax), %xmm2
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 6.00 - - 37.00 23.00 57.50 42.50 44.00 - 5.00 5.00 32.50 32.50 10.00
|
||||
# CHECK-NEXT: 6.00 - - 29.00 23.00 49.50 50.50 44.00 - 5.00 13.00 32.50 32.50 10.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
|
@ -362,12 +362,12 @@ roundss $1, (%rax), %xmm2
|
|||
# CHECK-NEXT: - - - - - 2.50 0.50 1.00 - - - 0.50 0.50 2.00 pmulld (%rax), %xmm2
|
||||
# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - ptest %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - 1.00 - - - - - - ptest (%rax), %xmm1
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundpd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundpd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundps $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundsd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundsd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundss $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundss $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundpd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundpd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundps $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundsd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundsd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundss $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundss $1, (%rax), %xmm2
|
||||
|
||||
|
|
Loading…
Reference in New Issue