[X86][F16C] Add WriteCvtF2FSt scheduling class
Fixes the classification of VCVTPS2PHmr/VCVTPS2PHYmr, which were tagged as WriteCvtF2FLd_WriteRMW (PR36887).
llvm-svn: 330737
This commit is contained in:
parent
11b1e8898a
commit
f0945aa0e0
|
@ -7645,47 +7645,43 @@ let Predicates = [HasVLX] in {
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
|
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
|
||||||
X86MemOperand x86memop, X86FoldableSchedWrite sched> {
|
X86MemOperand x86memop> {
|
||||||
defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
|
defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
|
||||||
(ins _src.RC:$src1, i32u8imm:$src2),
|
(ins _src.RC:$src1, i32u8imm:$src2),
|
||||||
"vcvtps2ph", "$src2, $src1", "$src1, $src2",
|
"vcvtps2ph", "$src2, $src1", "$src1, $src2",
|
||||||
(X86cvtps2ph (_src.VT _src.RC:$src1),
|
(X86cvtps2ph (_src.VT _src.RC:$src1),
|
||||||
(i32 imm:$src2)), 0, 0>,
|
(i32 imm:$src2)), 0, 0>,
|
||||||
AVX512AIi8Base, Sched<[sched]>;
|
AVX512AIi8Base, Sched<[WriteCvtF2F]>;
|
||||||
let hasSideEffects = 0, mayStore = 1 in {
|
let hasSideEffects = 0, mayStore = 1 in {
|
||||||
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
|
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
|
||||||
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
|
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
|
||||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||||
Sched<[sched.Folded, ReadAfterLd]>;
|
Sched<[WriteCvtF2FSt]>;
|
||||||
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
|
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
|
||||||
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
|
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
|
||||||
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
|
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
|
||||||
EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
|
EVEX_K, Sched<[WriteCvtF2FSt]>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
|
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
|
||||||
X86FoldableSchedWrite sched> {
|
|
||||||
let hasSideEffects = 0 in
|
let hasSideEffects = 0 in
|
||||||
defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
|
defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
|
||||||
(outs _dest.RC:$dst),
|
(outs _dest.RC:$dst),
|
||||||
(ins _src.RC:$src1, i32u8imm:$src2),
|
(ins _src.RC:$src1, i32u8imm:$src2),
|
||||||
"vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
|
"vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
|
||||||
EVEX_B, AVX512AIi8Base, Sched<[sched]>;
|
EVEX_B, AVX512AIi8Base, Sched<[WriteCvtF2F]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX512] in {
|
let Predicates = [HasAVX512] in {
|
||||||
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, WriteCvtF2F>,
|
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
|
||||||
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
|
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
|
||||||
WriteCvtF2F>, EVEX, EVEX_V512,
|
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
|
||||||
EVEX_CD8<32, CD8VH>;
|
|
||||||
let Predicates = [HasVLX] in {
|
let Predicates = [HasVLX] in {
|
||||||
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
|
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
|
||||||
WriteCvtF2F>, EVEX, EVEX_V256,
|
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
|
||||||
EVEX_CD8<32, CD8VH>;
|
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
|
||||||
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
|
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
|
||||||
WriteCvtF2F>, EVEX, EVEX_V128,
|
|
||||||
EVEX_CD8<32, CD8VH>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(store (f64 (extractelt
|
def : Pat<(store (f64 (extractelt
|
||||||
|
|
|
@ -7283,12 +7283,11 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
|
||||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
|
[(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
|
||||||
TAPD, VEX, Sched<[WriteCvtF2F]>;
|
TAPD, VEX, Sched<[WriteCvtF2F]>;
|
||||||
let hasSideEffects = 0, mayStore = 1,
|
let hasSideEffects = 0, mayStore = 1 in
|
||||||
SchedRW = [WriteCvtF2FLd, WriteRMW] in
|
|
||||||
def mr : Ii8<0x1D, MRMDestMem, (outs),
|
def mr : Ii8<0x1D, MRMDestMem, (outs),
|
||||||
(ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
|
(ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
|
||||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
|
||||||
TAPD, VEX;
|
TAPD, VEX, Sched<[WriteCvtF2FSt]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasF16C, NoVLX] in {
|
let Predicates = [HasF16C, NoVLX] in {
|
||||||
|
|
|
@ -170,6 +170,12 @@ defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vecto
|
||||||
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
|
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
|
||||||
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
|
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
|
||||||
|
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
|
||||||
|
let Latency = 4;
|
||||||
|
let NumMicroOps = 3;
|
||||||
|
let ResourceCycles = [1,1,1];
|
||||||
|
}
|
||||||
|
|
||||||
// FMA Scheduling helper class.
|
// FMA Scheduling helper class.
|
||||||
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||||
|
|
||||||
|
@ -806,8 +812,7 @@ def: InstRW<[BWWriteResGroup44], (instregex "ISTT_FP16m",
|
||||||
"IST_F32m",
|
"IST_F32m",
|
||||||
"IST_FP16m",
|
"IST_FP16m",
|
||||||
"IST_FP32m",
|
"IST_FP32m",
|
||||||
"IST_FP64m",
|
"IST_FP64m")>;
|
||||||
"VCVTPS2PH(Y?)mr")>;
|
|
||||||
|
|
||||||
def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
|
def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
|
||||||
let Latency = 4;
|
let Latency = 4;
|
||||||
|
|
|
@ -169,6 +169,12 @@ defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
|
||||||
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
|
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
|
||||||
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
|
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
|
||||||
|
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
|
||||||
|
let Latency = 5;
|
||||||
|
let NumMicroOps = 4;
|
||||||
|
let ResourceCycles = [1,1,1,1];
|
||||||
|
}
|
||||||
|
|
||||||
// Vector integer operations.
|
// Vector integer operations.
|
||||||
def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>;
|
def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>;
|
||||||
def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; }
|
def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; }
|
||||||
|
@ -1823,13 +1829,6 @@ def: InstRW<[HWWriteResGroup84], (instregex "VMASKMOVPD(Y?)mr",
|
||||||
"VPMASKMOVD(Y?)mr",
|
"VPMASKMOVD(Y?)mr",
|
||||||
"VPMASKMOVQ(Y?)mr")>;
|
"VPMASKMOVQ(Y?)mr")>;
|
||||||
|
|
||||||
def HWWriteResGroup85 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> {
|
|
||||||
let Latency = 5;
|
|
||||||
let NumMicroOps = 4;
|
|
||||||
let ResourceCycles = [1,1,1,1];
|
|
||||||
}
|
|
||||||
def: InstRW<[HWWriteResGroup85], (instregex "VCVTPS2PHmr")>;
|
|
||||||
|
|
||||||
def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
|
def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
|
||||||
let Latency = 10;
|
let Latency = 10;
|
||||||
let NumMicroOps = 4;
|
let NumMicroOps = 4;
|
||||||
|
|
|
@ -155,6 +155,7 @@ defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
|
||||||
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
|
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
|
||||||
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1>;
|
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1>;
|
||||||
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
|
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
|
||||||
|
|
||||||
// Vector integer operations.
|
// Vector integer operations.
|
||||||
def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>;
|
def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>;
|
||||||
|
|
|
@ -167,6 +167,12 @@ defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vec
|
||||||
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
|
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
|
||||||
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
|
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
|
||||||
|
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
|
||||||
|
let Latency = 6;
|
||||||
|
let NumMicroOps = 4;
|
||||||
|
let ResourceCycles = [1,1,1,1];
|
||||||
|
}
|
||||||
|
|
||||||
// FMA Scheduling helper class.
|
// FMA Scheduling helper class.
|
||||||
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||||
|
|
||||||
|
@ -1212,13 +1218,6 @@ def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]
|
||||||
}
|
}
|
||||||
def: InstRW<[SKLWriteResGroup80], (instregex "SLDT(16|32|64)r")>;
|
def: InstRW<[SKLWriteResGroup80], (instregex "SLDT(16|32|64)r")>;
|
||||||
|
|
||||||
def SKLWriteResGroup81 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
|
|
||||||
let Latency = 6;
|
|
||||||
let NumMicroOps = 4;
|
|
||||||
let ResourceCycles = [1,1,1,1];
|
|
||||||
}
|
|
||||||
def: InstRW<[SKLWriteResGroup81], (instregex "VCVTPS2PHmr")>;
|
|
||||||
|
|
||||||
def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
|
def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
|
||||||
let Latency = 6;
|
let Latency = 6;
|
||||||
let NumMicroOps = 4;
|
let NumMicroOps = 4;
|
||||||
|
|
|
@ -167,6 +167,12 @@ defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vec
|
||||||
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
|
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
|
||||||
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
|
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
|
||||||
|
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
|
||||||
|
let Latency = 6;
|
||||||
|
let NumMicroOps = 4;
|
||||||
|
let ResourceCycles = [1,1,1,1];
|
||||||
|
}
|
||||||
|
|
||||||
// FMA Scheduling helper class.
|
// FMA Scheduling helper class.
|
||||||
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||||
|
|
||||||
|
@ -2340,13 +2346,6 @@ def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]
|
||||||
}
|
}
|
||||||
def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
|
def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
|
||||||
|
|
||||||
def SKXWriteResGroup85 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
|
|
||||||
let Latency = 6;
|
|
||||||
let NumMicroOps = 4;
|
|
||||||
let ResourceCycles = [1,1,1,1];
|
|
||||||
}
|
|
||||||
def: InstRW<[SKXWriteResGroup85], (instregex "VCVTPS2PHmr")>;
|
|
||||||
|
|
||||||
def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
|
def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
|
||||||
let Latency = 6;
|
let Latency = 6;
|
||||||
let NumMicroOps = 4;
|
let NumMicroOps = 4;
|
||||||
|
|
|
@ -131,6 +131,7 @@ def WriteMMXMOVMSK : SchedWrite;
|
||||||
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
|
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
|
||||||
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
|
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
|
||||||
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
|
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
|
||||||
|
def WriteCvtF2FSt : SchedWrite; // Float -> Float size conversion + store.
|
||||||
|
|
||||||
// CRC32 instruction.
|
// CRC32 instruction.
|
||||||
defm WriteCRC32 : X86SchedWritePair;
|
defm WriteCRC32 : X86SchedWritePair;
|
||||||
|
|
|
@ -227,6 +227,7 @@ defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE
|
||||||
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
|
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
|
||||||
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
|
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
|
||||||
defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
|
defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Vector integer operations.
|
// Vector integer operations.
|
||||||
|
|
|
@ -321,6 +321,7 @@ defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn
|
||||||
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>; // Float -> Integer.
|
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>; // Float -> Integer.
|
||||||
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>; // Integer -> Float.
|
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>; // Integer -> Float.
|
||||||
defm : JWriteResFpuPair<WriteCvtF2F, [JFPU1, JSTC], 3>; // Float -> Float size conversion.
|
defm : JWriteResFpuPair<WriteCvtF2F, [JFPU1, JSTC], 3>; // Float -> Float size conversion.
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [JFPU1, JSTC, JSAGU]> { let Latency = 4; }
|
||||||
|
|
||||||
def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
|
def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
|
||||||
let Latency = 7;
|
let Latency = 7;
|
||||||
|
@ -491,11 +492,6 @@ def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
|
||||||
// F16C instructions.
|
// F16C instructions.
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
def JWriteCVT3St: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
|
||||||
let Latency = 4;
|
|
||||||
}
|
|
||||||
def : InstRW<[JWriteCVT3St], (instrs VCVTPS2PHmr)>;
|
|
||||||
|
|
||||||
def JWriteCVTPS2PHY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
|
def JWriteCVTPS2PHY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
|
||||||
let Latency = 6;
|
let Latency = 6;
|
||||||
let ResourceCycles = [2, 2, 2];
|
let ResourceCycles = [2, 2, 2];
|
||||||
|
|
|
@ -145,6 +145,7 @@ defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
|
||||||
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
|
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
|
||||||
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
|
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
|
||||||
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
|
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||||
|
|
||||||
// Vector integer operations.
|
// Vector integer operations.
|
||||||
def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
|
||||||
|
|
|
@ -211,6 +211,7 @@ defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
|
||||||
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
|
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
|
||||||
defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
|
defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
|
||||||
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20>;
|
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20>;
|
||||||
|
def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
|
||||||
|
|
||||||
// Vector integer operations which uses FPU units
|
// Vector integer operations which uses FPU units
|
||||||
def : WriteRes<WriteVecStore, [ZnAGU]>;
|
def : WriteRes<WriteVecStore, [ZnAGU]>;
|
||||||
|
|
|
@ -125,13 +125,13 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16>
|
||||||
; GENERIC-LABEL: test_vcvtps2ph_128:
|
; GENERIC-LABEL: test_vcvtps2ph_128:
|
||||||
; GENERIC: # %bb.0:
|
; GENERIC: # %bb.0:
|
||||||
; GENERIC-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
|
; GENERIC-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
|
; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
|
||||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; IVY-LABEL: test_vcvtps2ph_128:
|
; IVY-LABEL: test_vcvtps2ph_128:
|
||||||
; IVY: # %bb.0:
|
; IVY: # %bb.0:
|
||||||
; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
|
; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
|
; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
|
||||||
; IVY-NEXT: retq # sched: [1:1.00]
|
; IVY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_vcvtps2ph_128:
|
; HASWELL-LABEL: test_vcvtps2ph_128:
|
||||||
|
@ -175,14 +175,14 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16>
|
||||||
; GENERIC-LABEL: test_vcvtps2ph_256:
|
; GENERIC-LABEL: test_vcvtps2ph_256:
|
||||||
; GENERIC: # %bb.0:
|
; GENERIC: # %bb.0:
|
||||||
; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
|
; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
|
||||||
; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
|
; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
|
||||||
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
||||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; IVY-LABEL: test_vcvtps2ph_256:
|
; IVY-LABEL: test_vcvtps2ph_256:
|
||||||
; IVY: # %bb.0:
|
; IVY: # %bb.0:
|
||||||
; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
|
; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
|
||||||
; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
|
; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
|
||||||
; IVY-NEXT: vzeroupper # sched: [100:0.33]
|
; IVY-NEXT: vzeroupper # sched: [100:0.33]
|
||||||
; IVY-NEXT: retq # sched: [1:1.00]
|
; IVY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
|
|
|
@ -27,9 +27,9 @@ vcvtps2ph $0, %ymm0, (%rax)
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
|
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
|
||||||
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
|
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
|
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 3 8 1.00 * vcvtps2ph $0, %xmm0, (%rax)
|
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
|
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
|
||||||
# CHECK-NEXT: 3 8 1.00 * vcvtps2ph $0, %ymm0, (%rax)
|
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
|
||||||
|
|
||||||
# CHECK: Resources:
|
# CHECK: Resources:
|
||||||
# CHECK-NEXT: [0] - SBDivider
|
# CHECK-NEXT: [0] - SBDivider
|
||||||
|
@ -43,7 +43,7 @@ vcvtps2ph $0, %ymm0, (%rax)
|
||||||
|
|
||||||
# CHECK: Resource pressure per iteration:
|
# CHECK: Resource pressure per iteration:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
|
||||||
# CHECK-NEXT: - - - 8.00 2.00 - 3.00 3.00
|
# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
|
||||||
|
|
||||||
# CHECK: Resource pressure by instruction:
|
# CHECK: Resource pressure by instruction:
|
||||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
|
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
|
||||||
|
@ -52,7 +52,7 @@ vcvtps2ph $0, %ymm0, (%rax)
|
||||||
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
|
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
|
||||||
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
|
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
|
||||||
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
|
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
|
||||||
# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 vcvtps2ph $0, %xmm0, (%rax)
|
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
|
||||||
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
|
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
|
||||||
# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 vcvtps2ph $0, %ymm0, (%rax)
|
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue