[X86] Split WriteIDiv into div/idiv 8/16/32/64 implementations (PR36930)

I've created the necessary classes but there are still a lot of overrides that need cleaning up.

NOTE: The Znver1 model was missing some div/idiv variants in the instregex patterns and wasn't setting the resource cycles at all in the overrides.
llvm-svn: 331767
This commit is contained in:
Simon Pilgrim 2018-05-08 13:51:45 +00:00
parent 35b613974e
commit 2580554333
13 changed files with 160 additions and 177 deletions

View File

@ -280,70 +280,65 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
let SchedRW = [WriteIDiv] in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", []>;
"div{b}\t$src", []>, Sched<[WriteDiv8]>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
"div{w}\t$src", []>, OpSize16;
"div{w}\t$src", []>, Sched<[WriteDiv16]>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
"div{l}\t$src", []>, OpSize32;
"div{l}\t$src", []>, Sched<[WriteDiv32]>, OpSize32;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
"div{q}\t$src", []>;
} // SchedRW
"div{q}\t$src", []>, Sched<[WriteDiv64]>;
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
"div{b}\t$src", []>, SchedLoadReg<WriteIDivLd>;
"div{b}\t$src", []>, SchedLoadReg<WriteDiv8.Folded>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
"div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDivLd>;
"div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16.Folded>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
"div{l}\t$src", []>, SchedLoadReg<WriteIDivLd>, OpSize32;
"div{l}\t$src", []>, SchedLoadReg<WriteDiv32.Folded>, OpSize32;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
"div{q}\t$src", []>, SchedLoadReg<WriteIDivLd>,
"div{q}\t$src", []>, SchedLoadReg<WriteDiv64.Folded>,
Requires<[In64BitMode]>;
}
// Signed division/remainder.
let SchedRW = [WriteIDiv] in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", []>;
"idiv{b}\t$src", []>, Sched<[WriteIDiv8]>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
"idiv{w}\t$src", []>, OpSize16;
"idiv{w}\t$src", []>, Sched<[WriteIDiv16]>, OpSize16;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
"idiv{l}\t$src", []>, OpSize32;
"idiv{l}\t$src", []>, Sched<[WriteIDiv32]>, OpSize32;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
"idiv{q}\t$src", []>;
} // SchedRW
"idiv{q}\t$src", []>, Sched<[WriteIDiv64]>;
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
"idiv{b}\t$src", []>,
SchedLoadReg<WriteIDivLd>;
"idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8.Folded>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
"idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDivLd>;
"idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16.Folded>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
"idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDivLd>;
"idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32.Folded>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
"idiv{q}\t$src", []>, SchedLoadReg<WriteIDivLd>,
"idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64.Folded>,
Requires<[In64BitMode]>;
}
} // hasSideEffects = 1

View File

@ -108,7 +108,16 @@ def : WriteRes<WriteRMW, [BWPort237,BWPort4]>;
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
defm : BWWriteResPair<WriteIDiv, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.

View File

@ -141,8 +141,14 @@ defm : HWWriteResPair<WritePOPCNT, [HWPort1], 3>;
defm : HWWriteResPair<WriteBEXTR, [HWPort06,HWPort15], 2, [1,1], 2>;
defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
// This is quite rough; latency depends on the dividend.
defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteIDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteIDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteIDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
defm : HWWriteResPair<WriteIDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLoad, [HWPort23], 5, [1], 1>;

View File

@ -103,7 +103,16 @@ def : WriteRes<WriteZero, []>;
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
defm : SBWriteResPair<WriteIDiv, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : SBWriteResPair<WriteShift, [SBPort05], 1>;

View File

@ -107,7 +107,16 @@ def : WriteRes<WriteRMW, [SKLPort237,SKLPort4]>;
// Arithmetic.
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
defm : SKLWriteResPair<WriteIDiv, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; // Integer division.
defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteIDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteIDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteIDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteIDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.

View File

@ -107,7 +107,16 @@ def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;
// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
defm : SKXWriteResPair<WriteIDiv, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; // Integer division.
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteIDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteIDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteIDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.

View File

@ -78,9 +78,18 @@ defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
defm WriteIDiv : X86SchedWritePair; // Integer division.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
// Integer division, with one scheduling class per operand width (8/16/32/64).
// WriteDiv* is attached to the unsigned DIV instructions and WriteIDiv* to the
// signed IDIV instructions; the memory-operand forms reference the matching
// `.Folded` member of each class.
defm WriteDiv8 : X86SchedWritePair; // Unsigned division, 8-bit operand.
defm WriteDiv16 : X86SchedWritePair; // Unsigned division, 16-bit operand.
defm WriteDiv32 : X86SchedWritePair; // Unsigned division, 32-bit operand.
defm WriteDiv64 : X86SchedWritePair; // Unsigned division, 64-bit operand.
defm WriteIDiv8 : X86SchedWritePair; // Signed division, 8-bit operand.
defm WriteIDiv16 : X86SchedWritePair; // Signed division, 16-bit operand.
defm WriteIDiv32 : X86SchedWritePair; // Signed division, 32-bit operand.
defm WriteIDiv64 : X86SchedWritePair; // Signed division, 64-bit operand.
defm WriteBitScan : X86SchedWritePair; // Bit scan forward/reverse.
defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.

View File

@ -78,7 +78,16 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIDiv, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : AtomWriteResPair<WriteCRC32, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
@ -120,27 +129,6 @@ def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
IMUL64rmi8, IMUL64rmi32)>;
def AtomWriteDiv : SchedWriteRes<[AtomPort01]> {
let Latency = 50;
let ResourceCycles = [50];
}
def : InstRW<[AtomWriteDiv], (instrs DIV8r,
DIV16r, DIV16m,
DIV32r, DIV32m)>;
def AtomWriteDiv8Ld : SchedWriteRes<[AtomPort01]> {
let Latency = 68;
let ResourceCycles = [68];
}
def : InstRW<[AtomWriteDiv8Ld], (instrs DIV8m)>;
def AtomWriteIDiv64 : SchedWriteRes<[AtomPort01]> {
let Latency = 130;
let ResourceCycles = [130];
}
def : InstRW<[AtomWriteIDiv64], (instrs DIV64r, IDIV64r,
DIV64m, IDIV64m)>;
// Bit counts.
defm : AtomWriteResPair<WriteBitScan, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WritePOPCNT, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.

View File

@ -155,10 +155,19 @@ def : WriteRes<WriteRMW, [JSAGU]>;
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIDiv, [JALU1, JDiv], 41, [1, 41], 2>; // Worst case (i64 division)
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteIDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteIDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteIDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
@ -196,43 +205,6 @@ def JWriteIMul64Ld : SchedWriteRes<[JLAGU, JALU1, JMul]> {
def : InstRW<[JWriteIMul64], (instrs MUL64r, IMUL64r)>;
def : InstRW<[JWriteIMul64Ld], (instrs MUL64m, IMUL64m)>;
def JWriteIDiv8 : SchedWriteRes<[JALU1, JDiv]> {
let Latency = 12;
let ResourceCycles = [1, 12];
}
def JWriteIDiv8Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
let Latency = 15;
let ResourceCycles = [1, 1, 12];
}
def : InstRW<[JWriteIDiv8], (instrs DIV8r, IDIV8r)>;
def : InstRW<[JWriteIDiv8Ld], (instrs DIV8m, IDIV8m)>;
def JWriteIDiv16 : SchedWriteRes<[JALU1, JDiv]> {
let Latency = 17;
let ResourceCycles = [1, 17];
let NumMicroOps = 2;
}
def JWriteIDiv16Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
let Latency = 20;
let ResourceCycles = [1, 1, 17];
let NumMicroOps = 2;
}
def : InstRW<[JWriteIDiv16], (instrs DIV16r, IDIV16r)>;
def : InstRW<[JWriteIDiv16Ld], (instrs DIV16m, IDIV16m)>;
def JWriteIDiv32 : SchedWriteRes<[JALU1, JDiv]> {
let Latency = 25;
let ResourceCycles = [1, 25];
let NumMicroOps = 2;
}
def JWriteIDiv32Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
let Latency = 28;
let ResourceCycles = [1, 1, 25];
let NumMicroOps = 2;
}
def : InstRW<[JWriteIDiv32], (instrs DIV32r, IDIV32r)>;
def : InstRW<[JWriteIDiv32Ld], (instrs DIV32m, IDIV32m)>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

View File

@ -121,8 +121,14 @@ defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteBEXTR, [SLM_IEC_RSV0], 1>;
defm : SLMWriteResPair<WriteBZHI, [SLM_IEC_RSV0], 1>;
// This is quite rough; latency depends on the dividend.
defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }

View File

@ -172,15 +172,14 @@ defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
// Integer division (DIV, IDIV)
def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> {
let Latency = 41;
let ResourceCycles = [1, 41];
}
def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> {
let Latency = 45;
let ResourceCycles = [1, 4, 41];
}
defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
defm : ZnWriteResPair<WriteDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
defm : ZnWriteResPair<WriteDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
defm : ZnWriteResPair<WriteDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
defm : ZnWriteResPair<WriteIDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
// IMULH
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
@ -552,34 +551,6 @@ def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
}
def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
// DIV, IDIV.
// r8.
def ZnWriteDiv8 : SchedWriteRes<[ZnALU2, ZnDivider]> {
let Latency = 15;
}
def : InstRW<[ZnWriteDiv8], (instregex "DIV8r", "IDIV8r")>;
// r16.
def ZnWriteDiv16 : SchedWriteRes<[ZnALU2, ZnDivider]> {
let Latency = 17;
let NumMicroOps = 2;
}
def : InstRW<[ZnWriteDiv16], (instregex "DIV16r", "IDIV16r")>;
// r32.
def ZnWriteDiv32 : SchedWriteRes<[ZnALU2, ZnDivider]> {
let Latency = 25;
let NumMicroOps = 2;
}
def : InstRW<[ZnWriteDiv32], (instregex "DIV32r", "IDIV32r")>;
// r64.
def ZnWriteDiv64 : SchedWriteRes<[ZnALU2, ZnDivider]> {
let Latency = 41;
let NumMicroOps = 2;
}
def : InstRW<[ZnWriteDiv64], (instregex "DIV64r", "IDIV64r")>;
//-- Control transfer instructions --//
// J(E|R)CXZ.

View File

@ -5275,13 +5275,13 @@ define void @test_div(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: divb %dil # sched: [15:1.00]
; ZNVER1-NEXT: divb (%r8) # sched: [45:41.00]
; ZNVER1-NEXT: divw %si # sched: [17:1.00]
; ZNVER1-NEXT: divw (%r9) # sched: [45:41.00]
; ZNVER1-NEXT: divl %edx # sched: [25:1.00]
; ZNVER1-NEXT: divl (%rax) # sched: [45:41.00]
; ZNVER1-NEXT: divq %rcx # sched: [41:1.00]
; ZNVER1-NEXT: divb %dil # sched: [15:15.00]
; ZNVER1-NEXT: divb (%r8) # sched: [19:15.00]
; ZNVER1-NEXT: divw %si # sched: [17:17.00]
; ZNVER1-NEXT: divw (%r9) # sched: [21:17.00]
; ZNVER1-NEXT: divl %edx # sched: [25:25.00]
; ZNVER1-NEXT: divl (%rax) # sched: [29:25.00]
; ZNVER1-NEXT: divq %rcx # sched: [41:41.00]
; ZNVER1-NEXT: divq (%r10) # sched: [45:41.00]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]
@ -5523,13 +5523,13 @@ define void @test_idiv(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: idivb %dil # sched: [15:1.00]
; ZNVER1-NEXT: idivb (%r8) # sched: [45:41.00]
; ZNVER1-NEXT: idivw %si # sched: [17:1.00]
; ZNVER1-NEXT: idivw (%r9) # sched: [45:41.00]
; ZNVER1-NEXT: idivl %edx # sched: [25:1.00]
; ZNVER1-NEXT: idivl (%rax) # sched: [45:41.00]
; ZNVER1-NEXT: idivq %rcx # sched: [41:1.00]
; ZNVER1-NEXT: idivb %dil # sched: [15:15.00]
; ZNVER1-NEXT: idivb (%r8) # sched: [19:15.00]
; ZNVER1-NEXT: idivw %si # sched: [17:17.00]
; ZNVER1-NEXT: idivw (%r9) # sched: [21:17.00]
; ZNVER1-NEXT: idivl %edx # sched: [25:25.00]
; ZNVER1-NEXT: idivl (%rax) # sched: [29:25.00]
; ZNVER1-NEXT: idivq %rcx # sched: [41:41.00]
; ZNVER1-NEXT: idivq (%r10) # sched: [45:41.00]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retq # sched: [1:0.50]

View File

@ -612,22 +612,22 @@ subq (%rax), %rdi
# CHECK-NEXT: 2 5 0.50 * * decl (%rax)
# CHECK-NEXT: 1 1 0.25 decq %rdi
# CHECK-NEXT: 2 5 0.50 * * decq (%rax)
# CHECK-NEXT: 1 15 1.00 * divb %dil
# CHECK-NEXT: 1 45 41.00 * * divb (%rax)
# CHECK-NEXT: 2 17 1.00 * divw %si
# CHECK-NEXT: 1 45 41.00 * * divw (%rax)
# CHECK-NEXT: 2 25 1.00 * divl %edx
# CHECK-NEXT: 1 45 41.00 * * divl (%rax)
# CHECK-NEXT: 2 41 1.00 * divq %rcx
# CHECK-NEXT: 1 45 41.00 * * divq (%rax)
# CHECK-NEXT: 1 15 1.00 * idivb %dil
# CHECK-NEXT: 1 45 41.00 * * idivb (%rax)
# CHECK-NEXT: 2 17 1.00 * idivw %si
# CHECK-NEXT: 1 45 41.00 * * idivw (%rax)
# CHECK-NEXT: 2 25 1.00 * idivl %edx
# CHECK-NEXT: 1 45 41.00 * * idivl (%rax)
# CHECK-NEXT: 2 41 1.00 * idivq %rcx
# CHECK-NEXT: 1 45 41.00 * * idivq (%rax)
# CHECK-NEXT: 1 15 15.00 * divb %dil
# CHECK-NEXT: 2 19 15.00 * * divb (%rax)
# CHECK-NEXT: 2 17 17.00 * divw %si
# CHECK-NEXT: 3 21 17.00 * * divw (%rax)
# CHECK-NEXT: 2 25 25.00 * divl %edx
# CHECK-NEXT: 3 29 25.00 * * divl (%rax)
# CHECK-NEXT: 2 41 41.00 * divq %rcx
# CHECK-NEXT: 3 45 41.00 * * divq (%rax)
# CHECK-NEXT: 1 15 15.00 * idivb %dil
# CHECK-NEXT: 2 19 15.00 * * idivb (%rax)
# CHECK-NEXT: 2 17 17.00 * idivw %si
# CHECK-NEXT: 3 21 17.00 * * idivw (%rax)
# CHECK-NEXT: 2 25 25.00 * idivl %edx
# CHECK-NEXT: 3 29 25.00 * * idivl (%rax)
# CHECK-NEXT: 2 41 41.00 * idivq %rcx
# CHECK-NEXT: 3 45 41.00 * * idivq (%rax)
# CHECK-NEXT: 1 4 1.00 imulb %dil
# CHECK-NEXT: 2 8 1.00 * imulb (%rax)
# CHECK-NEXT: 1 3 1.00 imulw %di
@ -954,7 +954,7 @@ subq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 119.00 119.00 87.00 121.00 103.00 87.00 336.00 - - - - 34.00
# CHECK-NEXT: 107.00 107.00 87.00 121.00 103.00 87.00 392.00 - - - - 34.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@ -1074,22 +1074,22 @@ subq (%rax), %rdi
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - decl (%rax)
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - decq %rdi
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - decq (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divb %dil
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divb (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divw %si
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divw (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divl %edx
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divl (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - divq %rcx
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - divq (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivb %dil
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivb (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivw %si
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivw (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivl %edx
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivl (%rax)
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - idivq %rcx
# CHECK-NEXT: 2.00 2.00 - - 1.00 - 41.00 - - - - - idivq (%rax)
# CHECK-NEXT: - - - - 1.00 - 15.00 - - - - - divb %dil
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 15.00 - - - - - divb (%rax)
# CHECK-NEXT: - - - - 1.00 - 17.00 - - - - - divw %si
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 17.00 - - - - - divw (%rax)
# CHECK-NEXT: - - - - 1.00 - 25.00 - - - - - divl %edx
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 25.00 - - - - - divl (%rax)
# CHECK-NEXT: - - - - 1.00 - 41.00 - - - - - divq %rcx
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 41.00 - - - - - divq (%rax)
# CHECK-NEXT: - - - - 1.00 - 15.00 - - - - - idivb %dil
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 15.00 - - - - - idivb (%rax)
# CHECK-NEXT: - - - - 1.00 - 17.00 - - - - - idivw %si
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 17.00 - - - - - idivw (%rax)
# CHECK-NEXT: - - - - 1.00 - 25.00 - - - - - idivl %edx
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 25.00 - - - - - idivl (%rax)
# CHECK-NEXT: - - - - 1.00 - 41.00 - - - - - idivq %rcx
# CHECK-NEXT: 0.50 0.50 - - 1.00 - 41.00 - - - - - idivq (%rax)
# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imulb %dil
# CHECK-NEXT: 0.50 0.50 - 1.00 - - - - - - - 1.00 imulb (%rax)
# CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 imulw %di