[X86][BtVer2] Update latency of mmx horizontal operations

D56777 added a +1cy local forwarding penalty for horizontal operations, but this penalty only affects the sse2/xmm variants; the mmx variants don't suffer the penalty.

Confirmed with @andreadb

llvm-svn: 351755
This commit is contained in:
Simon Pilgrim 2019-01-21 18:04:25 +00:00
parent fe3a1b56eb
commit 9b73ae96c5
3 changed files with 25 additions and 25 deletions

View File

@ -577,7 +577,7 @@ defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1,
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 4>; // +1cy latency.
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 4, [2,2], 2>; // +1cy latency.
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 2>; // +1cy latency.
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 2>; // +1cy latency.
defm : X86WriteResPairUnsupported<WritePHAddY>;

View File

@ -3368,8 +3368,8 @@ define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
;
; BTVER2-LABEL: test_phaddd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [2:0.50]
; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [7:1.00]
; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@ -3453,8 +3453,8 @@ define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
;
; BTVER2-LABEL: test_phaddsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [2:0.50]
; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [7:1.00]
; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@ -3538,8 +3538,8 @@ define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
;
; BTVER2-LABEL: test_phaddw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [2:0.50]
; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [7:1.00]
; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@ -3623,8 +3623,8 @@ define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
;
; BTVER2-LABEL: test_phsubd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [2:0.50]
; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [7:1.00]
; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@ -3708,8 +3708,8 @@ define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
;
; BTVER2-LABEL: test_phsubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [2:0.50]
; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [7:1.00]
; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
@ -3793,8 +3793,8 @@ define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
;
; BTVER2-LABEL: test_phsubw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [2:0.50]
; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [7:1.00]
; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;

View File

@ -122,28 +122,28 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 phaddd %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * phaddd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phaddd %mm0, %mm2
# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 phaddd %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * phaddd (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 phaddsw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * phaddsw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phaddsw %mm0, %mm2
# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 phaddsw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 phaddw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * phaddw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phaddw %mm0, %mm2
# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 phaddw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * phaddw (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 phsubd %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * phsubd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phsubd %mm0, %mm2
# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 phsubd %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * phsubd (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 phsubsw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * phsubsw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phsubsw %mm0, %mm2
# CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 phsubsw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 phsubw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * phsubw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 phsubw %mm0, %mm2
# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %mm2
# CHECK-NEXT: 1 2 0.50 phsubw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 pmaddubsw %mm0, %mm2