[SchedModel] Fix for read advance cycles with implicit pseudo operands.

The SchedModel allows the addition of ReadAdvances to express that certain
operands of the instructions are needed at a later point than the others.

RegAlloc may add pseudo operands that are not part of the instruction
descriptor, and therefore cannot have any read advance entries. This meant
that in some cases the desired read advance was nullified by such a pseudo
operand, which still had the original latency.

This patch fixes this by making sure that such pseudo operands get a zero
latency during DAG construction.

Review: Matthias Braun, Ulrich Weigand.
https://reviews.llvm.org/D49671

llvm-svn: 345606
This commit is contained in:
Jonas Paulsson 2018-10-30 15:04:40 +00:00
parent 023b1d19f3
commit 611b533f1d
32 changed files with 278 additions and 234 deletions

View File

@ -234,6 +234,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = MF.getSubtarget();
// Only use any non-zero latency for real defs/uses, in contrast to
// "fake" operands added by regalloc.
const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
!DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
@ -257,11 +262,18 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
}
Dep.setLatency(
SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
UseOp));
const MCInstrDesc *UseMIDesc =
(RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
bool ImplicitPseudoUse =
(UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
!UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp));
ST.adjustSchedDependency(SU, UseSU, Dep);
} else
Dep.setLatency(0);
ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
}

View File

@ -61,11 +61,11 @@ declare void @external_void_func_v16i8(<16 x i8>) #0
; MESA-DAG: s_mov_b64 s[0:1], s[36:37]
; GCN: v_mov_b32_e32 v0, 1{{$}}
; MESA-DAG: s_mov_b64 s[2:3], s[38:39]
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1@rel32@hi+4
; GCN-DAG: v_mov_b32_e32 v0, 1{{$}}
; MESA-DAG: s_mov_b64 s[2:3], s[38:39]
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
@ -123,12 +123,12 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i8_imm:
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; GCN: v_mov_b32_e32 v0, 0x7b
; HSA-DAG: s_mov_b32 s4, s33{{$}}
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+4
; GCN-NEXT: v_mov_b32_e32 v0, 0x7b
; HSA-DAG: s_mov_b32 s4, s33{{$}}
; GCN-DAG: s_mov_b32 s32, s33{{$}}
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
@ -144,11 +144,11 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; GCN-DAG: buffer_load_sbyte v0
; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+4
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s3
; GCN: s_waitcnt vmcnt(0)
@ -165,11 +165,11 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; HSA-DAG: s_mov_b32 s33, s9{{$}}
; GCN-DAG: buffer_load_ubyte v0
; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+4
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_waitcnt vmcnt(0)
@ -197,11 +197,11 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; GCN-DAG: buffer_load_sshort v0
; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+4
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_waitcnt vmcnt(0)
@ -218,11 +218,11 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; GCN-DAG: buffer_load_ushort v0
; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+4
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_waitcnt vmcnt(0)
@ -237,11 +237,11 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm:
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; GCN: v_mov_b32_e32 v0, 42
; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+4
; GCN: v_mov_b32_e32 v0, 42
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
@ -481,10 +481,10 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; HSA-DAG: s_mov_b32 s33, s9
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; GCN-NOT: v3
; GCN-DAG: v_mov_b32_e32 v0, 3
; GCN-DAG: v_mov_b32_e32 v1, 4
; GCN-DAG: v_mov_b32_e32 v2, 5
; GCN-NOT: v3
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {

View File

@ -6,10 +6,10 @@ declare void @external_void_func_void() #0
; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_mov_b32 s33, s7
; GCN: s_getpc_b64 s[34:35]
; GCN: s_mov_b32 s4, s33
; GCN-NEXT: s_getpc_b64 s[34:35]
; GCN-NEXT: s_add_u32 s34, s34,
; GCN-NEXT: s_addc_u32 s35, s35,
; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: s_mov_b32 s32, s33
; GCN: s_swappc_b64 s[30:31], s[34:35]
@ -129,13 +129,13 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: s_mov_b32 s34, s9
; GCN: ; def s33
; GCN-NEXT: #ASMEND
; GCN: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
; GCN-NEXT: s_mov_b32 s4, s34
; GCN-NEXT: s_mov_b32 s32, s34
; GCN: s_mov_b32 s4, s34
; GCN-DAG: s_mov_b32 s32, s34
; GCN-DAG: ; def s33
; GCN-DAG: #ASMEND
; GCN-DAG: s_getpc_b64 s[6:7]
; GCN-DAG: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
; GCN-DAG: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s33
@ -150,13 +150,13 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace(
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32:
; GCN: s_mov_b32 s33, s9
; GCN: ; def v32
; GCN-NEXT: #ASMEND
; GCN: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: s_mov_b32 s32, s33
; GCN: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN-DAG: ; def v32
; GCN-DAG: #ASMEND
; GCN-DAG: s_getpc_b64 s[6:7]
; GCN-DAG: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
; GCN-DAG: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use v32
@ -183,10 +183,10 @@ define void @void_func_void_clobber_s33() #2 {
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
; GCN: s_mov_b32 s33, s7
; GCN: s_getpc_b64
; GCN: s_mov_b32 s4, s33
; GCN-NEXT: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: s_mov_b32 s32, s33
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm

View File

@ -558,7 +558,8 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15

View File

@ -386,9 +386,9 @@ bb2:
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT3:[0-9]+]], s[[S_ELT3]]
; GCN: v_mov_b32_e32 v[[VEC_ELT2:[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 v[[VEC_ELT1:[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT2:[0-9]+]], s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT1:3]], s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: s_waitcnt vmcnt(0)

View File

@ -186,8 +186,8 @@ entry:
; FIXME: Should not have intermediate sgprs
; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
; CHECK: s_mov_b32 s1, 0
; CHECK: s_mov_b32 s0, 0x1e240
; CHECK-DAG: s_mov_b32 s1, 0
; CHECK-DAG: s_mov_b32 s0, 0x1e240
; CHECK: v_mov_b32_e32 v0, s0
; CHECK: v_mov_b32_e32 v1, s1
; CHECK: use v[0:1]

View File

@ -352,7 +352,7 @@ endif:
; GCN-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000
; GCN-DAG: s_mov_b32 m0, [[SCALEDIDX]]
; GCN: v_movreld_b32_e32 v{{[0-9]+}}, 0
; GCN-DAG: v_movreld_b32_e32 v{{[0-9]+}}, 0
; Increment to next element folded into base register, but FileCheck
; can't do math expressions

View File

@ -26,20 +26,20 @@ body: |
S_ENDPGM
...
# CHECK-LABEL: name: func0
# CHECK: $sgpr10 = S_MOV_B32 5
# CHECK: $sgpr9 = S_MOV_B32 4
# CHECK: $sgpr8 = S_MOV_B32 3
# CHECK: $sgpr33 = S_MOV_B32 killed $sgpr7
# CHECK-DAG: $sgpr10 = S_MOV_B32 5
# CHECK-DAG: $sgpr9 = S_MOV_B32 4
# CHECK-DAG: $sgpr8 = S_MOV_B32 3
# CHECK-DAG: $sgpr33 = S_MOV_B32 killed $sgpr7
# CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr8_sgpr9_sgpr10_sgpr11
# CHECK: $sgpr32 = S_MOV_B32 $sgpr33
# CHECK: BUNDLE implicit-def $sgpr6_sgpr7, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $scc {
# CHECK: $sgpr6_sgpr7 = S_GETPC_B64
# CHECK: $sgpr6 = S_ADD_U32 internal $sgpr6, 0, implicit-def $scc
# CHECK: $sgpr7 = S_ADDC_U32 internal $sgpr7, 0, implicit-def $scc, implicit internal $scc
# CHECK: }
# CHECK: $sgpr4 = S_MOV_B32 $sgpr33
# CHECK: $sgpr4 = S_MOV_B32 killed $sgpr33
# CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
# CHECK: $vgpr2 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
# CHECK: $vgpr3 = V_MOV_B32_e32 killed $sgpr11, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
# CHECK: $sgpr32 = S_MOV_B32 killed $sgpr33
# CHECK: S_NOP 0, implicit killed $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_ENDPGM

View File

@ -33,8 +33,8 @@ define void @test_func_call_external_void_func_i32_imm() #0 {
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0x1400{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset
; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}}
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset
; GCN: s_swappc_b64
; GCN: s_sub_u32 s32, s32, 0x1400{{$}}
; GCN: s_setpc_b64

View File

@ -57,18 +57,18 @@ bb11: ; preds = %bb9
; CHECK-LABEL: {{^}}partially_undef_copy:
; CHECK: v_mov_b32_e32 v5, 5
; CHECK: v_mov_b32_e32 v6, 6
; CHECK-DAG: v_mov_b32_e32 v6, 6
; CHECK: v_mov_b32_e32 v[[OUTPUT_LO:[0-9]+]], v5
; CHECK-DAG: v_mov_b32_e32 v[[OUTPUT_LO:[0-9]+]], v5
; Undef copy
; CHECK: v_mov_b32_e32 v1, v6
; CHECK-DAG: v_mov_b32_e32 v1, v6
; undef copy
; CHECK: v_mov_b32_e32 v2, v7
; CHECK-DAG: v_mov_b32_e32 v2, v7
; CHECK: v_mov_b32_e32 v[[OUTPUT_HI:[0-9]+]], v8
; CHECK: v_mov_b32_e32 v[[OUTPUT_LO]], v6
; CHECK-DAG: v_mov_b32_e32 v[[OUTPUT_HI:[0-9]+]], v8
; CHECK-DAG: v_mov_b32_e32 v[[OUTPUT_LO]], v6
; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}}
define amdgpu_kernel void @partially_undef_copy() #0 {

View File

@ -19,9 +19,9 @@ entry:
; CHECK-LABEL: isel
; CHECK: push {r4, r5, r6, lr}
; CHECK: movw r12, #0
; CHECK: movt r12, #0
; CHECK: movw r4, #{{\d*}}
; CHECK-DAG: movw r12, #0
; CHECK-DAG: movt r12, #0
; CHECK-DAG: movw r4, #{{\d*}}
; CHECK: blx r12
; CHECK: sub.w sp, sp, r4

View File

@ -16,9 +16,9 @@ entry:
; CHECK-DEFAULT-CODE-MODEL: sub.w sp, sp, r4
; CHECK-LARGE-CODE-MODEL: check_watermark:
; CHECK-LARGE-CODE-MODEL: movw r12, :lower16:__chkstk
; CHECK-LARGE-CODE-MODEL: movt r12, :upper16:__chkstk
; CHECK-LARGE-CODE-MODEL: movw r4, #1024
; CHECK-LARGE-CODE-MODEL-DAG: movw r12, :lower16:__chkstk
; CHECK-LARGE-CODE-MODEL-DAG: movt r12, :upper16:__chkstk
; CHECK-LARGE-CODE-MODEL-DAG: movw r4, #1024
; CHECK-LARGE-CODE-MODEL: blx r12
; CHECK-LARGE-CODE-MODEL: sub.w sp, sp, r4

View File

@ -10,9 +10,9 @@ entry:
unreachable
}
; CHECK: movw r0, :lower16:source
; CHECK: movt r0, :upper16:source
; CHECK: movs r1, #0
; CHECK: mov.w r2, #512
; CHECK: movw r0, :lower16:source
; CHECK: movt r0, :upper16:source
; CHECK: memset

View File

@ -162,8 +162,8 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) {
;
; T2-LABEL: test_tst_assessment:
; T2: @ %bb.0:
; T2-NEXT: lsls r1, r1, #31
; T2-NEXT: and r0, r0, #1
; T2-NEXT: lsls r1, r1, #31
; T2-NEXT: it ne
; T2-NEXT: subne r0, #1
; T2-NEXT: bx lr

View File

@ -104,10 +104,10 @@ declare i32 @doSomething(i32, i32*)
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
; ARM: subs [[IV]], [[IV]], #1
; THUMB: subs [[IV]], #1
; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
; THUMB-NEXT: add [[SUM]], [[TMP]]
; ARM: add [[SUM]], [[TMP]], [[SUM]]
; THUMB: add [[SUM]], [[TMP]]
; ARM-NEXT: subs [[IV]], [[IV]], #1
; THUMB-NEXT: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP]]
;
; Next BB.
@ -169,10 +169,10 @@ declare i32 @something(...)
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
; ARM: subs [[IV]], [[IV]], #1
; THUMB: subs [[IV]], #1
; ARM: add [[SUM]], [[TMP]], [[SUM]]
; THUMB: add [[SUM]], [[TMP]]
; ARM: subs [[IV]], [[IV]], #1
; THUMB: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP_LABEL]]
; Next BB.
; CHECK: @ %for.exit
@ -228,10 +228,10 @@ for.end: ; preds = %for.body
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
; ARM: subs [[IV]], [[IV]], #1
; THUMB: subs [[IV]], #1
; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
; THUMB-NEXT: add [[SUM]], [[TMP]]
; ARM: add [[SUM]], [[TMP]], [[SUM]]
; THUMB: add [[SUM]], [[TMP]]
; ARM-NEXT: subs [[IV]], [[IV]], #1
; THUMB-NEXT: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP]]
;
; Next BB.
@ -307,10 +307,10 @@ declare void @somethingElse(...)
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
; ARM: subs [[IV]], [[IV]], #1
; THUMB: subs [[IV]], #1
; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
; THUMB-NEXT: add [[SUM]], [[TMP]]
; ARM: add [[SUM]], [[TMP]], [[SUM]]
; THUMB: add [[SUM]], [[TMP]]
; ARM-NEXT: subs [[IV]], [[IV]], #1
; THUMB-NEXT: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP]]
;
; Next BB.

View File

@ -18,9 +18,9 @@
; CHECK-NEXT: Data
; CHECK-SAME: Latency=3
; CHECK-NEXT: Data
; CHECK-SAME: Latency=3
; CHECK-SAME: Latency=0
; CHECK-NEXT: Data
; CHECK-SAME: Latency=4
; CHECK-SAME: Latency=0
define i32 @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize {
%1 = load i32, i32* @a, align 4
%2 = load i32, i32* @b, align 4

View File

@ -11,7 +11,7 @@
; CHECK: Data
; CHECK-SAME: Latency=3
; CHECK-NEXT: Data
; CHECK-SAME: Latency=3
; CHECK-SAME: Latency=0
define i32 @foo(i32* %a) nounwind optsize {
entry:

View File

@ -20,9 +20,9 @@
; CHECK-NEXT: Data
; CHECK-SAME: Latency=5
; CHECK-NEXT: Data
; CHECK-SAME: Latency=5
; CHECK-SAME: Latency=0
; CHECK-NEXT: Data
; CHECK-SAME: Latency=6
; CHECK-SAME: Latency=0
define i32 @bar(i32* %iptr) minsize optsize {
%1 = load double, double* @a, align 8
%2 = load double, double* @b, align 8

View File

@ -11,9 +11,9 @@
; CHECK: Data
; CHECK-SAME: Latency=5
; CHECK-NEXT: Data
; CHECK-SAME: Latency=5
; CHECK-SAME: Latency=0
; CHECK-NEXT: Data
; CHECK-SAME: Latency=6
; CHECK-SAME: Latency=0
define double @foo(double* %a) nounwind optsize {
entry:

View File

@ -935,9 +935,9 @@ entry:
; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]

View File

@ -80,8 +80,8 @@ define double @f7(double %a, double %b) {
; block generated, odds are good that we have close to the ideal code for this:
;
; CHECK-NEON-LABEL: f8:
; CHECK-NEON: movw [[R3:r[0-9]+]], #1123
; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0
; CHECK-NEON: movw [[R3:r[0-9]+]], #1123
; CHECK-NEON-NEXT: cmp r0, [[R3]]
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: addeq{{.*}} [[R2]], #4

View File

@ -4,8 +4,8 @@
define void @PR13378() nounwind {
; This was originally a crasher trying to schedule the instructions.
; CHECK-LABEL: PR13378:
; CHECK: vld1.32
; CHECK-NEXT: vmov.i32
; CHECK: vmov.i32
; CHECK-NEXT: vld1.32
; CHECK-NEXT: vst1.32
; CHECK-NEXT: vst1.32
; CHECK-NEXT: vmov.f32

View File

@ -39,8 +39,8 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]
; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
@ -56,8 +56,8 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]
; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
@ -72,11 +72,11 @@ define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]
; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-BE: vmov r1, r0, [[LD0]]
; CHECK-BE: vmov r3, r2, [[LD1]]
; CHECK-BE: vmov r1, r0, [[LD0]]
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>

View File

@ -324,23 +324,23 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8.
; CHECK-LABEL: cmpsel_trunc:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: add r12, sp, #48
; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
; CHECK-NEXT: add r12, sp, #32
; CHECK-NEXT: vcgt.u32 q8, q10, q8
; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
; CHECK-NEXT: vcgt.u32 q9, q10, q9
; CHECK-NEXT: vmov d20, r2, r3
; CHECK-NEXT: vmovn.i32 d17, q8
; CHECK-NEXT: vmovn.i32 d16, q9
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vmovn.i16 d16, q8
; CHECK-NEXT: vbsl d16, d18, d20
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: add r12, sp, #48
; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
; CHECK-NEXT: add r12, sp, #32
; CHECK-NEXT: vcgt.u32 q8, q10, q8
; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
; CHECK-NEXT: vcgt.u32 q9, q10, q9
; CHECK-NEXT: vmov d20, r2, r3
; CHECK-NEXT: vmovn.i32 d17, q8
; CHECK-NEXT: vmovn.i32 d16, q9
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vmovn.i16 d16, q8
; CHECK-NEXT: vbsl d16, d18, d20
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%c = icmp ult <8 x i32> %cmp0, %cmp1
%res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
@ -353,28 +353,28 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: add r12, sp, #8
; CHECK-NEXT: add lr, sp, #24
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: ldr r12, [sp, #40]
; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
; CHECK-NEXT: vmov.i8 d19, #0x7
; CHECK-NEXT: vmovl.u8 q10, d18
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vneg.s8 d17, d19
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vuzp.8 d16, d20
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: add r12, sp, #8
; CHECK-NEXT: add lr, sp, #24
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: ldr r12, [sp, #40]
; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
; CHECK-NEXT: vmov.i8 d19, #0x7
; CHECK-NEXT: vmovl.u8 q10, d18
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vneg.s8 d17, d19
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vuzp.8 d16, d20
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@ -389,22 +389,22 @@ define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmov.i8 d18, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vuzp.8 d16, d17
; CHECK-NEXT: vneg.s8 d17, d18
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmov.i8 d18, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vuzp.8 d16, d17
; CHECK-NEXT: vneg.s8 d17, d18
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@ -417,23 +417,23 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1
define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vldr d18, .LCPI22_0
; CHECK-NEXT: vmov.i8 d19, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vtbl.8 d16, {d16}, d18
; CHECK-NEXT: vneg.s8 d17, d19
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vldr d18, .LCPI22_0
; CHECK-NEXT: vmov.i8 d19, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vtbl.8 d16, {d16}, d18
; CHECK-NEXT: vneg.s8 d17, d19
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI22_0:
@ -459,55 +459,55 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
; CHECK-LABEL: vuzp_wide_type:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: add r12, sp, #32
; CHECK-NEXT: add lr, sp, #48
; CHECK-NEXT: vld1.32 {d17[0]}, [r12:32]
; CHECK-NEXT: add r12, sp, #24
; CHECK-NEXT: vld1.32 {d16[0]}, [r12:32]
; CHECK-NEXT: add r12, sp, #56
; CHECK-NEXT: vld1.32 {d19[0]}, [r12:32]
; CHECK-NEXT: ldr r12, [sp, #68]
; CHECK-NEXT: vld1.32 {d18[0]}, [lr:32]
; CHECK-NEXT: add lr, sp, #40
; CHECK-NEXT: vld1.32 {d20[0]}, [lr:32]
; CHECK-NEXT: ldr r4, [r12]
; CHECK-NEXT: vmov.32 d23[0], r4
; CHECK-NEXT: add r4, sp, #64
; CHECK-NEXT: vld1.32 {d24[0]}, [r4:32]
; CHECK-NEXT: add r4, sp, #36
; CHECK-NEXT: vld1.32 {d17[1]}, [r4:32]
; CHECK-NEXT: add r4, sp, #28
; CHECK-NEXT: vcgt.u32 q10, q12, q10
; CHECK-NEXT: vmov.u8 lr, d23[3]
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
; CHECK-NEXT: add r4, sp, #60
; CHECK-NEXT: vld1.32 {d19[1]}, [r4:32]
; CHECK-NEXT: add r4, sp, #52
; CHECK-NEXT: vld1.32 {d18[1]}, [r4:32]
; CHECK-NEXT: add r4, r12, #4
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmovn.i32 d19, q10
; CHECK-NEXT: vldr d20, .LCPI23_0
; CHECK-NEXT: vmovn.i32 d18, q8
; CHECK-NEXT: vmovn.i16 d22, q9
; CHECK-NEXT: vmov.i8 q9, #0x7
; CHECK-NEXT: vmov.8 d17[0], lr
; CHECK-NEXT: vneg.s8 q9, q9
; CHECK-NEXT: vtbl.8 d16, {d22, d23}, d20
; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
; CHECK-NEXT: add r4, sp, #8
; CHECK-NEXT: vshl.i8 q8, q8, #7
; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
; CHECK-NEXT: vshl.s8 q8, q8, q9
; CHECK-NEXT: vmov d19, r2, r3
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vbsl q8, q9, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: add r12, sp, #32
; CHECK-NEXT: add lr, sp, #48
; CHECK-NEXT: vld1.32 {d17[0]}, [r12:32]
; CHECK-NEXT: add r12, sp, #24
; CHECK-NEXT: vld1.32 {d16[0]}, [r12:32]
; CHECK-NEXT: add r12, sp, #56
; CHECK-NEXT: vld1.32 {d19[0]}, [r12:32]
; CHECK-NEXT: vld1.32 {d18[0]}, [lr:32]
; CHECK-NEXT: add lr, sp, #40
; CHECK-NEXT: vld1.32 {d20[0]}, [lr:32]
; CHECK-NEXT: ldr r12, [sp, #68]
; CHECK-NEXT: ldr r4, [r12]
; CHECK-NEXT: vmov.32 d23[0], r4
; CHECK-NEXT: add r4, sp, #64
; CHECK-NEXT: vld1.32 {d24[0]}, [r4:32]
; CHECK-NEXT: add r4, sp, #36
; CHECK-NEXT: vcgt.u32 q10, q12, q10
; CHECK-NEXT: vld1.32 {d17[1]}, [r4:32]
; CHECK-NEXT: add r4, sp, #28
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
; CHECK-NEXT: add r4, sp, #60
; CHECK-NEXT: vld1.32 {d19[1]}, [r4:32]
; CHECK-NEXT: add r4, sp, #52
; CHECK-NEXT: vld1.32 {d18[1]}, [r4:32]
; CHECK-NEXT: add r4, r12, #4
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmovn.i32 d19, q10
; CHECK-NEXT: vmov.u8 lr, d23[3]
; CHECK-NEXT: vldr d20, .LCPI23_0
; CHECK-NEXT: vmovn.i32 d18, q8
; CHECK-NEXT: vmovn.i16 d22, q9
; CHECK-NEXT: vmov.i8 q9, #0x7
; CHECK-NEXT: vneg.s8 q9, q9
; CHECK-NEXT: vmov.8 d17[0], lr
; CHECK-NEXT: vtbl.8 d16, {d22, d23}, d20
; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
; CHECK-NEXT: add r4, sp, #8
; CHECK-NEXT: vshl.i8 q8, q8, #7
; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
; CHECK-NEXT: vshl.s8 q8, q8, q9
; CHECK-NEXT: vmov d19, r2, r3
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vbsl q8, q9, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:

View File

@ -0,0 +1,31 @@
# Check that the extra operand for the full register added by RegAlloc does
# not have a latency that interferes with the latency adjustment
# (ReadAdvance) for the MSY register operand.
# RUN: llc %s -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=machine-scheduler \
# RUN: -debug-only=machine-scheduler -o - 2>&1 | FileCheck %s
# REQUIRES: asserts
# CHECK: ScheduleDAGMI::schedule starting
# CHECK: SU(4): renamable $r2l = MSR renamable $r2l(tied-def 0), renamable $r2l
# CHECK: Latency : 6
# CHECK: SU(5): renamable $r2l = MSY renamable $r2l(tied-def 0), renamable $r1d, -4, $noreg, implicit $r2d
# CHECK: Predecessors:
# CHECK: SU(4): Data Latency=2 Reg=$r2l
# CHECK: SU(4): Data Latency=0 Reg=$r2d
---
name: Perl_do_sv_dump
alignment: 4
tracksRegLiveness: true
body: |
bb.0 :
%1:addr64bit = IMPLICIT_DEF
%2:addr64bit = IMPLICIT_DEF
%3:vr64bit = IMPLICIT_DEF
bb.1 :
%2:addr64bit = ALGFI %2, 4294967291, implicit-def dead $cc
%2.subreg_l32:addr64bit = MSR %2.subreg_l32, %2.subreg_l32
%2.subreg_l32:addr64bit = MSY %2.subreg_l32, %1, -4, $noreg
...

View File

@ -88,15 +88,15 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-NEXT: orrs r3, r2
; THUMBV7-NEXT: ldr r2, [sp, #80]
; THUMBV7-NEXT: orr.w r1, r1, r4
; THUMBV7-NEXT: orr.w r1, r1, r10
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: orr.w r1, r1, r10
; THUMBV7-NEXT: orrs.w r7, r2, r11
; THUMBV7-NEXT: orr.w r1, r1, r9
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r7, #1
; THUMBV7-NEXT: orr.w r0, r0, r12
; THUMBV7-NEXT: ands r3, r7
; THUMBV7-NEXT: orr.w r0, r0, r12
; THUMBV7-NEXT: orrs r1, r3
; THUMBV7-NEXT: orrs r0, r1
; THUMBV7-NEXT: orr.w r0, r0, r8

View File

@ -20,11 +20,11 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
; THUMBV7-NEXT: cmp r5, #0
; THUMBV7-NEXT: and.w r1, r1, r3
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r5, #1
; THUMBV7-NEXT: ands r1, r3
; THUMBV7-NEXT: orrs r1, r5
; THUMBV7-NEXT: cmp.w lr, #0
; THUMBV7-NEXT: orr.w r1, r1, r5
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne.w lr, #1
; THUMBV7-NEXT: orr.w r1, r1, lr

View File

@ -97,8 +97,8 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: pushq %r14
; ATOM-NEXT: pushq %rbx
; ATOM-NEXT: ## kill: def $ecx killed $ecx def $rcx
; ATOM-NEXT: movl 4(%rdx), %eax
; ATOM-NEXT: movl (%rdx), %r15d
; ATOM-NEXT: movl 4(%rdx), %eax
; ATOM-NEXT: leaq 20(%rdx), %r14
; ATOM-NEXT: movq _Te0@{{.*}}(%rip), %r9
; ATOM-NEXT: movq _Te1@{{.*}}(%rip), %r8
@ -116,8 +116,8 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: movzbl %bl, %eax
; ATOM-NEXT: movl (%r10,%rax,4), %eax
; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d
; ATOM-NEXT: xorl -4(%r14), %r15d
; ATOM-NEXT: xorl (%r9,%rdi,4), %eax
; ATOM-NEXT: xorl -4(%r14), %r15d
; ATOM-NEXT: xorl (%r14), %eax
; ATOM-NEXT: addq $16, %r14
; ATOM-NEXT: LBB0_1: ## %bb
@ -130,14 +130,14 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: movzbl %dil, %edi
; ATOM-NEXT: movl (%r8,%rdi,4), %ebx
; ATOM-NEXT: movzbl %r15b, %edi
; ATOM-NEXT: movl (%r10,%rdi,4), %edi
; ATOM-NEXT: xorl (%r9,%rbp,4), %ebx
; ATOM-NEXT: movl (%r10,%rdi,4), %edi
; ATOM-NEXT: xorl -12(%r14), %ebx
; ATOM-NEXT: xorl (%r9,%rax,4), %edi
; ATOM-NEXT: movl %ebx, %eax
; ATOM-NEXT: xorl -8(%r14), %edi
; ATOM-NEXT: shrl $24, %eax
; ATOM-NEXT: movl (%r9,%rax,4), %r15d
; ATOM-NEXT: xorl -8(%r14), %edi
; ATOM-NEXT: testq %r11, %r11
; ATOM-NEXT: movl %edi, %eax
; ATOM-NEXT: jne LBB0_2

View File

@ -41,8 +41,8 @@ define void @t() nounwind {
; YMM-NEXT: movl %esp, %ebp
; YMM-NEXT: andl $-32, %esp
; YMM-NEXT: subl $96, %esp
; YMM-NEXT: vxorps %xmm0, %xmm0, %xmm0
; YMM-NEXT: leal {{[0-9]+}}(%esp), %eax
; YMM-NEXT: vxorps %xmm0, %xmm0, %xmm0
; YMM-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; YMM-NEXT: movl %eax, (%esp)
; YMM-NEXT: vzeroupper

View File

@ -20,9 +20,9 @@ entry:
; On Intel Atom the scheduler moves a movl instruction
; used for the printf call to follow movl 24(%esp), %eax
; ATOM: movl 24(%esp), %eax
; ATOM: movl
; ATOM: movl %eax, 36(%esp)
; ATOM-NOT: movl
; ATOM: movl %eax, 36(%esp)
; ATOM: movl
; ATOM: movl 28(%esp), %ebx
; ATOM-NOT: movl
; ATOM: movl %ebx, 40(%esp)

View File

@ -135,16 +135,16 @@ define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
;
; BDVER12-LABEL: lshift_cl_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
; BDVER12-NEXT: retq # sched: [5:1.00]
;
; BTVER2-LABEL: lshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
@ -211,16 +211,16 @@ define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
;
; BDVER12-LABEL: rshift_cl_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
; BDVER12-NEXT: retq # sched: [5:1.00]
;
; BTVER2-LABEL: rshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]

View File

@ -495,8 +495,8 @@ define void @test_arpl(i16 %a0, i16 *%a1) optsize {
;
; ZNVER1-LABEL: test_arpl:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
; ZNVER1-NEXT: #NO_APP
@ -681,10 +681,10 @@ define void @test_bound(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) optsize {
; ZNVER1-NEXT: pushl %esi # sched: [1:0.50]
; ZNVER1-NEXT: .cfi_def_cfa_offset 8
; ZNVER1-NEXT: .cfi_offset %esi, -8
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: bound %ax, (%esi) # sched: [100:0.25]
; ZNVER1-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
@ -985,8 +985,8 @@ define void @test_dec16(i16 %a0, i16* %a1) optsize {
;
; ZNVER1-LABEL: test_dec16:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: decw %ax # sched: [1:0.25]
; ZNVER1-NEXT: decw (%ecx) # sched: [5:0.50]
@ -1212,8 +1212,8 @@ define void @test_inc16(i16 %a0, i16* %a1) optsize {
;
; ZNVER1-LABEL: test_inc16:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: incw %ax # sched: [1:0.25]
; ZNVER1-NEXT: incw (%ecx) # sched: [5:0.50]
@ -1949,8 +1949,8 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
;
; ZNVER1-LABEL: test_pop_push_16:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: popw %ax # sched: [8:0.50]
; ZNVER1-NEXT: popw (%ecx) # sched: [5:0.50]