[AtomicExpand] Fix a crash bug when lowering unordered loads to cmpxchg

Add tests for wider atomic loads and stores.  In the process, fix a crasher where we appearently handled unorder stores, but not loads, when lowering to cmpxchg idioms.

llvm-svn: 356482
This commit is contained in:
Philip Reames 2019-03-19 17:20:49 +00:00
parent ae211ece6a
commit 2153c4b828
2 changed files with 213 additions and 24 deletions

View File

@ -430,6 +430,9 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
IRBuilder<> Builder(LI);
AtomicOrdering Order = LI->getOrdering();
if (Order == AtomicOrdering::Unordered)
Order = AtomicOrdering::Monotonic;
Value *Addr = LI->getPointerOperand();
Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
Constant *DummyVal = Constant::getNullValue(Ty);

View File

@ -272,6 +272,192 @@ define void @widen_broadcast_unaligned(i32* %p0, i32 %v) {
ret void
}
define i128 @load_i128(i128* %ptr) {
; CHECK-O0-LABEL: load_i128:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: pushq %rbx
; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-O0-NEXT: .cfi_offset %rbx, -16
; CHECK-O0-NEXT: xorl %eax, %eax
; CHECK-O0-NEXT: movl %eax, %ecx
; CHECK-O0-NEXT: movq %rcx, %rax
; CHECK-O0-NEXT: movq %rcx, %rdx
; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; CHECK-O0-NEXT: lock cmpxchg16b (%rdi)
; CHECK-O0-NEXT: popq %rbx
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i128:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: pushq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
; CHECK-O3-NEXT: .cfi_offset %rbx, -16
; CHECK-O3-NEXT: xorl %eax, %eax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: xorl %ecx, %ecx
; CHECK-O3-NEXT: xorl %ebx, %ebx
; CHECK-O3-NEXT: lock cmpxchg16b (%rdi)
; CHECK-O3-NEXT: popq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: retq
%v = load atomic i128, i128* %ptr unordered, align 16
ret i128 %v
}
define void @store_i128(i128* %ptr, i128 %v) {
; CHECK-O0-LABEL: store_i128:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: pushq %rbx
; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-O0-NEXT: .cfi_offset %rbx, -16
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq 8(%rdi), %rcx
; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: jmp .LBB16_1
; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start
; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rcx, %rax
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; CHECK-O0-NEXT: lock cmpxchg16b (%rsi)
; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: jne .LBB16_1
; CHECK-O0-NEXT: jmp .LBB16_2
; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end
; CHECK-O0-NEXT: popq %rbx
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i128:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: pushq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
; CHECK-O3-NEXT: .cfi_offset %rbx, -16
; CHECK-O3-NEXT: movq %rdx, %rcx
; CHECK-O3-NEXT: movq %rsi, %rbx
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq 8(%rdi), %rdx
; CHECK-O3-NEXT: .p2align 4, 0x90
; CHECK-O3-NEXT: .LBB16_1: # %atomicrmw.start
; CHECK-O3-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-O3-NEXT: lock cmpxchg16b (%rdi)
; CHECK-O3-NEXT: jne .LBB16_1
; CHECK-O3-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-O3-NEXT: popq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: retq
store atomic i128 %v, i128* %ptr unordered, align 16
ret void
}
define i256 @load_i256(i256* %ptr) {
; CHECK-O0-LABEL: load_i256:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: subq $56, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 64
; CHECK-O0-NEXT: movq %rdi, %rax
; CHECK-O0-NEXT: movl $32, %ecx
; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: xorl %r8d, %r8d
; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: movq %rcx, %rdi
; CHECK-O0-NEXT: movl %r8d, %ecx
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: callq __atomic_load
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
; CHECK-O0-NEXT: movq %rdi, 24(%r9)
; CHECK-O0-NEXT: movq %rsi, 16(%r9)
; CHECK-O0-NEXT: movq %rdx, 8(%r9)
; CHECK-O0-NEXT: movq %rax, (%r9)
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: addq $56, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i256:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: pushq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
; CHECK-O3-NEXT: subq $32, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 48
; CHECK-O3-NEXT: .cfi_offset %rbx, -16
; CHECK-O3-NEXT: movq %rdi, %rbx
; CHECK-O3-NEXT: movq %rsp, %rdx
; CHECK-O3-NEXT: movl $32, %edi
; CHECK-O3-NEXT: xorl %ecx, %ecx
; CHECK-O3-NEXT: callq __atomic_load
; CHECK-O3-NEXT: vmovups (%rsp), %ymm0
; CHECK-O3-NEXT: vmovups %ymm0, (%rbx)
; CHECK-O3-NEXT: movq %rbx, %rax
; CHECK-O3-NEXT: addq $32, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
; CHECK-O3-NEXT: popq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: vzeroupper
; CHECK-O3-NEXT: retq
%v = load atomic i256, i256* %ptr unordered, align 16
ret i256 %v
}
define void @store_i256(i256* %ptr, i256 %v) {
; CHECK-O0-LABEL: store_i256:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: subq $40, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 48
; CHECK-O0-NEXT: xorl %eax, %eax
; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %r9
; CHECK-O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movl $32, %ecx
; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill
; CHECK-O0-NEXT: movq %rcx, %rdi
; CHECK-O0-NEXT: movq (%rsp), %rsi # 8-byte Reload
; CHECK-O0-NEXT: movq %r9, %rdx
; CHECK-O0-NEXT: movl %eax, %ecx
; CHECK-O0-NEXT: callq __atomic_store
; CHECK-O0-NEXT: addq $40, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i256:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: subq $40, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 48
; CHECK-O3-NEXT: movq %rdi, %rax
; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O3-NEXT: movl $32, %edi
; CHECK-O3-NEXT: movq %rax, %rsi
; CHECK-O3-NEXT: xorl %ecx, %ecx
; CHECK-O3-NEXT: callq __atomic_store
; CHECK-O3-NEXT: addq $40, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: retq
store atomic i256 %v, i256* %ptr unordered, align 16
ret void
}
; Legal if wider type is also atomic (TODO)
define void @vec_store(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store:
@ -621,12 +807,12 @@ define i64 @load_fold_sdiv2(i64* %p, i64 %v2) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB31_1
; CHECK-O3-NEXT: je .LBB35_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB31_1:
; CHECK-O3-NEXT: .LBB35_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
@ -652,12 +838,12 @@ define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB32_1
; CHECK-O3-NEXT: je .LBB36_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB32_1:
; CHECK-O3-NEXT: .LBB36_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
@ -708,12 +894,12 @@ define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB34_1
; CHECK-O3-NEXT: je .LBB38_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB34_1:
; CHECK-O3-NEXT: .LBB38_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
@ -740,12 +926,12 @@ define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB35_1
; CHECK-O3-NEXT: je .LBB39_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB35_1:
; CHECK-O3-NEXT: .LBB39_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
@ -805,13 +991,13 @@ define i64 @load_fold_srem2(i64* %p, i64 %v2) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB37_1
; CHECK-O3-NEXT: je .LBB41_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB37_1:
; CHECK-O3-NEXT: .LBB41_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
@ -838,13 +1024,13 @@ define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB38_1
; CHECK-O3-NEXT: je .LBB42_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB38_1:
; CHECK-O3-NEXT: .LBB42_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
@ -902,13 +1088,13 @@ define i64 @load_fold_urem2(i64* %p, i64 %v2) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB40_1
; CHECK-O3-NEXT: je .LBB44_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB40_1:
; CHECK-O3-NEXT: .LBB44_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
@ -936,13 +1122,13 @@ define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
; CHECK-O3-NEXT: je .LBB41_1
; CHECK-O3-NEXT: je .LBB45_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB41_1:
; CHECK-O3-NEXT: .LBB45_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
@ -1537,13 +1723,13 @@ define void @rmw_fold_sdiv2(i64* %p, i64 %v) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB70_1
; CHECK-O3-NEXT: je .LBB74_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB70_1:
; CHECK-O3-NEXT: .LBB74_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
@ -1599,13 +1785,13 @@ define void @rmw_fold_udiv2(i64* %p, i64 %v) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB72_1
; CHECK-O3-NEXT: je .LBB76_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB72_1:
; CHECK-O3-NEXT: .LBB76_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
@ -1677,13 +1863,13 @@ define void @rmw_fold_srem2(i64* %p, i64 %v) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB74_1
; CHECK-O3-NEXT: je .LBB78_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB74_1:
; CHECK-O3-NEXT: .LBB78_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
@ -1748,13 +1934,13 @@ define void @rmw_fold_urem2(i64* %p, i64 %v) {
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB76_1
; CHECK-O3-NEXT: je .LBB80_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB76_1:
; CHECK-O3-NEXT: .LBB80_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi