[X86] Add test case to demonstrate failure to fold the address computation of a simple gather from a global array. NFC

llvm-svn: 317905
Craig Topper 2017-11-10 18:48:18 +00:00
parent 89765acc6c
commit cad1c95b31
1 changed file with 226 additions and 2 deletions
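The missed fold, in short: the x86 gather's disp(,index,scale) memory operand could absorb both the constant base address and the element scaling, but the test added below shows the compiler computing every address with explicit vector arithmetic instead. A minimal sketch of the two forms, in AT&T syntax (the folded line is hypothetical, not output produced by this commit):

    vpsllq $2, %zmm0, %zmm0            # indices * sizeof(i32)
    movl $glob_array, %eax             # materialize the base address
    vpbroadcastq %rax, %zmm1           # splat the base across all lanes
    vpaddq %zmm0, %zmm1, %zmm1         # per-lane base + index*4
    vpgatherqd (,%zmm1), %ymm0 {%k1}   # gather through precomputed pointers

    # Hypothetical folded form: base and scale live in the memory operand.
    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}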


@@ -6,6 +6,8 @@
; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null
@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16
; SCALAR-LABEL: test1
; SCALAR: extractelement <16 x float*>
; SCALAR-NEXT: load float
@@ -1563,8 +1565,181 @@ define <16 x float> @test29(float* %base, <16 x i32> %ind) {
; Check non-power-of-2 case. It should be scalarized.
declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; ALL-LABEL: test30
; ALL-NOT: gather
; KNL_64-LABEL: test30:
; KNL_64: # BB#0:
; KNL_64-NEXT: kmovw %edx, %k0
; KNL_64-NEXT: kmovw %esi, %k2
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; KNL_64-NEXT: testb $1, %dil
; KNL_64-NEXT: # implicit-def: %XMM0
; KNL_64-NEXT: je .LBB30_2
; KNL_64-NEXT: # BB#1: # %cond.load
; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_64-NEXT: .LBB30_2: # %else
; KNL_64-NEXT: kmovw %edi, %k1
; KNL_64-NEXT: kshiftlw $15, %k2, %k2
; KNL_64-NEXT: kshiftrw $15, %k2, %k2
; KNL_64-NEXT: kmovw %k2, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB30_4
; KNL_64-NEXT: # BB#3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm1, %rax
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
; KNL_64-NEXT: .LBB30_4: # %else2
; KNL_64-NEXT: kshiftlw $15, %k0, %k0
; KNL_64-NEXT: kshiftrw $15, %k0, %k0
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB30_6
; KNL_64-NEXT: # BB#5: # %cond.load4
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; KNL_64-NEXT: .LBB30_6: # %else5
; KNL_64-NEXT: kmovw %k2, %eax
; KNL_64-NEXT: kshiftlw $15, %k1, %k1
; KNL_64-NEXT: kshiftrw $15, %k1, %k1
; KNL_64-NEXT: kmovw %k1, %ecx
; KNL_64-NEXT: vmovd %ecx, %xmm1
; KNL_64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test30:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k0
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k2
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: # implicit-def: %XMM0
; KNL_32-NEXT: je .LBB30_2
; KNL_32-NEXT: # BB#1: # %cond.load
; KNL_32-NEXT: vmovd %xmm1, %ecx
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_32-NEXT: .LBB30_2: # %else
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k2, %k2
; KNL_32-NEXT: kshiftrw $15, %k2, %k2
; KNL_32-NEXT: kmovw %k2, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB30_4
; KNL_32-NEXT: # BB#3: # %cond.load1
; KNL_32-NEXT: vpextrd $1, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB30_4: # %else2
; KNL_32-NEXT: kshiftlw $15, %k0, %k0
; KNL_32-NEXT: kshiftrw $15, %k0, %k0
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB30_6
; KNL_32-NEXT: # BB#5: # %cond.load4
; KNL_32-NEXT: vpextrd $2, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB30_6: # %else5
; KNL_32-NEXT: kmovw %k2, %eax
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $15, %k1, %k1
; KNL_32-NEXT: kmovw %k1, %ecx
; KNL_32-NEXT: vmovd %ecx, %xmm1
; KNL_32-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test30:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; SKX-NEXT: testb $1, %al
; SKX-NEXT: # implicit-def: %XMM0
; SKX-NEXT: je .LBB30_2
; SKX-NEXT: # BB#1: # %cond.load
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX-NEXT: .LBB30_2: # %else
; SKX-NEXT: kshiftlw $14, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB30_4
; SKX-NEXT: # BB#3: # %cond.load1
; SKX-NEXT: vpextrq $1, %xmm1, %rax
; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
; SKX-NEXT: .LBB30_4: # %else2
; SKX-NEXT: kshiftlw $13, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB30_6
; SKX-NEXT: # BB#5: # %cond.load4
; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; SKX-NEXT: .LBB30_6: # %else5
; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1}
; SKX-NEXT: vmovdqa %xmm3, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test30:
; SKX_32: # BB#0:
; SKX_32-NEXT: subl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 16
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX_32-NEXT: kshiftlw $15, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: # implicit-def: %XMM1
; SKX_32-NEXT: je .LBB30_2
; SKX_32-NEXT: # BB#1: # %cond.load
; SKX_32-NEXT: vmovd %xmm2, %eax
; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX_32-NEXT: .LBB30_2: # %else
; SKX_32-NEXT: kshiftlw $14, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB30_4
; SKX_32-NEXT: # BB#3: # %cond.load1
; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: .LBB30_4: # %else2
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; SKX_32-NEXT: kshiftlw $13, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB30_6
; SKX_32-NEXT: # BB#5: # %cond.load4
; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: .LBB30_6: # %else5
; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: retl
%sext_ind = sext <3 x i32> %ind to <3 x i64>
%gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
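The branchy KNL/SKX sequences above all follow the generic scalarization pattern: test one mask bit and, if set, extract that pointer lane and load it. Per lane, the expansion amounts to roughly this IR sketch (lane 0 shown; block and value names are illustrative, not taken from the pass output):

    %mask0 = extractelement <3 x i1> %mask, i32 0
    br i1 %mask0, label %cond.load, label %else
    cond.load:
      %ptr0 = extractelement <3 x i32*> %gep.random, i32 0
      %val0 = load i32, i32* %ptr0
      %res0 = insertelement <3 x i32> undef, i32 %val0, i32 0
      br label %else
    else:
      %res = phi <3 x i32> [ %res0, %cond.load ], [ undef, %entry ]
      ; ...lanes 1 and 2 repeat the pattern, then the mask selects between
      ; the loaded lanes and %src0 (the vblendvps / vmovdqa32 at the end).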
@@ -2151,3 +2326,52 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6
ret <4 x i64> %b
}
declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
define <8 x i32> @test_global_array(<8 x i64> %indxs) {
; KNL_64-LABEL: test_global_array:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpsllq $2, %zmm0, %zmm0
; KNL_64-NEXT: movl $glob_array, %eax
; KNL_64-NEXT: vpbroadcastq %rax, %zmm1
; KNL_64-NEXT: vpaddq %zmm0, %zmm1, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_global_array:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
; KNL_32-NEXT: vpslld $2, %ymm0, %ymm0
; KNL_32-NEXT: movl $glob_array, %eax
; KNL_32-NEXT: vmovd %eax, %xmm1
; KNL_32-NEXT: vpbroadcastd %xmm1, %ymm1
; KNL_32-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_global_array:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $2, %zmm0, %zmm0
; SKX-NEXT: movl $glob_array, %eax
; SKX-NEXT: vpbroadcastq %rax, %zmm1
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm1
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_global_array:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl $glob_array, %eax
; SKX_32-NEXT: vpbroadcastd %eax, %ymm1
; SKX_32-NEXT: vpmovqd %zmm0, %ymm0
; SKX_32-NEXT: vpslld $2, %ymm0, %ymm0
; SKX_32-NEXT: vpaddd %ymm0, %ymm1, %ymm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1}
; SKX_32-NEXT: retl
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs
%g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
ret <8 x i32> %g
}
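Not shown in this hunk: the call above needs a matching intrinsic declaration, whose signature follows directly from the call site. Presumably the file already carries (or gains) one of the form:

    declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)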