[X86][SKX][KNL] Updated regression tests to use -mattr instead of the -mcpu flag. NFC.

NFC.
 Updated 8 regression tests to use -mattr instead of the -mcpu flag, as follows:
 -mcpu=knl --> -mattr=+avx512f
 -mcpu=skx --> -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq

These updates are part of the preparation for a large commit that will add full instruction scheduling information for the SKX target.
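
As a rough sketch only (a hypothetical helper, not part of this commit), the mapping above amounts to a mechanical rewrite of each RUN line:

  # Hypothetical one-liner; the actual patch also folded pre-existing -mattr
  # flags (e.g. -mcpu=skx -mattr=avx512vbmi) into a single -mattr list.
  sed -i -e 's/-mcpu=knl/-mattr=+avx512f/' \
         -e 's/-mcpu=skx/-mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq/' \
         <the 8 affected tests>

The CHECK lines were presumably regenerated afterwards with utils/update_llc_test_checks.py (each test carries its autogeneration NOTE), which accounts for the incidental CHECK-line churn visible in the diffs below.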

Reviewers: delena, zvi, RKSimon
Differential Revision: https://reviews.llvm.org/D38222

Change-Id: I2381c9b5bb75ecacfca017243c22d054f6eddd14
llvm-svn: 314306
Gadi Haber, 2017-09-27 14:44:15 +00:00
parent eb7a0bf847
commit 87337a2bb9
8 changed files with 32735 additions and 31384 deletions

File diff suppressed because it is too large.

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=CHECK --check-prefix=KNL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_ONLY %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=avx512vbmi | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_VBMI %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=KNL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_ONLY %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+avx512vbmi | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_VBMI %s
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; CHECK-LABEL: test1:
@ -12,7 +12,6 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%rrr = load float, float* %br
%rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
%rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
@ -28,7 +27,6 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%rrr = load double, double* %br
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
@ -42,7 +40,6 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%eee = extractelement <16 x float> %x, i32 4
%rrr2 = insertelement <16 x float> %x, float %eee, i32 1
ret <16 x float> %rrr2
@ -56,7 +53,6 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind {
; CHECK-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%eee = extractelement <8 x i64> %x, i32 4
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
ret <8 x i64> %rrr2
@ -67,7 +63,6 @@ define i32 @test5(<4 x float> %x) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vextractps $3, %xmm0, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%ef = extractelement <4 x float> %x, i32 3
%ei = bitcast float %ef to i32
ret i32 %ei
@ -78,148 +73,83 @@ define void @test6(<4 x float> %x, float* %out) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vextractps $3, %xmm0, (%rdi)
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%ef = extractelement <4 x float> %x, i32 3
store float %ef, float* %out, align 4
ret void
}
define float @test7(<16 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test7:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %zmm0, (%rsp)
; KNL-NEXT: andl $15, %edi
; KNL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: andq $-64, %rsp
; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: andl $15, %edi
; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%e = extractelement <16 x float> %x, i32 %ind
ret float %e
}
define double @test8(<8 x double> %x, i32 %ind) nounwind {
; KNL-LABEL: test8:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %zmm0, (%rsp)
; KNL-NEXT: andl $7, %edi
; KNL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: andq $-64, %rsp
; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: andl $7, %edi
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%e = extractelement <8 x double> %x, i32 %ind
ret double %e
}
define float @test9(<8 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: andl $7, %edi
; KNL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: andq $-32, %rsp
; SKX-NEXT: subq $64, %rsp
; SKX-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; SKX-NEXT: vmovaps %ymm0, (%rsp)
; SKX-NEXT: andl $7, %edi
; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
; CHECK-LABEL: test9:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%e = extractelement <8 x float> %x, i32 %ind
ret float %e
}
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; KNL-LABEL: test10:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %zmm0, (%rsp)
; KNL-NEXT: andl $15, %edi
; KNL-NEXT: movl (%rsp,%rdi,4), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: andq $-64, %rsp
; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: andl $15, %edi
; SKX-NEXT: movl (%rsp,%rdi,4), %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
; CHECK-LABEL: test10:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movl (%rsp,%rdi,4), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%e = extractelement <16 x i32> %x, i32 %ind
ret i32 %e
}
@ -274,6 +204,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
; KNL-NEXT: testb $1, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test12:
@ -339,6 +270,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; KNL-NEXT: testb $1, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test14:
@ -387,6 +319,7 @@ define i16 @test16(i1 *%addr, i16 %a) {
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
@ -424,6 +357,7 @@ define i8 @test17(i1 *%addr, i8 %a) {
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
@ -448,20 +382,13 @@ define i8 @test17(i1 *%addr, i8 %a) {
}
define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v8i64:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v8i64:
; SKX: ## BB#0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v8i64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <8 x i64> %x, i32 1
%r2 = extractelement <8 x i64> %x, i32 3
store i64 %r2, i64* %dst, align 1
@ -469,20 +396,13 @@ define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
}
define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v4i64:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v4i64:
; SKX: ## BB#0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v4i64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <4 x i64> %x, i32 1
%r2 = extractelement <4 x i64> %x, i32 3
store i64 %r2, i64* %dst, align 1
@ -502,20 +422,13 @@ define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
}
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v16i32:
; KNL: ## BB#0:
; KNL-NEXT: vpextrd $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v16i32:
; SKX: ## BB#0:
; SKX-NEXT: vpextrd $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v16i32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <16 x i32> %x, i32 1
%r2 = extractelement <16 x i32> %x, i32 5
store i32 %r2, i32* %dst, align 1
@ -523,20 +436,13 @@ define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
}
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v8i32:
; KNL: ## BB#0:
; KNL-NEXT: vpextrd $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v8i32:
; SKX: ## BB#0:
; SKX-NEXT: vpextrd $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v8i32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <8 x i32> %x, i32 1
%r2 = extractelement <8 x i32> %x, i32 5
store i32 %r2, i32* %dst, align 1
@ -556,22 +462,14 @@ define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
}
define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v32i16:
; KNL: ## BB#0:
; KNL-NEXT: vpextrw $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v32i16:
; SKX: ## BB#0:
; SKX-NEXT: vpextrw $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v32i16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrw $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <32 x i16> %x, i32 1
%r2 = extractelement <32 x i16> %x, i32 9
store i16 %r2, i16* %dst, align 1
@ -579,22 +477,14 @@ define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
}
define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v16i16:
; KNL: ## BB#0:
; KNL-NEXT: vpextrw $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v16i16:
; SKX: ## BB#0:
; SKX-NEXT: vpextrw $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v16i16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrw $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <16 x i16> %x, i32 1
%r2 = extractelement <16 x i16> %x, i32 9
store i16 %r2, i16* %dst, align 1
@ -615,22 +505,14 @@ define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
}
define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v64i8:
; KNL: ## BB#0:
; KNL-NEXT: vpextrb $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v64i8:
; SKX: ## BB#0:
; SKX-NEXT: vpextrb $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v64i8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrb $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrb $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <64 x i8> %x, i32 1
%r2 = extractelement <64 x i8> %x, i32 17
store i8 %r2, i8* %dst, align 1
@ -638,22 +520,14 @@ define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
}
define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v32i8:
; KNL: ## BB#0:
; KNL-NEXT: vpextrb $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v32i8:
; SKX: ## BB#0:
; SKX-NEXT: vpextrb $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; CHECK-LABEL: extract_v32i8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpextrb $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrb $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%r1 = extractelement <32 x i8> %x, i32 1
%r2 = extractelement <32 x i8> %x, i32 17
store i8 %r2, i8* %dst, align 1
@ -936,6 +810,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
@ -1061,7 +936,6 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: setb %al
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
@ -1083,6 +957,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: movl (%rsp), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_insertelement_v32i1:
@ -1112,23 +987,23 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-LABEL: test_iinsertelement_v4i1:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $4, %xmm0, %eax
; KNL-NEXT: setb %cl
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpextrb $4, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpextrb $0, %xmm0, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
@ -1144,6 +1019,7 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_iinsertelement_v4i1:
@ -1188,6 +1064,7 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_iinsertelement_v2i1:
@ -1308,6 +1185,7 @@ define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) {
; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpextrb $2, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_v32i1:
@ -1338,6 +1216,7 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) {
; KNL-NEXT: movb $4, %cl
; KNL-NEXT: subb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_v64i1:
@ -1370,6 +1249,7 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) {
; KNL-NEXT: movb $4, %cl
; KNL-NEXT: subb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: extractelement_v64i1_alt:
@ -1421,6 +1301,7 @@ define i64 @test_extractelement_variable_v4i64(<4 x i64> %t1, i32 %index) {
; KNL-NEXT: movq (%rsp,%rdi,8), %rax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v4i64:
@ -1466,6 +1347,7 @@ define i64 @test_extractelement_variable_v8i64(<8 x i64> %t1, i32 %index) {
; KNL-NEXT: movq (%rsp,%rdi,8), %rax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v8i64:
@ -1523,6 +1405,7 @@ define double @test_extractelement_variable_v4f64(<4 x double> %t1, i32 %index)
; KNL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v4f64:
@ -1568,6 +1451,7 @@ define double @test_extractelement_variable_v8f64(<8 x double> %t1, i32 %index)
; KNL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v8f64:
@ -1625,6 +1509,7 @@ define i32 @test_extractelement_variable_v8i32(<8 x i32> %t1, i32 %index) {
; KNL-NEXT: movl (%rsp,%rdi,4), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v8i32:
@ -1670,6 +1555,7 @@ define i32 @test_extractelement_variable_v16i32(<16 x i32> %t1, i32 %index) {
; KNL-NEXT: movl (%rsp,%rdi,4), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v16i32:
@ -1727,6 +1613,7 @@ define float @test_extractelement_variable_v8f32(<8 x float> %t1, i32 %index) {
; KNL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v8f32:
@ -1772,6 +1659,7 @@ define float @test_extractelement_variable_v16f32(<16 x float> %t1, i32 %index)
; KNL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v16f32:
@ -1829,6 +1717,7 @@ define i16 @test_extractelement_variable_v16i16(<16 x i16> %t1, i32 %index) {
; KNL-NEXT: movzwl (%rsp,%rdi,2), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v16i16:
@ -1875,6 +1764,7 @@ define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) {
; KNL-NEXT: movzwl (%rsp,%rdi,2), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v32i16:
@ -1902,23 +1792,14 @@ define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) {
}
define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
; KNL-LABEL: test_extractelement_variable_v16i8:
; KNL: ## BB#0:
; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $15, %edi
; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; KNL-NEXT: movb (%rdi,%rax), %al
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v16i8:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; SKX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $15, %edi
; SKX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; SKX-NEXT: movb (%rdi,%rax), %al
; SKX-NEXT: retq
; CHECK-LABEL: test_extractelement_variable_v16i8:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movb (%rdi,%rax), %al
; CHECK-NEXT: retq
%t2 = extractelement <16 x i8> %t1, i32 %index
ret i8 %t2
}
@ -1936,13 +1817,14 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: andl $31, %edi
; KNL-NEXT: movq %rsp, %rax
; KNL-NEXT: movb (%rdi,%rax), %al
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v32i8:
@ -1984,14 +1866,15 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $63, %edi
; KNL-NEXT: movq %rsp, %rax
; KNL-NEXT: movb (%rdi,%rax), %al
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v64i8:
@ -2042,6 +1925,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
; KNL-NEXT: movb (%rax,%rcx), %al
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v64i8_indexi8:
@ -2075,12 +1959,12 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v2i1:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax
; KNL-NEXT: andl $1, %eax
@ -2105,12 +1989,12 @@ define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b,
define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v4i1:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $3, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax
; KNL-NEXT: andl $1, %eax
@ -2156,6 +2040,7 @@ define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b,
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v8i1:
@ -2209,6 +2094,7 @@ define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v16i1:
@ -2265,6 +2151,7 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v32i1:

File diff suppressed because it is too large.

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-LABEL: test1:
@ -8,7 +8,6 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
@ -20,7 +19,6 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
@ -32,7 +30,6 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -45,7 +42,6 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1)
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
@ -57,7 +53,6 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
@ -69,7 +64,6 @@ define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) noun
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
@ -123,14 +117,12 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@ -145,14 +137,12 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
@ -164,7 +154,6 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@ -178,8 +167,8 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test12:
; SKX: ## BB#0:
@ -190,7 +179,6 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
@ -269,6 +257,8 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
@ -325,13 +315,11 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@ -342,8 +330,8 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-NEXT: movl (%rsp), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test12_v32i32:
; SKX: ## BB#0:
@ -353,7 +341,6 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%res = icmp eq <32 x i32> %a, %b
%res1 = bitcast <32 x i1> %res to i32
ret i32 %res1
@ -577,75 +564,75 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@ -656,8 +643,8 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-NEXT: orq %rcx, %rax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test12_v64i16:
; SKX: ## BB#0:
@ -667,7 +654,6 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%res = icmp eq <64 x i16> %a, %b
%res1 = bitcast <64 x i1> %res to i64
ret i64 %res1
@ -721,7 +707,6 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
@ -733,7 +718,6 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -746,7 +730,6 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -759,7 +742,6 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -773,7 +755,6 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@ -788,7 +769,6 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@ -803,7 +783,6 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
@ -819,7 +798,6 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
@ -834,7 +812,6 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
@ -849,7 +826,6 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
@ -865,7 +841,6 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@ -883,7 +858,6 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@ -926,6 +900,7 @@ define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32>
; KNL-NEXT: kxorw %k1, %k0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test29:
@ -949,14 +924,12 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test30:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%mask = fcmp oeq <4 x double> %x, %y
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
@ -969,14 +942,12 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp
; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test31:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <2 x double>, <2 x double>* %yp, align 4
%mask = fcmp olt <2 x double> %x, %y
@ -990,14 +961,12 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp
; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test32:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <4 x double>, <4 x double>* %yp, align 4
%mask = fcmp ogt <4 x double> %y, %x
@ -1011,7 +980,6 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <8 x double>, <8 x double>* %yp, align 4
%mask = fcmp olt <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@ -1024,14 +992,12 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no
; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test34:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <4 x float>, <4 x float>* %yp, align 4
%mask = fcmp olt <4 x float> %x, %y
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@ -1048,14 +1014,12 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test35:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <8 x float>, <8 x float>* %yp, align 4
%mask = fcmp ogt <8 x float> %y, %x
@ -1069,7 +1033,6 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x float>, <16 x float>* %yp, align 4
%mask = fcmp olt <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@ -1082,7 +1045,6 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
@ -1100,14 +1062,12 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou
; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test38:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
@ -1125,14 +1085,12 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test39:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
@ -1150,7 +1108,6 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%a = load float, float* %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
@ -1171,14 +1128,12 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test41:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load float, float* %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
@ -1196,14 +1151,12 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test42:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load float, float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
@ -1223,7 +1176,6 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
;
; SKX-LABEL: test43:
; SKX: ## BB#0:
@ -1232,7 +1184,6 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0

File diff suppressed because it is too large.

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl --show-mc-encoding| FileCheck %s
declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)
@ -9,8 +9,8 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8>
; CHECK-NEXT: vpbroadcastb %edi, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf]
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
; CHECK-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
@ -30,8 +30,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16
; CHECK-NEXT: vpbroadcastw %edi, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf]
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
; CHECK-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
@ -51,8 +51,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16
; CHECK-NEXT: vpbroadcastb %edi, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf]
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
; CHECK-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
; CHECK-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
@ -73,8 +73,8 @@ declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i
; CHECK-NEXT: vpbroadcastw %edi, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf]
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
; CHECK-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
@ -93,9 +93,9 @@ define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xd0]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
; CHECK-NEXT: vpaddb %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc9]
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1]
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
%res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
@ -113,9 +113,9 @@ define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; CHECK-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc9]
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
@@ -133,9 +133,9 @@ define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16>
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc9]
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
%res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
@@ -153,9 +153,9 @@ define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; CHECK-NEXT: vpaddw %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc9]
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
@@ -173,9 +173,9 @@ define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x
; CHECK-NEXT: vpbroadcastb %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xd0]
; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
; CHECK-NEXT: vpaddb %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc9]
; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
%res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
@@ -193,9 +193,9 @@ define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16>
; CHECK-NEXT: vpbroadcastw %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xd0]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
; CHECK-NEXT: vpaddw %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc9]
; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
%res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
@@ -227,6 +227,7 @@ define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8
; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2)
call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1)
@@ -255,6 +256,7 @@ define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i1
; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2)
call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1)
@@ -345,8 +347,8 @@ define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %
; CHECK-NEXT: ## xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
@@ -368,8 +370,8 @@ define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %
; CHECK-NEXT: ## ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
%res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
@@ -391,8 +393,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <
; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
@@ -414,8 +416,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1,
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
@@ -437,8 +439,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <
; CHECK-NEXT: ## xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
@@ -460,8 +462,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1,
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
@@ -476,6 +478,7 @@ define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
ret i32 %res
@@ -487,6 +490,7 @@ define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
ret i32 %res
@@ -500,6 +504,7 @@ define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
@@ -512,6 +517,7 @@ define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
@@ -524,6 +530,7 @@ define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
ret i32 %res
@@ -535,6 +542,7 @@ define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
ret i32 %res
@@ -548,6 +556,7 @@ define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
@@ -560,6 +569,7 @@ define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
@@ -1660,9 +1670,9 @@ define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x
; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xd3]
; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
@@ -1680,8 +1690,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16>
; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xd3]
; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
; CHECK-NEXT: vpaddw %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
@@ -1701,8 +1711,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
@@ -1721,8 +1731,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16>
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
@@ -1741,8 +1751,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
@@ -1761,8 +1771,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16>
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
@@ -1780,9 +1790,9 @@ define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8
; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03]
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca]
; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03]
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
@@ -1800,8 +1810,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1,
; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03]
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xca]
; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
@@ -1821,8 +1831,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
; CHECK-NEXT: vpsraw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
@@ -1841,8 +1851,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1,
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
; CHECK-NEXT: vpsraw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
@@ -1861,8 +1871,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
@@ -1881,8 +1891,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1,
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03]
; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
@@ -1936,8 +1946,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16>
; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
@@ -1959,8 +1969,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i1
; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
@@ -1980,8 +1990,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16>
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
@@ -2000,8 +2010,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i1
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
@@ -2020,8 +2030,8 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64>
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
@@ -2040,8 +2050,8 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64>
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
@@ -2834,23 +2844,23 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
@@ -2932,23 +2942,23 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask)
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
@@ -2997,6 +3007,7 @@ define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -3043,6 +3054,7 @@ define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask)
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -3090,6 +3102,7 @@ define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -3136,6 +3149,7 @@ define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0

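A note on the vzeroupper lines added throughout the hunks above: llc runs a late pass that inserts vzeroupper before returning from any function that has dirtied the upper ymm/zmm state, to avoid AVX-to-SSE transition stalls, and the regenerated checks now capture it. A minimal sketch of the pattern, mirroring test_pcmpeq_w_256 above (the function name vzeroupper_demo is invented for illustration and is not part of this commit):

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s
; The 256-bit compare writes ymm state, so llc appends vzeroupper before ret.
define i16 @vzeroupper_demo(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: vzeroupper_demo:
; CHECK: vpcmpeqw %ymm1, %ymm0, %k0
; CHECK: vzeroupper
; CHECK: retq
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)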
File diff suppressed because it is too large


@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=VCHECK --check-prefix=AVX2
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=VCHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=VCHECK --check-prefix=SKX
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
; SSE42-LABEL: test_x86_sse42_pcmpestri128:
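
As a practical aside, and not part of this commit: the feature names used in the new RUN lines can be cross-checked against llc's own tables. A sketch, assuming a stock llc build (llc accepts the standard -mattr=help and -mcpu=help flags, and the redirect from /dev/null stops it waiting for module input):

llc -march=x86-64 -mattr=help < /dev/null
llc -march=x86-64 -mcpu=help < /dev/null

The listings should include skx among the CPUs and avx512f, avx512bw, avx512vl, and avx512dq among the available features.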