[X86] Fix InsertBitToMaskVector to only issue KSHIFTS of native size so that upper bits are properly zeroed.

There's no v2i1 or v4i1 kshift, and the v8i1 kshift is only supported with AVX512DQ. Isel has fake patterns to extend these types to native shifts, but makes no guarantees about the value of any bits shifted in when shifting right.

This patch promotes the vector to a type that supports a native shift first and only allows inserting into the msb of a native sized shift.

I've constructed this in a way that doesn't do the promotion if we're going to fall back to using an xmm/ymm/zmm shuffle. I think I have a plan to remove the shuffle fallback entirely, in which case this can be simplified, but I wanted to fix the correctness issue first.

llvm-svn: 320081
This commit is contained in:
Craig Topper 2017-12-07 20:10:04 +00:00
parent 7b8fa5f782
commit dfc79c7c33
4 changed files with 167 additions and 219 deletions

View File

@ -14503,16 +14503,20 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// If the kshift instructions of the correct width aren't natively supported
// then we need to promote the vector to the native size to get the correct
// zeroing behavior.
if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
(VecVT.getVectorNumElements() < 8)) {
// Use kshiftlw/rw instruction.
VecVT = MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
DAG.getUNDEF(VecVT),
Vec,
DAG.getIntPtrConstant(0, dl));
}
// Use kshiftlw/rw instruction.
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
unsigned MaxShift = VecVT.getVectorNumElements() - 1;
if (MaxShift - IdxVal)
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
@ -14670,7 +14674,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
/// Insert one bit to mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) {
static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
@ -14689,10 +14694,31 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) {
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
unsigned NumElems = VecVT.getVectorNumElements();
if(Vec.isUndef()) {
// If the kshift instructions of the correct width aren't natively supported
// then we need to promote the vector to the native size to get the correct
// zeroing behavior.
bool HasNativeShift = true;
if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
HasNativeShift = false;
// For now don't do this if we are going to end up using the shuffle
// below. This minimizes test diffs.
// TODO: Remove this restriction once we no longer need a shuffle fallback.
if (Vec.isUndef() || IdxVal == 0) {
// Need to promote to v16i1, do the insert, then extract back.
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getUNDEF(MVT::v16i1), Vec,
DAG.getIntPtrConstant(0, dl));
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
DAG.getIntPtrConstant(0, dl));
}
}
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
if (Vec.isUndef()) {
if (IdxVal)
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
@ -14715,7 +14741,7 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Insertion of one bit into last position
if (IdxVal == NumElems -1) {
if (HasNativeShift && IdxVal == NumElems - 1) {
// Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
@ -14743,7 +14769,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
if (EltVT == MVT::i1)
return InsertBitToMaskVector(Op, DAG);
return InsertBitToMaskVector(Op, DAG, Subtarget);
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);

View File

@ -991,7 +991,11 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k0, %k1, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
@ -1049,7 +1053,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k0, %k1, %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@ -1067,11 +1075,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftlw $1, %k1, %k1
; SKX-NEXT: kshiftlw $1, %k0, %k0
; SKX-NEXT: kshiftrw $1, %k0, %k0
; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm1
; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SKX-NEXT: vpmovq2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq

View File

@ -1159,22 +1159,24 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %esi, %k2
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kshiftlw $6, %k2, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kshiftlw $6, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k3
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k2
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8]
; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kshiftlw $7, %k0, %k0
; KNL-NEXT: korw %k0, %k1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
@ -1204,22 +1206,24 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; AVX512BW-LABEL: test18:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: kmovd %esi, %k2
; AVX512BW-NEXT: kshiftlw $7, %k2, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; AVX512BW-NEXT: kshiftlw $6, %k2, %k2
; AVX512BW-NEXT: kshiftrw $15, %k2, %k2
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512BW-NEXT: kmovd %edi, %k2
; AVX512BW-NEXT: kmovd %esi, %k0
; AVX512BW-NEXT: kshiftlw $7, %k0, %k1
; AVX512BW-NEXT: kshiftrw $15, %k1, %k1
; AVX512BW-NEXT: kshiftlw $6, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k3
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512BW-NEXT: kshiftlw $1, %k1, %k1
; AVX512BW-NEXT: kshiftrw $1, %k1, %k1
; AVX512BW-NEXT: kshiftlw $7, %k0, %k0
; AVX512BW-NEXT: korw %k0, %k1, %k0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k2
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper

View File

@ -4210,9 +4210,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -4266,9 +4265,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -4342,9 +4340,8 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -4420,9 +4417,8 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -4482,9 +4478,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -4560,9 +4555,8 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -8530,9 +8524,8 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -8570,9 +8563,8 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -8622,9 +8614,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -8676,9 +8667,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -8722,9 +8712,8 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -8776,9 +8765,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -9742,9 +9730,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -9800,9 +9787,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -9878,9 +9864,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -9958,9 +9943,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -10022,9 +10006,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -10102,9 +10085,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -16571,9 +16553,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -16627,9 +16608,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -16703,9 +16683,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -16781,9 +16760,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -16843,9 +16821,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -16921,9 +16898,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -20891,9 +20867,8 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -20931,9 +20906,8 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -20983,9 +20957,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -21037,9 +21010,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -21083,9 +21055,8 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -21137,9 +21108,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -22103,9 +22073,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -22161,9 +22130,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -22239,9 +22207,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -22319,9 +22286,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -22383,9 +22349,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -22463,9 +22428,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -29030,9 +28994,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -29089,9 +29052,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -29165,9 +29127,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -29244,9 +29205,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -29308,9 +29268,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -29386,9 +29345,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -33390,9 +33348,8 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -33433,9 +33390,8 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -33485,9 +33441,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -33540,9 +33495,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -33588,9 +33542,8 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -33642,9 +33595,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -34634,9 +34586,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -34695,9 +34646,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -34775,9 +34725,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -34858,9 +34807,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -34924,9 +34872,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -35006,9 +34953,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -41636,9 +41582,8 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -41695,9 +41640,8 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -41774,9 +41718,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -41855,9 +41798,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -41920,9 +41862,8 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -42001,9 +41942,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -46046,9 +45986,8 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -46089,9 +46028,8 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -46144,9 +46082,8 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -46201,9 +46138,8 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -46250,9 +46186,8 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -46307,9 +46242,8 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -47330,9 +47264,8 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -47391,9 +47324,8 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -47472,9 +47404,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -47555,9 +47486,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -47622,9 +47552,8 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -47705,9 +47634,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -50036,9 +49964,8 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -50092,9 +50019,8 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -50150,9 +50076,8 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %_
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -50215,9 +50140,8 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -50280,9 +50204,8 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -50347,9 +50270,8 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -54279,9 +54201,8 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -54319,9 +54240,8 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -54361,9 +54281,8 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -54410,9 +54329,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -54459,9 +54377,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -54510,9 +54427,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -55431,9 +55347,8 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b)
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -55489,9 +55404,8 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>*
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -55549,9 +55463,8 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -55616,9 +55529,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i6
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -55683,9 +55595,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
@ -55752,9 +55663,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1