diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 775d05b63605..7ddd9bdc9837 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14503,16 +14503,20 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
   }
 
-  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  // If the kshift instructions of the correct width aren't natively supported
+  // then we need to promote the vector to the native size to get the correct
+  // zeroing behavior.
   if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
       (VecVT.getVectorNumElements() < 8)) {
-    // Use kshiftlw/rw instruction.
     VecVT = MVT::v16i1;
     Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
                       DAG.getUNDEF(VecVT),
                       Vec,
                       DAG.getIntPtrConstant(0, dl));
   }
+
+  // Use kshiftlw/rw instruction.
+  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
   unsigned MaxShift = VecVT.getVectorNumElements() - 1;
   if (MaxShift - IdxVal)
     Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
@@ -14670,7 +14674,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
 
 /// Insert one bit to mask vector, like v16i1 or v8i1.
 /// AVX-512 feature.
-static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) {
+static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
+                                     const X86Subtarget &Subtarget) {
   SDLoc dl(Op);
   SDValue Vec = Op.getOperand(0);
   SDValue Elt = Op.getOperand(1);
@@ -14689,10 +14694,31 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) {
   }
 
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-  SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
   unsigned NumElems = VecVT.getVectorNumElements();
 
-  if(Vec.isUndef()) {
+  // If the kshift instructions of the correct width aren't natively supported
+  // then we need to promote the vector to the native size to get the correct
+  // zeroing behavior.
+  bool HasNativeShift = true;
+  if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
+    HasNativeShift = false;
+    // For now don't do this if we are going to end up using the shuffle
+    // below. This minimizes test diffs.
+    // TODO: Remove this restriction once we no longer need a shuffle fallback.
+    if (Vec.isUndef() || IdxVal == 0) {
+      // Need to promote to v16i1, do the insert, then extract back.
+      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                        DAG.getUNDEF(MVT::v16i1), Vec,
+                        DAG.getIntPtrConstant(0, dl));
+      Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
+                         DAG.getIntPtrConstant(0, dl));
+    }
+  }
+
+  SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
+
+  if (Vec.isUndef()) {
     if (IdxVal)
       EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
                              DAG.getConstant(IdxVal, dl, MVT::i8));
@@ -14715,7 +14741,7 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) {
     return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
   }
   // Insertion of one bit into last position
-  if (IdxVal == NumElems -1) {
+  if (HasNativeShift && IdxVal == NumElems - 1) {
     // Move the bit to the last position inside the vector.
     EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
                            DAG.getConstant(IdxVal, dl, MVT::i8));
@@ -14743,7 +14769,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   unsigned NumElts = VT.getVectorNumElements();
 
   if (EltVT == MVT::i1)
-    return InsertBitToMaskVector(Op, DAG);
+    return InsertBitToMaskVector(Op, DAG, Subtarget);
 
   SDLoc dl(Op);
   SDValue N0 = Op.getOperand(0);
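The LLVM IR below is an illustrative sketch only; it is not part of this patch or of the test files that follow, and the function names are made up. It shows the kind of source-level input the two changed paths deal with on a KNL-like subtarget (no AVX512DQ, so only the 16-bit kshiftlw/kshiftrw are native): extracting a bit from, and inserting a bit into, a mask vector narrower than 16 elements, which the code above now promotes to v16i1 to get the intended zeroing behavior.

; Hypothetical example for the extract side: a <4 x i1> compare result whose
; element is read back as a scalar i1.
define i1 @extract_bit_v4i1_example(<4 x i32> %x, <4 x i32> %y) {
  %cmp = icmp eq <4 x i32> %x, %y
  %bit = extractelement <4 x i1> %cmp, i32 2
  ret i1 %bit
}

; Hypothetical example for the insert side: a constant index of 0, which per
; the change above should take the promote-to-v16i1, insert, extract-back
; route when a kshift of the narrow width isn't native.
define <4 x i1> @insert_bit_v4i1_example(<4 x i1> %mask, i1 %b) {
  %r = insertelement <4 x i1> %mask, i1 %b, i32 0
  ret <4 x i1> %r
}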
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 39d93e70dd15..a5abfe0cad2c 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -991,7 +991,11 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
 ; KNL-NEXT: kmovw %ecx, %k1
 ; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; KNL-NEXT: vpextrb $0, %xmm0, %ecx
-; KNL-NEXT: kmovw %ecx, %k1
+; KNL-NEXT: andl $1, %ecx
+; KNL-NEXT: kmovw %ecx, %k0
+; KNL-NEXT: kshiftrw $1, %k0, %k1
+; KNL-NEXT: kshiftlw $1, %k1, %k1
+; KNL-NEXT: korw %k0, %k1, %k1
 ; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
 ; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
@@ -1049,7 +1053,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
 ; KNL-NEXT: vpextrb $0, %xmm0, %ecx
-; KNL-NEXT: kmovw %ecx, %k1
+; KNL-NEXT: andl $1, %ecx
+; KNL-NEXT: kmovw %ecx, %k0
+; KNL-NEXT: kshiftrw $1, %k0, %k1
+; KNL-NEXT: kshiftlw $1, %k1, %k1
+; KNL-NEXT: korw %k0, %k1, %k1
 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT: kmovw %eax, %k1
 ; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -1067,11 +1075,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
 ; SKX-NEXT: cmpl %esi, %edi
 ; SKX-NEXT: setb %al
 ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: kshiftlw $1, %k1, %k1
-; SKX-NEXT: kshiftlw $1, %k0, %k0
-; SKX-NEXT: kshiftrw $1, %k0, %k0
-; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: kmovd %eax, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm1
+; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SKX-NEXT: vpmovq2m %xmm0, %k0
 ; SKX-NEXT: kmovd %k0, %eax
 ; SKX-NEXT: ## kill: def %al killed %al killed %eax
 ; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 014460176715..d5a3f784af48 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1159,22 +1159,24 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
 define <8 x i1> @test18(i8 %a, i16 %y) {
 ; KNL-LABEL: test18:
 ; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k1
-; KNL-NEXT: kmovw %esi, %k2
-; KNL-NEXT: kshiftlw $7, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kshiftlw $6, %k2, %k2
-; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: kmovw %edi, %k2
+; KNL-NEXT: kmovw %esi, %k0
+; KNL-NEXT: kshiftlw $7, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kshiftlw $6, %k0, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k3
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
 ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
 ; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
 ; KNL-NEXT: vpsllq $63,
%zmm2, %zmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k2 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8] +; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 +; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: kshiftlw $7, %k0, %k0 -; KNL-NEXT: korw %k0, %k1, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0 @@ -1204,22 +1206,24 @@ define <8 x i1> @test18(i8 %a, i16 %y) { ; ; AVX512BW-LABEL: test18: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: kmovd %esi, %k2 -; AVX512BW-NEXT: kshiftlw $7, %k2, %k0 -; AVX512BW-NEXT: kshiftrw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftlw $6, %k2, %k2 -; AVX512BW-NEXT: kshiftrw $15, %k2, %k2 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; AVX512BW-NEXT: kmovd %edi, %k2 +; AVX512BW-NEXT: kmovd %esi, %k0 +; AVX512BW-NEXT: kshiftlw $7, %k0, %k1 +; AVX512BW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512BW-NEXT: kshiftlw $6, %k0, %k0 +; AVX512BW-NEXT: kshiftrw $15, %k0, %k3 +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z} ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0 -; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k1 -; AVX512BW-NEXT: kshiftlw $1, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $7, %k0, %k0 -; AVX512BW-NEXT: korw %k0, %k1, %k0 +; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k2 +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8] +; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0 +; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512BW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll index 7900812aae98..217ddb607b80 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -4210,9 +4210,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -4266,9 +4265,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* % ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -4342,9 +4340,8 @@ define 
zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -4420,9 +4417,8 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -4482,9 +4478,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -4560,9 +4555,8 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -8530,9 +8524,8 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -8570,9 +8563,8 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* % ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -8622,9 +8614,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -8676,9 +8667,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, 
%k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -8722,9 +8712,8 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -8776,9 +8765,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -9742,9 +9730,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -9800,9 +9787,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* % ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -9878,9 +9864,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -9958,9 +9943,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -10022,9 +10006,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -10102,9 +10085,8 @@ define zeroext i8 
@test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -16571,9 +16553,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -16627,9 +16608,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -16703,9 +16683,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -16781,9 +16760,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -16843,9 +16821,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -16921,9 +16898,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -20891,9 +20867,8 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw 
%eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -20931,9 +20906,8 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -20983,9 +20957,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -21037,9 +21010,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -21083,9 +21055,8 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -21137,9 +21108,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -22103,9 +22073,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -22161,9 +22130,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -22239,9 +22207,8 @@ define zeroext i8 
@test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -22319,9 +22286,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -22383,9 +22349,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -22463,9 +22428,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -29030,9 +28994,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -29089,9 +29052,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -29165,9 +29127,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -29244,9 +29205,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, 
%k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -29308,9 +29268,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -29386,9 +29345,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -33390,9 +33348,8 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -33433,9 +33390,8 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -33485,9 +33441,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -33540,9 +33495,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -33588,9 +33542,8 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -33642,9 +33595,8 @@ define zeroext i8 
@test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -34634,9 +34586,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -34695,9 +34646,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -34775,9 +34725,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -34858,9 +34807,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -34924,9 +34872,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -35006,9 +34953,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -41636,9 +41582,8 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw 
%eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -41695,9 +41640,8 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -41774,9 +41718,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -41855,9 +41798,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -41920,9 +41862,8 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -42001,9 +41942,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -46046,9 +45986,8 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -46089,9 +46028,8 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -46144,9 +46082,8 @@ define zeroext i8 
@test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -46201,9 +46138,8 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -46250,9 +46186,8 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -46307,9 +46242,8 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -47330,9 +47264,8 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -47391,9 +47324,8 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -47472,9 +47404,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -47555,9 +47486,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, 
%k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -47622,9 +47552,8 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -47705,9 +47634,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -50036,9 +49964,8 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -50092,9 +50019,8 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -50150,9 +50076,8 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %_ ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -50215,9 +50140,8 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -50280,9 +50204,8 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -50347,9 +50270,8 @@ define zeroext i8 
@test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -54279,9 +54201,8 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -54319,9 +54240,8 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -54361,9 +54281,8 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* % ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -54410,9 +54329,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -54459,9 +54377,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -54510,9 +54427,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -55431,9 +55347,8 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw 
%eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -55489,9 +55404,8 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -55549,9 +55463,8 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* % ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -55616,9 +55529,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i6 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -55683,9 +55595,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 @@ -55752,9 +55663,8 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1