[X86][AVX] X86ISD::VPERMV/VPERMV3 node types can never fold index ops

Improves the codegen demonstrated by D60512 - instructions represented by X86ISD::VPERMV/VPERMV3 can never fold a memory load into the operand used as their index register.

This patch turns the 'isUseOfShuffle' helper into the more capable 'isFoldableUseOfShuffle', which recognises when the op is used as an X86ISD::VPERMV/VPERMV3 index mask and therefore can't be folded - allowing us to use broadcast/subvector-broadcast ops to reduce the size of the mask constant pool data.

Differential Revision: https://reviews.llvm.org/D60562

llvm-svn: 358516
This commit is contained in:
Simon Pilgrim 2019-04-16 19:18:53 +00:00
parent 5ecd6a48b9
commit d769bb1e58
38 changed files with 516 additions and 370 deletions

View File

@ -7566,12 +7566,20 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
}
static bool isUseOfShuffle(SDNode *N) {
static bool isFoldableUseOfShuffle(SDNode *N) {
for (auto *U : N->uses()) {
if (isTargetShuffle(U->getOpcode()))
unsigned Opc = U->getOpcode();
// VPERMV/VPERMV3 shuffles can never fold their index operands.
if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N)
return false;
if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N)
return false;
if (isTargetShuffle(Opc))
return true;
if (Opc == ISD::BITCAST) // Ignore bitcasts
return isFoldableUseOfShuffle(U);
if (N->hasOneUse())
return true;
if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
return isUseOfShuffle(U);
}
return false;
}
@ -7679,7 +7687,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
SplatBitSize < VT.getSizeInBits()) {
// Avoid replacing with broadcast when it's a use of a shuffle
// instruction to preserve the present custom lowering of shuffles.
if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
if (isFoldableUseOfShuffle(BVOp))
return SDValue();
// replace BUILD_VECTOR with broadcast of the repeated constants.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();

View File

@ -462,9 +462,9 @@ define <16 x i16> @test_masked_z_32xi16_to_16xi16_perm_mask3(<32 x i16> %vec, <1
define <8 x i16> @test_32xi16_to_8xi16_perm_mask0(<32 x i16> %vec) {
; CHECK-LABEL: test_32xi16_to_8xi16_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = <22,27,7,10,13,21,5,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [22,27,7,10,13,21,5,14]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; CHECK-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@ -474,9 +474,9 @@ define <8 x i16> @test_32xi16_to_8xi16_perm_mask0(<32 x i16> %vec) {
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask0(<32 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <22,27,7,10,13,21,5,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm0, %ymm3, %ymm4
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [22,27,7,10,13,21,5,14]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; CHECK-NEXT: vpermt2w %ymm0, %ymm3, %ymm4
; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmw %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
@ -490,9 +490,9 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask0(<32 x i16> %vec, <8 x i
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mask0(<32 x i16> %vec, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <22,27,7,10,13,21,5,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm0, %ymm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [22,27,7,10,13,21,5,14]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermt2w %ymm0, %ymm2, %ymm3
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
@ -505,11 +505,11 @@ define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mask0(<32 x i16> %vec, <8 x
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask1(<32 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <1,21,27,10,8,19,14,5,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,21,27,10,8,19,14,5]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; CHECK-NEXT: vpermt2w %ymm4, %ymm3, %ymm0
; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmw %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> <i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5>
@ -521,11 +521,11 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask1(<32 x i16> %vec, <8 x i
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mask1(<32 x i16> %vec, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <1,21,27,10,8,19,14,5,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,21,27,10,8,19,14,5]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermt2w %ymm3, %ymm2, %ymm0
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> <i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5>
@ -536,11 +536,11 @@ define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mask1(<32 x i16> %vec, <8 x
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask2(<32 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <15,13,18,16,9,11,26,8,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [15,13,18,16,9,11,26,8]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; CHECK-NEXT: vpermt2w %ymm4, %ymm3, %ymm0
; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmw %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> <i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8>
@ -552,11 +552,11 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask2(<32 x i16> %vec, <8 x i
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mask2(<32 x i16> %vec, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <15,13,18,16,9,11,26,8,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [15,13,18,16,9,11,26,8]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermt2w %ymm3, %ymm2, %ymm0
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> <i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8>
@ -567,10 +567,10 @@ define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mask2(<32 x i16> %vec, <8 x
define <8 x i16> @test_32xi16_to_8xi16_perm_mask3(<32 x i16> %vec) {
; CHECK-LABEL: test_32xi16_to_8xi16_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [17,0,23,10,1,8,7,30]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = <17,0,23,10,1,8,7,30,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> <i32 17, i32 0, i32 23, i32 10, i32 1, i32 8, i32 7, i32 30>
@ -579,11 +579,11 @@ define <8 x i16> @test_32xi16_to_8xi16_perm_mask3(<32 x i16> %vec) {
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask3(<32 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <17,0,23,10,1,8,7,30,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [17,0,23,10,1,8,7,30]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; CHECK-NEXT: vpermt2w %ymm4, %ymm3, %ymm0
; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmw %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> <i32 17, i32 0, i32 23, i32 10, i32 1, i32 8, i32 7, i32 30>
@ -595,11 +595,11 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mask3(<32 x i16> %vec, <8 x i
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mask3(<32 x i16> %vec, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <17,0,23,10,1,8,7,30,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [17,0,23,10,1,8,7,30]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermt2w %ymm3, %ymm2, %ymm0
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> <i32 17, i32 0, i32 23, i32 10, i32 1, i32 8, i32 7, i32 30>
@ -760,9 +760,9 @@ define <16 x i16> @test_masked_z_32xi16_to_16xi16_perm_mem_mask3(<32 x i16>* %vp
define <8 x i16> @test_32xi16_to_8xi16_perm_mem_mask0(<32 x i16>* %vp) {
; CHECK-LABEL: test_32xi16_to_8xi16_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = <16,17,5,1,14,14,13,17,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w (%rdi), %ymm1, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [16,17,5,1,14,14,13,17]
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm0
; CHECK-NEXT: vpermt2w (%rdi), %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@ -773,9 +773,9 @@ define <8 x i16> @test_32xi16_to_8xi16_perm_mem_mask0(<32 x i16>* %vp) {
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask0(<32 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <16,17,5,1,14,14,13,17,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w (%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [16,17,5,1,14,14,13,17]
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm3
; CHECK-NEXT: vpermt2w (%rdi), %ymm2, %ymm3
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
@ -790,9 +790,9 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask0(<32 x i16>* %vp, <8
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mem_mask0(<32 x i16>* %vp, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <16,17,5,1,14,14,13,17,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w (%rdi), %ymm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [16,17,5,1,14,14,13,17]
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2
; CHECK-NEXT: vpermt2w (%rdi), %ymm1, %ymm2
; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
@ -807,9 +807,9 @@ define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mem_mask0(<32 x i16>* %vp,
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask1(<32 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <7,6,4,6,12,4,27,1,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w (%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [7,6,4,6,12,4,27,1]
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm3
; CHECK-NEXT: vpermt2w (%rdi), %ymm2, %ymm3
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
@ -824,9 +824,9 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask1(<32 x i16>* %vp, <8
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mem_mask1(<32 x i16>* %vp, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <7,6,4,6,12,4,27,1,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w (%rdi), %ymm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [7,6,4,6,12,4,27,1]
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2
; CHECK-NEXT: vpermt2w (%rdi), %ymm1, %ymm2
; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
@ -841,9 +841,9 @@ define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mem_mask1(<32 x i16>* %vp,
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask2(<32 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <6,18,0,4,10,25,22,10,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [6,18,0,4,10,25,22,10]
; CHECK-NEXT: vmovdqa (%rdi), %ymm3
; CHECK-NEXT: vpermt2w 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
@ -858,9 +858,9 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask2(<32 x i16>* %vp, <8
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mem_mask2(<32 x i16>* %vp, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <6,18,0,4,10,25,22,10,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [6,18,0,4,10,25,22,10]
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vpermt2w 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
@ -875,9 +875,9 @@ define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mem_mask2(<32 x i16>* %vp,
define <8 x i16> @test_32xi16_to_8xi16_perm_mem_mask3(<32 x i16>* %vp) {
; CHECK-LABEL: test_32xi16_to_8xi16_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = <19,1,5,31,9,12,17,9,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [19,1,5,31,9,12,17,9]
; CHECK-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-NEXT: vpermt2w 32(%rdi), %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@ -888,9 +888,9 @@ define <8 x i16> @test_32xi16_to_8xi16_perm_mem_mask3(<32 x i16>* %vp) {
define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask3(<32 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_32xi16_to_8xi16_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <19,1,5,31,9,12,17,9,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [19,1,5,31,9,12,17,9]
; CHECK-NEXT: vmovdqa (%rdi), %ymm3
; CHECK-NEXT: vpermt2w 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm3, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
@ -905,9 +905,9 @@ define <8 x i16> @test_masked_32xi16_to_8xi16_perm_mem_mask3(<32 x i16>* %vp, <8
define <8 x i16> @test_masked_z_32xi16_to_8xi16_perm_mem_mask3(<32 x i16>* %vp, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_32xi16_to_8xi16_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <19,1,5,31,9,12,17,9,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [19,1,5,31,9,12,17,9]
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vpermt2w 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
@ -935,7 +935,7 @@ define <8 x i16> @test_16xi16_to_8xi16_E84C94EF(<16 x i16> %vec) {
define <4 x i32> @test_8xi32_to_4xi32_perm_mask0(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = <4,0,3,2,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4,0,3,2]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
@ -946,7 +946,7 @@ define <4 x i32> @test_8xi32_to_4xi32_perm_mask0(<8 x i32> %vec) {
define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask0(<8 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_8xi32_to_4xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <4,0,3,2,u,u,u,u>
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [4,0,3,2]
; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
@ -961,7 +961,7 @@ define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask0(<8 x i32> %vec, <4 x i32
define <4 x i32> @test_masked_z_8xi32_to_4xi32_perm_mask0(<8 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_8xi32_to_4xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <4,0,3,2,u,u,u,u>
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,3,2]
; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
@ -975,7 +975,7 @@ define <4 x i32> @test_masked_z_8xi32_to_4xi32_perm_mask0(<8 x i32> %vec, <4 x i
define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask1(<8 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_8xi32_to_4xi32_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <3,0,7,3,u,u,u,u>
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [3,0,7,3]
; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
@ -990,7 +990,7 @@ define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask1(<8 x i32> %vec, <4 x i32
define <4 x i32> @test_masked_z_8xi32_to_4xi32_perm_mask1(<8 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_8xi32_to_4xi32_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <3,0,7,3,u,u,u,u>
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [3,0,7,3]
; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
@ -1033,7 +1033,7 @@ define <4 x i32> @test_masked_z_8xi32_to_4xi32_perm_mask2(<8 x i32> %vec, <4 x i
define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = <5,3,2,5,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [5,3,2,5]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
@ -1044,7 +1044,7 @@ define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) {
define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_8xi32_to_4xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <5,3,2,5,u,u,u,u>
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [5,3,2,5]
; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
@ -1059,7 +1059,7 @@ define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec, <4 x i32
define <4 x i32> @test_masked_z_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_8xi32_to_4xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <5,3,2,5,u,u,u,u>
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [5,3,2,5]
; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
@ -1397,8 +1397,8 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mask0(<16 x i32> %vec, <4 x
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mask1(<16 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [5,1,3,4]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <5,1,3,4,u,u,u,u>
; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
@ -1413,8 +1413,8 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mask1(<16 x i32> %vec, <4 x i
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mask1(<16 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [5,1,3,4]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <5,1,3,4,u,u,u,u>
; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
@ -1428,11 +1428,11 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mask1(<16 x i32> %vec, <4 x
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mask2(<16 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <1,1,13,0,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,13,0]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; CHECK-NEXT: vpermt2d %ymm4, %ymm3, %ymm0
; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmd %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 1, i32 1, i32 13, i32 0>
@ -1444,11 +1444,11 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mask2(<16 x i32> %vec, <4 x i
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mask2(<16 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <1,1,13,0,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm2, %ymm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,13,0]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermt2d %ymm3, %ymm2, %ymm0
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 1, i32 1, i32 13, i32 0>
@ -1459,10 +1459,10 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mask2(<16 x i32> %vec, <4 x
define <4 x i32> @test_16xi32_to_4xi32_perm_mask3(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [3,0,0,13]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = <3,0,0,13,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm2, %ymm0, %ymm1
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpermt2d %ymm2, %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 13>
@ -1471,11 +1471,11 @@ define <4 x i32> @test_16xi32_to_4xi32_perm_mask3(<16 x i32> %vec) {
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mask3(<16 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = <3,0,0,13,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [3,0,0,13]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; CHECK-NEXT: vpermt2d %ymm4, %ymm3, %ymm0
; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT: vpblendmd %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 13>
@ -1487,11 +1487,11 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mask3(<16 x i32> %vec, <4 x i
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mask3(<16 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <3,0,0,13,u,u,u,u>
; CHECK-NEXT: vpermi2d %ymm2, %ymm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [3,0,0,13]
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermt2d %ymm3, %ymm2, %ymm0
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 13>
@ -1647,9 +1647,9 @@ define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mem_mask3(<16 x i32>* %vp,
define <4 x i32> @test_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = <13,0,0,6,u,u,u,u>
; CHECK-NEXT: vpermi2d 32(%rdi), %ymm1, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [13,0,0,6]
; CHECK-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-NEXT: vpermt2d 32(%rdi), %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@ -1660,9 +1660,9 @@ define <4 x i32> @test_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp) {
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <13,0,0,6,u,u,u,u>
; CHECK-NEXT: vpermi2d 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [13,0,0,6]
; CHECK-NEXT: vmovdqa (%rdi), %ymm3
; CHECK-NEXT: vpermt2d 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm3, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
@ -1677,9 +1677,9 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp, <4
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask0(<16 x i32>* %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <13,0,0,6,u,u,u,u>
; CHECK-NEXT: vpermi2d 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [13,0,0,6]
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vpermt2d 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
@ -1728,9 +1728,9 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask1(<16 x i32>* %vp,
define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask2(<16 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <2,15,6,9,u,u,u,u>
; CHECK-NEXT: vpermi2d 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [2,15,6,9]
; CHECK-NEXT: vmovdqa (%rdi), %ymm3
; CHECK-NEXT: vpermt2d 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm3, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
@ -1745,9 +1745,9 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask2(<16 x i32>* %vp, <4
define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask2(<16 x i32>* %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm1
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = <2,15,6,9,u,u,u,u>
; CHECK-NEXT: vpermi2d 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,15,6,9]
; CHECK-NEXT: vmovdqa (%rdi), %ymm2
; CHECK-NEXT: vpermt2d 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
@ -1823,9 +1823,9 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp,
define <4 x i32> @test_16xi32_to_4xi32_perm_mask9(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask9:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = <4,1,u,2,u,u,u,u>
; CHECK-NEXT: vpermps %ymm1, %ymm2, %ymm1
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4,1,0,2]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vpermps %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
@ -2035,7 +2035,8 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask2(<8 x i64> %vec, <4 x i64
; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,7,2,7]
; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [2,7,2,7]
; CHECK-NEXT: # ymm4 = mem[0,1,0,1]
; CHECK-NEXT: vpermi2q %ymm0, %ymm3, %ymm4
; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
@ -2050,7 +2051,8 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask2(<8 x i64> %vec, <4 x i
; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,7,2,7]
; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,7,2,7]
; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vpermi2q %ymm0, %ymm3, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
@ -2695,7 +2697,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask0(<8 x float> %vec
define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <1,3,5,0,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [1,3,5,0]
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
@ -2711,7 +2713,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec,
define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,0,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [1,3,5,0]
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
@ -2726,7 +2728,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec
define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <3,2,7,0,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [3,2,7,0]
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
@ -2742,7 +2744,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec,
define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = <3,2,7,0,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [3,2,7,0]
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
@ -2757,7 +2759,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec
define <4 x float> @test_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec) {
; CHECK-LABEL: test_8xfloat_to_4xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = <3,3,5,2,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [3,3,5,2]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
@ -2768,7 +2770,7 @@ define <4 x float> @test_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec) {
define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <3,3,5,2,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [3,3,5,2]
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
@ -2784,7 +2786,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec,
define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = <3,3,5,2,u,u,u,u>
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [3,3,5,2]
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
@ -3103,9 +3105,9 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask3(<16 x float> %v
define <4 x float> @test_16xfloat_to_4xfloat_perm_mask0(<16 x float> %vec) {
; CHECK-LABEL: test_16xfloat_to_4xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = <12,0,1,2,u,u,u,u>
; CHECK-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [12,0,1,2]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; CHECK-NEXT: vpermt2ps %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@ -3115,9 +3117,9 @@ define <4 x float> @test_16xfloat_to_4xfloat_perm_mask0(<16 x float> %vec) {
define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask0(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = <12,0,1,2,u,u,u,u>
; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm4
; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [12,0,1,2]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm4
; CHECK-NEXT: vpermt2ps %ymm0, %ymm3, %ymm4
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %xmm0, %xmm2, %k1
; CHECK-NEXT: vblendmps %xmm4, %xmm1, %xmm0 {%k1}
@ -3132,9 +3134,9 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask0(<16 x float> %vec
define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask0(<16 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <12,0,1,2,u,u,u,u>
; CHECK-NEXT: vpermi2ps %ymm0, %ymm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [12,0,1,2]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vpermt2ps %ymm0, %ymm2, %ymm3
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %xmm0, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z}
@ -3543,9 +3545,9 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask2(<16 x float
define <4 x float> @test_16xfloat_to_4xfloat_perm_mem_mask3(<16 x float>* %vp) {
; CHECK-LABEL: test_16xfloat_to_4xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = <3,3,15,9,u,u,u,u>
; CHECK-NEXT: vpermi2ps 32(%rdi), %ymm1, %ymm0
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [3,3,15,9]
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vpermt2ps 32(%rdi), %ymm1, %ymm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@ -3556,9 +3558,9 @@ define <4 x float> @test_16xfloat_to_4xfloat_perm_mem_mask3(<16 x float>* %vp) {
define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask3(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <3,3,15,9,u,u,u,u>
; CHECK-NEXT: vpermi2ps 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [3,3,15,9]
; CHECK-NEXT: vmovaps (%rdi), %ymm3
; CHECK-NEXT: vpermt2ps 32(%rdi), %ymm2, %ymm3
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1}
@ -3574,9 +3576,9 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask3(<16 x float>*
define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask3(<16 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = <3,3,15,9,u,u,u,u>
; CHECK-NEXT: vpermi2ps 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [3,3,15,9]
; CHECK-NEXT: vmovaps (%rdi), %ymm2
; CHECK-NEXT: vpermt2ps 32(%rdi), %ymm1, %ymm2
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1} {z}
@ -3733,7 +3735,8 @@ define <4 x double> @test_8xdouble_to_4xdouble_perm_mask0(<8 x double> %vec) {
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [3,7,3,7]
; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [3,7,3,7]
; CHECK-NEXT: # ymm1 = mem[0,1,0,1]
; CHECK-NEXT: vpermi2pd %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -3744,7 +3747,8 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask0(<8 x double> %v
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [3,7,3,7]
; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [3,7,3,7]
; CHECK-NEXT: # ymm4 = mem[0,1,0,1]
; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm4
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
@ -3760,7 +3764,8 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask0(<8 x double>
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [3,7,3,7]
; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [3,7,3,7]
; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm2 {%k1} {z}

View File

@ -1391,7 +1391,8 @@ define <4 x i64> @f4xi64_i128(<4 x i64> %a) {
;
; ALL32-LABEL: f4xi64_i128:
; ALL32: # %bb.0:
; ALL32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
; ALL32-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
@ -1431,14 +1432,15 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddq %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0]
; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; AVX2-LABEL: f8xi64_i128:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@ -1447,7 +1449,8 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
;
; AVX512-LABEL: f8xi64_i128:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0]
; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
@ -1521,7 +1524,8 @@ define <8 x i64> @f8xi64_i256(<8 x i64> %a) {
;
; AVX512-LABEL: f8xi64_i256:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,0,0,1,0,2,0,3,0]
; AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,0,0,1,0,2,0,3,0]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl

View File

@ -50,17 +50,30 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT: retl
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2: ## %bb.0:
@ -524,9 +537,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm4
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
@ -544,7 +558,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3
@ -1239,18 +1253,19 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; X86-AVX1-NEXT: ## xmm3 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; X86-AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm5
; X86-AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm2
; X86-AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
@ -1268,7 +1283,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3

View File

@ -51,17 +51,30 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT: retl
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2: ## %bb.0:
@ -462,9 +475,10 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm4
; X86-AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
@ -482,7 +496,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
@ -1141,19 +1155,20 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; X86-AVX1-NEXT: ## xmm3 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; X86-AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
; X86-AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
; X86-AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm4
; X86-AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
@ -1170,7 +1185,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3

View File

@ -179,10 +179,12 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
;
; X32-AVX-LABEL: clamp_sitofp_2i64_2f64:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967041,4294967295,4294967041,4294967295]
; X32-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN]
; X32-AVX-NEXT: # xmm1 = mem[0,0]
; X32-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X32-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X32-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,255,0]
; X32-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [1.2598673968951787E-321,1.2598673968951787E-321]
; X32-AVX-NEXT: # xmm1 = mem[0,0]
; X32-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X32-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X32-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]

View File

@ -7,7 +7,7 @@ define void @compressstore_v16f32_const(float* %base, <16 x float> %V) {
; HSW-LABEL: compressstore_v16f32_const:
; HSW: # %bb.0: # %cond.store
; HSW-NEXT: vmovups %ymm0, (%rdi)
; HSW-NEXT: vmovaps {{.*#+}} ymm0 = <0,1,2,4,u,u,u,u>
; HSW-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,2,4]
; HSW-NEXT: vpermps %ymm1, %ymm0, %ymm0
; HSW-NEXT: vmovups %xmm0, 32(%rdi)
; HSW-NEXT: vextractf128 $1, %ymm1, %xmm0

View File

@ -1379,13 +1379,14 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
; AVX2-SLOW-NEXT: vmovups (%rdi), %ymm0
; AVX2-SLOW-NEXT: vmovups 32(%rdi), %ymm1
; AVX2-SLOW-NEXT: vmovups 64(%rdi), %ymm2
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm3 = <u,u,u,u,u,u,2,5>
; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
; AVX2-SLOW-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
; AVX2-SLOW-NEXT: vpermps %ymm4, %ymm5, %ymm4
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm4 = <u,u,u,u,u,0,3,6>
; AVX2-SLOW-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [0,0,3,6,0,0,3,6]
; AVX2-SLOW-NEXT: # ymm4 = mem[0,1,0,1]
; AVX2-SLOW-NEXT: vpermps %ymm2, %ymm4, %ymm4
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm5 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm6 = <1,4,7,2,5,u,u,u>
@ -1408,13 +1409,14 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
; AVX2-FAST-NEXT: vmovups (%rdi), %ymm0
; AVX2-FAST-NEXT: vmovups 32(%rdi), %ymm1
; AVX2-FAST-NEXT: vmovups 64(%rdi), %ymm2
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = <u,u,u,u,u,u,2,5>
; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
; AVX2-FAST-NEXT: vpermps %ymm4, %ymm5, %ymm4
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = <u,u,u,u,u,0,3,6>
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [0,0,3,6,0,0,3,6]
; AVX2-FAST-NEXT: # ymm4 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm4, %ymm4
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm5 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm6 = <1,4,7,2,5,u,u,u>
@ -1645,7 +1647,8 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm4[1],ymm2[2,3],ymm4[4],ymm2[5,6],ymm4[7]
; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [1,0,2,2,1,0,2,2]
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [1,0,2,2,1,0,2,2]
; AVX2-FAST-NEXT: # ymm4 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]

View File

@ -193,7 +193,8 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
;
; X86-AVX2-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [63,0,0,0,63,0,0,0]
; X86-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [63,0,0,0,63,0,0,0]
; X86-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2

View File

@ -21,7 +21,7 @@ define void @f_f___un_3C_unf_3E_un_3C_unf_3E_(<16 x i1> %x) {
; CHECK-NEXT: vpmovd2m %zmm0, %k1
; CHECK-NEXT: vmovapd 0, %zmm0
; CHECK-NEXT: vmovapd 64, %zmm1
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16]
; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm2 = [3.3951932655444357E-313,3.3951932655444357E-313,3.3951932655444357E-313,3.3951932655444357E-313,3.3951932655444357E-313,3.3951932655444357E-313,3.3951932655444357E-313,3.3951932655444357E-313]
; CHECK-NEXT: kshiftrw $8, %k1, %k2
; CHECK-NEXT: vorpd %zmm2, %zmm1, %zmm1 {%k2}
; CHECK-NEXT: vorpd %zmm2, %zmm0, %zmm0 {%k1}

View File

@ -215,7 +215,8 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vptestnmb %ymm0, %ymm0, %k0
; AVX512VLBW-NEXT: vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VLBW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VLBW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512VLBW-NEXT: vpmovw2m %zmm0, %k0
; AVX512VLBW-NEXT: vpmovm2b %k0, %ymm0
@ -226,7 +227,8 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0

View File

@ -278,9 +278,9 @@ define void @shuffle_v32i16_to_v8i16_1(<32 x i16>* %L, <8 x i16>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_1:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [1,5,9,13,17,21,25,29]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -345,9 +345,9 @@ define void @shuffle_v32i16_to_v8i16_2(<32 x i16>* %L, <8 x i16>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_2:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,6,10,14,18,22,26,30,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [2,6,10,14,18,22,26,30]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -412,9 +412,9 @@ define void @shuffle_v32i16_to_v8i16_3(<32 x i16>* %L, <8 x i16>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_3:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <3,7,11,15,19,23,27,31,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [3,7,11,15,19,23,27,31]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -559,9 +559,9 @@ define void @shuffle_v64i8_to_v8i8_2(<64 x i8>* %L, <8 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8_2:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [1,5,9,13,17,21,25,29]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovwb %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -706,9 +706,9 @@ define void @shuffle_v64i8_to_v8i8_4(<64 x i8>* %L, <8 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8_4:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <2,6,10,14,18,22,26,30,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [2,6,10,14,18,22,26,30]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovwb %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -853,9 +853,9 @@ define void @shuffle_v64i8_to_v8i8_6(<64 x i8>* %L, <8 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8_6:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <3,7,11,15,19,23,27,31,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [3,7,11,15,19,23,27,31]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovwb %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq

View File

@ -398,9 +398,9 @@ define void @shuffle_v64i8_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind {
;
; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v16i8:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2b 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60]
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512VBMIVL-NEXT: vpermt2b 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
@ -479,9 +479,9 @@ define void @shuffle_v32i16_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -506,9 +506,9 @@ define void @shuffle_v32i16_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind {
;
; AVX512VBMIVL-LABEL: shuffle_v32i16_to_v8i16:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28]
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512VBMIVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
@ -626,7 +626,7 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v8i8:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,8,16,24,32,40,48,56,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4048780183313844224,4048780183313844224,4048780183313844224,4048780183313844224]
; AVX512VBMIVL-NEXT: vpermi2b 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovq %xmm1, (%rsi)
; AVX512VBMIVL-NEXT: vzeroupper
@ -737,10 +737,10 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
;
; AVX512VBMIVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61]
; AVX512VBMIVL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2b %ymm2, %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMIVL-NEXT: vpermt2b %ymm2, %ymm1, %ymm0
; AVX512VBMIVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
%res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
@ -828,10 +828,10 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
;
; AVX512VBMIVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62]
; AVX512VBMIVL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2b %ymm2, %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMIVL-NEXT: vpermt2b %ymm2, %ymm1, %ymm0
; AVX512VBMIVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
%res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 62>
@ -878,7 +878,7 @@ define <4 x double> @PR34175(<32 x i16>* %p) {
; AVX512BWVL-LABEL: PR34175:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6755468161056768,6755468161056768,6755468161056768,6755468161056768]
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX512BWVL-NEXT: vcvtdq2pd %xmm0, %ymm0
@ -899,7 +899,7 @@ define <4 x double> @PR34175(<32 x i16>* %p) {
; AVX512VBMIVL-LABEL: PR34175:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqu (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6755468161056768,6755468161056768,6755468161056768,6755468161056768]
; AVX512VBMIVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX512VBMIVL-NEXT: vcvtdq2pd %xmm0, %ymm0

View File

@ -398,9 +398,9 @@ define void @shuffle_v64i8_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind {
;
; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v16i8:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2b 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60]
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512VBMIVL-NEXT: vpermt2b 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
@ -479,9 +479,9 @@ define void @shuffle_v32i16_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -506,9 +506,9 @@ define void @shuffle_v32i16_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind {
;
; AVX512VBMIVL-LABEL: shuffle_v32i16_to_v8i16:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28]
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512VBMIVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi)
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
@ -589,9 +589,9 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28]
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovwb %xmm1, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -616,9 +616,9 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
;
; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v8i8:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4,8,12,16,20,24,28,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28]
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1
; AVX512VBMIVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vpmovwb %xmm1, (%rsi)
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
@ -728,10 +728,10 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
;
; AVX512VBMIVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61]
; AVX512VBMIVL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2b %ymm2, %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMIVL-NEXT: vpermt2b %ymm2, %ymm1, %ymm0
; AVX512VBMIVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
%res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
@ -819,10 +819,10 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
;
; AVX512VBMIVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62]
; AVX512VBMIVL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpermi2b %ymm2, %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMIVL-NEXT: vpermt2b %ymm2, %ymm1, %ymm0
; AVX512VBMIVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMIVL-NEXT: vzeroupper
; AVX512VBMIVL-NEXT: retq
%res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 62>
@ -869,7 +869,7 @@ define <4 x double> @PR34175(<32 x i16>* %p) {
; AVX512BWVL-LABEL: PR34175:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6755468161056768,6755468161056768,6755468161056768,6755468161056768]
; AVX512BWVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512BWVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX512BWVL-NEXT: vcvtdq2pd %xmm0, %ymm0
@ -890,7 +890,7 @@ define <4 x double> @PR34175(<32 x i16>* %p) {
; AVX512VBMIVL-LABEL: PR34175:
; AVX512VBMIVL: # %bb.0:
; AVX512VBMIVL-NEXT: vmovdqu (%rdi), %ymm0
; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMIVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6755468161056768,6755468161056768,6755468161056768,6755468161056768]
; AVX512VBMIVL-NEXT: vpermi2w 32(%rdi), %ymm0, %ymm1
; AVX512VBMIVL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX512VBMIVL-NEXT: vcvtdq2pd %xmm0, %ymm0

View File

@ -874,8 +874,9 @@ define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
;
; X32-AVX512-LABEL: fallback_broadcast_v4i64_to_v8i64:
; X32-AVX512: # %bb.0: # %entry
; X32-AVX512-NEXT: vpaddq {{\.LCPI.*}}, %ymm0, %ymm0
; X32-AVX512-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,2,0,3,0,4,0,1,0,2,0,3,0,4,0]
; X32-AVX512-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,2,0,3,0,4,0]
; X32-AVX512-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; X32-AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; X32-AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; X32-AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm1
; X32-AVX512-NEXT: vmovdqu %ymm0, ga4

View File

@ -2048,7 +2048,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpsllw $2, %ymm2, %ymm4
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
@ -2074,7 +2075,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm4
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
@ -2100,7 +2102,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm4
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4

View File

@ -1113,7 +1113,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm4
; AVX512F-NEXT: vpsllw $2, %ymm4, %ymm7
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -1162,7 +1163,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm4
; AVX512VL-NEXT: vpsllw $2, %ymm4, %ymm7
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -1211,7 +1213,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512BW-LABEL: constant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
@ -1242,7 +1245,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VBMI2-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VBMI2-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
@ -1273,7 +1277,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512VLBW-LABEL: constant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VLBW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VLBW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
@ -1304,7 +1309,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VLVBMI2-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VLVBMI2-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3

View File

@ -1184,7 +1184,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
@ -1208,7 +1209,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
@ -1232,7 +1234,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3

View File

@ -561,7 +561,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -607,7 +608,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -653,7 +655,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
;
; AVX512BW-LABEL: constant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
@ -680,7 +683,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
;
; AVX512VLBW-LABEL: constant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VLBW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VLBW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2

View File

@ -2063,7 +2063,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
@ -2089,7 +2090,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512F-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
@ -2115,7 +2117,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VL-NEXT: # ymm3 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2

View File

@ -1109,7 +1109,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512F-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -1158,7 +1159,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VL-NEXT: # ymm6 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -1207,7 +1209,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512BW-LABEL: constant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
@ -1237,7 +1240,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VBMI2-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VBMI2-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
@ -1267,7 +1271,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512VLBW-LABEL: constant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VLBW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VLBW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
@ -1297,7 +1302,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VLVBMI2-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536,57600,41152,24704,8256,8448,24640,41088,57536]
; AVX512VLVBMI2-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3

View File

@ -1261,7 +1261,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
@ -1285,7 +1286,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512F-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
@ -1309,7 +1311,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3

View File

@ -581,7 +581,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512F-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -627,7 +628,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VL-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -673,7 +675,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
;
; AVX512BW-LABEL: constant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
@ -700,7 +703,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
;
; AVX512VLBW-LABEL: constant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VLBW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536]
; AVX512VLBW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2

View File

@ -1177,7 +1177,8 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
@ -1201,7 +1202,8 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX512F-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
@ -1225,7 +1227,8 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3

View File

@ -542,7 +542,8 @@ define <64 x i8> @constant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512F-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -588,7 +589,8 @@ define <64 x i8> @constant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
; AVX512VL-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -634,7 +636,8 @@ define <64 x i8> @constant_rotate_v64i8(<64 x i8> %a) nounwind {
;
; AVX512BW-LABEL: constant_rotate_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
@ -661,7 +664,8 @@ define <64 x i8> @constant_rotate_v64i8(<64 x i8> %a) nounwind {
;
; AVX512VLBW-LABEL: constant_rotate_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
; AVX512VLBW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
; AVX512VLBW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2

View File

@ -89,7 +89,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; X32-AVX1-LABEL: var_shift_v4i64:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X32-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; X32-AVX1-NEXT: # xmm3 = mem[0,0]
; X32-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
; X32-AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
; X32-AVX1-NEXT: vpsrlq %xmm5, %xmm3, %xmm6

View File

@ -958,7 +958,8 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
@ -991,7 +992,8 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
@ -1013,7 +1015,8 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQVL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm1
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
@ -1058,7 +1061,8 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; X32-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; X32-AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; X32-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1

View File

@ -208,7 +208,8 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: # ymm4 = mem[0,1,0,1]
; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@ -230,7 +231,8 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2

View File

@ -726,7 +726,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_0
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,0,0,0,1]
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,1,0,0,0,1]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
@ -739,7 +740,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_0
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
@ -764,7 +766,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_0
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
@ -777,7 +780,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_0
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0]
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
@ -802,7 +806,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_0
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
@ -815,7 +820,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_0
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0]
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
@ -843,7 +849,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_0
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
@ -871,7 +878,8 @@ define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_0
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -899,7 +907,8 @@ define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_0
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -927,7 +936,8 @@ define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_0
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -3798,7 +3808,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_2
;
; AVX2-FAST-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,4,4,5,6,4]
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [4,5,6,4,4,5,6,4]
; AVX2-FAST-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15,16,17,18,19,20,21,18,19,24,25,26,27,30,31,30,31]
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]

View File

@ -1336,7 +1336,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_
;
; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX512VLVBMI-FAST: # %bb.0:
; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX512VLVBMI-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX512VLVBMI-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
@ -1370,7 +1371,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_
;
; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX512VLVBMI-FAST: # %bb.0:
; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX512VLVBMI-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX512VLVBMI-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
@ -1404,7 +1406,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_
;
; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST: # %bb.0:
; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -1438,7 +1441,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_
;
; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST: # %bb.0:
; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -1472,7 +1476,8 @@ define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST: # %bb.0:
; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -1514,7 +1519,8 @@ define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
;
; AVX512VLVBMI-FAST-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST: # %bb.0:
; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VLVBMI-FAST-NEXT: vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

View File

@ -750,7 +750,8 @@ define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
;
; AVX2-SLOW-LABEL: shuffle_v8f32_c348cda0:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
; AVX2-SLOW-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,5,2,0,4,5,2,0]
; AVX2-SLOW-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,2,0,4,7,6,4]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
@ -761,7 +762,8 @@ define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,3,4,7,4,7,2,0]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,5,2,0,4,5,2,0]
; AVX2-FAST-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX2-FAST-NEXT: retq
@ -829,7 +831,8 @@ define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
;
; AVX2-FAST-LABEL: shuffle_v8f32_32103210:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
@ -841,8 +844,9 @@ define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
;
; AVX512VL-FAST-LABEL: shuffle_v8f32_32103210:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %shuffle
@ -863,7 +867,8 @@ define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
;
; AVX2-FAST-LABEL: shuffle_v8f32_76547654:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
@ -875,8 +880,9 @@ define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
;
; AVX512VL-FAST-LABEL: shuffle_v8f32_76547654:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %shuffle
@ -2080,7 +2086,8 @@ define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-FAST-LABEL: shuffle_v8i32_32103210:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
@ -2092,8 +2099,9 @@ define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_32103210:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
ret <8 x i32> %shuffle
@ -2114,7 +2122,8 @@ define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-FAST-LABEL: shuffle_v8i32_76547654:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
@ -2126,8 +2135,9 @@ define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_76547654:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX512VL-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
@ -3000,13 +3010,15 @@ define <8 x i32> @lowhalf_v8i32(<8 x i32> %x, <8 x i32> %y) {
; AVX2-LABEL: lowhalf_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,6,3,6,u,u,u,u>
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,6,3,6,2,6,3,6]
; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: lowhalf_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <2,14,3,14,u,u,u,u>
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,14,3,14,2,14,3,14]
; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512VL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
@ -3026,13 +3038,15 @@ define <8 x float> @lowhalf_v8f32(<8 x float> %x, <8 x float> %y) {
; AVX2-LABEL: lowhalf_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,6,3,6,u,u,u,u>
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,6,3,6,2,6,3,6]
; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: lowhalf_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = <2,14,3,14,u,u,u,u>
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,14,3,14,2,14,3,14]
; AVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 14, i32 3, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>

View File

@ -325,7 +325,7 @@ define <4 x i32> @test_v16i32_0_4_8_12(<16 x i32> %v) {
; ALL-NEXT: vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,0,4,u,u,u,u>
; ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [17179869184,17179869184,17179869184,17179869184]
; ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; ALL-NEXT: vzeroupper

View File

@ -338,10 +338,10 @@ define <8 x i16> @pr32967(<32 x i16> %v) {
;
; SKX-LABEL: pr32967:
; SKX: ## %bb.0:
; SKX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29]
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u>
; SKX-NEXT: vpermi2w %ymm2, %ymm0, %ymm1
; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: vpermt2w %ymm2, %ymm1, %ymm0
; SKX-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%shuffle = shufflevector <32 x i16> %v, <32 x i16> undef, <8 x i32> <i32 1,i32 5,i32 9,i32 13,i32 17,i32 21,i32 25,i32 29>

View File

@ -212,13 +212,15 @@ define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_08080808:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
; AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
; AVX512F-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08080808:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
; AVX512F-32-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
; AVX512F-32-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
@ -1084,13 +1086,15 @@ define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_08080808:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
; AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
; AVX512F-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08080808:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
; AVX512F-32-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
; AVX512F-32-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>

View File

@ -373,7 +373,7 @@ define void @PR39483() {
; X86-AVX512: # %bb.0: # %entry
; X86-AVX512-NEXT: vmovups 0, %zmm0
; X86-AVX512-NEXT: vmovups 64, %ymm1
; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm2 = <2,5,8,11,14,17,20,23,u,u,u,u,u,u,u,u>
; X86-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,8,11,14,17,20,23]
; X86-AVX512-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2
; X86-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-AVX512-NEXT: vmulps %ymm0, %ymm2, %ymm1
@ -416,7 +416,7 @@ define void @PR39483() {
; X64-AVX512: # %bb.0: # %entry
; X64-AVX512-NEXT: vmovups 0, %zmm0
; X64-AVX512-NEXT: vmovups 64, %ymm1
; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm2 = <2,5,8,11,14,17,20,23,u,u,u,u,u,u,u,u>
; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,8,11,14,17,20,23]
; X64-AVX512-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2
; X64-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmulps %ymm0, %ymm2, %ymm1

View File

@ -61,7 +61,8 @@ define <16 x i8> @combine_vpermi2var_16i8_as_vpshufb(<16 x i8> %x0, <16 x i8> %x
define <32 x i8> @combine_vpermi2var_32i8_as_vpermb(<32 x i8> %x0, <32 x i8> %x1) {
; CHECK-LABEL: combine_vpermi2var_32i8_as_vpermb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: # ymm1 = mem[0,1,0,1]
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
@ -71,7 +72,8 @@ define <32 x i8> @combine_vpermi2var_32i8_as_vpermb(<32 x i8> %x0, <32 x i8> %x1
define <64 x i8> @combine_vpermi2var_64i8_as_vpermb(<64 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: combine_vpermi2var_64i8_as_vpermb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <64 x i8> %x0, <64 x i8> %x1, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
@ -94,7 +96,8 @@ define <16 x i8> @combine_vpermt2var_vpermi2var_16i8_as_vperm2(<16 x i8> %x0, <1
define <32 x i8> @combine_vpermi2var_32i8_as_vperm2(<32 x i8> %x0, <32 x i8> %x1) {
; CHECK-LABEL: combine_vpermi2var_32i8_as_vperm2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: # ymm2 = mem[0,1,0,1]
; CHECK-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>

View File

@ -2465,7 +2465,8 @@ define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
; AVX2-FAST-LABEL: combine_unneeded_subvector1:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
%b = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>

View File

@ -254,7 +254,8 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; VL_BW_DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; VL_BW_DQ-NEXT: vpmovb2m %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2b %k0, %ymm0
@ -308,7 +309,8 @@ define <32 x i16> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vptestnmw %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm3, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1}
@ -364,7 +366,8 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vptestnmb %ymm0, %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm3, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmb %ymm1, %ymm2, %ymm0 {%k1}
@ -412,7 +415,8 @@ define <32 x i16> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_
; VL_BW_DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
; VL_BW_DQ-NEXT: kunpckwd %k0, %k1, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmw %zmm2, %zmm3, %zmm0 {%k1}
@ -462,7 +466,8 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; VL_BW_DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
; VL_BW_DQ-NEXT: kunpckwd %k0, %k1, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmb %ymm2, %ymm3, %ymm0 {%k1}