[x86] Enable some support for lowerVectorShuffleWithUndefHalf with AVX-512
Summary: This teaches 512-bit shuffles to detect unused halves in order to reduce shuffle size. We may need to refine the 512-bit exit point. I couldn't remember if we had good cross lane shuffles for 8/16 bit with AVX-512 or not. I believe this is a step towards being able to handle D36454 without a special case. From here we need to improve our ability to combine extract_subvector with insert_subvector and other extract_subvectors. And we need to support narrowing binary operations where we don't demand all elements. This may be improvements to DAGCombiner::narrowExtractedVectorBinOp (by recognizing an insert_subvector in addition to concat) or we may need a target specific combiner. Reviewers: RKSimon, zvi, delena, jbhateja Reviewed By: RKSimon, jbhateja Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D36601 llvm-svn: 310724
This commit is contained in:
parent
9c52574886
commit
0f30fe9634
|
@ -12256,7 +12256,7 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
|
|||
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
|
||||
}
|
||||
|
||||
/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
|
||||
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
|
||||
/// This allows for fast cases such as subvector extraction/insertion
|
||||
/// or shuffling smaller vector types which can lower more efficiently.
|
||||
static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
|
||||
|
@ -12264,7 +12264,8 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
|
|||
ArrayRef<int> Mask,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
assert(VT.is256BitVector() && "Expected 256-bit vector");
|
||||
assert((VT.is256BitVector() || VT.is512BitVector()) &&
|
||||
"Expected 256-bit or 512-bit vector");
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
unsigned HalfNumElts = NumElts / 2;
|
||||
|
@ -12360,6 +12361,10 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
|
|||
}
|
||||
}
|
||||
|
||||
// AVX512 - XXXXuuuu - always extract lowers.
|
||||
if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0))
|
||||
return SDValue();
|
||||
|
||||
auto GetHalfVector = [&](int HalfIdx) {
|
||||
if (HalfIdx < 0)
|
||||
return DAG.getUNDEF(HalfVT);
|
||||
|
@ -13703,6 +13708,11 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
|
||||
return Insertion;
|
||||
|
||||
// Handle special cases where the lower or upper half is UNDEF.
|
||||
if (SDValue V =
|
||||
lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
// Check for being able to broadcast a single element.
|
||||
if (SDValue Broadcast =
|
||||
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
|
||||
|
|
|
@ -323,13 +323,13 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
|
|||
; AVX512-NEXT: cmpq %rcx, %rax
|
||||
; AVX512-NEXT: jne .LBB2_1
|
||||
; AVX512-NEXT: # BB#2: # %middle.block
|
||||
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
|
||||
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
|
|
|
@ -72,13 +72,13 @@ define i32 @sad_16i8() nounwind {
|
|||
; AVX512F-NEXT: addq $4, %rax
|
||||
; AVX512F-NEXT: jne .LBB0_1
|
||||
; AVX512F-NEXT: # BB#2: # %middle.block
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -98,13 +98,13 @@ define i32 @sad_16i8() nounwind {
|
|||
; AVX512BW-NEXT: addq $4, %rax
|
||||
; AVX512BW-NEXT: jne .LBB0_1
|
||||
; AVX512BW-NEXT: # BB#2: # %middle.block
|
||||
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
|
||||
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
|
@ -321,13 +321,13 @@ define i32 @sad_32i8() nounwind {
|
|||
; AVX512F-NEXT: jne .LBB1_1
|
||||
; AVX512F-NEXT: # BB#2: # %middle.block
|
||||
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -349,13 +349,13 @@ define i32 @sad_32i8() nounwind {
|
|||
; AVX512BW-NEXT: jne .LBB1_1
|
||||
; AVX512BW-NEXT: # BB#2: # %middle.block
|
||||
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
|
||||
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
|
@ -794,13 +794,13 @@ define i32 @sad_avx64i8() nounwind {
|
|||
; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpaddd %zmm3, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
|
||||
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -823,13 +823,13 @@ define i32 @sad_avx64i8() nounwind {
|
|||
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
|
||||
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
|
||||
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
|
|
|
@ -262,19 +262,10 @@ define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19
|
|||
}
|
||||
|
||||
define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
|
||||
; AVX512F-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: movw $8, %ax
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: movw $8, %ax
|
||||
; AVX512BW-NEXT: kmovd %eax, %k1
|
||||
; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: retq
|
||||
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
|
||||
; ALL-NEXT: retq
|
||||
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <16 x i32> %c
|
||||
}
|
||||
|
|
|
@ -101,7 +101,7 @@ define <32 x i16> @shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u(<32 x i1
|
|||
;
|
||||
; SKX-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
|
||||
; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
|
||||
; SKX-NEXT: retq
|
||||
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <32 x i16> %c
|
||||
|
@ -115,7 +115,7 @@ define <32 x i16> @shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u(<32 x
|
|||
;
|
||||
; SKX-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
|
||||
; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
|
||||
; SKX-NEXT: retq
|
||||
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <32 x i16> %c
|
||||
|
|
|
@ -5,25 +5,10 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VBMI
|
||||
|
||||
define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a) {
|
||||
; AVX512F-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; AVX512VBMI: # BB#0:
|
||||
; AVX512VBMI-NEXT: vpsrld $16, %zmm0, %zmm0
|
||||
; AVX512VBMI-NEXT: retq
|
||||
; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
%b = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <64 x i8> %b
|
||||
}
|
||||
|
|
|
@ -2241,12 +2241,12 @@ define <8 x double> @shuffle_v8f64_2301uu67(<8 x double> %a0, <8 x double> %a1)
|
|||
define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
|
||||
; AVX512F-LABEL: shuffle_v8f64_2301uuuu:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
|
||||
; AVX512F-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: shuffle_v8f64_2301uuuu:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
|
||||
; AVX512F-32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
|
||||
; AVX512F-32-NEXT: retl
|
||||
%1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <8 x double> %1
|
||||
|
|
Loading…
Reference in New Issue