Revert r322279 due to Skylake miscompile.

Summary:
Revision r322279 causes codegen for Skylake (and apparently only Skylake) to
fail in certain cases. Details: https://bugs.llvm.org/show_bug.cgi?id=35918

Subscribers: sanjoy, llvm-commits

Differential Revision: https://reviews.llvm.org/D41972

llvm-svn: 322335
This commit is contained in:
David L. Jones 2018-01-12 00:17:38 +00:00
parent 1af6c114cc
commit 8c87213c26
8 changed files with 477 additions and 443 deletions

View File

@ -5940,17 +5940,6 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
unsigned Opcode = N.getOpcode();
switch (Opcode) {
case ISD::VECTOR_SHUFFLE: {
// Don't treat ISD::VECTOR_SHUFFLE as a target shuffle so decode it here.
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask();
if (isUndefOrInRange(ShuffleMask, 0, 2 * NumElts)) {
Mask.append(ShuffleMask.begin(), ShuffleMask.end());
Ops.push_back(N.getOperand(0));
Ops.push_back(N.getOperand(1));
return true;
}
return false;
}
case ISD::AND:
case X86ISD::ANDNP: {
// Attempt to decode as a per-byte mask.

View File

@ -9,7 +9,9 @@ define void @prom_bug(<4 x i8> %t, i16* %p) {
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: pextrw $0, %xmm0, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movw %ax, (%rdi)
; SSE2-NEXT: retq
;

View File

@ -1386,39 +1386,39 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
; SSE2-LABEL: mul_v8i64_sext:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm1[4],xmm9[5],xmm1[5],xmm9[6],xmm1[6],xmm9[7],xmm1[7]
; SSE2-NEXT: movdqa %xmm9, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $16, %xmm9
; SSE2-NEXT: punpckldq {{.*#+}} xmm9 = xmm9[0],xmm0[0],xmm9[1],xmm0[1]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm8 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm8, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3]
; SSE2-NEXT: movdqa %xmm8, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm8
; SSE2-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm3[0],xmm8[1],xmm3[1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm9 = xmm9[0],xmm1[0],xmm9[1],xmm1[1],xmm9[2],xmm1[2],xmm9[3],xmm1[3]
; SSE2-NEXT: movdqa %xmm9, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm9
; SSE2-NEXT: punpckldq {{.*#+}} xmm9 = xmm9[0],xmm1[0],xmm9[1],xmm1[1]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
; SSE2-NEXT: movdqa %xmm7, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm7
; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
; SSE2-NEXT: movdqa %xmm3, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[2,3,0,1]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
; SSE2-NEXT: movdqa %xmm2, %xmm5
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
@ -1432,26 +1432,26 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
; SSE2-NEXT: psllq $32, %xmm6
; SSE2-NEXT: pmuludq %xmm4, %xmm0
; SSE2-NEXT: paddq %xmm6, %xmm0
; SSE2-NEXT: movdqa %xmm7, %xmm4
; SSE2-NEXT: psrlq $32, %xmm4
; SSE2-NEXT: pmuludq %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: psrlq $32, %xmm5
; SSE2-NEXT: pmuludq %xmm7, %xmm5
; SSE2-NEXT: paddq %xmm4, %xmm5
; SSE2-NEXT: psllq $32, %xmm5
; SSE2-NEXT: pmuludq %xmm7, %xmm1
; SSE2-NEXT: paddq %xmm5, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrlq $32, %xmm4
; SSE2-NEXT: pmuludq %xmm9, %xmm4
; SSE2-NEXT: movdqa %xmm9, %xmm5
; SSE2-NEXT: pmuludq %xmm7, %xmm4
; SSE2-NEXT: movdqa %xmm7, %xmm5
; SSE2-NEXT: psrlq $32, %xmm5
; SSE2-NEXT: pmuludq %xmm2, %xmm5
; SSE2-NEXT: paddq %xmm4, %xmm5
; SSE2-NEXT: psllq $32, %xmm5
; SSE2-NEXT: pmuludq %xmm9, %xmm2
; SSE2-NEXT: pmuludq %xmm7, %xmm2
; SSE2-NEXT: paddq %xmm5, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrlq $32, %xmm4
; SSE2-NEXT: pmuludq %xmm9, %xmm4
; SSE2-NEXT: movdqa %xmm9, %xmm5
; SSE2-NEXT: psrlq $32, %xmm5
; SSE2-NEXT: pmuludq %xmm1, %xmm5
; SSE2-NEXT: paddq %xmm4, %xmm5
; SSE2-NEXT: psllq $32, %xmm5
; SSE2-NEXT: pmuludq %xmm9, %xmm1
; SSE2-NEXT: paddq %xmm5, %xmm1
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrlq $32, %xmm4
; SSE2-NEXT: pmuludq %xmm8, %xmm4

View File

@ -14,9 +14,11 @@ define <3 x i16> @zext_i8(<3 x i8>) {
; SSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSE3-NEXT: pextrw $0, %xmm0, %eax
; SSE3-NEXT: pxor %xmm1, %xmm1
; SSE3-NEXT: pextrw $1, %xmm0, %edx
; SSE3-NEXT: pextrw $2, %xmm0, %ecx
; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: # kill: def %ax killed %ax killed %eax
; SSE3-NEXT: # kill: def %dx killed %dx killed %edx
; SSE3-NEXT: # kill: def %cx killed %cx killed %ecx

View File

@ -2095,18 +2095,25 @@ define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind {
;
; AVX1-LABEL: psubus_16i32_max:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpminud %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
; AVX1-NEXT: vpminud %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpminud %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpminud %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpminud %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsubusw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;

File diff suppressed because it is too large Load Diff

View File

@ -449,7 +449,7 @@ define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,2,u,u,u,3,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,2,u,u,u,3,u,u,u],zero,xmm1[u,u,u],zero
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: psrad $24, %xmm1
@ -530,7 +530,7 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
;
; SSSE3-LABEL: sext_16i8_to_8i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <u,u,u,2,u,u,u,3,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <u,u,u,2,u,u,u,3,u,u,u,255,u,u,u,255>
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
@ -836,7 +836,8 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $16, %xmm1
@ -851,7 +852,8 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: psrad $16, %xmm1
@ -911,13 +913,14 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $16, %xmm3
@ -937,13 +940,14 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSSE3-NEXT: psrad $31, %xmm3
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: psrad $16, %xmm3
@ -1291,7 +1295,8 @@ define <2 x i64> @load_sext_2i8_to_2i64(<2 x i8> *%ptr) {
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,0,u,u,u,1,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $24, %xmm0
@ -5065,7 +5070,8 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
; SSSE3: # %bb.0:
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,0,u,u,u,1,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSSE3-NEXT: paddq %xmm0, %xmm0

View File

@ -368,10 +368,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movdqa {{.*#+}} xmm0 = [40606,0,158,0]
; X86-NEXT: pextrw $0, %xmm0, (%edx)
; X86-NEXT: movb $-98, 2(%edx)
; X86-NEXT: movw $-24930, (%edx) # imm = 0x9E9E
; X86-NEXT: movdqa {{.*#+}} xmm0 = [257,0,1,0]
; X86-NEXT: pextrw $0, %xmm0, (%ecx)
; X86-NEXT: movb $1, 2(%ecx)
; X86-NEXT: movw $257, (%ecx) # imm = 0x101
; X86-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrld $1, %xmm1
@ -384,10 +386,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
;
; X64-LABEL: rot:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa {{.*#+}} xmm0 = [40606,158]
; X64-NEXT: pextrw $0, %xmm0, (%rsi)
; X64-NEXT: movb $-98, 2(%rsi)
; X64-NEXT: movw $-24930, (%rsi) # imm = 0x9E9E
; X64-NEXT: movdqa {{.*#+}} xmm0 = [257,1]
; X64-NEXT: pextrw $0, %xmm0, (%rdx)
; X64-NEXT: movb $1, 2(%rdx)
; X64-NEXT: movw $257, (%rdx) # imm = 0x101
; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrld $1, %xmm1