[X86][SSE] Add a test case showing a missed constant-folding opportunity for a PSHUFB target shuffle.

It also shows that an unnecessary pshufb/broadcast is emitted, even though the original pshufb mask only requests the lowest byte.

llvm-svn: 295046
This commit is contained in:
Simon Pilgrim 2017-02-14 11:20:11 +00:00
parent 258ff25bbc
commit 75dda50ebe
1 changed file with 35 additions and 0 deletions

View File

@ -586,3 +586,38 @@ define <16 x i8> @constant_fold_pshufb() {
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6>) %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6>)
ret <16 x i8> %1 ret <16 x i8> %1
} }
; FIXME - unnecessary pshufb/broadcast being used - pshufb mask only needs lowest byte.
define <16 x i8> @constant_fold_pshufb_2() {
; SSE-LABEL: constant_fold_pshufb_2:
; SSE: # BB#0:
; SSE-NEXT: movl $2, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pshufb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: constant_fold_pshufb_2:
; AVX1: # BB#0:
; AVX1-NEXT: movl $2, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_fold_pshufb_2:
; AVX2: # BB#0:
; AVX2-NEXT: movl $2, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: constant_fold_pshufb_2:
; AVX512F: # BB#0:
; AVX512F-NEXT: movl $2, %eax
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX512F-NEXT: retq
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 2, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
ret <16 x i8> %1
}