[LegalizeVectorTypes] Have SplitVecOp_TruncateHelper fall back to SplitVecOp_UnaryOp if splitting the output type would be a legal type.

SplitVecOp_TruncateHelper tries to introduce a multilevel truncate to avoid scalarization. But if splitting the result type would still be a legal type we don't need to do that.

The comment block at the top of the function implied that this was already implemented. I looked back through the history and it doesn't look to have ever been checked.

llvm-svn: 347479
This commit is contained in:
Craig Topper 2018-11-22 22:56:52 +00:00
parent 3e80019275
commit b239763384
5 changed files with 98 additions and 221 deletions

View File

@ -2247,10 +2247,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
// Determine the split output VT. If its legal we can just split dirctly.
EVT LoOutVT, HiOutVT;
std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
assert(LoOutVT == HiOutVT && "Unequal split?");
// If the input elements are only 1/2 the width of the result elements,
// just use the normal splitting. Our trick only work if there's room
// to split more than once.
if (InElementSize <= OutElementSize * 2)
if (isTypeLegal(LoOutVT) ||
InElementSize <= OutElementSize * 2)
return SplitVecOp_UnaryOp(N);
SDLoc DL(N);

View File

@ -463,28 +463,13 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
;
; AVX512BW-LABEL: avg_v48i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512BW-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512BW-NEXT: vpaddd %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512BW-NEXT: vpaddd %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; AVX512BW-NEXT: vpsubd %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubd %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubd %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrld $1, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrld $1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpmovdw %zmm2, %ymm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512BW-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1
; AVX512BW-NEXT: vpavgb (%rsi), %xmm0, %xmm0
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpavgb 32(%rsi), %xmm2, %xmm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, (%rax)

View File

@ -655,23 +655,12 @@ define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
}
define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
; KNL-LABEL: usat_trunc_db_1024:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: usat_trunc_db_1024:
; SKX: ## %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, %ymm0
; SKX-NEXT: retq
; ALL-LABEL: usat_trunc_db_1024:
; ALL: ## %bb.0:
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x6 = trunc <32 x i32> %x5 to <32 x i8>
@ -679,26 +668,14 @@ define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
}
define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
; KNL-LABEL: usat_trunc_db_1024_mem:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: usat_trunc_db_1024_mem:
; SKX: ## %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; ALL-LABEL: usat_trunc_db_1024_mem:
; ALL: ## %bb.0:
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x6 = trunc <32 x i32> %x5 to <32 x i8>
@ -732,13 +709,9 @@ define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
; ALL-LABEL: usat_trunc_qw_1024:
; ALL: ## %bb.0:
; ALL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; ALL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
; ALL-NEXT: vpmovqd %zmm0, %ymm0
; ALL-NEXT: vpmovqd %zmm1, %ymm1
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: vpmovusqw %zmm0, %xmm0
; ALL-NEXT: vpmovusqw %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
%x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
@ -954,29 +927,15 @@ define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
}
define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
; KNL-LABEL: smax_usat_trunc_db_1024:
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: smax_usat_trunc_db_1024:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, %ymm0
; SKX-NEXT: retq
; ALL-LABEL: smax_usat_trunc_db_1024:
; ALL: ## %bb.0:
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@ -986,32 +945,17 @@ define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
}
define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
; KNL-LABEL: smax_usat_trunc_db_1024_mem:
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: smax_usat_trunc_db_1024_mem:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; ALL-LABEL: smax_usat_trunc_db_1024_mem:
; ALL: ## %bb.0:
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>

View File

@ -650,23 +650,12 @@ define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
}
define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
; KNL-LABEL: usat_trunc_db_1024:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: usat_trunc_db_1024:
; SKX: ## %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, %ymm0
; SKX-NEXT: retq
; ALL-LABEL: usat_trunc_db_1024:
; ALL: ## %bb.0:
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x6 = trunc <32 x i32> %x5 to <32 x i8>
@ -674,26 +663,14 @@ define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
}
define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
; KNL-LABEL: usat_trunc_db_1024_mem:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: usat_trunc_db_1024_mem:
; SKX: ## %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; ALL-LABEL: usat_trunc_db_1024_mem:
; ALL: ## %bb.0:
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%x6 = trunc <32 x i32> %x5 to <32 x i8>
@ -726,13 +703,9 @@ define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
; ALL-LABEL: usat_trunc_qw_1024:
; ALL: ## %bb.0:
; ALL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; ALL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
; ALL-NEXT: vpmovqd %zmm0, %ymm0
; ALL-NEXT: vpmovqd %zmm1, %ymm1
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: vpmovusqw %zmm0, %xmm0
; ALL-NEXT: vpmovusqw %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
%x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
@ -950,29 +923,15 @@ define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
}
define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
; KNL-LABEL: smax_usat_trunc_db_1024:
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: smax_usat_trunc_db_1024:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, %ymm0
; SKX-NEXT: retq
; ALL-LABEL: smax_usat_trunc_db_1024:
; ALL: ## %bb.0:
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@ -982,32 +941,17 @@ define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
}
define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
; KNL-LABEL: smax_usat_trunc_db_1024_mem:
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: smax_usat_trunc_db_1024_mem:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: vpmovdw %zmm1, %ymm1
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; ALL-LABEL: smax_usat_trunc_db_1024_mem:
; ALL: ## %bb.0:
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>

View File

@ -588,12 +588,12 @@ define <16 x i16> @test_16f32toub_256(<16 x float>* %ptr, <16 x i16> %passthru)
; CHECK-LABEL: test_16f32toub_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1
; CHECK-NEXT: vpmovdw %ymm1, %xmm1
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm2
; CHECK-NEXT: vpmovdw %ymm2, %xmm2
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT: vpsllw $15, %ymm1, %ymm1
; CHECK-NEXT: vpmovw2m %ymm1, %k1
; CHECK-NEXT: vpslld $31, %ymm1, %ymm1
; CHECK-NEXT: vpmovd2m %ymm1, %k0
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm1
; CHECK-NEXT: vpslld $31, %ymm1, %ymm1
; CHECK-NEXT: vpmovd2m %ymm1, %k1
; CHECK-NEXT: kunpckbw %k0, %k1, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%a = load <16 x float>, <16 x float>* %ptr
@ -620,12 +620,10 @@ define <16 x i16> @test_16f32tosb_256(<16 x float>* %ptr, <16 x i16> %passthru)
; CHECK-LABEL: test_16f32tosb_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1
; CHECK-NEXT: vpmovdw %ymm1, %xmm1
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm2
; CHECK-NEXT: vpmovdw %ymm2, %xmm2
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT: vpsllw $15, %ymm1, %ymm1
; CHECK-NEXT: vpmovw2m %ymm1, %k1
; CHECK-NEXT: vpmovd2m %ymm1, %k0
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm1
; CHECK-NEXT: vpmovd2m %ymm1, %k1
; CHECK-NEXT: kunpckbw %k0, %k1, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%a = load <16 x float>, <16 x float>* %ptr