[LegalizeVectorTypes] Have SplitVecOp_TruncateHelper fall back to SplitVecOp_UnaryOp if splitting the output type would be a legal type.
SplitVecOp_TruncateHelper tries to introduce a multilevel truncate to avoid scalarization. But if splitting the result type would still be a legal type, we don't need to do that. The comment block at the top of the function implied that this was already implemented. I looked back through the history and it doesn't appear that this was ever actually checked. llvm-svn: 347479
This commit is contained in:
parent
3e80019275
commit
b239763384
|
@ -2247,10 +2247,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
|
|||
unsigned InElementSize = InVT.getScalarSizeInBits();
|
||||
unsigned OutElementSize = OutVT.getScalarSizeInBits();
|
||||
|
||||
// Determine the split output VT. If it's legal we can just split directly.
|
||||
EVT LoOutVT, HiOutVT;
|
||||
std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
|
||||
assert(LoOutVT == HiOutVT && "Unequal split?");
|
||||
|
||||
// If the input elements are only 1/2 the width of the result elements,
|
||||
// just use the normal splitting. Our trick only works if there's room
|
||||
// to split more than once.
|
||||
if (InElementSize <= OutElementSize * 2)
|
||||
if (isTypeLegal(LoOutVT) ||
|
||||
InElementSize <= OutElementSize * 2)
|
||||
return SplitVecOp_UnaryOp(N);
|
||||
SDLoc DL(N);
|
||||
|
||||
|
|
|
@ -463,28 +463,13 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
|
|||
;
|
||||
; AVX512BW-LABEL: avg_v48i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpaddd %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpaddd %zmm3, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpaddd %zmm3, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
|
||||
; AVX512BW-NEXT: vpsubd %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsubd %zmm3, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsubd %zmm3, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsrld $1, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsrld $1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: vpmovdw %zmm2, %ymm1
|
||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1
|
||||
; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2
|
||||
; AVX512BW-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpavgb (%rsi), %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpavgb 32(%rsi), %xmm2, %xmm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
|
||||
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, (%rax)
|
||||
|
|
|
@ -655,23 +655,12 @@ define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
|
|||
}
|
||||
|
||||
define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
|
||||
; KNL-LABEL: usat_trunc_db_1024:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: usat_trunc_db_1024:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: usat_trunc_db_1024:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x6 = trunc <32 x i32> %x5 to <32 x i8>
|
||||
|
@ -679,26 +668,14 @@ define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
|
|||
}
|
||||
|
||||
define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
|
||||
; KNL-LABEL: usat_trunc_db_1024_mem:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: usat_trunc_db_1024_mem:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: usat_trunc_db_1024_mem:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x6 = trunc <32 x i32> %x5 to <32 x i8>
|
||||
|
@ -732,13 +709,9 @@ define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
|
|||
define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
|
||||
; ALL-LABEL: usat_trunc_qw_1024:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
|
||||
; ALL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; ALL-NEXT: vpmovqd %zmm1, %ymm1
|
||||
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; ALL-NEXT: vpmovusqw %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusqw %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
|
||||
%x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
|
||||
|
@ -954,29 +927,15 @@ define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
|
|||
}
|
||||
|
||||
define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
|
||||
; KNL-LABEL: smax_usat_trunc_db_1024:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: smax_usat_trunc_db_1024:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: smax_usat_trunc_db_1024:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
|
@ -986,32 +945,17 @@ define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
|
|||
}
|
||||
|
||||
define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
|
||||
; KNL-LABEL: smax_usat_trunc_db_1024_mem:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: smax_usat_trunc_db_1024_mem:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: smax_usat_trunc_db_1024_mem:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
|
|
|
@ -650,23 +650,12 @@ define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
|
|||
}
|
||||
|
||||
define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
|
||||
; KNL-LABEL: usat_trunc_db_1024:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: usat_trunc_db_1024:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: usat_trunc_db_1024:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x6 = trunc <32 x i32> %x5 to <32 x i8>
|
||||
|
@ -674,26 +663,14 @@ define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
|
|||
}
|
||||
|
||||
define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
|
||||
; KNL-LABEL: usat_trunc_db_1024_mem:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: usat_trunc_db_1024_mem:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: usat_trunc_db_1024_mem:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
%x6 = trunc <32 x i32> %x5 to <32 x i8>
|
||||
|
@ -726,13 +703,9 @@ define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
|
|||
define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
|
||||
; ALL-LABEL: usat_trunc_qw_1024:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
|
||||
; ALL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; ALL-NEXT: vpmovqd %zmm1, %ymm1
|
||||
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; ALL-NEXT: vpmovusqw %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusqw %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
|
||||
%x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
|
||||
|
@ -950,29 +923,15 @@ define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
|
|||
}
|
||||
|
||||
define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
|
||||
; KNL-LABEL: smax_usat_trunc_db_1024:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: smax_usat_trunc_db_1024:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: smax_usat_trunc_db_1024:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
|
@ -982,32 +941,17 @@ define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
|
|||
}
|
||||
|
||||
define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
|
||||
; KNL-LABEL: smax_usat_trunc_db_1024_mem:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: smax_usat_trunc_db_1024_mem:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm1, %zmm1
|
||||
; SKX-NEXT: vpminsd %zmm2, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: smax_usat_trunc_db_1024_mem:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; ALL-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
%x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
|
|
|
@ -588,12 +588,12 @@ define <16 x i16> @test_16f32toub_256(<16 x float>* %ptr, <16 x i16> %passthru)
|
|||
; CHECK-LABEL: test_16f32toub_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1
|
||||
; CHECK-NEXT: vpmovdw %ymm1, %xmm1
|
||||
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm2
|
||||
; CHECK-NEXT: vpmovdw %ymm2, %xmm2
|
||||
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpmovw2m %ymm1, %k1
|
||||
; CHECK-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpmovd2m %ymm1, %k0
|
||||
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm1
|
||||
; CHECK-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpmovd2m %ymm1, %k1
|
||||
; CHECK-NEXT: kunpckbw %k0, %k1, %k1
|
||||
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%a = load <16 x float>, <16 x float>* %ptr
|
||||
|
@ -620,12 +620,10 @@ define <16 x i16> @test_16f32tosb_256(<16 x float>* %ptr, <16 x i16> %passthru)
|
|||
; CHECK-LABEL: test_16f32tosb_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1
|
||||
; CHECK-NEXT: vpmovdw %ymm1, %xmm1
|
||||
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm2
|
||||
; CHECK-NEXT: vpmovdw %ymm2, %xmm2
|
||||
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpmovw2m %ymm1, %k1
|
||||
; CHECK-NEXT: vpmovd2m %ymm1, %k0
|
||||
; CHECK-NEXT: vcvttps2dq 32(%rdi), %ymm1
|
||||
; CHECK-NEXT: vpmovd2m %ymm1, %k1
|
||||
; CHECK-NEXT: kunpckbw %k0, %k1, %k1
|
||||
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%a = load <16 x float>, <16 x float>* %ptr
|
||||
|
|
Loading…
Reference in New Issue