From 27170fee8dd8659da736ee22d0e5e368fb1d05fd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 8 Oct 2017 01:33:41 +0000 Subject: [PATCH] [X86] If we see an insert of a bitcast into zero vector, canonicalize it to move the bitcast to the other side of the insert. This improves detection of zeroing of upper bits during isel. llvm-svn: 315161 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++++++++ llvm/lib/Target/X86/X86InstrVecCompiler.td | 3 ++- llvm/test/CodeGen/X86/sad.ll | 5 ----- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7305af6b39df..28a30d7824ac 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36107,6 +36107,20 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, SubVec.getOperand(1), DAG.getIntPtrConstant(IdxVal + Idx2Val, dl)); } + + // If we're inserting a bitcast into zeros, rewrite the insert and move the + // bitcast to the other side. This helps with detecting zero extending + // during isel. + // TODO: Is this useful for other indices than 0? + if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) { + MVT CastVT = SubVec.getOperand(0).getSimpleValueType(); + unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits(); + MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems); + SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, + DAG.getBitcast(NewVT, Vec), + SubVec.getOperand(0), N->getOperand(2)); + return DAG.getBitcast(OpVT, Insert); + } } // If this is an insert of an extract, combine to a shuffle. Don't do this diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td index f6b41c46f6d8..7e2195cf93aa 100644 --- a/llvm/lib/Target/X86/X86InstrVecCompiler.td +++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td @@ -368,7 +368,8 @@ let Predicates = [HasAVX512, NoVLX] in { // where we explicitly insert zeros. class veczeroupper : PatLeaf<(vt RC:$src), [{ - return N->getOpcode() == X86ISD::VPMADDWD; + return N->getOpcode() == X86ISD::VPMADDWD || + N->getOpcode() == X86ISD::PSADBW; }]>; def zeroupperv2f64 : veczeroupper; diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index e8a55215dc8d..27a220e7cd6b 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -43,7 +43,6 @@ define i32 @sad_16i8() nounwind { ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX2-NEXT: vmovdqu a+1024(%rax), %xmm2 ; AVX2-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2 -; AVX2-NEXT: vmovdqa %xmm2, %xmm2 ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: addq $4, %rax ; AVX2-NEXT: jne .LBB0_1 @@ -67,7 +66,6 @@ define i32 @sad_16i8() nounwind { ; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX512F-NEXT: vmovdqu a+1024(%rax), %xmm1 ; AVX512F-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1 -; AVX512F-NEXT: vmovdqa %xmm1, %xmm1 ; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: addq $4, %rax ; AVX512F-NEXT: jne .LBB0_1 @@ -93,7 +91,6 @@ define i32 @sad_16i8() nounwind { ; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX512BW-NEXT: vmovdqu a+1024(%rax), %xmm1 ; AVX512BW-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1 -; AVX512BW-NEXT: vmovdqa %xmm1, %xmm1 ; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: addq $4, %rax ; AVX512BW-NEXT: jne .LBB0_1 @@ -315,7 +312,6 @@ define i32 @sad_32i8() nounwind { ; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX512F-NEXT: vmovdqa a+1024(%rax), %ymm2 ; AVX512F-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2 -; AVX512F-NEXT: vmovdqa %ymm2, %ymm2 ; AVX512F-NEXT: vpaddd %zmm1, %zmm2, %zmm1 ; AVX512F-NEXT: addq $4, %rax ; AVX512F-NEXT: jne .LBB1_1 @@ -343,7 +339,6 @@ define i32 @sad_32i8() nounwind { ; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX512BW-NEXT: vmovdqa a+1024(%rax), %ymm2 ; AVX512BW-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2 -; AVX512BW-NEXT: vmovdqa %ymm2, %ymm2 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1 ; AVX512BW-NEXT: addq $4, %rax ; AVX512BW-NEXT: jne .LBB1_1