[X86] If we see an insert of a bitcast into a zero vector, canonicalize it by moving the bitcast to the other side of the insert.
This improves detection of zeroing of the upper bits during instruction selection (isel). llvm-svn: 315161
This commit is contained in:
parent
f7a19db649
commit
27170fee8d
|
@ -36107,6 +36107,20 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
|
||||||
SubVec.getOperand(1),
|
SubVec.getOperand(1),
|
||||||
DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
|
DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we're inserting a bitcast into zeros, rewrite the insert and move the
|
||||||
|
// bitcast to the other side. This helps with detecting zero extending
|
||||||
|
// during isel.
|
||||||
|
// TODO: Is this useful for other indices than 0?
|
||||||
|
if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
|
||||||
|
MVT CastVT = SubVec.getOperand(0).getSimpleValueType();
|
||||||
|
unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits();
|
||||||
|
MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems);
|
||||||
|
SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
|
||||||
|
DAG.getBitcast(NewVT, Vec),
|
||||||
|
SubVec.getOperand(0), N->getOperand(2));
|
||||||
|
return DAG.getBitcast(OpVT, Insert);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this is an insert of an extract, combine to a shuffle. Don't do this
|
// If this is an insert of an extract, combine to a shuffle. Don't do this
|
||||||
|
|
|
@ -368,7 +368,8 @@ let Predicates = [HasAVX512, NoVLX] in {
|
||||||
// where we explicitly insert zeros.
|
// where we explicitly insert zeros.
|
||||||
class veczeroupper<ValueType vt, RegisterClass RC> :
|
class veczeroupper<ValueType vt, RegisterClass RC> :
|
||||||
PatLeaf<(vt RC:$src), [{
|
PatLeaf<(vt RC:$src), [{
|
||||||
return N->getOpcode() == X86ISD::VPMADDWD;
|
return N->getOpcode() == X86ISD::VPMADDWD ||
|
||||||
|
N->getOpcode() == X86ISD::PSADBW;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
def zeroupperv2f64 : veczeroupper<v2f64, VR128>;
|
def zeroupperv2f64 : veczeroupper<v2f64, VR128>;
|
||||||
|
|
|
@ -43,7 +43,6 @@ define i32 @sad_16i8() nounwind {
|
||||||
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
|
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; AVX2-NEXT: vmovdqu a+1024(%rax), %xmm2
|
; AVX2-NEXT: vmovdqu a+1024(%rax), %xmm2
|
||||||
; AVX2-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2
|
; AVX2-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2
|
||||||
; AVX2-NEXT: vmovdqa %xmm2, %xmm2
|
|
||||||
; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
|
; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
|
||||||
; AVX2-NEXT: addq $4, %rax
|
; AVX2-NEXT: addq $4, %rax
|
||||||
; AVX2-NEXT: jne .LBB0_1
|
; AVX2-NEXT: jne .LBB0_1
|
||||||
|
@ -67,7 +66,6 @@ define i32 @sad_16i8() nounwind {
|
||||||
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
|
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; AVX512F-NEXT: vmovdqu a+1024(%rax), %xmm1
|
; AVX512F-NEXT: vmovdqu a+1024(%rax), %xmm1
|
||||||
; AVX512F-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
|
; AVX512F-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
|
||||||
; AVX512F-NEXT: vmovdqa %xmm1, %xmm1
|
|
||||||
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||||
; AVX512F-NEXT: addq $4, %rax
|
; AVX512F-NEXT: addq $4, %rax
|
||||||
; AVX512F-NEXT: jne .LBB0_1
|
; AVX512F-NEXT: jne .LBB0_1
|
||||||
|
@ -93,7 +91,6 @@ define i32 @sad_16i8() nounwind {
|
||||||
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
|
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; AVX512BW-NEXT: vmovdqu a+1024(%rax), %xmm1
|
; AVX512BW-NEXT: vmovdqu a+1024(%rax), %xmm1
|
||||||
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
|
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
|
||||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm1
|
|
||||||
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: addq $4, %rax
|
; AVX512BW-NEXT: addq $4, %rax
|
||||||
; AVX512BW-NEXT: jne .LBB0_1
|
; AVX512BW-NEXT: jne .LBB0_1
|
||||||
|
@ -315,7 +312,6 @@ define i32 @sad_32i8() nounwind {
|
||||||
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
|
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; AVX512F-NEXT: vmovdqa a+1024(%rax), %ymm2
|
; AVX512F-NEXT: vmovdqa a+1024(%rax), %ymm2
|
||||||
; AVX512F-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
|
; AVX512F-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
|
||||||
; AVX512F-NEXT: vmovdqa %ymm2, %ymm2
|
|
||||||
; AVX512F-NEXT: vpaddd %zmm1, %zmm2, %zmm1
|
; AVX512F-NEXT: vpaddd %zmm1, %zmm2, %zmm1
|
||||||
; AVX512F-NEXT: addq $4, %rax
|
; AVX512F-NEXT: addq $4, %rax
|
||||||
; AVX512F-NEXT: jne .LBB1_1
|
; AVX512F-NEXT: jne .LBB1_1
|
||||||
|
@ -343,7 +339,6 @@ define i32 @sad_32i8() nounwind {
|
||||||
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
|
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; AVX512BW-NEXT: vmovdqa a+1024(%rax), %ymm2
|
; AVX512BW-NEXT: vmovdqa a+1024(%rax), %ymm2
|
||||||
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
|
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
|
||||||
; AVX512BW-NEXT: vmovdqa %ymm2, %ymm2
|
|
||||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1
|
; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1
|
||||||
; AVX512BW-NEXT: addq $4, %rax
|
; AVX512BW-NEXT: addq $4, %rax
|
||||||
; AVX512BW-NEXT: jne .LBB1_1
|
; AVX512BW-NEXT: jne .LBB1_1
|
||||||
|
|
Loading…
Reference in New Issue