[SelectionDAG] Add SIGN_EXTEND_VECTOR_INREG and CONCAT_VECTORS support to SimplifyDemandedBits

Fix for AVX1 masked load/store regression on D52964

llvm-svn: 344043
This commit is contained in:
Simon Pilgrim 2018-10-09 13:13:35 +00:00
parent daf662c492
commit 23f880317a
2 changed files with 30 additions and 1 deletion

View File

@ -573,6 +573,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero &= Known2.Zero;
}
return false; // Don't fall through, will infinitely loop.
// CONCAT_VECTORS: each operand is simplified against the same demanded-bits
// mask (NewMask), and the known bits reported for the result are the
// intersection of the known bits of all operands.
case ISD::CONCAT_VECTORS:
// Seed Known with every bit marked as both known-zero and known-one so that
// the AND-accumulation in the loop can only narrow it; after the first
// operand Known equals that operand's known bits.
Known.Zero.setAllBits();
Known.One.setAllBits();
for (SDValue SrcOp : Op->ops()) {
// Recurse one level deeper on this operand; its known bits land in Known2.
// If the recursive call reports true, return true immediately.
if (SimplifyDemandedBits(SrcOp, NewMask, Known2, TLO, Depth + 1))
return true;
// Known bits are the values that are shared by every subvector.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
break;
case ISD::AND:
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
@ -1104,6 +1115,25 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getOperand(0)));
break;
}
// SIGN_EXTEND_VECTOR_INREG: translate the demanded bits of the result into
// demanded bits of the narrower source element, simplify the source, then
// sign-extend the source's known bits to the result width.
case ISD::SIGN_EXTEND_VECTOR_INREG: {
// TODO - merge this with SIGN_EXTEND above?
SDValue Src = Op.getOperand(0);
// Scalar (per-element) bit width of the source operand's value type.
unsigned InBits = Src.getValueType().getScalarSizeInBits();
// Only the low InBits of the demanded mask map directly onto source bits.
APInt InDemandedBits = NewMask.trunc(InBits);
// If some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
// (getActiveBits() > InBits means a bit at index >= InBits is set in NewMask.)
if (InBits < NewMask.getActiveBits())
InDemandedBits.setBit(InBits - 1);
// Recurse on the source with the narrowed mask; the source's known bits are
// written into Known. If the recursive call reports true, return true.
if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit is known one, the top bits match.
// NOTE(review): BitWidth is declared outside this hunk; presumably it is the
// scalar bit width of Op's result type - confirm against the function prologue.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt InMask = NewMask.trunc(OperandBitWidth);

View File

@ -41,7 +41,6 @@ define void @test2(double** %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
; AVX1-LABEL: test2:
; AVX1: ## %bb.0: ## %bb
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0