[X86] combineX86ShufflesRecursively - start recursion at depth = 0. NFCI.

As discussed on rL367171, we have a problem where the depth recursion used in combineX86ShufflesRecursively was subtly different to computeKnownBits etc. - it starts at Depth=1 instead of Depth=0 like the others and has a different maximum recursion depth.

This NFC patch fixes the recursion depth to start at 0, so we can more easily reuse depth values in calls from combineX86ShufflesRecursively and its helper functions in computeKnownBits etc.

llvm-svn: 367232
This commit is contained in:
Simon Pilgrim 2019-07-29 15:57:06 +00:00
parent c2409baa66
commit 5ab948f823
1 changed files with 18 additions and 18 deletions

View File

@ -32033,7 +32033,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
(RootVT.isFloatingPoint() && Depth >= 2) ||
(RootVT.isFloatingPoint() && Depth >= 1) ||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are a AVX512/EVEX target and the mask element size
@ -32072,7 +32072,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
return SDValue(); // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
@ -32117,8 +32117,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
// TODO: Should we indicate which domain is preferred if both are allowed?
bool AllowFloatDomain = FloatDomain || (Depth > 3);
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
bool AllowFloatDomain = FloatDomain || (Depth >= 3);
bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
@ -32153,14 +32153,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
MayFoldLoad(V1.getOperand(0))) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = V1.getOperand(0);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
return DAG.getBitcast(RootVT, Res);
}
if (Subtarget.hasAVX2()) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(MaskVT, V1);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
@ -32174,7 +32174,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@ -32185,7 +32185,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
@ -32200,7 +32200,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
@ -32214,7 +32214,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
@ -32232,7 +32232,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
uint64_t BitLen, BitIdx;
if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
@ -32242,7 +32242,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
V2 = DAG.getBitcast(IntMaskVT, V2);
@ -32255,11 +32255,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)
if (Depth < 1)
return SDValue();
// Depth threshold above which we can efficiently use variable mask shuffles.
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
@ -32741,7 +32741,7 @@ static SDValue combineX86ShufflesRecursively(
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
if (Depth > MaxRecursionDepth)
if (Depth >= MaxRecursionDepth)
return SDValue();
// Directly rip through bitcasts to find the underlying operand.
@ -32944,7 +32944,7 @@ static SDValue combineX86ShufflesRecursively(
/// Helper entry wrapper to combineX86ShufflesRecursively.
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0,
/*HasVarMask*/ false,
/*AllowVarMask*/ true, DAG, Subtarget);
}
@ -33179,7 +33179,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
for (unsigned i = 0; i != Scale; ++i)
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
@ -38697,7 +38697,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
if (SDValue Shuffle = combineX86ShufflesRecursively(
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));