[X86] combineX86ShufflesRecursively - start recursion at depth = 0. NFCI.

As discussed on rL367171, we have a problem where the recursion depth used in combineX86ShufflesRecursively is subtly different from that of computeKnownBits etc. - it starts at Depth=1 instead of Depth=0 like the others, and it uses a different maximum recursion depth.

This NFC patch changes the recursion depth to start at 0, so that depth values from combineX86ShufflesRecursively and its helper functions can more easily be reused in calls to computeKnownBits etc.
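
To make the NFC claim concrete, below is a minimal standalone sketch (the function names are purely illustrative, not the actual LLVM helpers) showing that a zero-based depth with a "Depth >= MaxRecursionDepth" bailout explores exactly as many recursion levels as the old one-based depth with a "Depth > MaxRecursionDepth" bailout.

// Illustrative sketch only - not LLVM code. countLevelsNew/countLevelsOld are
// hypothetical helpers mimicking the recursion bound after and before the patch.
#include <cassert>

static const unsigned MaxRecursionDepth = 8;

// New convention: the entry wrapper passes Depth = 0 and the bound uses ">=",
// so the depths visited are 0 .. MaxRecursionDepth-1.
static unsigned countLevelsNew(unsigned Depth) {
  if (Depth >= MaxRecursionDepth)
    return 0;                           // Recursion bound reached - bail out.
  return 1 + countLevelsNew(Depth + 1); // One combine level handled here.
}

// Old convention: the entry wrapper passed Depth = 1 and the bound used ">",
// so the depths visited were 1 .. MaxRecursionDepth.
static unsigned countLevelsOld(unsigned Depth) {
  if (Depth > MaxRecursionDepth)
    return 0;
  return 1 + countLevelsOld(Depth + 1);
}

int main() {
  assert(countLevelsNew(/*Depth*/ 0) == MaxRecursionDepth); // 8 levels
  assert(countLevelsOld(/*Depth*/ 1) == MaxRecursionDepth); // also 8 levels
  return 0;
}

With the two conventions aligned, a Depth computed inside the shuffle combine can be forwarded as-is to computeKnownBits and friends, which already start at 0.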

llvm-svn: 367232
Simon Pilgrim 2019-07-29 15:57:06 +00:00
parent c2409baa66
commit 5ab948f823
1 changed file with 18 additions and 18 deletions


@@ -32033,7 +32033,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   unsigned NumRootElts = RootVT.getVectorNumElements();
   unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
   bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
-                     (RootVT.isFloatingPoint() && Depth >= 2) ||
+                     (RootVT.isFloatingPoint() && Depth >= 1) ||
                      (RootVT.is256BitVector() && !Subtarget.hasAVX2());
 
   // Don't combine if we are a AVX512/EVEX target and the mask element size
@@ -32072,7 +32072,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
       !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
       !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
-    if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
+    if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
       return SDValue(); // Nothing to do!
     MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
     unsigned PermMask = 0;
@@ -32117,8 +32117,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   // Which shuffle domains are permitted?
   // Permit domain crossing at higher combine depths.
   // TODO: Should we indicate which domain is preferred if both are allowed?
-  bool AllowFloatDomain = FloatDomain || (Depth > 3);
-  bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
+  bool AllowFloatDomain = FloatDomain || (Depth >= 3);
+  bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() &&
                         (!MaskVT.is256BitVector() || Subtarget.hasAVX2());
 
   // Determine zeroable mask elements.
@@ -32153,14 +32153,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     if (V1.getValueType() == MaskVT &&
         V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         MayFoldLoad(V1.getOperand(0))) {
-      if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+      if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
        return SDValue(); // Nothing to do!
       Res = V1.getOperand(0);
       Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
       return DAG.getBitcast(RootVT, Res);
     }
     if (Subtarget.hasAVX2()) {
-      if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+      if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
        return SDValue(); // Nothing to do!
       Res = DAG.getBitcast(MaskVT, V1);
       Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
@@ -32174,7 +32174,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                           DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
                           ShuffleVT) &&
         (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
-      if (Depth == 1 && Root.getOpcode() == Shuffle)
+      if (Depth == 0 && Root.getOpcode() == Shuffle)
        return SDValue(); // Nothing to do!
       Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
       Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@@ -32185,7 +32185,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                                  AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
                                  PermuteImm) &&
         (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
-      if (Depth == 1 && Root.getOpcode() == Shuffle)
+      if (Depth == 0 && Root.getOpcode() == Shuffle)
        return SDValue(); // Nothing to do!
       Res = DAG.getBitcast(ShuffleVT, V1);
       Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
@@ -32200,7 +32200,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                          NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
                          ShuffleVT, UnaryShuffle) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
-    if (Depth == 1 && Root.getOpcode() == Shuffle)
+    if (Depth == 0 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
     NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
     NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
@@ -32214,7 +32214,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
           MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
           NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
-    if (Depth == 1 && Root.getOpcode() == Shuffle)
+    if (Depth == 0 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
     NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
     NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
@@ -32232,7 +32232,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     uint64_t BitLen, BitIdx;
     if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
                             Zeroable)) {
-      if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
+      if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI)
        return SDValue(); // Nothing to do!
       V1 = DAG.getBitcast(IntMaskVT, V1);
       Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
@@ -32242,7 +32242,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     }
 
     if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
-      if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+      if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI)
        return SDValue(); // Nothing to do!
       V1 = DAG.getBitcast(IntMaskVT, V1);
       V2 = DAG.getBitcast(IntMaskVT, V2);
@@ -32255,11 +32255,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
 
   // Don't try to re-form single instruction chains under any circumstances now
   // that we've done encoding canonicalization for them.
-  if (Depth < 2)
+  if (Depth < 1)
     return SDValue();
 
   // Depth threshold above which we can efficiently use variable mask shuffles.
-  int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
+  int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
   AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
 
   bool MaskContainsZeros =
@@ -32741,7 +32741,7 @@ static SDValue combineX86ShufflesRecursively(
   // Bound the depth of our recursive combine because this is ultimately
   // quadratic in nature.
   const unsigned MaxRecursionDepth = 8;
-  if (Depth > MaxRecursionDepth)
+  if (Depth >= MaxRecursionDepth)
     return SDValue();
 
   // Directly rip through bitcasts to find the underlying operand.
@@ -32944,7 +32944,7 @@ static SDValue combineX86ShufflesRecursively(
 /// Helper entry wrapper to combineX86ShufflesRecursively.
 static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
                                              const X86Subtarget &Subtarget) {
-  return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+  return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0,
                                        /*HasVarMask*/ false,
                                        /*AllowVarMask*/ true, DAG, Subtarget);
 }
@@ -33179,7 +33179,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
         for (unsigned i = 0; i != Scale; ++i)
           DemandedMask[i] = i;
         if (SDValue Res = combineX86ShufflesRecursively(
-                {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
+                {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
                 /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
           return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                              DAG.getBitcast(SrcVT, Res));
@@ -38697,7 +38697,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
     }
 
     if (SDValue Shuffle = combineX86ShufflesRecursively(
-            {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
+            {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
             /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
                          N->getOperand(0).getOperand(1));