From 5ab948f8238fd35f6487c7a1491894a15f279bc5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 29 Jul 2019 15:57:06 +0000 Subject: [PATCH] [X86] combineX86ShufflesRecursively - start recursion at depth = 0. NFCI. As discussed on rL367171, we have a problem where the depth recursion used in combineX86ShufflesRecursively was subtly different to computeKnownBits etc. - it starts at Depth=1 instead of Depth=0 like the others and has a different maximum recursion depth. This NFC patch fixes the recursion depth to start at 0, so we can more easily reuse depth values in calls from combineX86ShufflesRecursively and its helper functions in computeKnownBits etc. llvm-svn: 367232 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 36 ++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 17481f7fb26f..36d3db44b189 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32033,7 +32033,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumRootElts = RootVT.getVectorNumElements(); unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || - (RootVT.isFloatingPoint() && Depth >= 2) || + (RootVT.isFloatingPoint() && Depth >= 1) || (RootVT.is256BitVector() && !Subtarget.hasAVX2()); // Don't combine if we are a AVX512/EVEX target and the mask element size @@ -32072,7 +32072,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 && !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) && !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128) + if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) return SDValue(); // Nothing to do! MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64); unsigned PermMask = 0; @@ -32117,8 +32117,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. // TODO: Should we indicate which domain is preferred if both are allowed? - bool AllowFloatDomain = FloatDomain || (Depth > 3); - bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() && + bool AllowFloatDomain = FloatDomain || (Depth >= 3); + bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() && (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); // Determine zeroable mask elements. @@ -32153,14 +32153,14 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (V1.getValueType() == MaskVT && V1.getOpcode() == ISD::SCALAR_TO_VECTOR && MayFoldLoad(V1.getOperand(0))) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! Res = V1.getOperand(0); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); return DAG.getBitcast(RootVT, Res); } if (Subtarget.hasAVX2()) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! Res = DAG.getBitcast(MaskVT, V1); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); @@ -32174,7 +32174,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleSrcVT, NewV1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); @@ -32185,7 +32185,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, @@ -32200,7 +32200,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT, UnaryShuffle) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); @@ -32214,7 +32214,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! NewV1 = DAG.getBitcast(ShuffleVT, NewV1); NewV2 = DAG.getBitcast(ShuffleVT, NewV2); @@ -32232,7 +32232,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, uint64_t BitLen, BitIdx; if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, @@ -32242,7 +32242,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); V2 = DAG.getBitcast(IntMaskVT, V2); @@ -32255,11 +32255,11 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. - if (Depth < 2) + if (Depth < 1) return SDValue(); // Depth threshold above which we can efficiently use variable mask shuffles. - int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3; + int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2; AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; bool MaskContainsZeros = @@ -32741,7 +32741,7 @@ static SDValue combineX86ShufflesRecursively( // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. const unsigned MaxRecursionDepth = 8; - if (Depth > MaxRecursionDepth) + if (Depth >= MaxRecursionDepth) return SDValue(); // Directly rip through bitcasts to find the underlying operand. @@ -32944,7 +32944,7 @@ static SDValue combineX86ShufflesRecursively( /// Helper entry wrapper to combineX86ShufflesRecursively. static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, + return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget); } @@ -33179,7 +33179,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, for (unsigned i = 0; i != Scale; ++i) DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( - {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, + {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); @@ -38697,7 +38697,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } if (SDValue Shuffle = combineX86ShufflesRecursively( - {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, + {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, N->getOperand(0).getOperand(1));