Cleanups and optimizations to the X86 shuffle lowering code. No functional change.

llvm-svn: 145803
2011-12-05 06:56:46 +00:00 · 2011-12-05 06:56:46 +00:00 · 6a55b1dd9f
parent 3924cb0267
commit 6a55b1dd9f
1 changed files with 36 additions and 52 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -3323,11 +3323,9 @@ static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
 /// the reverse of what x86 shuffles want. x86 shuffles require the lower
 /// half elements to come from vector 1 (which would equal the dest.) and
 /// the upper half to come from vector 2.
-static bool isCommutedVSHUFPY(ShuffleVectorSDNode *N, bool HasAVX) {
-  EVT VT = N->getValueType(0);
+static bool isCommutedVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+                                  bool HasAVX) {
  int NumElems = VT.getVectorNumElements();
-  SmallVector<int, 8> Mask;
-  N->getMask(Mask);

  if (!HasAVX || VT.getSizeInBits() != 256)
    return false;
@ -3423,8 +3421,8 @@ static unsigned getShuffleVSHUFPYImmediate(SDNode *N) {

 /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
 /// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
-  unsigned NumElems = VT.getVectorNumElements();
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
+                                     unsigned NumElems) {
  for (unsigned i = 0; i != NumElems; ++i) {
    int idx = Mask[i];
    if (idx < 0)
@ -3485,12 +3483,6 @@ static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  return true;
 }

-static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
-  SmallVector<int, 8> M;
-  N->getMask(M);
-  return isCommutedSHUFPMask(M, N->getValueType(0));
-}
-
 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
 bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
@ -3975,21 +3967,18 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
 /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to 256-bit
 /// version of MOVDDUP.
-static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
-                           const X86Subtarget *Subtarget) {
-  EVT VT = N->getValueType(0);
+static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+                           bool HasAVX) {
  int NumElts = VT.getVectorNumElements();
-  bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;

-  if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
-      !V2IsUndef || NumElts != 4)
+  if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
    return false;

  for (int i = 0; i != NumElts/2; ++i)
-    if (!isUndefOrEqual(N->getMaskElt(i), 0))
+    if (!isUndefOrEqual(Mask[i], 0))
      return false;
  for (int i = NumElts/2; i != NumElts; ++i)
-    if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+    if (!isUndefOrEqual(Mask[i], NumElts/2))
      return false;
  return true;
 }
@ -6172,7 +6161,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
    // from X.
    if (NumHi == 3) {
      // Normalize it so the 3 elements come from V1.
-      CommuteVectorShuffleMask(PermMask, VT);
+      CommuteVectorShuffleMask(PermMask, 4);
      std::swap(V1, V2);
    }

@ -6603,6 +6592,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
  bool V1IsSplat = false;
  bool V2IsSplat = false;
  bool HasXMMInt = Subtarget->hasXMMInt();
+  bool HasAVX    = Subtarget->hasAVX();
  bool HasAVX2   = Subtarget->hasAVX2();
  MachineFunction &MF = DAG.getMachineFunction();
  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
@ -6738,7 +6728,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
    Commuted = true;
  }

-  if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+  SmallVector<int, 32> M;
+  SVOp->getMask(M);
+
+  if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
    // Shuffling low element of v1 into undef, just return v1.
    if (V2IsUndef)
      return V1;
@ -6748,11 +6741,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
    return getMOVL(DAG, dl, VT, V2, V1);
  }

-  if (X86::isUNPCKLMask(SVOp, HasAVX2))
+  if (isUNPCKLMask(M, VT, HasAVX2))
    return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
                                DAG);

-  if (X86::isUNPCKHMask(SVOp, HasAVX2))
+  if (isUNPCKHMask(M, VT, HasAVX2))
    return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
                                DAG);

@ -6787,15 +6780,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
  }

  // Normalize the node to match x86 shuffle ops if needed
-  if (!V2IsUndef && (isCommutedSHUFP(SVOp) ||
-                     isCommutedVSHUFPY(SVOp, Subtarget->hasAVX())))
+  if (!V2IsUndef && (isCommutedSHUFPMask(M, VT) ||
+                     isCommutedVSHUFPYMask(M, VT, HasAVX)))
    return CommuteVectorShuffle(SVOp, DAG);

  // The checks below are all present in isShuffleMaskLegal, but they are
  // inlined here right now to enable us to directly emit target specific
  // nodes, and remove one by one until they don't return Op anymore.
-  SmallVector<int, 16> M;
-  SVOp->getMask(M);

  if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()))
    return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
@ -6804,10 +6795,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {

  if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
      SVOp->getSplatIndex() == 0 && V2IsUndef) {
-    if (VT == MVT::v2f64)
-      return getTargetShuffleNode(X86ISD::UNPCKLP, dl, VT, V1, V1, DAG);
-    if (VT == MVT::v2i64)
-      return getTargetShuffleNode(X86ISD::PUNPCKL, dl, VT, V1, V1, DAG);
+    if (VT == MVT::v2f64 || VT == MVT::v2i64)
+      return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
+                                  DAG);
  }

  if (isPSHUFHWMask(M, VT))
@ -6824,10 +6814,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
    return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
                                X86::getShuffleSHUFImmediate(SVOp), DAG);

-  if (X86::isUNPCKL_v_undef_Mask(SVOp))
+  if (isUNPCKL_v_undef_Mask(M, VT))
    return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
                                DAG);
-  if (X86::isUNPCKH_v_undef_Mask(SVOp))
+  if (isUNPCKH_v_undef_Mask(M, VT))
    return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
                                DAG);

@ -6837,21 +6827,21 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
  //

  // Handle VMOVDDUPY permutations
-  if (isMOVDDUPYMask(SVOp, Subtarget))
+  if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
    return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);

  // Handle VPERMILPS/D* permutations
-  if (isVPERMILPMask(M, VT, Subtarget->hasAVX()))
+  if (isVPERMILPMask(M, VT, HasAVX))
    return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
                                getShuffleVPERMILPImmediate(SVOp), DAG);

  // Handle VPERM2F128/VPERM2I128 permutations
-  if (isVPERM2X128Mask(M, VT, Subtarget->hasAVX()))
+  if (isVPERM2X128Mask(M, VT, HasAVX))
    return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
                                V2, getShuffleVPERM2X128Immediate(SVOp), DAG);

  // Handle VSHUFPS/DY permutations
-  if (isVSHUFPYMask(M, VT, Subtarget->hasAVX()))
+  if (isVSHUFPYMask(M, VT, HasAVX))
    return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
                                getShuffleVSHUFPYImmediate(SVOp), DAG);

@ -14321,7 +14311,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
 /// set to A, RHS to B, and the routine returns 'true'.
 /// Note that the binary operation should have the property that if one of the
 /// operands is UNDEF then the result is UNDEF.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
  // Look for the following pattern: if
  //   A = < float a0, float a1, float a2, float a3 >
  //   B = < float b0, float b1, float b2, float b3 >
@ -14399,34 +14389,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
  // If A and B occur in reverse order in RHS, then "swap" them (which means
  // rewriting the mask).
  if (A != C)
-    for (unsigned i = 0; i != NumElts; ++i) {
-      unsigned Idx = RMask[i];
-      if (Idx < NumElts)
-        RMask[i] += NumElts;
-      else if (Idx < 2*NumElts)
-        RMask[i] -= NumElts;
-    }
+    CommuteVectorShuffleMask(RMask, NumElts);

  // At this point LHS and RHS are equivalent to
  //   LHS = VECTOR_SHUFFLE A, B, LMask
  //   RHS = VECTOR_SHUFFLE A, B, RMask
  // Check that the masks correspond to performing a horizontal operation.
  for (unsigned i = 0; i != NumElts; ++i) {
-    unsigned LIdx = LMask[i], RIdx = RMask[i];
+    int LIdx = LMask[i], RIdx = RMask[i];

    // Ignore any UNDEF components.
-    if (LIdx >= 2*NumElts || RIdx >= 2*NumElts ||
-        (!A.getNode() && (LIdx < NumElts || RIdx < NumElts)) ||
-        (!B.getNode() && (LIdx >= NumElts || RIdx >= NumElts)))
+    if (LIdx < 0 || RIdx < 0 ||
+        (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
+        (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
      continue;

    // Check that successive elements are being operated on.  If not, this is
    // not a horizontal operation.
    unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs
    unsigned LaneStart = (i/NumLaneElts) * NumLaneElts;
-    unsigned Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
+    int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
    if (!(LIdx == Index && RIdx == Index + 1) &&
-        !(isCommutative && LIdx == Index + 1 && RIdx == Index))
+        !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
      return false;
  }