diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index cb120633a404..472dbfa62f89 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -628,14 +628,16 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     const char *SrcName = isSrc1 ? Src1Name : Src2Name;
     OS << (SrcName ? SrcName : "mem") << '[';
     bool IsFirst = true;
-    while (i != e &&
-           (int)ShuffleMask[i] >= 0 &&
+    while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
            (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
       if (!IsFirst)
         OS << ',';
       else
         IsFirst = false;
-      OS << ShuffleMask[i] % ShuffleMask.size();
+      if (ShuffleMask[i] == SM_SentinelUndef)
+        OS << "u";
+      else
+        OS << ShuffleMask[i] % ShuffleMask.size();
       ++i;
     }
     OS << ']';
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 9aca2da49020..713e147fbf5e 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -224,8 +224,7 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
   }
 }
 
-void DecodePSHUFBMask(const ConstantDataSequential *C,
-                      SmallVectorImpl<int> &ShuffleMask) {
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();
   assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
   assert(MaskTy->getVectorElementType()->isIntegerTy(8) &&
@@ -234,22 +233,48 @@ void DecodePSHUFBMask(const ConstantDataSequential *C,
   // FIXME: Add support for AVX-512.
   assert((NumElements == 16 || NumElements == 32) &&
          "Only 128-bit and 256-bit vectors supported!");
-  assert((unsigned)NumElements == C->getNumElements() &&
-         "Constant mask has a different number of elements!");
-
   ShuffleMask.reserve(NumElements);
-  for (int i = 0; i < NumElements; ++i) {
-    // For AVX vectors with 32 bytes the base of the shuffle is the half of the
-    // vector we're inside.
-    int Base = i < 16 ? 0 : 16;
-    uint64_t Element = C->getElementAsInteger(i);
-    // If the high bit (7) of the byte is set, the element is zeroed.
-    if (Element & (1 << 7))
-      ShuffleMask.push_back(SM_SentinelZero);
-    else {
-      // Only the least significant 4 bits of the byte are used.
-      int Index = Base + (Element & 0xf);
-      ShuffleMask.push_back(Index);
+
+  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+    assert((unsigned)NumElements == CDS->getNumElements() &&
+           "Constant mask has a different number of elements!");
+
+    for (int i = 0; i < NumElements; ++i) {
+      // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+      // lane of the vector we're inside.
+      int Base = i < 16 ? 0 : 16;
+      uint64_t Element = CDS->getElementAsInteger(i);
+      // If the high bit (7) of the byte is set, the element is zeroed.
+      if (Element & (1 << 7))
+        ShuffleMask.push_back(SM_SentinelZero);
+      else {
+        // Only the least significant 4 bits of the byte are used.
+        int Index = Base + (Element & 0xf);
+        ShuffleMask.push_back(Index);
+      }
+    }
+  } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+    assert((unsigned)NumElements == CV->getNumOperands() &&
+           "Constant mask has a different number of elements!");
+
+    for (int i = 0; i < NumElements; ++i) {
+      // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+      // lane of the vector we're inside.
+      int Base = i < 16 ? 0 : 16;
+      Constant *COp = CV->getOperand(i);
+      if (isa<UndefValue>(COp)) {
+        ShuffleMask.push_back(SM_SentinelUndef);
+        continue;
+      }
+      uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+      // If the high bit (7) of the byte is set, the element is zeroed.
+      if (Element & (1 << 7))
+        ShuffleMask.push_back(SM_SentinelZero);
+      else {
+        // Only the least significant 4 bits of the byte are used.
+        int Index = Base + (Element & 0xf);
+        ShuffleMask.push_back(Index);
+      }
     }
   }
 }
@@ -258,6 +283,10 @@ void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
                       SmallVectorImpl<int> &ShuffleMask) {
   for (int i = 0, e = RawMask.size(); i < e; ++i) {
     uint64_t M = RawMask[i];
+    if (M == (uint64_t)SM_SentinelUndef) {
+      ShuffleMask.push_back(SM_SentinelUndef); // Keep undef lanes undef.
+      continue;
+    }
     // For AVX vectors with 32 bytes the base of the shuffle is the half of
     // the vector we're inside.
     int Base = i < 16 ? 0 : 16;
@@ -287,8 +316,7 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   }
 }
 
-void DecodeVPERMILPMask(const ConstantDataSequential *C,
-                        SmallVectorImpl<int> &ShuffleMask) {
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();
   assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
   assert(MaskTy->getVectorElementType()->isIntegerTy() &&
@@ -297,16 +325,34 @@ void DecodeVPERMILPMask(const ConstantDataSequential *C,
   int NumElements = MaskTy->getVectorNumElements();
   assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
          "Unexpected number of vector elements.");
-  assert((unsigned)NumElements == C->getNumElements() &&
-         "Constant mask has a different number of elements!");
-
   ShuffleMask.reserve(NumElements);
-  for (int i = 0; i < NumElements; ++i) {
-    int Base = (i * ElementBits / 128) * (128 / ElementBits);
-    uint64_t Element = C->getElementAsInteger(i);
-    // Only the least significant 2 bits of the integer are used.
-    int Index = Base + (Element & 0x3);
-    ShuffleMask.push_back(Index);
+  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+    assert((unsigned)NumElements == CDS->getNumElements() &&
+           "Constant mask has a different number of elements!");
+
+    for (int i = 0; i < NumElements; ++i) {
+      int Base = (i * ElementBits / 128) * (128 / ElementBits);
+      uint64_t Element = CDS->getElementAsInteger(i);
+      // Only the least significant 2 bits of the integer are used.
+      int Index = Base + (Element & 0x3);
+      ShuffleMask.push_back(Index);
+    }
+  } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+    assert((unsigned)NumElements == CV->getNumOperands() &&
+           "Constant mask has a different number of elements!");
+
+    for (int i = 0; i < NumElements; ++i) {
+      int Base = (i * ElementBits / 128) * (128 / ElementBits);
+      Constant *COp = CV->getOperand(i);
+      if (isa<UndefValue>(COp)) {
+        ShuffleMask.push_back(SM_SentinelUndef);
+        continue;
+      }
+      uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+      // Only the least significant 2 bits of the integer are used.
+      int Index = Base + (Element & 0x3);
+      ShuffleMask.push_back(Index);
+    }
   }
 }
 
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 8034d209ac38..ece895d77cc5 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -23,12 +23,10 @@
 //===----------------------------------------------------------------------===//
 
 namespace llvm {
-class ConstantDataSequential;
+class Constant;
 class MVT;
 
-enum {
-  SM_SentinelZero = -1
-};
+enum { SM_SentinelZero = -1, SM_SentinelUndef = -2 };
 
 void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
@@ -66,8 +64,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
 void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
 
 /// \brief Decode a PSHUFB mask from an IR-level vector constant.
-void DecodePSHUFBMask(const ConstantDataSequential *C,
-                      SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
 
 /// \brief Decode a PSHUFB mask from a raw array of constants such as from
 /// BUILD_VECTOR.
@@ -85,8 +82,7 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
 /// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const ConstantDataSequential *C,
-                        SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
 
 } // llvm namespace
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 40ab77aaaa03..a3fa78a701ad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5347,7 +5347,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
 
       SmallVector<uint64_t, 32> RawMask;
       for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
-        auto *CN = dyn_cast<ConstantSDNode>(MaskNode->getOperand(i));
+        SDValue Op = MaskNode->getOperand(i);
+        if (Op->getOpcode() == ISD::UNDEF) {
+          RawMask.push_back((uint64_t)SM_SentinelUndef);
+          continue;
+        }
+        auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
         if (!CN)
           return false;
         APInt MaskElement = CN->getAPIntValue();
@@ -5377,13 +5382,13 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
     if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
       return false;
 
-    if (auto *C = dyn_cast<ConstantDataSequential>(MaskCP->getConstVal())) {
+    if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
       // FIXME: Support AVX-512 here.
-      if (!C->getType()->isVectorTy() ||
-          (C->getNumElements() != 16 && C->getNumElements() != 32))
+      Type *Ty = C->getType();
+      if (!Ty->isVectorTy() || (Ty->getVectorNumElements() != 16 &&
+                                Ty->getVectorNumElements() != 32))
         return false;
 
-      assert(C->getType()->isVectorTy() && "Expected a vector constant.");
       DecodePSHUFBMask(C, Mask);
       break;
     }
@@ -8994,7 +8999,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     SDValue V2Mask[16];
     for (int i = 0; i < 16; ++i)
       if (Mask[i] == -1) {
-        V1Mask[i] = V2Mask[i] = DAG.getConstant(0x80, MVT::i8);
+        V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
       } else {
         V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8);
         V2Mask[i] =
@@ -20167,6 +20172,10 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
     assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
     int Ratio = 16 / Mask.size();
     for (unsigned i = 0; i < 16; ++i) {
+      if (Mask[i / Ratio] == SM_SentinelUndef) {
+        PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
+        continue;
+      }
       int M = Mask[i / Ratio] != SM_SentinelZero
                   ? Ratio * Mask[i / Ratio] + i % Ratio
                   : 255;
@@ -20277,17 +20286,18 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
   // for this order is that we are recursing up the operation chain.
   for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) {
     int RootIdx = i / RootRatio;
-    if (RootMask[RootIdx] == SM_SentinelZero) {
-      // This is a zero-ed lane, we're done.
-      Mask.push_back(SM_SentinelZero);
+    if (RootMask[RootIdx] < 0) {
+      // This is a zero or undef lane, we're done.
+      Mask.push_back(RootMask[RootIdx]);
       continue;
     }
 
     int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
     int OpIdx = RootMaskedIdx / OpRatio;
-    if (OpMask[OpIdx] == SM_SentinelZero) {
-      // The incoming lanes are zero, it doesn't matter which ones we are using.
-      Mask.push_back(SM_SentinelZero);
+    if (OpMask[OpIdx] < 0) {
+      // The incoming lanes are zero or undef, it doesn't matter which ones we
+      // are using.
+      Mask.push_back(OpMask[OpIdx]);
       continue;
     }
 
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 5665a0126064..bc02d6bac3e5 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1060,8 +1060,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       Type *MaskTy = MaskConstantEntry.getType();
       (void)MaskTy;
       if (!MaskConstantEntry.isMachineConstantPoolEntry())
-        if (auto *C = dyn_cast<ConstantDataSequential>(
-                MaskConstantEntry.Val.ConstVal)) {
+        if (auto *C = dyn_cast<Constant>(MaskConstantEntry.Val.ConstVal)) {
           assert(MaskTy == C->getType() &&
                  "Expected a constant of the same type!");
 
@@ -1077,8 +1076,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
             DecodeVPERMILPMask(C, Mask);
           }
 
-          assert(Mask.size() == MaskTy->getVectorNumElements() &&
-                 "Shuffle mask has a different size than its type!");
+          assert(
+              (Mask.empty() || Mask.size() == MaskTy->getVectorNumElements()) &&
+              "Shuffle mask has a different size than its type!");
         }
     }
 
@@ -1104,7 +1104,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
             InSrc = true;
             CS << SrcName << "[";
           }
-          CS << M;
+          if (M == SM_SentinelUndef)
+            CS << "u";
+          else
+            CS << M;
         }
       }
       if (InSrc)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 36575463da32..b7991bbff999 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -301,12 +301,12 @@ define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
 ;
 ; SSSE3-LABEL: @trunc_v4i32_shuffle
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: @trunc_v4i32_shuffle
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSE41-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <16 x i8> %shuffle
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index df40df2a3258..1922150f9003 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -498,7 +498,7 @@ define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
 ; ALL-LABEL: @shuffle_v8f32_002u6u44
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   ret <8 x float> %shuffle
@@ -507,7 +507,7 @@ define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
 ; ALL-LABEL: @shuffle_v8f32_00uu66uu
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   ret <8 x float> %shuffle
@@ -516,7 +516,7 @@ define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
 ; ALL-LABEL: @shuffle_v8f32_103245uu
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   ret <8 x float> %shuffle
@@ -525,7 +525,7 @@ define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
 ; ALL-LABEL: @shuffle_v8f32_1133uu67
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   ret <8 x float> %shuffle
@@ -534,7 +534,7 @@ define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
 ; ALL-LABEL: @shuffle_v8f32_0uu354uu
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   ret <8 x float> %shuffle
@@ -543,7 +543,7 @@ define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
 ; ALL-LABEL: @shuffle_v8f32_uuu3uu66
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   ret <8 x float> %shuffle
@@ -1044,7 +1044,7 @@ define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
 ; ALL-LABEL: @shuffle_v8i32_002u6u44
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   ret <8 x i32> %shuffle
@@ -1053,7 +1053,7 @@ define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
 ; ALL-LABEL: @shuffle_v8i32_00uu66uu
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   ret <8 x i32> %shuffle
@@ -1062,7 +1062,7 @@ define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
 ; ALL-LABEL: @shuffle_v8i32_103245uu
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   ret <8 x i32> %shuffle
@@ -1071,7 +1071,7 @@ define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
 ; ALL-LABEL: @shuffle_v8i32_1133uu67
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   ret <8 x i32> %shuffle
@@ -1080,7 +1080,7 @@ define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
 ; ALL-LABEL: @shuffle_v8i32_0uu354uu
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   ret <8 x i32> %shuffle
@@ -1089,7 +1089,7 @@ define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
 ; ALL-LABEL: @shuffle_v8i32_uuu3uu66
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilps {{.*}}, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   ret <8 x i32> %shuffle