[x86] Fix a crasher due to shuffles which cancel each other out and add

a test case. We also miscompile this test case which is showing a serious flaw in the single-input v8i16 shuffle code. I've left the specific instruction checks FIXME-ed out until I can address the bug in the single-input code, but I wanted to separate out a significant functionality change to produce correct code from a very simple and targeted crasher fix. The miscompile problem stems from keeping track of inputs by value rather than by index. As a consequence of doing this, we can't reliably update those inputs because they might swap and we can't detect this without copying the mask. The blend code now uses indices for the input lists and this seems strictly better. It also should make it easier to sort things and do other cleanups. I think the time has come to simplify The Great Lambda here. llvm-svn: 214914
2014-08-05 18:45:49 +00:00 · 2014-08-05 18:45:49 +00:00 · a746239be3
parent 70db9d4d72
commit a746239be3
2 changed files with 36 additions and 6 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -19419,7 +19419,11 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
    // We fell out of the loop without finding a viable combining instruction.
    return false;

-  // Record the old value to use in RAUW-ing.
+  // Combine away the bottom node as its shuffle will be accumulated into
+  // a preceding shuffle.
+  DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
+
+  // Record the old value.
  SDValue Old = V;

  // Merge this node's mask and our incoming mask (adjusted to account for all
@ -19430,12 +19434,13 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
  V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
                  getV4X86ShuffleImm8ForMask(Mask, DAG));

-  // Replace N with its operand as we're going to combine that shuffle away.
-  DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+  // Check that the shuffles didn't cancel each other out. If not, we need to
+  // combine to the new one.
+  if (Old != V)
+    // Replace the combinable shuffle with the combined one, updating all users
+    // so that we re-evaluate the chain here.
+    DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);

-  // Replace the combinable shuffle with the combined one, updating all users
-  // so that we re-evaluate the chain here.
-  DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
  return true;
 }

--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@ -671,3 +671,28 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
  ret <8 x i16> %shuffle
 }
+
+define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
+; FIXME-SSE2-LABEL: @shuffle_v8i16_XX4X8acX
+; FIXME-SSE2:       # BB#0:
+; FIXME-SSE2-NEXT:    pshufd {{.*}}    # xmm0 = xmm0[2,1,2,3]
+; FIXME-SSE2-NEXT:    pshuflw {{.*}}   # xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; FIXME-SSE2-NEXT:    pshufd {{.*}}    # xmm1 = xmm1[0,2,2,3]
+; FIXME-SSE2-NEXT:    punpcklwd {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; FIXME-SSE2-NEXT:    pshuflw {{.*}}   # xmm0 = xmm1[0,1,0,2,4,5,6,7]
+; FIXME-SSE2-NEXT:    pshufd {{.*}}    # xmm0 = xmm0[0,1,2,1]
+; FIXME-SSE2-NEXT:    pshuflw {{.*}}   # xmm0 = xmm0[0,1,1,3,4,5,6,7]
+; FIXME-SSE2-NEXT:    pshufhw {{.*}}   # xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; FIXME-SSE2-NEXT:    retq
+;
+; FIXME-SSSE3-LABEL: @shuffle_v8i16_XX4X8acX
+; FIXME-SSSE3:       # BB#0:
+; FIXME-SSSE3-NEXT:    pshufd {{.*}}    # xmm0 = xmm0[2,1,2,3]
+; FIXME-SSSE3-NEXT:    pshuflw {{.*}}   # xmm1 = xmm1[0,2,2,3,5,7,6,7]
+; FIXME-SSSE3-NEXT:    pshufd {{.*}}    # xmm1 = xmm1[0,2,2,3]
+; FIXME-SSSE3-NEXT:    punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; FIXME-SSSE3-NEXT:    pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+}},0,1,{{[0-9]+,[0-9]+}},2,3,6,7,10,11{{[0-9]+,[0-9]+}}]
+; FIXME-SSSE3-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
+  ret <8 x i16> %shuffle
+}