[x86] Fix a crasher due to shuffles which cancel each other out and add

a test case.

We also miscompile this test case which is showing a serious flaw in the
single-input v8i16 shuffle code. I've left the specific instruction
checks FIXME-ed out until I can address the bug in the single-input
code, but I wanted to separate out a significant functionality change to
produce correct code from a very simple and targeted crasher fix.

The miscompile problem stems from keeping track of inputs by value
rather than by index. As a consequence of doing this, we can't reliably
update those inputs because they might swap and we can't detect this
without copying the mask.

The blend code now uses indices for the input lists and this seems
strictly better. It also should make it easier to sort things and do
other cleanups. I think the time has come to simplify The Great Lambda
here.

llvm-svn: 214914
This commit is contained in:
Chandler Carruth 2014-08-05 18:45:49 +00:00
parent 70db9d4d72
commit a746239be3
2 changed files with 36 additions and 6 deletions

View File

@ -19419,7 +19419,11 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
// We fell out of the loop without finding a viable combining instruction.
return false;
// Record the old value to use in RAUW-ing.
// Combine away the bottom node as its shuffle will be accumulated into
// a preceding shuffle.
DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
// Record the old value.
SDValue Old = V;
// Merge this node's mask and our incoming mask (adjusted to account for all
@ -19430,12 +19434,13 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DAG));
// Replace N with its operand as we're going to combine that shuffle away.
DAG.ReplaceAllUsesWith(N, N.getOperand(0));
// Check that the shuffles didn't cancel each other out. If not, we need to
// combine to the new one.
if (Old != V)
// Replace the combinable shuffle with the combined one, updating all users
// so that we re-evaluate the chain here.
DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
// Replace the combinable shuffle with the combined one, updating all users
// so that we re-evaluate the chain here.
DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
return true;
}

View File

@ -671,3 +671,28 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
; FIXME-SSE2-LABEL: @shuffle_v8i16_XX4X8acX
; FIXME-SSE2: # BB#0:
; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7]
; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
; FIXME-SSE2-NEXT: punpcklwd {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,1,0,2,4,5,6,7]
; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,1,3,4,5,6,7]
; FIXME-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,7]
; FIXME-SSE2-NEXT: retq
;
; FIXME-SSSE3-LABEL: @shuffle_v8i16_XX4X8acX
; FIXME-SSSE3: # BB#0:
; FIXME-SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
; FIXME-SSSE3-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,5,7,6,7]
; FIXME-SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
; FIXME-SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; FIXME-SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+}},0,1,{{[0-9]+,[0-9]+}},2,3,6,7,10,11{{[0-9]+,[0-9]+}}]
; FIXME-SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
ret <8 x i16> %shuffle
}