[x86] Expand the target DAG combining for PSHUFD nodes to be able to

combine into half-shuffles through unpack instructions that expand the
half to a whole vector without messing with the dword lanes.

This fixes some redundant instructions in splat-like lowerings for
v16i8, which are now getting to be *really* nice.

llvm-svn: 212695
This commit is contained in:
Chandler Carruth 2014-07-10 09:57:36 +00:00
parent 91c2a99740
commit 853fa0ac8d
2 changed files with 35 additions and 5 deletions

View File

@ -18492,6 +18492,39 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return false;
continue;
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
return false;
// Search for a half-shuffle which we can combine with.
unsigned CombineOp =
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
if (V.getOperand(0) != V.getOperand(1) ||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
return false;
V = V.getOperand(0);
do {
switch (V.getOpcode()) {
default:
return false; // Nothing to combine.
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOp)
break;
// Fallthrough!
case ISD::BITCAST:
V = V.getOperand(0);
continue;
}
break;
} while (V.hasOneUse());
break;
}
// Break out of the loop if we break out of the switch.
break;
@ -18508,7 +18541,7 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(X86ISD::PSHUFD, DL, V.getValueType(), V.getOperand(0),
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DAG));
// It is possible that one of the combinable shuffles was completely absorbed

View File

@ -31,9 +31,8 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
; CHECK-SSE2-NEXT: retq
@ -59,9 +58,7 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
; CHECK-SSE2-NEXT: retq