diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 78bc7fa75a68..831386f408e3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6011,6 +6011,40 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV); } +// v32i8 shuffles - Translate to VPSHUFB if possible. +static +SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, + const X86TargetLowering &TLI) { + EVT VT = SVOp->getValueType(0); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + ArrayRef MaskVals = SVOp->getMask(); + + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + + if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() || !V2IsUndef) + return SDValue(); + + SmallVector pshufbMask; + for (unsigned i = 0; i != 32; i++) { + int EltIdx = MaskVals[i]; + if (EltIdx < 0 || EltIdx >= 32) + EltIdx = 0x80; + else { + if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16)) + // Cross lane is not allowed. + return SDValue(); + EltIdx &= 0xf; + } + pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); + } + return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v32i8, &pshufbMask[0], 32)); +} + /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide /// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in @@ -6837,6 +6871,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } + if (VT == MVT::v32i8) { + SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, DAG, *this); + if (NewOp.getNode()) + return NewOp; + } + // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. if (NumElems == 4 && VT.is128BitVector()) diff --git a/llvm/test/CodeGen/X86/avx2-shuffle.ll b/llvm/test/CodeGen/X86/avx2-shuffle.ll index c5899fa27426..8af9373e506b 100644 --- a/llvm/test/CodeGen/X86/avx2-shuffle.ll +++ b/llvm/test/CodeGen/X86/avx2-shuffle.ll @@ -26,3 +26,14 @@ entry: %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> ret <16 x i16> %shuffle.i } + +; CHECK: vpshufb_test +; CHECK; vpshufb {{.*\(%r.*}}, %ymm +; CHECK: ret +define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind { + %S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> + ret <32 x i8>%S +} \ No newline at end of file