[AVX] Add decode support for VUNPCKLPS/D instructions, both 128-bit

and 256-bit forms.  Because the number of elements in a vector
      does not determine the vector type (4 elements could be v4f32 or
      v4f64), pass the full type of the vector to decode routines.

llvm-svn: 126664
This commit is contained in:
David Greene 2011-02-28 19:06:56 +00:00
parent e16af53619
commit 20a1cbefad
4 changed files with 120 additions and 30 deletions

View File

@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodePUNPCKLMask(16, ShuffleMask);
DecodePUNPCKLBWMask(16, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodePUNPCKLMask(8, ShuffleMask);
DecodePUNPCKLWDMask(8, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodePUNPCKLMask(4, ShuffleMask);
DecodePUNPCKLDQMask(4, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodePUNPCKLMask(2, ShuffleMask);
DecodePUNPCKLQDQMask(2, ShuffleMask);
break;
case X86::SHUFPDrri:
@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
DecodeUNPCKLPMask(2, ShuffleMask);
DecodeUNPCKLPDMask(2, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDrm:
DecodeUNPCKLPDMask(2, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::VUNPCKLPDYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDYrm:
DecodeUNPCKLPDMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::UNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
DecodeUNPCKLPMask(4, ShuffleMask);
DecodeUNPCKLPSMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSrm:
DecodeUNPCKLPSMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::VUNPCKLPSYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSYrm:
DecodeUNPCKLPSMask(8, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::UNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.

View File

@ -1,4 +1,4 @@
//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
void DecodePUNPCKLMask(unsigned NElts,
void DecodePUNPCKLBWMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i);
ShuffleMask.push_back(i+NElts);
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
}
void DecodePUNPCKLWDMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
}
void DecodePUNPCKLDQMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
}
void DecodePUNPCKLQDQMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
}
void DecodePUNPCKLMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKLPMask(VT, ShuffleMask);
}
void DecodePUNPCKHMask(unsigned NElts,
@ -133,12 +150,24 @@ void DecodeUNPCKHPMask(unsigned NElts,
}
}
void DecodeUNPCKLPSMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
}
void DecodeUNPCKLPDMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
}
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. NElts indicates the number of elements in the vector allowing it to
/// handle different datatypes and vector widths.
void DecodeUNPCKLPMask(unsigned NElts,
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
int NElts = VT.getVectorNumElements();
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i); // Reads from dest
ShuffleMask.push_back(i+NElts); // Reads from src

View File

@ -16,6 +16,7 @@
#define X86_SHUFFLE_DECODE_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ValueTypes.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm,
void DecodePSHUFLWMask(unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKLMask(unsigned NElts,
void DecodePUNPCKLBWMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKLWDMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKLDQMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKLQDQMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKLMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKHMask(unsigned NElts,
@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
void DecodeUNPCKHPMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeUNPCKLPSMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeUNPCKLPDMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. NElts indicates the number of elements in the vector allowing it to
/// handle different datatypes and vector widths.
void DecodeUNPCKLPMask(unsigned NElts,
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
} // llvm namespace

View File

@ -3895,11 +3895,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
DecodePUNPCKLMask(NumElems, ShuffleMask);
DecodePUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
DecodeUNPCKLPMask(NumElems, ShuffleMask);
case X86ISD::VUNPCKLPS:
case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
DecodeUNPCKLPMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, ShuffleMask);
@ -5263,6 +5267,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
Locs.resize(4);
SmallVector<int,8> LoMask(4U, -1);
SmallVector<int,8> HiMask(4U, -1);
@ -5508,12 +5513,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
static inline unsigned getUNPCKLOpcode(EVT VT) {
static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
case MVT::v4f32: return X86ISD::UNPCKLPS;
case MVT::v2f64: return X86ISD::UNPCKLPD;
case MVT::v4f32:
return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
case MVT::v2f64:
return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
case MVT::v8f32: return X86ISD::VUNPCKLPSY;
case MVT::v4f64: return X86ISD::VUNPCKLPDY;
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
default:
@ -5641,7 +5650,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@ -5762,7 +5771,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isUNPCKLMask(SVOp))
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
dl, VT, V1, V2, DAG);
if (X86::isUNPCKHMask(SVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
@ -5789,7 +5799,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp))
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
@ -5812,8 +5823,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
if (VT == MVT::v2f64)
return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
if (VT == MVT::v2f64) {
X86ISD::NodeType Opcode =
getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
}
if (VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
}
@ -5840,7 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
dl, VT, V1, V1, DAG);
if (X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);