[PowerPC] Fix a performance bug for PPC::XXPERMDI.

Some VectorShuffle nodes in the SelectionDAG can be selected to the XXPERMDI
instruction. This patch recognizes those nodes and performs that selection to
improve PPC performance.

Differential Revision: https://reviews.llvm.org/D33404

llvm-svn: 304298
This commit is contained in:
Tony Jiang 2017-05-31 13:09:57 +00:00
parent b1f0a346d6
commit 60c247de18
5 changed files with 417 additions and 13 deletions

View File

@ -1112,6 +1112,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXINSERT: return "PPCISD::XXINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
@ -1593,17 +1594,25 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
// Check that the mask is shuffling words
static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
unsigned B1 = N->getMaskElt(i*4+1);
unsigned B2 = N->getMaskElt(i*4+2);
unsigned B3 = N->getMaskElt(i*4+3);
if (B0 % 4)
return false;
if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
// Check that the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.");
unsigned NumOfElem = 16 / Width;
unsigned MaskVal[16]; // Width is never greater than 16
for (unsigned i = 0; i < NumOfElem; ++i) {
MaskVal[0] = N->getMaskElt(i * Width);
if (MaskVal[0] % Width) {
return false;
}
for (unsigned int j = 1; j < Width; ++j) {
MaskVal[j] = N->getMaskElt(i * Width + j);
if (MaskVal[j] != MaskVal[j-1] + 1) {
return false;
}
}
}
return true;
@ -1611,7 +1620,7 @@ static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
if (!isWordShuffleMask(N))
if (!isNByteElemShuffleMask(N, 4))
return false;
// Now we look at mask elements 0,4,8,12
@ -1688,7 +1697,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
if (!isWordShuffleMask(N))
if (!isNByteElemShuffleMask(N, 4))
return false;
// Now we look at mask elements 0,4,8,12, which are the beginning of words.
@ -1746,6 +1755,66 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
}
}
/// Decide whether shuffle node \p N can be emitted as a single XXPERMDI.
///
/// The mask must move whole doublewords. With two distinct inputs, the two
/// result doublewords must come from different inputs; with an undef second
/// input, both must come from the first input.
///
/// \param N    The v16i8 shuffle node to inspect.
/// \param DM   Written only on success: the 2-bit immediate (0-3) for the
///             instruction.
/// \param Swap Written only on success: true when the instruction's inputs
///             must be exchanged, i.e. element 0 of the result is taken from
///             the first input (LE) or from the second input (BE).
/// \param IsLE True when the mask is to be interpreted for little endian.
/// \return true iff the mask of \p N is an XXPERMDI shuffle mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Each result doubleword has to be eight consecutive source bytes.
  if (!isNByteElemShuffleMask(N, 8))
    return false;

  // Index of the source doubleword feeding result doubleword 0 and 1.
  // Indices 0-1 name the first input, 2-3 the second input.
  unsigned DW0 = N->getMaskElt(0) / 8;
  unsigned DW1 = N->getMaskElt(8) / 8;
  assert(((DW0 | DW1) < 4) && "A mask element out of bounds?");

  // Immediate encodings for the two endiannesses.
  auto EncodeLE = [](unsigned E0, unsigned E1) -> unsigned {
    return (((~E1) & 1) << 1) + ((~E0) & 1);
  };
  auto EncodeBE = [](unsigned E0, unsigned E1) -> unsigned {
    return (E0 << 1) + (E1 & 1);
  };

  const bool Elt0FromFirst = DW0 < 2;
  const bool Elt1FromFirst = DW1 < 2;

  // When both shuffle operands are the same vector, the second operand is
  // undef and the mask may only reference the first one.
  if (N->getOperand(1).isUndef()) {
    if (!Elt0FromFirst || !Elt1FromFirst)
      return false;
    DM = IsLE ? EncodeLE(DW0, DW1) : EncodeBE(DW0, DW1);
    Swap = false;
    return true;
  }

  // Two real inputs: each must contribute exactly one doubleword.
  if (Elt0FromFirst == Elt1FromFirst)
    return false;

  // No swap is needed when element 0 comes from the second input on LE, or
  // from the first input on BE. Otherwise exchange the inputs and renumber
  // the doubleword indices to match the exchanged order.
  const bool NeedSwap = IsLE ? Elt0FromFirst : !Elt0FromFirst;
  if (NeedSwap) {
    DW0 = (DW0 + 2) % 4;
    DW1 = (DW1 + 2) % 4;
  }
  Swap = NeedSwap;
  DM = IsLE ? EncodeLE(DW0, DW1) : EncodeBE(DW0, DW1);
  return true;
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
@ -7760,6 +7829,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
}
if (Subtarget.hasVSX() &&
PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
if (Swap)
std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
SDValue Conv2 =
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
DAG.getConstant(ShiftElts, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
}
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

View File

@ -90,6 +90,10 @@ namespace llvm {
///
VECSHL,
/// XXPERMDI - The PPC XXPERMDI instruction
///
XXPERMDI,
/// The CMPB instruction (takes two operands of i32 or i64).
CMPB,
@ -454,6 +458,10 @@ namespace llvm {
/// for a XXSLDWI instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
/// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXPERMDI instruction. When true is returned, \p ShiftElts holds the
/// immediate (0-3) to encode in the instruction (the definition names this
/// parameter DM) and \p Swap tells the caller whether the two shuffle inputs
/// must be exchanged. \p IsLE selects the little-endian interpretation.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.

View File

@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
// Type profile used by the PPCISD::XXPERMDI node: one result and three
// operands. The result and operands 1 and 2 are vectors; operand 3 is an
// integer (the immediate passed to the instruction).
def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
def SDT_PPCvcmp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
]>;
@ -170,6 +174,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;

View File

@ -843,7 +843,9 @@ let Uses = [RM] in {
def XXPERMDI : XX3Form_2<60, 10,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm,
[(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
imm32SExt16:$DM))]>;
let isCodeGenOnly = 1 in
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;

View File

@ -0,0 +1,307 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-BE
; Possible LE ShuffleVector masks (Case 1):
; ShuffleVector((vector double)a, (vector double)b, 3, 1)
; ShuffleVector((vector double)a, (vector double)b, 2, 1)
; ShuffleVector((vector double)a, (vector double)b, 3, 0)
; ShuffleVector((vector double)a, (vector double)b, 2, 0)
; which targets at:
; xxpermdi a, b, 0
; xxpermdi a, b, 1
; xxpermdi a, b, 2
; xxpermdi a, b, 3
; Possible LE Swap ShuffleVector masks (Case 2):
; ShuffleVector((vector double)a, (vector double)b, 1, 3)
; ShuffleVector((vector double)a, (vector double)b, 0, 3)
; ShuffleVector((vector double)a, (vector double)b, 1, 2)
; ShuffleVector((vector double)a, (vector double)b, 0, 2)
; which targets at:
; xxpermdi b, a, 0
; xxpermdi b, a, 1
; xxpermdi b, a, 2
; xxpermdi b, a, 3
; Possible LE ShuffleVector masks when a == b, b is undef (Case 3):
; ShuffleVector((vector double)a, (vector double)a, 1, 1)
; ShuffleVector((vector double)a, (vector double)a, 0, 1)
; ShuffleVector((vector double)a, (vector double)a, 1, 0)
; ShuffleVector((vector double)a, (vector double)a, 0, 0)
; which targets at:
; xxpermdi a, a, 0
; xxpermdi a, a, 1
; xxpermdi a, a, 2
; xxpermdi a, a, 3
; Possible BE ShuffleVector masks (Case 4):
; ShuffleVector((vector double)a, (vector double)b, 0, 2)
; ShuffleVector((vector double)a, (vector double)b, 0, 3)
; ShuffleVector((vector double)a, (vector double)b, 1, 2)
; ShuffleVector((vector double)a, (vector double)b, 1, 3)
; which targets at:
; xxpermdi a, b, 0
; xxpermdi a, b, 1
; xxpermdi a, b, 2
; xxpermdi a, b, 3
; Possible BE Swap ShuffleVector masks (Case 5):
; ShuffleVector((vector double)a, (vector double)b, 2, 0)
; ShuffleVector((vector double)a, (vector double)b, 3, 0)
; ShuffleVector((vector double)a, (vector double)b, 2, 1)
; ShuffleVector((vector double)a, (vector double)b, 3, 1)
; which targets at:
; xxpermdi b, a, 0
; xxpermdi b, a, 1
; xxpermdi b, a, 2
; xxpermdi b, a, 3
; Possible BE ShuffleVector masks when a == b, b is undef (Case 6):
; ShuffleVector((vector double)a, (vector double)a, 0, 0)
; ShuffleVector((vector double)a, (vector double)a, 0, 1)
; ShuffleVector((vector double)a, (vector double)a, 1, 0)
; ShuffleVector((vector double)a, (vector double)a, 1, 1)
; which targets at:
; xxpermdi a, a, 0
; xxpermdi a, a, 1
; xxpermdi a, a, 2
; xxpermdi a, a, 3
define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 1>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_0
; CHECK-LE: xxmrghd 34, 34, 35
; CHECK-LE: blr
}
define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 1>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_1
; CHECK-LE: xxpermdi 34, 34, 35, 1
; CHECK-LE: blr
}
define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 0>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_2
; CHECK-LE: xxpermdi 34, 34, 35, 2
; CHECK-LE: blr
}
define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 0>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_3
; CHECK-LE: xxmrgld 34, 34, 35
; CHECK-LE: blr
}
define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 3>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_0
; CHECK-LE: xxmrghd 34, 35, 34
; CHECK-LE: blr
}
define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 3>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_1
; CHECK-LE: xxpermdi 34, 35, 34, 1
; CHECK-LE: blr
}
define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 2>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_2
; CHECK-LE: xxpermdi 34, 35, 34, 2
; CHECK-LE: blr
}
define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 2>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_3
; CHECK-LE: xxmrgld 34, 35, 34
; CHECK-LE: blr
}
define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_0(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 1>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_0
; CHECK-LE: xxspltd 34, 34, 0
; CHECK-LE: blr
}
define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_1(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 1>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_1
; CHECK-LE: blr
}
define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_2(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_2
; CHECK-LE: xxswapd 34, 34
}
define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_3(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %0
; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_3
; CHECK-LE: xxspltd 34, 34, 1
; CHECK-LE: blr
}
; Start testing BE
define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 2>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_0
; CHECK-BE: xxmrghd 34, 34, 35
; CHECK-BE: blr
}
define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 3>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_1
; CHECK-BE: xxpermdi 34, 34, 35, 1
; CHECK-BE: blr
}
define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 2>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_2
; CHECK-BE: xxpermdi 34, 34, 35, 2
; CHECK-BE: blr
}
define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 3>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_3
; CHECK-BE: xxmrgld 34, 34, 35
; CHECK-BE: blr
}
define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 0>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_0
; CHECK-BE: xxmrghd 34, 35, 34
; CHECK-BE: blr
}
define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 1>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_1
; CHECK-BE: xxpermdi 34, 35, 34, 1
; CHECK-BE: blr
}
define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 0>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_2
; CHECK-BE: xxpermdi 34, 35, 34, 2
; CHECK-BE: blr
}
define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 1>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_3
; CHECK-BE: xxmrgld 34, 35, 34
; CHECK-BE: blr
}
define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_0(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_0
; CHECK-BE: xxspltd 34, 34, 0
; CHECK-BE: blr
}
define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_1(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 1>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_1
; CHECK-BE: blr
}
define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_2(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_2
; CHECK-BE: xxswapd 34, 34
}
define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_3(<2 x double> %VA) {
entry:
%0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 1>
ret <2 x double> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_3
; CHECK-BE: xxspltd 34, 34, 1
; CHECK-BE: blr
}
; More test cases to test different types of vector inputs
define <16 x i8> @test_be_vec_xxpermdi_v16i8_v16i8(<16 x i8> %VA, <16 x i8> %VB) {
entry:
%0 = shufflevector <16 x i8> %VA, <16 x i8> %VB,<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
ret <16 x i8> %0
; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v16i8_v16i8
; CHECK-BE: xxpermdi 34, 34, 35, 1
; CHECK-BE: blr
}
define <8 x i16> @test_le_swap_vec_xxpermdi_v8i16_v8i16(<8 x i16> %VA, <8 x i16> %VB) {
entry:
%0 = shufflevector <8 x i16> %VA, <8 x i16> %VB,<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %0
; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v8i16_v8i16
; CHECK-LE: xxpermdi 34, 35, 34, 1
; CHECK-LE: blr
}
define <4 x i32> @test_le_swap_vec_xxpermdi_v4i32_v4i32(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB,<4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x i32> %0
; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v4i32_v4i32
; CHECK-LE: xxpermdi 34, 35, 34, 1
; CHECK-LE: blr
}