[SLP] Fix for PR31880: shuffle and vectorize repeated scalar ops on extracted elements
Summary: Currently most of the time vectors of extractelement instructions are treated as scalars that must be gathered into vectors. But in some cases, like when we have extractelement instructions from single vector with different constant indeces or from 2 vectors of the same size, we can treat this operations as shuffle of a single vector or blending of 2 vectors. ``` define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) { %x0 = extractelement <2 x i8> %x, i32 0 %y1 = extractelement <2 x i8> %y, i32 1 %x0x0 = mul i8 %x0, %x0 %y1y1 = mul i8 %y1, %y1 %ins1 = insertelement <2 x i8> undef, i8 %x0x0, i32 0 %ins2 = insertelement <2 x i8> %ins1, i8 %y1y1, i32 1 ret <2 x i8> %ins2 } ``` can be converted to something like ``` define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) { %1 = shufflevector <2 x i8> %x, <2 x i8> %y, <2 x i32> <i32 0, i32 3> %2 = mul <2 x i8> %1, %1 ret <2 x i8> %2 } ``` Currently this type of conversion is considered as high cost transformation. Reviewers: mzolotukhin, delena, mkuper, hfinkel, RKSimon Subscribers: ashahid, RKSimon, spatel, llvm-commits Differential Revision: https://reviews.llvm.org/D30200 llvm-svn: 309812
This commit is contained in:
parent
f081ec7609
commit
fba97e6e21
|
@ -158,6 +158,119 @@ static bool isSplat(ArrayRef<Value *> VL) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Checks if the vector of instructions can be represented as a shuffle, like:
|
||||
/// %x0 = extractelement <4 x i8> %x, i32 0
|
||||
/// %x3 = extractelement <4 x i8> %x, i32 3
|
||||
/// %y1 = extractelement <4 x i8> %y, i32 1
|
||||
/// %y2 = extractelement <4 x i8> %y, i32 2
|
||||
/// %x0x0 = mul i8 %x0, %x0
|
||||
/// %x3x3 = mul i8 %x3, %x3
|
||||
/// %y1y1 = mul i8 %y1, %y1
|
||||
/// %y2y2 = mul i8 %y2, %y2
|
||||
/// %ins1 = insertelement <4 x i8> undef, i8 %x0x0, i32 0
|
||||
/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
|
||||
/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
|
||||
/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
|
||||
/// ret <4 x i8> %ins4
|
||||
/// can be transformed into:
|
||||
/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
|
||||
/// i32 6>
|
||||
/// %2 = mul <4 x i8> %1, %1
|
||||
/// ret <4 x i8> %2
|
||||
/// We convert this initially to something like:
|
||||
/// %x0 = extractelement <4 x i8> %x, i32 0
|
||||
/// %x3 = extractelement <4 x i8> %x, i32 3
|
||||
/// %y1 = extractelement <4 x i8> %y, i32 1
|
||||
/// %y2 = extractelement <4 x i8> %y, i32 2
|
||||
/// %1 = insertelement <4 x i8> undef, i8 %x0, i32 0
|
||||
/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
|
||||
/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
|
||||
/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
|
||||
/// %5 = mul <4 x i8> %4, %4
|
||||
/// %6 = extractelement <4 x i8> %5, i32 0
|
||||
/// %ins1 = insertelement <4 x i8> undef, i8 %6, i32 0
|
||||
/// %7 = extractelement <4 x i8> %5, i32 1
|
||||
/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
|
||||
/// %8 = extractelement <4 x i8> %5, i32 2
|
||||
/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
|
||||
/// %9 = extractelement <4 x i8> %5, i32 3
|
||||
/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
|
||||
/// ret <4 x i8> %ins4
|
||||
/// InstCombiner transforms this into a shuffle and vector mul
|
||||
static Optional<TargetTransformInfo::ShuffleKind>
|
||||
isShuffle(ArrayRef<Value *> VL) {
|
||||
auto *EI0 = cast<ExtractElementInst>(VL[0]);
|
||||
unsigned Size = EI0->getVectorOperandType()->getVectorNumElements();
|
||||
Value *Vec1 = nullptr;
|
||||
Value *Vec2 = nullptr;
|
||||
enum ShuffleMode {Unknown, FirstAlternate, SecondAlternate, Permute};
|
||||
ShuffleMode CommonShuffleMode = Unknown;
|
||||
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
|
||||
auto *EI = cast<ExtractElementInst>(VL[I]);
|
||||
auto *Vec = EI->getVectorOperand();
|
||||
// All vector operands must have the same number of vector elements.
|
||||
if (Vec->getType()->getVectorNumElements() != Size)
|
||||
return None;
|
||||
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
|
||||
if (!Idx)
|
||||
return None;
|
||||
// Undefined behavior if Idx is negative or >= Size.
|
||||
if (Idx->getValue().uge(Size))
|
||||
continue;
|
||||
unsigned IntIdx = Idx->getValue().getZExtValue();
|
||||
// We can extractelement from undef vector.
|
||||
if (isa<UndefValue>(Vec))
|
||||
continue;
|
||||
// For correct shuffling we have to have at most 2 different vector operands
|
||||
// in all extractelement instructions.
|
||||
if (Vec1 && Vec2 && Vec != Vec1 && Vec != Vec2)
|
||||
return None;
|
||||
if (CommonShuffleMode == Permute)
|
||||
continue;
|
||||
// If the extract index is not the same as the operation number, it is a
|
||||
// permutation.
|
||||
if (IntIdx != I) {
|
||||
CommonShuffleMode = Permute;
|
||||
continue;
|
||||
}
|
||||
// Check the shuffle mode for the current operation.
|
||||
if (!Vec1)
|
||||
Vec1 = Vec;
|
||||
else if (Vec != Vec1)
|
||||
Vec2 = Vec;
|
||||
// Example: shufflevector A, B, <0,5,2,7>
|
||||
// I is odd and IntIdx for A == I - FirstAlternate shuffle.
|
||||
// I is even and IntIdx for B == I - FirstAlternate shuffle.
|
||||
// Example: shufflevector A, B, <4,1,6,3>
|
||||
// I is even and IntIdx for A == I - SecondAlternate shuffle.
|
||||
// I is odd and IntIdx for B == I - SecondAlternate shuffle.
|
||||
const bool IIsEven = I & 1;
|
||||
const bool CurrVecIsA = Vec == Vec1;
|
||||
const bool IIsOdd = !IIsEven;
|
||||
const bool CurrVecIsB = !CurrVecIsA;
|
||||
ShuffleMode CurrentShuffleMode =
|
||||
((IIsOdd && CurrVecIsA) || (IIsEven && CurrVecIsB)) ? FirstAlternate
|
||||
: SecondAlternate;
|
||||
// Common mode is not set or the same as the shuffle mode of the current
|
||||
// operation - alternate.
|
||||
if (CommonShuffleMode == Unknown)
|
||||
CommonShuffleMode = CurrentShuffleMode;
|
||||
// Common shuffle mode is not the same as the shuffle mode of the current
|
||||
// operation - permutation.
|
||||
if (CommonShuffleMode != CurrentShuffleMode)
|
||||
CommonShuffleMode = Permute;
|
||||
}
|
||||
// If we're not crossing lanes in different vectors, consider it as blending.
|
||||
if ((CommonShuffleMode == FirstAlternate ||
|
||||
CommonShuffleMode == SecondAlternate) &&
|
||||
Vec2)
|
||||
return TargetTransformInfo::SK_Alternate;
|
||||
// If Vec2 was never used, we have a permutation of a single vector, otherwise
|
||||
// we have permutation of 2 vectors.
|
||||
return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
|
||||
: TargetTransformInfo::SK_PermuteSingleSrc;
|
||||
}
|
||||
|
||||
///\returns Opcode that can be clubbed with \p Op to create an alternate
|
||||
/// sequence which can later be merged as a ShuffleVector instruction.
|
||||
static unsigned getAltOpcode(unsigned Op) {
|
||||
|
@ -1736,6 +1849,26 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
if (isSplat(VL)) {
|
||||
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
|
||||
}
|
||||
if (getSameOpcode(VL) == Instruction::ExtractElement) {
|
||||
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
|
||||
if (ShuffleKind.hasValue()) {
|
||||
int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
|
||||
for (auto *V : VL) {
|
||||
// If all users of instruction are going to be vectorized and this
|
||||
// instruction itself is not going to be vectorized, consider this
|
||||
// instruction as dead and remove its cost from the final cost of the
|
||||
// vectorized tree.
|
||||
if (areAllUsersVectorized(cast<Instruction>(V)) &&
|
||||
!ScalarToTreeEntry.count(V)) {
|
||||
auto *IO = cast<ConstantInt>(
|
||||
cast<ExtractElementInst>(V)->getIndexOperand());
|
||||
Cost -= TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
|
||||
IO->getZExtValue());
|
||||
}
|
||||
}
|
||||
return Cost;
|
||||
}
|
||||
}
|
||||
return getGatherCost(E->Scalars);
|
||||
}
|
||||
unsigned Opcode = getSameOpcode(VL);
|
||||
|
|
|
@ -3,13 +3,9 @@
|
|||
|
||||
define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
|
||||
; CHECK-LABEL: @g(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
|
||||
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
|
||||
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> undef, i8 [[X0X0]], i32 0
|
||||
; CHECK-NEXT: [[INS2:%.*]] = insertelement <2 x i8> [[INS1]], i8 [[Y1Y1]], i32 1
|
||||
; CHECK-NEXT: ret <2 x i8> [[INS2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: ret <2 x i8> [[TMP2]]
|
||||
;
|
||||
%x0 = extractelement <2 x i8> %x, i32 0
|
||||
%y1 = extractelement <2 x i8> %y, i32 1
|
||||
|
@ -22,19 +18,9 @@ define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
|
|||
|
||||
define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {
|
||||
; CHECK-LABEL: @h(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
|
||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
|
||||
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
|
||||
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
|
||||
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
|
||||
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
|
||||
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x i8> undef, i8 [[X0X0]], i32 0
|
||||
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> [[INS1]], i8 [[X3X3]], i32 1
|
||||
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
|
||||
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
|
||||
; CHECK-NEXT: ret <4 x i8> [[INS4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
|
||||
;
|
||||
%x0 = extractelement <4 x i8> %x, i32 0
|
||||
%x3 = extractelement <4 x i8> %x, i32 3
|
||||
|
@ -53,16 +39,9 @@ define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {
|
|||
|
||||
define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
|
||||
; CHECK-LABEL: @h_undef(
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 3
|
||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
|
||||
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
|
||||
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
|
||||
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
|
||||
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> undef, i8 [[X3X3]], i32 1
|
||||
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
|
||||
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
|
||||
; CHECK-NEXT: ret <4 x i8> [[INS4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 3, i32 5, i32 6>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
|
||||
;
|
||||
%x0 = extractelement <4 x i8> undef, i32 0
|
||||
%x3 = extractelement <4 x i8> %x, i32 3
|
||||
|
@ -81,17 +60,13 @@ define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
|
|||
|
||||
define i8 @i(<4 x i8> %x, <4 x i8> %y) {
|
||||
; CHECK-LABEL: @i(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
|
||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
|
||||
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
|
||||
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
|
||||
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
|
||||
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i8> [[TMP2]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i8> [[BIN_RDX]], <4 x i8> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i8> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: ret i8 [[TMP3]]
|
||||
;
|
||||
%x0 = extractelement <4 x i8> %x, i32 0
|
||||
|
@ -110,18 +85,15 @@ define i8 @i(<4 x i8> %x, <4 x i8> %y) {
|
|||
|
||||
define i8 @j(<4 x i8> %x, <4 x i8> %y) {
|
||||
; CHECK-LABEL: @j(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
|
||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
|
||||
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
|
||||
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
|
||||
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
|
||||
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[Y1Y1]], [[Y2Y2]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret i8 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 5>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <2 x i32> <i32 3, i32 6>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i8> [[TMP3]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
|
||||
; CHECK-NEXT: ret i8 [[TMP8]]
|
||||
;
|
||||
%x0 = extractelement <4 x i8> %x, i32 0
|
||||
%x3 = extractelement <4 x i8> %x, i32 3
|
||||
|
@ -139,18 +111,15 @@ define i8 @j(<4 x i8> %x, <4 x i8> %y) {
|
|||
|
||||
define i8 @k(<4 x i8> %x) {
|
||||
; CHECK-LABEL: @k(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i8> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i8> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
|
||||
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
|
||||
; CHECK-NEXT: [[X1X1:%.*]] = mul i8 [[X1]], [[X1]]
|
||||
; CHECK-NEXT: [[X2X2:%.*]] = mul i8 [[X2]], [[X2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X0X0]], [[X3X3]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X1X1]], [[X2X2]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sdiv i8 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret i8 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[X]], [[X]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i8> [[TMP2]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i8 [[TMP6]], [[TMP7]]
|
||||
; CHECK-NEXT: ret i8 [[TMP8]]
|
||||
;
|
||||
%x0 = extractelement <4 x i8> %x, i32 0
|
||||
%x3 = extractelement <4 x i8> %x, i32 3
|
||||
|
|
Loading…
Reference in New Issue