[InstCombine] Fix SSE2/AVX2 vector logical shift by constant

This patch fixes the InstCombine simplification of SSE2/AVX2 vector shifts by a constant so that it correctly accounts for the shift amount being formed from the entire lower 64 bits of the count operand, and not just from its lowest element as the code currently assumes.

e.g.

%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)

In this case, (V)PSRLD doesn't perform an lshr by 15 but instead attempts to shift by 64424509455 ((15 << 32) | 15), giving a zero result.

In addition, this patch recognizes shift-by-zero when the count operand is a ConstantAggregateZero (PR23821).

Differential Revision: http://reviews.llvm.org/D11760

llvm-svn: 244341
This commit is contained in:
Simon Pilgrim 2015-08-07 18:22:50 +00:00
parent 855ea0f71a
commit 3815c16bf8
2 changed files with 264 additions and 100 deletions

View File

@ -200,33 +200,56 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
static Value *SimplifyX86immshift(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder,
bool ShiftLeft) {
// Simplify if count is constant. To 0 if >= BitWidth,
// otherwise to shl/lshr.
auto CDV = dyn_cast<ConstantDataVector>(II.getArgOperand(1));
auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(1));
if (!CDV && !CInt)
// Simplify if count is constant.
auto Arg1 = II.getArgOperand(1);
auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
auto CDV = dyn_cast<ConstantDataVector>(Arg1);
auto CInt = dyn_cast<ConstantInt>(Arg1);
if (!CAZ && !CDV && !CInt)
return nullptr;
ConstantInt *Count;
if (CDV)
Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
else
Count = CInt;
APInt Count(64, 0);
if (CDV) {
// SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
// operand to compute the shift amount.
auto VT = cast<VectorType>(CDV->getType());
unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
unsigned NumSubElts = 64 / BitWidth;
// Concatenate the sub-elements to create the 64-bit value.
for (unsigned i = 0; i != NumSubElts; ++i) {
unsigned SubEltIdx = (NumSubElts - 1) - i;
auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
Count = Count.shl(BitWidth);
Count |= SubElt->getValue().zextOrTrunc(64);
}
}
else if (CInt)
Count = CInt->getValue();
auto Vec = II.getArgOperand(0);
auto VT = cast<VectorType>(Vec->getType());
auto SVT = VT->getElementType();
if (Count->getZExtValue() > (SVT->getPrimitiveSizeInBits() - 1))
unsigned VWidth = VT->getNumElements();
unsigned BitWidth = SVT->getPrimitiveSizeInBits();
// If shift-by-zero then just return the original value.
if (Count == 0)
return Vec;
// Handle cases when Shift >= BitWidth - just return zero.
if (Count.uge(BitWidth))
return ConstantAggregateZero::get(VT);
unsigned VWidth = VT->getNumElements();
// Get a constant vector of the same type as the first operand.
auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
if (ShiftLeft)
return Builder.CreateShl(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
return Builder.CreateShl(Vec, ShiftVec);
return Builder.CreateLShr(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
return Builder.CreateLShr(Vec, ShiftVec);
}
static Value *SimplifyX86extend(const IntrinsicInst &II,

View File

@ -7,132 +7,132 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_0
; CHECK: ret <8 x i16> %v
; CHECK-NEXT: ret <8 x i16> %v
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_15
; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_64
; CHECK: ret <8 x i16> zeroinitializer
; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_0
; CHECK: ret <4 x i32> %v
; CHECK-NEXT: ret <4 x i32> %v
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_15
; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_64
; CHECK: ret <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
ret <4 x i32> %1
}
define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_0
; CHECK: ret <2 x i64> %v
; CHECK-NEXT: ret <2 x i64> %v
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_15
; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_64
; CHECK: ret <2 x i64> zeroinitializer
; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
ret <2 x i64> %1
}
define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_0
; CHECK: ret <16 x i16> %v
; CHECK-NEXT: ret <16 x i16> %v
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_15
; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_64
; CHECK: ret <16 x i16> zeroinitializer
; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_0
; CHECK: ret <8 x i32> %v
; CHECK-NEXT: ret <8 x i32> %v
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_15
; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_64
; CHECK: ret <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
ret <8 x i32> %1
}
define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_0
; CHECK: ret <4 x i64> %v
; CHECK-NEXT: ret <4 x i64> %v
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_15
; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_64
; CHECK: ret <4 x i64> zeroinitializer
; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
ret <4 x i64> %1
}
@ -143,132 +143,132 @@ define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_0
; CHECK: ret <8 x i16> %v
; CHECK-NEXT: ret <8 x i16> %v
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
ret <8 x i16> %1
}
define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_15
; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
ret <8 x i16> %1
}
define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_64
; CHECK: ret <8 x i16> zeroinitializer
; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
ret <8 x i16> %1
}
define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_0
; CHECK: ret <4 x i32> %v
; CHECK-NEXT: ret <4 x i32> %v
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
ret <4 x i32> %1
}
define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_15
; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
ret <4 x i32> %1
}
define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_64
; CHECK: ret <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
ret <4 x i32> %1
}
define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_0
; CHECK: ret <2 x i64> %v
; CHECK-NEXT: ret <2 x i64> %v
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
ret <2 x i64> %1
}
define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_15
; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
ret <2 x i64> %1
}
define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_64
; CHECK: ret <2 x i64> zeroinitializer
; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
ret <2 x i64> %1
}
define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_0
; CHECK: ret <16 x i16> %v
; CHECK-NEXT: ret <16 x i16> %v
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
ret <16 x i16> %1
}
define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_15
; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
ret <16 x i16> %1
}
define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_64
; CHECK: ret <16 x i16> zeroinitializer
; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
ret <16 x i16> %1
}
define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_0
; CHECK: ret <8 x i32> %v
; CHECK-NEXT: ret <8 x i32> %v
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
ret <8 x i32> %1
}
define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_15
; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
ret <8 x i32> %1
}
define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_64
; CHECK: ret <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
ret <8 x i32> %1
}
define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_0
; CHECK: ret <4 x i64> %v
; CHECK-NEXT: ret <4 x i64> %v
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
ret <4 x i64> %1
}
define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_15
; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
ret <4 x i64> %1
}
define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_64
; CHECK: ret <4 x i64> zeroinitializer
; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
ret <4 x i64> %1
}
@ -277,92 +277,162 @@ define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; LSHR - Constant Vector
;
define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_0
; CHECK-NEXT: ret <8 x i16> %v
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_15
; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_64
; CHECK: ret <8 x i16> zeroinitializer
; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_0
; CHECK-NEXT: ret <4 x i32> %v
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_15
; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_64
; CHECK: ret <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_0
; CHECK-NEXT: ret <2 x i64> %v
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_15
; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_64
; CHECK: ret <2 x i64> zeroinitializer
; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <2 x i64> %1
}
define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_0
; CHECK-NEXT: ret <16 x i16> %v
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_15
; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_64
; CHECK: ret <16 x i16> zeroinitializer
; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_0
; CHECK-NEXT: ret <8 x i32> %v
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_15
; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_64
; CHECK: ret <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_0
; CHECK-NEXT: ret <4 x i64> %v
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_15
; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_64
; CHECK: ret <4 x i64> zeroinitializer
; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <4 x i64> %1
}
@ -371,92 +441,162 @@ define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
; SHL - Constant Vector
;
define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_0
; CHECK-NEXT: ret <8 x i16> %v
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_15
; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_64
; CHECK: ret <8 x i16> zeroinitializer
; CHECK-NEXT: ret <8 x i16> zeroinitializer
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_0
; CHECK-NEXT: ret <4 x i32> %v
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_15
; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_64
; CHECK: ret <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> zeroinitializer
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_0
; CHECK-NEXT: ret <2 x i64> %v
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_15
; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_64
; CHECK: ret <2 x i64> zeroinitializer
; CHECK-NEXT: ret <2 x i64> zeroinitializer
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <2 x i64> %1
}
define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_0
; CHECK-NEXT: ret <16 x i16> %v
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_15
; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_64
; CHECK: ret <16 x i16> zeroinitializer
; CHECK-NEXT: ret <16 x i16> zeroinitializer
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_0
; CHECK-NEXT: ret <8 x i32> %v
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_15
; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_64
; CHECK: ret <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i32> zeroinitializer
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_0
; CHECK-NEXT: ret <4 x i64> %v
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_15
; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_64
; CHECK: ret <4 x i64> zeroinitializer
; CHECK-NEXT: ret <4 x i64> zeroinitializer
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <4 x i64> %1
}
@ -660,6 +800,7 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1