[X86] Convert palignr builtin handling to use shuffle form of right shift instead of intrinsics. This should allow the intrinsics to be removed from the backend.

llvm-svn: 229474
Craig Topper 2015-02-17 07:18:01 +00:00
parent f23604d0de
commit 96f9a573b5
3 changed files with 19 additions and 38 deletions
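
For readers unfamiliar with the instruction: PALIGNR concatenates two vectors, shifts the concatenation right by an immediate byte count, and keeps the low half; counts past the end shift in zeroes. That is why every count can be lowered to a plain shufflevector (against the second source, or against a zero vector) rather than a psrl.dq intrinsic call. A minimal scalar model of the 128-bit form, illustration only and not part of the commit:

// Illustration only (not part of the commit): a scalar model of the
// 128-bit palignr. The result is the 32-byte concatenation hi:lo
// shifted right by `shift` bytes, zero-filled past the end -- so any
// shift count reduces to a shuffle of {lo, hi}, a shuffle of
// {hi, zero}, or all zeroes.
#include <cstdint>
#include <cstring>

void palignr128_model(uint8_t out[16], const uint8_t hi[16],
                      const uint8_t lo[16], unsigned shift) {
  uint8_t concat[32];
  std::memcpy(concat, lo, 16);       // bytes 0..15: low source
  std::memcpy(concat + 16, hi, 16);  // bytes 16..31: high source
  for (unsigned i = 0; i != 16; ++i) {
    unsigned idx = shift + i;
    out[i] = idx < 32 ? concat[idx] : 0;  // zeroes shift in past the end
  }
}

Those three regimes are exactly the three cases the rewritten codegen below handles.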

@@ -5936,50 +5936,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     unsigned NumLanes = NumElts / 16;
     unsigned NumLaneElts = NumElts / NumLanes;
 
-    // If palignr is shifting the pair of input vectors less than the size of
-    // a lane, emit a shuffle instruction.
-    if (ShiftVal <= NumLaneElts) {
-      SmallVector<llvm::Constant*, 32> Indices;
-      // 256-bit palignr operates on 128-bit lanes so we need to handle that
-      for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
-        for (unsigned i = 0; i != NumLaneElts; ++i) {
-          unsigned Idx = ShiftVal + i;
-          if (Idx >= NumLaneElts)
-            Idx += NumElts - NumLaneElts; // End of lane, switch operand.
-          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
-        }
-      }
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
-    }
-
     // If palignr is shifting the pair of vectors more than the size of two
     // lanes, emit zero.
     if (ShiftVal >= (2 * NumLaneElts))
       return llvm::Constant::getNullValue(ConvertType(E->getType()));
 
     // If palignr is shifting the pair of input vectors more than one lane,
-    // but less than two lanes, emit a shift.
-    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, NumElts/8);
-    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-    Ops[1] = llvm::ConstantInt::get(Int32Ty, (ShiftVal-NumLaneElts) * 8);
-
-    Intrinsic::ID ID;
-    switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_palignr128:
-      ID = Intrinsic::x86_sse2_psrl_dq;
-      break;
-    case X86::BI__builtin_ia32_palignr256:
-      ID = Intrinsic::x86_avx2_psrl_dq;
-      break;
+    // but less than two lanes, convert to shifting in zeroes.
+    if (ShiftVal > NumLaneElts) {
+      ShiftVal -= NumLaneElts;
+      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
     }
 
-    // create i32 constant
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit palignr operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+      for (unsigned i = 0; i != NumLaneElts; ++i) {
+        unsigned Idx = ShiftVal + i;
+        if (Idx >= NumLaneElts)
+          Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
+      }
+    }
+
+    Value* SV = llvm::ConstantVector::get(Indices);
+    return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
   }
   case X86::BI__builtin_ia32_pslldqi256: {
     // Shift value is in bits so divide by 8.
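
A standalone replay of the new index computation, for the 256-bit case with ShiftVal == 17 that the updated tests below pin down; illustration only, not part of the commit:

// Illustration only (not part of the commit): replays the index
// computation above for the 256-bit case with ShiftVal == 17.
#include <cstdio>

int main() {
  unsigned ShiftVal = 17;
  const unsigned NumElts = 32;                     // <32 x i8>
  const unsigned NumLanes = NumElts / 16;          // 2 lanes
  const unsigned NumLaneElts = NumElts / NumLanes; // 16 bytes per lane

  // More than one lane but less than two: shift in zeroes, i.e. the
  // second shuffle operand (elements 32..63) becomes a zero vector.
  if (ShiftVal > NumLaneElts)
    ShiftVal -= NumLaneElts;                       // 17 -> 1

  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Idx = ShiftVal + i;
      if (Idx >= NumLaneElts)
        Idx += NumElts - NumLaneElts;              // End of lane, switch operand.
      std::printf("i32 %u ", Idx + l);
    }
  std::printf("\n"); // 1..15 32, then 17..31 48 -- the mask in the avx2 test.
  return 0;
}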

@@ -96,7 +96,7 @@ __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) {
 }
 
 __m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.psrl.dq({{.*}}, i32 8)
+  // CHECK: shufflevector <32 x i8> %5, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
   return _mm256_alignr_epi8(a, b, 17);
 }

@@ -574,6 +574,6 @@ __m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
 }
 
 __m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
-  // CHECK: @llvm.x86.sse2.psrl.dq({{.*}}, i32 8)
+  // CHECK: shufflevector <16 x i8> %5, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
   return _mm_alignr_epi8(a, b, 17);
 }
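
Usage sketch (illustration only, not from the commit; compile with -mssse3): for counts between 16 and 32 the second operand cannot reach the result, and palignr degenerates to a right shift with zero fill, which is exactly the behavior the shuffle masks above encode:

// Illustration only (not from the commit; compile with -mssse3): with
// count 17, _mm_alignr_epi8 ignores its second operand and returns the
// first operand shifted right one byte with zero fill.
#include <immintrin.h>
#include <cstdio>

int main() {
  __m128i a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                            8, 9, 10, 11, 12, 13, 14, 15);
  __m128i b = _mm_set1_epi8(-1);          // contents cannot reach the result
  __m128i r = _mm_alignr_epi8(a, b, 17);
  unsigned char out[16];
  _mm_storeu_si128((__m128i *)out, r);
  for (int i = 0; i != 16; ++i)
    std::printf("%d ", out[i]);           // prints: 1 2 ... 15 0
  std::printf("\n");
  return 0;
}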