Add vec_insert4b and vec_extract4b functions to altivec.h
Add builtins for the functions, plus custom codegen that maps the builtins to their corresponding intrinsics and handles the endian-related swapping. https://reviews.llvm.org/D26546 llvm-svn: 291179
This commit is contained in:
parent
8b8a600d92
commit
96d9e0ec05
|
@ -417,6 +417,9 @@ BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "")
|
|||
BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "")
|
||||
BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")
|
||||
|
||||
BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "")
|
||||
BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "")
|
||||
|
||||
// HTM builtins
|
||||
BUILTIN(__builtin_tbegin, "UiUIi", "")
|
||||
BUILTIN(__builtin_tend, "UiUIi", "")
|
||||
|
|
|
@ -35,6 +35,11 @@ using namespace clang;
|
|||
using namespace CodeGen;
|
||||
using namespace llvm;
|
||||
|
||||
/// Restrict \p Value to the range bounded by \p Low and \p High: the value is
/// first raised to at least \p Low and the result is then capped at \p High.
/// Used below to force constant builtin indices into the range [0, 12].
static
|
||||
int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
|
||||
return std::min(High, std::max(Low, Value));
|
||||
}
|
||||
|
||||
/// getBuiltinLibFunction - Given a builtin id for a function like
|
||||
/// "__builtin_fabsf", return a Function* for "fabsf".
|
||||
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
|
||||
|
@ -8191,6 +8196,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
llvm_unreachable("Unknown FMA operation");
|
||||
return nullptr; // Suppress no-return warning
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_vsx_insertword: {
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
|
||||
|
||||
// Third argument is a compile time constant int. It must be clamped to
|
||||
// the range [0, 12].
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
|
||||
assert(ArgCI &&
|
||||
"Third arg to xxinsertw intrinsic must be constant integer");
|
||||
const int64_t MaxIndex = 12;
|
||||
int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
|
||||
|
||||
// The builtin semantics don't exactly match the xxinsertw instruction's
|
||||
// semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
|
||||
// word from the first argument, and inserts it in the second argument. The
|
||||
// instruction extracts the word from its second input register and inserts
|
||||
// it into its first input register, so swap the first and second arguments.
|
||||
std::swap(Ops[0], Ops[1]);
|
||||
|
||||
// Need to cast the second argument from a vector of unsigned int to a
|
||||
// vector of long long.
|
||||
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
|
||||
|
||||
if (getTarget().isLittleEndian()) {
|
||||
// Create a shuffle mask of (1, 0)
|
||||
Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
|
||||
ConstantInt::get(Int32Ty, 0)
|
||||
};
|
||||
Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
|
||||
|
||||
// Reverse the double words in the vector we will extract from.
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
||||
Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
|
||||
|
||||
// Reverse the index.
|
||||
Index = MaxIndex - Index;
|
||||
}
|
||||
|
||||
// Intrinsic expects the first arg to be a vector of int.
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
|
||||
Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
|
||||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_vsx_extractuword: {
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
|
||||
|
||||
// Intrinsic expects the first argument to be a vector of doublewords.
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
||||
|
||||
// The second argument is a compile time constant int that needs to
|
||||
// be clamped to the range [0, 12].
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
|
||||
assert(ArgCI &&
|
||||
"Second Arg to xxextractuw intrinsic must be a constant integer!");
|
||||
const int64_t MaxIndex = 12;
|
||||
int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
|
||||
|
||||
if (getTarget().isLittleEndian()) {
|
||||
// Reverse the index.
|
||||
Index = MaxIndex - Index;
|
||||
Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
|
||||
|
||||
// Emit the call, then reverse the double words of the results vector.
|
||||
Value *Call = Builder.CreateCall(F, Ops);
|
||||
|
||||
// Create a shuffle mask of (1, 0)
|
||||
Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
|
||||
ConstantInt::get(Int32Ty, 0)
|
||||
};
|
||||
Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
|
||||
|
||||
Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
|
||||
return ShuffleCall;
|
||||
} else {
|
||||
Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
|
||||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12574,6 +12574,9 @@ static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, int __b) {
|
|||
|
||||
#ifdef __POWER9_VECTOR__
|
||||
|
||||
#define vec_insert4b __builtin_vsx_insertword
|
||||
#define vec_extract4b __builtin_vsx_extractuword
|
||||
|
||||
/* vec_extract_exp */
|
||||
|
||||
static __inline__ vector unsigned int __ATTRS_o_ai
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
// REQUIRES: powerpc-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \
|
||||
// RUN: -triple powerpc64-unknown-unknown -fsyntax-only \
|
||||
// RUN: -Wall -Werror -verify %s
|
||||
|
||||
// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \
|
||||
// RUN: -triple powerpc64le-unknown-unknown -fsyntax-only \
|
||||
// RUN: -Wall -Werror -verify %s
|
||||
|
||||
#include <altivec.h>
|
||||
|
||||
extern vector signed int vsi;
|
||||
extern vector unsigned char vuc;
|
||||
|
||||
void testInsertWord1(void) {
|
||||
int index = 5;
|
||||
vector unsigned char v1 = vec_insert4b(vsi, vuc, index); // expected-error {{argument to '__builtin_vsx_insertword' must be a constant integer}}
|
||||
vector unsigned long long v2 = vec_extract4b(vuc, index); // expected-error {{argument to '__builtin_vsx_extractuword' must be a constant integer}}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// REQUIRES: powerpc-registered-target
|
||||
// XFAIL: powerpc
|
||||
|
||||
// RUN: %clang -faltivec -target powerpc64le-unknown-unknown -mcpu=power8 \
|
||||
// RUN: -Wall -Wextra -c %s
|
||||
// RUN: %clang -faltivec -target powerpc64-unknown-unknown -mcpu=power8 \
|
||||
// RUN: -Wall -Wextra -c %s
|
||||
|
||||
// Expect the compile to fail with "cannot compile this builtin function yet"
|
||||
extern vector signed int vsi;
|
||||
extern vector unsigned char vuc;
|
||||
|
||||
vector unsigned long long testExtractWord(void) {
|
||||
return __builtin_vsx_extractuword(vuc, 12);
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
// REQUIRES: powerpc-registered-target
|
||||
// XFAIL: powerpc
|
||||
|
||||
// RUN: %clang -faltivec -target powerpc64le-unknown-unknown -mcpu=power8 \
|
||||
// RUN: -Wall -Werror -c %s
|
||||
|
||||
// RUN: %clang -faltivec -target powerpc64-unknown-unknown -mcpu=power8 \
|
||||
// RUN: -Wall -Werror -c %s
|
||||
|
||||
// expect to fail with diagnostic: "cannot compile this builtin function yet"
|
||||
extern vector signed int vsi;
|
||||
extern vector unsigned char vuc;
|
||||
|
||||
vector unsigned char testInsertWord(void) {
|
||||
return __builtin_vsx_insertword(vsi, vuc, 0);
|
||||
}
|
|
@ -1166,17 +1166,52 @@ vector float test114(void) {
|
|||
// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
|
||||
// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
|
||||
// CHECK-BE-NEXT: ret <4 x float>
|
||||
// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
|
||||
// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
|
||||
// CHECK-LE-NEXT: ret <4 x float>
|
||||
// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
|
||||
// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
|
||||
// CHECK-NEXT: ret <4 x float>
|
||||
return vec_extract_fp32_from_shorth(vusa);
|
||||
}
|
||||
vector float test115(void) {
|
||||
// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
|
||||
// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
|
||||
// CHECK-BE-NEXT: ret <4 x float>
|
||||
// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
|
||||
// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
|
||||
// CHECK-LE-NEXT: ret <4 x float>
|
||||
// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
|
||||
// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
|
||||
// CHECK-NEXT: ret <4 x float>
|
||||
return vec_extract_fp32_from_shortl(vusa);
|
||||
}
|
||||
vector unsigned char test116(void) {
|
||||
// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7)
|
||||
// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8>
|
||||
// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0>
|
||||
// CHECK-NEXT: [[T2:%.+]] = bitcast <2 x i64> [[T1]] to <4 x i32>
|
||||
// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 5)
|
||||
// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8>
|
||||
return vec_insert4b(vuia, vuca, 7);
|
||||
}
|
||||
vector unsigned char test117(void) {
|
||||
// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 12)
|
||||
// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8>
|
||||
// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0>
|
||||
// CHECK-NEXT: [[T2:%.+]] = bitcast <2 x i64> [[T1]] to <4 x i32>
|
||||
// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 0)
|
||||
// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8>
|
||||
return vec_insert4b(vuia, vuca, 13);
|
||||
}
|
||||
vector unsigned long long test118(void) {
|
||||
// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11)
|
||||
// CHECK-BE-NEXT: ret <2 x i64>
|
||||
// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1)
|
||||
// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> <i32 1, i32 0>
|
||||
// CHECK-NEXT: ret <2 x i64>
|
||||
return vec_extract4b(vuca, 11);
|
||||
}
|
||||
vector unsigned long long test119(void) {
|
||||
// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 0)
|
||||
// CHECK-BE-NEXT: ret <2 x i64>
|
||||
// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 12)
|
||||
// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> <i32 1, i32 0>
|
||||
// CHECK-NEXT: ret <2 x i64>
|
||||
return vec_extract4b(vuca, -5);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue