[X86][SSE] Add _mm_undefined_* intrinsics

Added missing SSE/AVX 'undefined' intrinsics (PR24040):

_mm_undefined_pd, _mm_undefined_ps + _mm_undefined_si128
_mm256_undefined_pd, _mm256_undefined_ps + _mm256_undefined_si256
_mm512_undefined, _mm512_undefined_ps, _mm512_undefined_pd + _mm512_undefined_epi32

Added builtin intrinsics:

__builtin_ia32_undef128, __builtin_ia32_undef256 + __builtin_ia32_undef512

Differential Revision: http://reviews.llvm.org/D12052

llvm-svn: 246083
This commit is contained in:
Simon Pilgrim 2015-08-26 21:17:12 +00:00
parent 0ab4b5b52e
commit 5aba9925c0
9 changed files with 130 additions and 0 deletions

View File

@ -30,6 +30,12 @@
// can use it?
BUILTIN(__builtin_cpu_supports, "bcC*", "nc")
// Undefined Values
//
// These builtins take no arguments. EmitX86BuiltinExpr does not emit a call
// for them; it returns an undef value of the call expression's type.
// NOTE(review): the type strings presumably encode 2/4/8 x double vectors
// (128/256/512 bits); the intrinsic headers cast the result to the float and
// integer vector types as needed -- confirm against the Builtins type encoding.
TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "")
TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "nc", "")
TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "nc", "")
// 3DNow!
//
TARGET_BUILTIN(__builtin_ia32_femms, "v", "", "3dnow")

View File

@ -6047,6 +6047,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
return Builder.CreateCall(F, {Address, RW, Locality, Data});
}
// The undef builtins produce no call: the result is an undef value of the
// builtin expression's converted type, so a single case body serves all
// three vector widths.
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
return UndefValue::get(ConvertType(E->getType()));
case X86::BI__builtin_ia32_vec_init_v8qi:
case X86::BI__builtin_ia32_vec_init_v4hi:
case X86::BI__builtin_ia32_vec_init_v2si:

View File

@ -57,6 +57,30 @@ _mm512_setzero_si512(void)
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
/* Returns a 512-bit vector of [8 x double] whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}
/* Returns a 512-bit vector of [16 x float] whose contents are unspecified.
 * Same result as _mm512_undefined_ps(): both cast the same builtin to __m512.
 * The parameter list is (void) so that this is a real prototype in C rather
 * than an unprototyped declaration. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}
/* Returns a 512-bit vector of [16 x float] whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}
/* Returns a 512-bit integer vector (__m512i) whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{

View File

@ -900,6 +900,24 @@ _mm256_stream_ps(float *__p, __m256 __a)
}
/* Create vectors */
/* Returns a 256-bit vector of [4 x double] whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_undefined_pd(void)
{
  return (__m256d)__builtin_ia32_undef256();
}
/* Returns a 256-bit vector of [8 x float] whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_undefined_ps(void)
{
  return (__m256)__builtin_ia32_undef256();
}
/* Returns a 256-bit integer vector (__m256i) whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_undefined_si256(void)
{
  return (__m256i)__builtin_ia32_undef256();
}
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set_pd(double __a, double __b, double __c, double __d)
{

View File

@ -522,6 +522,12 @@ _mm_loadl_pd(__m128d __a, double const *__dp)
return (__m128d){ __u, __a[1] };
}
/* Returns a 128-bit vector of [2 x double] whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_undefined_pd(void)
{
  return (__m128d)__builtin_ia32_undef128();
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_set_sd(double __w)
{
@ -1115,6 +1121,12 @@ _mm_loadl_epi64(__m128i const *__p)
return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
}
/* Returns a 128-bit integer vector (__m128i) whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_undefined_si128(void)
{
  return (__m128i)__builtin_ia32_undef128();
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi64x(long long q1, long long q0)
{

View File

@ -576,6 +576,12 @@ _mm_loadr_ps(const float *__p)
return __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
}
/* Returns a 128-bit vector of [4 x float] whose contents are unspecified.
 * Use it where a vector operand is required but its initial value is
 * irrelevant. The parameter list is (void) so that this is a real prototype
 * in C rather than an unprototyped declaration. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_undefined_ps(void)
{
  return (__m128)__builtin_ia32_undef128();
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ss(float __w)
{

View File

@ -147,3 +147,21 @@ __m256i test_256_insert_epi64(__m256i __a) {
// CHECK: insertelement <4 x i64> {{.*}}, i64 {{.*}}, i32 {{.*}}
return _mm256_insert_epi64(__a, 42, 3);
}
// Codegen test for _mm256_undefined_ps(): the directives inside the body
// expect the emitted IR for this function to return an undef <8 x float>.
// The expected IR is matched textually, so keep the body a direct return.
__m256 test_mm256_undefined_ps() {
// CHECK-LABEL: @test_mm256_undefined_ps
// CHECK: ret <8 x float> undef
return _mm256_undefined_ps();
}
// Codegen test for _mm256_undefined_pd(): the directives inside the body
// expect the emitted IR for this function to return an undef <4 x double>.
// The expected IR is matched textually, so keep the body a direct return.
__m256d test_mm256_undefined_pd() {
// CHECK-LABEL: @test_mm256_undefined_pd
// CHECK: ret <4 x double> undef
return _mm256_undefined_pd();
}
// Codegen test for _mm256_undefined_si256(): the directives inside the body
// expect the emitted IR for this function to return an undef <4 x i64>.
// The expected IR is matched textually, so keep the body a direct return.
__m256i test_mm256_undefined_si256() {
// CHECK-LABEL: @test_mm256_undefined_si256
// CHECK: ret <4 x i64> undef
return _mm256_undefined_si256();
}

View File

@ -1875,3 +1875,27 @@ __m128d test_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK: @llvm.x86.avx512.mask.min.sd.round
return _mm_maskz_min_sd(__U,__A,__B);
}
// Codegen test for _mm512_undefined(): the directives inside the body
// expect the emitted IR for this function to return an undef <16 x float>.
// The expected IR is matched textually, so keep the body a direct return.
__m512 test_mm512_undefined() {
// CHECK-LABEL: @test_mm512_undefined
// CHECK: ret <16 x float> undef
return _mm512_undefined();
}
// Codegen test for _mm512_undefined_ps(): the directives inside the body
// expect the emitted IR for this function to return an undef <16 x float>.
// The expected IR is matched textually, so keep the body a direct return.
__m512 test_mm512_undefined_ps() {
// CHECK-LABEL: @test_mm512_undefined_ps
// CHECK: ret <16 x float> undef
return _mm512_undefined_ps();
}
// Codegen test for _mm512_undefined_pd(): the directives inside the body
// expect the emitted IR for this function to return an undef <8 x double>.
// The expected IR is matched textually, so keep the body a direct return.
__m512d test_mm512_undefined_pd() {
// CHECK-LABEL: @test_mm512_undefined_pd
// CHECK: ret <8 x double> undef
return _mm512_undefined_pd();
}
// Codegen test for _mm512_undefined_epi32(): the directives inside the body
// expect the emitted IR for this function to return an undef <8 x i64>
// (the __m512i return type appears as 8 x i64 in the expected IR).
// The expected IR is matched textually, so keep the body a direct return.
__m512i test_mm512_undefined_epi32() {
// CHECK-LABEL: @test_mm512_undefined_epi32
// CHECK: ret <8 x i64> undef
return _mm512_undefined_epi32();
}

View File

@ -649,3 +649,21 @@ __m128i test_mm_cvtepu32_epi64(__m128i a) {
// CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
return _mm_cvtepu32_epi64(a);
}
// Codegen test for _mm_undefined_ps(): the directives inside the body
// expect the emitted IR for this function to return an undef <4 x float>.
// The expected IR is matched textually, so keep the body a direct return.
__m128 test_mm_undefined_ps() {
// CHECK-LABEL: @test_mm_undefined_ps
// CHECK: ret <4 x float> undef
return _mm_undefined_ps();
}
// Codegen test for _mm_undefined_pd(): the directives inside the body
// expect the emitted IR for this function to return an undef <2 x double>.
// The expected IR is matched textually, so keep the body a direct return.
__m128d test_mm_undefined_pd() {
// CHECK-LABEL: @test_mm_undefined_pd
// CHECK: ret <2 x double> undef
return _mm_undefined_pd();
}
// Codegen test for _mm_undefined_si128(): the directives inside the body
// expect the emitted IR for this function to return an undef <2 x i64>.
// The expected IR is matched textually, so keep the body a direct return.
__m128i test_mm_undefined_si128() {
// CHECK-LABEL: @test_mm_undefined_si128
// CHECK: ret <2 x i64> undef
return _mm_undefined_si128();
}