diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e487e487ca5d..1b3728ff6849 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -369,6 +369,14 @@ BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "")
 BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "")
 BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "")
 
+// SSE4a
+BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "")
+BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "")
+BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "")
+BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "")
+BUILTIN(__builtin_ia32_movntss, "vf*V4f", "")
+
 // AES
 BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "")
 BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "")
diff --git a/clang/lib/Headers/ammintrin.h b/clang/lib/Headers/ammintrin.h
new file mode 100644
index 000000000000..d87b9cde4405
--- /dev/null
+++ b/clang/lib/Headers/ammintrin.h
@@ -0,0 +1,80 @@
+/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __AMMINTRIN_H
+#define __AMMINTRIN_H
+
+#ifndef __SSE4A__
+#error "SSE4A instruction set not enabled"
+#else
+
+#include <pmmintrin.h>
+
+/* Extract a bit field from the low 64 bits of x (EXTRQ, immediate form).
+ * len is the field length and idx the starting bit index; both must be
+ * integer constant expressions because they are encoded in the instruction. */
+#define _mm_extracti_si64(x, len, idx) \
+  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
+                                  (char)(len), (char)(idx)))
+
+/* Extract a bit field from the low 64 bits of __x (EXTRQ, register form).
+ * The field length and bit index are read from __y at run time. */
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_extract_si64(__m128i __x, __m128i __y)
+{
+  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
+}
+
+/* Insert a len-bit field taken from y into the low 64 bits of x at bit idx
+ * (INSERTQ, immediate form).  len and idx must be integer constant
+ * expressions because they are encoded in the instruction. */
+#define _mm_inserti_si64(x, y, len, idx) \
+  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
+                                    (__v2di)(__m128i)(y), \
+                                    (char)(len), (char)(idx)))
+
+/* Insert a bit field from __y into the low 64 bits of __x (INSERTQ, register
+ * form).  The field length and bit index are read from __y at run time. */
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_insert_si64(__m128i __x, __m128i __y)
+{
+  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
+}
+
+/* Store the low double of __a to *__p with a non-temporal hint (MOVNTSD). */
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_mm_stream_sd(double *__p, __m128d __a)
+{
+  __builtin_ia32_movntsd(__p, (__v2df)__a);
+}
+
+/* Store the low float of __a to *__p with a non-temporal hint (MOVNTSS). */
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_mm_stream_ss(float *__p, __m128 __a)
+{
+  __builtin_ia32_movntss(__p, (__v4sf)__a);
+}
+
+#endif /* __SSE4A__ */
+
+#endif /* __AMMINTRIN_H */
diff --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h
index f5e4d883e8b2..131e433cf52c 100644
--- a/clang/lib/Headers/x86intrin.h
+++ b/clang/lib/Headers/x86intrin.h
@@ -46,10 +46,14 @@
 #include <popcntintrin.h>
 #endif
 
+#ifdef __SSE4A__
+#include <ammintrin.h>
+#endif
+
 #ifdef __FMA4__
 #include <fma4intrin.h>
 #endif
 
-// FIXME: SSE4A, XOP, LWP, ABM
+// FIXME: XOP, LWP, ABM
 
 #endif /* __X86INTRIN_H */
diff --git a/clang/test/CodeGen/sse4a-builtins.c b/clang/test/CodeGen/sse4a-builtins.c
new file mode 100644
index 000000000000..e1d7e8fb561b
--- /dev/null
+++ b/clang/test/CodeGen/sse4a-builtins.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4a -g -emit-llvm %s -o - | FileCheck %s
+
+#include <x86intrin.h>
+
+__m128i test_extracti_si64(__m128i x) {
+  return _mm_extracti_si64(x, 3, 2);
+// CHECK: @test_extracti_si64
+// CHECK: @llvm.x86.sse4a.extrqi(<2 x i64> %{{[^,]+}}, i8 3, i8 2)
+}
+
+__m128i test_extract_si64(__m128i x, __m128i y) {
+  return _mm_extract_si64(x, y);
+// CHECK: @test_extract_si64
+// CHECK: @llvm.x86.sse4a.extrq(<2 x i64> %{{[^,]+}}, <16 x i8> %{{[^,]+}})
+}
+
+__m128i test_inserti_si64(__m128i x, __m128i y) {
+  return _mm_inserti_si64(x, y, 5, 6);
+// CHECK: @test_inserti_si64
+// CHECK: @llvm.x86.sse4a.insertqi(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}}, i8 5, i8 6)
+}
+
+__m128i test_insert_si64(__m128i x, __m128i y) {
+  return _mm_insert_si64(x, y);
+// CHECK: @test_insert_si64
+// CHECK: @llvm.x86.sse4a.insertq(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}})
+}
+
+void test_stream_sd(double *p, __m128d a) {
+  _mm_stream_sd(p, a);
+// CHECK: @test_stream_sd
+// CHECK: @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}})
+}
+
+void test_stream_ss(float *p, __m128 a) {
+  _mm_stream_ss(p, a);
+// CHECK: @test_stream_ss
+// CHECK: @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}})
+}