Add an ammintrin.h header for SSE4a intrinsics.

This is a clean-room implementation based on public documentation and
I tried to validate it as much as possible against gcc.

llvm-svn: 157638
This commit is contained in:
Benjamin Kramer 2012-05-29 19:10:17 +00:00
parent 217dc2d8dd
commit ba6e2528fa
4 changed files with 120 additions and 1 deletions

View File

@ -369,6 +369,14 @@ BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "")
BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "")
BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "")
// SSE4a
// Builtins backing the intrinsics declared in ammintrin.h:
//   extrqi/extrq     -> _mm_extracti_si64 / _mm_extract_si64
//   insertqi/insertq -> _mm_inserti_si64 / _mm_insert_si64
//   movntsd/movntss  -> _mm_stream_sd / _mm_stream_ss
// Each entry is BUILTIN(name, type-signature, attributes); the attribute
// string is empty for all of them.
// NOTE(review): the signature strings appear to use the Builtins.def encoding
// (return type first; V<N> = N-element vector, LLi = long long, c = char,
// d/f = double/float, '*' = pointer, v = void) -- confirm against Builtins.def.
BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLicc", "")
BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "")
BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLicc", "")
BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "")
BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "")
BUILTIN(__builtin_ia32_movntss, "vf*V4f", "")
// AES
BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "")
BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "")

View File

@ -0,0 +1,68 @@
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H
#ifndef __SSE4A__
#error "SSE4A instruction set not enabled"
#else
#include <pmmintrin.h>
/* _mm_extracti_si64(v, length, index)
 *
 * Expression macro wrapping __builtin_ia32_extrqi. `v` is converted to
 * __m128i and reinterpreted as __v2di; `length` and `index` are each cast to
 * char before being passed on. The builtin's result is cast back to __m128i.
 * Every argument is evaluated exactly once.
 *
 * NOTE(review): presumably a macro rather than a function so that length and
 * index remain compile-time constants for the immediate form of the
 * instruction -- confirm.
 */
#define _mm_extracti_si64(v, length, index) \
  ((__m128i)__builtin_ia32_extrqi( \
      (__v2di)(__m128i)(v), (char)(length), (char)(index)))
/* Register-operand form of the SSE4a extract.
 *
 * __x is reinterpreted as a vector of two 64-bit integers and __y as a vector
 * of sixteen bytes; both are handed to __builtin_ia32_extrq and the result is
 * returned as an __m128i.
 *
 * NOTE(review): per AMD's EXTRQ description, __y is expected to carry the
 * field length and bit index in its low bytes -- confirm against the manual.
 */
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_extract_si64(__m128i __x, __m128i __y)
{
  __v2di __source = (__v2di)__x;
  __v16qi __control = (__v16qi)__y;
  __v2di __result = __builtin_ia32_extrq(__source, __control);
  return (__m128i)__result;
}
/* _mm_inserti_si64(dst, src, length, index)
 *
 * Expression macro wrapping __builtin_ia32_insertqi. `dst` and `src` are each
 * converted to __m128i and reinterpreted as __v2di; `length` and `index` are
 * each cast to char. The builtin's result is cast back to __m128i. Every
 * argument is evaluated exactly once.
 *
 * NOTE(review): presumably a macro rather than a function so that length and
 * index remain compile-time constants for the immediate form of the
 * instruction -- confirm.
 */
#define _mm_inserti_si64(dst, src, length, index) \
  ((__m128i)__builtin_ia32_insertqi( \
      (__v2di)(__m128i)(dst), (__v2di)(__m128i)(src), \
      (char)(length), (char)(index)))
/* Register-operand form of the SSE4a insert.
 *
 * Both operands are reinterpreted as vectors of two 64-bit integers and
 * passed to __builtin_ia32_insertq; the result is returned as an __m128i.
 *
 * NOTE(review): per AMD's INSERTQ description, __y is expected to supply both
 * the bits to insert and the length/index control fields -- confirm against
 * the manual.
 */
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_insert_si64(__m128i __x, __m128i __y)
{
  __v2di __target = (__v2di)__x;
  __v2di __source = (__v2di)__y;
  __v2di __result = __builtin_ia32_insertq(__target, __source);
  return (__m128i)__result;
}
/* Passes the destination pointer __p and the vector __a (reinterpreted as
 * __v2df) to __builtin_ia32_movntsd. Returns nothing.
 *
 * NOTE(review): per AMD's MOVNTSD description this should be a non-temporal
 * store of the low double-precision element of __a to *__p -- confirm against
 * the manual.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_sd(double *__p, __m128d __a)
{
  __v2df __value = (__v2df)__a;
  __builtin_ia32_movntsd(__p, __value);
}
/* Passes the destination pointer __p and the vector __a (reinterpreted as
 * __v4sf) to __builtin_ia32_movntss. Returns nothing.
 *
 * NOTE(review): per AMD's MOVNTSS description this should be a non-temporal
 * store of the low single-precision element of __a to *__p -- confirm against
 * the manual.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_ss(float *__p, __m128 __a)
{
  __v4sf __value = (__v4sf)__a;
  __builtin_ia32_movntss(__p, __value);
}
#endif /* __SSE4A__ */
#endif /* __AMMINTRIN_H */

View File

@ -46,10 +46,14 @@
#include <popcntintrin.h>
#endif
#ifdef __SSE4A__
#include <ammintrin.h>
#endif
#ifdef __FMA4__
#include <fma4intrin.h>
#endif
// FIXME: SSE4A, XOP, LWP, ABM
// FIXME: XOP, LWP, ABM
#endif /* __X86INTRIN_H */

View File

@ -0,0 +1,39 @@
// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4a -g -emit-llvm %s -o - | FileCheck %s
#include <ammintrin.h>
// Expects _mm_extracti_si64 with immediates 3 and 2 to lower to a call to
// @llvm.x86.sse4a.extrqi taking a <2 x i64> operand followed by i8 3, i8 2.
__m128i test_extracti_si64(__m128i x) {
return _mm_extracti_si64(x, 3, 2);
// CHECK: @test_extracti_si64
// CHECK: @llvm.x86.sse4a.extrqi(<2 x i64> %{{[^,]+}}, i8 3, i8 2)
}
// Expects _mm_extract_si64 to lower to a call to @llvm.x86.sse4a.extrq whose
// operands are a <2 x i64> value and a <16 x i8> value.
__m128i test_extract_si64(__m128i x, __m128i y) {
return _mm_extract_si64(x, y);
// CHECK: @test_extract_si64
// CHECK: @llvm.x86.sse4a.extrq(<2 x i64> %{{[^,]+}}, <16 x i8> %{{[^,]+}})
}
// Expects _mm_inserti_si64 with immediates 5 and 6 to lower to a call to
// @llvm.x86.sse4a.insertqi taking two <2 x i64> operands, then i8 5, i8 6.
__m128i test_inserti_si64(__m128i x, __m128i y) {
return _mm_inserti_si64(x, y, 5, 6);
// CHECK: @test_inserti_si64
// CHECK: @llvm.x86.sse4a.insertqi(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}}, i8 5, i8 6)
}
// Expects _mm_insert_si64 to lower to a call to @llvm.x86.sse4a.insertq
// taking two <2 x i64> operands.
__m128i test_insert_si64(__m128i x, __m128i y) {
return _mm_insert_si64(x, y);
// CHECK: @test_insert_si64
// CHECK: @llvm.x86.sse4a.insertq(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}})
}
// Expects _mm_stream_sd to lower to a call to @llvm.x86.sse4a.movnt.sd whose
// operands are an i8* pointer and a <2 x double> value.
void test_stream_sd(double *p, __m128d a) {
_mm_stream_sd(p, a);
// CHECK: @test_stream_sd
// CHECK: @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}})
}
// Expects _mm_stream_ss to lower to a call to @llvm.x86.sse4a.movnt.ss whose
// operands are an i8* pointer and a <4 x float> value.
void test_stream_ss(float *p, __m128 a) {
_mm_stream_ss(p, a);
// CHECK: @test_stream_ss
// CHECK: @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}})
}