Add an ammintrin.h header for SSE4a intrinsics.
This is a clean-room implementation based on public documentation, and I tried to validate it against gcc as much as possible. llvm-svn: 157638
This commit is contained in:
parent
217dc2d8dd
commit
ba6e2528fa
|
@ -369,6 +369,14 @@ BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "")
|
|||
BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "")
|
||||
BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "")
|
||||
|
||||
// SSE4a
// These six builtins are the ones called by the intrinsics in ammintrin.h:
//   extrqi / extrq     <- _mm_extracti_si64 / _mm_extract_si64
//   insertqi / insertq <- _mm_inserti_si64 / _mm_insert_si64
//   movntsd / movntss  <- _mm_stream_sd / _mm_stream_ss
// NOTE(review): the two trailing 'c' arguments of extrqi/insertqi are only
// ever supplied through macros in ammintrin.h, which suggests they must be
// compile-time constants; confirm whether the signature should enforce that.
|
||||
BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLicc", "")
|
||||
BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "")
|
||||
BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLicc", "")
|
||||
BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "")
|
||||
BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "")
|
||||
BUILTIN(__builtin_ia32_movntss, "vf*V4f", "")
|
||||
|
||||
// AES
|
||||
BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "")
|
||||
BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "")
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __AMMINTRIN_H
|
||||
#define __AMMINTRIN_H
|
||||
|
||||
#ifndef __SSE4A__
|
||||
#error "SSE4A instruction set not enabled"
|
||||
#else
|
||||
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/* _mm_extracti_si64(x, len, idx)
 *
 * Expands to a call to __builtin_ia32_extrqi. x is converted to __m128i and
 * then to __v2di; len and idx are each cast to char. The builtin's result is
 * cast back to __m128i.
 *
 * NOTE(review): written as a macro rather than an inline function,
 * presumably so that len and idx reach the builtin as constant expressions;
 * confirm against the instruction's operand requirements.
 */
#define _mm_extracti_si64(x, len, idx) \
|
||||
((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
|
||||
(char)(len), (char)(idx)))
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_extract_si64(__m128i __x, __m128i __y)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
|
||||
}
|
||||
|
||||
/* _mm_inserti_si64(x, y, len, idx)
 *
 * Expands to a call to __builtin_ia32_insertqi. x and y are each converted
 * to __m128i and then to __v2di; len and idx are each cast to char. The
 * builtin's result is cast back to __m128i.
 *
 * NOTE(review): written as a macro rather than an inline function,
 * presumably so that len and idx reach the builtin as constant expressions;
 * confirm against the instruction's operand requirements.
 */
#define _mm_inserti_si64(x, y, len, idx) \
|
||||
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
|
||||
(__v2di)(__m128i)(y), \
|
||||
(char)(len), (char)(idx)))
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_insert_si64(__m128i __x, __m128i __y)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
|
||||
}
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_stream_sd(double *__p, __m128d __a)
|
||||
{
|
||||
__builtin_ia32_movntsd(__p, (__v2df)__a);
|
||||
}
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_stream_ss(float *__p, __m128 __a)
|
||||
{
|
||||
__builtin_ia32_movntss(__p, (__v4sf)__a);
|
||||
}
|
||||
|
||||
#endif /* __SSE4A__ */
|
||||
|
||||
#endif /* __AMMINTRIN_H */
|
|
@ -46,10 +46,14 @@
|
|||
#include <popcntintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __SSE4A__
|
||||
#include <ammintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __FMA4__
|
||||
#include <fma4intrin.h>
|
||||
#endif
|
||||
|
||||
// FIXME: SSE4A, XOP, LWP, ABM
|
||||
// FIXME: XOP, LWP, ABM
|
||||
|
||||
#endif /* __X86INTRIN_H */
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4a -g -emit-llvm %s -o - | FileCheck %s
|
||||
|
||||
#include <ammintrin.h>
|
||||
|
||||
// Calls _mm_extracti_si64 with the constants 3 and 2. The directives in the
// body expect the emitted IR to contain a call to @llvm.x86.sse4a.extrqi
// taking a <2 x i64> value followed by i8 3 and i8 2.
__m128i test_extracti_si64(__m128i x) {
|
||||
return _mm_extracti_si64(x, 3, 2);
|
||||
// CHECK: @test_extracti_si64
|
||||
// CHECK: @llvm.x86.sse4a.extrqi(<2 x i64> %{{[^,]+}}, i8 3, i8 2)
|
||||
}
|
||||
|
||||
// Calls _mm_extract_si64(x, y). The directives in the body expect the
// emitted IR to contain a call to @llvm.x86.sse4a.extrq taking a <2 x i64>
// value and a <16 x i8> value.
__m128i test_extract_si64(__m128i x, __m128i y) {
|
||||
return _mm_extract_si64(x, y);
|
||||
// CHECK: @test_extract_si64
|
||||
// CHECK: @llvm.x86.sse4a.extrq(<2 x i64> %{{[^,]+}}, <16 x i8> %{{[^,]+}})
|
||||
}
|
||||
|
||||
// Calls _mm_inserti_si64 with the constants 5 and 6. The directives in the
// body expect the emitted IR to contain a call to @llvm.x86.sse4a.insertqi
// taking two <2 x i64> values followed by i8 5 and i8 6.
__m128i test_inserti_si64(__m128i x, __m128i y) {
|
||||
return _mm_inserti_si64(x, y, 5, 6);
|
||||
// CHECK: @test_inserti_si64
|
||||
// CHECK: @llvm.x86.sse4a.insertqi(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}}, i8 5, i8 6)
|
||||
}
|
||||
|
||||
// Calls _mm_insert_si64(x, y). The directives in the body expect the
// emitted IR to contain a call to @llvm.x86.sse4a.insertq taking two
// <2 x i64> values.
__m128i test_insert_si64(__m128i x, __m128i y) {
|
||||
return _mm_insert_si64(x, y);
|
||||
// CHECK: @test_insert_si64
|
||||
// CHECK: @llvm.x86.sse4a.insertq(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}})
|
||||
}
|
||||
|
||||
// Calls _mm_stream_sd(p, a). The directives in the body expect the emitted
// IR to contain a call to @llvm.x86.sse4a.movnt.sd whose pointer operand is
// an i8* and whose value operand is a <2 x double>.
void test_stream_sd(double *p, __m128d a) {
|
||||
_mm_stream_sd(p, a);
|
||||
// CHECK: @test_stream_sd
|
||||
// CHECK: @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}})
|
||||
}
|
||||
|
||||
// Calls _mm_stream_ss(p, a). The directives in the body expect the emitted
// IR to contain a call to @llvm.x86.sse4a.movnt.ss whose pointer operand is
// an i8* and whose value operand is a <4 x float>.
void test_stream_ss(float *p, __m128 a) {
|
||||
_mm_stream_ss(p, a);
|
||||
// CHECK: @test_stream_ss
|
||||
// CHECK: @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}})
|
||||
}
|
Loading…
Reference in New Issue