From 63a951eb1c59a1f4e4a66278ce1a322867700109 Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Wed, 14 Jan 2015 01:31:17 +0000
Subject: [PATCH] [AVX512] Add FP unpack intrinsics

These are implemented with __builtin_shufflevector just like AVX.

We have some tests on the LLVM side to assert that these shufflevectors do
indeed generate the corresponding unpck instruction.

Part of

llvm-svn: 225922
---
 clang/lib/Headers/avx512fintrin.h     | 33 +++++++++++++++++++++++++++
 clang/test/CodeGen/avx512f-builtins.c | 28 +++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9591dcf37a55..9c80710110b0 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -839,6 +839,39 @@ _mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
                                                    __R);
 }
 
+/* Unpack and Interleave (N elements per vector: shuffle index i < N reads __a[i], else __b[i-N]) */
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_unpackhi_pd(__m512d __a, __m512d __b)
+{ /* Result: a1,b1, a3,b3, a5,b5, a7,b7 -- the odd element of each adjacent pair. */
+  return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+}
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_unpacklo_pd(__m512d __a, __m512d __b)
+{ /* Result: a0,b0, a2,b2, a4,b4, a6,b6 -- the even element of each adjacent pair. */
+  return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_unpackhi_ps(__m512 __a, __m512 __b)
+{ /* Per group of four elements, interleave the upper two of __a with the upper two of __b. */
+  return __builtin_shufflevector(__a, __b,
+                                 2, 18, 3, 19,              /* a2,  b2,  a3,  b3  */
+                                 2+4, 18+4, 3+4, 19+4,      /* a6,  b6,  a7,  b7  */
+                                 2+8, 18+8, 3+8, 19+8,      /* a10, b10, a11, b11 */
+                                 2+12, 18+12, 3+12, 19+12); /* a14, b14, a15, b15 */
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_unpacklo_ps(__m512 __a, __m512 __b)
+{ /* Per group of four elements, interleave the lower two of __a with the lower two of __b. */
+  return __builtin_shufflevector(__a, __b,
+                                 0, 16, 1, 17,              /* a0,  b0,  a1,  b1  */
+                                 0+4, 16+4, 1+4, 17+4,      /* a4,  b4,  a5,  b5  */
+                                 0+8, 16+8, 1+8, 17+8,      /* a8,  b8,  a9,  b9  */
+                                 0+12, 16+12, 1+12, 17+12); /* a12, b12, a13, b13 */
+}
+
 /* Bit Test */
 
 static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index ba05f4abdc8c..8bb013fef731 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -182,3 +182,31 @@ __mmask8 test_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
   // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.512
   return (__mmask8)_mm512_cmpeq_epi64_mask(__a, __b);
 }
+
+__m512d test_mm512_unpackhi_pd(__m512d a, __m512d b)
+{ // Expect the odd element of each pair, alternating a/b.
+  // CHECK-LABEL: @test_mm512_unpackhi_pd
+  // CHECK: shufflevector <8 x double> {{.*}} <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  return _mm512_unpackhi_pd(a, b);
+}
+
+__m512d test_mm512_unpacklo_pd(__m512d a, __m512d b)
+{ // Expect the even element of each pair, alternating a/b.
+  // CHECK-LABEL: @test_mm512_unpacklo_pd
+  // CHECK: shufflevector <8 x double> {{.*}} <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  return _mm512_unpacklo_pd(a, b);
+}
+
+__m512 test_mm512_unpackhi_ps(__m512 a, __m512 b)
+{ // Float intrinsic: operands and result are __m512, matching its declaration.
+  // CHECK-LABEL: @test_mm512_unpackhi_ps
+  // CHECK: shufflevector <16 x float> {{.*}} <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+  return _mm512_unpackhi_ps(a, b);
+}
+
+__m512 test_mm512_unpacklo_ps(__m512 a, __m512 b)
+{ // Float intrinsic: operands and result are __m512, matching its declaration.
+  // CHECK-LABEL: @test_mm512_unpacklo_ps
+  // CHECK: shufflevector <16 x float> {{.*}} <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+  return _mm512_unpacklo_ps(a, b);
+}