diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index caf4ced92054..57c4dfada873 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -126,9 +126,9 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); } -#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \ +#define _mm256_alignr_epi8(a, b, n) \ (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), (n)); }) + (__v32qi)(__m256i)(b), (n)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b) @@ -169,7 +169,7 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) (__v32qi)__M); } -#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \ +#define _mm256_blend_epi16(V1, V2, M) \ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \ (__v16hi)(__m256i)(V2), \ (((M) & 0x01) ? 16 : 0), \ @@ -187,7 +187,7 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) (((M) & 0x10) ? 28 : 12), \ (((M) & 0x20) ? 29 : 13), \ (((M) & 0x40) ? 30 : 14), \ - (((M) & 0x80) ? 31 : 15)); }) + (((M) & 0x80) ? 31 : 15)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpeq_epi8(__m256i __a, __m256i __b) @@ -503,7 +503,7 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); } -#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ +#define _mm256_shuffle_epi32(a, imm) \ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \ (__v8si)_mm256_undefined_si256(), \ 0 + (((imm) >> 0) & 0x3), \ @@ -513,9 +513,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) 4 + (((imm) >> 0) & 0x3), \ 4 + (((imm) >> 2) & 0x3), \ 4 + (((imm) >> 4) & 0x3), \ - 4 + (((imm) >> 6) & 0x3)); }) + 4 + (((imm) >> 6) & 0x3)) -#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ +#define _mm256_shufflehi_epi16(a, imm) \ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \ (__v16hi)_mm256_undefined_si256(), \ 0, 1, 2, 3, \ @@ -527,9 +527,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) 12 + (((imm) >> 0) & 0x3), \ 12 + (((imm) >> 2) & 0x3), \ 12 + (((imm) >> 4) & 0x3), \ - 12 + (((imm) >> 6) & 0x3)); }) + 12 + (((imm) >> 6) & 0x3)) -#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ +#define _mm256_shufflelo_epi16(a, imm) \ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \ (__v16hi)_mm256_undefined_si256(), \ 0 + (((imm) >> 0) & 0x3), \ @@ -541,7 +541,7 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) 8 + (((imm) >> 2) & 0x3), \ 8 + (((imm) >> 4) & 0x3), \ 8 + (((imm) >> 6) & 0x3), \ - 12, 13, 14, 15); }) + 12, 13, 14, 15) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sign_epi8(__m256i __a, __m256i __b) @@ -561,7 +561,7 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); } -#define _mm256_slli_si256(a, imm) __extension__ ({ \ +#define _mm256_slli_si256(a, imm) \ (__m256i)__builtin_shufflevector( \ (__v32qi)_mm256_setzero_si256(), \ (__v32qi)(__m256i)(a), \ @@ -596,7 +596,7 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) ((char)(imm)&0xF0) ? 28 : ((char)(imm)>0xC ? 44 : 60) - (char)(imm), \ ((char)(imm)&0xF0) ? 29 : ((char)(imm)>0xD ? 45 : 61) - (char)(imm), \ ((char)(imm)&0xF0) ? 30 : ((char)(imm)>0xE ? 46 : 62) - (char)(imm), \ - ((char)(imm)&0xF0) ? 31 : ((char)(imm)>0xF ? 47 : 63) - (char)(imm)); }) + ((char)(imm)&0xF0) ? 31 : ((char)(imm)>0xF ? 
47 : 63) - (char)(imm)) #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) @@ -660,7 +660,7 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); } -#define _mm256_srli_si256(a, imm) __extension__ ({ \ +#define _mm256_srli_si256(a, imm) \ (__m256i)__builtin_shufflevector( \ (__v32qi)(__m256i)(a), \ (__v32qi)_mm256_setzero_si256(), \ @@ -695,7 +695,7 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) ((char)(imm)&0xF0) ? 60 : (char)(imm) + ((char)(imm)>0x3 ? 44 : 28), \ ((char)(imm)&0xF0) ? 61 : (char)(imm) + ((char)(imm)>0x2 ? 45 : 29), \ ((char)(imm)&0xF0) ? 62 : (char)(imm) + ((char)(imm)>0x1 ? 46 : 30), \ - ((char)(imm)&0xF0) ? 63 : (char)(imm) + ((char)(imm)>0x0 ? 47 : 31)); }) + ((char)(imm)&0xF0) ? 63 : (char)(imm) + ((char)(imm)>0x0 ? 47 : 31)) #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) @@ -874,15 +874,15 @@ _mm256_broadcastsi128_si256(__m128i __X) return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); } -#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ +#define _mm_blend_epi32(V1, V2, M) \ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \ (__v4si)(__m128i)(V2), \ (((M) & 0x01) ? 4 : 0), \ (((M) & 0x02) ? 5 : 1), \ (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)); }) + (((M) & 0x08) ? 7 : 3)) -#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \ +#define _mm256_blend_epi32(V1, V2, M) \ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \ (__v8si)(__m256i)(V2), \ (((M) & 0x01) ? 8 : 0), \ @@ -892,7 +892,7 @@ _mm256_broadcastsi128_si256(__m128i __X) (((M) & 0x10) ? 12 : 4), \ (((M) & 0x20) ? 13 : 5), \ (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)); }) + (((M) & 0x80) ? 15 : 7)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastb_epi8(__m128i __X) @@ -949,13 +949,13 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); } -#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ +#define _mm256_permute4x64_pd(V, M) \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \ (__v4df)_mm256_undefined_pd(), \ ((M) >> 0) & 0x3, \ ((M) >> 2) & 0x3, \ ((M) >> 4) & 0x3, \ - ((M) >> 6) & 0x3); }) + ((M) >> 6) & 0x3) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) @@ -963,30 +963,30 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); } -#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ +#define _mm256_permute4x64_epi64(V, M) \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \ (__v4di)_mm256_undefined_si256(), \ ((M) >> 0) & 0x3, \ ((M) >> 2) & 0x3, \ ((M) >> 4) & 0x3, \ - ((M) >> 6) & 0x3); }) + ((M) >> 6) & 0x3) -#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ - (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); }) +#define _mm256_permute2x128_si256(V1, V2, M) \ + (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)) -#define _mm256_extracti128_si256(V, M) __extension__ ({ \ +#define _mm256_extracti128_si256(V, M) \ (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \ (__v4di)_mm256_undefined_si256(), \ (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) ); }) + (((M) & 1) ? 
3 : 1) ) -#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \ +#define _mm256_inserti128_si256(V1, V2, M) \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \ (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ (((M) & 1) ? 0 : 4), \ (((M) & 1) ? 1 : 5), \ (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) ); }) + (((M) & 1) ? 5 : 3) ) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskload_epi32(int const *__X, __m256i __M) @@ -1096,212 +1096,212 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); } -#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_pd(a, m, i, mask, s) \ (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ - (__v2df)(__m128d)(mask), (s)); }) + (__v2df)(__m128d)(mask), (s)) -#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \ (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4df)(__m256d)(mask), (s)); }) + (__v4df)(__m256d)(mask), (s)) -#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_pd(a, m, i, mask, s) \ (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ - (__v2df)(__m128d)(mask), (s)); }) + (__v2df)(__m128d)(mask), (s)) -#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \ (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4df)(__m256d)(mask), (s)); }) + (__v4df)(__m256d)(mask), (s)) -#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_ps(a, m, i, mask, s) \ (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4sf)(__m128)(mask), (s)); }) + (__v4sf)(__m128)(mask), (s)) -#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \ (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ - (__v8sf)(__m256)(mask), (s)); }) + (__v8sf)(__m256)(mask), (s)) -#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_ps(a, m, i, mask, s) \ (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v2di)(__m128i)(i), \ - (__v4sf)(__m128)(mask), (s)); }) + (__v4sf)(__m128)(mask), (s)) -#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \ (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4sf)(__m128)(mask), (s)); }) + (__v4sf)(__m128)(mask), (s)) -#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4si)(__m128i)(mask), (s)); }) + (__v4si)(__m128i)(mask), (s)) -#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \ (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \ (int const *)(m), \ (__v8si)(__m256i)(i), \ - (__v8si)(__m256i)(mask), (s)); }) + (__v8si)(__m256i)(mask), (s)) -#define 
_mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v2di)(__m128i)(i), \ - (__v4si)(__m128i)(mask), (s)); }) + (__v4si)(__m128i)(mask), (s)) -#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4si)(__m128i)(mask), (s)); }) + (__v4si)(__m128i)(mask), (s)) -#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v2di)(__m128i)(mask), (s)); }) + (__v2di)(__m128i)(mask), (s)) -#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4di)(__m256i)(mask), (s)); }) + (__v4di)(__m256i)(mask), (s)) -#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \ (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \ (long long const *)(m), \ (__v2di)(__m128i)(i), \ - (__v2di)(__m128i)(mask), (s)); }) + (__v2di)(__m128i)(mask), (s)) -#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ +#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4di)(__m256i)(mask), (s)); }) + (__v4di)(__m256i)(mask), (s)) -#define _mm_i32gather_pd(m, i, s) __extension__ ({ \ +#define _mm_i32gather_pd(m, i, s) \ (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ - (s)); }) + (s)) -#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_pd(m, i, s) \ (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ _CMP_EQ_OQ), \ - (s)); }) + (s)) -#define _mm_i64gather_pd(m, i, s) __extension__ ({ \ +#define _mm_i64gather_pd(m, i, s) \ (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ - (s)); }) + (s)) -#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_pd(m, i, s) \ (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ _CMP_EQ_OQ), \ - (s)); }) + (s)) -#define _mm_i32gather_ps(m, i, s) __extension__ ({ \ +#define _mm_i32gather_ps(m, i, s) \ (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ - (s)); }) + (s)) -#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_ps(m, i, s) \ (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \ 
_mm256_setzero_ps(), \ _CMP_EQ_OQ), \ - (s)); }) + (s)) -#define _mm_i64gather_ps(m, i, s) __extension__ ({ \ +#define _mm_i64gather_ps(m, i, s) \ (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v2di)(__m128i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ - (s)); }) + (s)) -#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_ps(m, i, s) \ (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v4di)(__m256i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ - (s)); }) + (s)) -#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \ +#define _mm_i32gather_epi32(m, i, s) \ (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v4si)(__m128i)(i), \ - (__v4si)_mm_set1_epi32(-1), (s)); }) + (__v4si)_mm_set1_epi32(-1), (s)) -#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_epi32(m, i, s) \ (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \ (int const *)(m), (__v8si)(__m256i)(i), \ - (__v8si)_mm256_set1_epi32(-1), (s)); }) + (__v8si)_mm256_set1_epi32(-1), (s)) -#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \ +#define _mm_i64gather_epi32(m, i, s) \ (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v2di)(__m128i)(i), \ - (__v4si)_mm_set1_epi32(-1), (s)); }) + (__v4si)_mm_set1_epi32(-1), (s)) -#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_epi32(m, i, s) \ (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v4di)(__m256i)(i), \ - (__v4si)_mm_set1_epi32(-1), (s)); }) + (__v4si)_mm_set1_epi32(-1), (s)) -#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ +#define _mm_i32gather_epi64(m, i, s) \ (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v2di)_mm_set1_epi64x(-1), (s)); }) + (__v2di)_mm_set1_epi64x(-1), (s)) -#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ +#define _mm256_i32gather_epi64(m, i, s) \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ - (__v4di)_mm256_set1_epi64x(-1), (s)); }) + (__v4di)_mm256_set1_epi64x(-1), (s)) -#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ +#define _mm_i64gather_epi64(m, i, s) \ (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v2di)(__m128i)(i), \ - (__v2di)_mm_set1_epi64x(-1), (s)); }) + (__v2di)_mm_set1_epi64x(-1), (s)) -#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ +#define _mm256_i64gather_epi64(m, i, s) \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ - (__v4di)_mm256_set1_epi64x(-1), (s)); }) + (__v4di)_mm256_set1_epi64x(-1), (s)) #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 8b7952d9f201..fcdae9aa3c54 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -36,45 +36,45 @@ typedef unsigned long long __mmask64; /* Integer compare */ -#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi8_mask(a, b, p) \ (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1); }) + (__mmask64)-1) -#define 
_mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)); }) + (__mmask64)(m)) -#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu8_mask(a, b, p) \ (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1); }) + (__mmask64)-1) -#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)); }) + (__mmask64)(m)) -#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi16_mask(a, b, p) \ (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) -#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu16_mask(a, b, p) \ (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) #define _mm512_cmpeq_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) @@ -1286,7 +1286,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) } -#define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \ +#define _mm512_shufflehi_epi16(A, imm) \ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ (__v32hi)_mm512_undefined_epi32(), \ 0, 1, 2, 3, \ @@ -1308,21 +1308,21 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 28 + (((imm) >> 0) & 0x3), \ 28 + (((imm) >> 2) & 0x3), \ 28 + (((imm) >> 4) & 0x3), \ - 28 + (((imm) >> 6) & 0x3)); }) + 28 + (((imm) >> 6) & 0x3)) -#define _mm512_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflehi_epi16((A), \ (imm)), \ - (__v32hi)(__m512i)(W)); }) + (__v32hi)(__m512i)(W)) -#define _mm512_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_shufflehi_epi16(U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflehi_epi16((A), \ (imm)), \ - (__v32hi)_mm512_setzero_si512()); }) + (__v32hi)_mm512_setzero_si512()) -#define _mm512_shufflelo_epi16(A, imm) __extension__ ({ \ +#define _mm512_shufflelo_epi16(A, imm) \ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ (__v32hi)_mm512_undefined_epi32(), \ 0 + (((imm) >> 0) & 0x3), \ @@ -1344,21 +1344,21 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 24 + (((imm) >> 2) & 0x3), \ 24 + (((imm) >> 4) & 0x3), \ 24 + (((imm) >> 6) & 0x3), \ - 28, 29, 30, 31); }) + 28, 29, 30, 31) -#define _mm512_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflelo_epi16((A), \ 
(imm)), \ - (__v32hi)(__m512i)(W)); }) + (__v32hi)(__m512i)(W)) -#define _mm512_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_shufflelo_epi16(U, A, imm) \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflelo_epi16((A), \ (imm)), \ - (__v32hi)_mm512_setzero_si512()); }) + (__v32hi)_mm512_setzero_si512()) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sllv_epi16(__m512i __A, __m512i __B) @@ -1426,7 +1426,7 @@ _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) (__v32hi)_mm512_setzero_si512()); } -#define _mm512_bslli_epi128(a, imm) __extension__ ({ \ +#define _mm512_bslli_epi128(a, imm) \ (__m512i)__builtin_shufflevector( \ (__v64qi)_mm512_setzero_si512(), \ (__v64qi)(__m512i)(a), \ @@ -1493,7 +1493,7 @@ _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) ((char)(imm)&0xF0) ? 60 : ((char)(imm)>0xC ? 76 : 124) - (char)(imm), \ ((char)(imm)&0xF0) ? 61 : ((char)(imm)>0xD ? 77 : 125) - (char)(imm), \ ((char)(imm)&0xF0) ? 62 : ((char)(imm)>0xE ? 78 : 126) - (char)(imm), \ - ((char)(imm)&0xF0) ? 63 : ((char)(imm)>0xF ? 79 : 127) - (char)(imm)); }) + ((char)(imm)&0xF0) ? 63 : ((char)(imm)>0xF ? 79 : 127) - (char)(imm)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srlv_epi16(__m512i __A, __m512i __B) @@ -1627,7 +1627,7 @@ _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) (__v32hi)_mm512_setzero_si512()); } -#define _mm512_bsrli_epi128(a, imm) __extension__ ({ \ +#define _mm512_bsrli_epi128(a, imm) \ (__m512i)__builtin_shufflevector( \ (__v64qi)(__m512i)(a), \ (__v64qi)_mm512_setzero_si512(), \ @@ -1694,7 +1694,7 @@ _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) ((char)(imm)&0xF0) ? 124 : (char)(imm) + ((char)(imm)>0x3 ? 108 : 60), \ ((char)(imm)&0xF0) ? 125 : (char)(imm) + ((char)(imm)>0x2 ? 109 : 61), \ ((char)(imm)&0xF0) ? 126 : (char)(imm) + ((char)(imm)>0x1 ? 110 : 62), \ - ((char)(imm)&0xF0) ? 127 : (char)(imm) + ((char)(imm)>0x0 ? 111 : 63)); }) + ((char)(imm)&0xF0) ? 127 : (char)(imm) + ((char)(imm)>0x0 ? 
111 : 63)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) @@ -1978,37 +1978,37 @@ _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, (__v32hi)__W); } -#define _mm512_alignr_epi8(A, B, N) __extension__ ({\ +#define _mm512_alignr_epi8(A, B, N) \ (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N)); }) + (__v64qi)(__m512i)(B), (int)(N)) -#define _mm512_mask_alignr_epi8(W, U, A, B, N) __extension__({\ +#define _mm512_mask_alignr_epi8(W, U, A, B, N) \ (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ - (__v64qi)(__m512i)(W)); }) + (__v64qi)(__m512i)(W)) -#define _mm512_maskz_alignr_epi8(U, A, B, N) __extension__({\ +#define _mm512_maskz_alignr_epi8(U, A, B, N) \ (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ - (__v64qi)(__m512i)_mm512_setzero_si512()); }) + (__v64qi)(__m512i)_mm512_setzero_si512()) -#define _mm512_dbsad_epu8(A, B, imm) __extension__ ({\ +#define _mm512_dbsad_epu8(A, B, imm) \ (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), (int)(imm), \ (__v32hi)_mm512_undefined_epi32(), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) ({\ +#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), (int)(imm), \ (__v32hi)(__m512i)(W), \ - (__mmask32)(U)); }) + (__mmask32)(U)) -#define _mm512_maskz_dbsad_epu8(U, A, B, imm) ({\ +#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), (int)(imm), \ (__v32hi)_mm512_setzero_si512(), \ - (__mmask32)(U)); }) + (__mmask32)(U)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sad_epu8 (__m512i __A, __m512i __B) diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index d873b8c6512d..d7563af9499a 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -226,20 +226,20 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epi64(A, R) \ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtpd_epu64 (__m512d __A) { @@ -265,20 +265,20 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epu64(A, R) \ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epu64(W, 
U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epi64 (__m256 __A) { @@ -304,20 +304,20 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epi64(A, R) \ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epu64 (__m256 __A) { @@ -343,20 +343,20 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epu64(A, R) \ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -378,20 +378,20 @@ _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { (__v8df)_mm512_setzero_pd()); } -#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepi64_pd(A, R) \ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtepi64_ps (__m512i __A) { @@ -417,20 +417,20 @@ _mm512_maskz_cvtepi64_ps 
(__mmask8 __U, __m512i __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepi64_ps(A, R) \ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -457,20 +457,20 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epi64(A, R) \ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttpd_epu64 (__m512d __A) { @@ -496,20 +496,20 @@ _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epu64(A, R) \ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epi64 (__m256 __A) { @@ -535,20 +535,20 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epi64(A, R) \ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define 
_mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu64 (__m256 __A) { @@ -574,20 +574,20 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epu64(A, R) \ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu64_pd (__m512i __A) { @@ -608,21 +608,21 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { (__v8df)_mm512_setzero_pd()); } -#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepu64_pd(A, R) \ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m256 __DEFAULT_FN_ATTRS @@ -649,292 +649,292 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepu64_ps(A, R) \ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_range_pd(A, B, C) __extension__ ({ \ +#define _mm512_range_pd(A, B, C) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \ +#define 
_mm512_mask_range_pd(W, U, A, B, C) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_range_pd(U, A, B, C) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_range_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_range_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_range_ps(A, B, C) __extension__ ({ \ +#define _mm512_range_ps(A, B, C) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm512_mask_range_ps(W, U, A, B, C) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_range_ps(U, A, B, C) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_range_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_range_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \ +#define _mm_range_round_ss(A, B, C, R) \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8) -1, (int)(C),\ - (int)(R)); }) + (int)(R)) #define 
_mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_range_round_ss(W, U, A, B, C, R) \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W),\ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ +#define _mm_maskz_range_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ +#define _mm_range_round_sd(A, B, C, R) \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8) -1, (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_range_round_sd(W, U, A, B, C, R) \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W),\ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ +#define _mm_maskz_range_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C),\ - (int)(R)); }) + (int)(R)) #define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) -#define _mm512_reduce_pd(A, B) __extension__ ({ \ +#define _mm512_reduce_pd(A, B) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \ +#define _mm512_mask_reduce_pd(W, U, A, B) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \ +#define _mm512_maskz_reduce_pd(U, A, B) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_reduce_ps(A, B) __extension__ ({ \ +#define _mm512_reduce_ps(A, B) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \ +#define _mm512_mask_reduce_ps(W, U, A, B) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \ +#define _mm512_maskz_reduce_ps(U, A, B) \ 
(__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_reduce_round_pd(A, B, R) __extension__ ({\ +#define _mm512_reduce_round_pd(A, B, R) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\ +#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\ +#define _mm512_maskz_reduce_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_reduce_round_ps(A, B, R) __extension__ ({\ +#define _mm512_reduce_round_ps(A, B, R) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\ +#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\ +#define _mm512_maskz_reduce_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm_reduce_ss(A, B, C) __extension__ ({ \ +#define _mm_reduce_ss(A, B, C) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ - (int)(C), _MM_FROUND_CUR_DIRECTION); }) + (int)(C), _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_reduce_ss(W, U, A, B, C) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(C), _MM_FROUND_CUR_DIRECTION); }) + (int)(C), _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_reduce_ss(U, A, B, C) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \ +#define _mm_reduce_round_ss(A, B, C, R) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ - (int)(C), (int)(R)); }) + (int)(C), (int)(R)) -#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(C), (int)(R)); }) + (int)(C), (int)(R)) -#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \ +#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - 
(__mmask8)(U), (int)(C), (int)(R)); }) + (__mmask8)(U), (int)(C), (int)(R)) -#define _mm_reduce_sd(A, B, C) __extension__ ({ \ +#define _mm_reduce_sd(A, B, C) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(C), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_reduce_sd(W, U, A, B, C) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), (__mmask8)(U), \ - (int)(C), _MM_FROUND_CUR_DIRECTION); }) + (int)(C), _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_reduce_sd(U, A, B, C) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \ +#define _mm_reduce_round_sd(A, B, C, R) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(C), (int)(R)); }) + (__mmask8)-1, (int)(C), (int)(R)) -#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), (__mmask8)(U), \ - (int)(C), (int)(R)); }) + (int)(C), (int)(R)) -#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \ +#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(C), (int)(R)); }) - + (__mmask8)(U), (int)(C), (int)(R)) + static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_movepi32_mask (__m512i __A) { @@ -1104,7 +1104,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v8di)_mm512_setzero_si512()); } -#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ +#define _mm512_extractf32x8_ps(A, imm) \ (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \ (__v16sf)_mm512_undefined_ps(), \ ((imm) & 1) ? 8 : 0, \ @@ -1114,35 +1114,35 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) ((imm) & 1) ? 12 : 4, \ ((imm) & 1) ? 13 : 5, \ ((imm) & 1) ? 14 : 6, \ - ((imm) & 1) ? 15 : 7); }) + ((imm) & 1) ? 
15 : 7) -#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ - (__v8sf)(W)); }) + (__v8sf)(W)) -#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_extractf32x8_ps(U, A, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) -#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \ +#define _mm512_extractf64x2_pd(A, imm) \ (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \ (__v8df)_mm512_undefined_pd(), \ 0 + ((imm) & 0x3) * 2, \ - 1 + ((imm) & 0x3) * 2); }) + 1 + ((imm) & 0x3) * 2) -#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ - (__v2df)(W)); }) + (__v2df)(W)) -#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_extractf64x2_pd(U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ - (__v2df)_mm_setzero_pd()); }) + (__v2df)_mm_setzero_pd()) -#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \ +#define _mm512_extracti32x8_epi32(A, imm) \ (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \ (__v16si)_mm512_undefined_epi32(), \ ((imm) & 1) ? 8 : 0, \ @@ -1152,35 +1152,35 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) ((imm) & 1) ? 12 : 4, \ ((imm) & 1) ? 13 : 5, \ ((imm) & 1) ? 14 : 6, \ - ((imm) & 1) ? 15 : 7); }) + ((imm) & 1) ? 15 : 7) -#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ - (__v8si)(W)); }) + (__v8si)(W)) -#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \ +#define _mm512_extracti64x2_epi64(A, imm) \ (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \ (__v8di)_mm512_undefined_epi32(), \ 0 + ((imm) & 0x3) * 2, \ - 1 + ((imm) & 0x3) * 2); }) + 1 + ((imm) & 0x3) * 2) -#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ - (__v2di)(W)); }) + (__v2di)(W)) -#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \ (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ - (__v2di)_mm_setzero_si128()); }) + (__v2di)_mm_setzero_si128()) -#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ +#define _mm512_insertf32x8(A, B, imm) \ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ (__v16sf)_mm512_castps256_ps512((__m256)(B)),\ ((imm) & 0x1) ? 0 : 16, \ @@ -1198,19 +1198,19 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) ((imm) & 0x1) ? 20 : 12, \ ((imm) & 0x1) ? 
21 : 13, \ ((imm) & 0x1) ? 22 : 14, \ - ((imm) & 0x1) ? 23 : 15); }) + ((imm) & 0x1) ? 23 : 15) -#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_insertf32x8(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ - (__v16sf)(W)); }) + (__v16sf)(W)) -#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf32x8(U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \ +#define _mm512_insertf64x2(A, B, imm) \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\ (((imm) & 0x3) == 0) ? 8 : 0, \ @@ -1220,19 +1220,19 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (((imm) & 0x3) == 2) ? 8 : 4, \ (((imm) & 0x3) == 2) ? 9 : 5, \ (((imm) & 0x3) == 3) ? 8 : 6, \ - (((imm) & 0x3) == 3) ? 9 : 7); }) + (((imm) & 0x3) == 3) ? 9 : 7) -#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_insertf64x2(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ - (__v8df)(W)); }) + (__v8df)(W)) -#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf64x2(U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \ +#define _mm512_inserti32x8(A, B, imm) \ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ (__v16si)_mm512_castsi256_si512((__m256i)(B)),\ ((imm) & 0x1) ? 0 : 16, \ @@ -1250,19 +1250,19 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) ((imm) & 0x1) ? 20 : 12, \ ((imm) & 0x1) ? 21 : 13, \ ((imm) & 0x1) ? 22 : 14, \ - ((imm) & 0x1) ? 23 : 15); }) + ((imm) & 0x1) ? 23 : 15) -#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_inserti32x8(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ - (__v16si)(W)); }) + (__v16si)(W)) -#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti32x8(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) -#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \ +#define _mm512_inserti64x2(A, B, imm) \ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ (__v8di)_mm512_castsi128_si512((__m128i)(B)),\ (((imm) & 0x3) == 0) ? 8 : 0, \ @@ -1272,49 +1272,49 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (((imm) & 0x3) == 2) ? 8 : 4, \ (((imm) & 0x3) == 2) ? 9 : 5, \ (((imm) & 0x3) == 3) ? 8 : 6, \ - (((imm) & 0x3) == 3) ? 9 : 7); }) + (((imm) & 0x3) == 3) ? 
9 : 7) -#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_inserti64x2(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ - (__v8di)(W)); }) + (__v8di)(W)) -#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti64x2(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) + (__v8di)_mm512_setzero_si512()) -#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ +#define _mm512_mask_fpclass_ps_mask(U, A, imm) \ (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ - (int)(imm), (__mmask16)(U)); }) + (int)(imm), (__mmask16)(U)) -#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \ +#define _mm512_fpclass_ps_mask(A, imm) \ (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ - (int)(imm), (__mmask16)-1); }) + (int)(imm), (__mmask16)-1) -#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ +#define _mm512_mask_fpclass_pd_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \ +#define _mm512_fpclass_pd_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \ +#define _mm_fpclass_sd_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \ +#define _mm_mask_fpclass_sd_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \ +#define _mm_fpclass_ss_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \ +#define _mm_mask_fpclass_ss_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h index d02cb3634d28..6348275c8d31 100644 --- a/clang/lib/Headers/avx512erintrin.h +++ b/clang/lib/Headers/avx512erintrin.h @@ -28,20 +28,20 @@ #define __AVX512ERINTRIN_H /* exp2a23 */ -#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \ +#define _mm512_exp2a23_round_pd(A, R) \ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \ +#define _mm512_maskz_exp2a23_round_pd(M, A, R) \ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm512_exp2a23_pd(A) \ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -52,20 +52,20 @@ #define _mm512_maskz_exp2a23_pd(M, A) \ 
_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \ +#define _mm512_exp2a23_round_ps(A, R) \ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \ +#define _mm512_maskz_exp2a23_round_ps(M, A, R) \ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)); }) + (__mmask16)(M), (int)(R)) #define _mm512_exp2a23_ps(A) \ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -77,20 +77,20 @@ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) /* rsqrt28 */ -#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \ +#define _mm512_rsqrt28_round_pd(A, R) \ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm512_rsqrt28_pd(A) \ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -101,20 +101,20 @@ #define _mm512_maskz_rsqrt28_pd(M, A) \ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \ +#define _mm512_rsqrt28_round_ps(A, R) \ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)); }) + (__mmask16)(M), (int)(R)) #define _mm512_rsqrt28_ps(A) \ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -125,23 +125,23 @@ #define _mm512_maskz_rsqrt28_ps(M, A) \ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \ +#define _mm_rsqrt28_round_ss(A, B, R) \ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) 
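(These AVX-512ER conversions are mechanical: each unmasked/mask/maskz triple wraps the same builtin and differs only in the passthrough operand, and since a statement expression ({ expr; }) yields expr, dropping the wrapper does not change what the macro evaluates to. The R argument is forwarded as (int)(R) into the builtin, so it must remain an integer constant expression, which is why these stay function-like macros. A minimal usage sketch, not part of the patch, assuming a target built with -mavx512er; the helper name is made up for illustration:

    #include <immintrin.h>

    /* approx_rsqrt: hypothetical helper showing the unmasked and
     * merge-masking forms of the 2^-28 reciprocal-sqrt macros above. */
    static __m512 approx_rsqrt(__m512 x, __mmask16 keep, __m512 passthru)
    {
        /* Unmasked: every lane computed; SAE suppresses FP exceptions. */
        __m512 all = _mm512_rsqrt28_round_ps(x, _MM_FROUND_NO_EXC);

        /* Merge-masking: lanes with a 0 bit in keep come from passthru. */
        __m512 merged = _mm512_mask_rsqrt28_round_ps(passthru, keep, x,
                                                     _MM_FROUND_NO_EXC);

        (void)all; /* the unmasked result is unused in this sketch */
        return merged;
    }

The same calls compile unchanged before and after this patch; only the macro expansion differs.)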
-#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rsqrt28_ss(A, B) \ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -152,23 +152,23 @@ #define _mm_maskz_rsqrt28_ss(M, A, B) \ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \ +#define _mm_rsqrt28_round_sd(A, B, R) \ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rsqrt28_sd(A, B) \ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -180,20 +180,20 @@ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* rcp28 */ -#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \ +#define _mm512_rcp28_round_pd(A, R) \ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rcp28_round_pd(S, M, A, R) \ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rcp28_round_pd(M, A, R) \ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm512_rcp28_pd(A) \ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -204,20 +204,20 @@ #define _mm512_maskz_rcp28_pd(M, A) \ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \ +#define _mm512_rcp28_round_ps(A, R) \ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \ +#define _mm512_mask_rcp28_round_ps(S, M, A, R) \ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \ +#define _mm512_maskz_rcp28_round_ps(M, A, R) \ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)); }) + (__mmask16)(M), (int)(R)) #define _mm512_rcp28_ps(A) \ _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -228,23 +228,23 @@ #define _mm512_maskz_rcp28_ps(M, A) \ _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) -#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \ +#define 
_mm_rcp28_round_ss(A, B, R) \ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rcp28_round_ss(M, A, B, R) \ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rcp28_ss(A, B) \ _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -255,23 +255,23 @@ #define _mm_maskz_rcp28_ss(M, A, B) \ _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \ +#define _mm_rcp28_round_sd(A, B, R) \ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \ +#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \ +#define _mm_maskz_rcp28_round_sd(M, A, B, R) \ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) #define _mm_rcp28_sd(A, B) \ _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index f8cfc30a6b1f..f6795a6bfc49 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -948,23 +948,23 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_max_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_max_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_max_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_max_round_pd(A, B, R) \ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_max_pd(__m512d __A, __m512d __B) @@ -998,23 +998,23 @@ _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_max_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) 
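(The avx512fintrin.h hunks below repeat the same pattern: each _round_ macro forwards R as (int)(R) straight into the mask builtin, while the non-round wrappers pass _MM_FROUND_CUR_DIRECTION. A short usage sketch, illustrative only and not part of the patch, assuming -mavx512f; the function name is hypothetical:

    #include <immintrin.h>

    /* masked_max: hypothetical helper using the merge-masking form of
     * the max-with-rounding macro converted in this hunk. */
    static __m512 masked_max(__m512 a, __m512 b, __m512 src, __mmask16 keep)
    {
        /* R must be a compile-time constant; _MM_FROUND_CUR_DIRECTION keeps
         * the default FP environment, matching the non-round wrappers in
         * this header. Lanes cleared in keep are taken from src. */
        return _mm512_mask_max_round_ps(src, keep, a, b,
                                        _MM_FROUND_CUR_DIRECTION);
    }

)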
-#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_max_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_max_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_max_round_ps(A, B, R) \ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_max_ps(__m512 __A, __m512 __B) @@ -1066,23 +1066,23 @@ _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_max_round_ss(A, B, R) __extension__ ({ \ +#define _mm_max_round_ss(A, B, R) \ (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_max_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_max_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -1102,23 +1102,23 @@ _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_max_round_sd(A, B, R) __extension__ ({ \ +#define _mm_max_round_sd(A, B, R) \ (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_max_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_max_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline __m512i __DEFAULT_FN_ATTRS @@ -1209,23 +1209,23 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_min_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_min_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_min_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_min_round_pd(A, B, R) \ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ 
(__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_min_pd(__m512d __A, __m512d __B) @@ -1248,23 +1248,23 @@ _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_min_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_min_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_min_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_min_round_ps(A, B, R) \ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) @@ -1327,23 +1327,23 @@ _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_min_round_ss(A, B, R) __extension__ ({ \ +#define _mm_min_round_ss(A, B, R) \ (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_min_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_min_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -1363,23 +1363,23 @@ _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_min_round_sd(A, B, R) __extension__ ({ \ +#define _mm_min_round_sd(A, B, R) \ (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_min_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_min_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline __m512i __DEFAULT_FN_ATTRS @@ -1548,20 +1548,20 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { (__v8di)__W); } -#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_sqrt_round_pd(W, U, A, R) \ 
(__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_sqrt_round_pd(U, A, R) \ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \ +#define _mm512_sqrt_round_pd(A, R) \ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_sqrt_pd(__m512d __a) @@ -1591,20 +1591,20 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_sqrt_round_ps(W, U, A, R) \ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_sqrt_round_ps(U, A, R) \ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \ +#define _mm512_sqrt_round_ps(A, R) \ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_sqrt_ps(__m512 __a) @@ -1982,23 +1982,23 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_add_round_ss(A, B, R) __extension__ ({ \ +#define _mm_add_round_ss(A, B, R) \ (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_add_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_add_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -2017,23 +2017,23 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -#define _mm_add_round_sd(A, B, R) __extension__ ({ \ +#define _mm_add_round_sd(A, B, R) \ (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_add_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_add_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ 
(__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { @@ -2063,41 +2063,41 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -#define _mm512_add_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_add_round_pd(A, B, R) \ (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_add_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_add_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_add_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_add_round_ps(A, B, R) \ (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_add_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_add_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2116,23 +2116,23 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -#define _mm_sub_round_ss(A, B, R) __extension__ ({ \ +#define _mm_sub_round_ss(A, B, R) \ (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_sub_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_sub_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -2152,23 +2152,23 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_sub_round_sd(A, B, R) __extension__ ({ \ +#define _mm_sub_round_sd(A, B, R) \ (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define 
_mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_sub_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_sub_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { @@ -2198,41 +2198,41 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_sub_round_pd(A, B, R) \ (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_sub_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_sub_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_sub_round_ps(A, B, R) \ (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_sub_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }); + (int)(R)) -#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_sub_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }); + (__mmask16)(U), (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2251,23 +2251,23 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -#define _mm_mul_round_ss(A, B, R) __extension__ ({ \ +#define _mm_mul_round_ss(A, B, R) \ (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_mul_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_mul_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d 
__DEFAULT_FN_ATTRS _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -2287,23 +2287,23 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_mul_round_sd(A, B, R) __extension__ ({ \ +#define _mm_mul_round_sd(A, B, R) \ (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_mul_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_mul_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { @@ -2333,41 +2333,41 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_mul_round_pd(A, B, R) \ (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_mul_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_mul_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_mul_round_ps(A, B, R) \ (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_mul_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }); + (int)(R)) -#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_mul_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }); + (__mmask16)(U), (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2387,23 +2387,23 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_div_round_ss(A, B, R) __extension__ ({ \ +#define _mm_div_round_ss(A, B, R) \ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_div_round_ss(W, U, A, B, R) \ 
(__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_div_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -2423,23 +2423,23 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm_div_round_sd(A, B, R) __extension__ ({ \ +#define _mm_div_round_sd(A, B, R) \ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_div_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_div_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline __m512d __DEFAULT_FN_ATTRS _mm512_div_pd(__m512d __a, __m512d __b) @@ -2481,200 +2481,200 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -#define _mm512_div_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_div_round_pd(A, B, R) \ (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_div_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_div_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_div_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_div_round_ps(A, B, R) \ (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_div_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)); }); + (int)(R)) -#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_div_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }); + (__mmask16)(U), (int)(R)) -#define _mm512_roundscale_ps(A, B) __extension__ ({ \ +#define _mm512_roundscale_ps(A, B) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)(__m512)(A), 
(__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\ +#define _mm512_mask_roundscale_ps(A, B, C, imm) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\ +#define _mm512_maskz_roundscale_ps(A, B, imm) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(A), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \ +#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(A), (int)(R)); }) + (__mmask16)(A), (int)(R)) -#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \ +#define _mm512_roundscale_round_ps(A, imm, R) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_roundscale_pd(A, B) __extension__ ({ \ +#define _mm512_roundscale_pd(A, B) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(A), (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\ +#define _mm512_mask_roundscale_pd(A, B, C, imm) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\ +#define _mm512_maskz_roundscale_pd(A, B, imm) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(A), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \ +#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(A), (int)(R)); }) + (__mmask8)(A), (int)(R)) -#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \ +#define _mm512_roundscale_round_pd(A, imm, R) \ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmadd_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), (int)(R)); }) + (__v8df)(__m512d)(C), (int)(R)) -#define _mm512_mask_fmadd_round_pd(A, U, B, 
C, R) __extension__ ({ \ +#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(A)); }) + (__v8df)(__m512d)(A)) -#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(C)); }) + (__v8df)(__m512d)(C)) -#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsub_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(A)); }) + (__v8df)(__m512d)(A)) -#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fnmadd_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), (int)(R)); }) + (__v8df)(__m512d)(C), (int)(R)) -#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(C)); }) + (__v8df)(__m512d)(C)) -#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fnmsub_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) static __inline__ __m512d 
__DEFAULT_FN_ATTRS @@ -2801,102 +2801,102 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) (__v8df) _mm512_setzero_pd()); } -#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmadd_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), (int)(R)); }) + (__v16sf)(__m512)(C), (int)(R)) -#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(A)); }) + (__v16sf)(__m512)(A)) -#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(C)); }) + (__v16sf)(__m512)(C)) -#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsub_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(A)); }) + (__v16sf)(__m512)(A)) -#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fnmadd_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), (int)(R)); }) + (__v16sf)(__m512)(C), (int)(R)) -#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(C)); }) + (__v16sf)(__m512)(C)) -#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fnmsub_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ 
(__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -3023,63 +3023,63 @@ _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) (__v16sf) _mm512_setzero_ps()); } -#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmaddsub_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(A)); }) + (__v8df)(__m512d)(A)) -#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(C)); }) + (__v8df)(__m512d)(C)) -#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsubadd_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(A)); }) + (__v8df)(__m512d)(A)) -#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -3155,63 +3155,63 @@ _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) (__v8df) _mm512_setzero_pd()); } -#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmaddsub_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 
__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(A)); }) + (__v16sf)(__m512)(A)) -#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(C)); }) + (__v16sf)(__m512)(C)) -#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_fmsubadd_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ - (int)(R)); }) + (int)(R)) -#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(A)); }) + (__v16sf)(__m512)(A)) -#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -3287,13 +3287,13 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) (__v16sf) _mm512_setzero_ps()); } -#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(C)); }) + (__v8df)(__m512d)(C)) static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -3307,13 +3307,13 @@ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) (__v8df) __C); } -#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__m512)__builtin_ia32_vfmaddps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(C)); }) + (__v16sf)(__m512)(C)) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) @@ -3326,13 +3326,13 @@ _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) (__v16sf) __C); } -#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__m512d)__builtin_ia32_vfmaddsubpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(C)); }) + (__v8df)(__m512d)(C)) static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -3346,13 
+3346,13 @@ _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) (__v8df) __C); } -#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__m512)__builtin_ia32_vfmaddsubps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(C)); }) + (__v16sf)(__m512)(C)) static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -3366,13 +3366,13 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) (__v16sf) __C); } -#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ __builtin_ia32_vfmaddpd512(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(A)); }) + (__v8df)(__m512d)(A)) static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -3386,13 +3386,13 @@ _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) (__v8df) __A); } -#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ __builtin_ia32_vfmaddps512(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(A)); }) + (__v16sf)(__m512)(A)) static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -3406,22 +3406,22 @@ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) (__v16sf) __A); } -#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -(__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(A)); }) + (__v8df)(__m512d)(A)) -#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__m512d)__builtin_ia32_vfmaddpd512((__v8df)(__m512d)(A), \ -(__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (int)(R)), \ - (__v8df)(__m512d)(C)); }) + (__v8df)(__m512d)(C)) static __inline__ __m512d __DEFAULT_FN_ATTRS @@ -3446,22 +3446,22 @@ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) (__v8df) __C); } -#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ +#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(A)); }) + (__v16sf)(__m512)(A)) -#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ +#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__m512)__builtin_ia32_vfmaddps512 ((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (int)(R)), \ - (__v16sf)(__m512)(C)); }) + (__v16sf)(__m512)(C)) static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -3558,7 +3558,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, (__v8di)_mm512_setzero_si512()); } -#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ +#define _mm512_alignr_epi64(A, B, I) \ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \ 
(__v8di)(__m512i)(A), \ ((int)(I) & 0x7) + 0, \ @@ -3568,19 +3568,19 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, ((int)(I) & 0x7) + 4, \ ((int)(I) & 0x7) + 5, \ ((int)(I) & 0x7) + 6, \ - ((int)(I) & 0x7) + 7); }) + ((int)(I) & 0x7) + 7) -#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\ +#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ - (__v8di)(__m512i)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\ +#define _mm512_maskz_alignr_epi64(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) + (__v8di)_mm512_setzero_si512()) -#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ +#define _mm512_alignr_epi32(A, B, I) \ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \ (__v16si)(__m512i)(A), \ ((int)(I) & 0xf) + 0, \ @@ -3598,54 +3598,54 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, ((int)(I) & 0xf) + 12, \ ((int)(I) & 0xf) + 13, \ ((int)(I) & 0xf) + 14, \ - ((int)(I) & 0xf) + 15); }) + ((int)(I) & 0xf) + 15) -#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\ +#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ - (__v16si)(__m512i)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\ +#define _mm512_maskz_alignr_epi32(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) /* Vector Extract */ -#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ +#define _mm512_extractf64x4_pd(A, I) \ (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \ (__v8df)_mm512_undefined_pd(), \ ((I) & 1) ? 4 : 0, \ ((I) & 1) ? 5 : 1, \ ((I) & 1) ? 6 : 2, \ - ((I) & 1) ? 7 : 3); }) + ((I) & 1) ? 
7 : 3) -#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\ +#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ - (__v4df)(W)); }) + (__v4df)(W)) -#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\ +#define _mm512_maskz_extractf64x4_pd(U, A, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ +#define _mm512_extractf32x4_ps(A, I) \ (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \ (__v16sf)_mm512_undefined_ps(), \ 0 + ((I) & 0x3) * 4, \ 1 + ((I) & 0x3) * 4, \ 2 + ((I) & 0x3) * 4, \ - 3 + ((I) & 0x3) * 4); }) + 3 + ((I) & 0x3) * 4) -#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\ +#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ - (__v4sf)(W)); }) + (__v4sf)(W)) -#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\ +#define _mm512_maskz_extractf32x4_ps(U, A, imm) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ - (__v4sf)_mm_setzero_ps()); }) + (__v4sf)_mm_setzero_ps()) /* Vector Blend */ @@ -3683,15 +3683,15 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Compare */ -#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ +#define _mm512_cmp_round_ps_mask(A, B, P, R) \ (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(P), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ +#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \ (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(P), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) #define _mm512_cmp_ps_mask(A, B, P) \ _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) @@ -3738,15 +3738,15 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) #define _mm512_mask_cmpord_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) -#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ +#define _mm512_cmp_round_pd_mask(A, B, P, R) \ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ +#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) #define _mm512_cmp_pd_mask(A, B, P) \ _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) @@ -3795,20 +3795,20 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Conversion */ -#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epu32(A, R) \ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \ 
(__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline __m512i __DEFAULT_FN_ATTRS @@ -3839,35 +3839,35 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepi32_ps(A, R) \ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundepu32_ps(A, R) \ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtepu32_ps (__m512i __A) @@ -3981,20 +3981,20 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); } -#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_ps(A, R) \ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtpd_ps (__m512d __A) @@ -4041,50 +4041,50 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \ +#define _mm512_cvt_roundps_ph(A, I) \ 
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_undefined_si256(), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)(__m256i)(U), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_ph(W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_cvtps_ph(A, I) __extension__ ({ \ +#define _mm512_cvtps_ph(A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \ +#define _mm512_mask_cvtps_ph(U, W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)(__m256i)(U), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\ +#define _mm512_maskz_cvtps_ph(W, A, I) \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(W)); }) + (__mmask16)(W)) -#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \ +#define _mm512_cvt_roundph_ps(A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundph_ps(U, A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline __m512 __DEFAULT_FN_ATTRS @@ -4115,20 +4115,20 @@ _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epi32(A, R) \ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epi32(__m512d __a) @@ -4157,20 +4157,20 @@ _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundps_epi32(A, R) \ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, 
(int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epi32(__m512 __a) @@ -4199,20 +4199,20 @@ _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epi32(A, R) \ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epi32 (__m512 __A) @@ -4242,20 +4242,20 @@ _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epi32(A, R) \ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtpd_epi32 (__m512d __A) @@ -4286,20 +4286,20 @@ _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_epu32(A, R) \ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)); }) + 
(__mmask16)(U), (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epu32 ( __m512 __A) @@ -4330,20 +4330,20 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ +#define _mm512_cvt_roundpd_epu32(A, R) \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtpd_epu32 (__m512d __A) @@ -5238,74 +5238,74 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) -#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi32_mask(a, b, p) \ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu32_mask(a, b, p) \ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epi64_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ +#define _mm512_cmp_epu64_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ +#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm512_rol_epi32(a, b) __extension__ ({ \ +#define _mm512_rol_epi32(a, b) \ (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \ +#define _mm512_mask_rol_epi32(W, U, a, b) \ (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 
(__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) + (__mmask16)(U)) -#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \ +#define _mm512_maskz_rol_epi32(U, a, b) \ (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) + (__mmask16)(U)) -#define _mm512_rol_epi64(a, b) __extension__ ({ \ +#define _mm512_rol_epi64(a, b) \ (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \ +#define _mm512_mask_rol_epi64(W, U, a, b) \ (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ - (__v8di)(__m512i)(W), (__mmask8)(U)); }) + (__v8di)(__m512i)(W), (__mmask8)(U)) -#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \ +#define _mm512_maskz_rol_epi64(U, a, b) \ (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rolv_epi32 (__m512i __A, __m512i __B) { @@ -5364,34 +5364,34 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__mmask8) __U); } -#define _mm512_ror_epi32(A, B) __extension__ ({ \ +#define _mm512_ror_epi32(A, B) \ (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \ +#define _mm512_mask_ror_epi32(W, U, A, B) \ (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) + (__mmask16)(U)) -#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \ +#define _mm512_maskz_ror_epi32(U, A, B) \ (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) + (__mmask16)(U)) -#define _mm512_ror_epi64(A, B) __extension__ ({ \ +#define _mm512_ror_epi64(A, B) \ (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \ +#define _mm512_mask_ror_epi64(W, U, A, B) \ (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)(__m512i)(W), (__mmask8)(U)); }) + (__v8di)(__m512i)(W), (__mmask8)(U)) -#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \ +#define _mm512_maskz_ror_epi64(U, A, B) \ (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_slli_epi32(__m512i __A, int __B) @@ -5582,169 +5582,169 @@ _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) (__v8df)_mm512_setzero_pd()); } -#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define 
_mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \ +#define _mm512_fixupimm_pd(A, B, C, imm) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), \ (int)(imm), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), \ (int)(imm), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \ +#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \ +#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \ +#define _mm512_fixupimm_ps(A, B, C, imm) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), \ (int)(imm), (__mmask16)(U), \ - (int)(R)); }) + (int)(R)) -#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), \ (int)(imm), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \ +#define _mm_fixupimm_round_sd(A, B, C, imm, R) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \ 
+#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_sd(A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \ +#define _mm_fixupimm_round_ss(A, B, C, imm, R) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \ +#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_ss(A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \ +#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \ +#define _mm_getexp_round_sd(A, B, R) \ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ 
(__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -5764,11 +5764,11 @@ _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_getexp_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) @@ -5780,17 +5780,17 @@ _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\ +#define _mm_maskz_getexp_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \ +#define _mm_getexp_round_ss(A, B, R) \ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_getexp_ss (__m128 __A, __m128 __B) @@ -5809,11 +5809,11 @@ _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_getexp_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) @@ -5825,101 +5825,101 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\ +#define _mm_maskz_getexp_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \ +#define _mm_getmant_round_sd(A, B, C, D, R) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \ +#define _mm_getmant_sd(A, B, C, D) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\ +#define _mm_mask_getmant_sd(W, U, A, B, C, D) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\ +#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ 
(__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\ +#define _mm_maskz_getmant_sd(U, A, B, C, D) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\ +#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \ +#define _mm_getmant_round_ss(A, B, C, D, R) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \ +#define _mm_getmant_ss(A, B, C, D) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\ +#define _mm_mask_getmant_ss(W, U, A, B, C, D) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\ +#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\ +#define _mm_maskz_getmant_ss(U, A, B, C, D) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\ +#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov (__mmask16 __A) @@ -5927,17 +5927,17 @@ _mm512_kmov (__mmask16 __A) return __A; } -#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\ +#define _mm_comi_round_sd(A, B, P, R) \ (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ - (int)(P), (int)(R)); }) + (int)(P), (int)(R)) -#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\ +#define _mm_comi_round_ss(A, B, P, R) \ (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ - (int)(P), (int)(R)); }) + (int)(P), (int)(R)) #ifdef __x86_64__ -#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_si64(A, R) \ + (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) #endif static __inline__ __m512i 
__DEFAULT_FN_ATTRS @@ -6204,55 +6204,55 @@ _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512()); } -#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ +#define _mm512_ternarylogic_epi32(A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U)); }) + (__mmask16)(U)) -#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), \ (__v16si)(__m512i)(C), \ - (int)(imm), (__mmask16)(U)); }) + (int)(imm), (__mmask16)(U)) -#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ +#define _mm512_ternarylogic_epi64(A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ +#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ +#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #ifdef __x86_64__ -#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_i64(A, R) \ + (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) #endif -#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_si32(A, R) \ + (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_i32(A, R) \ + (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvt_roundsd_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)) static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvtsd_u32 (__m128d __A) @@ -6262,9 +6262,9 @@ _mm_cvtsd_u32 (__m128d __A) } #ifdef __x86_64__ -#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \ +#define _mm_cvt_roundsd_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ - (int)(R)); }) + (int)(R)) static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm_cvtsd_u64 (__m128d __A) @@ -6275,22 +6275,22 @@ _mm_cvtsd_u64 (__m128d __A) } #endif -#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_si32(A, R) \ + 
(int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) -#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_i32(A, R) \ + (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) #ifdef __x86_64__ -#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_si64(A, R) \ + (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) -#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_i64(A, R) \ + (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) #endif -#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvt_roundss_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvtss_u32 (__m128 __A) @@ -6300,9 +6300,9 @@ _mm_cvtss_u32 (__m128 __A) } #ifdef __x86_64__ -#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \ +#define _mm_cvt_roundss_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ - (int)(R)); }) + (int)(R)) static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm_cvtss_u64 (__m128 __A) @@ -6313,11 +6313,11 @@ _mm_cvtss_u64 (__m128 __A) } #endif -#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_i32(A, R) \ + (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_si32(A, R) \ + (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_i32 (__m128d __A) @@ -6327,11 +6327,11 @@ _mm_cvttsd_i32 (__m128d __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_si64(A, R) \ + (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) -#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_i64(A, R) \ + (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_i64 (__m128d __A) @@ -6341,8 +6341,8 @@ _mm_cvttsd_i64 (__m128d __A) } #endif -#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) +#define _mm_cvtt_roundsd_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvttsd_u32 (__m128d __A) @@ -6352,9 +6352,9 @@ _mm_cvttsd_u32 (__m128d __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \ +#define _mm_cvtt_roundsd_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ - (int)(R)); }) + (int)(R)) static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm_cvttsd_u64 (__m128d __A) @@ -6365,11 +6365,11 @@ _mm_cvttsd_u64 (__m128d __A) } #endif -#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \ - 
(int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_i32(A, R) \ + (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) -#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \ - (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_si32(A, R) \ + (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_i32 (__m128 __A) @@ -6379,11 +6379,11 @@ _mm_cvttss_i32 (__m128 __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_i64(A, R) \ + (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) -#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \ - (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_si64(A, R) \ + (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttss_i64 (__m128 __A) @@ -6393,8 +6393,8 @@ _mm_cvttss_i64 (__m128 __A) } #endif -#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \ - (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); }) +#define _mm_cvtt_roundss_u32(A, R) \ + (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvttss_u32 (__m128 __A) @@ -6404,9 +6404,9 @@ _mm_cvttss_u32 (__m128 __A) } #ifdef __x86_64__ -#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \ +#define _mm_cvtt_roundss_u64(A, R) \ (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ - (int)(R)); }) + (int)(R)) static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm_cvttss_u64 (__m128 __A) @@ -6417,7 +6417,7 @@ _mm_cvttss_u64 (__m128 __A) } #endif -#define _mm512_permute_pd(X, C) __extension__ ({ \ +#define _mm512_permute_pd(X, C) \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ (__v8df)_mm512_undefined_pd(), \ 0 + (((C) >> 0) & 0x1), \ @@ -6427,19 +6427,19 @@ _mm_cvttss_u64 (__m128 __A) 4 + (((C) >> 4) & 0x1), \ 4 + (((C) >> 5) & 0x1), \ 6 + (((C) >> 6) & 0x1), \ - 6 + (((C) >> 7) & 0x1)); }) + 6 + (((C) >> 7) & 0x1)) -#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ +#define _mm512_mask_permute_pd(W, U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permute_pd((X), (C)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permute_pd(U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permute_pd((X), (C)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_permute_ps(X, C) __extension__ ({ \ +#define _mm512_permute_ps(X, C) \ (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ (__v16sf)_mm512_undefined_ps(), \ 0 + (((C) >> 0) & 0x3), \ @@ -6457,17 +6457,17 @@ _mm_cvttss_u64 (__m128 __A) 12 + (((C) >> 0) & 0x3), \ 12 + (((C) >> 2) & 0x3), \ 12 + (((C) >> 4) & 0x3), \ - 12 + (((C) >> 6) & 0x3)); }) + 12 + (((C) >> 6) & 0x3)) -#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ +#define _mm512_mask_permute_ps(W, U, X, C) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_permute_ps((X), (C)), \ - (__v16sf)(__m512)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permute_ps(U, X, C) \ 
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_permute_ps((X), (C)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutevar_pd(__m512d __A, __m512i __C) @@ -6578,20 +6578,20 @@ _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) } -#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \ +#define _mm512_cvtt_roundpd_epu32(A, R) \ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epu32 (__m512d __A) @@ -6622,107 +6622,107 @@ _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \ +#define _mm_roundscale_round_sd(A, B, imm, R) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ - (int)(R)); }) + (int)(R)) -#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \ +#define _mm_roundscale_sd(A, B, imm) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \ +#define _mm_mask_roundscale_sd(W, U, A, B, imm) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(imm), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \ +#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_roundscale_sd(U, A, B, I) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \ +#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \ +#define _mm_roundscale_round_ss(A, B, imm, R) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ - (int)(R)); }) + (int)(R)) -#define _mm_roundscale_ss(A, B, imm) 
__extension__ ({ \ +#define _mm_roundscale_ss(A, B, imm) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \ +#define _mm_mask_roundscale_ss(W, U, A, B, I) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \ +#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_roundscale_ss(U, A, B, I) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \ +#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ - (int)(R)); }) + (int)(R)) -#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \ +#define _mm512_scalef_round_pd(A, B, R) \ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_scalef_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_scalef_pd (__m512d __A, __m512d __B) @@ -6756,23 +6756,23 @@ _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \ +#define _mm512_scalef_round_ps(A, B, R) \ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \ +#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \ +#define _mm512_maskz_scalef_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_scalef_ps (__m512 __A, __m512 __B) 
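/* Editor's note: a minimal usage sketch for the roundscale/scalef macros in
 * the hunks above, not part of the patch. It assumes only the macro defined
 * just before this point (_mm512_scalef_round_ps) plus the standard
 * _MM_FROUND_* constants; the function name demo_scalef and its operands are
 * hypothetical. The rounding argument must be an integer constant expression
 * that reaches the builtin verbatim, which is why these stay macros even
 * after the __extension__ ({ ... }) cleanup.
 *
 *   #include <immintrin.h>   // build with AVX-512F enabled, e.g. -mavx512f
 *
 *   static inline __m512 demo_scalef(__m512 a, __m512 b) {
 *     // Per lane: a * 2^floor(b), rounded to nearest with floating-point
 *     // exceptions suppressed (SAE).
 *     return _mm512_scalef_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT |
 *                                         _MM_FROUND_NO_EXC);
 *   }
 */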
@@ -6806,11 +6806,11 @@ _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \ +#define _mm_scalef_round_sd(A, B, R) \ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_scalef_sd (__m128d __A, __m128d __B) @@ -6831,11 +6831,11 @@ _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_scalef_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) @@ -6847,17 +6847,17 @@ _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_scalef_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \ +#define _mm_scalef_round_ss(A, B, R) \ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_scalef_ss (__m128 __A, __m128 __B) @@ -6878,11 +6878,11 @@ _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_scalef_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) @@ -6894,12 +6894,12 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_scalef_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + (int)(R)) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srai_epi32(__m512i __A, int __B) @@ -6944,7 +6944,7 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) (__v8di)_mm512_setzero_si512()); } -#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ +#define _mm512_shuffle_f32x4(A, B, imm) \ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ 0 + ((((imm) >> 0) & 0x3) * 4), \ @@ -6962,19 +6962,19 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 16 + ((((imm) >> 6) & 0x3) * 4), \ 17 + ((((imm) >> 6) & 0x3) * 4), \ 18 + ((((imm) >> 6) & 0x3) * 4), \ - 19 + ((((imm) >> 6) & 0x3) * 4)); }) + 19 + ((((imm) >> 6) & 0x3) * 4)) -#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), 
\ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ - (__v16sf)(__m512)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \ +#define _mm512_shuffle_f64x2(A, B, imm) \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ 0 + ((((imm) >> 0) & 0x3) * 2), \ @@ -6984,19 +6984,19 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 8 + ((((imm) >> 4) & 0x3) * 2), \ 9 + ((((imm) >> 4) & 0x3) * 2), \ 8 + ((((imm) >> 6) & 0x3) * 2), \ - 9 + ((((imm) >> 6) & 0x3) * 2)); }) + 9 + ((((imm) >> 6) & 0x3) * 2)) -#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \ +#define _mm512_shuffle_i32x4(A, B, imm) \ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ 0 + ((((imm) >> 0) & 0x3) * 2), \ @@ -7006,19 +7006,19 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 8 + ((((imm) >> 4) & 0x3) * 2), \ 9 + ((((imm) >> 4) & 0x3) * 2), \ 8 + ((((imm) >> 6) & 0x3) * 2), \ - 9 + ((((imm) >> 6) & 0x3) * 2)); }) + 9 + ((((imm) >> 6) & 0x3) * 2)) -#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ - (__v16si)(__m512i)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) -#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \ +#define _mm512_shuffle_i64x2(A, B, imm) \ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ 0 + ((((imm) >> 0) & 0x3) * 2), \ @@ -7028,19 +7028,19 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 8 + ((((imm) >> 4) & 0x3) * 2), \ 9 + ((((imm) >> 4) & 0x3) * 2), \ 8 + ((((imm) >> 6) & 0x3) * 2), \ - 9 + ((((imm) >> 6) & 0x3) * 2)); }) + 9 + ((((imm) >> 6) & 0x3) * 2)) -#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ - (__v8di)(__m512i)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) + (__v8di)_mm512_setzero_si512()) 
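The rewrite applied throughout these hunks is purely mechanical: each function-like intrinsic macro drops its GNU statement-expression wrapper, `__extension__ ({ ...; })`, in favor of a plain parenthesized expression. A minimal sketch of the difference, using hypothetical OLD_FORM/NEW_FORM macros rather than any intrinsic from this patch: statement expressions are a GNU extension that is only valid inside a function body, so the old form could not appear in file-scope initializers or other contexts that require a plain (constant) expression, while the replacement form can, and both evaluate identically inside a function.

    /* Hypothetical macros contrasting the two styles; not part of the
       patch itself. Builds as ordinary C with gcc or clang. */
    #include <stdio.h>

    #define OLD_FORM(x, n) __extension__ ({ (x) << (n); })
    #define NEW_FORM(x, n) ((x) << (n))

    /* Legal only with NEW_FORM: statement expressions may not appear at
       file scope, so `static const int bad = OLD_FORM(1, 4);` would be
       rejected by the compiler. */
    static const int lut[2] = { NEW_FORM(1, 0), NEW_FORM(1, 4) };

    int main(void) {
      printf("%d %d\n", lut[0], lut[1]); /* prints: 1 16 */
      printf("%d\n", OLD_FORM(3, 2));    /* 12; fine inside a function */
      return 0;
    }

This is also why every hunk here deletes a trailing `); })` and closes the macro with a bare `)`: the old bodies ended in an expression statement whose semicolon and braces are no longer needed.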
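The masked shuffle wrappers immediately above all reduce to one select idiom: evaluate the unmasked shuffle, then route each destination element through the write-mask, falling back to W (merge-masking) or to zero (zero-masking); an all-ones mask such as `(__mmask8)-1` in the unmasked forms simply lets every lane through. A scalar sketch of that semantics, with invented names rather than the actual clang builtins:

    /* Scalar model of the select step behind _mm512_mask_shuffle_* and
       _mm512_maskz_shuffle_*; `select16` is an illustrative name only. */
    #include <stdio.h>

    static void select16(unsigned short mask, const int op[16],
                         const int fallback[16], int dst[16]) {
      for (int i = 0; i < 16; ++i)      /* one mask bit per destination lane */
        dst[i] = ((mask >> i) & 1) ? op[i] : fallback[i];
    }

    int main(void) {
      int op[16], zero[16] = {0}, out[16];
      for (int i = 0; i < 16; ++i) op[i] = i + 1;  /* stand-in shuffle result */
      select16(0x00FF, op, zero, out);             /* maskz: low 8 lanes kept */
      for (int i = 0; i < 16; ++i) printf("%d ", out[i]); /* 1..8 then zeros */
      printf("\n");
      return 0;
    }

Factoring the masking through a generic select is what lets each mask/maskz macro stay a one-line wrapper over its unmasked sibling instead of duplicating the shuffle index arithmetic.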
-#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \ +#define _mm512_shuffle_pd(A, B, M) \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ 0 + (((M) >> 0) & 0x1), \ @@ -7050,19 +7050,19 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 4 + (((M) >> 4) & 0x1), \ 12 + (((M) >> 5) & 0x1), \ 6 + (((M) >> 6) & 0x1), \ - 14 + (((M) >> 7) & 0x1)); }) + 14 + (((M) >> 7) & 0x1)) -#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ +#define _mm512_mask_shuffle_pd(W, U, A, B, M) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ +#define _mm512_maskz_shuffle_pd(U, A, B, M) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \ +#define _mm512_shuffle_ps(A, B, M) \ (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ 0 + (((M) >> 0) & 0x3), \ @@ -7080,23 +7080,23 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 12 + (((M) >> 0) & 0x3), \ 12 + (((M) >> 2) & 0x3), \ 28 + (((M) >> 4) & 0x3), \ - 28 + (((M) >> 6) & 0x3)); }) + 28 + (((M) >> 6) & 0x3)) -#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ +#define _mm512_mask_shuffle_ps(W, U, A, B, M) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ - (__v16sf)(__m512)(W)); }) + (__v16sf)(__m512)(W)) -#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ +#define _mm512_maskz_shuffle_ps(U, A, B, M) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \ +#define _mm_sqrt_round_sd(A, B, R) \ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -7108,11 +7108,11 @@ _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) @@ -7124,17 +7124,17 @@ _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_sqrt_round_sd(U, A, B, R) \ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \ +#define _mm_sqrt_round_ss(A, B, R) \ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sqrt_ss 
(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -7146,11 +7146,11 @@ _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) @@ -7162,11 +7162,11 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) _MM_FROUND_CUR_DIRECTION); } -#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_sqrt_round_ss(U, A, B, R) \ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x4(__m128 __A) @@ -7725,43 +7725,43 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } -#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \ +#define _mm512_extracti32x4_epi32(A, imm) \ (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ (__v16si)_mm512_undefined_epi32(), \ 0 + ((imm) & 0x3) * 4, \ 1 + ((imm) & 0x3) * 4, \ 2 + ((imm) & 0x3) * 4, \ - 3 + ((imm) & 0x3) * 4); }) + 3 + ((imm) & 0x3) * 4) -#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ - (__v4si)(W)); }) + (__v4si)(W)) -#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ - (__v4si)_mm_setzero_si128()); }) + (__v4si)_mm_setzero_si128()) -#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ +#define _mm512_extracti64x4_epi64(A, imm) \ (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ (__v8di)_mm512_undefined_epi32(), \ ((imm) & 1) ? 4 : 0, \ ((imm) & 1) ? 5 : 1, \ ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3); }) + ((imm) & 1) ? 7 : 3) -#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \ +#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ - (__v4di)(W)); }) + (__v4di)(W)) -#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \ +#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) -#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \ +#define _mm512_insertf64x4(A, B, imm) \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \ ((imm) & 0x1) ? 0 : 8, \ @@ -7771,19 +7771,19 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) ((imm) & 0x1) ? 8 : 4, \ ((imm) & 0x1) ? 9 : 5, \ ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7); }) + ((imm) & 0x1) ? 
11 : 7) -#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_insertf64x4(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ - (__v8df)(W)); }) + (__v8df)(W)) -#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf64x4(U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \ +#define _mm512_inserti64x4(A, B, imm) \ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ (__v8di)_mm512_castsi256_si512((__m256i)(B)), \ ((imm) & 0x1) ? 0 : 8, \ @@ -7793,19 +7793,19 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) ((imm) & 0x1) ? 8 : 4, \ ((imm) & 0x1) ? 9 : 5, \ ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7); }) + ((imm) & 0x1) ? 11 : 7) -#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_inserti64x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ - (__v8di)(W)); }) + (__v8di)(W)) -#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti64x4(U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()); }) + (__v8di)_mm512_setzero_si512()) -#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \ +#define _mm512_insertf32x4(A, B, imm) \ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ (__v16sf)_mm512_castps128_ps512((__m128)(B)),\ (((imm) & 0x3) == 0) ? 16 : 0, \ @@ -7823,19 +7823,19 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (((imm) & 0x3) == 3) ? 16 : 12, \ (((imm) & 0x3) == 3) ? 17 : 13, \ (((imm) & 0x3) == 3) ? 18 : 14, \ - (((imm) & 0x3) == 3) ? 19 : 15); }) + (((imm) & 0x3) == 3) ? 19 : 15) -#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_insertf32x4(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ - (__v16sf)(W)); }) + (__v16sf)(W)) -#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_insertf32x4(U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()); }) + (__v16sf)_mm512_setzero_ps()) -#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \ +#define _mm512_inserti32x4(A, B, imm) \ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ (__v16si)_mm512_castsi128_si512((__m128i)(B)),\ (((imm) & 0x3) == 0) ? 16 : 0, \ @@ -7853,110 +7853,110 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (((imm) & 0x3) == 3) ? 16 : 12, \ (((imm) & 0x3) == 3) ? 17 : 13, \ (((imm) & 0x3) == 3) ? 18 : 14, \ - (((imm) & 0x3) == 3) ? 19 : 15); }) + (((imm) & 0x3) == 3) ? 
19 : 15) -#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm512_mask_inserti32x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ - (__v16si)(W)); }) + (__v16si)(W)) -#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ +#define _mm512_maskz_inserti32x4(U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) -#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \ +#define _mm512_getmant_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_getmant_pd(A, B, C) __extension__ ({ \ +#define _mm512_getmant_pd(A, B, C) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm512_mask_getmant_pd(W, U, A, B, C) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_getmant_pd(U, A, B, C) \ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \ +#define _mm512_getmant_round_ps(A, B, C, R) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \ +#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \ +#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_getmant_ps(A, B, C) __extension__ ({ \ +#define _mm512_getmant_ps(A, B, C) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + 
_MM_FROUND_CUR_DIRECTION) -#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm512_mask_getmant_ps(W, U, A, B, C) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ +#define _mm512_maskz_getmant_ps(U, A, B, C) \ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm512_getexp_round_pd(A, R) __extension__ ({ \ +#define _mm512_getexp_round_pd(A, R) \ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_getexp_round_pd(W, U, A, R) \ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_getexp_round_pd(U, A, R) \ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_getexp_pd (__m512d __A) @@ -7985,20 +7985,20 @@ _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_getexp_round_ps(A, R) __extension__ ({ \ +#define _mm512_getexp_round_ps(A, R) \ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)); }) + (__mmask16)-1, (int)(R)) -#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_getexp_round_ps(W, U, A, R) \ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) -#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \ +#define _mm512_maskz_getexp_round_ps(U, A, R) \ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)); }) + (__mmask16)(U), (int)(R)) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_getexp_ps (__m512 __A) @@ -8027,181 +8027,181 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) _MM_FROUND_CUR_DIRECTION); } -#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \ +#define _mm512_i64gather_ps(index, addr, scale) \ (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ (float const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\ +#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ (float const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\ +#define _mm512_i64gather_epi32(index, addr, scale) \ (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ (int const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)-1, (int)(scale)); }) + (__mmask8)-1, (int)(scale)) -#define 
_mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ (int const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\ +#define _mm512_i64gather_pd(index, addr, scale) \ (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ (double const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ (double const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\ +#define _mm512_i64gather_epi64(index, addr, scale) \ (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ (long long const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ (long long const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\ +#define _mm512_i32gather_ps(index, addr, scale) \ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ (float const *)(addr), \ (__v16sf)(__m512)(index), \ - (__mmask16)-1, (int)(scale)); }) + (__mmask16)-1, (int)(scale)) -#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ (float const *)(addr), \ (__v16sf)(__m512)(index), \ - (__mmask16)(mask), (int)(scale)); }) + (__mmask16)(mask), (int)(scale)) -#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\ +#define _mm512_i32gather_epi32(index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ (int const *)(addr), \ (__v16si)(__m512i)(index), \ - (__mmask16)-1, (int)(scale)); }) + (__mmask16)-1, (int)(scale)) -#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ (int const *)(addr), \ (__v16si)(__m512i)(index), \ - (__mmask16)(mask), (int)(scale)); }) + (__mmask16)(mask), (int)(scale)) -#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\ +#define _mm512_i32gather_pd(index, addr, scale) \ (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ (double const *)(addr), \ (__v8si)(__m256i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ (double const *)(addr), \ (__v8si)(__m256i)(index), \ - 
(__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\ +#define _mm512_i32gather_epi64(index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ (long long const *)(addr), \ (__v8si)(__m256i)(index), (__mmask8)-1, \ - (int)(scale)); }) + (int)(scale)) -#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \ (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ (long long const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8sf)(__m256)(v1), (int)(scale)); }) + (__v8sf)(__m256)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8sf)(__m256)(v1), (int)(scale)); }) + (__v8sf)(__m256)(v1), (int)(scale)) -#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) + (__v8si)(__m256i)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) + (__v8si)(__m256i)(v1), (int)(scale)) -#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i64scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) -#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) -#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \ (__v16si)(__m512i)(index), \ - (__v16sf)(__m512)(v1), (int)(scale)); }) + (__v16sf)(__m512)(v1), 
(int)(scale)) -#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \ (__v16si)(__m512i)(index), \ - (__v16sf)(__m512)(v1), (int)(scale)); }) + (__v16sf)(__m512)(v1), (int)(scale)) -#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \ (__v16si)(__m512i)(index), \ - (__v16si)(__m512i)(v1), (int)(scale)); }) + (__v16si)(__m512i)(v1), (int)(scale)) -#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \ (__v16si)(__m512i)(index), \ - (__v16si)(__m512i)(v1), (int)(scale)); }) + (__v16si)(__m512i)(v1), (int)(scale)) -#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), \ - (__v8df)(__m512d)(v1), (int)(scale)); }) + (__v8df)(__m512d)(v1), (int)(scale)) -#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\ +#define _mm512_i32scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) -#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ +#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), \ - (__v8di)(__m512i)(v1), (int)(scale)); }) + (__v8di)(__m512i)(v1), (int)(scale)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -8213,11 +8213,11 @@ _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) return __W; } -#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\ +#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddss3_mask((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -8229,11 +8229,11 @@ _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) return __A; } -#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) @@ -8245,11 +8245,11 @@ _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) 
__extension__ ({\ +#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -8261,11 +8261,11 @@ _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) return __W; } -#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -8277,11 +8277,11 @@ _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) return __A; } -#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) @@ -8293,11 +8293,11 @@ _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ +#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -8309,11 +8309,11 @@ _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) return __W; } -#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -8325,11 +8325,11 @@ _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) return __A; } -#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) @@ -8341,11 +8341,11 @@ _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\ +#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -8357,11 +8357,11 @@ _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) return __W; } -#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ 
(__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -8373,11 +8373,11 @@ _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) return __A; } -#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) @@ -8389,11 +8389,11 @@ _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ +#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8405,11 +8405,11 @@ _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) return __W; } -#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\ +#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8421,11 +8421,11 @@ _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) return __A; } -#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8437,11 +8437,11 @@ _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\ +#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8453,11 +8453,11 @@ _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) return __W; } -#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8469,11 +8469,11 @@ _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) return __A; } -#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ 
(__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8485,11 +8485,11 @@ _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ +#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8501,11 +8501,11 @@ _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) return __W; } -#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8517,11 +8517,11 @@ _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) return __A; } -#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8533,11 +8533,11 @@ _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\ +#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8549,11 +8549,11 @@ _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) return __W; } -#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\ +#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8565,12 +8565,12 @@ _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) return __A; } -#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\ +#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8582,13 +8582,13 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) return __Y; } -#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ +#define 
_mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_permutex_pd(X, C) __extension__ ({ \ +#define _mm512_permutex_pd(X, C) \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ (__v8df)_mm512_undefined_pd(), \ 0 + (((C) >> 0) & 0x3), \ @@ -8598,19 +8598,19 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 4 + (((C) >> 0) & 0x3), \ 4 + (((C) >> 2) & 0x3), \ 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3)); }) + 4 + (((C) >> 6) & 0x3)) -#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ +#define _mm512_mask_permutex_pd(W, U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permutex_pd((X), (C)), \ - (__v8df)(__m512d)(W)); }) + (__v8df)(__m512d)(W)) -#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permutex_pd(U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permutex_pd((X), (C)), \ - (__v8df)_mm512_setzero_pd()); }) + (__v8df)_mm512_setzero_pd()) -#define _mm512_permutex_epi64(X, C) __extension__ ({ \ +#define _mm512_permutex_epi64(X, C) \ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ (__v8di)_mm512_undefined_epi32(), \ 0 + (((C) >> 0) & 0x3), \ @@ -8620,17 +8620,17 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 4 + (((C) >> 0) & 0x3), \ 4 + (((C) >> 2) & 0x3), \ 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3)); }) + 4 + (((C) >> 6) & 0x3)) -#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ +#define _mm512_mask_permutex_epi64(W, U, X, C) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_permutex_epi64((X), (C)), \ - (__v8di)(__m512i)(W)); }) + (__v8di)(__m512i)(W)) -#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ +#define _mm512_maskz_permutex_epi64(U, X, C) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_permutex_epi64((X), (C)), \ - (__v8di)_mm512_setzero_si512()); }) + (__v8di)_mm512_setzero_si512()) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutexvar_pd (__m512i __X, __m512d __Y) @@ -8870,49 +8870,49 @@ _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) (__mmask16) __U); } -#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \ +#define _mm_cmp_round_ss_mask(X, Y, P, R) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \ +#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \ +#define _mm_cmp_ss_mask(X, Y, P) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \ +#define _mm_mask_cmp_ss_mask(M, X, Y, P) \ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)(M), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \ +#define _mm_cmp_round_sd_mask(X, Y, P, R) \ 
(__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \ +#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)(M), (int)(R)); }) + (__mmask8)(M), (int)(R)) -#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \ +#define _mm_cmp_sd_mask(X, Y, P) \ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) -#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \ +#define _mm_mask_cmp_sd_mask(M, X, Y, P) \ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ (__mmask8)(M), \ - _MM_FROUND_CUR_DIRECTION); }) + _MM_FROUND_CUR_DIRECTION) /* Bit Test */ @@ -9098,7 +9098,7 @@ _mm_maskz_load_sd (__mmask8 __U, const double* __A) __U & 1); } -#define _mm512_shuffle_epi32(A, I) __extension__ ({ \ +#define _mm512_shuffle_epi32(A, I) \ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ (__v16si)_mm512_undefined_epi32(), \ 0 + (((I) >> 0) & 0x3), \ @@ -9116,17 +9116,17 @@ _mm_maskz_load_sd (__mmask8 __U, const double* __A) 12 + (((I) >> 0) & 0x3), \ 12 + (((I) >> 2) & 0x3), \ 12 + (((I) >> 4) & 0x3), \ - 12 + (((I) >> 6) & 0x3)); }) + 12 + (((I) >> 6) & 0x3)) -#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ +#define _mm512_mask_shuffle_epi32(W, U, A, I) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_epi32((A), (I)), \ - (__v16si)(__m512i)(W)); }) + (__v16si)(__m512i)(W)) -#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ +#define _mm512_maskz_shuffle_epi32(U, A, I) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_epi32((A), (I)), \ - (__v16si)_mm512_setzero_si512()); }) + (__v16si)_mm512_setzero_si512()) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) @@ -9256,20 +9256,20 @@ _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) (__mmask16) __U); } -#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \ +#define _mm512_cvt_roundps_pd(A, R) \ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \ +#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \ +#define _mm512_maskz_cvt_roundps_pd(U, A, R) \ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtps_pd (__m256 __A) @@ -9365,23 +9365,23 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) (__mmask16) __U); } -#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundsd_ss(A, B, R) \ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \ 
+#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) @@ -9413,48 +9413,48 @@ _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) #endif #ifdef __x86_64__ -#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundi64_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundsi64_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) #endif -#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \ - (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) +#define _mm_cvt_roundsi32_ss(A, B, R) \ + (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) -#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \ - (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) +#define _mm_cvt_roundi32_ss(A, B, R) \ + (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) #ifdef __x86_64__ -#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundsi64_ss(A, B, R) \ (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) -#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundi64_ss(A, B, R) \ (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ - (int)(R)); }) + (int)(R)) #endif -#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundss_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1, (int)(R)); }) + (__mmask8)-1, (int)(R)) -#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \ +#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) -#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \ +#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)); }) + (__mmask8)(U), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) @@ -9482,9 +9482,9 @@ _mm_cvtu32_sd (__m128d __A, unsigned __B) } #ifdef __x86_64__ -#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundu64_sd(A, B, R) \ (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ - (unsigned long long)(B), (int)(R)); }) + (unsigned long long)(B), (int)(R)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtu64_sd (__m128d __A, unsigned long long __B) @@ -9494,9 +9494,9 @@ _mm_cvtu64_sd (__m128d __A, unsigned long long __B) } #endif -#define 
_mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundu32_ss(A, B, R) \ (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ - (int)(R)); }) + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu32_ss (__m128 __A, unsigned __B) @@ -9506,9 +9506,9 @@ _mm_cvtu32_ss (__m128 __A, unsigned __B) } #ifdef __x86_64__ -#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \ +#define _mm_cvt_roundu64_ss(A, B, R) \ (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ - (unsigned long long)(B), (int)(R)); }) + (unsigned long long)(B), (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu64_ss (__m128 __A, unsigned long long __B) diff --git a/clang/lib/Headers/avx512pfintrin.h b/clang/lib/Headers/avx512pfintrin.h index 28fb012f2f5c..5b8260b77c63 100644 --- a/clang/lib/Headers/avx512pfintrin.h +++ b/clang/lib/Headers/avx512pfintrin.h @@ -31,80 +31,80 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf"))) -#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) - -#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) __extension__ ({\ + (int)(hint)) + +#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \ __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) ({\ +#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (int const *)(addr), \ - (int)(scale), (int)(hint)); }) + (int)(scale), (int)(hint)) -#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) ({\ +#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \ __builtin_ia32_gatherpfdps((__mmask16) -1, \ (__v16si)(__m512i)(index), (int const *)(addr), \ - (int)(scale), (int)(hint)); }) + (int)(scale), (int)(hint)) -#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \ __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \ (long long const *)(addr), (int)(scale), \ - (int)(hint)); }) - -#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) ({\ + (int)(hint)) + +#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (int const *)(addr), (int)(scale), (int)(hint)); }) + (int const *)(addr), (int)(scale), (int)(hint)) -#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) ({\ +#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \ __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \ - (int const *)(addr), (int)(scale), (int)(hint)); }) + (int const *)(addr), 
(int)(scale), (int)(hint)) -#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \ __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \ __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \ - (int *)(addr), (int)(scale), (int)(hint)); }) + (int *)(addr), (int)(scale), (int)(hint)) -#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (int *)(addr), \ - (int)(scale), (int)(hint)); }) + (int)(scale), (int)(hint)) -#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \ __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (long long *)(addr), (int)(scale), \ - (int)(hint)); }) + (int)(hint)) -#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) __extension__ ({\ +#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \ __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \ - (int *)(addr), (int)(scale), (int)(hint)); }) + (int *)(addr), (int)(scale), (int)(hint)) -#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) __extension__ ({\ +#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (int *)(addr), (int)(scale), (int)(hint)); }) + (int *)(addr), (int)(scale), (int)(hint)) #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index 585b4566f007..a7d957edec4e 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -142,12 +142,12 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) __U); } -#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_shldi_epi64(S, U, A, B, I) \ (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \ (__v8di)(B), \ (int)(I), \ (__v8di)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm512_maskz_shldi_epi64(U, A, B, I) \ _mm512_mask_shldi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I)) @@ -155,12 +155,12 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) #define _mm512_shldi_epi64(A, B, I) \ _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) -#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ 
(__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \ (__v16si)(B), \ (int)(I), \ (__v16si)(S), \ - (__mmask16)(U)); }) + (__mmask16)(U)) #define _mm512_maskz_shldi_epi32(U, A, B, I) \ _mm512_mask_shldi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I)) @@ -168,12 +168,12 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) #define _mm512_shldi_epi32(A, B, I) \ _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) -#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \ (__v32hi)(B), \ (int)(I), \ (__v32hi)(S), \ - (__mmask32)(U)); }) + (__mmask32)(U)) #define _mm512_maskz_shldi_epi16(U, A, B, I) \ _mm512_mask_shldi_epi16(_mm512_setzero_si512(), (U), (A), (B), (I)) @@ -181,12 +181,12 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) #define _mm512_shldi_epi16(A, B, I) \ _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) -#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \ (__v8di)(B), \ (int)(I), \ (__v8di)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm512_maskz_shrdi_epi64(U, A, B, I) \ _mm512_mask_shrdi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I)) @@ -194,12 +194,12 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) #define _mm512_shrdi_epi64(A, B, I) \ _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) -#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \ (__v16si)(B), \ (int)(I), \ (__v16si)(S), \ - (__mmask16)(U)); }) + (__mmask16)(U)) #define _mm512_maskz_shrdi_epi32(U, A, B, I) \ _mm512_mask_shrdi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I)) @@ -207,12 +207,12 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) #define _mm512_shrdi_epi32(A, B, I) \ _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) -#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \ (__v32hi)(B), \ (int)(I), \ (__v32hi)(S), \ - (__mmask32)(U)); }) + (__mmask32)(U)) #define _mm512_maskz_shrdi_epi16(U, A, B, I) \ _mm512_mask_shrdi_epi16(_mm512_setzero_si512(), (U), (A), (B), (I)) diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index a18e2c98a489..a04ef6f17c36 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -33,85 +33,85 @@ /* Integer compare */ -#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi8_mask(a, b, p) \ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi8_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu8_mask(a, b, p) \ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ 
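The vpshld*/vpshrd* macros being rewritten here implement funnel shifts. A scalar model of one 64-bit lane, under my reading of the builtin (valid for shift counts 1..63; lane fan-out and masking are handled by the surrounding macro):

#include <stdint.h>

/* One lane of _mm512_shldi_epi64: shift the 128-bit value a:b left
   by i and keep the high half, i.e. bits of b fill in from the right. */
static inline uint64_t shld64(uint64_t a, uint64_t b, unsigned i) {
  return (a << i) | (b >> (64 - i));
}
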
({ \ +#define _mm_mask_cmp_epu8_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi8_mask(a, b, p) \ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) -#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu8_mask(a, b, p) \ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)-1); }) + (__mmask32)-1) -#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)(m)); }) + (__mmask32)(m)) -#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi16_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi16_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu16_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epu16_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi16_mask(a, b, p) \ (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) -#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu16_mask(a, b, p) \ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)(m)); }) + (__mmask16)(m)) #define _mm_cmpeq_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) @@ -1833,47 +1833,47 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) } -#define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_shufflehi_epi16(W, U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ - (__v8hi)(__m128i)(W)); }) + (__v8hi)(__m128i)(W)) -#define 
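The p argument of these compare macros selects the predicate, and bit i of the returned mask reflects lane i; the named wrappers defined just above (_mm_cmpeq_epi8_mask and friends) simply pin p. A usage sketch (requires -mavx512vl -mavx512bw; the function name is illustrative):

#include <immintrin.h>

/* Set mask bit i when byte i of a is unsigned-less-than byte i of b. */
__mmask16 bytes_below(__m128i a, __m128i b) {
  return _mm_cmp_epu8_mask(a, b, _MM_CMPINT_LT);
}
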
_mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ +#define _mm_maskz_shufflehi_epi16(U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_si128()); }) + (__v8hi)_mm_setzero_si128()) -#define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ - (__v16hi)(__m256i)(W)); }) + (__v16hi)(__m256i)(W)) -#define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_shufflehi_epi16(U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ - (__v16hi)_mm256_setzero_si256()); }) + (__v16hi)_mm256_setzero_si256()) -#define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_shufflelo_epi16(W, U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ - (__v8hi)(__m128i)(W)); }) + (__v8hi)(__m128i)(W)) -#define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ +#define _mm_maskz_shufflelo_epi16(U, A, imm) \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_si128()); }) + (__v8hi)_mm_setzero_si128()) -#define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflelo_epi16((A), \ (imm)), \ - (__v16hi)(__m256i)(W)); }) + (__v16hi)(__m256i)(W)) -#define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_shufflelo_epi16(U, A, imm) \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflelo_epi16((A), \ (imm)), \ - (__v16hi)_mm256_setzero_si256()); }) + (__v16hi)_mm256_setzero_si256()) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi16(__m256i __A, __m256i __B) @@ -2693,61 +2693,61 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, (__v16hi)__W); } -#define _mm_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ +#define _mm_mask_alignr_epi8(W, U, A, B, N) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ - (__v16qi)(__m128i)(W)); }) + (__v16qi)(__m128i)(W)) -#define _mm_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ +#define _mm_maskz_alignr_epi8(U, A, B, N) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ - (__v16qi)_mm_setzero_si128()); }) + (__v16qi)_mm_setzero_si128()) -#define _mm256_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ +#define _mm256_mask_alignr_epi8(W, U, A, B, N) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ - (__v32qi)(__m256i)(W)); }) + (__v32qi)(__m256i)(W)) -#define _mm256_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ +#define _mm256_maskz_alignr_epi8(U, A, B, N) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ - (__v32qi)_mm256_setzero_si256()); }) + (__v32qi)_mm256_setzero_si256()) -#define _mm_dbsad_epu8(A, B, imm) __extension__ ({ \ +#define _mm_dbsad_epu8(A, B, imm) \ (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(imm), \ (__v8hi)_mm_setzero_si128(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define 
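All of the masked shuffle and alignr macros in this hunk compose the unmasked operation with a select* builtin. A scalar model of that select under the usual merge/zero-masking convention (the maskz forms simply pass a zero vector as w):

#include <stdint.h>

/* r[i] takes the shuffled lane when mask bit i is set, else w[i]. */
static void mask_select16(uint16_t r[8], uint8_t u,
                          const uint16_t op[8], const uint16_t w[8]) {
  for (int i = 0; i < 8; ++i)
    r[i] = (u >> i) & 1 ? op[i] : w[i];
}
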
_mm_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ +#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(imm), \ (__v8hi)(__m128i)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \ +#define _mm_maskz_dbsad_epu8(U, A, B, imm) \ (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(imm), \ (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_dbsad_epu8(A, B, imm) __extension__ ({ \ +#define _mm256_dbsad_epu8(A, B, imm) \ (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), (int)(imm), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)-1); }) + (__mmask16)-1) -#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), (int)(imm), \ (__v16hi)(__m256i)(W), \ - (__mmask16)(U)); }) + (__mmask16)(U)) -#define _mm256_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), (int)(imm), \ (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(U)); }) + (__mmask16)(U)) #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 162867de5476..53beef2abda1 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -789,135 +789,135 @@ _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { (__mmask8) __U); } -#define _mm_range_pd(A, B, C) __extension__ ({ \ +#define _mm_range_pd(A, B, C) \ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_range_pd(W, U, A, B, C) \ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_range_pd(U, A, B, C) \ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_range_pd(A, B, C) __extension__ ({ \ +#define _mm256_range_pd(A, B, C) \ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_range_pd(W, U, A, B, C) \ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_range_pd(U, A, B, C) \ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_range_ps(A, B, C) __extension__ ({ \ +#define _mm_range_ps(A, B, C) \ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + 
(__mmask8)-1) -#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_range_ps(W, U, A, B, C) \ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ - (__v4sf)(__m128)(W), (__mmask8)(U)); }) + (__v4sf)(__m128)(W), (__mmask8)(U)) -#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_range_ps(U, A, B, C) \ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_range_ps(A, B, C) __extension__ ({ \ +#define _mm256_range_ps(A, B, C) \ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_range_ps(W, U, A, B, C) \ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)(__m256)(W), (__mmask8)(U)); }) + (__v8sf)(__m256)(W), (__mmask8)(U)) -#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_range_ps(U, A, B, C) \ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_reduce_pd(A, B) __extension__ ({ \ +#define _mm_reduce_pd(A, B) \ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \ +#define _mm_mask_reduce_pd(W, U, A, B) \ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \ +#define _mm_maskz_reduce_pd(U, A, B) \ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_reduce_pd(A, B) __extension__ ({ \ +#define _mm256_reduce_pd(A, B) \ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \ +#define _mm256_mask_reduce_pd(W, U, A, B) \ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \ +#define _mm256_maskz_reduce_pd(U, A, B) \ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_reduce_ps(A, B) __extension__ ({ \ +#define _mm_reduce_ps(A, B) \ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \ +#define _mm_mask_reduce_ps(W, U, A, B) \ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \ +#define _mm_maskz_reduce_ps(U, A, B) \ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_reduce_ps(A, B) __extension__ ({ \ +#define _mm256_reduce_ps(A, B) \ 
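A scalar model of one lane of the reduce macros above, under my reading of VREDUCEPD/VREDUCEPS: with M = imm[7:4] and round-to-nearest selected in the low immediate bits, the result is the fraction left after rounding x to M fractional bits. A sketch, not the exact builtin:

#include <math.h>

static double reduce_lane(double x, int m) {
  double scaled = ldexp(x, m);             /* x * 2^m              */
  return x - ldexp(nearbyint(scaled), -m); /* x - round(x*2^m)/2^m */
}
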
(__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \ +#define _mm256_mask_reduce_ps(W, U, A, B) \ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \ +#define _mm256_maskz_reduce_ps(U, A, B) \ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_movepi32_mask (__m128i __A) @@ -1085,105 +1085,105 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) (__v4di)_mm256_setzero_si256()); } -#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ +#define _mm256_extractf64x2_pd(A, imm) \ (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \ (__v4df)_mm256_undefined_pd(), \ ((imm) & 1) ? 2 : 0, \ - ((imm) & 1) ? 3 : 1); }) + ((imm) & 1) ? 3 : 1) -#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ - (__v2df)(W)); }) + (__v2df)(W)) -#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_extractf64x2_pd(U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ - (__v2df)_mm_setzero_pd()); }) + (__v2df)_mm_setzero_pd()) -#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \ +#define _mm256_extracti64x2_epi64(A, imm) \ (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \ (__v4di)_mm256_undefined_si256(), \ ((imm) & 1) ? 2 : 0, \ - ((imm) & 1) ? 3 : 1); }) + ((imm) & 1) ? 3 : 1) -#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ - (__v2di)(W)); }) + (__v2di)(W)) -#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ - (__v2di)_mm_setzero_si128()); }) + (__v2di)_mm_setzero_si128()) -#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ +#define _mm256_insertf64x2(A, B, imm) \ (__m256d)__builtin_shufflevector((__v4df)(A), \ (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \ ((imm) & 0x1) ? 0 : 4, \ ((imm) & 0x1) ? 1 : 5, \ ((imm) & 0x1) ? 4 : 2, \ - ((imm) & 0x1) ? 5 : 3); }) + ((imm) & 0x1) ? 5 : 3) -#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_insertf64x2(W, U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ - (__v4df)(W)); }) + (__v4df)(W)) -#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_insertf64x2(U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \ +#define _mm256_inserti64x2(A, B, imm) \ (__m256i)__builtin_shufflevector((__v4di)(A), \ (__v4di)_mm256_castsi128_si256((__m128i)(B)), \ ((imm) & 0x1) ? 0 : 4, \ ((imm) & 0x1) ? 
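The extract/insert macros in this stretch all key off a single immediate bit that picks the low or high 128-bit half, as the shufflevector index patterns show. A usage sketch (requires -mavx512vl -mavx512dq; v is illustrative):

#include <immintrin.h>

/* Take elements 2 and 3 (the high 128 bits) of a 256-bit vector. */
__m128d high_half(__m256d v) {
  return _mm256_extractf64x2_pd(v, 1);
}
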
1 : 5, \ ((imm) & 0x1) ? 4 : 2, \ - ((imm) & 0x1) ? 5 : 3); }) + ((imm) & 0x1) ? 5 : 3) -#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_inserti64x2(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ - (__v4di)(W)); }) + (__v4di)(W)) -#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_inserti64x2(U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) -#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ +#define _mm_mask_fpclass_pd_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \ +#define _mm_fpclass_pd_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ +#define _mm256_mask_fpclass_pd_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \ +#define _mm256_fpclass_pd_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ +#define _mm_mask_fpclass_ps_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \ +#define _mm_fpclass_ps_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ +#define _mm256_mask_fpclass_ps_mask(U, A, imm) \ (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \ +#define _mm256_fpclass_ps_mask(A, imm) \ (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 23bc89a65d78..cc091e6c361c 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -684,125 +684,125 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); } -#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi32_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu32_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ 
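For the fpclass macros rewritten above, each immediate bit enables one category test; the bit assignments in the comment below are my reading of VFPCLASSPD/PS, not something stated in the patch:

#include <immintrin.h>

/* imm bits: 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +inf, 0x10 -inf,
   0x20 denormal, 0x40 finite negative, 0x80 SNaN.
   Here: mask bit i is set when lane i is any NaN. */
__mmask8 lanes_are_nan(__m128d v) {
  return _mm_fpclass_pd_mask(v, 0x01 | 0x80);
}
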
({ \ +#define _mm_mask_cmp_epu32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi32_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu32_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epi64_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epi64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_epu64_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_epu64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epi64_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_epu64_mask(a, b, p) \ (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_ps_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_ps_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define 
_mm256_cmp_pd_mask(a, b, p) __extension__ ({ \ +#define _mm256_cmp_pd_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ +#define _mm256_mask_cmp_pd_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_ps_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_ps_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) -#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \ +#define _mm_cmp_pd_mask(a, b, p) \ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ +#define _mm_mask_cmp_pd_mask(m, a, b, p) \ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ - (__mmask8)(m)); }) + (__mmask8)(m)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) @@ -3202,79 +3202,79 @@ _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { (__v4di)_mm256_setzero_si256()); } -#define _mm_roundscale_pd(A, imm) __extension__ ({ \ +#define _mm_roundscale_pd(A, imm) \ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_roundscale_pd(W, U, A, imm) \ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ +#define _mm_maskz_roundscale_pd(U, A, imm) \ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_roundscale_pd(A, imm) __extension__ ({ \ +#define _mm256_roundscale_pd(A, imm) \ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_roundscale_pd(W, U, A, imm) \ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_roundscale_pd(U, A, imm) \ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_roundscale_ps(A, imm) __extension__ ({ \ +#define _mm_roundscale_ps(A, imm) \ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ +#define _mm_mask_roundscale_ps(W, U, A, imm) \ 
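A scalar model of one lane of the roundscale macros being rewritten here: round x while keeping M = imm[7:4] fraction bits, with the rounding mode chosen by the low immediate bits (round-to-nearest shown; a sketch of my reading, not the builtin itself):

#include <math.h>

static double roundscale_lane(double x, int m) {
  return ldexp(nearbyint(ldexp(x, m)), -m); /* round(x * 2^m) / 2^m */
}
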
(__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ +#define _mm_maskz_roundscale_ps(U, A, imm) \ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_roundscale_ps(A, imm) __extension__ ({ \ +#define _mm256_roundscale_ps(A, imm) \ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_roundscale_ps(W, U, A, imm) \ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_roundscale_ps(U, A, imm) \ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_scalef_pd (__m128d __A, __m128d __B) { @@ -3383,889 +3383,889 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__mmask8) __U); } -#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ +#define _mm_i64scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) + (__v2df)(__m128d)(v1), (int)(scale)) -#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) + (__v2df)(__m128d)(v1), (int)(scale)) -#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ +#define _mm_i64scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) + (__v2di)(__m128i)(v1), (int)(scale)) -#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) + (__v2di)(__m128i)(v1), (int)(scale)) -#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ +#define _mm256_i64scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \ (__v4di)(__m256i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) + (__v4df)(__m256d)(v1), (int)(scale)) -#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) + (__v4df)(__m256d)(v1), (int)(scale)) -#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ +#define _mm256_i64scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \ (__v4di)(__m256i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) + (__v4di)(__m256i)(v1), (int)(scale)) -#define 
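A hypothetical use of the 64-bit-indexed scatter form above (base, idx, v and m are illustrative; the scale immediate is the byte multiplier per index and must be 1, 2, 4 or 8):

#include <immintrin.h>

/* Store lane j of v at byte offset idx[j] * 8 from base when mask
   bit j is set; requires -mavx512f -mavx512vl. */
void scatter2(double *base, __m128i idx, __m128d v, __mmask8 m) {
  _mm_mask_i64scatter_pd(base, m, idx, v, 8);
}
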
_mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) + (__v4di)(__m256i)(v1), (int)(scale)) -#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ +#define _mm_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) + (int)(scale)) -#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) + (int)(scale)) -#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ +#define _mm_i64scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) + (__v4si)(__m128i)(v1), (int)(scale)) -#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) + (__v4si)(__m128i)(v1), (int)(scale)) -#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ +#define _mm256_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \ (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) + (int)(scale)) -#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) + (int)(scale)) -#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ +#define _mm256_i64scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \ (__v4di)(__m256i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) + (__v4si)(__m128i)(v1), (int)(scale)) -#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ +#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) + (__v4si)(__m128i)(v1), (int)(scale)) -#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ +#define _mm_i32scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \ (__v4si)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) - -#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v2df)(__m128d)(v1), (int)(scale)); }) - -#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) - -#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ - 
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v2di)(__m128i)(v1), (int)(scale)); }) - -#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) - -#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v4df)(__m256d)(v1), (int)(scale)); }) - -#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) - -#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v4di)(__m256i)(v1), (int)(scale)); }) - -#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ - (int)(scale)); }) - -#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \ - (__v4si)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \ - (__v4si)(__m128i)(index), \ - (__v4si)(__m128i)(v1), (int)(scale)); }) - -#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \ - (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ - (int)(scale)); }) - -#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \ - (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ - (int)(scale)); }) - -#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \ - (__v8si)(__m256i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) - -#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ - __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \ - (__v8si)(__m256i)(index), \ - (__v8si)(__m256i)(v1), (int)(scale)); }) - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)__W); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)__W); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { - return 
(__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)__W); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)_mm_setzero_pd()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - (__v8sf)__W); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - (__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sub_pd(__A, __B), - (__v2df)__W); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sub_pd(__A, __B), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sub_pd(__A, __B), - (__v4df)__W); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sub_pd(__A, __B), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sub_ps(__A, __B), - (__v4sf)__W); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sub_ps(__A, __B), - (__v4sf)_mm_setzero_ps()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sub_ps(__A, __B), - (__v8sf)__W); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sub_ps(__A, __B), - (__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { - return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, - (__v4si)__B); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, - __m128i __B) { - return (__m128i)__builtin_ia32_selectd_128(__U, - (__v4si)_mm_permutex2var_epi32(__A, __I, __B), - (__v4si)__A); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, - __m128i __B) { - return (__m128i)__builtin_ia32_selectd_128(__U, - (__v4si)_mm_permutex2var_epi32(__A, __I, __B), - 
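One observation while this block is in the diff: the removed _mm_maskz_sqrt_ps builds its zero operand as (__v4sf)_mm_setzero_pd(). The cast hides the float/double mismatch, but _mm_setzero_ps() is the type-correct call, and wherever this helper is re-added it is worth spelling it that way.
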
(__v4si)__I); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, - __m128i __B) { - return (__m128i)__builtin_ia32_selectd_128(__U, - (__v4si)_mm_permutex2var_epi32(__A, __I, __B), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { - return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, - (__v8si) __B); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, - __m256i __B) { - return (__m256i)__builtin_ia32_selectd_256(__U, - (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), - (__v8si)__A); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, - __m256i __B) { - return (__m256i)__builtin_ia32_selectd_256(__U, - (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), - (__v8si)__I); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, - __m256i __B) { - return (__m256i)__builtin_ia32_selectd_256(__U, - (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), - (__v8si)_mm256_setzero_si256()); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { - return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, - (__v2df)__B); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { - return (__m128d)__builtin_ia32_selectpd_128(__U, - (__v2df)_mm_permutex2var_pd(__A, __I, __B), - (__v2df)__A); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { - return (__m128d)__builtin_ia32_selectpd_128(__U, - (__v2df)_mm_permutex2var_pd(__A, __I, __B), - (__v2df)(__m128d)__I); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { - return (__m128d)__builtin_ia32_selectpd_128(__U, - (__v2df)_mm_permutex2var_pd(__A, __I, __B), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { - return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, - (__v4df)__B); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, - __m256d __B) { - return (__m256d)__builtin_ia32_selectpd_256(__U, - (__v4df)_mm256_permutex2var_pd(__A, __I, __B), - (__v4df)__A); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, - __m256d __B) { - return (__m256d)__builtin_ia32_selectpd_256(__U, - (__v4df)_mm256_permutex2var_pd(__A, __I, __B), - (__v4df)(__m256d)__I); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, - __m256d __B) { - return (__m256d)__builtin_ia32_selectpd_256(__U, - (__v4df)_mm256_permutex2var_pd(__A, __I, __B), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { - return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, - (__v4sf)__B); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i 
__I, __m128 __B) { - return (__m128)__builtin_ia32_selectps_128(__U, - (__v4sf)_mm_permutex2var_ps(__A, __I, __B), - (__v4sf)__A); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { - return (__m128)__builtin_ia32_selectps_128(__U, - (__v4sf)_mm_permutex2var_ps(__A, __I, __B), - (__v4sf)(__m128)__I); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { - return (__m128)__builtin_ia32_selectps_128(__U, - (__v4sf)_mm_permutex2var_ps(__A, __I, __B), - (__v4sf)_mm_setzero_ps()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { - return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, - (__v8sf) __B); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { - return (__m256)__builtin_ia32_selectps_256(__U, - (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), - (__v8sf)__A); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, - __m256 __B) { - return (__m256)__builtin_ia32_selectps_256(__U, - (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), - (__v8sf)(__m256)__I); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, - __m256 __B) { - return (__m256)__builtin_ia32_selectps_256(__U, - (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), - (__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { - return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, - (__v2di)__B); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, - __m128i __B) { - return (__m128i)__builtin_ia32_selectq_128(__U, - (__v2di)_mm_permutex2var_epi64(__A, __I, __B), - (__v2di)__A); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, - __m128i __B) { - return (__m128i)__builtin_ia32_selectq_128(__U, - (__v2di)_mm_permutex2var_epi64(__A, __I, __B), - (__v2di)__I); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, - __m128i __B) { - return (__m128i)__builtin_ia32_selectq_128(__U, - (__v2di)_mm_permutex2var_epi64(__A, __I, __B), - (__v2di)_mm_setzero_si128()); -} - - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { - return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, - (__v4di) __B); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, - __m256i __B) { - return (__m256i)__builtin_ia32_selectq_256(__U, - (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), - (__v4di)__A); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, - __m256i __B) { - return (__m256i)__builtin_ia32_selectq_256(__U, - (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), - (__v4di)__I); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, - __m256i __B) { - return (__m256i)__builtin_ia32_selectq_256(__U, - (__v4di)_mm256_permutex2var_epi64(__A, 
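The permutex2var family in this deleted stretch is a two-source table lookup: each index lane selects from the concatenation of A and B. A scalar model of the 4-lane 32-bit form, under my reading of VPERMI2D (only the low three index bits participate in a 2x4-entry table):

#include <stdint.h>

static void permutex2var4(uint32_t r[4], const uint32_t a[4],
                          const uint32_t idx[4], const uint32_t b[4]) {
  for (int i = 0; i < 4; ++i) {
    unsigned k = idx[i] & 7;      /* 8-entry table -> 3 index bits */
    r[i] = k < 4 ? a[k] : b[k - 4];
  }
}
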
__I, __B), - (__v4di)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi8_epi32(__A), - (__v4si)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi8_epi32(__A), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi8_epi32(__A), - (__v8si)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi8_epi32(__A), - (__v8si)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi8_epi64(__A), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi8_epi64(__A), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi8_epi64(__A), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi8_epi64(__A), - (__v4di)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi32_epi64(__X), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi32_epi64(__X), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi32_epi64(__X), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi32_epi64(__X), - (__v4di)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi16_epi32(__A), - (__v4si)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepi16_epi32(__A), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi16_epi32(__A), - 
(__v8si)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepi16_epi32(__A), - (__v8si)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi16_epi64(__A), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepi16_epi64(__A), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi16_epi64(__A), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepi16_epi64(__A), - (__v4di)_mm256_setzero_si256()); -} - - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu8_epi32(__A), - (__v4si)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu8_epi32(__A), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu8_epi32(__A), - (__v8si)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu8_epi32(__A), - (__v8si)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu8_epi64(__A), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu8_epi64(__A), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu8_epi64(__A), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu8_epi64(__A), - (__v4di)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu32_epi64(__X), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu32_epi64(__X), - (__v2di)_mm_setzero_si128()); -} - -static 
__inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu32_epi64(__X), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu32_epi64(__X), - (__v4di)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu16_epi32(__A), - (__v4si)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_cvtepu16_epi32(__A), - (__v4si)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu16_epi32(__A), - (__v8si)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_cvtepu16_epi32(__A), - (__v8si)_mm256_setzero_si256()); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu16_epi64(__A), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_cvtepu16_epi64(__A), - (__v2di)_mm_setzero_si128()); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu16_epi64(__A), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_cvtepu16_epi64(__A), - (__v4di)_mm256_setzero_si256()); -} - - -#define _mm_rol_epi32(a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)-1); }) - -#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ - (__v4si)(__m128i)(w), (__mmask8)(u)); }) - -#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(u)); }) - -#define _mm256_rol_epi32(a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1); }) - -#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ - (__v8si)(__m256i)(w), (__mmask8)(u)); }) - -#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(u)); }) - -#define _mm_rol_epi64(a, b) __extension__ ({\ - 
(__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)-1); }) - -#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)(__m128i)(w), (__mmask8)(u)); }) - -#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\ - (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(u)); }) - -#define _mm256_rol_epi64(a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)-1); }) - -#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ - (__v4di)(__m256i)(w), (__mmask8)(u)); }) - -#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\ - (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(u)); }) + (__v2df)(__m128d)(v1), (int)(scale)) + +#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), \ + (__v2df)(__m128d)(v1), (int)(scale)) + +#define _mm_i32scatter_epi64(addr, index, v1, scale) \ + __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ + (__v4si)(__m128i)(index), \ + (__v2di)(__m128i)(v1), (int)(scale)) + +#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), \ + (__v2di)(__m128i)(v1), (int)(scale)) + +#define _mm256_i32scatter_pd(addr, index, v1, scale) \ + __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \ + (__v4si)(__m128i)(index), \ + (__v4df)(__m256d)(v1), (int)(scale)) + +#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), \ + (__v4df)(__m256d)(v1), (int)(scale)) + +#define _mm256_i32scatter_epi64(addr, index, v1, scale) \ + __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \ + (__v4si)(__m128i)(index), \ + (__v4di)(__m256i)(v1), (int)(scale)) + +#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), \ + (__v4di)(__m256i)(v1), (int)(scale)) + +#define _mm_i32scatter_ps(addr, index, v1, scale) \ + __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \ + (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ + (int)(scale)) + +#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ + (int)(scale)) + +#define _mm_i32scatter_epi32(addr, index, v1, scale) \ + __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \ + (__v4si)(__m128i)(index), \ + (__v4si)(__m128i)(v1), (int)(scale)) + +#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \ + (__v4si)(__m128i)(index), \ + (__v4si)(__m128i)(v1), (int)(scale)) + +#define _mm256_i32scatter_ps(addr, index, v1, scale) \ + __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \ + (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ + (int)(scale)) + +#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ + 
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \ + (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ + (int)(scale)) + +#define _mm256_i32scatter_epi32(addr, index, v1, scale) \ + __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \ + (__v8si)(__m256i)(index), \ + (__v8si)(__m256i)(v1), (int)(scale)) + +#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ + __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \ + (__v8si)(__m256i)(index), \ + (__v8si)(__m256i)(v1), (int)(scale)) + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_sqrt_pd(__A), + (__v2df)__W); + } + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_sqrt_pd(__A), + (__v2df)_mm_setzero_pd()); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_sqrt_pd(__A), + (__v4df)__W); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_sqrt_pd(__A), + (__v4df)_mm256_setzero_pd()); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_sqrt_ps(__A), + (__v4sf)__W); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_sqrt_ps(__A), + (__v4sf)_mm_setzero_ps()); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_sqrt_ps(__A), + (__v8sf)__W); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_sqrt_ps(__A), + (__v8sf)_mm256_setzero_ps()); + } + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_sub_pd(__A, __B), + (__v2df)__W); + } + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_sub_pd(__A, __B), + (__v2df)_mm_setzero_pd()); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_sub_pd(__A, __B), + (__v4df)__W); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_sub_pd(__A, __B), + (__v4df)_mm256_setzero_pd()); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_sub_ps(__A, __B), + (__v4sf)__W); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + 
_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_sub_ps(__A, __B), + (__v4sf)_mm_setzero_ps()); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_sub_ps(__A, __B), + (__v8sf)__W); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_sub_ps(__A, __B), + (__v8sf)_mm256_setzero_ps()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { + return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, + (__v4si)__B); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, + __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_permutex2var_epi32(__A, __I, __B), + (__v4si)__A); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, + __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_permutex2var_epi32(__A, __I, __B), + (__v4si)__I); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, + __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_permutex2var_epi32(__A, __I, __B), + (__v4si)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { + return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, + (__v8si) __B); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, + __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), + (__v8si)__A); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, + __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), + (__v8si)__I); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, + __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), + (__v8si)_mm256_setzero_si256()); + } + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { + return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, + (__v2df)__B); + } + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128(__U, + (__v2df)_mm_permutex2var_pd(__A, __I, __B), + (__v2df)__A); + } + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128(__U, + (__v2df)_mm_permutex2var_pd(__A, __I, __B), + (__v2df)(__m128d)__I); + } + + static __inline__ __m128d __DEFAULT_FN_ATTRS + _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { + return 
(__m128d)__builtin_ia32_selectpd_128(__U, + (__v2df)_mm_permutex2var_pd(__A, __I, __B), + (__v2df)_mm_setzero_pd()); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { + return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, + (__v4df)__B); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, + __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256(__U, + (__v4df)_mm256_permutex2var_pd(__A, __I, __B), + (__v4df)__A); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, + __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256(__U, + (__v4df)_mm256_permutex2var_pd(__A, __I, __B), + (__v4df)(__m256d)__I); + } + + static __inline__ __m256d __DEFAULT_FN_ATTRS + _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, + __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256(__U, + (__v4df)_mm256_permutex2var_pd(__A, __I, __B), + (__v4df)_mm256_setzero_pd()); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { + return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, + (__v4sf)__B); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128(__U, + (__v4sf)_mm_permutex2var_ps(__A, __I, __B), + (__v4sf)__A); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128(__U, + (__v4sf)_mm_permutex2var_ps(__A, __I, __B), + (__v4sf)(__m128)__I); + } + + static __inline__ __m128 __DEFAULT_FN_ATTRS + _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128(__U, + (__v4sf)_mm_permutex2var_ps(__A, __I, __B), + (__v4sf)_mm_setzero_ps()); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { + return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, + (__v8sf) __B); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256(__U, + (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), + (__v8sf)__A); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, + __m256 __B) { + return (__m256)__builtin_ia32_selectps_256(__U, + (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), + (__v8sf)(__m256)__I); + } + + static __inline__ __m256 __DEFAULT_FN_ATTRS + _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, + __m256 __B) { + return (__m256)__builtin_ia32_selectps_256(__U, + (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), + (__v8sf)_mm256_setzero_ps()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { + return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, + (__v2di)__B); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, + __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128(__U, + (__v2di)_mm_permutex2var_epi64(__A, __I, __B), + (__v2di)__A); + } + + static 
__inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, + __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128(__U, + (__v2di)_mm_permutex2var_epi64(__A, __I, __B), + (__v2di)__I); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, + __m128i __B) { + return (__m128i)__builtin_ia32_selectq_128(__U, + (__v2di)_mm_permutex2var_epi64(__A, __I, __B), + (__v2di)_mm_setzero_si128()); + } + + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { + return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, + (__v4di) __B); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, + __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256(__U, + (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), + (__v4di)__A); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, + __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256(__U, + (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), + (__v4di)__I); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, + __m256i __B) { + return (__m256i)__builtin_ia32_selectq_256(__U, + (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), + (__v4di)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepi8_epi32(__A), + (__v4si)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepi8_epi32(__A), + (__v4si)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepi8_epi32(__A), + (__v8si)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepi8_epi32(__A), + (__v8si)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepi8_epi64(__A), + (__v2di)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepi8_epi64(__A), + (__v2di)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepi8_epi64(__A), + (__v4di)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepi8_epi64(__A), + (__v4di)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i 
__X) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepi32_epi64(__X), + (__v2di)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepi32_epi64(__X), + (__v2di)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepi32_epi64(__X), + (__v4di)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepi32_epi64(__X), + (__v4di)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepi16_epi32(__A), + (__v4si)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepi16_epi32(__A), + (__v4si)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepi16_epi32(__A), + (__v8si)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepi16_epi32(__A), + (__v8si)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepi16_epi64(__A), + (__v2di)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepi16_epi64(__A), + (__v2di)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepi16_epi64(__A), + (__v4di)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepi16_epi64(__A), + (__v4di)_mm256_setzero_si256()); + } + + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepu8_epi32(__A), + (__v4si)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepu8_epi32(__A), + (__v4si)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepu8_epi32(__A), + (__v8si)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + 
_mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepu8_epi32(__A), + (__v8si)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepu8_epi64(__A), + (__v2di)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepu8_epi64(__A), + (__v2di)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepu8_epi64(__A), + (__v4di)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepu8_epi64(__A), + (__v4di)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepu32_epi64(__X), + (__v2di)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepu32_epi64(__X), + (__v2di)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepu32_epi64(__X), + (__v4di)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepu32_epi64(__X), + (__v4di)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepu16_epi32(__A), + (__v4si)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_cvtepu16_epi32(__A), + (__v4si)_mm_setzero_si128()); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepu16_epi32(__A), + (__v8si)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_cvtepu16_epi32(__A), + (__v8si)_mm256_setzero_si256()); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepu16_epi64(__A), + (__v2di)__W); + } + + static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) + { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_cvtepu16_epi64(__A), + (__v2di)_mm_setzero_si128()); + } + + static 
__inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepu16_epi64(__A), + (__v4di)__W); + } + + static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) + { + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_cvtepu16_epi64(__A), + (__v4di)_mm256_setzero_si256()); + } + + +#define _mm_rol_epi32(a, b) \ + (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ + (__v4si)_mm_setzero_si128(), \ + (__mmask8)-1) + +#define _mm_mask_rol_epi32(w, u, a, b) \ + (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ + (__v4si)(__m128i)(w), (__mmask8)(u)) + +#define _mm_maskz_rol_epi32(u, a, b) \ + (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ + (__v4si)_mm_setzero_si128(), \ + (__mmask8)(u)) + +#define _mm256_rol_epi32(a, b) \ + (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1) + +#define _mm256_mask_rol_epi32(w, u, a, b) \ + (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ + (__v8si)(__m256i)(w), (__mmask8)(u)) + +#define _mm256_maskz_rol_epi32(u, a, b) \ + (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)(u)) + +#define _mm_rol_epi64(a, b) \ + (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ + (__v2di)_mm_setzero_si128(), \ + (__mmask8)-1) + +#define _mm_mask_rol_epi64(w, u, a, b) \ + (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ + (__v2di)(__m128i)(w), (__mmask8)(u)) + +#define _mm_maskz_rol_epi64(u, a, b) \ + (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ + (__v2di)_mm_setzero_si128(), \ + (__mmask8)(u)) + +#define _mm256_rol_epi64(a, b) \ + (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ + (__v4di)_mm256_setzero_si256(), \ + (__mmask8)-1) + +#define _mm256_mask_rol_epi64(w, u, a, b) \ + (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ + (__v4di)(__m256i)(w), (__mmask8)(u)) + +#define _mm256_maskz_rol_epi64(u, a, b) \ + (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ + (__v4di)_mm256_setzero_si256(), \ + (__mmask8)(u)) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rolv_epi32 (__m128i __A, __m128i __B) @@ -4387,61 +4387,61 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) (__mmask8) __U); } -#define _mm_ror_epi32(A, B) __extension__ ({ \ +#define _mm_ror_epi32(A, B) \ (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ (__v4si)_mm_setzero_si128(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \ +#define _mm_mask_ror_epi32(W, U, A, B) \ (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ - (__v4si)(__m128i)(W), (__mmask8)(U)); }) + (__v4si)(__m128i)(W), (__mmask8)(U)) -#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \ +#define _mm_maskz_ror_epi32(U, A, B) \ (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_ror_epi32(A, B) __extension__ ({ \ +#define _mm256_ror_epi32(A, B) \ (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1); }) + 
(__mmask8)-1) -#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \ +#define _mm256_mask_ror_epi32(W, U, A, B) \ (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ - (__v8si)(__m256i)(W), (__mmask8)(U)); }) + (__v8si)(__m256i)(W), (__mmask8)(U)) -#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \ +#define _mm256_maskz_ror_epi32(U, A, B) \ (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_ror_epi64(A, B) __extension__ ({ \ +#define _mm_ror_epi64(A, B) \ (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ (__v2di)_mm_setzero_si128(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \ +#define _mm_mask_ror_epi64(W, U, A, B) \ (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ - (__v2di)(__m128i)(W), (__mmask8)(U)); }) + (__v2di)(__m128i)(W), (__mmask8)(U)) -#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \ +#define _mm_maskz_ror_epi64(U, A, B) \ (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_ror_epi64(A, B) __extension__ ({ \ +#define _mm256_ror_epi64(A, B) \ (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ (__v4di)_mm256_setzero_si256(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \ +#define _mm256_mask_ror_epi64(W, U, A, B) \ (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ - (__v4di)(__m256i)(W), (__mmask8)(U)); }) + (__v4di)(__m256i)(W), (__mmask8)(U)) -#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \ +#define _mm256_maskz_ror_epi64(U, A, B) \ (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) @@ -5293,77 +5293,77 @@ _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) (__v4di) _mm256_setzero_si256()); } -#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_pd(A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), \ - (int)(imm), (__mmask8)(U)); }) + (int)(imm), (__mmask8)(U)) -#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \ +#define _mm256_fixupimm_pd(A, B, C, imm) \ (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - 
(__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)(U)); }) + (int)(imm), (__mmask8)(U)) -#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \ +#define _mm_fixupimm_ps(A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \ +#define _mm256_fixupimm_ps(A, B, C, imm) \ (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) @@ -5898,45 +5898,45 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) (__mmask8) __U); } -#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \ +#define _mm_mask_permute_pd(W, U, X, C) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_permute_pd((X), (C)), \ - (__v2df)(__m128d)(W)); }) + (__v2df)(__m128d)(W)) -#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \ +#define _mm_maskz_permute_pd(U, X, C) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_permute_pd((X), (C)), \ - (__v2df)_mm_setzero_pd()); }) + (__v2df)_mm_setzero_pd()) -#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permute_pd(W, U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permute_pd((X), (C)), \ - (__v4df)(__m256d)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permute_pd(U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permute_pd((X), (C)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \ +#define _mm_mask_permute_ps(W, U, X, C) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_permute_ps((X), (C)), \ - (__v4sf)(__m128)(W)); }) + (__v4sf)(__m128)(W)) -#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \ +#define _mm_maskz_permute_ps(U, X, C) \ 
(__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_permute_ps((X), (C)), \ - (__v4sf)_mm_setzero_ps()); }) + (__v4sf)_mm_setzero_ps()) -#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permute_ps(W, U, X, C) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_permute_ps((X), (C)), \ - (__v8sf)(__m256)(W)); }) + (__v8sf)(__m256)(W)) -#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permute_ps(U, X, C) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_permute_ps((X), (C)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) @@ -6390,81 +6390,81 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) (__v4di)_mm256_setzero_si256()); } -#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ +#define _mm_ternarylogic_epi32(A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ +#define _mm256_ternarylogic_epi32(A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ +#define _mm_ternarylogic_epi64(A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ +#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ +#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_ternarylogic_epi64(A, 
B, C, imm) __extension__ ({ \ +#define _mm256_ternarylogic_epi64(A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ +#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ +#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \ +#define _mm256_shuffle_f32x4(A, B, imm) \ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ 0 + ((((imm) >> 0) & 0x1) * 4), \ @@ -6474,112 +6474,112 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) 8 + ((((imm) >> 1) & 0x1) * 4), \ 9 + ((((imm) >> 1) & 0x1) * 4), \ 10 + ((((imm) >> 1) & 0x1) * 4), \ - 11 + ((((imm) >> 1) & 0x1) * 4)); }) + 11 + ((((imm) >> 1) & 0x1) * 4)) -#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ - (__v8sf)(__m256)(W)); }) + (__v8sf)(__m256)(W)) -#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) -#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \ +#define _mm256_shuffle_f64x2(A, B, imm) \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ 0 + ((((imm) >> 0) & 0x1) * 2), \ 1 + ((((imm) >> 0) & 0x1) * 2), \ 4 + ((((imm) >> 1) & 0x1) * 2), \ - 5 + ((((imm) >> 1) & 0x1) * 2)); }) + 5 + ((((imm) >> 1) & 0x1) * 2)) -#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ - (__v4df)(__m256)(W)); }) + (__v4df)(__m256)(W)) -#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \ +#define _mm256_shuffle_i32x4(A, B, imm) \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ 0 + ((((imm) >> 0) & 0x1) * 2), \ 1 + ((((imm) >> 0) & 0x1) * 2), \ 4 + ((((imm) >> 1) & 0x1) * 2), \ - 5 + ((((imm) >> 1) & 0x1) * 2)); }) + 5 + ((((imm) >> 1) & 0x1) * 2)) -#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ - (__v8si)(__m256)(W)); }) + (__v8si)(__m256)(W)) -#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ +#define 
_mm256_maskz_shuffle_i32x4(U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \ +#define _mm256_shuffle_i64x2(A, B, imm) \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ 0 + ((((imm) >> 0) & 0x1) * 2), \ 1 + ((((imm) >> 0) & 0x1) * 2), \ 4 + ((((imm) >> 1) & 0x1) * 2), \ - 5 + ((((imm) >> 1) & 0x1) * 2)); }) + 5 + ((((imm) >> 1) & 0x1) * 2)) -#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ - (__v4di)(__m256)(W)); }) + (__v4di)(__m256)(W)) -#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) -#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ +#define _mm_mask_shuffle_pd(W, U, A, B, M) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_shuffle_pd((A), (B), (M)), \ - (__v2df)(__m128d)(W)); }) + (__v2df)(__m128d)(W)) -#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ +#define _mm_maskz_shuffle_pd(U, A, B, M) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_shuffle_pd((A), (B), (M)), \ - (__v2df)_mm_setzero_pd()); }) + (__v2df)_mm_setzero_pd()) -#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ +#define _mm256_mask_shuffle_pd(W, U, A, B, M) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ - (__v4df)(__m256d)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ +#define _mm256_maskz_shuffle_pd(U, A, B, M) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ +#define _mm_mask_shuffle_ps(W, U, A, B, M) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ - (__v4sf)(__m128)(W)); }) + (__v4sf)(__m128)(W)) -#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ +#define _mm_maskz_shuffle_ps(U, A, B, M) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ - (__v4sf)_mm_setzero_ps()); }) + (__v4sf)_mm_setzero_ps()) -#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ +#define _mm256_mask_shuffle_ps(W, U, A, B, M) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ - (__v8sf)(__m256)(W)); }) + (__v8sf)(__m256)(W)) -#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ +#define _mm256_maskz_shuffle_ps(U, A, B, M) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_pd (__m128d __A) @@ -7718,43 +7718,43 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \ 
+#define _mm256_extractf32x4_ps(A, imm) \ (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ (__v8sf)_mm256_undefined_ps(), \ ((imm) & 1) ? 4 : 0, \ ((imm) & 1) ? 5 : 1, \ ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3); }) + ((imm) & 1) ? 7 : 3) -#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ - (__v4sf)(W)); }) + (__v4sf)(W)) -#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_extractf32x4_ps(U, A, imm) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ - (__v4sf)_mm_setzero_ps()); }) + (__v4sf)_mm_setzero_ps()) -#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \ +#define _mm256_extracti32x4_epi32(A, imm) \ (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ (__v8si)_mm256_undefined_si256(), \ ((imm) & 1) ? 4 : 0, \ ((imm) & 1) ? 5 : 1, \ ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3); }) + ((imm) & 1) ? 7 : 3) -#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ +#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ - (__v4si)(W)); }) + (__v4si)(W)) -#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ +#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ - (__v4si)_mm_setzero_si128()); }) + (__v4si)_mm_setzero_si128()) -#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \ +#define _mm256_insertf32x4(A, B, imm) \ (__m256)__builtin_shufflevector((__v8sf)(A), \ (__v8sf)_mm256_castps128_ps256((__m128)(B)), \ ((imm) & 0x1) ? 0 : 8, \ @@ -7764,19 +7764,19 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) ((imm) & 0x1) ? 8 : 4, \ ((imm) & 0x1) ? 9 : 5, \ ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7); }) + ((imm) & 0x1) ? 11 : 7) -#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_insertf32x4(W, U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ - (__v8sf)(W)); }) + (__v8sf)(W)) -#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_insertf32x4(U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ - (__v8sf)_mm256_setzero_ps()); }) + (__v8sf)_mm256_setzero_ps()) -#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \ +#define _mm256_inserti32x4(A, B, imm) \ (__m256i)__builtin_shufflevector((__v8si)(A), \ (__v8si)_mm256_castsi128_si256((__m128i)(B)), \ ((imm) & 0x1) ? 0 : 8, \ @@ -7786,217 +7786,217 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) ((imm) & 0x1) ? 8 : 4, \ ((imm) & 0x1) ? 9 : 5, \ ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7); }) + ((imm) & 0x1) ? 
11 : 7) -#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_inserti32x4(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ - (__v8si)(W)); }) + (__v8si)(W)) -#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_inserti32x4(U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm_getmant_pd(A, B, C) __extension__({\ +#define _mm_getmant_pd(A, B, C) \ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\ +#define _mm_mask_getmant_pd(W, U, A, B, C) \ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\ +#define _mm_maskz_getmant_pd(U, A, B, C) \ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_getmant_pd(A, B, C) __extension__ ({ \ +#define _mm256_getmant_pd(A, B, C) \ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_getmant_pd(W, U, A, B, C) \ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_getmant_pd(U, A, B, C) \ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_getmant_ps(A, B, C) __extension__ ({ \ +#define _mm_getmant_ps(A, B, C) \ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm_mask_getmant_ps(W, U, A, B, C) \ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ +#define _mm_maskz_getmant_ps(U, A, B, C) \ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_getmant_ps(A, B, C) __extension__ ({ \ +#define _mm256_getmant_ps(A, B, C) \ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__mmask8)-1) -#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ +#define _mm256_mask_getmant_ps(W, U, A, B, C) \ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ +#define _mm256_maskz_getmant_ps(U, A, B, C) \ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 
(int)(((C)<<2) | (B)), \ (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ (double const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ (long long const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ (double const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ (long long const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ (float const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ (int const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ (float const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ (int const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ (double const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ (long long const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), 
(int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ (double const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ (long long const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ (float const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ (int const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ (float const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ +#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ (int const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)); }) + (__mmask8)(mask), (int)(scale)) -#define _mm256_permutex_pd(X, C) __extension__ ({ \ +#define _mm256_permutex_pd(X, C) \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ (__v4df)_mm256_undefined_pd(), \ ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ - ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3) -#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permutex_pd(W, U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permutex_pd((X), (C)), \ - (__v4df)(__m256d)(W)); }) + (__v4df)(__m256d)(W)) -#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permutex_pd(U, X, C) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permutex_pd((X), (C)), \ - (__v4df)_mm256_setzero_pd()); }) + (__v4df)_mm256_setzero_pd()) -#define _mm256_permutex_epi64(X, C) __extension__ ({ \ +#define _mm256_permutex_epi64(X, C) \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ (__v4di)_mm256_undefined_si256(), \ ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ - ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3) -#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ +#define _mm256_mask_permutex_epi64(W, U, X, C) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ - (__v4di)(__m256i)(W)); }) + (__v4di)(__m256i)(W)) 
-#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ +#define _mm256_maskz_permutex_epi64(U, X, C) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd (__m256i __X, __m256d __Y) @@ -8081,25 +8081,25 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v8si)_mm256_setzero_si256()); } -#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \ +#define _mm_alignr_epi32(A, B, imm) \ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \ (__v4si)(__m128i)(A), \ ((int)(imm) & 0x3) + 0, \ ((int)(imm) & 0x3) + 1, \ ((int)(imm) & 0x3) + 2, \ - ((int)(imm) & 0x3) + 3); }) + ((int)(imm) & 0x3) + 3) -#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ +#define _mm_mask_alignr_epi32(W, U, A, B, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ - (__v4si)(__m128i)(W)); }) + (__v4si)(__m128i)(W)) -#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ +#define _mm_maskz_alignr_epi32(U, A, B, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ - (__v4si)_mm_setzero_si128()); }) + (__v4si)_mm_setzero_si128()) -#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \ +#define _mm256_alignr_epi32(A, B, imm) \ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \ (__v8si)(__m256i)(A), \ ((int)(imm) & 0x7) + 0, \ @@ -8109,51 +8109,51 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) ((int)(imm) & 0x7) + 4, \ ((int)(imm) & 0x7) + 5, \ ((int)(imm) & 0x7) + 6, \ - ((int)(imm) & 0x7) + 7); }) + ((int)(imm) & 0x7) + 7) -#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ +#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ - (__v8si)(__m256i)(W)); }) + (__v8si)(__m256i)(W)) -#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_alignr_epi32(U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \ +#define _mm_alignr_epi64(A, B, imm) \ (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \ (__v2di)(__m128i)(A), \ ((int)(imm) & 0x1) + 0, \ - ((int)(imm) & 0x1) + 1); }) + ((int)(imm) & 0x1) + 1) -#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ +#define _mm_mask_alignr_epi64(W, U, A, B, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ - (__v2di)(__m128i)(W)); }) + (__v2di)(__m128i)(W)) -#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ +#define _mm_maskz_alignr_epi64(U, A, B, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ - (__v2di)_mm_setzero_si128()); }) + (__v2di)_mm_setzero_si128()) -#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \ +#define _mm256_alignr_epi64(A, B, imm) \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \ (__v4di)(__m256i)(A), \ ((int)(imm) & 0x3) + 0, \ ((int)(imm) & 0x3) + 1, \ ((int)(imm) & 0x3) + 2, \ - ((int)(imm) & 0x3) + 3); }) + ((int)(imm) & 0x3) + 3) -#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 
+#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ - (__v4di)(__m256i)(W)); }) + (__v4di)(__m256i)(W)) -#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ +#define _mm256_maskz_alignr_epi64(U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()); }) + (__v4di)_mm256_setzero_si256()) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) @@ -8219,25 +8219,25 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) (__v8sf)_mm256_setzero_ps()); } -#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ +#define _mm256_mask_shuffle_epi32(W, U, A, I) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_epi32((A), (I)), \ - (__v8si)(__m256i)(W)); }) + (__v8si)(__m256i)(W)) -#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ +#define _mm256_maskz_shuffle_epi32(U, A, I) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_epi32((A), (I)), \ - (__v8si)_mm256_setzero_si256()); }) + (__v8si)_mm256_setzero_si256()) -#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ +#define _mm_mask_shuffle_epi32(W, U, A, I) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shuffle_epi32((A), (I)), \ - (__v4si)(__m128i)(W)); }) + (__v4si)(__m128i)(W)) -#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ +#define _mm_maskz_shuffle_epi32(U, A, I) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shuffle_epi32((A), (I)), \ - (__v4si)_mm_setzero_si128()); }) + (__v4si)_mm_setzero_si128()) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) @@ -8353,15 +8353,15 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) (__mmask8) __U); } -#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ +#define _mm_mask_cvt_roundps_ph(W, U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ +#define _mm_maskz_cvt_roundps_ph(U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) static __inline __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) @@ -8378,15 +8378,15 @@ _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) (__v8hi) _mm_setzero_si128(), (__mmask8) __U); } -#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ +#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ - (__mmask8)(U)); }) + (__mmask8)(U)) -#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ +#define _mm256_maskz_cvt_roundps_ph(U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index 5b05376fc4b2..06dbb2ddc14c 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -251,12 +251,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) __U); } -#define 
_mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_shldi_epi64(S, U, A, B, I) \ (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \ (__v4di)(B), \ (int)(I), \ (__v4di)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm256_maskz_shldi_epi64(U, A, B, I) \ _mm256_mask_shldi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I)) @@ -264,12 +264,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm256_shldi_epi64(A, B, I) \ _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shldi_epi64(S, U, A, B, I) \ (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \ (__v2di)(B), \ (int)(I), \ (__v2di)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm_maskz_shldi_epi64(U, A, B, I) \ _mm_mask_shldi_epi64(_mm_setzero_si128(), (U), (A), (B), (I)) @@ -277,12 +277,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm_shldi_epi64(A, B, I) \ _mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) -#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_shldi_epi32(S, U, A, B, I) \ (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \ (__v8si)(B), \ (int)(I), \ (__v8si)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm256_maskz_shldi_epi32(U, A, B, I) \ _mm256_mask_shldi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I)) @@ -290,12 +290,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm256_shldi_epi32(A, B, I) \ _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shldi_epi32(S, U, A, B, I) \ (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \ (__v4si)(B), \ (int)(I), \ (__v4si)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm_maskz_shldi_epi32(U, A, B, I) \ _mm_mask_shldi_epi32(_mm_setzero_si128(), (U), (A), (B), (I)) @@ -303,12 +303,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm_shldi_epi32(A, B, I) \ _mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) -#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_shldi_epi16(S, U, A, B, I) \ (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \ (__v16hi)(B), \ (int)(I), \ (__v16hi)(S), \ - (__mmask16)(U)); }) + (__mmask16)(U)) #define _mm256_maskz_shldi_epi16(U, A, B, I) \ _mm256_mask_shldi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I)) @@ -316,12 +316,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm256_shldi_epi16(A, B, I) \ _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shldi_epi16(S, U, A, B, I) \ (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \ (__v8hi)(B), \ (int)(I), \ (__v8hi)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm_maskz_shldi_epi16(U, A, B, I) \ _mm_mask_shldi_epi16(_mm_setzero_si128(), (U), (A), (B), (I)) @@ -329,12 +329,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm_shldi_epi16(A, B, I) \ _mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) -#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \ (__v4di)(B), \ (int)(I), \ 
(__v4di)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm256_maskz_shrdi_epi64(U, A, B, I) \ _mm256_mask_shrdi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I)) @@ -342,12 +342,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm256_shrdi_epi64(A, B, I) \ _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shrdi_epi64(S, U, A, B, I) \ (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \ (__v2di)(B), \ (int)(I), \ (__v2di)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm_maskz_shrdi_epi64(U, A, B, I) \ _mm_mask_shrdi_epi64(_mm_setzero_si128(), (U), (A), (B), (I)) @@ -355,12 +355,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm_shrdi_epi64(A, B, I) \ _mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) -#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \ (__v8si)(B), \ (int)(I), \ (__v8si)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm256_maskz_shrdi_epi32(U, A, B, I) \ _mm256_mask_shrdi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I)) @@ -368,12 +368,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm256_shrdi_epi32(A, B, I) \ _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shrdi_epi32(S, U, A, B, I) \ (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \ (__v4si)(B), \ (int)(I), \ (__v4si)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm_maskz_shrdi_epi32(U, A, B, I) \ _mm_mask_shrdi_epi32(_mm_setzero_si128(), (U), (A), (B), (I)) @@ -381,12 +381,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm_shrdi_epi32(A, B, I) \ _mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) -#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \ (__v16hi)(B), \ (int)(I), \ (__v16hi)(S), \ - (__mmask16)(U)); }) + (__mmask16)(U)) #define _mm256_maskz_shrdi_epi16(U, A, B, I) \ _mm256_mask_shrdi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I)) @@ -394,12 +394,12 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) #define _mm256_shrdi_epi16(A, B, I) \ _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shrdi_epi16(S, U, A, B, I) \ (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \ (__v8hi)(B), \ (int)(I), \ (__v8hi)(S), \ - (__mmask8)(U)); }) + (__mmask8)(U)) #define _mm_maskz_shrdi_epi16(U, A, B, I) \ _mm_mask_shrdi_epi16(_mm_setzero_si128(), (U), (A), (B), (I)) diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 5de2c5ca4c9f..ff9ca7ca08ac 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -408,8 +408,8 @@ _mm256_rcp_ps(__m256 __a) /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [4 x double] containing the rounded values. 
-#define _mm256_round_pd(V, M) __extension__ ({ \ - (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); }) +#define _mm256_round_pd(V, M) \ + (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)) /// Rounds the values stored in a 256-bit vector of [8 x float] as /// specified by the byte operand. The source values are rounded to integer @@ -440,8 +440,8 @@ _mm256_rcp_ps(__m256 __a) /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [8 x float] containing the rounded values. -#define _mm256_round_ps(V, M) __extension__ ({ \ - (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); }) +#define _mm256_round_ps(V, M) \ + (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)) /// Rounds up the values stored in a 256-bit vector of [4 x double]. The /// source values are rounded up to integer values and returned as 64-bit @@ -997,10 +997,10 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -#define _mm_permute_pd(A, C) __extension__ ({ \ +#define _mm_permute_pd(A, C) \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \ (__v2df)_mm_undefined_pd(), \ - ((C) >> 0) & 0x1, ((C) >> 1) & 0x1); }) + ((C) >> 0) & 0x1, ((C) >> 1) & 0x1) /// Copies the values in a 256-bit vector of [4 x double] as specified by /// the immediate integer operand. @@ -1039,13 +1039,13 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. -#define _mm256_permute_pd(A, C) __extension__ ({ \ +#define _mm256_permute_pd(A, C) \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ (__v4df)_mm256_undefined_pd(), \ 0 + (((C) >> 0) & 0x1), \ 0 + (((C) >> 1) & 0x1), \ 2 + (((C) >> 2) & 0x1), \ - 2 + (((C) >> 3) & 0x1)); }) + 2 + (((C) >> 3) & 0x1)) /// Copies the values in a 128-bit vector of [4 x float] as specified by /// the immediate integer operand. @@ -1100,11 +1100,11 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. -#define _mm_permute_ps(A, C) __extension__ ({ \ +#define _mm_permute_ps(A, C) \ (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \ (__v4sf)_mm_undefined_ps(), \ ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ - ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3) /// Copies the values in a 256-bit vector of [8 x float] as specified by /// the immediate integer operand. @@ -1195,7 +1195,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. -#define _mm256_permute_ps(A, C) __extension__ ({ \ +#define _mm256_permute_ps(A, C) \ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ (__v8sf)_mm256_undefined_ps(), \ 0 + (((C) >> 0) & 0x3), \ @@ -1205,7 +1205,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) 4 + (((C) >> 0) & 0x3), \ 4 + (((C) >> 2) & 0x3), \ 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3)); }) + 4 + (((C) >> 6) & 0x3)) /// Permutes 128-bit data values stored in two 256-bit vectors of /// [4 x double], as specified by the immediate integer operand. 
@@ -1244,9 +1244,9 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. -#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \ +#define _mm256_permute2f128_pd(V1, V2, M) \ (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \ - (__v4df)(__m256d)(V2), (M)); }) + (__v4df)(__m256d)(V2), (M)) /// Permutes 128-bit data values stored in two 256-bit vectors of /// [8 x float], as specified by the immediate integer operand. @@ -1285,9 +1285,9 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. -#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ +#define _mm256_permute2f128_ps(V1, V2, M) \ (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \ - (__v8sf)(__m256)(V2), (M)); }) + (__v8sf)(__m256)(V2), (M)) /// Permutes 128-bit data values stored in two 256-bit integer vectors, /// as specified by the immediate integer operand. @@ -1325,9 +1325,9 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit integer vector containing the copied values. -#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ +#define _mm256_permute2f128_si256(V1, V2, M) \ (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \ - (__v8si)(__m256i)(V2), (M)); }) + (__v8si)(__m256i)(V2), (M)) /* Vector Blend */ /// Merges 64-bit double-precision data values stored in either of the @@ -1354,13 +1354,13 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// destination. When a mask bit is 1, the corresponding 64-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. -#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \ +#define _mm256_blend_pd(V1, V2, M) \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \ (__v4df)(__m256d)(V2), \ (((M) & 0x01) ? 4 : 0), \ (((M) & 0x02) ? 5 : 1), \ (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)); }) + (((M) & 0x08) ? 7 : 3)) /// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the immediate @@ -1386,7 +1386,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// destination. When a mask bit is 1, the corresponding 32-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. -#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \ +#define _mm256_blend_ps(V1, V2, M) \ (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), \ (((M) & 0x01) ? 8 : 0), \ @@ -1396,7 +1396,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) (((M) & 0x10) ? 12 : 4), \ (((M) & 0x20) ? 13 : 5), \ (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)); }) + (((M) & 0x80) ? 15 : 7)) /// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the 256-bit vector @@ -1492,9 +1492,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// is set to zero. The bitmask is applied in the same way to each of the /// two parallel dot product computations. 
/// \returns A 256-bit vector of [8 x float] containing the two dot products. -#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \ +#define _mm256_dp_ps(V1, V2, M) \ (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \ - (__v8sf)(__m256)(V2), (M)); }) + (__v8sf)(__m256)(V2), (M)) /* Vector shuffle */ /// Selects 8 float values from the 256-bit operands of [8 x float], as @@ -1546,7 +1546,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n /// 11: Bits [127:96] and [255:224] are copied from the selected operand. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. -#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \ +#define _mm256_shuffle_ps(a, b, mask) \ (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), \ 0 + (((mask) >> 0) & 0x3), \ @@ -1556,7 +1556,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) 4 + (((mask) >> 0) & 0x3), \ 4 + (((mask) >> 2) & 0x3), \ 12 + (((mask) >> 4) & 0x3), \ - 12 + (((mask) >> 6) & 0x3)); }) + 12 + (((mask) >> 6) & 0x3)) /// Selects four double-precision values from the 256-bit operands of /// [4 x double], as specified by the immediate value operand. @@ -1600,13 +1600,13 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// Bit [3]=1: Bits [255:192] are copied from \a b to bits [255:192] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the shuffled values. -#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \ +#define _mm256_shuffle_pd(a, b, mask) \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), \ 0 + (((mask) >> 0) & 0x1), \ 4 + (((mask) >> 1) & 0x1), \ 2 + (((mask) >> 2) & 0x1), \ - 6 + (((mask) >> 3) & 0x1)); }) + 6 + (((mask) >> 3) & 0x1)) /* Compare */ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ @@ -1698,9 +1698,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. -#define _mm_cmp_pd(a, b, c) __extension__ ({ \ +#define _mm_cmp_pd(a, b, c) \ (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (c)); }) + (__v2df)(__m128d)(b), (c)) /// Compares each of the corresponding values of two 128-bit vectors of /// [4 x float], using the operation specified by the immediate integer @@ -1758,9 +1758,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. -#define _mm_cmp_ps(a, b, c) __extension__ ({ \ +#define _mm_cmp_ps(a, b, c) \ (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (c)); }) + (__v4sf)(__m128)(b), (c)) /// Compares each of the corresponding double-precision values of two /// 256-bit vectors of [4 x double], using the operation specified by the @@ -1818,9 +1818,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [4 x double] containing the comparison results. 
-#define _mm256_cmp_pd(a, b, c) __extension__ ({ \ +#define _mm256_cmp_pd(a, b, c) \ (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), (c)); }) + (__v4df)(__m256d)(b), (c)) /// Compares each of the corresponding values of two 256-bit vectors of /// [8 x float], using the operation specified by the immediate integer @@ -1878,9 +1878,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [8 x float] containing the comparison results. -#define _mm256_cmp_ps(a, b, c) __extension__ ({ \ +#define _mm256_cmp_ps(a, b, c) \ (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), (c)); }) + (__v8sf)(__m256)(b), (c)) /// Compares each of the corresponding scalar double-precision values of /// two 128-bit vectors of [2 x double], using the operation specified by the @@ -1937,9 +1937,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. -#define _mm_cmp_sd(a, b, c) __extension__ ({ \ +#define _mm_cmp_sd(a, b, c) \ (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (c)); }) + (__v2df)(__m128d)(b), (c)) /// Compares each of the corresponding scalar values of two 128-bit /// vectors of [4 x float], using the operation specified by the immediate @@ -1996,9 +1996,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. -#define _mm_cmp_ss(a, b, c) __extension__ ({ \ +#define _mm_cmp_ss(a, b, c) \ (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (c)); }) + (__v4sf)(__m128)(b), (c)) /// Takes a [8 x i32] vector and returns the vector element value /// indexed by the immediate constant operand. @@ -4669,7 +4669,7 @@ _mm256_zextsi128_si256(__m128i __a) /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. -#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \ +#define _mm256_insertf128_ps(V1, V2, M) \ (__m256)__builtin_shufflevector( \ (__v8sf)(__m256)(V1), \ (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \ @@ -4680,7 +4680,7 @@ _mm256_zextsi128_si256(__m128i __a) (((M) & 1) ? 8 : 4), \ (((M) & 1) ? 9 : 5), \ (((M) & 1) ? 10 : 6), \ - (((M) & 1) ? 11 : 7) );}) + (((M) & 1) ? 11 : 7) ) /// Constructs a new 256-bit vector of [4 x double] by first duplicating /// a 256-bit vector of [4 x double] given in the first parameter, and then @@ -4716,14 +4716,14 @@ _mm256_zextsi128_si256(__m128i __a) /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \ +#define _mm256_insertf128_pd(V1, V2, M) \ (__m256d)__builtin_shufflevector( \ (__v4df)(__m256d)(V1), \ (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \ (((M) & 1) ? 0 : 4), \ (((M) & 1) ? 1 : 5), \ (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) );}) + (((M) & 1) ? 
5 : 3) ) /// Constructs a new 256-bit integer vector by first duplicating a /// 256-bit integer vector given in the first parameter, and then replacing @@ -4759,14 +4759,14 @@ _mm256_zextsi128_si256(__m128i __a) /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit integer vector containing the interleaved values. -#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \ +#define _mm256_insertf128_si256(V1, V2, M) \ (__m256i)__builtin_shufflevector( \ (__v4di)(__m256i)(V1), \ (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ (((M) & 1) ? 0 : 4), \ (((M) & 1) ? 1 : 5), \ (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) );}) + (((M) & 1) ? 5 : 3) ) /* Vector extract. @@ -4794,14 +4794,14 @@ _mm256_zextsi128_si256(__m128i __a) /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [4 x float] containing the extracted bits. -#define _mm256_extractf128_ps(V, M) __extension__ ({ \ +#define _mm256_extractf128_ps(V, M) \ (__m128)__builtin_shufflevector( \ (__v8sf)(__m256)(V), \ (__v8sf)(_mm256_undefined_ps()), \ (((M) & 1) ? 4 : 0), \ (((M) & 1) ? 5 : 1), \ (((M) & 1) ? 6 : 2), \ - (((M) & 1) ? 7 : 3) );}) + (((M) & 1) ? 7 : 3) ) /// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [4 x double], as determined by the immediate integer parameter, and @@ -4824,12 +4824,12 @@ _mm256_zextsi128_si256(__m128i __a) /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [2 x double] containing the extracted bits. -#define _mm256_extractf128_pd(V, M) __extension__ ({ \ +#define _mm256_extractf128_pd(V, M) \ (__m128d)__builtin_shufflevector( \ (__v4df)(__m256d)(V), \ (__v4df)(_mm256_undefined_pd()), \ (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) );}) + (((M) & 1) ? 3 : 1) ) /// Extracts either the upper or the lower 128 bits from a 256-bit /// integer vector, as determined by the immediate integer parameter, and @@ -4852,12 +4852,12 @@ _mm256_zextsi128_si256(__m128i __a) /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit integer vector containing the extracted bits. -#define _mm256_extractf128_si256(V, M) __extension__ ({ \ +#define _mm256_extractf128_si256(V, M) \ (__m128i)__builtin_shufflevector( \ (__v4di)(__m256i)(V), \ (__v4di)(_mm256_undefined_si256()), \ (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) );}) + (((M) & 1) ? 3 : 1) ) /* SIMD load ops (unaligned) */ /// Loads two 128-bit floating-point vectors of [4 x float] from diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 83b38bab8c5d..cdc01bf1208c 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2792,7 +2792,7 @@ _mm_xor_si128(__m128i __a, __m128i __b) /// An immediate value specifying the number of bytes to left-shift operand /// \a a. /// \returns A 128-bit integer vector containing the left-shifted value. -#define _mm_slli_si128(a, imm) __extension__ ({ \ +#define _mm_slli_si128(a, imm) \ (__m128i)__builtin_shufflevector( \ (__v16qi)_mm_setzero_si128(), \ (__v16qi)(__m128i)(a), \ @@ -2811,7 +2811,7 @@ _mm_xor_si128(__m128i __a, __m128i __b) ((char)(imm)&0xF0) ? 12 : 28 - (char)(imm), \ ((char)(imm)&0xF0) ? 13 : 29 - (char)(imm), \ ((char)(imm)&0xF0) ? 14 : 30 - (char)(imm), \ - ((char)(imm)&0xF0) ? 15 : 31 - (char)(imm)); }) + ((char)(imm)&0xF0) ? 
15 : 31 - (char)(imm)) #define _mm_bslli_si128(a, imm) \ _mm_slli_si128((a), (imm)) @@ -3027,7 +3027,7 @@ _mm_sra_epi32(__m128i __a, __m128i __count) /// An immediate value specifying the number of bytes to right-shift operand /// \a a. /// \returns A 128-bit integer vector containing the right-shifted value. -#define _mm_srli_si128(a, imm) __extension__ ({ \ +#define _mm_srli_si128(a, imm) \ (__m128i)__builtin_shufflevector( \ (__v16qi)(__m128i)(a), \ (__v16qi)_mm_setzero_si128(), \ @@ -3046,7 +3046,7 @@ _mm_sra_epi32(__m128i __a, __m128i __count) ((char)(imm)&0xF0) ? 28 : (char)(imm) + 12, \ ((char)(imm)&0xF0) ? 29 : (char)(imm) + 13, \ ((char)(imm)&0xF0) ? 30 : (char)(imm) + 14, \ - ((char)(imm)&0xF0) ? 31 : (char)(imm) + 15); }) + ((char)(imm)&0xF0) ? 31 : (char)(imm) + 15) #define _mm_bsrli_si128(a, imm) \ _mm_srli_si128((a), (imm)) @@ -4384,11 +4384,11 @@ _mm_movemask_epi8(__m128i __a) /// 10: assign values from bits [95:64] of \a a. \n /// 11: assign values from bits [127:96] of \a a. /// \returns A 128-bit integer vector containing the shuffled values. -#define _mm_shuffle_epi32(a, imm) __extension__ ({ \ +#define _mm_shuffle_epi32(a, imm) \ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \ (__v4si)_mm_undefined_si128(), \ ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \ - ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); }) + ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3) /// Constructs a 128-bit integer vector by shuffling four lower 16-bit /// elements of a 128-bit integer vector of [8 x i16], using the immediate @@ -4417,12 +4417,12 @@ _mm_movemask_epi8(__m128i __a) /// 10: assign values from bits [47:32] of \a a. \n /// 11: assign values from bits [63:48] of \a a. \n /// \returns A 128-bit integer vector containing the shuffled values. -#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ +#define _mm_shufflelo_epi16(a, imm) \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ (__v8hi)_mm_undefined_si128(), \ ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \ ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \ - 4, 5, 6, 7); }) + 4, 5, 6, 7) /// Constructs a 128-bit integer vector by shuffling four upper 16-bit /// elements of a 128-bit integer vector of [8 x i16], using the immediate @@ -4451,14 +4451,14 @@ _mm_movemask_epi8(__m128i __a) /// 10: assign values from bits [111:96] of \a a. \n /// 11: assign values from bits [127:112] of \a a. \n /// \returns A 128-bit integer vector containing the shuffled values. -#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ +#define _mm_shufflehi_epi16(a, imm) \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ (__v8hi)_mm_undefined_si128(), \ 0, 1, 2, 3, \ 4 + (((imm) >> 0) & 0x3), \ 4 + (((imm) >> 2) & 0x3), \ 4 + (((imm) >> 4) & 0x3), \ - 4 + (((imm) >> 6) & 0x3)); }) + 4 + (((imm) >> 6) & 0x3)) /// Unpacks the high-order (index 8-15) values from two 128-bit vectors /// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. @@ -4811,10 +4811,10 @@ _mm_movemask_pd(__m128d __a) /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n /// \returns A 128-bit vector of [2 x double] containing the shuffled values. 
-#define _mm_shuffle_pd(a, b, i) __extension__ ({ \ +#define _mm_shuffle_pd(a, b, i) \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ 0 + (((i) >> 0) & 0x1), \ - 2 + (((i) >> 1) & 0x1)); }) + 2 + (((i) >> 1) & 0x1)) /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// floating-point vector of [4 x float]. diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h index ceb1605d215c..65dfb2c5469a 100644 --- a/clang/lib/Headers/f16cintrin.h +++ b/clang/lib/Headers/f16cintrin.h @@ -77,9 +77,9 @@ _cvtsh_ss(unsigned short __a) /// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns The converted 16-bit half-precision float value. -#define _cvtss_sh(a, imm) __extension__ ({ \ +#define _cvtss_sh(a, imm) \ (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ - (imm)))[0]); }) + (imm)))[0]) /// Converts a 128-bit vector containing 32-bit float values into a /// 128-bit vector containing 16-bit half-precision float values. @@ -104,8 +104,8 @@ _cvtsh_ss(unsigned short __a) /// \returns A 128-bit vector containing converted 16-bit half-precision float /// values. The lower 64 bits are used to store the converted 16-bit /// half-precision floating-point values. -#define _mm_cvtps_ph(a, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); }) +#define _mm_cvtps_ph(a, imm) \ + (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 128-bit vector containing 32-bit float values. @@ -147,8 +147,8 @@ _mm_cvtph_ps(__m128i __a) /// 1XX: Use MXCSR.RC for rounding /// \returns A 128-bit vector containing the converted 16-bit half-precision /// float values. -#define _mm256_cvtps_ph(a, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); }) +#define _mm256_cvtps_ph(a, imm) \ + (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 256-bit vector of [8 x float].
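Removing the statement expressions leaves these macros as plain parenthesized expressions, which, presumably the motivation for this patch, lets them appear anywhere an ordinary expression is allowed. A minimal usage sketch, assuming a target compiled with -mf16c; the helper name roundtrip is illustrative and not part of the patch:

#include <immintrin.h>

/* One intrinsic macro can now nest directly inside another call; a macro
   body ending in ';' would be a syntax error in this position. The
   immediate 0 selects round-to-nearest for the float-to-half conversion. */
static __inline__ __m128 roundtrip(__m128 v) {
  return _mm_cvtph_ps(_mm_cvtps_ph(v, 0));
}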
diff --git a/clang/lib/Headers/gfniintrin.h b/clang/lib/Headers/gfniintrin.h index 34c555e3339a..313a59fb6218 100644 --- a/clang/lib/Headers/gfniintrin.h +++ b/clang/lib/Headers/gfniintrin.h @@ -29,95 +29,95 @@ #define __GFNIINTRIN_H -#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)); }) + (__v16qi)(__m128i)(S)) -#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)); }) + (__v32qi)(__m256i)(S)) -#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v64qi)(__m512i)(S)); }) + (__v64qi)(__m512i)(S)) -#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm_gf2p8affine_epi64_epi8(A, B, I) \ (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)); }) + (__v16qi)(__m128i)(S)) -#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \ 
(__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)); }) + (__v32qi)(__m256i)(S)) -#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I); }) + U, A, B, I) -#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ +#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \ (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ +#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \ - (__v64qi)(__m512i)(S)); }) + (__v64qi)(__m512i)(S)) -#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ +#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ - U, A, B, I); }) + U, A, B, I) /* Default attributes for simple form (no masking). */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) diff --git a/clang/lib/Headers/shaintrin.h b/clang/lib/Headers/shaintrin.h index 9b5d21800819..35d7efa567da 100644 --- a/clang/lib/Headers/shaintrin.h +++ b/clang/lib/Headers/shaintrin.h @@ -31,8 +31,8 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"))) -#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \ - __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)); }) +#define _mm_sha1rnds4_epu32(V1, V2, M) \ + __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1nexte_epu32(__m128i __X, __m128i __Y) diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 645af418b582..494e21bea3cc 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -244,8 +244,8 @@ /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the rounded values. -#define _mm_round_ps(X, M) __extension__ ({ \ - (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); }) +#define _mm_round_ps(X, M) \ + (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)) /// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of @@ -285,9 +285,9 @@ /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values. 
-#define _mm_round_ss(X, Y, M) __extension__ ({ \ +#define _mm_round_ss(X, Y, M) \ (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (M)); }) + (__v4sf)(__m128)(Y), (M)) /// Rounds each element of the 128-bit vector of [2 x double] to an /// integer value according to the rounding control specified by the second @@ -319,8 +319,8 @@ /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [2 x double] containing the rounded values. -#define _mm_round_pd(X, M) __extension__ ({ \ - (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); }) +#define _mm_round_pd(X, M) \ + (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)) /// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. @@ -360,9 +360,9 @@ /// 11: Truncated /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. -#define _mm_round_sd(X, Y, M) __extension__ ({ \ +#define _mm_round_sd(X, Y, M) \ (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (M)); }) + (__v2df)(__m128d)(Y), (M)) /* SSE4 Packed Blending Intrinsics. */ /// Returns a 128-bit vector of [2 x double] where the values are @@ -389,11 +389,11 @@ /// When a mask bit is 1, the corresponding 64-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. -#define _mm_blend_pd(V1, V2, M) __extension__ ({ \ +#define _mm_blend_pd(V1, V2, M) \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \ (__v2df)(__m128d)(V2), \ (((M) & 0x01) ? 2 : 0), \ - (((M) & 0x02) ? 3 : 1)); }) + (((M) & 0x02) ? 3 : 1)) /// Returns a 128-bit vector of [4 x float] where the values are selected /// from either the first or second operand as specified by the third @@ -419,12 +419,12 @@ /// When a mask bit is 1, the corresponding 32-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. -#define _mm_blend_ps(V1, V2, M) __extension__ ({ \ +#define _mm_blend_ps(V1, V2, M) \ (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ (((M) & 0x01) ? 4 : 0), \ (((M) & 0x02) ? 5 : 1), \ (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)); }) + (((M) & 0x08) ? 7 : 3)) /// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the @@ -531,7 +531,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) /// When a mask bit is 1, the corresponding 16-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [8 x i16] containing the copied values. -#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ +#define _mm_blend_epi16(V1, V2, M) \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \ (__v8hi)(__m128i)(V2), \ (((M) & 0x01) ? 8 : 0), \ @@ -541,7 +541,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) (((M) & 0x10) ? 12 : 4), \ (((M) & 0x20) ? 13 : 5), \ (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)); }) + (((M) & 0x80) ? 15 : 7)) /* SSE4 Dword Multiply Instructions. */ /// Multiples corresponding elements of two 128-bit vectors of [4 x i32] @@ -616,9 +616,9 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2) /// each [4 x float] subvector. 
If a bit is set, the dot product is returned /// in the corresponding element; otherwise that element is set to zero. /// \returns A 128-bit vector of [4 x float] containing the dot product. -#define _mm_dp_ps(X, Y, M) __extension__ ({ \ +#define _mm_dp_ps(X, Y, M) \ (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (M)); }) + (__v4sf)(__m128)(Y), (M)) /// Computes the dot product of the two 128-bit vectors of [2 x double] /// and returns it in the elements of the 128-bit result vector of @@ -651,9 +651,9 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2) /// to the lowest element and bit [1] corresponding to the highest element of /// each [2 x double] vector. If a bit is set, the dot product is returned in /// the corresponding element; otherwise that element is set to zero. -#define _mm_dp_pd(X, Y, M) __extension__ ({\ +#define _mm_dp_pd(X, Y, M) \ (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (M)); }) + (__v2df)(__m128d)(Y), (M)) /* SSE4 Streaming Load Hint Instruction. */ /// Loads integer values from a 128-bit aligned memory location to a @@ -1546,9 +1546,9 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) /// \endcode /// \returns A 128-bit integer vector containing the sums of the sets of /// absolute differences between both operands. -#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \ +#define _mm_mpsadbw_epu8(X, Y, M) \ (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \ - (__v16qi)(__m128i)(Y), (M)); }) + (__v16qi)(__m128i)(Y), (M)) /// Finds the minimum unsigned 16-bit element in the input 128-bit /// vector of [8 x u16] and returns it and along with its index. diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 90e4773fdee9..b0d5f2a3d464 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -157,9 +157,9 @@ _mm_abs_epi32(__m128i __a) /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 128-bit integer vector containing the concatenated right-shifted /// value. -#define _mm_alignr_epi8(a, b, n) __extension__ ({ \ +#define _mm_alignr_epi8(a, b, n) \ (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (n)); }) + (__v16qi)(__m128i)(b), (n)) /// Concatenates the two 64-bit integer vector operands, and right-shifts /// the result by the number of bytes specified in the immediate operand. @@ -180,8 +180,8 @@ _mm_abs_epi32(__m128i __a) /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 64-bit integer vector containing the concatenated right-shifted /// value. -#define _mm_alignr_pi8(a, b, n) __extension__ ({ \ - (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); }) +#define _mm_alignr_pi8(a, b, n) \ + (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)) /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. 
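The palignr conversion above is behavior-preserving: the two operands are still concatenated and byte-shifted right by a constant immediate. A small usage sketch, assuming SSSE3 (-mssse3); rotate_bytes_by_4 is an illustrative helper, not part of the patch:

#include <tmmintrin.h>

/* Concatenating v with itself and shifting right by 4 bytes produces a
   byte-wise rotation; the shift count must be a compile-time constant. */
static __inline__ __m128i rotate_bytes_by_4(__m128i v) {
  return _mm_alignr_epi8(v, v, 4);
}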
diff --git a/clang/lib/Headers/vpclmulqdqintrin.h b/clang/lib/Headers/vpclmulqdqintrin.h index 4153d888a27c..33ea7b6647bf 100644 --- a/clang/lib/Headers/vpclmulqdqintrin.h +++ b/clang/lib/Headers/vpclmulqdqintrin.h @@ -28,15 +28,15 @@ #ifndef __VPCLMULQDQINTRIN_H #define __VPCLMULQDQINTRIN_H -#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \ +#define _mm256_clmulepi64_epi128(A, B, I) \ (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ - (char)(I)); }) + (char)(I)) -#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \ +#define _mm512_clmulepi64_epi128(A, B, I) \ (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ - (char)(I)); }) + (char)(I)) #endif /* __VPCLMULQDQINTRIN_H */ diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index dbf128d75910..1e5d02ad6c2f 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2183,8 +2183,8 @@ void _mm_sfence(void); /// 2: Bits [47:32] are copied to the destination. \n /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. -#define _mm_extract_pi16(a, n) __extension__ ({ \ - (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n); }) +#define _mm_extract_pi16(a, n) \ + (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n) /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2214,8 +2214,8 @@ void _mm_sfence(void); /// bits in operand \a a. /// \returns A 64-bit integer vector containing the copied packed data from the /// operands. -#define _mm_insert_pi16(a, d, n) __extension__ ({ \ - (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n); }) +#define _mm_insert_pi16(a, d, n) \ + (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n) /// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the greater value to the @@ -2361,8 +2361,8 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) /// 10: assigned from bits [47:32] of \a a. \n /// 11: assigned from bits [63:48] of \a a. /// \returns A 64-bit integer vector containing the shuffled values. -#define _mm_shuffle_pi16(a, n) __extension__ ({ \ - (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)); }) +#define _mm_shuffle_pi16(a, n) \ + (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)) /// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as @@ -2603,12 +2603,12 @@ void _mm_setcsr(unsigned int __i); /// 10: Bits [95:64] copied from the specified operand. \n /// 11: Bits [127:96] copied from the specified operand. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. -#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ +#define _mm_shuffle_ps(a, b, mask) \ (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ 0 + (((mask) >> 0) & 0x3), \ 0 + (((mask) >> 2) & 0x3), \ 4 + (((mask) >> 4) & 0x3), \ - 4 + (((mask) >> 6) & 0x3)); }) + 4 + (((mask) >> 6) & 0x3)) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. 
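_mm_shuffle_ps decodes its immediate two bits per destination lane, exactly as the __builtin_shufflevector indices above spell out. A sketch, assuming SSE and the usual _MM_SHUFFLE helper from xmmintrin.h; reverse_ps is an illustrative name, not part of the patch:

#include <xmmintrin.h>

/* _MM_SHUFFLE(0, 1, 2, 3) yields selectors 3,2,1,0 for lanes 0..3, so the
   four floats of v come back in reverse order. */
static __inline__ __m128 reverse_ps(__m128 v) {
  return _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 1, 2, 3));
}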
diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h index 4a34f770d58d..34887dc79fb8 100644 --- a/clang/lib/Headers/xopintrin.h +++ b/clang/lib/Headers/xopintrin.h @@ -237,17 +237,17 @@ _mm_rot_epi64(__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); } -#define _mm_roti_epi8(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); }) +#define _mm_roti_epi8(A, N) \ + (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)) -#define _mm_roti_epi16(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); }) +#define _mm_roti_epi16(A, N) \ + (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)) -#define _mm_roti_epi32(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); }) +#define _mm_roti_epi32(A, N) \ + (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)) -#define _mm_roti_epi64(A, N) __extension__ ({ \ - (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); }) +#define _mm_roti_epi64(A, N) \ + (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi8(__m128i __A, __m128i __B) @@ -297,37 +297,37 @@ _mm_sha_epi64(__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); } -#define _mm_com_epu8(A, B, N) __extension__ ({ \ +#define _mm_com_epu8(A, B, N) \ (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (N)); }) + (__v16qi)(__m128i)(B), (N)) -#define _mm_com_epu16(A, B, N) __extension__ ({ \ +#define _mm_com_epu16(A, B, N) \ (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (N)); }) + (__v8hi)(__m128i)(B), (N)) -#define _mm_com_epu32(A, B, N) __extension__ ({ \ +#define _mm_com_epu32(A, B, N) \ (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (N)); }) + (__v4si)(__m128i)(B), (N)) -#define _mm_com_epu64(A, B, N) __extension__ ({ \ +#define _mm_com_epu64(A, B, N) \ (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (N)); }) + (__v2di)(__m128i)(B), (N)) -#define _mm_com_epi8(A, B, N) __extension__ ({ \ +#define _mm_com_epi8(A, B, N) \ (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (N)); }) + (__v16qi)(__m128i)(B), (N)) -#define _mm_com_epi16(A, B, N) __extension__ ({ \ +#define _mm_com_epi16(A, B, N) \ (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (N)); }) + (__v8hi)(__m128i)(B), (N)) -#define _mm_com_epi32(A, B, N) __extension__ ({ \ +#define _mm_com_epi32(A, B, N) \ (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (N)); }) + (__v4si)(__m128i)(B), (N)) -#define _mm_com_epi64(A, B, N) __extension__ ({ \ +#define _mm_com_epi64(A, B, N) \ (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (N)); }) + (__v2di)(__m128i)(B), (N)) #define _MM_PCOMCTRL_LT 0 #define _MM_PCOMCTRL_LE 1 @@ -722,24 +722,24 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B) return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); } -#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ +#define _mm_permute2_pd(X, Y, C, I) \ (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__v2di)(__m128i)(C), (I)); }) + (__v2di)(__m128i)(C), (I)) -#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ +#define _mm256_permute2_pd(X, Y, C, I) \ (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ 
(__v4df)(__m256d)(Y), \ - (__v4di)(__m256i)(C), (I)); }) + (__v4di)(__m256i)(C), (I)) -#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ +#define _mm_permute2_ps(X, Y, C, I) \ (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ - (__v4si)(__m128i)(C), (I)); }) + (__v4si)(__m128i)(C), (I)) -#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ +#define _mm256_permute2_ps(X, Y, C, I) \ (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), \ - (__v8si)(__m256i)(C), (I)); }) + (__v8si)(__m256i)(C), (I)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ss(__m128 __A) diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c index 32a11a1fef94..4c904ee71fc1 100644 --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -1430,48 +1430,48 @@ float test_mm256_cvtss_f32(__m256 __a) __m256 test_mm256_cmp_ps_true(__m256 a, __m256 b) { // CHECK-LABEL: @test_mm256_cmp_ps_true - // CHECK: store <8 x float> zeroinitializer, <8 x float>* %tmp, align 32 + // CHECK: ret <8 x float> zeroinitializer return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ); } -__m256 test_mm256_cmp_pd_false(__m256 a, __m256 b) { +__m256d test_mm256_cmp_pd_false(__m256d a, __m256d b) { // CHECK-LABEL: @test_mm256_cmp_pd_false - // CHECK: store <4 x double> zeroinitializer, <4 x double>* %tmp, align 32 + // CHECK: ret <4 x double> zeroinitializer return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ); } __m256 test_mm256_cmp_ps_strue(__m256 a, __m256 b) { // CHECK-LABEL: @test_mm256_cmp_ps_strue - // CHECK: store <8 x float> zeroinitializer, <8 x float>* %tmp, align 32 + // CHECK: ret <8 x float> zeroinitializer return _mm256_cmp_ps(a, b, _CMP_FALSE_OS); } -__m256 test_mm256_cmp_pd_sfalse(__m256 a, __m256 b) { +__m256d test_mm256_cmp_pd_sfalse(__m256d a, __m256d b) { // CHECK-LABEL: @test_mm256_cmp_pd_sfalse - // CHECK: store <4 x double> zeroinitializer, <4 x double>* %tmp, align 32 + // CHECK: ret <4 x double> zeroinitializer return _mm256_cmp_pd(a, b, _CMP_FALSE_OS); } diff --git a/clang/test/Sema/x86-builtin-palignr.c b/clang/test/Sema/x86-builtin-palignr.c index f7e70b9c3c45..e055cbb70e9e 100644 --- a/clang/test/Sema/x86-builtin-palignr.c +++ b/clang/test/Sema/x86-builtin-palignr.c @@ -4,7 +4,5 @@ #include __m64 test1(__m64 a, __m64 b, int c) { - // FIXME: The "incompatible result type" error is due to pr10112 and should - // be removed when that is fixed. - return _mm_alignr_pi8(a, b, c); // expected-error {{argument to '__builtin_ia32_palignr' must be a constant integer}} expected-error {{incompatible result type}} + return _mm_alignr_pi8(a, b, c); // expected-error {{argument to '__builtin_ia32_palignr' must be a constant integer}} }
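With the statement expression gone, a non-constant shift count now triggers only the constant-integer diagnostic; the stale "incompatible result type" error tracked as pr10112 no longer fires, which is what the updated Sema test asserts. A sketch of both sides of that behavior, assuming a target with SSSE3 and MMX; the helper names are illustrative:

#include <tmmintrin.h>

__m64 test_variable_count(__m64 a, __m64 b, int c) {
  return _mm_alignr_pi8(a, b, c); /* error: argument to '__builtin_ia32_palignr' must be a constant integer */
}

__m64 test_constant_count(__m64 a, __m64 b) {
  return _mm_alignr_pi8(a, b, 2); /* accepted: the byte count is an immediate */
}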