[X86] Remove temporary variables from macros in x86 intrinsic headers. Prevents duplicate names appearing from multiple macro expansions. NFC

llvm-svn: 252586
This commit is contained in:
Craig Topper 2015-11-10 05:08:05 +00:00
parent 166f8b20a3
commit 7148166785
7 changed files with 138 additions and 198 deletions

View File

@ -124,10 +124,9 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
}
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
__m256i __a = (a); \
__m256i __b = (b); \
(__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
(__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
(__v32qi)(__m256i)(b), (n)); })
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_and_si256(__m256i __a, __m256i __b)
@ -160,20 +159,19 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
(__v32qi)__M);
}
#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
__m256i __V1 = (V1); \
__m256i __V2 = (V2); \
(__m256i)__builtin_shufflevector((__v16hi)__V1, (__v16hi)__V2, \
(((M) & 0x01) ? 16 : 0), \
(((M) & 0x02) ? 17 : 1), \
(((M) & 0x04) ? 18 : 2), \
(((M) & 0x08) ? 19 : 3), \
(((M) & 0x10) ? 20 : 4), \
(((M) & 0x20) ? 21 : 5), \
(((M) & 0x40) ? 22 : 6), \
(((M) & 0x80) ? 23 : 7), \
(((M) & 0x01) ? 24 : 8), \
(((M) & 0x02) ? 25 : 9), \
#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \
(__v16hi)(__m256i)(V2), \
(((M) & 0x01) ? 16 : 0), \
(((M) & 0x02) ? 17 : 1), \
(((M) & 0x04) ? 18 : 2), \
(((M) & 0x08) ? 19 : 3), \
(((M) & 0x10) ? 20 : 4), \
(((M) & 0x20) ? 21 : 5), \
(((M) & 0x40) ? 22 : 6), \
(((M) & 0x80) ? 23 : 7), \
(((M) & 0x01) ? 24 : 8), \
(((M) & 0x02) ? 25 : 9), \
(((M) & 0x04) ? 26 : 10), \
(((M) & 0x08) ? 27 : 11), \
(((M) & 0x10) ? 28 : 12), \
@ -490,8 +488,8 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
}
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
__m256i __a = (a); \
(__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \
(__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \
(__v8si)_mm256_set1_epi32(0), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4 + (((imm) & 0x03) >> 0), \
@ -500,8 +498,8 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
4 + (((imm) & 0xc0) >> 6)); })
#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \
__m256i __a = (a); \
(__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \
(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
(__v16hi)_mm256_set1_epi16(0), \
0, 1, 2, 3, \
4 + (((imm) & 0x03) >> 0), \
4 + (((imm) & 0x0c) >> 2), \
@ -514,8 +512,8 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
12 + (((imm) & 0xc0) >> 6)); })
#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \
__m256i __a = (a); \
(__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \
(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
(__v16hi)_mm256_set1_epi16(0), \
(imm) & 0x3,((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4, 5, 6, 7, \
@ -544,8 +542,7 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
}
#define _mm256_slli_si256(a, count) __extension__ ({ \
__m256i __a = (a); \
(__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
(__m256i)__builtin_ia32_pslldqi256((__m256i)(a), (count)*8); })
#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
@ -610,8 +607,7 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
}
#define _mm256_srli_si256(a, count) __extension__ ({ \
__m256i __a = (a); \
(__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
(__m256i)__builtin_ia32_psrldqi256((__m256i)(a), (count)*8); })
#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
@ -790,18 +786,16 @@ _mm256_broadcastsi128_si256(__m128i __X)
}
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
__m128i __V1 = (V1); \
__m128i __V2 = (V2); \
(__m128i)__builtin_shufflevector((__v4si)__V1, (__v4si)__V2, \
(__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \
(__v4si)(__m128i)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \
__m256i __V1 = (V1); \
__m256i __V2 = (V2); \
(__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \
(__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \
(__v8si)(__m256i)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
@ -867,8 +861,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
}
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
__m256d __V = (V); \
(__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \
(__v4df)_mm256_setzero_pd(), \
(M) & 0x3, ((M) & 0xc) >> 2, \
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
@ -879,15 +873,13 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
}
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
__m256i __V = (V); \
(__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \
(__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \
(__v4di)_mm256_setzero_si256(), \
(M) & 0x3, ((M) & 0xc) >> 2, \
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
__m256i __V1 = (V1); \
__m256i __V2 = (V2); \
(__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); })
(__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })
#define _mm256_extracti128_si256(V, M) __extension__ ({ \
(__m128i)__builtin_shufflevector( \

View File

@ -156,12 +156,10 @@ _mm256_rcp_ps(__m256 __a)
}
#define _mm256_round_pd(V, M) __extension__ ({ \
__m256d __V = (V); \
(__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); })
(__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); })
#define _mm256_round_ps(V, M) __extension__ ({ \
__m256 __V = (V); \
(__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); })
(__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); })
#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
@ -268,26 +266,26 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
}
#define _mm_permute_pd(A, C) __extension__ ({ \
__m128d __A = (A); \
(__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
(__v2df)_mm_setzero_pd(), \
(C) & 0x1, ((C) & 0x2) >> 1); })
#define _mm256_permute_pd(A, C) __extension__ ({ \
__m256d __A = (A); \
(__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
(__v4df)_mm256_setzero_pd(), \
(C) & 0x1, ((C) & 0x2) >> 1, \
2 + (((C) & 0x4) >> 2), \
2 + (((C) & 0x8) >> 3)); })
#define _mm_permute_ps(A, C) __extension__ ({ \
__m128 __A = (A); \
(__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
(__v4sf)_mm_setzero_ps(), \
(C) & 0x3, ((C) & 0xc) >> 2, \
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
#define _mm256_permute_ps(A, C) __extension__ ({ \
__m256 __A = (A); \
(__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
(__v8sf)_mm256_setzero_ps(), \
(C) & 0x3, ((C) & 0xc) >> 2, \
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
4 + (((C) & 0x03) >> 0), \
@ -296,34 +294,29 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
4 + (((C) & 0xc0) >> 6)); })
#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
__m256d __V1 = (V1); \
__m256d __V2 = (V2); \
(__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })
(__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), (M)); })
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
__m256 __V1 = (V1); \
__m256 __V2 = (V2); \
(__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
(__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
__m256i __V1 = (V1); \
__m256i __V2 = (V2); \
(__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })
(__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
(__v8si)(__m256i)(V2), (M)); })
/* Vector Blend */
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
__m256d __V1 = (V1); \
__m256d __V2 = (V2); \
(__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
__m256 __V1 = (V1); \
__m256 __V2 = (V2); \
(__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
@ -349,28 +342,29 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/* Vector Dot Product */
#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \
__m256 __V1 = (V1); \
__m256 __V2 = (V2); \
(__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
(__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })
/* Vector shuffle */
#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
__m256 __a = (a); \
__m256 __b = (b); \
(__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \
(mask) & 0x3, ((mask) & 0xc) >> 2, \
(((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \
((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \
(((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })
(__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), \
(mask) & 0x3, \
((mask) & 0xc) >> 2, \
(((mask) & 0x30) >> 4) + 8, \
(((mask) & 0xc0) >> 6) + 8, \
((mask) & 0x3) + 4, \
(((mask) & 0xc) >> 2) + 4, \
(((mask) & 0x30) >> 4) + 12, \
(((mask) & 0xc0) >> 6) + 12); })
#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
__m256d __a = (a); \
__m256d __b = (b); \
(__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \
(mask) & 0x1, \
(((mask) & 0x2) >> 1) + 4, \
(((mask) & 0x4) >> 2) + 2, \
(((mask) & 0x8) >> 3) + 6); })
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), \
(mask) & 0x1, \
(((mask) & 0x2) >> 1) + 4, \
(((mask) & 0x4) >> 2) + 2, \
(((mask) & 0x8) >> 3) + 6); })
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
@ -407,34 +401,28 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
#define _mm_cmp_pd(a, b, c) __extension__ ({ \
__m128d __a = (a); \
__m128d __b = (b); \
(__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); })
(__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })
#define _mm_cmp_ps(a, b, c) __extension__ ({ \
__m128 __a = (a); \
__m128 __b = (b); \
(__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); })
(__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })
#define _mm256_cmp_pd(a, b, c) __extension__ ({ \
__m256d __a = (a); \
__m256d __b = (b); \
(__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); })
(__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), (c)); })
#define _mm256_cmp_ps(a, b, c) __extension__ ({ \
__m256 __a = (a); \
__m256 __b = (b); \
(__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); })
(__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (c)); })
#define _mm_cmp_sd(a, b, c) __extension__ ({ \
__m128d __a = (a); \
__m128d __b = (b); \
(__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); })
(__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })
#define _mm_cmp_ss(a, b, c) __extension__ ({ \
__m128 __a = (a); \
__m128 __b = (b); \
(__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
(__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })
static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi32(__m256i __a, const int __imm)

View File

@ -35,12 +35,10 @@ typedef float __m256 __attribute__ ((__vector_size__ (32)));
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
#define _mm_cvtps_ph(a, imm) __extension__ ({ \
__m128 __a = (a); \
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); })
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); })
#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
__m256 __a = (a); \
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
static __inline __m128 __DEFAULT_FN_ATTRS
_mm_cvtph_ps(__m128i __a)

View File

@ -57,35 +57,28 @@
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
#define _mm_round_ps(X, M) __extension__ ({ \
__m128 __X = (X); \
(__m128) __builtin_ia32_roundps((__v4sf)__X, (M)); })
(__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })
#define _mm_round_ss(X, Y, M) __extension__ ({ \
__m128 __X = (X); \
__m128 __Y = (Y); \
(__m128) __builtin_ia32_roundss((__v4sf)__X, (__v4sf)__Y, (M)); })
(__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (M)); })
#define _mm_round_pd(X, M) __extension__ ({ \
__m128d __X = (X); \
(__m128d) __builtin_ia32_roundpd((__v2df)__X, (M)); })
(__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
#define _mm_round_sd(X, Y, M) __extension__ ({ \
__m128d __X = (X); \
__m128d __Y = (Y); \
(__m128d) __builtin_ia32_roundsd((__v2df)__X, (__v2df)__Y, (M)); })
(__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (M)); })
/* SSE4 Packed Blending Intrinsics. */
#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
__m128d __V1 = (V1); \
__m128d __V2 = (V2); \
(__m128d)__builtin_shufflevector((__v2df)__V1, (__v2df)__V2, \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
(__v2df)(__m128d)(V2), \
(((M) & 0x01) ? 2 : 0), \
(((M) & 0x02) ? 3 : 1)); })
#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
__m128 __V1 = (V1); \
__m128 __V2 = (V2); \
(__m128)__builtin_shufflevector((__v4sf)__V1, (__v4sf)__V2, \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
@ -113,9 +106,8 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
}
#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
__m128i __V1 = (V1); \
__m128i __V2 = (V2); \
(__m128i)__builtin_shufflevector((__v8hi)__V1, (__v8hi)__V2, \
(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
(__v8hi)(__m128i)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
@ -140,14 +132,12 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/* SSE4 Floating Point Dot Product Instructions. */
#define _mm_dp_ps(X, Y, M) __extension__ ({ \
__m128 __X = (X); \
__m128 __Y = (Y); \
(__m128) __builtin_ia32_dpps((__v4sf)__X, (__v4sf)__Y, (M)); })
(__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (M)); })
#define _mm_dp_pd(X, Y, M) __extension__ ({\
__m128d __X = (X); \
__m128d __Y = (Y); \
(__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); })
(__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (M)); })
/* SSE4 Streaming Load Hint Instruction. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -372,9 +362,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
/* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \
__m128i __X = (X); \
__m128i __Y = (Y); \
(__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); })
(__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
(__v16qi)(__m128i)(Y), (M)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_minpos_epu16(__m128i __V)

View File

@ -66,14 +66,11 @@ _mm_abs_epi32(__m128i __a)
}
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
__m128i __a = (a); \
__m128i __b = (b); \
(__m128i)__builtin_ia32_palignr128((__v16qi)__a, (__v16qi)__b, (n)); })
(__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
(__v16qi)(__m128i)(b), (n)); })
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
__m64 __a = (a); \
__m64 __b = (b); \
(__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
(__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a, __m128i __b)

View File

@ -754,8 +754,7 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
}
#define _mm_shuffle_pi16(a, n) __extension__ ({ \
__m64 __a = (a); \
(__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); })
(__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)); })
static __inline__ void __DEFAULT_FN_ATTRS
_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
@ -794,9 +793,7 @@ _mm_setcsr(unsigned int __i)
}
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
__m128 __a = (a); \
__m128 __b = (b); \
(__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
(mask) & 0x3, ((mask) & 0xc) >> 2, \
(((mask) & 0x30) >> 4) + 4, \
(((mask) & 0xc0) >> 6) + 4); })

View File

@ -238,20 +238,16 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
}
#define _mm_roti_epi8(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); })
(__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); })
#define _mm_roti_epi16(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); })
(__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); })
#define _mm_roti_epi32(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); })
(__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); })
#define _mm_roti_epi64(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); })
(__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A, __m128i __B)
@ -302,44 +298,36 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
}
#define _mm_com_epu8(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); })
(__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (N)); })
#define _mm_com_epu16(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); })
(__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
(__v8hi)(__m128i)(B), (N)); })
#define _mm_com_epu32(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); })
(__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
(__v4si)(__m128i)(B), (N)); })
#define _mm_com_epu64(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); })
(__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
(__v2di)(__m128i)(B), (N)); })
#define _mm_com_epi8(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); })
(__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (N)); })
#define _mm_com_epi16(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); })
(__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
(__v8hi)(__m128i)(B), (N)); })
#define _mm_com_epi32(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); })
(__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
(__v4si)(__m128i)(B), (N)); })
#define _mm_com_epi64(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); })
(__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
(__v2di)(__m128i)(B), (N)); })
#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
@ -735,32 +723,23 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B)
}
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
__m128d __X = (X); \
__m128d __Y = (Y); \
__m128i __C = (C); \
(__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \
(__v2di)__C, (I)); })
(__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \
(__v2di)(__m128i)(C), (I)); })
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
__m256d __X = (X); \
__m256d __Y = (Y); \
__m256i __C = (C); \
(__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \
(__v4di)__C, (I)); })
(__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), \
(__v4di)(__m256i)(C), (I)); })
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
__m128 __X = (X); \
__m128 __Y = (Y); \
__m128i __C = (C); \
(__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \
(__v4si)__C, (I)); })
(__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
(__v4si)(__m128i)(C), (I)); })
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
__m256 __X = (X); \
__m256 __Y = (Y); \
__m256i __C = (C); \
(__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \
(__v8si)__C, (I)); })
(__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), \
(__v8si)(__m256i)(C), (I)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)