[X86] Make the pointer arguments to avx512 gather/scatter intrinsics 'void*' to match gcc and Intel's documentation.

The AVX2 gather intrinsics are documented as taking 'int *', 'long long *', 'float *', or 'double *' pointers, so I'm leaving those unchanged. This also matches gcc.

llvm-svn: 350696
This commit is contained in:
Craig Topper 2019-01-09 07:36:01 +00:00
parent befee402ff
commit bdbe5c7dc7
3 changed files with 96 additions and 96 deletions

View File

@ -7630,177 +7630,177 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
/* Unmasked gather wrapper over __builtin_ia32_gatherdiv16sf (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document;
 * outer parentheses keep the result cast bound to the call. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)_mm256_undefined_ps(), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gatherdiv16sf; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)(__m256)(v1_old), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)(mask), (int)(scale)))
/* Unmasked gather wrapper over __builtin_ia32_gatherdiv16si (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document;
 * outer parentheses keep the result cast bound to the call. */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)_mm256_undefined_si256(), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gatherdiv16si; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)(__m256i)(v1_old), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)(mask), (int)(scale)))
/* Unmasked gather wrapper over __builtin_ia32_gatherdiv8df (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document;
 * outer parentheses keep the result cast bound to the call. */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)_mm512_undefined_pd(), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gatherdiv8df; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)(__m512d)(v1_old), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)(mask), (int)(scale)))
/* Unmasked gather wrapper over __builtin_ia32_gatherdiv8di (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document;
 * outer parentheses keep the result cast bound to the call. */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)_mm512_undefined_epi32(), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gatherdiv8di; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)(__m512i)(v1_old), (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)(mask), (int)(scale)))
/* Unmasked gather wrapper over __builtin_ia32_gathersiv16sf (mask is
 * (__mmask16)-1). index is cast as an integer vector (__v16si via __m512i),
 * matching _mm512_i32gather_epi32 and _mm512_i32scatter_ps, instead of the
 * float-vector cast used previously. addr is cast to void const *, as gcc
 * and Intel document; outer parentheses keep the result cast bound to the
 * call. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)_mm512_undefined_ps(), (void const *)(addr), \
      (__v16si)(__m512i)(index), (__mmask16)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gathersiv16sf; v1_old and mask
 * are forwarded after casting. index is cast as an integer vector (__v16si
 * via __m512i), matching _mm512_mask_i32gather_epi32, instead of the
 * float-vector cast used previously. addr is cast to void const *, as gcc
 * and Intel document; outer parentheses keep the result cast bound to the
 * call. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)(__m512)(v1_old), (void const *)(addr), \
      (__v16si)(__m512i)(index), (__mmask16)(mask), (int)(scale)))
/* Unmasked gather wrapper over __builtin_ia32_gathersiv16si (mask is
 * (__mmask16)-1). addr is cast to void const *, as gcc and Intel document;
 * outer parentheses keep the result cast bound to the call. */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)_mm512_undefined_epi32(), (void const *)(addr), \
      (__v16si)(__m512i)(index), (__mmask16)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gathersiv16si; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)(__m512i)(v1_old), (void const *)(addr), \
      (__v16si)(__m512i)(index), (__mmask16)(mask), (int)(scale)))
/* Unmasked gather wrapper over __builtin_ia32_gathersiv8df (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document;
 * outer parentheses keep the result cast bound to the call. */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)_mm512_undefined_pd(), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gathersiv8df; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)(__m512d)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Unmasked gather wrapper over __builtin_ia32_gathersiv8di (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document;
 * outer parentheses keep the result cast bound to the call. */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)_mm512_undefined_epi32(), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)-1, (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gathersiv8di; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)(__m512i)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv16sf (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), (__mmask8)-1, (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv16sf; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), (__mmask8)(mask), (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv16si (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), (__mmask8)-1, (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv16si; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), (__mmask8)(mask), (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv8df (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), (__mmask8)-1, (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv8df; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), (__mmask8)(mask), (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv8di (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), (__mmask8)-1, (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv8di; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), (__mmask8)(mask), (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv16sf (mask is
 * (__mmask16)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), (__mmask16)-1, (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv16sf; mask is
 * forwarded as __mmask16. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), (__mmask16)(mask), (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv16si (mask is
 * (__mmask16)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), (__mmask16)-1, (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv16si; mask is
 * forwarded as __mmask16. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), (__mmask16)(mask), (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv8df (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv8df; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv8di (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv8di; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))

View File

@ -33,78 +33,78 @@
/* Masked prefetch wrapper over __builtin_ia32_gatherpfdpd; mask is forwarded
 * as __mmask8. addr is cast to void const *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
  __builtin_ia32_gatherpfdpd( \
      (__mmask8)(mask), (__v8si)(__m256i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_gatherpfdpd (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document. */
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
  __builtin_ia32_gatherpfdpd( \
      (__mmask8)-1, (__v8si)(__m256i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Masked prefetch wrapper over __builtin_ia32_gatherpfdps; mask is forwarded
 * as __mmask16. addr is cast to void const *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
  __builtin_ia32_gatherpfdps( \
      (__mmask16)(mask), (__v16si)(__m512i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_gatherpfdps (mask is
 * (__mmask16)-1). addr is cast to void const *, as gcc and Intel document. */
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
  __builtin_ia32_gatherpfdps( \
      (__mmask16)-1, (__v16si)(__m512i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Masked prefetch wrapper over __builtin_ia32_gatherpfqpd; mask is forwarded
 * as __mmask8. addr is cast to void const *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
  __builtin_ia32_gatherpfqpd( \
      (__mmask8)(mask), (__v8di)(__m512i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_gatherpfqpd (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document. */
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
  __builtin_ia32_gatherpfqpd( \
      (__mmask8)-1, (__v8di)(__m512i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Masked prefetch wrapper over __builtin_ia32_gatherpfqps; mask is forwarded
 * as __mmask8. addr is cast to void const *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
  __builtin_ia32_gatherpfqps( \
      (__mmask8)(mask), (__v8di)(__m512i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_gatherpfqps (mask is
 * (__mmask8)-1). addr is cast to void const *, as gcc and Intel document. */
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
  __builtin_ia32_gatherpfqps( \
      (__mmask8)-1, (__v8di)(__m512i)(index), (void const *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_scatterpfdpd (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
  __builtin_ia32_scatterpfdpd( \
      (__mmask8)-1, (__v8si)(__m256i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
/* Masked prefetch wrapper over __builtin_ia32_scatterpfdpd; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
  __builtin_ia32_scatterpfdpd( \
      (__mmask8)(mask), (__v8si)(__m256i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_scatterpfdps (mask is
 * (__mmask16)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
  __builtin_ia32_scatterpfdps( \
      (__mmask16)-1, (__v16si)(__m512i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
/* Masked prefetch wrapper over __builtin_ia32_scatterpfdps; mask is
 * forwarded as __mmask16. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
  __builtin_ia32_scatterpfdps( \
      (__mmask16)(mask), (__v16si)(__m512i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_scatterpfqpd (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
  __builtin_ia32_scatterpfqpd( \
      (__mmask8)-1, (__v8di)(__m512i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
/* Masked prefetch wrapper over __builtin_ia32_scatterpfqpd; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
  __builtin_ia32_scatterpfqpd( \
      (__mmask8)(mask), (__v8di)(__m512i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
/* Unmasked prefetch wrapper over __builtin_ia32_scatterpfqps (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
  __builtin_ia32_scatterpfqps( \
      (__mmask8)-1, (__v8di)(__m512i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
/* Masked prefetch wrapper over __builtin_ia32_scatterpfqps; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
  __builtin_ia32_scatterpfqps( \
      (__mmask8)(mask), (__v8di)(__m512i)(index), (void *)(addr), \
      (int)(scale), (int)(hint))
#undef __DEFAULT_FN_ATTRS

View File

@ -3484,162 +3484,162 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
}
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv2df (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv2df; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv2di (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv2di; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv4df (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv4df; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv4di (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv4di; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv4sf (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv4sf; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv4si (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv4si; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv8sf (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv8sf; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scatterdiv8si (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scatterdiv8si; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv2df (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv2df; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv2di (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv2di; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv4df (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv4df; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv4di (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv4di; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv4sf (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv4sf; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv4si (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv4si; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv8sf (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv8sf; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
/* Unmasked scatter wrapper over __builtin_ia32_scattersiv8si (mask is
 * (__mmask8)-1). addr is cast to void *, as gcc and Intel document. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
/* Masked scatter wrapper over __builtin_ia32_scattersiv8si; mask is
 * forwarded as __mmask8. addr is cast to void *, as gcc and Intel document. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
@ -7984,97 +7984,97 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
/* Masked gather wrapper over __builtin_ia32_gather3div2df; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3div2di; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3div4df; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3div4di; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3div4sf; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3div4si; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3div8sf; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3div8si; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv2df; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv2di; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv4df; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv4di; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv4sf; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv4si; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv8sf; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather wrapper over __builtin_ia32_gather3siv8si; v1_old and mask
 * are forwarded after casting. addr is cast to void const *, as gcc and
 * Intel document; outer parentheses keep the result cast bound to the call. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))