[X86] Add support for _mm512_mullox_epi64 and _mm512_mask_mullox_epi64 intrinsics to match icc.

On AVX512F targets we'll produce an emulated sequence using 3 pmuludqs with shifts and adds. On AVX512DQ we'll use vpmullq.

Fixes PR37140.

llvm-svn: 330923
This commit is contained in:
Craig Topper 2018-04-26 05:38:39 +00:00
parent bc26f3b61b
commit e95bde33df
2 changed files with 25 additions and 0 deletions

View File

@ -1581,6 +1581,18 @@ _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
(__v16si)__W);
}
/* Element-wise multiply of two 512-bit integer vectors, each reinterpreted as
 * a __v8du vector; the product vector is returned as __m512i.
 * Written as a generic vector multiply rather than a target builtin, so the
 * instruction sequence is left to the backend. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
  __v8du __lhs = (__v8du) __A;
  __v8du __rhs = (__v8du) __B;
  return (__m512i) (__lhs * __rhs);
}
/* Masked form of _mm512_mullox_epi64: computes the full product of __A and
 * __B, then blends it with __W under the 8-bit mask __U via the 64-bit-lane
 * select builtin.
 * NOTE(review): presumably a set mask bit picks the product lane and a clear
 * bit keeps the __W lane (usual mask-blend convention) -- confirm against
 * __builtin_ia32_selectq_512. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  __v8di __prod = (__v8di)_mm512_mullox_epi64(__A, __B);
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, __prod,
                                             (__v8di)__W);
}
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
(__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(W), (__mmask8)(U), \

View File

@ -1952,6 +1952,19 @@ __m512i test_mm512_mullo_epi32(__m512i __A, __m512i __B) {
return _mm512_mullo_epi32(__A,__B);
}
// Codegen test for _mm512_mullox_epi64: the intrinsic is expected to lower to
// a vector multiply on <8 x i64>.
__m512i test_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_mullox_epi64
  // CHECK: mul <8 x i64>
  // Call the intrinsic itself; an open-coded vector multiply here would leave
  // the header definition completely unexercised by this test.
  return (__m512i) _mm512_mullox_epi64(__A, __B);
}
// Codegen test for _mm512_mask_mullox_epi64: the FileCheck directives below
// expect a vector multiply on <8 x i64> followed by a per-lane select driven
// by an <8 x i1> mask.
__m512i test_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_mullox_epi64
// CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
// The outer cast is a no-op here: it converts to the function's own return type.
return (__m512i) _mm512_mask_mullox_epi64(__W, __U, __A, __B);
}
__m512d test_mm512_add_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_add_round_pd
// CHECK: @llvm.x86.avx512.mask.add.pd.512