From 49488407aafe459d1d713bf96474f32441378eb8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Jan 2019 08:46:51 +0000 Subject: [PATCH] [X86] Remove mask parameter from avx512 pmultishiftqb intrinsics. Use select in IR instead. Fixes PR40259 llvm-svn: 351036 --- clang/include/clang/Basic/BuiltinsX86.def | 6 +-- clang/lib/CodeGen/CGBuiltin.cpp | 24 +++++++++- clang/lib/Headers/avx512vbmiintrin.h | 26 +++++------ clang/lib/Headers/avx512vbmivlintrin.h | 56 +++++++++-------------- clang/test/CodeGen/avx512vbmi-builtins.c | 8 ++-- clang/test/CodeGen/avx512vbmivl-builtin.c | 16 ++++--- 6 files changed, 73 insertions(+), 63 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 85c0c3e12207..4ee2a705da5e 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1821,9 +1821,9 @@ TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128: TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "ncV:128:", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "ncV:256:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl") // generic select intrinsics TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3ab814a37868..b1799c0491a6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11152,6 +11152,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); } + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: { + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_vpmultishiftqb128: + ID = Intrinsic::x86_avx512_pmultishift_qb_128; + break; + case X86::BI__builtin_ia32_vpmultishiftqb256: + ID = Intrinsic::x86_avx512_pmultishift_qb_256; + break; + case X86::BI__builtin_ia32_vpmultishiftqb512: + ID = Intrinsic::x86_avx512_pmultishift_qb_512; + break; + } + + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: case X86::BI__builtin_ia32_vpshufbitqmb256_mask: case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { @@ -11173,8 +11193,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, break; } - Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); - return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); + Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn); } // packed comparison intrinsics diff --git a/clang/lib/Headers/avx512vbmiintrin.h b/clang/lib/Headers/avx512vbmiintrin.h index b6e93c285871..5463d9015504 100644 --- a/clang/lib/Headers/avx512vbmiintrin.h +++ b/clang/lib/Headers/avx512vbmiintrin.h @@ -91,30 +91,26 @@ _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A, } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) +_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, - (__v64qi) __Y, - (__v64qi) __W, - (__mmask64) __M); + return (__m512i)__builtin_ia32_vpmultishiftqb512((__v64qi)__X, (__v64qi) __Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y) +_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X, + __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, - (__v64qi) __Y, - (__v64qi) _mm512_setzero_si512 (), - (__mmask64) __M); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), + (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y) +_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, - (__v64qi) __Y, - (__v64qi) _mm512_undefined_epi32 (), - (__mmask64) -1); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, + (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), + (__v64qi)_mm512_setzero_si512()); } diff --git a/clang/lib/Headers/avx512vbmivlintrin.h b/clang/lib/Headers/avx512vbmivlintrin.h index 9a0400b2b5d5..b5d5aa9af523 100644 --- a/clang/lib/Headers/avx512vbmivlintrin.h +++ b/clang/lib/Headers/avx512vbmivlintrin.h @@ -150,61 +150,49 @@ _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) +_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, - (__v16qi) __Y, - (__v16qi) __W, - (__mmask16) __M); + return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) +_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, + __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, - (__v16qi) __Y, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) __M); + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, + (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), + (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) +_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, - (__v16qi) __Y, - (__v16qi) - _mm_undefined_si128 (), - (__mmask16) -1); + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, + (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), + (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) +_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, - (__v32qi) __Y, - (__v32qi) __W, - (__mmask32) __M); + return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) +_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, + __m256i __Y) { - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, - (__v32qi) __Y, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __M); + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, + (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), + (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) +_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, - (__v32qi) __Y, - (__v32qi) - _mm256_undefined_si256 (), - (__mmask32) -1); + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, + (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), + (__v32qi)_mm256_setzero_si256()); } diff --git a/clang/test/CodeGen/avx512vbmi-builtins.c b/clang/test/CodeGen/avx512vbmi-builtins.c index 0347916af09f..80d9a9476a62 100644 --- a/clang/test/CodeGen/avx512vbmi-builtins.c +++ b/clang/test/CodeGen/avx512vbmi-builtins.c @@ -52,18 +52,20 @@ __m512i test_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A __m512i test_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) { // CHECK-LABEL: @test_mm512_mask_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512 + // CHECK: @llvm.x86.avx512.pmultishift.qb.512 + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y); } __m512i test_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) { // CHECK-LABEL: @test_mm512_maskz_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512 + // CHECK: @llvm.x86.avx512.pmultishift.qb.512 + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_multishift_epi64_epi8(__M, __X, __Y); } __m512i test_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) { // CHECK-LABEL: @test_mm512_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512 + // CHECK: @llvm.x86.avx512.pmultishift.qb.512 return _mm512_multishift_epi64_epi8(__X, __Y); } diff --git a/clang/test/CodeGen/avx512vbmivl-builtin.c b/clang/test/CodeGen/avx512vbmivl-builtin.c index da8986f2e7d8..0bf9165f6c6a 100644 --- a/clang/test/CodeGen/avx512vbmivl-builtin.c +++ b/clang/test/CodeGen/avx512vbmivl-builtin.c @@ -99,37 +99,41 @@ __m256i test_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i _ __m128i test_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) { // CHECK-LABEL: @test_mm_mask_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128 + // CHECK: @llvm.x86.avx512.pmultishift.qb.128 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_multishift_epi64_epi8(__W, __M, __X, __Y); } __m128i test_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) { // CHECK-LABEL: @test_mm_maskz_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128 + // CHECK: @llvm.x86.avx512.pmultishift.qb.128 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_multishift_epi64_epi8(__M, __X, __Y); } __m128i test_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) { // CHECK-LABEL: @test_mm_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128 + // CHECK: @llvm.x86.avx512.pmultishift.qb.128 return _mm_multishift_epi64_epi8(__X, __Y); } __m256i test_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_mask_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256 + // CHECK: @llvm.x86.avx512.pmultishift.qb.256 + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_multishift_epi64_epi8(__W, __M, __X, __Y); } __m256i test_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_maskz_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256 + // CHECK: @llvm.x86.avx512.pmultishift.qb.256 + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_multishift_epi64_epi8(__M, __X, __Y); } __m256i test_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_multishift_epi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256 + // CHECK: @llvm.x86.avx512.pmultishift.qb.256 return _mm256_multishift_epi64_epi8(__X, __Y); }