[x86][icelake][bitalg]
added bitalg feature recognition added intrinsics support for bitalg instructions _mm512_popcnt_epi16 _mm512_mask_popcnt_epi16 _mm512_maskz_popcnt_epi16 _mm512_popcnt_epi8 _mm512_mask_popcnt_epi8 _mm512_maskz_popcnt_epi8 _mm512_mask_bitshuffle_epi64_mask _mm512_bitshuffle_epi64_mask _mm256_popcnt_epi16 _mm256_mask_popcnt_epi16 _mm256_maskz_popcnt_epi16 _mm128_popcnt_epi16 _mm128_mask_popcnt_epi16 _mm128_maskz_popcnt_epi16 _mm256_popcnt_epi8 _mm256_mask_popcnt_epi8 _mm256_maskz_popcnt_epi8 _mm128_popcnt_epi8 _mm128_mask_popcnt_epi8 _mm128_maskz_popcnt_epi8 _mm256_mask_bitshuffle_epi32_mask _mm256_bitshuffle_epi32_mask _mm128_mask_bitshuffle_epi16_mask _mm128_bitshuffle_epi16_mask matching a similar work on the backend (D40222) Differential Revision: https://reviews.llvm.org/D41564 llvm-svn: 321483
This commit is contained in:
parent
f72630bb9b
commit
2268576fa0
|
@ -1092,6 +1092,17 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,a
|
|||
TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
|
||||
|
|
|
@ -2467,6 +2467,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
|
|||
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
|
||||
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
|
||||
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
|
||||
def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
|
||||
def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
|
||||
def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
|
||||
def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>;
|
||||
def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
|
||||
|
|
|
@ -135,6 +135,7 @@ bool X86TargetInfo::initFeatureMap(
|
|||
setFeatureEnabledImpl(Features, "vaes", true);
|
||||
setFeatureEnabledImpl(Features, "gfni", true);
|
||||
setFeatureEnabledImpl(Features, "vpclmulqdq", true);
|
||||
setFeatureEnabledImpl(Features, "avx512bitalg", true);
|
||||
// TODO: Add icelake features here.
|
||||
LLVM_FALLTHROUGH;
|
||||
case CK_Cannonlake:
|
||||
|
@ -473,7 +474,8 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
|
|||
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
|
||||
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
|
||||
Features["avx512vl"] = Features["avx512vbmi"] =
|
||||
Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
|
||||
Features["avx512ifma"] = Features["avx512vpopcntdq"] =
|
||||
Features["avx512bitalg"] = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -604,15 +606,15 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
|
|||
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
|
||||
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
|
||||
Name == "avx512vbmi" || Name == "avx512ifma" ||
|
||||
Name == "avx512vpopcntdq") {
|
||||
Name == "avx512vpopcntdq" || Name == "avx512bitalg") {
|
||||
if (Enabled)
|
||||
setSSELevel(Features, AVX512F, Enabled);
|
||||
// Enable BWI instruction if VBMI is being enabled.
|
||||
if (Name == "avx512vbmi" && Enabled)
|
||||
// Enable BWI instruction if VBMI / BITALG is being enabled.
|
||||
if ((Name == "avx512vbmi" || Name == "avx512bitalg") && Enabled)
|
||||
Features["avx512bw"] = true;
|
||||
// Also disable VBMI if BWI is being disabled.
|
||||
// Also disable VBMI / BITALG if BWI is being disabled.
|
||||
if (Name == "avx512bw" && !Enabled)
|
||||
Features["avx512vbmi"] = false;
|
||||
Features["avx512vbmi"] = Features["avx512bitalg"] = false;
|
||||
} else if (Name == "fma") {
|
||||
if (Enabled)
|
||||
setSSELevel(Features, AVX, Enabled);
|
||||
|
@ -702,6 +704,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
HasAVX512PF = true;
|
||||
} else if (Feature == "+avx512dq") {
|
||||
HasAVX512DQ = true;
|
||||
} else if (Feature == "+avx512bitalg") {
|
||||
HasAVX512BITALG = true;
|
||||
} else if (Feature == "+avx512bw") {
|
||||
HasAVX512BW = true;
|
||||
} else if (Feature == "+avx512vl") {
|
||||
|
@ -1041,6 +1045,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
Builder.defineMacro("__AVX512PF__");
|
||||
if (HasAVX512DQ)
|
||||
Builder.defineMacro("__AVX512DQ__");
|
||||
if (HasAVX512BITALG)
|
||||
Builder.defineMacro("__AVX512BITALG__");
|
||||
if (HasAVX512BW)
|
||||
Builder.defineMacro("__AVX512BW__");
|
||||
if (HasAVX512VL)
|
||||
|
@ -1179,6 +1185,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
|
|||
.Case("avx512er", true)
|
||||
.Case("avx512pf", true)
|
||||
.Case("avx512dq", true)
|
||||
.Case("avx512bitalg", true)
|
||||
.Case("avx512bw", true)
|
||||
.Case("avx512vl", true)
|
||||
.Case("avx512vbmi", true)
|
||||
|
@ -1244,6 +1251,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
|||
.Case("avx512er", HasAVX512ER)
|
||||
.Case("avx512pf", HasAVX512PF)
|
||||
.Case("avx512dq", HasAVX512DQ)
|
||||
.Case("avx512bitalg", HasAVX512BITALG)
|
||||
.Case("avx512bw", HasAVX512BW)
|
||||
.Case("avx512vl", HasAVX512VL)
|
||||
.Case("avx512vbmi", HasAVX512VBMI)
|
||||
|
|
|
@ -71,6 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
|
|||
bool HasAVX512ER = false;
|
||||
bool HasAVX512PF = false;
|
||||
bool HasAVX512DQ = false;
|
||||
bool HasAVX512BITALG = false;
|
||||
bool HasAVX512BW = false;
|
||||
bool HasAVX512VL = false;
|
||||
bool HasAVX512VBMI = false;
|
||||
|
|
|
@ -8143,12 +8143,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
case X86::BI__builtin_ia32_storesd128_mask: {
|
||||
return EmitX86MaskedStore(*this, Ops, 16);
|
||||
}
|
||||
case X86::BI__builtin_ia32_vpopcntb_128:
|
||||
case X86::BI__builtin_ia32_vpopcntd_128:
|
||||
case X86::BI__builtin_ia32_vpopcntq_128:
|
||||
case X86::BI__builtin_ia32_vpopcntw_128:
|
||||
case X86::BI__builtin_ia32_vpopcntb_256:
|
||||
case X86::BI__builtin_ia32_vpopcntd_256:
|
||||
case X86::BI__builtin_ia32_vpopcntq_256:
|
||||
case X86::BI__builtin_ia32_vpopcntw_256:
|
||||
case X86::BI__builtin_ia32_vpopcntb_512:
|
||||
case X86::BI__builtin_ia32_vpopcntd_512:
|
||||
case X86::BI__builtin_ia32_vpopcntq_512: {
|
||||
case X86::BI__builtin_ia32_vpopcntq_512:
|
||||
case X86::BI__builtin_ia32_vpopcntw_512: {
|
||||
llvm::Type *ResultType = ConvertType(E->getType());
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
|
||||
return Builder.CreateCall(F, Ops);
|
||||
|
|
|
@ -7,6 +7,8 @@ set(files
|
|||
arm64intr.h
|
||||
avx2intrin.h
|
||||
avx512bwintrin.h
|
||||
avx512bitalgintrin.h
|
||||
avx512vlbitalgintrin.h
|
||||
avx512cdintrin.h
|
||||
avx512vpopcntdqintrin.h
|
||||
avx512dqintrin.h
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512BITALGINTRIN_H
|
||||
#define __AVX512BITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi16(__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
|
||||
(__v32hi) _mm512_popcnt_epi16(__B),
|
||||
(__v32hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi8(__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
|
||||
(__v64qi) _mm512_popcnt_epi8(__B),
|
||||
(__v64qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
|
@ -0,0 +1,157 @@
|
|||
/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VLBITALGINTRIN_H
|
||||
#define __AVX512VLBITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_popcnt_epi16(__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
|
||||
(__v16hi) _mm256_popcnt_epi16(__B),
|
||||
(__v16hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_popcnt_epi16(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
|
||||
(__v8hi) _mm128_popcnt_epi16(__B),
|
||||
(__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_popcnt_epi8(__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
|
||||
(__v32qi) _mm256_popcnt_epi8(__B),
|
||||
(__v32qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_popcnt_epi8(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
|
||||
(__v16qi) _mm128_popcnt_epi8(__B),
|
||||
(__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
|
@ -150,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a)
|
|||
#include <avx512bwintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
|
||||
#include <avx512bitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
|
||||
#include <avx512cdintrin.h>
|
||||
#endif
|
||||
|
@ -167,6 +171,11 @@ _mm256_cvtph_ps(__m128i __a)
|
|||
#include <avx512dqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
|
||||
#include <avx512vlbitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BW__))
|
||||
#include <avx512vlbwintrin.h>
|
||||
|
|
|
@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
|
|||
// CHECK: lake{{.*}} #7
|
||||
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
|
||||
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
|
||||
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
|
||||
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m512i test_mm512_popcnt_epi16(__m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v32i16
|
||||
return _mm512_popcnt_epi16(__A);
|
||||
}
|
||||
|
||||
__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
|
||||
// CHECK-LABEL: @test_mm512_mask_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v32i16
|
||||
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
|
||||
return _mm512_mask_popcnt_epi16(__A, __U, __B);
|
||||
}
|
||||
__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v32i16
|
||||
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
|
||||
return _mm512_maskz_popcnt_epi16(__U, __B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_popcnt_epi8(__m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v64i8
|
||||
return _mm512_popcnt_epi8(__A);
|
||||
}
|
||||
|
||||
__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
|
||||
// CHECK-LABEL: @test_mm512_mask_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v64i8
|
||||
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
|
||||
return _mm512_mask_popcnt_epi8(__A, __U, __B);
|
||||
}
|
||||
__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v64i8
|
||||
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
|
||||
return _mm512_maskz_popcnt_epi8(__U, __B);
|
||||
}
|
||||
|
||||
__mmask64 test_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) {
|
||||
// CHECK-LABEL: @test_mm512_mask_bitshuffle_epi64_mask
|
||||
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
|
||||
return _mm512_mask_bitshuffle_epi64_mask(__U, __A, __B);
|
||||
}
|
||||
|
||||
__mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
|
||||
// CHECK-LABEL: @test_mm512_bitshuffle_epi64_mask
|
||||
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
|
||||
return _mm512_bitshuffle_epi64_mask(__A, __B);
|
||||
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m256i test_mm256_popcnt_epi16(__m256i __A) {
|
||||
// CHECK-LABEL: @test_mm256_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v16i16
|
||||
return _mm256_popcnt_epi16(__A);
|
||||
}
|
||||
|
||||
__m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_mask_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v16i16
|
||||
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
|
||||
return _mm256_mask_popcnt_epi16(__A, __U, __B);
|
||||
}
|
||||
__m256i test_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v16i16
|
||||
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
|
||||
return _mm256_maskz_popcnt_epi16(__U, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm128_popcnt_epi16(__m128i __A) {
|
||||
// CHECK-LABEL: @test_mm128_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v8i16
|
||||
return _mm128_popcnt_epi16(__A);
|
||||
}
|
||||
|
||||
__m128i test_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm128_mask_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v8i16
|
||||
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
|
||||
return _mm128_mask_popcnt_epi16(__A, __U, __B);
|
||||
}
|
||||
__m128i test_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm128_maskz_popcnt_epi16
|
||||
// CHECK: @llvm.ctpop.v8i16
|
||||
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
|
||||
return _mm128_maskz_popcnt_epi16(__U, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_popcnt_epi8(__m256i __A) {
|
||||
// CHECK-LABEL: @test_mm256_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v32i8
|
||||
return _mm256_popcnt_epi8(__A);
|
||||
}
|
||||
|
||||
__m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_mask_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v32i8
|
||||
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
|
||||
return _mm256_mask_popcnt_epi8(__A, __U, __B);
|
||||
}
|
||||
__m256i test_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v32i8
|
||||
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
|
||||
return _mm256_maskz_popcnt_epi8(__U, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm128_popcnt_epi8(__m128i __A) {
|
||||
// CHECK-LABEL: @test_mm128_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v16i8
|
||||
return _mm128_popcnt_epi8(__A);
|
||||
}
|
||||
|
||||
__m128i test_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm128_mask_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v16i8
|
||||
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
|
||||
return _mm128_mask_popcnt_epi8(__A, __U, __B);
|
||||
}
|
||||
__m128i test_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm128_maskz_popcnt_epi8
|
||||
// CHECK: @llvm.ctpop.v16i8
|
||||
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
|
||||
return _mm128_maskz_popcnt_epi8(__U, __B);
|
||||
}
|
||||
|
||||
__mmask32 test_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_mask_bitshuffle_epi32_mask
|
||||
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
|
||||
return _mm256_mask_bitshuffle_epi32_mask(__U, __A, __B);
|
||||
}
|
||||
|
||||
__mmask32 test_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_bitshuffle_epi32_mask
|
||||
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
|
||||
return _mm256_bitshuffle_epi32_mask(__A, __B);
|
||||
}
|
||||
|
||||
__mmask16 test_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm128_mask_bitshuffle_epi16_mask
|
||||
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
|
||||
return _mm128_mask_bitshuffle_epi16_mask(__U, __A, __B);
|
||||
}
|
||||
|
||||
__mmask16 test_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm128_bitshuffle_epi16_mask
|
||||
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
|
||||
return _mm128_bitshuffle_epi16_mask(__A, __B);
|
||||
}
|
||||
|
|
@ -110,3 +110,8 @@
|
|||
// VPCLMULQDQ: "-target-feature" "+vpclmulqdq"
|
||||
// NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq"
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BITALG %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BITALG %s
|
||||
// BITALG: "-target-feature" "+avx512bitalg"
|
||||
// NO-BITALG: "-target-feature" "-avx512bitalg"
|
||||
|
||||
|
|
|
@ -1050,6 +1050,7 @@
|
|||
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M32
|
||||
// CHECK_ICL_M32: #define __AES__ 1
|
||||
// CHECK_ICL_M32: #define __AVX2__ 1
|
||||
// CHECK_ICL_M32: #define __AVX512BITALG__ 1
|
||||
// CHECK_ICL_M32: #define __AVX512BW__ 1
|
||||
// CHECK_ICL_M32: #define __AVX512CD__ 1
|
||||
// CHECK_ICL_M32: #define __AVX512DQ__ 1
|
||||
|
@ -1098,6 +1099,7 @@
|
|||
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M64
|
||||
// CHECK_ICL_M64: #define __AES__ 1
|
||||
// CHECK_ICL_M64: #define __AVX2__ 1
|
||||
// CHECK_ICL_M64: #define __AVX512BITALG__ 1
|
||||
// CHECK_ICL_M64: #define __AVX512BW__ 1
|
||||
// CHECK_ICL_M64: #define __AVX512CD__ 1
|
||||
// CHECK_ICL_M64: #define __AVX512DQ__ 1
|
||||
|
|
|
@ -209,11 +209,33 @@
|
|||
// AVX512VBMI: #define __SSE__ 1
|
||||
// AVX512VBMI: #define __SSSE3__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALG %s
|
||||
|
||||
// AVX512BITALG: #define __AVX2__ 1
|
||||
// AVX512BITALG: #define __AVX512BITALG__ 1
|
||||
// AVX512BITALG: #define __AVX512BW__ 1
|
||||
// AVX512BITALG: #define __AVX512F__ 1
|
||||
// AVX512BITALG: #define __AVX__ 1
|
||||
// AVX512BITALG: #define __SSE2_MATH__ 1
|
||||
// AVX512BITALG: #define __SSE2__ 1
|
||||
// AVX512BITALG: #define __SSE3__ 1
|
||||
// AVX512BITALG: #define __SSE4_1__ 1
|
||||
// AVX512BITALG: #define __SSE4_2__ 1
|
||||
// AVX512BITALG: #define __SSE_MATH__ 1
|
||||
// AVX512BITALG: #define __SSE__ 1
|
||||
// AVX512BITALG: #define __SSSE3__ 1
|
||||
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMINOAVX512BW %s
|
||||
|
||||
// AVX512VBMINOAVX512BW-NOT: #define __AVX512BW__ 1
|
||||
// AVX512VBMINOAVX512BW-NOT: #define __AVX512VBMI__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALGNOAVX512BW %s
|
||||
|
||||
// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BITALG__ 1
|
||||
// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BW__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -msse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SSE42POPCNT %s
|
||||
|
||||
// SSE42POPCNT: #define __POPCNT__ 1
|
||||
|
|
Loading…
Reference in New Issue