[x86][icelake][bitalg]

added bitalg feature recognition
added intrinsics support for bitalg instructions
_mm512_popcnt_epi16
_mm512_mask_popcnt_epi16
_mm512_maskz_popcnt_epi16
_mm512_popcnt_epi8
_mm512_mask_popcnt_epi8
_mm512_maskz_popcnt_epi8
_mm512_mask_bitshuffle_epi64_mask
_mm512_bitshuffle_epi64_mask
_mm256_popcnt_epi16
_mm256_mask_popcnt_epi16
_mm256_maskz_popcnt_epi16
_mm128_popcnt_epi16
_mm128_mask_popcnt_epi16
_mm128_maskz_popcnt_epi16
_mm256_popcnt_epi8
_mm256_mask_popcnt_epi8
_mm256_maskz_popcnt_epi8
_mm128_popcnt_epi8
_mm128_mask_popcnt_epi8
_mm128_maskz_popcnt_epi8
_mm256_mask_bitshuffle_epi32_mask
_mm256_bitshuffle_epi32_mask
_mm128_mask_bitshuffle_epi16_mask
_mm128_bitshuffle_epi16_mask
matching a similar work on the backend (D40222)
Differential Revision: https://reviews.llvm.org/D41564

llvm-svn: 321483
This commit is contained in:
Coby Tayree 2017-12-27 10:01:00 +00:00
parent f72630bb9b
commit 2268576fa0
15 changed files with 489 additions and 9 deletions

View File

@ -1092,6 +1092,17 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,a
TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")
TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")

View File

@ -2467,6 +2467,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>;
def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;

View File

@ -135,6 +135,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "vaes", true);
setFeatureEnabledImpl(Features, "gfni", true);
setFeatureEnabledImpl(Features, "vpclmulqdq", true);
setFeatureEnabledImpl(Features, "avx512bitalg", true);
// TODO: Add icelake features here.
LLVM_FALLTHROUGH;
case CK_Cannonlake:
@ -473,7 +474,8 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
Features["avx512vl"] = Features["avx512vbmi"] =
Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
Features["avx512ifma"] = Features["avx512vpopcntdq"] =
Features["avx512bitalg"] = false;
break;
}
}
@ -604,15 +606,15 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
Name == "avx512vbmi" || Name == "avx512ifma" ||
Name == "avx512vpopcntdq") {
Name == "avx512vpopcntdq" || Name == "avx512bitalg") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
// Enable BWI instruction if VBMI is being enabled.
if (Name == "avx512vbmi" && Enabled)
// Enable BWI instruction if VBMI / BITALG is being enabled.
if ((Name == "avx512vbmi" || Name == "avx512bitalg") && Enabled)
Features["avx512bw"] = true;
// Also disable VBMI if BWI is being disabled.
// Also disable VBMI / BITALG if BWI is being disabled.
if (Name == "avx512bw" && !Enabled)
Features["avx512vbmi"] = false;
Features["avx512vbmi"] = Features["avx512bitalg"] = false;
} else if (Name == "fma") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
@ -702,6 +704,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512PF = true;
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
HasAVX512BITALG = true;
} else if (Feature == "+avx512bw") {
HasAVX512BW = true;
} else if (Feature == "+avx512vl") {
@ -1041,6 +1045,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
if (HasAVX512BITALG)
Builder.defineMacro("__AVX512BITALG__");
if (HasAVX512BW)
Builder.defineMacro("__AVX512BW__");
if (HasAVX512VL)
@ -1179,6 +1185,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
.Case("avx512bw", true)
.Case("avx512vl", true)
.Case("avx512vbmi", true)
@ -1244,6 +1251,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
.Case("avx512vl", HasAVX512VL)
.Case("avx512vbmi", HasAVX512VBMI)

View File

@ -71,6 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
bool HasAVX512BITALG = false;
bool HasAVX512BW = false;
bool HasAVX512VL = false;
bool HasAVX512VBMI = false;

View File

@ -8143,12 +8143,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
case X86::BI__builtin_ia32_vpopcntq_128:
case X86::BI__builtin_ia32_vpopcntw_128:
case X86::BI__builtin_ia32_vpopcntb_256:
case X86::BI__builtin_ia32_vpopcntd_256:
case X86::BI__builtin_ia32_vpopcntq_256:
case X86::BI__builtin_ia32_vpopcntw_256:
case X86::BI__builtin_ia32_vpopcntb_512:
case X86::BI__builtin_ia32_vpopcntd_512:
case X86::BI__builtin_ia32_vpopcntq_512: {
case X86::BI__builtin_ia32_vpopcntq_512:
case X86::BI__builtin_ia32_vpopcntw_512: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
return Builder.CreateCall(F, Ops);

View File

@ -7,6 +7,8 @@ set(files
arm64intr.h
avx2intrin.h
avx512bwintrin.h
avx512bitalgintrin.h
avx512vlbitalgintrin.h
avx512cdintrin.h
avx512vpopcntdqintrin.h
avx512dqintrin.h

View File

@ -0,0 +1,97 @@
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512BITALGINTRIN_H
#define __AVX512BITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
(__v32hi) _mm512_popcnt_epi16(__B),
(__v32hi) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
{
return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
__U,
__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi8(__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
(__v64qi) _mm512_popcnt_epi8(__B),
(__v64qi) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
{
return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
__U,
__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
{
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A,
(__v64qi) __B,
__U);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
{
return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1,
__A,
__B);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -0,0 +1,157 @@
/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLBITALGINTRIN_H
#define __AVX512VLBITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_popcnt_epi16(__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
{
return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
(__v16hi) _mm256_popcnt_epi16(__B),
(__v16hi) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
{
return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
__U,
__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_popcnt_epi16(__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
{
return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
(__v8hi) _mm128_popcnt_epi16(__B),
(__v8hi) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
{
return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
__U,
__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_popcnt_epi8(__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
{
return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
(__v32qi) _mm256_popcnt_epi8(__B),
(__v32qi) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
{
return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
__U,
__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_popcnt_epi8(__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
{
return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
(__v16qi) _mm128_popcnt_epi8(__B),
(__v16qi) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
{
return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
__U,
__B);
}
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
{
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
(__v32qi) __B,
__U);
}
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
{
return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
__A,
__B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
{
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
(__v16qi) __B,
__U);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
{
return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
__A,
__B);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -150,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512bwintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif
@ -167,6 +171,11 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512dqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>

View File

@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
// CHECK: lake{{.*}} #7
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"

View File

@ -0,0 +1,54 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
__m512i test_mm512_popcnt_epi16(__m512i __A) {
// CHECK-LABEL: @test_mm512_popcnt_epi16
// CHECK: @llvm.ctpop.v32i16
return _mm512_popcnt_epi16(__A);
}
__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi16
// CHECK: @llvm.ctpop.v32i16
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
return _mm512_mask_popcnt_epi16(__A, __U, __B);
}
__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi16
// CHECK: @llvm.ctpop.v32i16
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
return _mm512_maskz_popcnt_epi16(__U, __B);
}
__m512i test_mm512_popcnt_epi8(__m512i __A) {
// CHECK-LABEL: @test_mm512_popcnt_epi8
// CHECK: @llvm.ctpop.v64i8
return _mm512_popcnt_epi8(__A);
}
__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi8
// CHECK: @llvm.ctpop.v64i8
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_mask_popcnt_epi8(__A, __U, __B);
}
__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi8
// CHECK: @llvm.ctpop.v64i8
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_maskz_popcnt_epi8(__U, __B);
}
__mmask64 test_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_bitshuffle_epi64_mask
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
return _mm512_mask_bitshuffle_epi64_mask(__U, __A, __B);
}
__mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_bitshuffle_epi64_mask
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512
return _mm512_bitshuffle_epi64_mask(__A, __B);
}

View File

@ -0,0 +1,104 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
__m256i test_mm256_popcnt_epi16(__m256i __A) {
// CHECK-LABEL: @test_mm256_popcnt_epi16
// CHECK: @llvm.ctpop.v16i16
return _mm256_popcnt_epi16(__A);
}
__m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_popcnt_epi16
// CHECK: @llvm.ctpop.v16i16
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
return _mm256_mask_popcnt_epi16(__A, __U, __B);
}
__m256i test_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_popcnt_epi16
// CHECK: @llvm.ctpop.v16i16
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}}
return _mm256_maskz_popcnt_epi16(__U, __B);
}
__m128i test_mm128_popcnt_epi16(__m128i __A) {
// CHECK-LABEL: @test_mm128_popcnt_epi16
// CHECK: @llvm.ctpop.v8i16
return _mm128_popcnt_epi16(__A);
}
__m128i test_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
// CHECK-LABEL: @test_mm128_mask_popcnt_epi16
// CHECK: @llvm.ctpop.v8i16
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
return _mm128_mask_popcnt_epi16(__A, __U, __B);
}
__m128i test_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
// CHECK-LABEL: @test_mm128_maskz_popcnt_epi16
// CHECK: @llvm.ctpop.v8i16
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}}
return _mm128_maskz_popcnt_epi16(__U, __B);
}
__m256i test_mm256_popcnt_epi8(__m256i __A) {
// CHECK-LABEL: @test_mm256_popcnt_epi8
// CHECK: @llvm.ctpop.v32i8
return _mm256_popcnt_epi8(__A);
}
__m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_popcnt_epi8
// CHECK: @llvm.ctpop.v32i8
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_mask_popcnt_epi8(__A, __U, __B);
}
__m256i test_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_popcnt_epi8
// CHECK: @llvm.ctpop.v32i8
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_maskz_popcnt_epi8(__U, __B);
}
__m128i test_mm128_popcnt_epi8(__m128i __A) {
// CHECK-LABEL: @test_mm128_popcnt_epi8
// CHECK: @llvm.ctpop.v16i8
return _mm128_popcnt_epi8(__A);
}
__m128i test_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
// CHECK-LABEL: @test_mm128_mask_popcnt_epi8
// CHECK: @llvm.ctpop.v16i8
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm128_mask_popcnt_epi8(__A, __U, __B);
}
__m128i test_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
// CHECK-LABEL: @test_mm128_maskz_popcnt_epi8
// CHECK: @llvm.ctpop.v16i8
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm128_maskz_popcnt_epi8(__U, __B);
}
__mmask32 test_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_bitshuffle_epi32_mask
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
return _mm256_mask_bitshuffle_epi32_mask(__U, __A, __B);
}
__mmask32 test_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_bitshuffle_epi32_mask
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256
return _mm256_bitshuffle_epi32_mask(__A, __B);
}
__mmask16 test_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_mask_bitshuffle_epi16_mask
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
return _mm128_mask_bitshuffle_epi16_mask(__U, __A, __B);
}
__mmask16 test_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_bitshuffle_epi16_mask
// CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128
return _mm128_bitshuffle_epi16_mask(__A, __B);
}

View File

@ -110,3 +110,8 @@
// VPCLMULQDQ: "-target-feature" "+vpclmulqdq"
// NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq"
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BITALG %s
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BITALG %s
// BITALG: "-target-feature" "+avx512bitalg"
// NO-BITALG: "-target-feature" "-avx512bitalg"

View File

@ -1050,6 +1050,7 @@
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M32
// CHECK_ICL_M32: #define __AES__ 1
// CHECK_ICL_M32: #define __AVX2__ 1
// CHECK_ICL_M32: #define __AVX512BITALG__ 1
// CHECK_ICL_M32: #define __AVX512BW__ 1
// CHECK_ICL_M32: #define __AVX512CD__ 1
// CHECK_ICL_M32: #define __AVX512DQ__ 1
@ -1098,6 +1099,7 @@
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M64
// CHECK_ICL_M64: #define __AES__ 1
// CHECK_ICL_M64: #define __AVX2__ 1
// CHECK_ICL_M64: #define __AVX512BITALG__ 1
// CHECK_ICL_M64: #define __AVX512BW__ 1
// CHECK_ICL_M64: #define __AVX512CD__ 1
// CHECK_ICL_M64: #define __AVX512DQ__ 1

View File

@ -209,11 +209,33 @@
// AVX512VBMI: #define __SSE__ 1
// AVX512VBMI: #define __SSSE3__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALG %s
// AVX512BITALG: #define __AVX2__ 1
// AVX512BITALG: #define __AVX512BITALG__ 1
// AVX512BITALG: #define __AVX512BW__ 1
// AVX512BITALG: #define __AVX512F__ 1
// AVX512BITALG: #define __AVX__ 1
// AVX512BITALG: #define __SSE2_MATH__ 1
// AVX512BITALG: #define __SSE2__ 1
// AVX512BITALG: #define __SSE3__ 1
// AVX512BITALG: #define __SSE4_1__ 1
// AVX512BITALG: #define __SSE4_2__ 1
// AVX512BITALG: #define __SSE_MATH__ 1
// AVX512BITALG: #define __SSE__ 1
// AVX512BITALG: #define __SSSE3__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMINOAVX512BW %s
// AVX512VBMINOAVX512BW-NOT: #define __AVX512BW__ 1
// AVX512VBMINOAVX512BW-NOT: #define __AVX512VBMI__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALGNOAVX512BW %s
// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BITALG__ 1
// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BW__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -msse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SSE42POPCNT %s
// SSE42POPCNT: #define __POPCNT__ 1