[x86][icelake][vnni]

added vnni feature recognition
added intrinsics support for VNNI instructions
_mm256_mask_dpbusd_epi32
_mm256_maskz_dpbusd_epi32
_mm256_dpbusd_epi32
_mm256_mask_dpbusds_epi32
_mm256_maskz_dpbusds_epi32
_mm256_dpbusds_epi32
_mm256_mask_dpwssd_epi32
_mm256_maskz_dpwssd_epi32
_mm256_dpwssd_epi32
_mm256_mask_dpwssds_epi32
_mm256_maskz_dpwssds_epi32
_mm256_dpwssds_epi32
_mm128_mask_dpbusd_epi32
_mm128_maskz_dpbusd_epi32
_mm128_dpbusd_epi32
_mm128_mask_dpbusds_epi32
_mm128_maskz_dpbusds_epi32
_mm128_dpbusds_epi32
_mm128_mask_dpwssd_epi32
_mm128_maskz_dpwssd_epi32
_mm128_dpwssd_epi32
_mm128_mask_dpwssds_epi32
_mm128_maskz_dpwssds_epi32
_mm128_dpwssds_epi32
_mm512_mask_dpbusd_epi32
_mm512_maskz_dpbusd_epi32
_mm512_dpbusd_epi32
_mm512_mask_dpbusds_epi32
_mm512_maskz_dpbusds_epi32
_mm512_dpbusds_epi32
_mm512_mask_dpwssd_epi32
_mm512_maskz_dpwssd_epi32
_mm512_dpwssd_epi32
_mm512_mask_dpwssds_epi32
_mm512_maskz_dpwssds_epi32
_mm512_dpwssds_epi32
matching a similar work on the backend (D40208)
Differential Revision: https://reviews.llvm.org/D41558

llvm-svn: 321484
This commit is contained in:
Coby Tayree 2017-12-27 10:37:51 +00:00
parent 2268576fa0
commit 3d9c88cfec
13 changed files with 682 additions and 4 deletions

View File

@ -979,6 +979,31 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "",
TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi","","avx512vl")

View File

@ -2485,6 +2485,8 @@ def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
def mavx512vl : Flag<["-"], "mavx512vl">, Group<m_x86_Features_Group>;
def mno_avx512vl : Flag<["-"], "mno-avx512vl">, Group<m_x86_Features_Group>;
def mavx512vnni : Flag<["-"], "mavx512vnni">, Group<m_x86_Features_Group>;
def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group<m_x86_Features_Group>;
def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>;

View File

@ -136,6 +136,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "gfni", true);
setFeatureEnabledImpl(Features, "vpclmulqdq", true);
setFeatureEnabledImpl(Features, "avx512bitalg", true);
setFeatureEnabledImpl(Features, "avx512vnni", true);
// TODO: Add icelake features here.
LLVM_FALLTHROUGH;
case CK_Cannonlake:
@ -475,7 +476,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
Features["avx512vl"] = Features["avx512vbmi"] =
Features["avx512ifma"] = Features["avx512vpopcntdq"] =
Features["avx512bitalg"] = false;
Features["avx512bitalg"] = Features["avx512vnni"] = false;
break;
}
}
@ -606,7 +607,8 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
Name == "avx512vbmi" || Name == "avx512ifma" ||
Name == "avx512vpopcntdq" || Name == "avx512bitalg") {
Name == "avx512vpopcntdq" || Name == "avx512bitalg" ||
Name == "avx512vnni") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
// Enable BWI instruction if VBMI / BITALG is being enabled.
@ -698,6 +700,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512CD = true;
} else if (Feature == "+avx512vpopcntdq") {
HasAVX512VPOPCNTDQ = true;
} else if (Feature == "+avx512vnni") {
HasAVX512VNNI = true;
} else if (Feature == "+avx512er") {
HasAVX512ER = true;
} else if (Feature == "+avx512pf") {
@ -1039,6 +1043,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512CD__");
if (HasAVX512VPOPCNTDQ)
Builder.defineMacro("__AVX512VPOPCNTDQ__");
if (HasAVX512VNNI)
Builder.defineMacro("__AVX512VNNI__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
if (HasAVX512PF)
@ -1182,6 +1188,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512f", true)
.Case("avx512cd", true)
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
@ -1248,6 +1255,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512f", SSELevel >= AVX512F)
.Case("avx512cd", HasAVX512CD)
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)

View File

@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasF16C = false;
bool HasAVX512CD = false;
bool HasAVX512VPOPCNTDQ = false;
bool HasAVX512VNNI = false;
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;

View File

@ -24,6 +24,8 @@ set(files
avx512vldqintrin.h
avx512vlintrin.h
avx512vpopcntdqvlintrin.h
avx512vnniintrin.h
avx512vlvnniintrin.h
avxintrin.h
bmi2intrin.h
bmiintrin.h

View File

@ -0,0 +1,254 @@
/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLVNNIINTRIN_H
#define __AVX512VLVNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni")))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -0,0 +1,146 @@
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VNNIINTRIN_H
#define __AVX512VNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni")))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) __U);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -167,6 +167,15 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512vpopcntdqvlintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

View File

@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
// CHECK: lake{{.*}} #7
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"

View File

@ -0,0 +1,148 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpbusd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusd.256
return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpbusd_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpbusd.256
return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpbusd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusd.256
return _mm256_dpbusd_epi32(__S, __A, __B);
}
__m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpbusds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusds.256
return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpbusds_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpbusds.256
return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpbusds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusds.256
return _mm256_dpbusds_epi32(__S, __A, __B);
}
__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpwssd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssd.256
return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpwssd_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpwssd.256
return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpwssd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssd.256
return _mm256_dpwssd_epi32(__S, __A, __B);
}
__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpwssds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssds.256
return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpwssds_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpwssds.256
return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpwssds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssds.256
return _mm256_dpwssds_epi32(__S, __A, __B);
}
__m128i test_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_mask_dpbusd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusd.128
return _mm128_mask_dpbusd_epi32(__S, __U, __A, __B);
}
__m128i test_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_maskz_dpbusd_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpbusd.128
return _mm128_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
__m128i test_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_dpbusd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusd.128
return _mm128_dpbusd_epi32(__S, __A, __B);
}
__m128i test_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_mask_dpbusds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusds.128
return _mm128_mask_dpbusds_epi32(__S, __U, __A, __B);
}
__m128i test_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_maskz_dpbusds_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpbusds.128
return _mm128_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
__m128i test_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_dpbusds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusds.128
return _mm128_dpbusds_epi32(__S, __A, __B);
}
__m128i test_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_mask_dpwssd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssd.128
return _mm128_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m128i test_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_maskz_dpwssd_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpwssd.128
return _mm128_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m128i test_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_dpwssd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssd.128
return _mm128_dpwssd_epi32(__S, __A, __B);
}
__m128i test_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_mask_dpwssds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssds.128
return _mm128_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m128i test_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_maskz_dpwssds_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpwssds.128
return _mm128_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m128i test_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm128_dpwssds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssds.128
return _mm128_dpwssds_epi32(__S, __A, __B);
}

View File

@ -0,0 +1,76 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
__m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpbusd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusd.512
return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpbusd_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpbusd.512
return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpbusd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusd.512
return _mm512_dpbusd_epi32(__S, __A, __B);
}
__m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpbusds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusds.512
return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpbusds_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpbusds.512
return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpbusds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpbusds.512
return _mm512_dpbusds_epi32(__S, __A, __B);
}
__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpwssd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssd.512
return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpwssd_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpwssd.512
return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpwssd_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssd.512
return _mm512_dpwssd_epi32(__S, __A, __B);
}
__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpwssds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssds.512
return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpwssds_epi32
// CHECK: @llvm.x86.avx512.maskz.vpdpwssds.512
return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpwssds_epi32
// CHECK: @llvm.x86.avx512.mask.vpdpwssds.512
return _mm512_dpwssds_epi32(__S, __A, __B);
}

View File

@ -115,3 +115,8 @@
// BITALG: "-target-feature" "+avx512bitalg"
// NO-BITALG: "-target-feature" "-avx512bitalg"
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VNNI %s
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VNNI %s
// VNNI: "-target-feature" "+avx512vnni"
// NO-VNNI: "-target-feature" "-avx512vnni"

View File

@ -1058,6 +1058,7 @@
// CHECK_ICL_M32: #define __AVX512IFMA__ 1
// CHECK_ICL_M32: #define __AVX512VBMI__ 1
// CHECK_ICL_M32: #define __AVX512VL__ 1
// CHECK_ICL_M32: #define __AVX512VNNI__ 1
// CHECK_ICL_M32: #define __AVX__ 1
// CHECK_ICL_M32: #define __BMI2__ 1
// CHECK_ICL_M32: #define __BMI__ 1
@ -1107,6 +1108,7 @@
// CHECK_ICL_M64: #define __AVX512IFMA__ 1
// CHECK_ICL_M64: #define __AVX512VBMI__ 1
// CHECK_ICL_M64: #define __AVX512VL__ 1
// CHECK_ICL_M64: #define __AVX512VNNI__ 1
// CHECK_ICL_M64: #define __AVX__ 1
// CHECK_ICL_M64: #define __BMI2__ 1
// CHECK_ICL_M64: #define __BMI__ 1