[X86] Add builtins for vpermq/vpermpd instructions to enable target feature checking.

llvm-svn: 334311
This commit is contained in:
Craig Topper 2018-06-08 18:00:25 +00:00
parent 9d3962f4f1
commit 03f4f04b91
9 changed files with 46 additions and 54 deletions

View File

@ -632,8 +632,10 @@ TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permdf256, "V4dV4dIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permdi256, "V4LLiV4LLiIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "n", "avx2")
@ -1710,6 +1712,8 @@ TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512
TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_permdi512, "V8LLiV8LLiIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "nc", "avx512f")

View File

@ -9433,6 +9433,24 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
makeArrayRef(Indices, NumElts),
"shufp");
}
case X86::BI__builtin_ia32_permdi256:
case X86::BI__builtin_ia32_permdf256:
case X86::BI__builtin_ia32_permdi512:
case X86::BI__builtin_ia32_permdf512: {
unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
llvm::Type *Ty = Ops[0]->getType();
unsigned NumElts = Ty->getVectorNumElements();
// These intrinsics operate on 256-bit lanes of four 64-bit elements.
uint32_t Indices[8];
for (unsigned l = 0; l != NumElts; l += 4)
for (unsigned i = 0; i != 4; ++i)
Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
makeArrayRef(Indices, NumElts),
"perm");
}
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512: {

View File

@ -825,12 +825,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
}
#define _mm256_permute4x64_pd(V, M) \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \
(__v4df)_mm256_undefined_pd(), \
((M) >> 0) & 0x3, \
((M) >> 2) & 0x3, \
((M) >> 4) & 0x3, \
((M) >> 6) & 0x3)
(__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
@ -839,12 +834,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
}
#define _mm256_permute4x64_epi64(V, M) \
(__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \
(__v4di)_mm256_undefined_si256(), \
((M) >> 0) & 0x3, \
((M) >> 2) & 0x3, \
((M) >> 4) & 0x3, \
((M) >> 6) & 0x3)
(__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))
#define _mm256_permute2x128_si256(V1, V2, M) \
(__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))

View File

@ -8339,16 +8339,7 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
(__mmask8)(U), (int)(R))
#define _mm512_permutex_pd(X, C) \
(__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
(__v8df)_mm512_undefined_pd(), \
0 + (((C) >> 0) & 0x3), \
0 + (((C) >> 2) & 0x3), \
0 + (((C) >> 4) & 0x3), \
0 + (((C) >> 6) & 0x3), \
4 + (((C) >> 0) & 0x3), \
4 + (((C) >> 2) & 0x3), \
4 + (((C) >> 4) & 0x3), \
4 + (((C) >> 6) & 0x3))
(__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))
#define _mm512_mask_permutex_pd(W, U, X, C) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@ -8361,16 +8352,7 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
(__v8df)_mm512_setzero_pd())
#define _mm512_permutex_epi64(X, C) \
(__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
(__v8di)_mm512_undefined_epi32(), \
0 + (((C) >> 0) & 0x3), \
0 + (((C) >> 2) & 0x3), \
0 + (((C) >> 4) & 0x3), \
0 + (((C) >> 6) & 0x3), \
4 + (((C) >> 0) & 0x3), \
4 + (((C) >> 2) & 0x3), \
4 + (((C) >> 4) & 0x3), \
4 + (((C) >> 6) & 0x3))
(__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))
#define _mm512_mask_permutex_epi64(W, U, X, C) \
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \

View File

@ -7921,10 +7921,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
(__mmask8)(mask), (int)(scale))
#define _mm256_permutex_pd(X, C) \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
(__v4df)_mm256_undefined_pd(), \
((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
((C) >> 4) & 0x3, ((C) >> 6) & 0x3)
(__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))
#define _mm256_mask_permutex_pd(W, U, X, C) \
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
@ -7937,10 +7934,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
(__v4df)_mm256_setzero_pd())
#define _mm256_permutex_epi64(X, C) \
(__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
(__v4di)_mm256_undefined_si256(), \
((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
((C) >> 4) & 0x3, ((C) >> 6) & 0x3)
(__m256d)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))
#define _mm256_mask_permutex_epi64(W, U, X, C) \
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \

View File

@ -2757,6 +2757,10 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vec_set_v32qi:
i = 2; l = 0; u = 31;
break;
case X86::BI__builtin_ia32_permdf256:
case X86::BI__builtin_ia32_permdi256:
case X86::BI__builtin_ia32_permdf512:
case X86::BI__builtin_ia32_permdi512:
case X86::BI__builtin_ia32_vpermilps:
case X86::BI__builtin_ia32_vpermilps256:
case X86::BI__builtin_ia32_vpermilpd512:

View File

@ -919,13 +919,13 @@ __m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) {
__m256i test_mm256_permute4x64_epi64(__m256i a) {
// CHECK-LABEL: test_mm256_permute4x64_epi64
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
return _mm256_permute4x64_epi64(a, 35);
}
__m256d test_mm256_permute4x64_pd(__m256d a) {
// CHECK-LABEL: test_mm256_permute4x64_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
return _mm256_permute4x64_pd(a, 25);
}

View File

@ -6854,40 +6854,40 @@ __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __m
__m512d test_mm512_permutex_pd(__m512d __X) {
// CHECK-LABEL: @test_mm512_permutex_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
return _mm512_permutex_pd(__X, 0);
}
__m512d test_mm512_mask_permutex_pd(__m512d __W, __mmask8 __U, __m512d __X) {
// CHECK-LABEL: @test_mm512_mask_permutex_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_permutex_pd(__W, __U, __X, 0);
}
__m512d test_mm512_maskz_permutex_pd(__mmask8 __U, __m512d __X) {
// CHECK-LABEL: @test_mm512_maskz_permutex_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_permutex_pd(__U, __X, 0);
}
__m512i test_mm512_permutex_epi64(__m512i __X) {
// CHECK-LABEL: @test_mm512_permutex_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
return _mm512_permutex_epi64(__X, 0);
}
__m512i test_mm512_mask_permutex_epi64(__m512i __W, __mmask8 __M, __m512i __X) {
// CHECK-LABEL: @test_mm512_mask_permutex_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_permutex_epi64(__W, __M, __X, 0);
}
__m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) {
// CHECK-LABEL: @test_mm512_maskz_permutex_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_permutex_epi64(__M, __X, 0);
}

View File

@ -7349,40 +7349,40 @@ __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m25
__m256d test_mm256_permutex_pd(__m256d __X) {
// CHECK-LABEL: @test_mm256_permutex_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
return _mm256_permutex_pd(__X, 3);
}
__m256d test_mm256_mask_permutex_pd(__m256d __W, __mmask8 __U, __m256d __X) {
// CHECK-LABEL: @test_mm256_mask_permutex_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_permutex_pd(__W, __U, __X, 1);
}
__m256d test_mm256_maskz_permutex_pd(__mmask8 __U, __m256d __X) {
// CHECK-LABEL: @test_mm256_maskz_permutex_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_permutex_pd(__U, __X, 1);
}
__m256i test_mm256_permutex_epi64(__m256i __X) {
// CHECK-LABEL: @test_mm256_permutex_epi64
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
return _mm256_permutex_epi64(__X, 3);
}
__m256i test_mm256_mask_permutex_epi64(__m256i __W, __mmask8 __M, __m256i __X) {
// CHECK-LABEL: @test_mm256_mask_permutex_epi64
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_permutex_epi64(__W, __M, __X, 3);
}
__m256i test_mm256_maskz_permutex_epi64(__mmask8 __M, __m256i __X) {
// CHECK-LABEL: @test_mm256_maskz_permutex_epi64
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_permutex_epi64(__M, __X, 3);
}