From d343697f1ec3fb9cb4c648546075acf21645aa3c Mon Sep 17 00:00:00 2001
From: Michael Zuckerman
Date: Sun, 30 Oct 2016 14:54:05 +0000
Subject: [PATCH] Fixing "type" issue for (epi32) and replacing hardcoded inf
 with clang builtin inf "__builtin_inff()" for float ({max|min}_{pd|ps})

llvm-svn: 285519
---
 clang/lib/Headers/avx512fintrin.h            |  32 +--
 clang/test/CodeGen/avx512-reduceIntrin.c     | 192 ++++++++----------
 .../test/CodeGen/avx512-reduceMinMaxIntrin.c |   8 +-
 3 files changed, 100 insertions(+), 132 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 5703535a638d..d08d7574f7be 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9784,43 +9784,43 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
 
 #define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
   __m256##T1 Vec256 = \
-             (__m256##T1)__builtin_shufflevector( \
+             (__m256##T1)(__builtin_shufflevector( \
                                 (__v16s##T2)Vec512, \
                                 (__v16s##T2)Vec512, \
                                 0, 1, 2, 3, 4, 5, 6, 7) \
               Operator \
-             (__m256##T1)__builtin_shufflevector( \
+              __builtin_shufflevector( \
                                 (__v16s##T2)Vec512, \
                                 (__v16s##T2)Vec512, \
-                                8, 9, 10, 11, 12, 13, 14, 15); \
+                                8, 9, 10, 11, 12, 13, 14, 15)); \
   __m128##T1 Vec128 = \
-             (__m128##T1)__builtin_shufflevector( \
+             (__m128##T1)(__builtin_shufflevector( \
                                 (__v8s##T2)Vec256, \
                                 (__v8s##T2)Vec256, \
                                 0, 1, 2, 3) \
               Operator \
-             (__m128##T1)__builtin_shufflevector( \
+              __builtin_shufflevector( \
                                 (__v8s##T2)Vec256, \
                                 (__v8s##T2)Vec256, \
-                                4, 5, 6, 7); \
-  Vec128 = (__m128##T1)__builtin_shufflevector( \
+                                4, 5, 6, 7)); \
+  Vec128 = (__m128##T1)(__builtin_shufflevector( \
                                 (__v4s##T2)Vec128, \
                                 (__v4s##T2)Vec128, \
                                 0, 1, -1, -1) \
               Operator \
-             (__m128##T1)__builtin_shufflevector( \
+              __builtin_shufflevector( \
                                 (__v4s##T2)Vec128, \
                                 (__v4s##T2)Vec128, \
-                                2, 3, -1, -1); \
-  Vec128 = (__m128##T1)__builtin_shufflevector( \
+                                2, 3, -1, -1)); \
+  Vec128 = (__m128##T1)(__builtin_shufflevector( \
                                 (__v4s##T2)Vec128, \
                                 (__v4s##T2)Vec128, \
                                 0, -1, -1, -1) \
               Operator \
-             (__m128##T1)__builtin_shufflevector( \
+              __builtin_shufflevector( \
                                 (__v4s##T2)Vec128, \
                                 (__v4s##T2)Vec128, \
-                                1, -1, -1, -1); \
+                                1, -1, -1, -1)); \
   return Vec128[0]; \
 })
 
@@ -10019,7 +10019,7 @@ _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
 
 static __inline__ double __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
-  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(0xFFF0000000000000),
+  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
                                   max_pd, d, f, pd, __M);
 }
 
@@ -10037,7 +10037,7 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
 
 static __inline__ double __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
-  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(0x7FF0000000000000),
+  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
                                   min_pd, d, f, pd, __M);
 }
 
@@ -10162,7 +10162,7 @@ _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
 
 static __inline__ float __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
-  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(0xFF800000), max_ps, , f,
+  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
                                   ps, __M);
 }
 
@@ -10180,7 +10180,7 @@ _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
 
 static __inline__ float __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
-  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(0x7F800000), min_ps, , f,
+  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
                                   ps, __M);
 }
 
diff --git a/clang/test/CodeGen/avx512-reduceIntrin.c b/clang/test/CodeGen/avx512-reduceIntrin.c
index b4c2d196ad07..d24cd0e5634d 100644
--- a/clang/test/CodeGen/avx512-reduceIntrin.c
+++ b/clang/test/CodeGen/avx512-reduceIntrin.c
@@ -124,25 +124,17 @@ long long test_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W){
 int test_mm512_reduce_add_epi32(__m512i __W){
   // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle.i to <4 x i64>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle1.i to <4 x i64>
-  // CHECK: %add.i = add <4 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <4 x i64> %add.i to <8 x i32>
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle2.i to <2 x i64>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle3.i to <2 x i64>
-  // CHECK: %add4.i = add <2 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <2 x i64> %add4.i to <4 x i32>
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle6.i to <2 x i64>
-  // CHECK: %add7.i = add <2 x i64> {{.*}}, %add4.i
-  // CHECK: {{.*}} = bitcast <2 x i64> %add7.i to <4 x i32>
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle9.i to <2 x i64>
-  // CHECK: %add10.i = add <2 x i64> {{.*}}, %add7.i
-  // CHECK: %vecext.i = extractelement <2 x i64> %add10.i, i32 0
+  // CHECK: %add.i = add <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %add4.i = add <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %add4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %add7.i = add <4 x i32> %shuffle6.i, %add4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %add7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %add10.i = add <4 x i32> %shuffle9.i, %add7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %add10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_reduce_add_epi32(__W);
@@ -151,25 +143,17 @@ int test_mm512_reduce_add_epi32(__m512i __W){
 int test_mm512_reduce_mul_epi32(__m512i __W){
   // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle.i to <4 x i64>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle1.i to <4 x i64>
-  // CHECK: %mul.i = mul <4 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <4 x i64> %mul.i to <8 x i32>
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle2.i to <2 x i64>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle3.i to <2 x i64>
-  // CHECK: %mul4.i = mul <2 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <2 x i64> %mul4.i to <4 x i32>
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle6.i to <2 x i64>
-  // CHECK: %mul7.i = mul <2 x i64> {{.*}}, %mul4.i
-  // CHECK: {{.*}} = bitcast <2 x i64> %mul7.i to <4 x i32>
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle9.i to <2 x i64>
-  // CHECK: %mul10.i = mul <2 x i64> {{.*}}, %mul7.i
-  // CHECK: %vecext.i = extractelement <2 x i64> %mul10.i, i32 0
+  // CHECK: %mul.i = mul <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %mul4.i = mul <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %mul4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %mul7.i = mul <4 x i32> %shuffle6.i, %mul4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %mul7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %mul10.i = mul <4 x i32> %shuffle9.i, %mul7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %mul10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_reduce_mul_epi32(__W);
@@ -179,16 +163,16 @@ int test_mm512_reduce_or_epi32(__m512i __W){
   // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: %or27.i = or <8 x i32> %shuffle.i, %shuffle1.i
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> %or27.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> %or27.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %or428.i = or <4 x i32> %shuffle2.i, %shuffle3.i
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> %or428.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %or729.i = or <4 x i32> %shuffle6.i, %or428.i
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> %or729.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %or1030.i = or <4 x i32> %shuffle9.i, %or729.i
-  // CHECK: %or10.i = bitcast <4 x i32> %or1030.i to <2 x i64>
-  // CHECK: %vecext.i = extractelement <2 x i64> %or10.i, i32 0
+  // CHECK: %or.i = or <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %or4.i = or <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %or4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %or7.i = or <4 x i32> %shuffle6.i, %or4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %or7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %or10.i = or <4 x i32> %shuffle9.i, %or7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %or10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_reduce_or_epi32(__W);
@@ -198,16 +182,16 @@ int test_mm512_reduce_and_epi32(__m512i __W){
   // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: %and27.i = and <8 x i32> %shuffle.i, %shuffle1.i
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> %and27.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> %and27.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %and428.i = and <4 x i32> %shuffle2.i, %shuffle3.i
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> %and428.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %and729.i = and <4 x i32> %shuffle6.i, %and428.i
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> %and729.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %and1030.i = and <4 x i32> %shuffle9.i, %and729.i
-  // CHECK: %and10.i = bitcast <4 x i32> %and1030.i to <2 x i64>
-  // CHECK: %vecext.i = extractelement <2 x i64> %and10.i, i32 0
+  // CHECK: %and.i = and <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %and4.i = and <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %and4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %and7.i = and <4 x i32> %shuffle6.i, %and4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %and7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %and10.i = and <4 x i32> %shuffle9.i, %and7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %and10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_reduce_and_epi32(__W);
@@ -218,25 +202,17 @@ int test_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W){
   // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
   // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> zeroinitializer
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle.i to <4 x i64>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle1.i to <4 x i64>
-  // CHECK: %add.i = add <4 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <4 x i64> %add.i to <8 x i32>
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle2.i to <2 x i64>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle3.i to <2 x i64>
-  // CHECK: %add4.i = add <2 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <2 x i64> %add4.i to <4 x i32>
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle6.i to <2 x i64>
-  // CHECK: %add7.i = add <2 x i64> {{.*}}, %add4.i
-  // CHECK: {{.*}} = bitcast <2 x i64> %add7.i to <4 x i32>
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle9.i to <2 x i64>
-  // CHECK: %add10.i = add <2 x i64> {{.*}}, %add7.i
-  // CHECK: %vecext.i = extractelement <2 x i64> %add10.i, i32 0
+  // CHECK: %add.i = add <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %add4.i = add <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %add4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %add7.i = add <4 x i32> %shuffle6.i, %add4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %add7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %add10.i = add <4 x i32> %shuffle9.i, %add7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %add10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_mask_reduce_add_epi32(__M, __W);
@@ -247,25 +223,17 @@ int test_mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W){
   // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
   // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle.i to <4 x i64>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: {{.*}} = bitcast <8 x i32> %shuffle1.i to <4 x i64>
-  // CHECK: %mul.i = mul <4 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <4 x i64> %mul.i to <8 x i32>
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle2.i to <2 x i64>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> {{.*}}, <8 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle3.i to <2 x i64>
-  // CHECK: %mul4.i = mul <2 x i64> {{.*}}, {{.*}}
-  // CHECK: {{.*}} = bitcast <2 x i64> %mul4.i to <4 x i32>
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle6.i to <2 x i64>
-  // CHECK: %mul7.i = mul <2 x i64> {{.*}}, %mul4.i
-  // CHECK: {{.*}} = bitcast <2 x i64> %mul7.i to <4 x i32>
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <4 x i32>
-  // CHECK: {{.*}} = bitcast <4 x i32> %shuffle9.i to <2 x i64>
-  // CHECK: %mul10.i = mul <2 x i64> {{.*}}, %mul7.i
-  // CHECK: %vecext.i = extractelement <2 x i64> %mul10.i, i32 0
+  // CHECK: %mul.i = mul <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %mul4.i = mul <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %mul4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %mul7.i = mul <4 x i32> %shuffle6.i, %mul4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %mul7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %mul10.i = mul <4 x i32> %shuffle9.i, %mul7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %mul10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_mask_reduce_mul_epi32(__M, __W);
@@ -277,16 +245,16 @@ int test_mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W){
   // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: %and28.i = and <8 x i32> %shuffle.i, %shuffle1.i
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> %and28.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> %and28.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %and429.i = and <4 x i32> %shuffle2.i, %shuffle3.i
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> %and429.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %and730.i = and <4 x i32> %shuffle6.i, %and429.i
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> %and730.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %and1031.i = and <4 x i32> %shuffle9.i, %and730.i
-  // CHECK: %and10.i = bitcast <4 x i32> %and1031.i to <2 x i64>
-  // CHECK: %vecext.i = extractelement <2 x i64> %and10.i, i32 0
+  // CHECK: %and.i = and <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %and4.i = and <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %and4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %and7.i = and <4 x i32> %shuffle6.i, %and4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %and7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %and10.i = and <4 x i32> %shuffle9.i, %and7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %and10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_mask_reduce_and_epi32(__M, __W);
@@ -298,16 +266,16 @@ int test_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W){
   // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> zeroinitializer
   // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
   // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32>
-  // CHECK: %or28.i = or <8 x i32> %shuffle.i, %shuffle1.i
-  // CHECK: %shuffle2.i = shufflevector <8 x i32> %or28.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %shuffle3.i = shufflevector <8 x i32> %or28.i, <8 x i32> undef, <4 x i32>
-  // CHECK: %or429.i = or <4 x i32> %shuffle2.i, %shuffle3.i
-  // CHECK: %shuffle6.i = shufflevector <4 x i32> %or429.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %or730.i = or <4 x i32> %shuffle6.i, %or429.i
-  // CHECK: %shuffle9.i = shufflevector <4 x i32> %or730.i, <4 x i32> undef, <4 x i32>
-  // CHECK: %or1031.i = or <4 x i32> %shuffle9.i, %or730.i
-  // CHECK: %or10.i = bitcast <4 x i32> %or1031.i to <2 x i64>
-  // CHECK: %vecext.i = extractelement <2 x i64> %or10.i, i32 0
+  // CHECK: %or.i = or <8 x i32> %shuffle.i, %shuffle1.i
+  // CHECK: %shuffle2.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %shuffle3.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32>
+  // CHECK: %or4.i = or <4 x i32> %shuffle2.i, %shuffle3.i
+  // CHECK: %shuffle6.i = shufflevector <4 x i32> %or4.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %or7.i = or <4 x i32> %shuffle6.i, %or4.i
+  // CHECK: %shuffle9.i = shufflevector <4 x i32> %or7.i, <4 x i32> undef, <4 x i32>
+  // CHECK: %or10.i = or <4 x i32> %shuffle9.i, %or7.i
+  // CHECK: {{.*}} = bitcast <4 x i32> %or10.i to <2 x i64>
+  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
   // CHECK: %conv.i = trunc i64 %vecext.i to i32
   // CHECK: ret i32 %conv.i
   return _mm512_mask_reduce_or_epi32(__M, __W);
diff --git a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c
index 80ad2d5cbc4e..8249b229c8f5 100644
--- a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c
+++ b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c
@@ -134,7 +134,7 @@ unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
 
 long long test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
   // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
-  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double>
+  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double>
   // CHECK: %shuffle1.i = shufflevector <8 x double> %tmp1, <8 x double> undef, <8 x i32>
   // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %tmp1, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
   // CHECK: %shuffle4.i = shufflevector <8 x double> %tmp2, <8 x double> undef, <8 x i32>
@@ -187,7 +187,7 @@ long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
 
 double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
   // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
-  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double>
+  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double>
   // CHECK: %shuffle1.i = shufflevector <8 x double> %tmp1, <8 x double> undef, <8 x i32>
   // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %tmp1, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
   // CHECK: %shuffle4.i = shufflevector <8 x double> %tmp2, <8 x double> undef, <8 x i32>
@@ -359,7 +359,7 @@ unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
 
 float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
   // CHECK: %tmp = bitcast i16 %__M to <16 x i1>
-  // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x float>
+  // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x float>
   // CHECK: %shuffle1.i = shufflevector <16 x float> %tmp1, <16 x float> undef, <16 x i32>
   // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp1, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
   // CHECK: %shuffle4.i = shufflevector <16 x float> %tmp2, <16 x float> undef, <16 x i32>
@@ -421,7 +421,7 @@ unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
 
 float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
   // CHECK: %tmp = bitcast i16 %__M to <16 x i1>
-  // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x float>
+  // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x float>
   // CHECK: %shuffle1.i = shufflevector <16 x float> %tmp1, <16 x float> undef, <16 x i32>
   // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp1, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
   // CHECK: %shuffle4.i = shufflevector <16 x float> %tmp2, <16 x float> undef, <16 x i32>
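
Why the two fixes matter, in miniature: _mm512_set1_pd() takes a double, so passing the IEEE-754 bit pattern of an infinity (e.g. 0xFFF0000000000000) converts that integer arithmetically into a large finite value instead of reinterpreting its bits, and applying the reduction operator at 64-bit width to what are really 32-bit lanes lets carries spill across lane boundaries. The standalone sketch below is an editorial illustration, not part of the patch; it uses GNU vector extensions and builtins available in both clang and gcc, and assumes a little-endian target such as x86.

#include <stdio.h>

typedef int       v4si __attribute__((vector_size(16)));
typedef long long v2di __attribute__((vector_size(16)));

int main(void) {
  /* Fix 1: the old max_pd identity handed an integer bit pattern to
     _mm512_set1_pd(), which converts it arithmetically.  The seed was
     therefore a huge positive finite double, not -inf, so a max
     reduction over all-negative inputs could return the seed itself. */
  double old_seed = (double)0xFFF0000000000000ULL; /* approx. 1.84e19 */
  double new_seed = -__builtin_inf();              /* a genuine -inf  */
  printf("old seed: %g, new seed: %g\n", old_seed, new_seed);

  /* Fix 2: reducing epi32 at <2 x i64> width instead of <4 x i32>
     lets a carry out of bit 31 corrupt the neighbouring 32-bit lane.
     Adding lanes {-1, 0} + {1, 0} should give {0, 0}; as one i64 the
     carry leaks: 0xFFFFFFFF + 1 = 0x100000000 sets bit 32. */
  v4si a = { -1, 0, 0, 0 };
  v4si b = {  1, 0, 0, 0 };
  v4si ok  = a + b;             /* elementwise i32 add: lane 1 stays 0 */
  v2di bad = (v2di)a + (v2di)b; /* same bits added as one i64 element  */
  printf("i32-wide add, lane 1: %d\n", ok[1]);          /* prints 0 */
  printf("i64-wide add, lane 1: %d\n", ((v4si)bad)[1]); /* prints 1 */
  return 0;
}

Using the builtin rather than a literal keeps the identity value exact and self-documenting, and removes any dependence on how an integer literal happens to convert to floating point.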