From b6485252aac5d673762ab31204b6f0d97c6a3cb6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 20 Jul 2017 13:07:37 +0000
Subject: [PATCH] [X86][AVX512] Improve vector rotation constant folding tests

Test constant folding both on node creation (which already works) and once
the input nodes have been folded themselves (not working yet).

llvm-svn: 308611
---
 llvm/test/CodeGen/X86/avx512-rotate.ll | 35 +++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx512-rotate.ll b/llvm/test/CodeGen/X86/avx512-rotate.ll
index 98fa67ad793d..c2ea0bc4ab79 100644
--- a/llvm/test/CodeGen/X86/avx512-rotate.ll
+++ b/llvm/test/CodeGen/X86/avx512-rotate.ll
@@ -7,6 +7,9 @@ declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16
 declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
 declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
 
+declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
 ; Tests showing replacement of variable rotates with immediate splat versions.
 
 define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
@@ -236,6 +239,8 @@ define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x
 }
 
 ; Constant folding
+; We also test with a target shuffle so that this can't be constant folded upon creation, it must
+; wait until the target shuffle has been constant folded in combineX86ShufflesRecursively.
 
 define <8 x i64> @test_fold_rol_v8i64() {
 ; CHECK-LABEL: test_fold_rol_v8i64:
@@ -246,11 +251,35 @@ define <8 x i64> @test_fold_rol_v8i64() {
   ret <8 x i64> %res
 }
 
+define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
+; CHECK-LABEL: test_fold_rol_v16i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT:    vprolvd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> , <16 x i32> , <16 x i32> zeroinitializer, i16 -1)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> , <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res1
+}
+
 define <8 x i64> @test_fold_ror_v8i64() {
 ; CHECK-LABEL: test_fold_ror_v8i64:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,9223372036854775808,4611686018427387904,2,9223372036854775808,4,2,2]
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
+; CHECK-NEXT:    vprorvq {{.*}}(%rip), %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> , <8 x i64> , <8 x i64> zeroinitializer, i8 -1)
-  ret <8 x i64> %res
+  %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> , <8 x i64> , <8 x i64> zeroinitializer, i8 -1)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> , <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res1
+}
+
+define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
+; CHECK-LABEL: test_fold_ror_v16i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT:    vprorvd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> , <16 x i32> , <16 x i32> zeroinitializer, i16 -1)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> , <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res1
 }
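For readers checking the expected folds by hand, here is a minimal standalone sketch (plain C++, not part of the patch and not LLVM source) of the per-lane arithmetic the new tests want the compiler to perform: once both the rotated vector and the rotate amounts are compile-time constants, every 32-bit lane can be rotated at build time. The splat-of-1 source mirrors the vpbroadcastd CHECK lines above; the per-lane rotate amounts are hypothetical, since the tests' constant operands are elided in this copy of the patch.

#include <cstdint>
#include <cstdio>

// Rotate a 32-bit value left by Amt bits; the amount is taken modulo the bit
// width, matching the per-lane semantics of the prolv/prorv instructions.
static uint32_t rotl32(uint32_t V, uint32_t Amt) {
  Amt &= 31;
  return Amt == 0 ? V : (V << Amt) | (V >> (32 - Amt));
}

int main() {
  const uint32_t Src[4] = {1, 1, 1, 1};   // splat of 1, as in the CHECK lines
  const uint32_t Amt[4] = {0, 1, 2, 31};  // hypothetical per-lane amounts
  for (int I = 0; I != 4; ++I)
    std::printf("rol(%u, %u) = %u\n", (unsigned)Src[I], (unsigned)Amt[I],
                (unsigned)rotl32(Src[I], Amt[I]));
  return 0;
}

A rotate right by N bits is the same as a rotate left by (32 - N) & 31, so the prorv variants fold by the identical per-lane computation; the deleted vmovaps CHECK line above shows such a fully folded result for the 64-bit case.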