From 0edcd0278d05ab6a5196d0d2a03f943b11218bac Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 7 Aug 2018 19:21:05 +0000 Subject: [PATCH] [SLP] Fix insert point for reused extract instructions. Summary: Reworked the previously committed patch to insert shuffles for reused extract element instructions in the correct position. Previous logic was incorrect, and might lead to the crash with PHIs and EH instructions. Reviewers: efriedma, javed.absar Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D50143 llvm-svn: 339166 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 8 +- .../SLPVectorizer/AArch64/PR38339.ll | 95 +++++++++++++++++++ 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5c2efe885e22..32df6d581577 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } if (NeedToShuffleReuses) { // TODO: Merge this shuffle with the ReorderShuffleMask. - if (!E->ReorderIndices.empty()) + if (E->ReorderIndices.empty()) Builder.SetInsertPoint(VL0); - else if (auto *I = dyn_cast(V)) - Builder.SetInsertPoint(I->getParent(), - std::next(I->getIterator())); - else - Builder.SetInsertPoint(&F->getEntryBlock(), - F->getEntryBlock().getFirstInsertionPt()); V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll index 1ab4a13260ed..1a981a32804b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll @@ -27,3 +27,98 @@ define void @f1(<2 x i16> %x, i16* %a) { store i16 %t2, i16* %ptr3 ret void } + +define void @f2(<2 x i16> %x, i16* %a) { +; CHECK-LABEL: @f2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[CONT:%.*]] +; CHECK: cont: +; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ] +; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0 +; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2 +; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %cont + +cont: ; preds = %entry, %cont + %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ] + %aa = phi i16* [ %a, %entry ], [ undef, %cont ] + %t2 = extractelement <2 x i16> %xx, i32 0 + %t3 = extractelement <2 x i16> %xx, i32 1 + %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 + %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 + %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 + %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 + store i16 %t2, i16* %a + store i16 %t2, i16* %ptr0 + store i16 %t3, i16* %ptr1 + store i16 %t3, i16* %ptr2 + store i16 %t2, i16* %ptr3 + %a_val = load i16, i16* %a, align 2 + %cmp = icmp eq i16 %a_val, 0 + br i1 %cmp, label %cont, label %exit + +exit: ; preds = %cont + ret void +} + +define void @f3(<2 x i16> %x, i16* %a) { +; CHECK-LABEL: @f3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[CONT:%.*]] +; CHECK: cont: +; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ] +; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ] +; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0 +; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2 +; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %cont + +cont: ; preds = %entry, %cont + %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ] + %aa = phi i16* [ %a, %entry ], [ undef, %cont ] + %t2 = extractelement <2 x i16> %xx, i32 0 + %t3 = extractelement <2 x i16> %xx, i32 1 + %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0 + %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1 + %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2 + %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3 + store i16 %t3, i16* %a + store i16 %t3, i16* %ptr0 + store i16 %t2, i16* %ptr1 + store i16 %t2, i16* %ptr2 + store i16 %t3, i16* %ptr3 + %a_val = load i16, i16* %a, align 2 + %cmp = icmp eq i16 %a_val, 0 + br i1 %cmp, label %cont, label %exit + +exit: ; preds = %cont + ret void +}