[SLP] Fix insert point for reused extract instructions.

Summary:
Reworked the previously committed patch to insert shuffles for reused
extract element instructions in the correct position. The previous logic
was incorrect and could lead to a crash with PHIs and EH instructions.

Reviewers: efriedma, javed.absar

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D50143

llvm-svn: 339166
This commit is contained in:
Alexey Bataev 2018-08-07 19:21:05 +00:00
parent f4f5b7eea3
commit 0edcd0278d
2 changed files with 96 additions and 7 deletions

View File

@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
if (NeedToShuffleReuses) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
if (!E->ReorderIndices.empty())
if (E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
else if (auto *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I->getParent(),
std::next(I->getIterator()));
else
Builder.SetInsertPoint(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}

View File

@ -27,3 +27,98 @@ define void @f1(<2 x i16> %x, i16* %a) {
store i16 %t2, i16* %ptr3
ret void
}
; Test @f2: both lanes of the loop-carried vector %xx are extracted and each
; extract is used more than once: the lanes are stored in the order 0, 1, 1, 0
; to four adjacent i16 slots, and lane 0 is additionally stored to %a.
; The assertions below expect the four adjacent stores to become a single
; <4 x i16> store fed by one shufflevector with mask <0, 1, 1, 0>, and expect
; that shufflevector to appear in %cont directly after the two phi nodes.
define void @f2(<2 x i16> %x, i16* %a) {
; CHECK-LABEL: @f2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[CONT:%.*]]
; CHECK: cont:
; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %cont
cont: ; preds = %entry, %cont
%xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
; %aa has no users in this function; the stores below go through the argument %a.
%aa = phi i16* [ %a, %entry ], [ undef, %cont ]
; Scalar lanes 0 and 1 of %xx; each is reused by several stores below.
%t2 = extractelement <2 x i16> %xx, i32 0
%t3 = extractelement <2 x i16> %xx, i32 1
%ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
%ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
%ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
%ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
; Extra scalar user of lane 0 outside the four-wide store group; the expected
; output reads it back as element 0 of the shuffle.
store i16 %t2, i16* %a
; Four adjacent stores using lanes 0, 1, 1, 0 (the reuse pattern).
store i16 %t2, i16* %ptr0
store i16 %t3, i16* %ptr1
store i16 %t3, i16* %ptr2
store i16 %t2, i16* %ptr3
%a_val = load i16, i16* %a, align 2
%cmp = icmp eq i16 %a_val, 0
br i1 %cmp, label %cont, label %exit
exit: ; preds = %cont
ret void
}
; Test @f3: same shape as a reused-extract store group, but the lanes of %xx
; are stored in the order 1, 0, 0, 1 to four adjacent i16 slots, and lane 1 is
; additionally stored to %a.
; The assertions below expect two shufflevectors in %cont directly after the
; phi nodes: first a "reorder" shuffle with mask <1, 0>, then a reuse shuffle
; of that result with mask <0, 1, 1, 0>, which feeds a single <4 x i16> store.
define void @f3(<2 x i16> %x, i16* %a) {
; CHECK-LABEL: @f3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[CONT:%.*]]
; CHECK: cont:
; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %cont
cont: ; preds = %entry, %cont
%xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
; %aa has no users in this function; the stores below go through the argument %a.
%aa = phi i16* [ %a, %entry ], [ undef, %cont ]
; Scalar lanes 0 and 1 of %xx; each is reused by several stores below.
%t2 = extractelement <2 x i16> %xx, i32 0
%t3 = extractelement <2 x i16> %xx, i32 1
%ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
%ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
%ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
%ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
; Extra scalar user of lane 1 outside the four-wide store group; the expected
; output reads it back as element 0 of the final shuffle.
store i16 %t3, i16* %a
; Four adjacent stores using lanes 1, 0, 0, 1 (reversed order plus reuse).
store i16 %t3, i16* %ptr0
store i16 %t2, i16* %ptr1
store i16 %t2, i16* %ptr2
store i16 %t3, i16* %ptr3
%a_val = load i16, i16* %a, align 2
%cmp = icmp eq i16 %a_val, 0
br i1 %cmp, label %cont, label %exit
exit: ; preds = %cont
ret void
}