From 2a2f35d59c135234242a587c5fcf434c4d664572 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 14 Feb 2017 15:20:48 +0000 Subject: [PATCH] [SLP] Fix for PR31879: vectorize repeated scalar ops that don't get put back into a vector Previously the cost of the existing ExtractElement/ExtractValue instructions was considered as a dead cost only if it was detected that they have only one use. But these instructions may also be considered dead if all of their users are going to be vectorized as well, for example: ``` %x0 = extractelement <2 x float> %x, i32 0 %x1 = extractelement <2 x float> %x, i32 1 %x0x0 = fmul float %x0, %x0 %x1x1 = fmul float %x1, %x1 %add = fadd float %x0x0, %x1x1 ``` This can be transformed to ``` %1 = fmul <2 x float> %x, %x %2 = extractelement <2 x float> %1, i32 0 %3 = extractelement <2 x float> %1, i32 1 %add = fadd float %2, %3 ``` because, although `%x0` and `%x1` each have 2 users, these users are part of the vectorized tree and we can consider these `extractelement` instructions as dead. Differential Revision: https://reviews.llvm.org/D29900 llvm-svn: 295056 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 8 ++++- .../SLPVectorizer/X86/extractelement.ll | 34 ++++++++++--------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4a827391b3fb..33b4f9860158 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1660,7 +1660,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { int DeadCost = 0; for (unsigned i = 0, e = VL.size(); i < e; ++i) { Instruction *E = cast<Instruction>(VL[i]); - if (E->hasOneUse()) + // If all users are going to be vectorized, instruction can be + // considered as dead. + // The same, if have only one user, it will be vectorized for sure. 
+ if (E->hasOneUse() || + std::all_of(E->user_begin(), E->user_end(), [this](User *U) { + return ScalarToTreeEntry.count(U) > 0; + })) // Take credit for instruction that will become dead. DeadCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll index 4abdf615967d..10675f3be8a6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -7,11 +7,10 @@ define float @f(<2 x float> %x) { ; CHECK-LABEL: @f( -; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]] -; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 @@ -24,13 +23,13 @@ define float @f(<2 x float> %x) { define float @f_used_out_of_tree(<2 x float> %x) { ; THRESH2-LABEL: @f_used_out_of_tree( -; THRESH2-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; THRESH2-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; THRESH2-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]] -; THRESH2-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]] -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]] +; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = 
extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: store float [[ADD]], float* @a -; THRESH2-NEXT: ret float [[X0]] +; THRESH2-NEXT: ret float [[TMP1]] ; %x0 = extractelement <2 x float> %x, i32 0 %x1 = extractelement <2 x float> %x, i32 1 @@ -43,12 +42,15 @@ define float @f_used_out_of_tree(<2 x float> %x) { define float @f_used_twice_in_tree(<2 x float> %x) { ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; THRESH1-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; THRESH1-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X1]] -; THRESH1-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]] -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]] +; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 +; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 +; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]] +; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] ; THRESH1-NEXT: ret float [[ADD]] +; %x0 = extractelement <2 x float> %x, i32 0 %x1 = extractelement <2 x float> %x, i32 1 %x0x0 = fmul float %x0, %x1