[SCEV] Suppress hoisting insertion point of binops when unsafe
InsertBinop tries to move insertion-points out of loops for expressions that are loop-invariant. This patch adds a new parameter, IsSafeToHost, to guard that hoisting. This allows callers to suppress that hoisting for unsafe situations, such as divisions that may have a zero denominator. This fixes PR38697. Differential Revision: https://reviews.llvm.org/D55232 llvm-svn: 360280
This commit is contained in:
parent
1558731607
commit
d27b0c6247
|
@ -315,8 +315,10 @@ namespace llvm {
|
|||
SmallPtrSetImpl<const SCEV *> &Processed);
|
||||
|
||||
/// Insert the specified binary operator, doing a small amount of work to
|
||||
/// avoid inserting an obviously redundant operation.
|
||||
Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
|
||||
/// avoid inserting an obviously redundant operation, and hoisting to an
|
||||
/// outer loop when the opportunity is there and it is safe.
|
||||
Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
|
||||
bool IsSafeToHoist);
|
||||
|
||||
/// Arrange for there to be a cast of V to Ty at IP, reusing an existing
|
||||
/// cast if a suitable one exists, moving an existing cast if a suitable one
|
||||
|
|
|
@ -166,9 +166,10 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
|
|||
}
|
||||
|
||||
/// InsertBinop - Insert the specified binary operator, doing a small amount
|
||||
/// of work to avoid inserting an obviously redundant operation.
|
||||
/// of work to avoid inserting an obviously redundant operation, and hoisting
|
||||
/// to an outer loop when the opportunity is there and it is safe.
|
||||
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
|
||||
Value *LHS, Value *RHS) {
|
||||
Value *LHS, Value *RHS, bool IsSafeToHoist) {
|
||||
// Fold a binop with constant operands.
|
||||
if (Constant *CLHS = dyn_cast<Constant>(LHS))
|
||||
if (Constant *CRHS = dyn_cast<Constant>(RHS))
|
||||
|
@ -210,14 +211,16 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
|
|||
DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
|
||||
SCEVInsertPointGuard Guard(Builder, this);
|
||||
|
||||
// Move the insertion point out of as many loops as we can.
|
||||
while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
|
||||
if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
if (!Preheader) break;
|
||||
if (IsSafeToHoist) {
|
||||
// Move the insertion point out of as many loops as we can.
|
||||
while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
|
||||
if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
if (!Preheader) break;
|
||||
|
||||
// Ok, move up a level.
|
||||
Builder.SetInsertPoint(Preheader->getTerminator());
|
||||
// Ok, move up a level.
|
||||
Builder.SetInsertPoint(Preheader->getTerminator());
|
||||
}
|
||||
}
|
||||
|
||||
// If we haven't found this binop, insert it.
|
||||
|
@ -734,7 +737,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
|
|||
// Instead of doing a negate and add, just do a subtract.
|
||||
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
|
||||
Sum = InsertNoopCastOfTo(Sum, Ty);
|
||||
Sum = InsertBinop(Instruction::Sub, Sum, W);
|
||||
Sum = InsertBinop(Instruction::Sub, Sum, W, /*IsSafeToHoist*/ true);
|
||||
++I;
|
||||
} else {
|
||||
// A simple add.
|
||||
|
@ -742,7 +745,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
|
|||
Sum = InsertNoopCastOfTo(Sum, Ty);
|
||||
// Canonicalize a constant to the RHS.
|
||||
if (isa<Constant>(Sum)) std::swap(Sum, W);
|
||||
Sum = InsertBinop(Instruction::Add, Sum, W);
|
||||
Sum = InsertBinop(Instruction::Add, Sum, W, /*IsSafeToHoist*/ true);
|
||||
++I;
|
||||
}
|
||||
}
|
||||
|
@ -794,9 +797,11 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
|
|||
if (Exponent & 1)
|
||||
Result = P;
|
||||
for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
|
||||
P = InsertBinop(Instruction::Mul, P, P);
|
||||
P = InsertBinop(Instruction::Mul, P, P, /*IsSafeToHoist*/ true);
|
||||
if (Exponent & BinExp)
|
||||
Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
|
||||
Result = Result ? InsertBinop(Instruction::Mul, Result, P,
|
||||
/*IsSafeToHoist*/ true)
|
||||
: P;
|
||||
}
|
||||
|
||||
I = E;
|
||||
|
@ -811,7 +816,8 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
|
|||
} else if (I->second->isAllOnesValue()) {
|
||||
// Instead of doing a multiply by negative one, just do a negate.
|
||||
Prod = InsertNoopCastOfTo(Prod, Ty);
|
||||
Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
|
||||
Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
|
||||
/*IsSafeToHoist*/ true);
|
||||
++I;
|
||||
} else {
|
||||
// A simple mul.
|
||||
|
@ -824,9 +830,10 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
|
|||
// Canonicalize Prod*(1<<C) to Prod<<C.
|
||||
assert(!Ty->isVectorTy() && "vector types are not SCEVable");
|
||||
Prod = InsertBinop(Instruction::Shl, Prod,
|
||||
ConstantInt::get(Ty, RHS->logBase2()));
|
||||
ConstantInt::get(Ty, RHS->logBase2()),
|
||||
/*IsSafeToHoist*/ true);
|
||||
} else {
|
||||
Prod = InsertBinop(Instruction::Mul, Prod, W);
|
||||
Prod = InsertBinop(Instruction::Mul, Prod, W, /*IsSafeToHoist*/ true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -842,11 +849,13 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
|
|||
const APInt &RHS = SC->getAPInt();
|
||||
if (RHS.isPowerOf2())
|
||||
return InsertBinop(Instruction::LShr, LHS,
|
||||
ConstantInt::get(Ty, RHS.logBase2()));
|
||||
ConstantInt::get(Ty, RHS.logBase2()),
|
||||
/*IsSafeToHoist*/ true);
|
||||
}
|
||||
|
||||
Value *RHS = expandCodeFor(S->getRHS(), Ty);
|
||||
return InsertBinop(Instruction::UDiv, LHS, RHS);
|
||||
return InsertBinop(Instruction::UDiv, LHS, RHS,
|
||||
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
|
||||
}
|
||||
|
||||
/// Move parts of Base into Rest to leave Base with the minimal
|
||||
|
|
|
@ -0,0 +1,354 @@
|
|||
; RUN: opt -loop-vectorize -force-vector-width=2 -S < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -indvars -S < %s 2>&1 | FileCheck %s -check-prefix=INDVARCHECK
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Produced from test-case:
|
||||
;
|
||||
; void testCountIncrLoop(unsigned char *ptr, int lim, int count, int val)
|
||||
; {
|
||||
; int inx = 0;
|
||||
; for (int outer_i = 0; outer_i < lim; ++outer_i) {
|
||||
; if (count > 0) { // At runtime, 'count' is 0, so the following code is dead.
|
||||
; int result = val;
|
||||
; int tmp = count;
|
||||
;
|
||||
; while (tmp < 8) {
|
||||
; result += val >> tmp;
|
||||
; tmp += count;
|
||||
; }
|
||||
;
|
||||
; ptr[inx++] = (unsigned char) result;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; No explicit division appears in the input, but a division is generated during
|
||||
; vectorization, and that division is a division-by-0 when the input 'count'
|
||||
; is 0, so it cannot be hoisted above the guard of 'count > 0'.
|
||||
|
||||
; Verify that a 'udiv' does not appear in the 'loop1.preheader' block, and that
|
||||
; a 'udiv' has been inserted at the top of the 'while.body.preheader' block.
|
||||
define void @testCountIncrLoop(i8* %ptr, i32 %lim, i32 %count, i32 %val) {
|
||||
; CHECK-LABEL: @testCountIncrLoop(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK: loop1.preheader:
|
||||
; CHECK-NOT: udiv
|
||||
; CHECK: loop1.body:
|
||||
; CHECK: while.cond.preheader:
|
||||
; CHECK: while.body.preheader:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0:%.*]], [[COUNT:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK: exit:
|
||||
; CHECK: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp1 = icmp sgt i32 %lim, 0
|
||||
br i1 %cmp1, label %loop1.preheader, label %exit
|
||||
|
||||
loop1.preheader: ; preds = %entry
|
||||
%cmp2 = icmp sgt i32 %count, 0
|
||||
%cmp4 = icmp slt i32 %count, 8
|
||||
br label %loop1.body
|
||||
|
||||
loop1.body: ; preds = %loop1.inc, %loop1.preheader
|
||||
%outer_i = phi i32 [ 0, %loop1.preheader ], [ %outer_i.1, %loop1.inc ]
|
||||
%inx.1 = phi i32 [ 0, %loop1.preheader ], [ %inx.2, %loop1.inc ]
|
||||
br i1 %cmp2, label %while.cond.preheader, label %loop1.inc
|
||||
|
||||
while.cond.preheader: ; preds = %loop1.body
|
||||
br i1 %cmp4, label %while.body, label %while.end
|
||||
|
||||
while.body: ; preds = %while.body, %while.cond.preheader
|
||||
%tmp = phi i32 [ %add3, %while.body ], [ %count, %while.cond.preheader ]
|
||||
%result.1 = phi i32 [ %add, %while.body ], [ %val, %while.cond.preheader ]
|
||||
%shr = ashr i32 %val, %tmp
|
||||
%add = add nsw i32 %shr, %result.1
|
||||
%add3 = add nsw i32 %tmp, %count
|
||||
%cmp3 = icmp slt i32 %add3, 8
|
||||
br i1 %cmp3, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body, %while.cond.preheader
|
||||
%result.0.lcssa = phi i32 [ %val, %while.cond.preheader ], [ %add, %while.body ]
|
||||
%conv = trunc i32 %result.0.lcssa to i8
|
||||
%inc = add nsw i32 %inx.1, 1
|
||||
%idxprom = sext i32 %inx.1 to i64
|
||||
%arrayidx = getelementptr inbounds i8, i8* %ptr, i64 %idxprom
|
||||
store i8 %conv, i8* %arrayidx, align 1
|
||||
br label %loop1.inc
|
||||
|
||||
loop1.inc: ; preds = %while.end, %loop1.body
|
||||
%inx.2 = phi i32 [ %inc, %while.end ], [ %inx.1, %loop1.body ]
|
||||
%outer_i.1 = add nuw nsw i32 %outer_i, 1
|
||||
%exitcond = icmp eq i32 %outer_i.1, %lim
|
||||
br i1 %exitcond, label %exit, label %loop1.body
|
||||
|
||||
exit: ; preds = %loop1.inc, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; These next tests are all based on the following source code, with slight
|
||||
; variations on the calculation of 'incr' (all of which are loop-invariant
|
||||
; divisions, but only some of which can be safely hoisted):
|
||||
;
|
||||
; uint32_t foo(uint32_t *ptr, uint32_t start1, uint32_t start2) {
|
||||
; uint32_t counter1, counter2;
|
||||
; uint32_t val = start1;
|
||||
; for (counter1 = 1; counter1 < 100; ++counter1) {
|
||||
; uint32_t index = 0;
|
||||
; val += ptr[index];
|
||||
; for (counter2 = start2; counter2 < 10; ++counter2) {
|
||||
; // Division is loop invariant, and denominator is guaranteed non-zero:
|
||||
; // Safe to hoist it out of the inner loop.
|
||||
; uint32_t incr = 16 / counter1;
|
||||
; index += incr;
|
||||
; val += ptr[index];
|
||||
; }
|
||||
; }
|
||||
; return val;
|
||||
; }
|
||||
|
||||
; This version is as written above, where 'incr' is '16/counter1', and it is
|
||||
; guaranted that 'counter1' is always non-zero. So it is safe to hoist the
|
||||
; division from the inner loop to the preheader.
|
||||
;
|
||||
; Verify that the 'udiv' is hoisted to the preheader, and is not in the loop body.
|
||||
define i32 @NonZeroDivHoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
|
||||
; INDVARCHECK-LABEL: @NonZeroDivHoist(
|
||||
; INDVARCHECK-NEXT: entry:
|
||||
; INDVARCHECK: for.body3.lr.ph:
|
||||
; INDVARCHECK-NEXT: [[TMP0:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]]
|
||||
; INDVARCHECK-NEXT: br label [[FOR_BODY3:%.*]]
|
||||
; INDVARCHECK: for.body3:
|
||||
; INDVARCHECK-NOT: udiv
|
||||
; INDVARCHECK: for.end10:
|
||||
;
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.end, %entry
|
||||
%val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
|
||||
%counter1.0 = phi i32 [ 1, %entry ], [ %inc9, %for.end ]
|
||||
%cmp = icmp ult i32 %counter1.0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end10
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%add = add i32 %tmp, %val.0
|
||||
%cmp224 = icmp ult i32 %start2, 10
|
||||
br i1 %cmp224, label %for.body3.lr.ph, label %for.end
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.body
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
|
||||
%val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
|
||||
%counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
|
||||
%div = udiv i32 16, %counter1.0
|
||||
%add4 = add i32 %div, %index.027
|
||||
%idxprom5 = zext i32 %add4 to i64
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
|
||||
%tmp1 = load i32, i32* %arrayidx6, align 4
|
||||
%add7 = add i32 %tmp1, %val.126
|
||||
%inc = add i32 %counter2.025, 1
|
||||
%cmp2 = icmp ult i32 %inc, 10
|
||||
br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
|
||||
|
||||
for.cond1.for.end_crit_edge: ; preds = %for.body3
|
||||
%split = phi i32 [ %add7, %for.body3 ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
|
||||
%val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
|
||||
%inc9 = add i32 %counter1.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end10: ; preds = %for.cond
|
||||
%val.0.lcssa = phi i32 [ %val.0, %for.cond ]
|
||||
ret i32 %val.0.lcssa
|
||||
}
|
||||
|
||||
; This version is identical to the above 'NonZeroDivHoist' case, except the
|
||||
; outer ('counter1') loop starts at the unknown value of 'start1' rather than 1,
|
||||
; and so it is illegal to hoist the division because if 'start1' is 0, hoisting
|
||||
; it would incorrectly cause a divide-by-zero trap.
|
||||
;
|
||||
; Verify that the 'udiv' is not hoisted to the preheader, and it remains in the
|
||||
; loop body.
|
||||
define i32 @ZeroDivNoHoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
|
||||
; INDVARCHECK-LABEL: @ZeroDivNoHoist(
|
||||
; INDVARCHECK-NEXT: entry:
|
||||
; INDVARCHECK-NOT: udiv
|
||||
; INDVARCHECK: for.body3:
|
||||
; INDVARCHECK: [[TMP1:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]]
|
||||
; INDVARCHECK: for.cond1.for.end_crit_edge:
|
||||
;
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.end, %entry
|
||||
%val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
|
||||
%counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ]
|
||||
%cmp = icmp ult i32 %counter1.0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end10
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%add = add i32 %tmp, %val.0
|
||||
%cmp224 = icmp ult i32 %start2, 10
|
||||
br i1 %cmp224, label %for.body3.lr.ph, label %for.end
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.body
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
|
||||
%val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
|
||||
%counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
|
||||
%div = udiv i32 16, %counter1.0
|
||||
%add4 = add i32 %div, %index.027
|
||||
%idxprom5 = zext i32 %add4 to i64
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
|
||||
%tmp1 = load i32, i32* %arrayidx6, align 4
|
||||
%add7 = add i32 %tmp1, %val.126
|
||||
%inc = add i32 %counter2.025, 1
|
||||
%cmp2 = icmp ult i32 %inc, 10
|
||||
br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
|
||||
|
||||
for.cond1.for.end_crit_edge: ; preds = %for.body3
|
||||
%split = phi i32 [ %add7, %for.body3 ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
|
||||
%val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
|
||||
%inc9 = add i32 %counter1.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end10: ; preds = %for.cond
|
||||
%val.0.lcssa = phi i32 [ %val.0, %for.cond ]
|
||||
ret i32 %val.0.lcssa
|
||||
}
|
||||
|
||||
; This version is has a clearly safe division by a non-zero constant (16). The
|
||||
; division is transformed to a logical-shift-right of 4, and it is safely
|
||||
; hoisted.
|
||||
;
|
||||
; Verify that the division-operation is hoisted, and that it appears as a
|
||||
; right-shift ('lshr') rather than an explicit division.
|
||||
define i32 @DivBy16Hoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
|
||||
; INDVARCHECK-LABEL: @DivBy16Hoist(
|
||||
; INDVARCHECK-NEXT: entry:
|
||||
; INDVARCHECK: for.cond:
|
||||
; INDVARCHECK: [[TMP1:%.*]] = lshr i64 [[INDVARS_IV:%.*]], 4
|
||||
; INDVARCHECK: for.body:
|
||||
; INDVARCHECK-NOT: lshr
|
||||
; INDVARCHECK-NOT: udiv
|
||||
; INDVARCHECK: for.end10:
|
||||
;
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.end, %entry
|
||||
%val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
|
||||
%counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ]
|
||||
%cmp = icmp ult i32 %counter1.0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end10
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%add = add i32 %tmp, %val.0
|
||||
%cmp224 = icmp ult i32 %start2, 10
|
||||
br i1 %cmp224, label %for.body3.lr.ph, label %for.end
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.body
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
|
||||
%val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
|
||||
%counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
|
||||
%div = udiv i32 %counter1.0, 16
|
||||
%add4 = add i32 %div, %index.027
|
||||
%idxprom5 = zext i32 %add4 to i64
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
|
||||
%tmp1 = load i32, i32* %arrayidx6, align 4
|
||||
%add7 = add i32 %tmp1, %val.126
|
||||
%inc = add i32 %counter2.025, 1
|
||||
%cmp2 = icmp ult i32 %inc, 10
|
||||
br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
|
||||
|
||||
for.cond1.for.end_crit_edge: ; preds = %for.body3
|
||||
%split = phi i32 [ %add7, %for.body3 ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
|
||||
%val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
|
||||
%inc9 = add i32 %counter1.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end10: ; preds = %for.cond
|
||||
%val.0.lcssa = phi i32 [ %val.0, %for.cond ]
|
||||
ret i32 %val.0.lcssa
|
||||
}
|
||||
|
||||
; This version is has a clearly safe division by a non-zero constant (17). The
|
||||
; division is safely hoisted, as it was in the 'DivBy16Hoist' verison, but here
|
||||
; it remains a division, rather than being transformed to a right-shift.
|
||||
;
|
||||
; Verify that the division-operation is hoisted.
|
||||
define i32 @DivBy17Hoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) {
|
||||
; INDVARCHECK-LABEL: @DivBy17Hoist(
|
||||
; INDVARCHECK-NEXT: entry:
|
||||
; INDVARCHECK: for.cond:
|
||||
; INDVARCHECK: [[TMP1:%.*]] = udiv i64 [[INDVARS_IV:%.*]], 17
|
||||
; INDVARCHECK: for.body:
|
||||
; INDVARCHECK-NOT: udiv
|
||||
; INDVARCHECK: for.end10:
|
||||
;
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.end, %entry
|
||||
%val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ]
|
||||
%counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ]
|
||||
%cmp = icmp ult i32 %counter1.0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end10
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%add = add i32 %tmp, %val.0
|
||||
%cmp224 = icmp ult i32 %start2, 10
|
||||
br i1 %cmp224, label %for.body3.lr.ph, label %for.end
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.body
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.body3.lr.ph
|
||||
%index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ]
|
||||
%val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ]
|
||||
%counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ]
|
||||
%div = udiv i32 %counter1.0, 17
|
||||
%add4 = add i32 %div, %index.027
|
||||
%idxprom5 = zext i32 %add4 to i64
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5
|
||||
%tmp1 = load i32, i32* %arrayidx6, align 4
|
||||
%add7 = add i32 %tmp1, %val.126
|
||||
%inc = add i32 %counter2.025, 1
|
||||
%cmp2 = icmp ult i32 %inc, 10
|
||||
br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge
|
||||
|
||||
for.cond1.for.end_crit_edge: ; preds = %for.body3
|
||||
%split = phi i32 [ %add7, %for.body3 ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body
|
||||
%val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ]
|
||||
%inc9 = add i32 %counter1.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end10: ; preds = %for.cond
|
||||
%val.0.lcssa = phi i32 [ %val.0, %for.cond ]
|
||||
ret i32 %val.0.lcssa
|
||||
}
|
Loading…
Reference in New Issue