[CodeGen] Check for HardwareLoop Latch ExitBlock
The HardwareLoops pass looks for exiting blocks whose exit count can be computed by ScalarEvolution. If the target specifies that the loop counter is updated in a register, through a phi, we need to ensure that the chosen exiting block is also a loop latch so that we can insert the phi with the correct value for the incoming edge. Differential Revision: https://reviews.llvm.org/D63336 llvm-svn: 363556
This commit is contained in:
parent
7dc917603b
commit
1bd3d00e7e
|
@ -448,9 +448,7 @@ public:
|
|||
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
|
||||
UnrollingPreferences &UP) const;
|
||||
|
||||
/// Attributes of a target dependent hardware loop. Here, the term 'element'
|
||||
/// describes the work performed by an IR loop that has not been vectorized
|
||||
/// by the compiler.
|
||||
/// Attributes of a target dependent hardware loop.
|
||||
struct HardwareLoopInfo {
|
||||
HardwareLoopInfo() = delete;
|
||||
HardwareLoopInfo(Loop *L) : L(L) { }
|
||||
|
@ -459,10 +457,10 @@ public:
|
|||
BranchInst *ExitBranch = nullptr;
|
||||
const SCEV *ExitCount = nullptr;
|
||||
IntegerType *CountType = nullptr;
|
||||
Value *LoopDecrement = nullptr; // The maximum number of elements
|
||||
// processed in the loop body.
|
||||
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
|
||||
// value in every iteration.
|
||||
bool IsNestingLegal = false; // Can a hardware loop be a parent to
|
||||
// another hardware loop.
|
||||
// another hardware loop?
|
||||
bool CounterInReg = false; // Should loop counter be updated in
|
||||
// the loop via a phi?
|
||||
};
|
||||
|
|
|
@ -235,7 +235,17 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
|
|||
|
||||
for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
|
||||
IE = ExitingBlocks.end(); I != IE; ++I) {
|
||||
const SCEV *EC = SE->getExitCount(L, *I);
|
||||
BasicBlock *BB = *I;
|
||||
|
||||
// If we pass the updated counter back through a phi, we need to know
|
||||
// which latch the updated value will be coming from.
|
||||
if (!L->isLoopLatch(BB)) {
|
||||
if ((ForceHardwareLoopPHI.getNumOccurrences() && ForceHardwareLoopPHI) ||
|
||||
HWLoopInfo.CounterInReg)
|
||||
continue;
|
||||
}
|
||||
|
||||
const SCEV *EC = SE->getExitCount(L, BB);
|
||||
if (isa<SCEVCouldNotCompute>(EC))
|
||||
continue;
|
||||
if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
|
||||
|
@ -251,7 +261,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
|
|||
// If this exiting block is contained in a nested loop, it is not eligible
|
||||
// for insertion of the branch-and-decrement since the inner loop would
|
||||
// end up messing up the value in the CTR.
|
||||
if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(*I) != L &&
|
||||
if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(BB) != L &&
|
||||
!ForceNestedLoop)
|
||||
continue;
|
||||
|
||||
|
@ -278,7 +288,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
|
|||
continue;
|
||||
|
||||
// Make sure this block ends with a conditional branch.
|
||||
Instruction *TI = (*I)->getTerminator();
|
||||
Instruction *TI = BB->getTerminator();
|
||||
if (!TI)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -702,10 +702,6 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
|||
if (!ST->hasLOB() || DisableLowOverheadLoops)
|
||||
return false;
|
||||
|
||||
// For now, for simplicity, only support loops with one exit block.
|
||||
if (!L->getExitBlock())
|
||||
return false;
|
||||
|
||||
if (!SE.hasLoopInvariantBackedgeTakenCount(L))
|
||||
return false;
|
||||
|
||||
|
|
|
@ -135,6 +135,82 @@ while.end7:
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: not_rotated
|
||||
; CHECK-NOT: call void @llvm.set.loop.iterations
|
||||
; CHECK-NOT: call i32 @llvm.loop.decrement.i32
|
||||
define void @not_rotated(i32, i16* nocapture, i16 signext) {
|
||||
br label %4
|
||||
|
||||
4:
|
||||
%5 = phi i32 [ 0, %3 ], [ %19, %18 ]
|
||||
%6 = icmp eq i32 %5, %0
|
||||
br i1 %6, label %20, label %7
|
||||
|
||||
7:
|
||||
%8 = mul i32 %5, %0
|
||||
br label %9
|
||||
|
||||
9:
|
||||
%10 = phi i32 [ %17, %12 ], [ 0, %7 ]
|
||||
%11 = icmp eq i32 %10, %0
|
||||
br i1 %11, label %18, label %12
|
||||
|
||||
12:
|
||||
%13 = add i32 %10, %8
|
||||
%14 = getelementptr inbounds i16, i16* %1, i32 %13
|
||||
%15 = load i16, i16* %14, align 2
|
||||
%16 = add i16 %15, %2
|
||||
store i16 %16, i16* %14, align 2
|
||||
%17 = add i32 %10, 1
|
||||
br label %9
|
||||
|
||||
18:
|
||||
%19 = add i32 %5, 1
|
||||
br label %4
|
||||
|
||||
20:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: multi_latch
|
||||
; CHECK-NOT: call void @llvm.set.loop.iterations
|
||||
; CHECK-NOT: call i32 @llvm.loop.decrement
|
||||
define void @multi_latch(i32* %a, i32* %b, i32 %N) {
|
||||
entry:
|
||||
%half = lshr i32 %N, 1
|
||||
br label %header
|
||||
|
||||
header:
|
||||
%iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
|
||||
%cmp = icmp ult i32 %iv, %half
|
||||
%addr.a = getelementptr i32, i32* %a, i32 %iv
|
||||
%addr.b = getelementptr i32, i32* %b, i32 %iv
|
||||
br i1 %cmp, label %if.then, label %if.else
|
||||
|
||||
if.then:
|
||||
store i32 %iv, i32* %addr.a
|
||||
br label %latch.0
|
||||
|
||||
if.else:
|
||||
store i32 %iv, i32* %addr.b
|
||||
br label %latch.0
|
||||
|
||||
latch.0:
|
||||
%count.next = add nuw i32 %iv, 1
|
||||
%cmp.1 = icmp ult i32 %count.next, %half
|
||||
br i1 %cmp.1, label %header, label %latch.1
|
||||
|
||||
latch.1:
|
||||
%ld = load i32, i32* %addr.a
|
||||
store i32 %ld, i32* %addr.b
|
||||
%cmp.2 = icmp ult i32 %count.next, %N
|
||||
br i1 %cmp.2, label %header, label %latch.1
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #0
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
|
||||
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
|
||||
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
|
||||
|
||||
; CHECK-LABEL: not_rotated
|
||||
; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations
|
||||
; CHECK-LATCH-NOT: call i1 @llvm.loop.decrement
|
||||
|
||||
; CHECK-ALLOW: call void @llvm.set.loop.iterations.i32(i32 %4)
|
||||
; CHECK-ALLOW: br label %10
|
||||
|
||||
; CHECK-ALLOW: [[CMP:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
|
||||
; CHECK-ALLOW: br i1 [[CMP]], label %13, label %19
|
||||
|
||||
define void @not_rotated(i32, i16* nocapture, i16 signext) {
|
||||
br label %4
|
||||
|
||||
4:
|
||||
%5 = phi i32 [ 0, %3 ], [ %19, %18 ]
|
||||
%6 = icmp eq i32 %5, %0
|
||||
br i1 %6, label %20, label %7
|
||||
|
||||
7:
|
||||
%8 = mul i32 %5, %0
|
||||
br label %9
|
||||
|
||||
9:
|
||||
%10 = phi i32 [ %17, %12 ], [ 0, %7 ]
|
||||
%11 = icmp eq i32 %10, %0
|
||||
br i1 %11, label %18, label %12
|
||||
|
||||
12:
|
||||
%13 = add i32 %10, %8
|
||||
%14 = getelementptr inbounds i16, i16* %1, i32 %13
|
||||
%15 = load i16, i16* %14, align 2
|
||||
%16 = add i16 %15, %2
|
||||
store i16 %16, i16* %14, align 2
|
||||
%17 = add i32 %10, 1
|
||||
br label %9
|
||||
|
||||
18:
|
||||
%19 = add i32 %5, 1
|
||||
br label %4
|
||||
|
||||
20:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue