Implement TTI getUnrollingPreferences for PowerPC

The PowerPC A2 core greatly benefits from aggressive concatenation unrolling;
use the new getUnrollingPreferences to enable this by default when targeting
the PPC A2 core.

llvm-svn: 190549
This commit is contained in:
Hal Finkel 2013-09-11 21:20:40 +00:00
parent 3c2dacaf88
commit 71780ec4fd
3 changed files with 61 additions and 0 deletions

View File

@ -77,6 +77,7 @@ public:
/// \name Scalar TTI Implementations /// \name Scalar TTI Implementations
/// @{ /// @{
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
/// Enable partial and runtime unrolling in \p UP when the subtarget's
/// directive is PPC::DIR_A2; for any other directive \p UP is left unchanged.
virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
/// @} /// @}
@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
return PSK_Software; return PSK_Software;
} }
/// Adjust the loop-unrolling preferences \p UP for the current subtarget.
///
/// When the subtarget's directive is PPC::DIR_A2, both runtime and partial
/// unrolling are switched on; for every other directive \p UP is left
/// untouched. The loop \p L is not consulted.
void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
  // Nothing to tune unless we are targeting the A2 core.
  if (ST->getDarwinDirective() != PPC::DIR_A2)
    return;

  // The A2 is an in-order core with a deep pipeline; concatenation unrolling
  // gives the instruction scheduler more room to hide latencies.
  UP.Runtime = true;
  UP.Partial = true;
}
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec()) if (Vector && !ST->hasAltivec())
return 0; return 0;

View File

@ -0,0 +1,48 @@
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
; Simple counted loop in a function marked optsize: %iv starts at 0, is
; incremented by 1 each iteration, and the loop exits once %inc is
; unsigned-greater-or-equal to 1024.
; The CHECK lines after this function expect three back-to-back add
; instructions, followed later by an icmp, in the opt output.
define void @unroll_opt_for_size() nounwind optsize {
entry:
br label %loop
loop:
; Induction variable: 0 on entry, %inc on the back edge.
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%inc = add i32 %iv, 1
; Leave the loop when the incremented counter reaches 1024 (unsigned compare).
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; CHECK-LABEL: @unroll_opt_for_size
; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK: icmp
; Sums i32 elements loaded from %a. Returns 0 without entering the loop when
; %n == 0; otherwise accumulates a[0], a[1], ... until the induction variable,
; truncated to i32, equals %n. The bound %n is a function argument, so it is
; not a compile-time constant here.
; The CHECK lines after this function look for unr.cmp* and for.body.unr*
; blocks plus a loop back-branch on %exitcond.7 in the opt output.
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
; Skip the loop entirely for an empty range.
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; 64-bit index and 32-bit running sum, both starting at 0.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
%0 = load i32* %arrayidx, align 4
%add = add nsw i32 %0, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
; The exit test compares the index, narrowed to i32, against %n.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Result is 0 when the loop was skipped, otherwise the final sum.
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %sum.0.lcssa
}
; CHECK-LABEL: @test
; CHECK: unr.cmp{{.*}}:
; CHECK: for.body.unr{{.*}}:
; CHECK: for.body:
; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body

View File

@ -0,0 +1,4 @@
# Mark every test in this directory as unsupported unless 'PowerPC' appears
# in the whitespace-separated list of targets that were built.
targets = set(config.root.targets_to_build.split())
if 'PowerPC' not in targets:
    config.unsupported = True