diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2504ba70c25a..8879630270e2 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -77,6 +77,7 @@ public:
   /// \name Scalar TTI Implementations
   /// @{
   virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
 
   /// @}
 
@@ -129,6 +130,18 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
   return PSK_Software;
 }
 
+/// Tune the loop unroller for the CPU being targeted. Only the A2 changes
+/// anything: it gets both partial and runtime unrolling switched on in \p UP.
+void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
+  if (ST->getDarwinDirective() != PPC::DIR_A2)
+    return;
+
+  // The A2 issues in order and has a deep pipeline, so concatenation
+  // unrolling gives the instruction scheduler more room to hide latency.
+  UP.Runtime = true;
+  UP.Partial = true;
+}
+
 unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
   if (Vector && !ST->hasAltivec())
     return 0;
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
new file mode 100644
index 000000000000..17c91e5c07b1
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
+
+; A counted loop (1024 iterations) inside an optsize function. The checks
+; below expect its body to appear replicated: several adds ahead of the icmp.
+define void @unroll_opt_for_size() nounwind optsize {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @unroll_opt_for_size
+; CHECK: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK: icmp
+
+; A summing loop whose trip count (%n) is only known at run time. The checks
+; below expect the extra unr.cmp / for.body.unr blocks ahead of the main loop.
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+; CHECK-LABEL: @test
+; CHECK: unr.cmp{{.*}}:
+; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/llvm/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..2e463005586f
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
@@ -0,0 +1,6 @@
+# The tests in this directory need the PowerPC backend; flag the directory
+# as unsupported when PowerPC is not among the targets that were built.
+targets = set(config.root.targets_to_build.split())
+if 'PowerPC' not in targets:
+    config.unsupported = True
+