Loop unroller: set thresholds for optsize and minsize functions to zero

Before r268509, Clang would disable the loop unroll pass when optimizing
for size. That commit enabled the pass so that unroll pragmas are supported
in -Os builds. However, this regressed binary size in one of Chromium's
DLLs by ~100 KB.

This restores the original behaviour of no unrolling at -Os, but doing it
in LLVM instead of Clang makes more sense, and also allows the pragmas to
keep working.

Differential revision: http://reviews.llvm.org/D20115

llvm-svn: 269124
This commit is contained in:
Hans Wennborg 2016-05-10 21:45:55 +00:00
parent 12de4aeeb3
commit 719b26ba54
6 changed files with 38 additions and 172 deletions

View File

@ -111,9 +111,9 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Threshold = 150;
UP.PercentDynamicCostSavedThreshold = 20;
UP.DynamicCostSavingsDiscount = 2000;
UP.OptSizeThreshold = 50;
UP.OptSizeThreshold = 0;
UP.PartialThreshold = UP.Threshold;
UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
UP.MaxCount = UINT_MAX;
UP.FullUnrollMaxCount = UINT_MAX;

View File

@ -1,30 +1,5 @@
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
define void @unroll_opt_for_size() nounwind optsize {
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%inc = add i32 %iv, 1
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; EPILOG-LABEL: @unroll_opt_for_size
; EPILOG: add
; EPILOG-NEXT: add
; EPILOG-NEXT: add
; EPILOG: icmp
; PROLOG-LABEL: @unroll_opt_for_size
; PROLOG: add
; PROLOG-NEXT: add
; PROLOG-NEXT: add
; PROLOG: icmp
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:

View File

@ -1,53 +1,4 @@
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
define void @unroll_opt_for_size() nounwind optsize {
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%inc = add i32 %iv, 1
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; CHECK-LABEL: @unroll_opt_for_size
; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp
define void @unroll_default() nounwind {
entry:
br label %loop

View File

@ -1,53 +0,0 @@
; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
; RUN: sed -e 's/optsize/minsize/' %s | opt -S -loop-unroll -unroll-allow-partial | FileCheck %s
; Loop size = 3, when the function has the optsize attribute, the
; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not).
define void @unroll_opt_for_size() nounwind optsize {
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%inc = add i32 %iv, 1
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp

View File

@ -4,10 +4,10 @@
;///////////////////// TEST 1 //////////////////////////////
; This test shows that with optsize attribute, the loop is unrolled
; according to the specified unroll factor.
; This test shows that the loop is unrolled according to the specified
; unroll factor.
define void @Test1() nounwind optsize {
define void @Test1() nounwind {
entry:
br label %loop
@ -32,10 +32,10 @@ exit:
;///////////////////// TEST 2 //////////////////////////////
; This test shows that with minsize attribute, the loop is unrolled
; according to the specified unroll factor.
; This test shows that with optnone attribute, the loop is not unrolled
; even if an unroll factor was specified.
define void @Test2() nounwind minsize {
define void @Test2() nounwind optnone noinline {
entry:
br label %loop
@ -52,45 +52,16 @@ exit:
; CHECK_COUNT4-LABEL: @Test2
; CHECK_COUNT4: phi
; CHECK_COUNT4-NEXT: add
; CHECK_COUNT4-NEXT: add
; CHECK_COUNT4-NEXT: add
; CHECK_COUNT4-NEXT: add
; CHECK_COUNT4-NEXT: icmp
;///////////////////// TEST 3 //////////////////////////////
; This test shows that with optnone attribute, the loop is not unrolled
; even if an unroll factor was specified.
define void @Test3() nounwind optnone noinline {
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%inc = add i32 %iv, 1
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; CHECK_COUNT4-LABEL: @Test3
; CHECK_COUNT4: phi
; CHECK_COUNT4-NEXT: add
; CHECK_COUNT4-NEXT: icmp
;///////////////////// TEST 4 //////////////////////////////
; This test shows that without any attribute, this loop is fully unrolled
; by default.
; This test shows that this loop is fully unrolled by default.
@tab = common global [24 x i32] zeroinitializer, align 4
define i32 @Test4() {
define i32 @Test3() {
entry:
br label %for.body
@ -106,7 +77,7 @@ for.end: ; preds = %for.body
ret i32 42
}
; CHECK_NOCOUNT-LABEL: @Test4
; CHECK_NOCOUNT-LABEL: @Test3
; CHECK_NOCOUNT: store
; CHECK_NOCOUNT-NEXT: store
; CHECK_NOCOUNT-NEXT: store
@ -134,12 +105,11 @@ for.end: ; preds = %for.body
; CHECK_NOCOUNT-NEXT: ret
;///////////////////// TEST 5 //////////////////////////////
;///////////////////// TEST 4 //////////////////////////////
; This test shows that with optsize attribute, this loop is not unrolled
; by default.
; This test shows that with optsize attribute, this loop is not unrolled.
define i32 @Test5() optsize {
define i32 @Test4() optsize {
entry:
br label %for.body
@ -155,6 +125,6 @@ for.end: ; preds = %for.body
ret i32 42
}
; CHECK_NOCOUNT-LABEL: @Test5
; CHECK_NOCOUNT-LABEL: @Test4
; CHECK_NOCOUNT: phi
; CHECK_NOCOUNT: icmp

View File

@ -108,6 +108,29 @@ for.end: ; preds = %for.body
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}
; #pragma clang loop unroll(full)
; Loop should be fully unrolled, even for optsize.
;
; CHECK-LABEL: @loop64_with_full_optsize(
; CHECK-NOT: br i1
; Test input: an optsize function containing a single-block loop that adds 1
; to each of 64 consecutive i32 values starting at %a. The loop's back-edge
; branch is tagged with !llvm.loop !3.
define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
; Index: 0 on entry from %entry, otherwise the incremented value from the
; previous iteration.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; Load the element at the current index, add 1, and store it back in place.
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
; Advance the index; leave the loop once the next index equals 64.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 64
; Conditional back-edge carrying the !3 loop metadata.
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
for.end: ; preds = %for.body
ret void
}
; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;