ScopInfo: Split start value from SCEVAddRecExpr to enable parameter sharing.

SCoP-invariant parameters that differ only in their start value prevent
parameter sharing. For example, when compiling the following C code:

  void foo(float *input) {
    for (long j = 0; j < 8; j++) {
      // SCoP begin
      for (long i = 0; i < 8; i++) {
        float x = input[j * 64 + i + 1];
        input[j * 64 + i] = x * x;
      }
    }
  }

Polly would create two parameters for these memory accesses:

    p_0: {0,+,256}
    p_1: {4,+,256}
    [j * 64 + i + 1] => MemRef_input[o0] : 4o0 = p_1 + 4i0
    [j * 64 + i]     => MemRef_input[o0] : 4o0 = p_0 + 4i0

These parameters differ only in their start value. To enable parameter sharing,
we split the start value off the SCEVAddRecExpr, so that both accesses share a
single parameter that always has a zero start value:

    p0: {0,+,256}<%for.cond1.preheader>
    [j * 64 + i + 1] => MemRef_input[o0] : 4o0 = 4 + p_0 + 4i0
    [j * 64 + i]     => MemRef_input[o0] : 4o0 = p_0 + 4i0

Such a translation can make polly-dependence much faster.

Contributed-by: Star Tan <tanmx_star@yeah.net>
llvm-svn: 187728
This commit is contained in:
Tobias Grosser 2013-08-05 15:14:15 +00:00
parent 5b4634576e
commit e42ddb9ad3
5 changed files with 194 additions and 16 deletions

View File

@ -182,22 +182,38 @@ public:
// Translate an affine add recurrence into an isl piecewise affine expression.
//
// A recurrence with a zero start value is built directly as
// 'start + step * <loop dimension>'. Any other recurrence '{start, +, inc}' is
// rewritten as 'start + {0, +, inc}', so that recurrences which differ only in
// their start value end up referring to the same zero-start sub-expression.
//
// Expr must be affine (asserted). The result is marked __isl_give.
__isl_give isl_pw_aff *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
  assert(Expr->isAffine() && "Only affine AddRecurrences allowed");

  // Directly generate isl_pw_aff for Expr if 'start' is zero.
  if (Expr->getStart()->isZero()) {
    // The containment check lives on this path only: a recurrence with a
    // non-zero start may belong to a loop outside the region (its zero-start
    // part is then a parameter), so asserting before the split would reject
    // exactly the expressions the split is meant to handle.
    assert(S->getRegion().contains(Expr->getLoop()) &&
           "Scop does not contain the loop referenced in this AddRec");

    isl_pw_aff *Start = visit(Expr->getStart());
    isl_pw_aff *Step = visit(Expr->getOperand(1));
    isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces);
    isl_local_space *LocalSpace = isl_local_space_from_space(Space);

    int loopDimension = getLoopDepth(Expr->getLoop());

    // Affine function that selects the induction variable of Expr's loop:
    // coefficient 1 on that input dimension, 0 everywhere else.
    isl_aff *LAff = isl_aff_set_coefficient_si(
        isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
    isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);

    // TODO: Do we need to check for NSW and NUW?
    return isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff));
  }

  // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
  // if 'start' is not zero.
  ScalarEvolution &SE = *S->getSE();
  const SCEV *ZeroStartExpr = SE.getAddRecExpr(
      SE.getConstant(Expr->getStart()->getType(), 0),
      Expr->getStepRecurrence(SE), Expr->getLoop(), SCEV::FlagAnyWrap);

  // NOTE(review): this relies on visit() either resolving ZeroStartExpr to a
  // parameter or re-entering this method on the zero-start path above --
  // confirm against the visit() implementation, which is not visible here.
  isl_pw_aff *ZeroStartResult = visit(ZeroStartExpr);
  isl_pw_aff *Start = visit(Expr->getStart());

  return isl_pw_aff_add(ZeroStartResult, Start);
}
__isl_give isl_pw_aff *visitSMaxExpr(const SCEVSMaxExpr *Expr) {

View File

@ -278,7 +278,22 @@ public:
assert(Start.isConstant() && Recurrence.isConstant() &&
"Expected 'Start' and 'Recurrence' to be constant");
return ValidatorResult(SCEVType::PARAM, Expr);
// Directly generate ValidatorResult for Expr if 'start' is zero.
if (Expr->getStart()->isZero())
return ValidatorResult(SCEVType::PARAM, Expr);
// Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
// if 'start' is not zero.
const SCEV *ZeroStartExpr = SE.getAddRecExpr(
SE.getConstant(Expr->getStart()->getType(), 0),
Expr->getStepRecurrence(SE), Expr->getLoop(), SCEV::FlagAnyWrap);
ValidatorResult ZeroStartResult =
ValidatorResult(SCEVType::PARAM, ZeroStartExpr);
ZeroStartResult.addParamsFrom(Start);
return ZeroStartResult;
}
class ValidatorResult visitSMaxExpr(const SCEVSMaxExpr *Expr) {

View File

@ -51,6 +51,9 @@ for.end7: ; preds = %for.cond
ret i32 0
}
; CHECK: Domain :=
; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 >= 0 and i0 <= 1022 and i0 >= 1001 - p_0 };
; CHECK: Context:
; CHECK: p0: {0,+,1}<%for.cond>
; CHECK: Domain :=
; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 >= 0 and i0 <= 1022 and i0 >= 999 - p_0 };

View File

@ -0,0 +1,66 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; void foo(float *input) {
; for (int j = 0; j < 8; j++) {
; //SCoP begin
; for (int i = 0; i < 63; i++) {
; float x = input[j * 64 + i + 1];
; input[j * 64 + i + 0] = x * x;
; }
; }
; }
;
; Access functions:
;
; input[j * 64 + i + 1] => {4,+,256}<%for.cond1.preheader>
; input[j * 64 + i + 0] => {0,+,256}<%for.cond1.preheader>
;
; They should share the same zero-start parameter:
;
; p0: {0,+,256}<%for.cond1.preheader>
; input[j * 64 + i + 1] => p0 + 4
; input[j * 64 + i + 0] => p0
;
; Function Attrs: nounwind
; Two nested counted loops over %input: each inner iteration loads
; input[j * 64 + i + 1], squares it and stores the result to input[j * 64 + i].
define void @foo(float* nocapture %input) {
entry:
br label %for.cond1.preheader
; Outer loop header: %j.021 is the outer induction variable j, starting at 0.
for.cond1.preheader: ; preds = %for.inc10, %entry
%j.021 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
; %mul = j << 6, i.e. j * 64.
%mul = shl nsw i64 %j.021, 6
br label %for.body3
; Inner loop body: %i.020 is the inner induction variable i, starting at 0.
for.body3: ; preds = %for.body3, %for.cond1.preheader
%i.020 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
; %add = j * 64 + i (index that is written), %add4 = j * 64 + i + 1 (index
; that is read).
%add = add nsw i64 %i.020, %mul
%add4 = add nsw i64 %add, 1
%arrayidx = getelementptr inbounds float* %input, i64 %add4
%0 = load float* %arrayidx, align 8
; x * x
%mul5 = fmul float %0, %0
%arrayidx9 = getelementptr inbounds float* %input, i64 %add
store float %mul5, float* %arrayidx9, align 8
; Inner loop runs until the incremented i equals 63.
%inc = add nsw i64 %i.020, 1
%exitcond = icmp eq i64 %inc, 63
br i1 %exitcond, label %for.inc10, label %for.body3
; Outer loop latch: increment j and leave once it equals 8.
for.inc10: ; preds = %for.body3
%inc11 = add nsw i64 %j.021, 1
%exitcond22 = icmp eq i64 %inc11, 8
; NOTE(review): the fence has no effect on the computed values; presumably it
; keeps the outer loop out of the SCoP so that the %j.021-based recurrence is
; treated as a parameter -- confirm.
fence seq_cst
br i1 %exitcond22, label %for.end12, label %for.cond1.preheader
for.end12: ; preds = %for.inc10
ret void
}
; CHECK: p0: {0,+,256}<%for.cond1.preheader>
; CHECK-NOT: p1
; CHECK: ReadAccess :=
; CHECK: [p_0] -> { Stmt_for_body3[i0] -> MemRef_input[o0] : 4o0 = 4 + p_0 + 4i0 };
; CHECK: MustWriteAccess :=
; CHECK: [p_0] -> { Stmt_for_body3[i0] -> MemRef_input[o0] : 4o0 = p_0 + 4i0 };

View File

@ -0,0 +1,78 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; void foo(long n, long m, long o, double A[n][m][o]) {
; for (long i = 0; i < n; i++)
; for (long j = 0; j < m; j++)
; for (long k = 0; k < o; k++)
; A[i+3][j-4][k+7] = 1.0;
; }
;
; Access function:
;
; {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,
; (8 * %o)}<%for.j>,+,8}<%for.k>
;
; The nested 'start' should be split into three parameters:
; p1: {0,+,(8 * %o)}<%for.j>
; p2: {0,+,(8 * %m * %o)}<%for.i>
; p3: (8 * (-4 + (3 * %m)) * %o)
;
; Triple loop nest over i, j and k that stores 1.0 to the linearized element
; ((i + 3) * %m + (j - 4)) * %o + (k + 7) of %A.
define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
entry:
br label %for.i
; Outermost loop: %i starts at 0 and is incremented in %for.i.inc.
for.i:
%i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
br label %for.j
; Middle loop: %j starts at 0 and is incremented in %for.j.inc.
for.j:
%j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
br label %for.k
; Innermost loop: %k starts at 0; the whole subscript computation and the
; store live in this block.
for.k:
%k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
; (i + 3) * m
%offset0 = add nsw i64 %i, 3
%subscript0 = mul i64 %offset0, %m
; ((i + 3) * m + (j - 4)) * o
%offset1 = add nsw i64 %j, -4
%subscript1 = add i64 %offset1, %subscript0
%subscript2 = mul i64 %subscript1, %o
; ... + (k + 7)
%offset2 = add nsw i64 %k, 7
%subscript = add i64 %subscript2, %offset2
%idx = getelementptr inbounds double* %A, i64 %subscript
store double 1.0, double* %idx
br label %for.k.inc
; Latch of the k loop: exit once the incremented k equals %o.
for.k.inc:
%k.inc = add nsw i64 %k, 1
%k.exitcond = icmp eq i64 %k.inc, %o
br i1 %k.exitcond, label %for.j.inc, label %for.k
; Latch of the j loop: exit once the incremented j equals %m.
for.j.inc:
%j.inc = add nsw i64 %j, 1
%j.exitcond = icmp eq i64 %j.inc, %m
br i1 %j.exitcond, label %for.i.inc, label %for.j
; Latch of the i loop: exit once the incremented i equals %n.
for.i.inc:
%i.inc = add nsw i64 %i, 1
%i.exitcond = icmp eq i64 %i.inc, %n
br i1 %i.exitcond, label %end, label %for.i
end:
ret void
}
; CHECK: p0: %o
; CHECK: p1: {0,+,(8 * %o)}<%for.j>
; CHECK: p2: {0,+,(8 * %m * %o)}<%for.i>
; CHECK: p3: (8 * (-4 + (3 * %m)) * %o)
; CHECK-NOT: p4
; CHECK: Domain
; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] : i0 >= 0 and i0 <= -1 + o };
; CHECK: Scattering
; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] -> scattering[0, i0, 0] };
; CHECK: MustWriteAccess
; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] -> MemRef_A[o0] : 8o0 = 56 + p_1 + p_2 + p_3 + 8i0 };