ScopInfo: Split start value from SCEVAddRecExpr to enable parameter sharing.
SCoP invariant parameters with different start values would deter parameter sharing. For example, when compiling the following C code: void foo(float *input) { for (long j = 0; j < 8; j++) { // SCoP begin for (long i = 0; i < 8; i++) { float x = input[j * 64 + i + 1]; input[j * 64 + i] = x * x; } } } Polly would create two parameters for these memory accesses: p_0: {0,+,256} p_1: {4,+,256} [j * 64 + i + 1] => MemRef_input[o0] : 4o0 = p_1 + 4i0 [j * 64 + i] => MemRef_input[o0] : 4o0 = p_0 + 4i0 These parameters differ only in their start value. To enable parameter sharing, we split the start value from the SCEVAddRecExpr, so that they share a single parameter that always has a zero start value: p0: {0,+,256}<%for.cond1.preheader> [j * 64 + i + 1] => MemRef_input[o0] : 4o0 = 4 + p_0 + 4i0 [j * 64 + i] => MemRef_input[o0] : 4o0 = p_0 + 4i0 Such a translation can make polly-dependence much faster. Contributed-by: Star Tan <tanmx_star@yeah.net> llvm-svn: 187728
This commit is contained in:
parent
5b4634576e
commit
e42ddb9ad3
|
@ -182,22 +182,38 @@ public:
|
|||
|
||||
__isl_give isl_pw_aff *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
|
||||
assert(Expr->isAffine() && "Only affine AddRecurrences allowed");
|
||||
assert(S->getRegion().contains(Expr->getLoop()) &&
|
||||
"Scop does not contain the loop referenced in this AddRec");
|
||||
|
||||
// Directly generate isl_pw_aff for Expr if 'start' is zero.
|
||||
if (Expr->getStart()->isZero()) {
|
||||
assert(S->getRegion().contains(Expr->getLoop()) &&
|
||||
"Scop does not contain the loop referenced in this AddRec");
|
||||
|
||||
isl_pw_aff *Start = visit(Expr->getStart());
|
||||
isl_pw_aff *Step = visit(Expr->getOperand(1));
|
||||
isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces);
|
||||
isl_local_space *LocalSpace = isl_local_space_from_space(Space);
|
||||
|
||||
int loopDimension = getLoopDepth(Expr->getLoop());
|
||||
|
||||
isl_aff *LAff = isl_aff_set_coefficient_si(
|
||||
isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
|
||||
isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);
|
||||
|
||||
// TODO: Do we need to check for NSW and NUW?
|
||||
return isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff));
|
||||
}
|
||||
|
||||
// Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
|
||||
// if 'start' is not zero.
|
||||
ScalarEvolution &SE = *S->getSE();
|
||||
const SCEV *ZeroStartExpr = SE.getAddRecExpr(
|
||||
SE.getConstant(Expr->getStart()->getType(), 0),
|
||||
Expr->getStepRecurrence(SE), Expr->getLoop(), SCEV::FlagAnyWrap);
|
||||
|
||||
isl_pw_aff *ZeroStartResult = visit(ZeroStartExpr);
|
||||
isl_pw_aff *Start = visit(Expr->getStart());
|
||||
isl_pw_aff *Step = visit(Expr->getOperand(1));
|
||||
isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces);
|
||||
isl_local_space *LocalSpace = isl_local_space_from_space(Space);
|
||||
|
||||
int loopDimension = getLoopDepth(Expr->getLoop());
|
||||
|
||||
isl_aff *LAff = isl_aff_set_coefficient_si(
|
||||
isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
|
||||
isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);
|
||||
|
||||
// TODO: Do we need to check for NSW and NUW?
|
||||
return isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff));
|
||||
return isl_pw_aff_add(ZeroStartResult, Start);
|
||||
}
|
||||
|
||||
__isl_give isl_pw_aff *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
|
||||
|
|
|
@ -278,7 +278,22 @@ public:
|
|||
|
||||
assert(Start.isConstant() && Recurrence.isConstant() &&
|
||||
"Expected 'Start' and 'Recurrence' to be constant");
|
||||
return ValidatorResult(SCEVType::PARAM, Expr);
|
||||
|
||||
// Directly generate ValidatorResult for Expr if 'start' is zero.
|
||||
if (Expr->getStart()->isZero())
|
||||
return ValidatorResult(SCEVType::PARAM, Expr);
|
||||
|
||||
// Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
|
||||
// if 'start' is not zero.
|
||||
const SCEV *ZeroStartExpr = SE.getAddRecExpr(
|
||||
SE.getConstant(Expr->getStart()->getType(), 0),
|
||||
Expr->getStepRecurrence(SE), Expr->getLoop(), SCEV::FlagAnyWrap);
|
||||
|
||||
ValidatorResult ZeroStartResult =
|
||||
ValidatorResult(SCEVType::PARAM, ZeroStartExpr);
|
||||
ZeroStartResult.addParamsFrom(Start);
|
||||
|
||||
return ZeroStartResult;
|
||||
}
|
||||
|
||||
class ValidatorResult visitSMaxExpr(const SCEVSMaxExpr *Expr) {
|
||||
|
|
|
@ -51,6 +51,9 @@ for.end7: ; preds = %for.cond
|
|||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK: Domain :=
|
||||
; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 >= 0 and i0 <= 1022 and i0 >= 1001 - p_0 };
|
||||
; CHECK: Context:
|
||||
; CHECK: p0: {0,+,1}<%for.cond>
|
||||
|
||||
; CHECK: Domain :=
|
||||
; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 >= 0 and i0 <= 1022 and i0 >= 999 - p_0 };
|
||||
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; void foo(float *input) {
|
||||
; for (int j = 0; j < 8; j++) {
|
||||
; //SCoP begin
|
||||
; for (int i = 0; i < 63; i++) {
|
||||
; float x = input[j * 64 + i + 1];
|
||||
; input[j * 64 + i + 0] = x * x;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; Access functions:
|
||||
;
|
||||
; input[j * 64 + i + 1] => {4,+,256}<%for.cond1.preheader>
|
||||
; input[j * 64 + i + 0] => {0,+,256}<%for.cond1.preheader>
|
||||
;
|
||||
; They should share the same zero-start parameter:
|
||||
;
|
||||
; p0: {0,+,256}<%for.cond1.preheader>
|
||||
; input[j * 64 + i + 1] => p0 + 4
|
||||
; input[j * 64 + i + 0] => p0
|
||||
;
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @foo(float* nocapture %input) {
|
||||
entry:
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %for.inc10, %entry
|
||||
%j.021 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
|
||||
%mul = shl nsw i64 %j.021, 6
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3, %for.cond1.preheader
|
||||
%i.020 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
|
||||
%add = add nsw i64 %i.020, %mul
|
||||
%add4 = add nsw i64 %add, 1
|
||||
%arrayidx = getelementptr inbounds float* %input, i64 %add4
|
||||
%0 = load float* %arrayidx, align 8
|
||||
%mul5 = fmul float %0, %0
|
||||
%arrayidx9 = getelementptr inbounds float* %input, i64 %add
|
||||
store float %mul5, float* %arrayidx9, align 8
|
||||
%inc = add nsw i64 %i.020, 1
|
||||
%exitcond = icmp eq i64 %inc, 63
|
||||
br i1 %exitcond, label %for.inc10, label %for.body3
|
||||
|
||||
for.inc10: ; preds = %for.body3
|
||||
%inc11 = add nsw i64 %j.021, 1
|
||||
%exitcond22 = icmp eq i64 %inc11, 8
|
||||
fence seq_cst
|
||||
br i1 %exitcond22, label %for.end12, label %for.cond1.preheader
|
||||
|
||||
for.end12: ; preds = %for.inc10
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: p0: {0,+,256}<%for.cond1.preheader>
|
||||
; CHECK-NOT: p1
|
||||
|
||||
; CHECK: ReadAccess :=
|
||||
; CHECK: [p_0] -> { Stmt_for_body3[i0] -> MemRef_input[o0] : 4o0 = 4 + p_0 + 4i0 };
|
||||
; CHECK: MustWriteAccess :=
|
||||
; CHECK: [p_0] -> { Stmt_for_body3[i0] -> MemRef_input[o0] : 4o0 = p_0 + 4i0 };
|
|
@ -0,0 +1,78 @@
|
|||
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; void foo(long n, long m, long o, double A[n][m][o]) {
|
||||
; for (long i = 0; i < n; i++)
|
||||
; for (long j = 0; j < m; j++)
|
||||
; for (long k = 0; k < o; k++)
|
||||
; A[i+3][j-4][k+7] = 1.0;
|
||||
; }
|
||||
;
|
||||
; Access function:
|
||||
;
|
||||
; {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,
|
||||
; (8 * %o)}<%for.j>,+,8}<%for.k>
|
||||
;
|
||||
; The nested 'start' should be split into three parameters:
|
||||
; p1: {0,+,(8 * %o)}<%for.j>
|
||||
; p2: {0,+,(8 * %m * %o)}<%for.i>
|
||||
; p3: (8 * (-4 + (3 * %m)) * %o)
|
||||
;
|
||||
|
||||
define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
|
||||
entry:
|
||||
br label %for.i
|
||||
|
||||
for.i:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
|
||||
br label %for.j
|
||||
|
||||
for.j:
|
||||
%j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
|
||||
br label %for.k
|
||||
|
||||
for.k:
|
||||
%k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
|
||||
%offset0 = add nsw i64 %i, 3
|
||||
%subscript0 = mul i64 %offset0, %m
|
||||
%offset1 = add nsw i64 %j, -4
|
||||
%subscript1 = add i64 %offset1, %subscript0
|
||||
%subscript2 = mul i64 %subscript1, %o
|
||||
%offset2 = add nsw i64 %k, 7
|
||||
%subscript = add i64 %subscript2, %offset2
|
||||
%idx = getelementptr inbounds double* %A, i64 %subscript
|
||||
store double 1.0, double* %idx
|
||||
br label %for.k.inc
|
||||
|
||||
for.k.inc:
|
||||
%k.inc = add nsw i64 %k, 1
|
||||
%k.exitcond = icmp eq i64 %k.inc, %o
|
||||
br i1 %k.exitcond, label %for.j.inc, label %for.k
|
||||
|
||||
for.j.inc:
|
||||
%j.inc = add nsw i64 %j, 1
|
||||
%j.exitcond = icmp eq i64 %j.inc, %m
|
||||
br i1 %j.exitcond, label %for.i.inc, label %for.j
|
||||
|
||||
for.i.inc:
|
||||
%i.inc = add nsw i64 %i, 1
|
||||
%i.exitcond = icmp eq i64 %i.inc, %n
|
||||
br i1 %i.exitcond, label %end, label %for.i
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: p0: %o
|
||||
; CHECK: p1: {0,+,(8 * %o)}<%for.j>
|
||||
; CHECK: p2: {0,+,(8 * %m * %o)}<%for.i>
|
||||
; CHECK: p3: (8 * (-4 + (3 * %m)) * %o)
|
||||
; CHECK-NOT: p4
|
||||
|
||||
; CHECK: Domain
|
||||
; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] : i0 >= 0 and i0 <= -1 + o };
|
||||
; CHECK: Scattering
|
||||
; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] -> scattering[0, i0, 0] };
|
||||
; CHECK: MustWriteAccess
|
||||
; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] -> MemRef_A[o0] : 8o0 = 56 + p_1 + p_2 + p_3 + 8i0 };
|
Loading…
Reference in New Issue