From ca1e38fa437b1ed032d888f5195b01507e1643fd Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 14 Sep 2015 11:12:52 +0000 Subject: [PATCH] Propagate exit conditions as described in the PET paper At some point we built loop trip counts using this method. It was replaced by a simpler trick that works only for affine (e.g., not modulo) constraints and relies on the removal of unbounded parts. In order to allow modulo constraints again we go back to the former, more accurate method. llvm-svn: 247540 --- polly/lib/Analysis/ScopInfo.cpp | 27 +++++++++++++------ .../loop-body-references-outer-values-3.ll | 10 ++++--- .../OpenMP/reference-preceeding-loop.ll | 5 ++-- .../Isl/CodeGen/phi_loop_carried_float.ll | 12 ++++----- .../CodeGen/phi_loop_carried_float_escape.ll | 14 +++++----- polly/test/Isl/CodeGen/phi_scalar_simple_2.ll | 8 ++---- .../test/ScopInfo/NonAffine/modulo_domain.ll | 4 +-- polly/test/ScopInfo/isl_trip_count_01.ll | 2 +- polly/test/ScopInfo/loop_affine_bound_1.ll | 4 ++- .../test/ScopInfo/multiple_exiting_blocks.ll | 2 +- 10 files changed, 50 insertions(+), 38 deletions(-) diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 6d80baa52655..9de53ff4a56a 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -796,10 +796,10 @@ static std::pair<__isl_give isl_set *, __isl_give isl_set *> partitionSetParts(__isl_take isl_set *S, unsigned Dim) { for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++) - S = isl_set_lower_bound_si(S, isl_dim_set, u, 0); + S = isl_set_lower_bound_si(S, isl_dim_set, u, u == Dim ? -1 : 0); unsigned NumDimsS = isl_set_n_dim(S); - isl_set *OnlyDimS = isl_set_copy(S); + isl_set *OnlyDimS = S; // Remove dimensions that are greater than Dim as they are not interesting. assert(NumDimsS >= Dim + 1); @@ -827,7 +827,7 @@ partitionSetParts(__isl_take isl_set *S, unsigned Dim) { // Remove the artificial upper bound parameters again. 
BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim); - isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts)); + isl_set *UnboundedParts = isl_set_complement(isl_set_copy(BoundedParts)); return std::make_pair(UnboundedParts, BoundedParts); } @@ -1863,19 +1863,30 @@ void Scop::addLoopBoundsToHeaderDomains(LoopInfo &LI, ScopDetection &SD, isl_set_project_out(BackedgeCondition, isl_dim_set, LoopDepth + 1, LatchLoopDepth - LoopDepth); - auto Parts = partitionSetParts(BackedgeCondition, LoopDepth); + isl_map *ForwardMap = isl_map_lex_le(isl_set_get_space(HeaderBBDom)); + for (int i = 0; i < LoopDepth; i++) + ForwardMap = isl_map_equate(ForwardMap, isl_dim_in, i, isl_dim_out, i); + + isl_set *BackedgeConditionComplement = + isl_set_complement(BackedgeCondition); + BackedgeConditionComplement = isl_set_lower_bound_si( + BackedgeConditionComplement, isl_dim_set, LoopDepth, 0); + BackedgeConditionComplement = + isl_set_apply(BackedgeConditionComplement, ForwardMap); + HeaderBBDom = isl_set_subtract(HeaderBBDom, BackedgeConditionComplement); + + auto Parts = partitionSetParts(HeaderBBDom, LoopDepth); // If a loop has an unbounded back edge condition part (here Parts.first) // we do not want to assume the header will even be executed for the first // iteration of an execution that will lead to an infinite loop. While it // would not be wrong to do so, it does not seem helpful. + // TODO: Use the unbounded part to build runtime assumptions. 
FirstIteration = isl_set_subtract(FirstIteration, Parts.first); - BackedgeCondition = isl_set_apply(Parts.second, NextIterationMap); - BackedgeCondition = isl_set_union(BackedgeCondition, FirstIteration); - BackedgeCondition = isl_set_coalesce(BackedgeCondition); + HeaderBBDom = isl_set_apply(Parts.second, NextIterationMap); + HeaderBBDom = isl_set_coalesce(isl_set_union(HeaderBBDom, FirstIteration)); - HeaderBBDom = isl_set_intersect(HeaderBBDom, BackedgeCondition); } } diff --git a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll index 968f7e1f04c4..63b13060c47d 100644 --- a/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll +++ b/polly/test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll @@ -7,10 +7,11 @@ ; but %call is a parameter of the SCoP and we need to make sure its value is ; properly forwarded to the subfunction. -; AST: Stmt_for_body(0); ; AST: #pragma omp parallel for -; AST: for (int c0 = 1; c0 < cols; c0 += 1) +; AST: for (int c0 = 0; c0 < cols; c0 += 1) ; AST: Stmt_for_body(c0); +; AST: if (cols <= 0) +; AST: Stmt_for_body(0); ; IR: @foo_polly_subfn @@ -38,10 +39,11 @@ end: ; Another variation of this test case, now with even more of the index ; expression defined outside of the scop. -; AST: Stmt_for_body(0); ; AST: #pragma omp parallel for -; AST: for (int c0 = 1; c0 < cols; c0 += 1) +; AST: for (int c0 = 0; c0 < cols; c0 += 1) ; AST: Stmt_for_body(c0); +; AST: if (cols <= 0) +; AST: Stmt_for_body(0); ; IR: @bar_polly_subfn diff --git a/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll b/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll index c7cdf39ff635..7358b1896791 100644 --- a/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll +++ b/polly/test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll @@ -6,11 +6,12 @@ ; of the scop, but does not contain the scop. 
; AST: { -; AST-NEXT: Stmt_while_body(0); ; AST-NEXT: #pragma simd ; AST-NEXT: #pragma omp parallel for -; AST-NEXT: for (int c0 = 1; c0 < p_0 + symbol; c0 += 1) +; AST-NEXT: for (int c0 = 0; c0 < p_0 + symbol; c0 += 1) ; AST-NEXT: Stmt_while_body(c0); +; AST-NEXT: if (p_0 + symbol <= 0) +; AST-NEXT: Stmt_while_body(0); ; AST-NEXT: } ; IR: @update_model_polly_subfn diff --git a/polly/test/Isl/CodeGen/phi_loop_carried_float.ll b/polly/test/Isl/CodeGen/phi_loop_carried_float.ll index 9ebb7af8e850..6ebbc503ebfe 100644 --- a/polly/test/Isl/CodeGen/phi_loop_carried_float.ll +++ b/polly/test/Isl/CodeGen/phi_loop_carried_float.ll @@ -19,23 +19,23 @@ ; CHECK-LABEL: polly.start: ; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops -; CHECK-LABEL: polly.merge: +; CHECK-LABEL: polly.merge2: ; CHECK-NEXT: br label %polly.merge_new_and_old ; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: ; CHECK-NEXT: %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops ; CHECK: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a -; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: -; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops -; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a - -; CHECK-LABEL: polly.stmt.bb4: ; preds = %polly.then3 +; CHECK-LABEL: polly.stmt.bb4: ; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops +; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: +; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops +; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @f(float* %A, i32 %N) { diff --git 
a/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll b/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll index a22b1718248d..1bb51b40026f 100644 --- a/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ b/polly/test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -12,13 +12,13 @@ ; } ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %bb8 ] +; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge2 ], [ %tmp.0, %bb8 ] ; CHECK-NEXT: br label %exit ; CHECK-LABEL: polly.start: ; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops -; CHECK-LABEL: polly.merge: +; CHECK-LABEL: polly.merge2: ; CHECK-NEXT: %tmp.0.final_reload = load float, float* %tmp.0.s2a ; CHECK-NEXT: br label %polly.merge_new_and_old @@ -26,16 +26,16 @@ ; CHECK-NEXT: %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops ; CHECK-: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a -; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: -; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops -; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a - -; CHECK-LABEL: polly.stmt.bb4: ; preds = %polly.then3 +; CHECK-LABEL: polly.stmt.bb4: ; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops +; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: +; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops +; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define float @f(float* %A, i32 %N) { diff --git a/polly/test/Isl/CodeGen/phi_scalar_simple_2.ll 
b/polly/test/Isl/CodeGen/phi_scalar_simple_2.ll index 361563ad9775..afc921ac285a 100644 --- a/polly/test/Isl/CodeGen/phi_scalar_simple_2.ll +++ b/polly/test/Isl/CodeGen/phi_scalar_simple_2.ll @@ -24,13 +24,13 @@ entry: br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge21 ], [ %x.addr.0, %for.cond ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: ; CHECK-NEXT: store i32 %x, i32* %x.addr.0.phiops -; CHECK-LABEL: polly.merge: +; CHECK-LABEL: polly.merge21: ; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a for.cond: ; preds = %for.inc5, %entry @@ -42,10 +42,6 @@ for.cond: ; preds = %for.inc5, %entry %cmp = icmp slt i64 %indvars.iv, %tmp br i1 %cmp, label %for.body, label %for.end7 -; CHECK-LABEL: polly.stmt.for.cond{{[0-9]*}}: -; CHECK: %x.addr.0.phiops.reload[[R1:[0-9]*]] = load i32, i32* %x.addr.0.phiops -; CHECK: store i32 %x.addr.0.phiops.reload[[R1]], i32* %x.addr.0.s2a - for.body: ; preds = %for.cond ; CHECK-LABEL: polly.stmt.for.body: ; CHECK: %x.addr.0.s2a.reload[[R2:[0-9]*]] = load i32, i32* %x.addr.0.s2a diff --git a/polly/test/ScopInfo/NonAffine/modulo_domain.ll b/polly/test/ScopInfo/NonAffine/modulo_domain.ll index 6b3b383d0182..61388e4e47b3 100644 --- a/polly/test/ScopInfo/NonAffine/modulo_domain.ll +++ b/polly/test/ScopInfo/NonAffine/modulo_domain.ll @@ -4,10 +4,10 @@ ; hence modulo handling has been disabled completely. Once this is ; resolved this test should work again. Until then we approximate the ; whole loop body. 
-; CHECK: Stmt_for_body__TO__if_end +; ; CHECK: Domain := ; CHECK: { Stmt_for_body__TO__if_end[i0] : i0 <= 15 and i0 >= 0 }; - +; ; ; void foo(float *A) { ; for (long i = 0; i < 16; i++) { diff --git a/polly/test/ScopInfo/isl_trip_count_01.ll b/polly/test/ScopInfo/isl_trip_count_01.ll index 60ac38cfb501..72ce9021fdb5 100644 --- a/polly/test/ScopInfo/isl_trip_count_01.ll +++ b/polly/test/ScopInfo/isl_trip_count_01.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-allow-non-scev-backedge-taken-count -polly-scops -analyze < %s | FileCheck %s ; -; CHECK: [M, N] -> { Stmt_while_body[i0] : i0 >= 1 and 4i0 <= -M + N; Stmt_while_body[0] } +; CHECK: [M, N] -> { Stmt_while_body[i0] : i0 >= 0 and 4i0 <= -M + N; Stmt_while_body[0] : N <= -1 + M } ; ; void f(int *A, int N, int M) { ; int i = 0; diff --git a/polly/test/ScopInfo/loop_affine_bound_1.ll b/polly/test/ScopInfo/loop_affine_bound_1.ll index 5d137b67f7bf..0201a4d4a07b 100644 --- a/polly/test/ScopInfo/loop_affine_bound_1.ll +++ b/polly/test/ScopInfo/loop_affine_bound_1.ll @@ -55,7 +55,7 @@ return: ; preds = %bb3, %entry ; CHECK: Stmt_bb1 ; CHECK: Domain := ; CHECK: [N, M] -> { Stmt_bb1[i0, i1] : -; CHECK-DAG: i0 >= 1 +; CHECK-DAG: i0 >= 0 ; CHECK-DAG: and ; CHECK-DAG: i0 <= 2 + 4N + 7M ; CHECK-DAG: and @@ -63,6 +63,8 @@ return: ; preds = %bb3, %entry ; CHECK-DAG: and ; CHECK-DAG: i1 <= 1 + 5N - i0 ; CHECK-DAG: Stmt_bb1[0, i1] : +; CHECK-DAG: 7M <= -3 - 4N +; CHECK-DAG: and ; CHECK-DAG: i1 >= 0 ; CHECK-DAG: and ; CHECK-DAG: i1 <= 1 + 5N diff --git a/polly/test/ScopInfo/multiple_exiting_blocks.ll b/polly/test/ScopInfo/multiple_exiting_blocks.ll index 13a74d743dd0..7c55a2383a4f 100644 --- a/polly/test/ScopInfo/multiple_exiting_blocks.ll +++ b/polly/test/ScopInfo/multiple_exiting_blocks.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-scops -polly-detect-unprofitable -analyze < %s | FileCheck %s ; ; CHECK: Domain := -; CHECK: [N, P, Q] -> { Stmt_if_end[i0] : (i0 >= 2 + P and i0 >= 1 and i0 <= 1 + Q 
and i0 <= -1 + N) or (i0 >= 1 and i0 <= 1 + Q and i0 <= -1 + P and i0 <= -1 + N); Stmt_if_end[0] : (N >= 1 and P <= -1) or (N >= 1 and P >= 1) }; +; CHECK: [N, P, Q] -> { Stmt_if_end[i0] : (i0 >= 0 and i0 <= 1 + Q and i0 <= -1 + P and i0 <= -1 + N) or (P <= -1 and i0 >= 1 + P - Q and i0 >= 0 and i0 <= 1 + Q and i0 <= -1 + N); Stmt_if_end[0] : (N >= 1 and P <= -2 and Q <= -2) or (N >= 1 and P >= 1 and Q <= -2) or (P = -1 and N >= 1) } ; ; void f(int *A, int N, int P, int Q) { ; for (int i = 0; i < N; i++) {