Annotate reduction parallel loops in the IslAst textual output

  + Introduced dependence type TYPE_TC_RED to represent the transitive closure
    (& the reverse) of reduction dependences. These are used when we check for
    reduction parallel loops.
  + Test cases including loop reversals and modulo schedules which compute
    reductions in an alternating order (illustrated below).

llvm-svn: 213019
Johannes Doerfert 2014-07-15 00:00:35 +00:00
parent f171cf23b8
commit 457f73eaee
36 changed files with 1527 additions and 35 deletions
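
Editor's illustration (not part of the commit): a reduction can legally be
computed in such an alternating order because the reduction operation is
associative and commutative. A minimal C sketch, assuming an array A of
1024 ints:

    extern int A[1024];
    int sum = 0;
    for (long i = 0; i < 1024; i += 2)  /* even iterations, forward */
      sum += A[i];
    for (long i = 1023; i >= 1; i -= 2) /* odd iterations, reversed */
      sum += A[i];

Both orders yield the same sum, so reduction dependences may be ignored while
scheduling; only executing the reduction statements in parallel requires extra
measures such as privatization.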

View File

@@ -49,6 +49,9 @@ struct IslAstUser {
// The node is the innermost parallel loop.
int IsInnermostParallel;
// The node is only parallel because of reductions
bool IsReductionParallel;
};
class IslAstInfo : public ScopPass {
@@ -87,7 +90,7 @@ static inline bool isInnermostParallel(__isl_keep isl_ast_node *Node) {
bool Res = false;
if (Info)
- Res = Info->IsInnermostParallel;
+ Res = Info->IsInnermostParallel && !Info->IsReductionParallel;
isl_id_free(Id);
return Res;
}
@@ -101,7 +104,7 @@ static inline bool isOutermostParallel(__isl_keep isl_ast_node *Node) {
bool Res = false;
if (Info)
- Res = Info->IsOutermostParallel;
+ Res = Info->IsOutermostParallel && !Info->IsReductionParallel;
isl_id_free(Id);
return Res;
}
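
Editor's note: both predicates now answer false for loops that are only
parallel modulo reductions, so a consumer needs no extra check. A hypothetical
caller (addVectorizeHint is a made-up helper, for illustration only):

    if (isInnermostParallel(Node)) // implies !Info->IsReductionParallel
      addVectorizeHint(Loop);      // hypothetical helper, not Polly API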

View File

@@ -47,23 +47,30 @@ public:
/// @brief The type of the dependences.
///
- /// Reduction dependences are seperated because they can be ignored during
- /// the scheduling. This is the case since the order in which the reduction
- /// statements are executed does not matter. However, if they are executed
- /// in parallel we need to take additional measures (e.g., privatization)
- /// to ensure a correct result.
+ /// Reduction dependences are separated from RAW/WAW/WAR dependences because
+ /// we can ignore them during the scheduling. This is the case since the order
+ /// in which the reduction statements are executed does not matter. However,
+ /// if they are executed in parallel we need to take additional measures
+ /// (e.g., privatization) to ensure a correct result. The (reverse) transitive
+ /// closure of the reduction dependences is used to check for reduction
+ /// statements executed in parallel during code generation. These dependences
+ /// connect all instances of a reduction with each other; they are therefore
+ /// cyclic and possibly "reversed".
enum Type {
// Write after read
- TYPE_WAR = 0x1,
+ TYPE_WAR = 1 << 0,
// Read after write
- TYPE_RAW = 0x2,
+ TYPE_RAW = 1 << 1,
// Write after write
- TYPE_WAW = 0x4,
+ TYPE_WAW = 1 << 2,
// Reduction dependences
- TYPE_RED = 0x8,
+ TYPE_RED = 1 << 3,
// Transitive closure of the reduction dependences (& the reverse)
TYPE_TC_RED = 1 << 4,
};
typedef std::map<ScopStmt *, isl_map *> StatementToIslMapTy;
@@ -114,6 +121,9 @@ private:
/// @brief The map of reduction dependences
isl_union_map *RED = nullptr;
/// @brief The (reverse) transitive closure of reduction dependences
isl_union_map *TC_RED = nullptr;
/// @brief Collect information about the SCoP.
void collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write,
isl_union_map **MayWrite, isl_union_map **AccessSchedule,
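
Editor's sketch of what TYPE_TC_RED represents (a standalone program against
the public isl API, not code from the commit; the statement name S is chosen
for illustration):

    #include <isl/ctx.h>
    #include <isl/union_map.h>

    int main() {
      isl_ctx *Ctx = isl_ctx_alloc();
      // RED for "for (i = 0; i < 4; i++) sum += A[i];"
      isl_union_map *RED = isl_union_map_read_from_str(
          Ctx, "{ S[i] -> S[i + 1] : 0 <= i <= 2 }");
      int Exact;
      // RED^+ = { S[i] -> S[j] : 0 <= i < j <= 3 }, exact for this chain.
      isl_union_map *TC_RED = isl_union_map_transitive_closure(RED, &Exact);
      // Adding the reverse connects every pair of reduction instances,
      // hence "cyclic and possibly reversed".
      TC_RED = isl_union_map_union(
          TC_RED, isl_union_map_reverse(isl_union_map_copy(TC_RED)));
      isl_union_map_dump(TC_RED);
      isl_union_map_free(TC_RED);
      isl_ctx_free(Ctx);
      return 0;
    }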

View File

@@ -124,6 +124,15 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read,
}
}
/// @brief Fix all dimensions of @p Zero to 0 and add it to @p user
static int fixSetToZero(__isl_take isl_set *Zero, void *user) {
isl_union_set **User = (isl_union_set **)user;
for (unsigned i = 0; i < isl_set_dim(Zero, isl_dim_set); i++)
Zero = isl_set_fix_si(Zero, isl_dim_set, i, 0);
*User = isl_union_set_add_set(*User, Zero);
return 0;
}
/// @brief Compute the privatization dependences for a given dependency @p Map
///
/// Privatization dependences are widened original dependences which originate
@@ -164,14 +173,34 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read,
/// S1[i0] -> S2[] : i0 >= 0 and i0 <= 1023}
/// RED:
/// { S1[i0] -> S1[1 + i0] : i0 >= 0 and i0 <= 1022 }
///
/// Note: This function also computes the (reverse) transitive closure of the
/// reduction dependences.
void Dependences::addPrivatizationDependences() {
- isl_union_map *PrivRAW, *PrivWAW, *PrivWAR, *TransClosure;
+ isl_union_map *PrivRAW, *PrivWAW, *PrivWAR;
- // The transitive closure might be over approximated but we only use it to
- // compute the privatization dependences. Thus, overapproximation will lead
- // "only" to more conservative privatization dependences.
- // FIXME: Take precautions to ensure only forward dependences are created.
- TransClosure = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0);
+ // The transitive closure might be over-approximated and could thus lead to
+ // dependency cycles in the privatization dependences. To make sure this
+ // will not happen we remove all negative dependences after we have computed
+ // the transitive closure.
+ TC_RED = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0);
// FIXME: Apply the current schedule instead of assuming the identity schedule
// here. The current approach is only valid as long as we compute the
// dependences only with the initial (identity) schedule. Any other
// schedule could change "the direction of the backward dependences" we
// want to eliminate here.
isl_union_set *UDeltas = isl_union_map_deltas(isl_union_map_copy(TC_RED));
isl_union_set *Universe = isl_union_set_universe(isl_union_set_copy(UDeltas));
isl_union_set *Zero = isl_union_set_empty(isl_union_set_get_space(Universe));
isl_union_set_foreach_set(Universe, fixSetToZero, &Zero);
isl_union_map *NonPositive = isl_union_set_lex_le_union_set(UDeltas, Zero);
TC_RED = isl_union_map_subtract(TC_RED, NonPositive);
TC_RED = isl_union_map_union(
TC_RED, isl_union_map_reverse(isl_union_map_copy(TC_RED)));
TC_RED = isl_union_map_coalesce(TC_RED);
isl_union_map **Maps[] = {&RAW, &WAW, &WAR};
isl_union_map **PrivMaps[] = {&PrivRAW, &PrivWAW, &PrivWAR};
@@ -179,15 +208,15 @@ void Dependences::addPrivatizationDependences() {
isl_union_map **Map = Maps[u], **PrivMap = PrivMaps[u];
*PrivMap = isl_union_map_apply_range(isl_union_map_copy(*Map),
- isl_union_map_copy(TransClosure));
+ isl_union_map_copy(TC_RED));
*PrivMap = isl_union_map_union(
- *PrivMap, isl_union_map_apply_range(isl_union_map_copy(TransClosure),
+ *PrivMap, isl_union_map_apply_range(isl_union_map_copy(TC_RED),
isl_union_map_copy(*Map)));
*Map = isl_union_map_union(*Map, *PrivMap);
}
- isl_union_map_free(TransClosure);
+ isl_union_set_free(Universe);
}
void Dependences::calculateDependences(Scop &S) {
@@ -330,6 +359,7 @@ void Dependences::calculateDependences(Scop &S) {
WAW = isl_union_map_zip(WAW);
WAR = isl_union_map_zip(WAR);
RED = isl_union_map_zip(RED);
TC_RED = isl_union_map_zip(TC_RED);
DEBUG(dbgs() << "Zipped Dependences:\n"; printScop(dbgs()); dbgs() << "\n");
@@ -337,6 +367,7 @@ void Dependences::calculateDependences(Scop &S) {
WAW = isl_union_set_unwrap(isl_union_map_domain(WAW));
WAR = isl_union_set_unwrap(isl_union_map_domain(WAR));
RED = isl_union_set_unwrap(isl_union_map_domain(RED));
TC_RED = isl_union_set_unwrap(isl_union_map_domain(TC_RED));
DEBUG(dbgs() << "Unwrapped Dependences:\n"; printScop(dbgs());
dbgs() << "\n");
@@ -349,6 +380,7 @@ void Dependences::calculateDependences(Scop &S) {
WAW = isl_union_map_coalesce(WAW);
WAR = isl_union_map_coalesce(WAR);
RED = isl_union_map_coalesce(RED);
TC_RED = isl_union_map_coalesce(TC_RED);
DEBUG(printScop(dbgs()));
}
@@ -495,6 +527,8 @@ void Dependences::printScop(raw_ostream &OS) const {
printDependencyMap(OS, WAW);
OS << "\tReduction dependences:\n\t\t";
printDependencyMap(OS, RED);
OS << "\tTransitive closure of reduction dependences:\n\t\t";
printDependencyMap(OS, TC_RED);
}
void Dependences::releaseMemory() {
@@ -502,8 +536,9 @@ void Dependences::releaseMemory() {
isl_union_map_free(WAR);
isl_union_map_free(WAW);
isl_union_map_free(RED);
isl_union_map_free(TC_RED);
- RED = RAW = WAR = WAW = nullptr;
+ RED = RAW = WAR = WAW = TC_RED = nullptr;
}
isl_union_map *Dependences::getDependences(int Kinds) {
@@ -523,6 +558,9 @@ isl_union_map *Dependences::getDependences(int Kinds) {
if (Kinds & TYPE_RED)
Deps = isl_union_map_union(Deps, isl_union_map_copy(RED));
if (Kinds & TYPE_TC_RED)
Deps = isl_union_map_union(Deps, isl_union_map_copy(TC_RED));
Deps = isl_union_map_coalesce(Deps);
Deps = isl_union_map_detect_equalities(Deps);
return Deps;
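
For intuition about the delta-based filtering above, an editor's sketch
(standalone, public isl API; not from the commit): isl_union_map_deltas yields
the distance vectors of a dependence relation, and only non-positive distances
are subtracted.

    #include <isl/ctx.h>
    #include <isl/union_map.h>
    #include <isl/union_set.h>

    int main() {
      isl_ctx *Ctx = isl_ctx_alloc();
      isl_union_map *RED = isl_union_map_read_from_str(
          Ctx, "{ S[i] -> S[i + 1] : 0 <= i <= 2 }");
      // Deltas of RED are { S[1] }: every dependence has distance 1, which
      // is lexicographically positive, so nothing would be removed here.
      // Negative distances only appear once the closure is over-approximated.
      isl_union_set *Deltas = isl_union_map_deltas(RED);
      isl_union_set_dump(Deltas);
      isl_union_set_free(Deltas);
      isl_ctx_free(Ctx);
      return 0;
    }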

View File

@@ -91,11 +91,15 @@ printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
if (Info->IsInnermostParallel) {
Printer = isl_printer_start_line(Printer);
Printer = isl_printer_print_str(Printer, "#pragma simd");
if (Info->IsReductionParallel)
Printer = isl_printer_print_str(Printer, " reduction");
Printer = isl_printer_end_line(Printer);
}
if (Info->IsOutermostParallel) {
Printer = isl_printer_start_line(Printer);
Printer = isl_printer_print_str(Printer, "#pragma omp parallel for");
if (Info->IsReductionParallel)
Printer = isl_printer_print_str(Printer, " reduction");
Printer = isl_printer_end_line(Printer);
}
}
@@ -124,6 +128,7 @@ static struct IslAstUser *allocateIslAstUser() {
NodeInfo->Context = 0;
NodeInfo->IsOutermostParallel = 0;
NodeInfo->IsInnermostParallel = 0;
NodeInfo->IsReductionParallel = false;
return NodeInfo;
}
@@ -148,25 +153,17 @@ static void freeIslAstUser(void *Ptr) {
// dimension if it is a subset of a map with equal values for the current
// dimension.
static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
- Dependences *D) {
- isl_union_map *Schedule, *Deps;
+ __isl_take isl_union_map *Deps) {
+ isl_union_map *Schedule;
isl_map *ScheduleDeps, *Test;
isl_space *ScheduleSpace;
unsigned Dimension, IsParallel;
- if (!D->hasValidDependences()) {
- return false;
- }
Schedule = isl_ast_build_get_schedule(Build);
ScheduleSpace = isl_ast_build_get_schedule_space(Build);
Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1;
- // FIXME: We can remove ignore reduction dependences in case we privatize the
- // memory locations the reduction statements reduce into.
- Deps = D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW |
- Dependences::TYPE_WAR | Dependences::TYPE_RED);
Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule));
Deps = isl_union_map_apply_domain(Deps, Schedule);
@@ -192,6 +189,35 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
return IsParallel;
}
/// @brief Check if the current scheduling dimension is parallel
///
/// In case the dimension is parallel we also check if any reduction
/// dependence is broken when we exploit this parallelism. If so,
/// @p IsReductionParallel will be set to true. The reduction dependences we
/// use for this check are the union of the transitive closure of the initial
/// reduction dependences together with their reversal. Even though these
/// dependences connect all iterations with each other (thus they are cyclic),
/// we can perform the parallelism check as we are only interested in a zero
/// (or non-zero) dependence distance on the dimension in question.
static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
Dependences *D,
bool &IsReductionParallel) {
if (!D->hasValidDependences())
return false;
isl_union_map *Deps = D->getDependences(
Dependences::TYPE_RAW | Dependences::TYPE_WAW | Dependences::TYPE_WAR);
if (!astScheduleDimIsParallel(Build, Deps))
return false;
isl_union_map *RedDeps =
D->getDependences(Dependences::TYPE_TC_RED);
if (!astScheduleDimIsParallel(Build, RedDeps))
IsReductionParallel = true;
return true;
}
// Mark a for node openmp parallel, if it is the outermost parallel for node.
static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
struct AstBuildUserInfo *BuildInfo,
@@ -199,7 +225,8 @@ static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
if (BuildInfo->InParallelFor)
return;
- if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) {
+ if (astScheduleDimIsParallel(Build, BuildInfo->Deps,
+                              NodeInfo->IsReductionParallel)) {
BuildInfo->InParallelFor = 1;
NodeInfo->IsOutermostParallel = 1;
}
@@ -284,7 +311,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build,
if (Info->IsOutermostParallel)
BuildInfo->InParallelFor = 0;
if (!containsLoops(isl_ast_node_for_get_body(Node)))
- if (astScheduleDimIsParallel(Build, BuildInfo->Deps))
+ if (astScheduleDimIsParallel(Build, BuildInfo->Deps,
+                              Info->IsReductionParallel))
Info->IsInnermostParallel = 1;
if (!Info->Context)
Info->Context = isl_ast_build_copy(Build);
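
The hunk above elides the body of the pre-existing per-dimension test. As a
rough reconstruction (editor's sketch under the assumption that it uses the
usual isl inclusion test; not verbatim from the file): a set of dependences
does not prevent parallel execution of a schedule dimension iff every
dependence has distance zero in that dimension.

    #include <isl/map.h>
    #include <isl/union_map.h>

    // Editor's sketch: are the dependences Deps free of any non-zero
    // distance in schedule dimension Dimension?
    static bool hasOnlyZeroDistance(__isl_take isl_union_map *Deps,
                                    __isl_take isl_union_map *Schedule,
                                    unsigned Dimension) {
      // Map both sides of the dependences into the schedule space.
      Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule));
      Deps = isl_union_map_apply_domain(Deps, Schedule);
      isl_map *ScheduleDeps = isl_map_from_union_map(Deps);
      // Test = { x -> y : x[Dimension] = y[Dimension] }.
      isl_map *Test = isl_map_universe(isl_map_get_space(ScheduleDeps));
      Test = isl_map_equate(Test, isl_dim_in, Dimension, isl_dim_out, Dimension);
      bool IsParallel = isl_map_is_subset(ScheduleDeps, Test);
      isl_map_free(ScheduleDeps);
      isl_map_free(Test);
      return IsParallel;
    }

With this split, the new astScheduleDimIsParallel wrapper first tests
RAW|WAW|WAR; if that passes but TYPE_TC_RED fails the same test, the loop is
marked reduction parallel.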

View File

@@ -0,0 +1,62 @@
; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s
;
; This loop nest contains a reduction which imposes the same dependences as the
; accesses to the array A. We need to ensure we keep the dependences of A.
;
; CHECK: RAW dependences:
; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 }
; CHECK: WAR dependences:
; CHECK: { }
; CHECK: WAW dependences:
; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= 1022 }
; CHECK: Reduction dependences:
; CHECK: { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 <= 1022 and i0 >= 0 }
;
;
; void AandSum(int *restrict sum, int *restrict A) {
; for (int i = 0; i < 1024; i++) {
; A[i] = A[i] + A[i - 1];
; A[i - 1] = A[i] + A[i - 2];
; *sum += i;
; }
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @AandSum(i32* noalias %sum, i32* noalias %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i.0, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%sub = add nsw i32 %i.0, -1
%arrayidx1 = getelementptr inbounds i32* %A, i32 %sub
%tmp1 = load i32* %arrayidx1, align 4
%add = add nsw i32 %tmp, %tmp1
%arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
store i32 %add, i32* %arrayidx2, align 4
%sub4 = add nsw i32 %i.0, -2
%arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4
%tmp2 = load i32* %arrayidx5, align 4
%add6 = add nsw i32 %add, %tmp2
%sub7 = add nsw i32 %i.0, -1
%arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7
store i32 %add6, i32* %arrayidx8, align 4
%tmp3 = load i32* %sum, align 4
%add9 = add nsw i32 %tmp3, %i.0
store i32 %add9, i32* %sum, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@@ -9,7 +9,7 @@
; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2
; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0
; CHECK: Reduction dependences:
- ; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 }
+ ; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 <= 1022 and i0 >= 0 }
;
; void f(int *sum) {
; for (int i = 0; i < 1024; i++)

View File

@@ -53,7 +53,6 @@ ret:
; Note that we do not delinearize this access function because it is considered
; to already be affine: {{0,+,4}<%loop.i>,+,4096}<%loop.j>.
;
; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
; CHECK: #pragma simd
; CHECK: #pragma omp parallel for

View File

@@ -0,0 +1,56 @@
; RUN: opt %loadPolly -basicaa -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; This loop nest contains a reduction which imposes the same dependences as the
; accesses to the array A. We need to ensure we do __not__ parallelize anything
; here.
;
; CHECK-NOT: pragma
; CHECK-NOT: reduction
;
; void AandSum(int *restrict sum, int *restrict A) {
; for (int i = 0; i < 1024; i++) {
; A[i] = A[i] + A[i - 1];
; A[i - 1] = A[i] + A[i - 2];
; *sum += i;
; }
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @AandSum(i32* noalias %sum, i32* noalias %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i.0, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%sub = add nsw i32 %i.0, -1
%arrayidx1 = getelementptr inbounds i32* %A, i32 %sub
%tmp1 = load i32* %arrayidx1, align 4
%add = add nsw i32 %tmp, %tmp1
%arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
store i32 %add, i32* %arrayidx2, align 4
%sub4 = add nsw i32 %i.0, -2
%arrayidx5 = getelementptr inbounds i32* %A, i32 %sub4
%tmp2 = load i32* %arrayidx5, align 4
%add6 = add nsw i32 %add, %tmp2
%sub7 = add nsw i32 %i.0, -1
%arrayidx8 = getelementptr inbounds i32* %A, i32 %sub7
store i32 %add6, i32* %arrayidx8, align 4
%tmp3 = load i32* %sum, align 4
%add9 = add nsw i32 %tmp3, %i.0
store i32 %add9, i32* %sum, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,57 @@
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; Verify that we won't privatize anything in the outer dimension
;
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
; CHECK: #pragma simd reduction
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: Stmt_for_body3(c1, c3);
;
; void foo(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; for (long j = 0; j < 1024; j++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @foo(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc4, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end6
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc4
for.inc4: ; preds = %for.end
%inc5 = add nsw i32 %i.0, 1
br label %for.cond
for.end6: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,66 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK-NOT: #pragma simd{{\s*$}}
; CHECK: #pragma simd reduction
; CHECK: Stmt_S0(n - c1)
; CHECK: #pragma simd{{\s*$}}
; CHECK: Stmt_S1(n - c1)
;
; void rlr(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; S0: A[0] += i;
; for (long i = 0; i < 2 * n; i++)
; S1: A[i + 1] = 1;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rlr(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
br label %S0
S0: ; preds = %for.body
%tmp = load i32* %A, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %A, align 4
br label %for.inc
for.inc: ; preds = %S0
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %for.cond2
for.cond2: ; preds = %for.inc8, %for.end
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
%mul3 = shl nsw i32 %n, 1
%cmp4 = icmp slt i32 %i1.0, %mul3
br i1 %cmp4, label %for.body5, label %for.end10
for.body5: ; preds = %for.cond2
br label %S1
S1: ; preds = %for.body5
%add6 = add nsw i32 %i1.0, 1
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
store i32 1, i32* %arrayidx7, align 4
br label %for.inc8
for.inc8: ; preds = %S1
%inc9 = add nsw i32 %i1.0, 1
br label %for.cond2
for.end10: ; preds = %for.cond2
ret void
}

View File

@@ -0,0 +1,69 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK-NOT: #pragma simd{{\s*$}}
; CHECK: #pragma simd reduction
; CHECK: Stmt_S0(2 * n - c1)
; CHECK: #pragma simd{{\s*$}}
; CHECK: Stmt_S1
; CHECK: #pragma simd reduction
; CHECK: Stmt_S0(2 * n - c1)
; CHECK-NOT: #pragma simd{{\s*$}}
;
; void rmalrs(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; S0: A[0] += i;
; for (long i = 0; i < 2 * n; i++)
; S1: A[i + 1] = 1;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmalrs(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
br label %S0
S0: ; preds = %for.body
%tmp = load i32* %A, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %A, align 4
br label %for.inc
for.inc: ; preds = %S0
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %for.cond2
for.cond2: ; preds = %for.inc8, %for.end
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
%mul3 = shl nsw i32 %n, 1
%cmp4 = icmp slt i32 %i1.0, %mul3
br i1 %cmp4, label %for.body5, label %for.end10
for.body5: ; preds = %for.cond2
br label %S1
S1: ; preds = %for.body5
%add6 = add nsw i32 %i1.0, 1
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
store i32 1, i32* %arrayidx7, align 4
br label %for.inc8
for.inc8: ; preds = %S1
%inc9 = add nsw i32 %i1.0, 1
br label %for.cond2
for.end10: ; preds = %for.cond2
ret void
}

View File

@@ -0,0 +1,76 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK: #pragma omp parallel for reduction
; CHECK: for (int c0 = 0; c0 <= 2; c0 += 1) {
; CHECK: if (c0 == 2) {
; CHECK: #pragma simd reduction
; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2)
; CHECK: Stmt_S0(c1);
; CHECK: } else if (c0 == 1) {
; CHECK: #pragma simd
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
; CHECK: Stmt_S1(c1);
; CHECK: } else
; CHECK: #pragma simd reduction
; CHECK: for (int c1 = -2 * n + 2; c1 <= 0; c1 += 2)
; CHECK: Stmt_S0(-c1);
; CHECK: }
;
; void rmalrs2(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; S0: A[0] += i;
; for (long i = 0; i < 2 * n; i++)
; S1: A[i + 1] = 1;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmalrs2(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
br label %S0
S0: ; preds = %for.body
%tmp = load i32* %A, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %A, align 4
br label %for.inc
for.inc: ; preds = %S0
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %for.cond2
for.cond2: ; preds = %for.inc8, %for.end
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
%mul3 = shl nsw i32 %n, 1
%cmp4 = icmp slt i32 %i1.0, %mul3
br i1 %cmp4, label %for.body5, label %for.end10
for.body5: ; preds = %for.cond2
br label %S1
S1: ; preds = %for.body5
%add6 = add nsw i32 %i1.0, 1
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
store i32 1, i32* %arrayidx7, align 4
br label %for.inc8
for.inc8: ; preds = %S1
%inc9 = add nsw i32 %i1.0, 1
br label %for.cond2
for.end10: ; preds = %for.cond2
ret void
}

View File

@@ -0,0 +1,69 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK-NOT: #pragma simd{{\s*$}}
; CHECK: #pragma simd reduction
; CHECK: Stmt_S0
; CHECK: #pragma simd{{\s*$}}
; CHECK: Stmt_S1
; CHECK: #pragma simd reduction
; CHECK: Stmt_S0
; CHECK-NOT: #pragma simd{{\s*$}}
;
; void rms(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; S0: A[0] += i;
; for (long i = 0; i < 2 * n; i++)
; S1: A[i + 1] = 1;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rms(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
br label %S0
S0: ; preds = %for.body
%tmp = load i32* %A, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %A, align 4
br label %for.inc
for.inc: ; preds = %S0
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %for.cond2
for.cond2: ; preds = %for.inc8, %for.end
%i1.0 = phi i32 [ 0, %for.end ], [ %inc9, %for.inc8 ]
%mul3 = shl nsw i32 %n, 1
%cmp4 = icmp slt i32 %i1.0, %mul3
br i1 %cmp4, label %for.body5, label %for.end10
for.body5: ; preds = %for.cond2
br label %S1
S1: ; preds = %for.body5
%add6 = add nsw i32 %i1.0, 1
%arrayidx7 = getelementptr inbounds i32* %A, i32 %add6
store i32 1, i32* %arrayidx7, align 4
br label %for.inc8
for.inc8: ; preds = %S1
%inc9 = add nsw i32 %i1.0, 1
br label %for.cond2
for.end10: ; preds = %for.cond2
ret void
}

View File

@@ -0,0 +1,63 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK: #pragma omp parallel for
; CHECK: for (int c0 = 0; c0 <= 1; c0 += 1) {
; CHECK: if (c0 == 1) {
; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2)
; CHECK: #pragma simd reduction
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: Stmt_for_body3(c1, c3);
; CHECK: } else
; CHECK: for (int c1 = 0; c1 < 2 * n - 1; c1 += 2)
; CHECK: #pragma simd reduction
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: Stmt_for_body3(c1, c3);
; CHECK: }
;
; void rmsmd(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; for (long j = 0; j < 1024; j++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmsmd(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc4, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end6
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc4
for.inc4: ; preds = %for.end
%inc5 = add nsw i32 %i.0, 1
br label %for.cond
for.end6: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,64 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; Verify that the outer dimension doesn't carry reduction dependences
;
; CHECK-NOT:#pragma omp parallel for reduction
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) {
; CHECK: if (c1 % 2 == 0) {
; CHECK: #pragma simd reduction
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: Stmt_for_body3(c1, c3);
; CHECK: } else
; CHECK: #pragma simd reduction
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: Stmt_for_body3(c1, c3);
; CHECK: }
;
; void rmsmd2(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; for (long j = 0; j < 1024; j++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmsmd2(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc4, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end6
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc4
for.inc4: ; preds = %for.end
%inc5 = add nsw i32 %i.0, 1
br label %for.cond
for.end6: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,62 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; Verify that the outer dimension doesn't carry reduction dependences
;
; CHECK-NOT:#pragma omp parallel for reduction
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
; CHECK: #pragma simd reduction
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) {
; CHECK: if (c3 % 2 == 0) {
; CHECK: Stmt_for_body3(c1, c3);
; CHECK: } else
; CHECK: Stmt_for_body3(c1, c3);
; CHECK: }
;
; void rmsmd3(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; for (long j = 0; j < 1024; j++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmsmd3(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc4, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end6
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc4
for.inc4: ; preds = %for.end
%inc5 = add nsw i32 %i.0, 1
br label %for.cond
for.end6: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,62 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; Verify that the outer dimension doesn't carry reduction dependences
;
; CHECK-NOT:#pragma omp parallel for reduction
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1)
; CHECK: #pragma simd reduction
; CHECK: for (int c3 = -1022; c3 <= 1023; c3 += 1) {
; CHECK: if ((c3 + 1022) % 2 == 0 && c3 <= 0) {
; CHECK: Stmt_for_body3(c1, -c3);
; CHECK: } else if ((c3 + 1023) % 2 == 0 && c3 >= 1)
; CHECK: Stmt_for_body3(c1, c3);
; CHECK: }
;
; void rmsmd4(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; for (long j = 0; j < 1024; j++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmsmd4(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc4, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end6
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc4
for.inc4: ; preds = %for.end
%inc5 = add nsw i32 %i.0, 1
br label %for.cond
for.end6: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,65 @@
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; Verify that only the outer dimension needs privatization
;
; CHECK: #pragma omp parallel for reduction
; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) {
; CHECK: if (c1 % 2 == 0) {
; CHECK-NOT: #pragma simd reduction
; CHECK: #pragma simd
; CHECK: for (int c3 = 0; c3 < 2 * n; c3 += 1)
; CHECK: Stmt_for_body3(c3, c1);
; CHECK: } else
; CHECK-NOT: #pragma simd reduction
; CHECK: #pragma simd
; CHECK: for (int c3 = -2 * n + 1; c3 <= 0; c3 += 1)
; CHECK: Stmt_for_body3(-c3, c1);
; CHECK: }
;
; void rmsmd5(int *A, long n) {
; for (long i = 0; i < 2 * n; i++)
; for (long j = 0; j < 1024; j++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmsmd5(i32* %A, i32 %n) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc4, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
%mul = shl nsw i32 %n, 1
%cmp = icmp slt i32 %i.0, %mul
br i1 %cmp, label %for.body, label %for.end6
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc4
for.inc4: ; preds = %for.end
%inc5 = add nsw i32 %i.0, 1
br label %for.cond
for.end6: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,72 @@
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK-NOT:#pragma omp parallel for reduction
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: #pragma simd reduction
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
; CHECK: Stmt_for_body6(c1, c3, c5);
;
; void rmd(int *A) {
; for (long i = 0; i < 2048; i++)
; for (long j = 0; j < 1024; j++)
; for (long k = 0; k < 512; k++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmd(i32* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc10, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
%exitcond2 = icmp ne i32 %i.0, 2048
br i1 %exitcond2, label %for.body, label %for.end12
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc7, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
%exitcond1 = icmp ne i32 %j.0, 1024
br i1 %exitcond1, label %for.body3, label %for.end9
for.body3: ; preds = %for.cond1
br label %for.cond4
for.cond4: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %k.0, 512
br i1 %exitcond, label %for.body6, label %for.end
for.body6: ; preds = %for.cond4
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body6
%inc = add nsw i32 %k.0, 1
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc7
for.inc7: ; preds = %for.end
%inc8 = add nsw i32 %j.0, 1
br label %for.cond1
for.end9: ; preds = %for.cond1
br label %for.inc10
for.inc10: ; preds = %for.end9
%inc11 = add nsw i32 %i.0, 1
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,72 @@
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK-NOT:#pragma omp parallel for reduction
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: #pragma simd reduction
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
; CHECK: Stmt_for_body6(c1, c3, c5);
;
; void rmd2(int *A) {
; for (long i = 0; i < 2048; i++)
; for (long j = 0; j < 1024; j++)
; for (long k = 0; k < 512; k++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmd2(i32* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc10, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
%exitcond2 = icmp ne i32 %i.0, 2048
br i1 %exitcond2, label %for.body, label %for.end12
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc7, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
%exitcond1 = icmp ne i32 %j.0, 1024
br i1 %exitcond1, label %for.body3, label %for.end9
for.body3: ; preds = %for.cond1
br label %for.cond4
for.cond4: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %k.0, 512
br i1 %exitcond, label %for.body6, label %for.end
for.body6: ; preds = %for.cond4
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body6
%inc = add nsw i32 %k.0, 1
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc7
for.inc7: ; preds = %for.end
%inc8 = add nsw i32 %j.0, 1
br label %for.cond1
for.end9: ; preds = %for.cond1
br label %for.inc10
for.inc10: ; preds = %for.end9
%inc11 = add nsw i32 %i.0, 1
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,72 @@
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK-NOT:#pragma omp parallel for reduction
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: #pragma simd reduction
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
; CHECK: Stmt_for_body6(c1, c3, c5);
;
; void rmd3(int *A) {
; for (long i = 0; i < 2048; i++)
; for (long j = 0; j < 1024; j++)
; for (long k = 0; k < 512; k++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmd3(i32* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc10, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
%exitcond2 = icmp ne i32 %i.0, 2048
br i1 %exitcond2, label %for.body, label %for.end12
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc7, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
%exitcond1 = icmp ne i32 %j.0, 1024
br i1 %exitcond1, label %for.body3, label %for.end9
for.body3: ; preds = %for.cond1
br label %for.cond4
for.cond4: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %k.0, 512
br i1 %exitcond, label %for.body6, label %for.end
for.body6: ; preds = %for.cond4
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body6
%inc = add nsw i32 %k.0, 1
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc7
for.inc7: ; preds = %for.end
%inc8 = add nsw i32 %j.0, 1
br label %for.cond1
for.end9: ; preds = %for.cond1
br label %for.inc10
for.inc10: ; preds = %for.end9
%inc11 = add nsw i32 %i.0, 1
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,72 @@
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK-NOT:#pragma omp parallel for reduction
; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1)
; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK: #pragma simd reduction
; CHECK: for (int c5 = 0; c5 <= 511; c5 += 1)
; CHECK: Stmt_for_body6(c1, c3, c5);
;
; void rmd4(int *A) {
; for (long i = 0; i < 2048; i++)
; for (long j = 0; j < 1024; j++)
; for (long k = 0; k < 512; k++)
; A[i] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @rmd4(i32* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc10, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
%exitcond2 = icmp ne i32 %i.0, 2048
br i1 %exitcond2, label %for.body, label %for.end12
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc7, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc8, %for.inc7 ]
%exitcond1 = icmp ne i32 %j.0, 1024
br i1 %exitcond1, label %for.body3, label %for.end9
for.body3: ; preds = %for.cond1
br label %for.cond4
for.cond4: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %k.0, 512
br i1 %exitcond, label %for.body6, label %for.end
for.body6: ; preds = %for.cond4
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body6
%inc = add nsw i32 %k.0, 1
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc7
for.inc7: ; preds = %for.end
%inc8 = add nsw i32 %j.0, 1
br label %for.cond1
for.end9: ; preds = %for.cond1
br label %for.inc10
for.inc10: ; preds = %for.end9
%inc11 = add nsw i32 %i.0, 1
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}

View File

@@ -0,0 +1,32 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end10",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
}
],
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S0",
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, n - i0, 0] }"
},
{
"accesses" : [
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
}
],
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S1",
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, n - i0, 0] }"
}
]
}

View File

@@ -0,0 +1,32 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end10",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
}
],
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S0",
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, -i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }"
},
{
"accesses" : [
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
}
],
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S1",
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }"
}
]
}

View File

@@ -0,0 +1,32 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end10",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
}
],
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S0",
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, 2 * n - i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, 2 * n - i0, 0]: i0 % 2 = 1 }"
},
{
"accesses" : [
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
}
],
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S1",
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end12",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i1, 0, i0, 0, i2, 0] }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end12",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i1, 0, i0, 0] }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end12",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i2, 0, i0, 0, i1, 0] }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end12",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 511 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }"
}
]
}

View File

@@ -0,0 +1,32 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end10",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S0[i0] -> MemRef_A[0] }"
}
],
"domain" : "[n] -> { Stmt_S0[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S0",
"schedule" : "[n] -> { Stmt_S0[i0] -> scattering[0, i0, 0]: i0 % 2 = 0; Stmt_S0[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }"
},
{
"accesses" : [
{
"kind" : "write",
"relation" : "[n] -> { Stmt_S1[i0] -> MemRef_A[1 + i0] }"
}
],
"domain" : "[n] -> { Stmt_S1[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
"name" : "Stmt_S1",
"schedule" : "[n] -> { Stmt_S1[i0] -> scattering[1, i0, 0] }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end6",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
}
],
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
"name" : "Stmt_for_body3",
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 1, i1, 0]: i0 % 2 = 1 }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end6",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
}
],
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
"name" : "Stmt_for_body3",
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end6",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
}
],
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
"name" : "Stmt_for_body3",
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, -i1, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 1]: i1 % 2 = 1 }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end6",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
}
],
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
"name" : "Stmt_for_body3",
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i1, 0, i0, 0]: i1 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[0, i1, 1, -i0, 0]: i1 % 2 = 1 }"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"context" : "[n] -> { : n >= -2147483648 and n <= 2147483647 }",
"name" : "for.cond => for.end6",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
},
{
"kind" : "write",
"relation" : "[n] -> { Stmt_for_body3[i0, i1] -> MemRef_A[i0] }"
}
],
"domain" : "[n] -> { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 and i1 >= 0 and i1 <= 1023 }",
"name" : "Stmt_for_body3",
"schedule" : "[n] -> { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0]: i0 % 2 = 0; Stmt_for_body3[i0, i1] -> scattering[1, i0, 0, i1, 0]: i0 % 2 = 1 }"
}
]
}

View File

@@ -0,0 +1,38 @@
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
;
; CHECK: pragma simd reduction
; CHECK: pragma omp parallel for reduction
;
; int prod;
; void f() {
; for (int i = 0; i < 100; i++)
; prod *= i + 3;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
@prod = common global i32 0, align 4
define void @f() {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i1.0, 100
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%add2 = add nsw i32 %i1.0, 3
%tmp1 = load i32* @prod, align 4
%mul3 = mul nsw i32 %tmp1, %add2
store i32 %mul3, i32* @prod, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i1.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}