[OPENMP 4.5] Additional codegen for statically scheduled loops with
'simd' modifier. The runtime library defines a new schedule constant, kmp_sch_static_balanced_chunked = 45, for statically scheduled loop-based directives with chunk adjustment (e.g., simd). This commit adds codegen for this kind of schedule. llvm-svn: 271204
This commit is contained in:
parent
1539cd326e
commit
6cff62484a
|
@ -488,6 +488,8 @@ enum OpenMPSchedType {
|
||||||
OMP_sch_guided_chunked = 36,
|
OMP_sch_guided_chunked = 36,
|
||||||
OMP_sch_runtime = 37,
|
OMP_sch_runtime = 37,
|
||||||
OMP_sch_auto = 38,
|
OMP_sch_auto = 38,
|
||||||
|
/// static with chunk adjustment (e.g., simd)
|
||||||
|
OMP_sch_static_balanced_chunked = 45,
|
||||||
/// \brief Lower bound for 'ordered' versions.
|
/// \brief Lower bound for 'ordered' versions.
|
||||||
OMP_ord_lower = 64,
|
OMP_ord_lower = 64,
|
||||||
OMP_ord_static_chunked = 65,
|
OMP_ord_static_chunked = 65,
|
||||||
|
@ -2409,27 +2411,38 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
|
||||||
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
|
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
|
||||||
OpenMPScheduleClauseModifier M1,
|
OpenMPScheduleClauseModifier M1,
|
||||||
OpenMPScheduleClauseModifier M2) {
|
OpenMPScheduleClauseModifier M2) {
|
||||||
|
int Modifier = 0;
|
||||||
switch (M1) {
|
switch (M1) {
|
||||||
case OMPC_SCHEDULE_MODIFIER_monotonic:
|
case OMPC_SCHEDULE_MODIFIER_monotonic:
|
||||||
return Schedule | OMP_sch_modifier_monotonic;
|
Modifier = OMP_sch_modifier_monotonic;
|
||||||
|
break;
|
||||||
case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
|
case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
|
||||||
return Schedule | OMP_sch_modifier_nonmonotonic;
|
Modifier = OMP_sch_modifier_nonmonotonic;
|
||||||
|
break;
|
||||||
case OMPC_SCHEDULE_MODIFIER_simd:
|
case OMPC_SCHEDULE_MODIFIER_simd:
|
||||||
|
if (Schedule == OMP_sch_static_chunked)
|
||||||
|
Schedule = OMP_sch_static_balanced_chunked;
|
||||||
|
break;
|
||||||
case OMPC_SCHEDULE_MODIFIER_last:
|
case OMPC_SCHEDULE_MODIFIER_last:
|
||||||
case OMPC_SCHEDULE_MODIFIER_unknown:
|
case OMPC_SCHEDULE_MODIFIER_unknown:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
switch (M2) {
|
switch (M2) {
|
||||||
case OMPC_SCHEDULE_MODIFIER_monotonic:
|
case OMPC_SCHEDULE_MODIFIER_monotonic:
|
||||||
return Schedule | OMP_sch_modifier_monotonic;
|
Modifier = OMP_sch_modifier_monotonic;
|
||||||
|
break;
|
||||||
case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
|
case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
|
||||||
return Schedule | OMP_sch_modifier_nonmonotonic;
|
Modifier = OMP_sch_modifier_nonmonotonic;
|
||||||
|
break;
|
||||||
case OMPC_SCHEDULE_MODIFIER_simd:
|
case OMPC_SCHEDULE_MODIFIER_simd:
|
||||||
|
if (Schedule == OMP_sch_static_chunked)
|
||||||
|
Schedule = OMP_sch_static_balanced_chunked;
|
||||||
|
break;
|
||||||
case OMPC_SCHEDULE_MODIFIER_last:
|
case OMPC_SCHEDULE_MODIFIER_last:
|
||||||
case OMPC_SCHEDULE_MODIFIER_unknown:
|
case OMPC_SCHEDULE_MODIFIER_unknown:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return Schedule;
|
return Schedule | Modifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
|
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
|
||||||
|
@ -2444,7 +2457,8 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
|
||||||
getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
|
getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
|
||||||
assert(Ordered ||
|
assert(Ordered ||
|
||||||
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
|
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
|
||||||
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
|
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
|
||||||
|
Schedule != OMP_sch_static_balanced_chunked));
|
||||||
// Call __kmpc_dispatch_init(
|
// Call __kmpc_dispatch_init(
|
||||||
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
|
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
|
||||||
// kmp_int[32|64] lower, kmp_int[32|64] upper,
|
// kmp_int[32|64] lower, kmp_int[32|64] upper,
|
||||||
|
@ -2476,6 +2490,7 @@ static void emitForStaticInitCall(
|
||||||
|
|
||||||
assert(!Ordered);
|
assert(!Ordered);
|
||||||
assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
|
assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
|
||||||
|
Schedule == OMP_sch_static_balanced_chunked ||
|
||||||
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
|
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
|
||||||
Schedule == OMP_dist_sch_static ||
|
Schedule == OMP_dist_sch_static ||
|
||||||
Schedule == OMP_dist_sch_static_chunked);
|
Schedule == OMP_dist_sch_static_chunked);
|
||||||
|
@ -2493,6 +2508,7 @@ static void emitForStaticInitCall(
|
||||||
Chunk = CGF.Builder.getIntN(IVSize, 1);
|
Chunk = CGF.Builder.getIntN(IVSize, 1);
|
||||||
} else {
|
} else {
|
||||||
assert((Schedule == OMP_sch_static_chunked ||
|
assert((Schedule == OMP_sch_static_chunked ||
|
||||||
|
Schedule == OMP_sch_static_balanced_chunked ||
|
||||||
Schedule == OMP_ord_static_chunked ||
|
Schedule == OMP_ord_static_chunked ||
|
||||||
Schedule == OMP_dist_sch_static_chunked) &&
|
Schedule == OMP_dist_sch_static_chunked) &&
|
||||||
"expected static chunked schedule");
|
"expected static chunked schedule");
|
||||||
|
|
|
@ -362,7 +362,7 @@ template <class T, unsigned K> T tfoo(T a) { return a + K; }
|
||||||
|
|
||||||
template <typename T, unsigned N>
|
template <typename T, unsigned N>
|
||||||
int templ1(T a, T *z) {
|
int templ1(T a, T *z) {
|
||||||
#pragma omp for simd collapse(N)
|
#pragma omp for simd collapse(N) schedule(simd: static, N)
|
||||||
for (int i = 0; i < N * 2; i++) {
|
for (int i = 0; i < N * 2; i++) {
|
||||||
for (long long j = 0; j < (N + N + N + N); j += 2) {
|
for (long long j = 0; j < (N + N + N + N); j += 2) {
|
||||||
z[i + j] = a + tfoo<T, N>(i + j);
|
z[i + j] = a + tfoo<T, N>(i + j);
|
||||||
|
@ -373,7 +373,7 @@ int templ1(T a, T *z) {
|
||||||
|
|
||||||
// Instatiation templ1<float,2>
|
// Instatiation templ1<float,2>
|
||||||
// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
|
// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
|
||||||
// CHECK: call void @__kmpc_for_static_init_8(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1)
|
// CHECK: call void @__kmpc_for_static_init_8(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 45, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 2)
|
||||||
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
|
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
|
||||||
// CHECK: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15
|
// CHECK: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15
|
||||||
// CHECK: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]]
|
// CHECK: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]]
|
||||||
|
@ -389,6 +389,7 @@ int templ1(T a, T *z) {
|
||||||
// CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]],
|
// CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]],
|
||||||
|
|
||||||
// ...
|
// ...
|
||||||
|
// CHECK: icmp sle i64
|
||||||
// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]
|
// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]
|
||||||
// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
|
// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
|
||||||
// CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]]
|
// CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]]
|
||||||
|
|
Loading…
Reference in New Issue