[OPENMP 4.5] Additional codegen for statically scheduled loops with 'simd' modifier.

The runtime library defines a new schedule constant, kmp_sch_static_balanced_chunked = 45, for static loop-based directives: static with chunk adjustment (e.g., simd). This commit adds codegen for this kind of schedule.

llvm-svn: 271204
This commit is contained in:
Alexey Bataev 2016-05-30 13:05:14 +00:00
parent 1539cd326e
commit 6cff62484a
2 changed files with 25 additions and 8 deletions

View File

@ -488,6 +488,8 @@ enum OpenMPSchedType {
OMP_sch_guided_chunked = 36,
OMP_sch_runtime = 37,
OMP_sch_auto = 38,
/// static with chunk adjustment (e.g., simd)
OMP_sch_static_balanced_chunked = 45,
/// \brief Lower bound for 'ordered' versions.
OMP_ord_lower = 64,
OMP_ord_static_chunked = 65,
@ -2409,27 +2411,38 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
/// Combine a runtime schedule kind with the two schedule-clause modifiers.
///
/// Both modifiers are always inspected, so that a 'simd' modifier in one
/// position is honoured even when the other position carries 'monotonic' or
/// 'nonmonotonic'.
///
/// \param Schedule Base runtime schedule kind.
/// \param M1 First schedule-clause modifier.
/// \param M2 Second schedule-clause modifier.
/// \returns The (possibly adjusted) schedule kind OR-ed with the selected
/// monotonic/nonmonotonic modifier flag, or with 0 when neither is present.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  // Flag bits to OR into the schedule; stays 0 if no monotonic/nonmonotonic
  // modifier is given.
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    // 'simd' only affects the static chunked schedule: switch it to the
    // "static with chunk adjustment" kind.
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // Do not return early above: M2 must still be examined. If M2 also names
  // monotonic/nonmonotonic, its flag replaces the one chosen for M1.
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  return Schedule | Modifier;
}
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
@ -2444,7 +2457,8 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
assert(Ordered ||
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
Schedule != OMP_sch_static_balanced_chunked));
// Call __kmpc_dispatch_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
// kmp_int[32|64] lower, kmp_int[32|64] upper,
@ -2476,6 +2490,7 @@ static void emitForStaticInitCall(
assert(!Ordered);
assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
Schedule == OMP_sch_static_balanced_chunked ||
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
Schedule == OMP_dist_sch_static ||
Schedule == OMP_dist_sch_static_chunked);
@ -2493,6 +2508,7 @@ static void emitForStaticInitCall(
Chunk = CGF.Builder.getIntN(IVSize, 1);
} else {
assert((Schedule == OMP_sch_static_chunked ||
Schedule == OMP_sch_static_balanced_chunked ||
Schedule == OMP_ord_static_chunked ||
Schedule == OMP_dist_sch_static_chunked) &&
"expected static chunked schedule");

View File

@ -362,7 +362,7 @@ template <class T, unsigned K> T tfoo(T a) { return a + K; }
template <typename T, unsigned N>
int templ1(T a, T *z) {
#pragma omp for simd collapse(N)
#pragma omp for simd collapse(N) schedule(simd: static, N)
for (int i = 0; i < N * 2; i++) {
for (long long j = 0; j < (N + N + N + N); j += 2) {
z[i + j] = a + tfoo<T, N>(i + j);
@ -373,7 +373,7 @@ int templ1(T a, T *z) {
// Instantiation templ1<float,2>
// CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
// CHECK: call void @__kmpc_for_static_init_8(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1)
// CHECK: call void @__kmpc_for_static_init_8(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 45, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 2)
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
// CHECK: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15
// CHECK: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]]
@ -389,6 +389,7 @@ int templ1(T a, T *z) {
// CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]],
// ...
// CHECK: icmp sle i64
// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]
// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
// CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]]