diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index bad4a8b3778a..1f5bfee41bd9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -220,7 +220,10 @@ class CheckVarsEscapingDeclContext final
                "Parameter captured by value with variably modified type");
         EscapedParameters.insert(VD);
       }
-    }
+    } else if (VD->getType()->isAnyPointerType() ||
+               VD->getType()->isReferenceType())
+      // Do not globalize variables with reference or pointer type.
+      return;
     if (VD->getType()->isVariablyModifiedType())
       EscapedVariableLengthDecls.insert(VD);
     else
@@ -602,9 +605,12 @@ static const Stmt *getSingleCompoundChild(const Stmt *Body) {
 }
 
 /// Check if the parallel directive has an 'if' clause with non-constant or
-/// false condition.
-static bool hasParallelIfClause(ASTContext &Ctx,
-                                const OMPExecutableDirective &D) {
+/// false condition. Also, check if the number of threads is strictly specified
+/// and run those directives in non-SPMD mode.
+static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
+                                          const OMPExecutableDirective &D) {
+  if (D.hasClausesOfKind<OMPNumThreadsClause>())
+    return true;
   for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
     OpenMPDirectiveKind NameModifier = C->getNameModifier();
     if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
@@ -629,7 +635,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
   switch (D.getDirectiveKind()) {
   case OMPD_target:
     if (isOpenMPParallelDirective(DKind) &&
-        !hasParallelIfClause(Ctx, *NestedDir))
+        !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
       return true;
     if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
       Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -639,7 +645,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
       if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
         DKind = NND->getDirectiveKind();
         if (isOpenMPParallelDirective(DKind) &&
-            !hasParallelIfClause(Ctx, *NND))
+            !hasParallelIfNumThreadsClause(Ctx, *NND))
           return true;
         if (DKind == OMPD_distribute) {
           Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -651,7 +657,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
           if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
             DKind = NND->getDirectiveKind();
             return isOpenMPParallelDirective(DKind) &&
-                   !hasParallelIfClause(Ctx, *NND);
+                   !hasParallelIfNumThreadsClause(Ctx, *NND);
           }
         }
       }
@@ -659,7 +665,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
     return false;
   case OMPD_target_teams:
     if (isOpenMPParallelDirective(DKind) &&
-        !hasParallelIfClause(Ctx, *NestedDir))
+        !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
       return true;
     if (DKind == OMPD_distribute) {
       Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -669,13 +675,13 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
       if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
         DKind = NND->getDirectiveKind();
         return isOpenMPParallelDirective(DKind) &&
-               !hasParallelIfClause(Ctx, *NND);
+               !hasParallelIfNumThreadsClause(Ctx, *NND);
       }
     }
     return false;
   case OMPD_target_teams_distribute:
     return isOpenMPParallelDirective(DKind) &&
-           !hasParallelIfClause(Ctx, *NestedDir);
+           !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
   case OMPD_target_simd:
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
@@ -746,7 +752,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
   case OMPD_target_parallel_for_simd:
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd:
-    return !hasParallelIfClause(Ctx, D);
+    return !hasParallelIfNumThreadsClause(Ctx, D);
   case OMPD_target_simd:
   case OMPD_target_teams_distribute_simd:
     return false;
@@ -967,7 +973,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
   CGF.EmitBlock(ExecuteBB);
 
   IsInTargetMasterThreadRegion = true;
-  emitGenericVarsProlog(CGF, D.getLocStart());
 }
 
 void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
@@ -976,8 +981,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
   if (!CGF.HaveInsertPoint())
     return;
 
-  emitGenericVarsEpilog(CGF);
-
   if (!EST.ExitBB)
     EST.ExitBB = CGF.createBasicBlock(".exit");
 
@@ -1464,8 +1467,7 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                               OpenMPProcBindClauseKind ProcBind,
                                               SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
-      IsInTargetMasterThreadRegion)
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     return;
 
   CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -1475,8 +1477,7 @@ void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                 llvm::Value *NumThreads,
                                                 SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
-      IsInTargetMasterThreadRegion)
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     return;
 
   CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -1887,8 +1888,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
 
   // Just call the outlined function to execute the parallel region.
   // OutlinedFn(&GTid, &zero, CapturedStruct);
   //
-  // TODO: Do something with IfCond when support for the 'if' clause
-  // is added on Spmd target directives.
   llvm::SmallVector<llvm::Value *, 8> OutlinedFnArgs;
   Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
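[Editorial note, not part of the patch] The renamed helper above is the heart of this change. A simplified restatement of the new predicate as a sketch, assuming clang's AST headers; the name forcesNonSPMDMode is illustrative only:

    #include "clang/AST/ASTContext.h"
    #include "clang/AST/StmtOpenMP.h"
    using namespace clang;

    // True when a target region must be emitted in non-SPMD (generic) mode:
    // either the number of threads is strictly specified via num_threads, or
    // a parallel 'if' clause does not fold to a compile-time 'true'.
    static bool forcesNonSPMDMode(ASTContext &Ctx,
                                  const OMPExecutableDirective &D) {
      if (D.hasClausesOfKind<OMPNumThreadsClause>())
        return true; // num_threads needs the generic runtime's __kmpc_push_num_threads
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        OpenMPDirectiveKind NameModifier = C->getNameModifier();
        if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
          continue; // the 'if' clause modifies some other construct
        const Expr *Cond = C->getCondition();
        bool Result;
        if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result)
          return true; // non-constant or false: the parallel region may serialize
      }
      return false;
    }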
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 718c650bec6e..0a8e56106ed7 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -18,7 +18,7 @@
 
 __thread int id;
 
-int baz(int f);
+int baz(int f, double &a);
 
 template<typename tx, typename ty>
 struct TT{
@@ -345,7 +345,7 @@ struct S1 {
     {
       this->a = (double)b + 1.5;
      c[1][1] = ++a;
-      baz(a);
+      baz(a, a);
     }
 
     return c[1][1] + (int)b;
@@ -367,9 +367,9 @@ int bar(int n){
   return a;
 }
 
-int baz(int f) {
+int baz(int f, double &a) {
 #pragma omp parallel
-  f = 2;
+  f = 2 + a;
   return f;
 }
 
@@ -551,7 +551,7 @@ int baz(int f) {
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define i32 [[BAZ]](i32 [[F:%.*]])
+  // CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
   // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
   // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
   // CHECK: [[GTID_ADDR:%.+]] = alloca i32,
@@ -559,13 +559,13 @@ int baz(int f) {
   // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
   // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
   // CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
-  // CHECK: store i32 [[F]], i32* [[F_PTR]],
+  // CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
   // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]],
   // CHECK: icmp eq i32
   // CHECK: br i1
 
   // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
-  // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 1)
+  // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2)
   // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]],
   // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0
   // CHECK: [[F_REF:%.+]] = bitcast i32* [[F_PTR]] to i8*
@@ -580,11 +580,11 @@ int baz(int f) {
   // CHECK: br i1
 
   // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
-  // CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]])
+  // CHECK: call void [[OUTLINED:@.+]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
   // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
   // CHECK: br label
 
-  // CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]])
+  // CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
   // CHECK: br label
 
   // CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]],
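[Editorial note, not part of the patch] baz() gains a double& parameter so this test covers the new globalization rule. A sketch of the capture shapes involved (comments are illustrative):

    int baz(int f, double &a) {
      // 'f' escapes into the parallel region and is still globalized through
      // __kmpc_data_sharing_push_stack; 'a' has reference type, so it is now
      // forwarded to the outlined function as a plain double* instead of
      // being globalized. Both values are shared with the workers, which is
      // why __kmpc_begin_sharing_variables now publishes 2 pointers, not 1.
    #pragma omp parallel
      f = 2 + a;
      return f;
    }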
diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
index 64d195c43a26..d193174ce7d9 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
@@ -55,6 +55,7 @@ int bar(int n){
 
   // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
   // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align
+  // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack
   // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
   // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
   // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
index 73d3bf821344..13a7fb289dad 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
@@ -8,9 +8,9 @@
 #ifndef HEADER
 #define HEADER
 
-// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 0
+// Check that the execution mode of both target regions on the gpu is set to non-SPMD mode.
+// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1
 
 template<typename tx>
 tx ftemplate(int n) {
@@ -46,23 +46,13 @@ int bar(int n){
   // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
   // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
   // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-  // CHECK: br label {{%?}}[[EXEC:.+]]
-  //
-  // CHECK: [[EXEC]]
-  // CHECK-NOT: call void @__kmpc_push_num_threads
-  // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
-  // CHECK: br label {{%?}}[[DONE:.+]]
-  //
-  // CHECK: [[DONE]]
-  // CHECK: call void @__kmpc_spmd_kernel_deinit()
-  // CHECK: br label {{%?}}[[EXIT:.+]]
-  //
-  // CHECK: [[EXIT]]
+  // CHECK: call void @__kmpc_kernel_init(i32
+  // CHECK: call void @__kmpc_push_num_threads
+  // CHECK: call void @__kmpc_kernel_deinit(i16 1)
   // CHECK: ret void
   // CHECK: }
 
-  // CHECK: define internal void [[OP1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i16* {{[^%]*}}[[ARG:%.+]])
+  // CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* {{[^%]*}}[[ARG:%.+]])
   // CHECK: = alloca i32*, align
   // CHECK: = alloca i32*, align
   // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align
@@ -89,23 +79,13 @@ int bar(int n){
   // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
   // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
   // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-  // CHECK: br label {{%?}}[[EXEC:.+]]
-  //
-  // CHECK: [[EXEC]]
-  // CHECK-NOT: call void @__kmpc_push_num_threads
-  // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
-  // CHECK: br label {{%?}}[[DONE:.+]]
-  //
-  // CHECK: [[DONE]]
-  // CHECK: call void @__kmpc_spmd_kernel_deinit()
-  // CHECK: br label {{%?}}[[EXIT:.+]]
-  //
-  // CHECK: [[EXIT]]
+  // CHECK: call void @__kmpc_kernel_init(i32
+  // CHECK: call void @__kmpc_push_num_threads
+  // CHECK: call void @__kmpc_kernel_deinit(i16 1)
   // CHECK: ret void
   // CHECK: }
 
-  // CHECK: define internal void [[OP2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]])
+  // CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]])
   // CHECK: = alloca i32*, align
   // CHECK: = alloca i32*, align
   // CHECK: [[A_ADDR:%.+]] = alloca i32*, align
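[Editorial note, not part of the patch] The `_exec_mode` globals in these tests record the chosen codegen scheme: `i8 0` means SPMD mode, `i8 1` means generic (non-SPMD) mode. The two tests below drop `num_threads(M)` from their combined directives so they keep exercising SPMD codegen; keeping the clause would flip them to generic mode, exactly as the num_threads test above now expects. A minimal illustration, assuming a device-offload build (the function is hypothetical):

    void sketch(int *b, int n, int m) {
    #pragma omp target teams distribute parallel for map(tofrom : b[:n]) num_threads(m)
      for (int i = 0; i < n; i++) // explicit num_threads: generic mode (i8 1)
        b[i] += 1;

    #pragma omp target teams distribute parallel for map(tofrom : b[:n])
      for (int i = 0; i < n; i++) // no num_threads clause: SPMD mode (i8 0)
        b[i] += 1;
    }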
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index 66a3d46955a1..195f428e0fbb 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -45,7 +45,7 @@ tx ftemplate(int n) {
     b[i] += 1;
   }
 
-#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M)
+#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k)
   for(int i = 0; i < M; i++) {
     for(int j = 0; j < M; j++) {
       k = M;
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index c508bc912fdb..051ccfe9c403 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -43,7 +43,7 @@ tx ftemplate(int n) {
     b[i] += 1;
   }
 
-#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M)
+#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k)
   for(int i = 0; i < M; i++) {
     for(int j = 0; j < M; j++) {
      k = M;