Introduce ENABLE_OMP_TASKLOOP for NVHPC 21.3.

This commit is contained in:
Ye Luo 2021-04-25 23:41:04 -05:00
parent 2271e10df2
commit 8ca8b6ca3d
4 changed files with 18 additions and 4 deletions

View File

@ -1,3 +1,7 @@
# Stop configuration early when taskloop use is requested together with a
# compiler version (21.3 or newer) that refuses OpenMP taskloop constructs.
# The version bound quoted in the message matches the VERSION_GREATER_EQUAL test.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 21.3 AND ENABLE_OMP_TASKLOOP)
  message(FATAL_ERROR "NVIDIA HPC compilers (>=21.3) refuse OpenMP taskloop constructs. "
                      "Set -DENABLE_OMP_TASKLOOP=OFF to turn off all the use of taskloop.")
endif()
# Language standard selection for C sources: append the -c99 flag
string(APPEND CMAKE_C_FLAGS " -c99")

View File

@ -182,6 +182,7 @@ OPTION(ENABLE_CUDA "Build with the second generation of GPU support through CUDA
OPTION(ENABLE_HIP "Build with GPU support through HIP" OFF)
OPTION(ENABLE_ROCM "Build with GPU support through ROCM" OFF)
OPTION(ENABLE_OFFLOAD "Enable OpenMP offload" OFF)
# ON by default; pass -DENABLE_OMP_TASKLOOP=OFF for compilers that reject
# OpenMP taskloop constructs.
OPTION(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ON)
# Use CMake object library targets to work around the clang linker not being able to handle fat
# binary archives which contain both host and device codes, for example OpenMP offload regions.
# CMake does not propagate indirect object files by design.

View File

@ -594,7 +594,7 @@ TrialWaveFunction::ValueType TrialWaveFunction::calcRatioGrad(ParticleSet& P, in
{
std::vector<GradType> grad_components(Z.size(), GradType(0.0));
std::vector<PsiValueType> ratio_components(Z.size(), 0.0);
#pragma omp taskloop default(shared)
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared)")
for (int i = 0; i < Z.size(); ++i)
{
ScopedTimer z_timer(WFC_timers_[VGL_TIMER + TIMER_SKIP * i]);
@ -659,7 +659,7 @@ void TrialWaveFunction::mw_calcRatioGrad(const RefVectorWithLeader<TrialWaveFunc
{
std::vector<std::vector<PsiValueType>> ratios_components(num_wfc, std::vector<PsiValueType>(wf_list.size()));
std::vector<std::vector<GradType>> grads_components(num_wfc, std::vector<GradType>(wf_list.size()));
#pragma omp taskloop default(shared)
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared)")
for (int i = 0; i < num_wfc; ++i)
{
ScopedTimer z_timer(wf_leader.WFC_timers_[VGL_TIMER + TIMER_SKIP * i]);
@ -726,7 +726,7 @@ void TrialWaveFunction::rejectMove(int iat)
void TrialWaveFunction::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
{
ScopedTimer local_timer(TWF_timers_[ACCEPT_TIMER]);
#pragma omp taskloop default(shared) if (use_tasking_)
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared) if (use_tasking_)")
for (int i = 0; i < Z.size(); i++)
{
ScopedTimer z_timer(WFC_timers_[ACCEPT_TIMER + TIMER_SKIP * i]);
@ -757,7 +757,7 @@ void TrialWaveFunction::mw_accept_rejectMove(const RefVectorWithLeader<TrialWave
wf_list[iw].PhaseValue = 0;
}
#pragma omp taskloop default(shared) if (wf_leader.use_tasking_)
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared) if (wf_leader.use_tasking_)")
for (int i = 0; i < num_wfc; i++)
{
ScopedTimer z_timer(wf_leader.WFC_timers_[ACCEPT_TIMER + TIMER_SKIP * i]);

View File

@ -44,6 +44,15 @@
#define PRAGMA_OFFLOAD(x)
#endif
/* OpenMP taskloop switch, filled in at configure time.
 * PRAGMA_OMP_TASKLOOP(x) takes the pragma text as a string literal. */
#cmakedefine ENABLE_OMP_TASKLOOP @ENABLE_OMP_TASKLOOP@
#ifndef ENABLE_OMP_TASKLOOP
/* Switch not defined: the annotation expands to nothing. */
#define PRAGMA_OMP_TASKLOOP(x)
#else
/* Switch defined: forward the string to the _Pragma operator. */
#define PRAGMA_OMP_TASKLOOP(x) _Pragma(x)
#endif
/* Define to 1 if you have the `blitz' library */
#cmakedefine HAVE_LIBBLITZ @HAVE_LIBBLITZ@