mirror of https://github.com/QMCPACK/qmcpack.git
Introduce ENABLE_OMP_TASKLOOP for NVHPC 21.3.
This commit is contained in:
parent
2271e10df2
commit
8ca8b6ca3d
|
@ -1,3 +1,7 @@
|
|||
# Abort configuration when the C++ compiler version is 21.3 or newer and
# ENABLE_OMP_TASKLOOP is still on. The version test is inclusive, so 21.3
# itself is rejected and the message states ">=21.3" accordingly.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 21.3 AND ENABLE_OMP_TASKLOOP)
  message(FATAL_ERROR "NVIDIA HPC compilers (>=21.3) refuse OpenMP taskloop constructs. "
                      "Set -DENABLE_OMP_TASKLOOP=OFF to turn off all the use of taskloop.")
endif()
|
||||
|
||||
# Set the std
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -c99")
|
||||
|
|
|
@ -182,6 +182,7 @@ OPTION(ENABLE_CUDA "Build with the second generation of GPU support through CUDA
|
|||
OPTION(ENABLE_HIP "Build with GPU support through HIP" OFF)
|
||||
OPTION(ENABLE_ROCM "Build with GPU support through ROCM" OFF)
|
||||
OPTION(ENABLE_OFFLOAD "Enable OpenMP offload" OFF)
|
||||
OPTION(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ON)
|
||||
# Use CMake object library targets to workaround clang linker not being able to handle fat
|
||||
# binary archives which contain both host and device codes, for example OpenMP offload regions.
|
||||
# CMake does not propagate indirect object files by design.
|
||||
|
|
|
@ -594,7 +594,7 @@ TrialWaveFunction::ValueType TrialWaveFunction::calcRatioGrad(ParticleSet& P, in
|
|||
{
|
||||
std::vector<GradType> grad_components(Z.size(), GradType(0.0));
|
||||
std::vector<PsiValueType> ratio_components(Z.size(), 0.0);
|
||||
#pragma omp taskloop default(shared)
|
||||
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared)")
|
||||
for (int i = 0; i < Z.size(); ++i)
|
||||
{
|
||||
ScopedTimer z_timer(WFC_timers_[VGL_TIMER + TIMER_SKIP * i]);
|
||||
|
@ -659,7 +659,7 @@ void TrialWaveFunction::mw_calcRatioGrad(const RefVectorWithLeader<TrialWaveFunc
|
|||
{
|
||||
std::vector<std::vector<PsiValueType>> ratios_components(num_wfc, std::vector<PsiValueType>(wf_list.size()));
|
||||
std::vector<std::vector<GradType>> grads_components(num_wfc, std::vector<GradType>(wf_list.size()));
|
||||
#pragma omp taskloop default(shared)
|
||||
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared)")
|
||||
for (int i = 0; i < num_wfc; ++i)
|
||||
{
|
||||
ScopedTimer z_timer(wf_leader.WFC_timers_[VGL_TIMER + TIMER_SKIP * i]);
|
||||
|
@ -726,7 +726,7 @@ void TrialWaveFunction::rejectMove(int iat)
|
|||
void TrialWaveFunction::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
|
||||
{
|
||||
ScopedTimer local_timer(TWF_timers_[ACCEPT_TIMER]);
|
||||
#pragma omp taskloop default(shared) if (use_tasking_)
|
||||
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared) if (use_tasking_)")
|
||||
for (int i = 0; i < Z.size(); i++)
|
||||
{
|
||||
ScopedTimer z_timer(WFC_timers_[ACCEPT_TIMER + TIMER_SKIP * i]);
|
||||
|
@ -757,7 +757,7 @@ void TrialWaveFunction::mw_accept_rejectMove(const RefVectorWithLeader<TrialWave
|
|||
wf_list[iw].PhaseValue = 0;
|
||||
}
|
||||
|
||||
#pragma omp taskloop default(shared) if (wf_leader.use_tasking_)
|
||||
PRAGMA_OMP_TASKLOOP("omp taskloop default(shared) if (wf_leader.use_tasking_)")
|
||||
for (int i = 0; i < num_wfc; i++)
|
||||
{
|
||||
ScopedTimer z_timer(wf_leader.WFC_timers_[ACCEPT_TIMER + TIMER_SKIP * i]);
|
||||
|
|
|
@ -44,6 +44,15 @@
|
|||
#define PRAGMA_OFFLOAD(x)
|
||||
#endif
|
||||
|
||||
/* Enable OpenMP taskloop. When this is not defined, PRAGMA_OMP_TASKLOOP(x) expands to nothing. */
|
||||
#cmakedefine ENABLE_OMP_TASKLOOP @ENABLE_OMP_TASKLOOP@
|
||||
|
||||
#ifdef ENABLE_OMP_TASKLOOP
|
||||
#define PRAGMA_OMP_TASKLOOP(x) _Pragma(x)
|
||||
#else
|
||||
#define PRAGMA_OMP_TASKLOOP(x)
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `blitz' library */
|
||||
#cmakedefine HAVE_LIBBLITZ @HAVE_LIBBLITZ@
|
||||
|
||||
|
|
Loading…
Reference in New Issue