diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e7072ece..cc6fdfe06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -678,15 +678,6 @@ ELSE(QMC_CUDA) MESSAGE(STATUS "Disabling CUDA") ENDIF(QMC_CUDA) -SET(USE_NVTX 0 CACHE BOOL "Enable/disable NVTX regions in CUDA code.") -IF(USE_NVTX) - IF(HAVE_CUDA) - ADD_DEFINITIONS(-DUSE_NVTX) - LINK_DIRECTORIES("${CUDA_TOOLKIT_ROOT_DIR}/lib64") - LINK_LIBRARIES("nvToolsExt") - ENDIF(HAVE_CUDA) -ENDIF(USE_NVTX) - #INCLUDE(${PROJECT_CMAKE}/FindPkgConfig.cmake) ################################################################## @@ -790,6 +781,14 @@ IF (USE_VTUNE_API) LINK_LIBRARIES("${VTUNE_ITTNOTIFY_LIBRARY}") ENDIF() +SET(USE_NVTX_API 0 CACHE BOOL "Enable/disable NVTX regions in CUDA code.") +IF(USE_NVTX_API) + IF(HAVE_CUDA) + LINK_DIRECTORIES("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + LINK_LIBRARIES("nvToolsExt") + ENDIF(HAVE_CUDA) +ENDIF(USE_NVTX_API) + #include(ExternalProject) # set(einspline_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/einspline") diff --git a/src/QMCDrivers/DMC/DMC_CUDA.cpp b/src/QMCDrivers/DMC/DMC_CUDA.cpp index fb7a1c09b..e8a0e1f29 100644 --- a/src/QMCDrivers/DMC/DMC_CUDA.cpp +++ b/src/QMCDrivers/DMC/DMC_CUDA.cpp @@ -26,7 +26,7 @@ #include "Utilities/RunTimeManager.h" #include "Message/CommOperators.h" #include "type_traits/scalar_traits.h" -#ifdef USE_NVTX +#ifdef USE_NVTX_API #include #endif @@ -78,7 +78,7 @@ void DMCcuda::checkBounds (std::vector &newpos, bool DMCcuda::run() { -#ifdef USE_NVTX +#ifdef USE_NVTX_API nvtxRangePushA("DMC:run"); #endif bool scaleweight = ScaleWeight == "yes"; @@ -330,7 +330,7 @@ bool DMCcuda::run() } } while(block #endif @@ -129,7 +129,7 @@ bool VMCcuda::run() { if (UseDrift == "yes") return runWithDrift(); -#ifdef USE_NVTX +#ifdef USE_NVTX_API nvtxRangePushA("VMC:run"); #endif resetRun(); @@ -248,7 +248,7 @@ bool VMCcuda::run() std::cerr << "At the end of VMC" << std::endl; gpu::cuda_memory_manager.report(); } -#ifdef USE_NVTX +#ifdef USE_NVTX_API nvtxRangePop(); #endif return 
finalize(block); @@ -340,7 +340,7 @@ void VMCcuda::advanceWalkersWithDrift() bool VMCcuda::runWithDrift() { -#ifdef USE_NVTX +#ifdef USE_NVTX_API nvtxRangePushA("VMC:runWithDrift"); #endif resetRun(); @@ -439,7 +439,7 @@ bool VMCcuda::runWithDrift() std::cerr << "At the end of VMC with drift" << std::endl; gpu::cuda_memory_manager.report(); } -#ifdef USE_NVTX +#ifdef USE_NVTX_API nvtxRangePop(); #endif return finalize(block); diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index 8bc60174b..98f56af50 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -194,9 +194,6 @@ /* Use SOA version of AA distance table */ #cmakedefine ENABLE_SOA @ENABLE_SOA@ -/* Enable NVTX regions in CUDA code. */ -#cmakedefine USE_NVTX @USE_NVTX@ - #if (__cplusplus >= 201103L) #if defined(__INTEL_COMPILER) #if defined(__KNC__) || defined(__AVX512F__) @@ -248,5 +245,8 @@ /* Use VTune Task API with timers */ #cmakedefine USE_VTUNE_TASKS @USE_VTUNE_TASKS@ +/* Enable NVTX regions in CUDA code. */ +#cmakedefine USE_NVTX_API @USE_NVTX_API@ + #endif // QMCPLUSPLUS_CONFIGURATION_H