mirror of https://github.com/QMCPACK/qmcpack.git
Merge pull request #912 from jefflarkin/addnvtx
Add option for building with NVTX in the CUDA code
This commit is contained in:
commit
cc13309a54
|
@ -688,6 +688,21 @@ ELSE(QMC_CUDA)
|
|||
MESSAGE(STATUS "Disabling CUDA")
|
||||
ENDIF(QMC_CUDA)
|
||||
|
||||
SET(USE_NVTX_API 0 CACHE BOOL "Enable/disable NVTX regions in CUDA code.")
|
||||
IF(USE_NVTX_API)
|
||||
IF(HAVE_CUDA)
|
||||
# Locate the NVIDIA Tools Extension library (nvToolsExt) under the CUDA
# toolkit root. The multi-name keyword of FIND_LIBRARY is NAMES; the
# previous spelling "NAME" is not a keyword and was itself treated as a
# candidate library name.
FIND_LIBRARY(NVTX_API_LIB
             NAMES nvToolsExt
             HINTS ${CUDA_TOOLKIT_ROOT_DIR}
             PATH_SUFFIXES lib lib64)
|
||||
IF(NOT NVTX_API_LIB)
|
||||
MESSAGE(FATAL_ERROR "USE_NVTX_API set but NVTX_API_LIB not found")
|
||||
ENDIF(NOT NVTX_API_LIB)
|
||||
MESSAGE("CUDA nvToolsExt library: ${NVTX_API_LIB}")
|
||||
LINK_LIBRARIES(${NVTX_API_LIB})
|
||||
ENDIF(HAVE_CUDA)
|
||||
ENDIF(USE_NVTX_API)
|
||||
|
||||
#INCLUDE(${PROJECT_CMAKE}/FindPkgConfig.cmake)
|
||||
##################################################################
|
||||
# TODO:use profile tools
|
||||
|
@ -790,7 +805,6 @@ IF (USE_VTUNE_API)
|
|||
LINK_LIBRARIES("${VTUNE_ITTNOTIFY_LIBRARY}")
|
||||
ENDIF()
|
||||
|
||||
|
||||
#include(ExternalProject)
|
||||
# set(einspline_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/einspline")
|
||||
# set(einspline_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/einspline")
|
||||
|
|
|
@ -19,6 +19,16 @@ An example of options to be passed to CMake
|
|||
-DCMAKE_LIBRARY_PATH=/opt/intel/vtune_amplifier_xe/lib64
|
||||
\end{shade}
|
||||
|
||||
\section{NVIDIA Tools Extensions (NVTX)}
|
||||
|
||||
NVIDIA's Tools Extensions (NVTX) API enables programmers to annotate their source code so that the annotations appear in the NVIDIA profilers.
|
||||
|
||||
\subsection{NVTX API}
|
||||
|
||||
If the variable \texttt{USE\_NVTX\_API} is set, QMCPACK will add the library (\texttt{libnvToolsExt.so}) to the qmcpack target. To add NVTX annotations
|
||||
to a function, it is necessary to include the \texttt{nvToolsExt.h} header file and then make the appropriate calls into the NVTX API. For more information
|
||||
about the NVTX API, see \url{https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx}. Any additional calls to the NVTX API should be guarded by
|
||||
the \texttt{USE\_NVTX\_API} compiler define.
|
||||
|
||||
\subsection{Timers as Tasks}
|
||||
To aid in connecting the timers in the code to the profile data, the start/stop of
|
||||
|
|
|
@ -26,6 +26,9 @@
|
|||
#include "Utilities/RunTimeManager.h"
|
||||
#include "Message/CommOperators.h"
|
||||
#include "type_traits/scalar_traits.h"
|
||||
#ifdef USE_NVTX_API
|
||||
#include <nvToolsExt.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace qmcplusplus
|
||||
|
@ -75,6 +78,9 @@ void DMCcuda::checkBounds (std::vector<PosType> &newpos,
|
|||
|
||||
bool DMCcuda::run()
|
||||
{
|
||||
#ifdef USE_NVTX_API
|
||||
nvtxRangePushA("DMC:run");
|
||||
#endif
|
||||
bool scaleweight = ScaleWeight == "yes";
|
||||
if (scaleweight)
|
||||
app_log() << " Scaling weight per Umrigar/Nightingale.\n";
|
||||
|
@ -324,6 +330,9 @@ bool DMCcuda::run()
|
|||
}
|
||||
}
|
||||
while(block<nBlocks && enough_time_for_next_iteration);
|
||||
#ifdef USE_NVTX_API
|
||||
nvtxRangePop();
|
||||
#endif
|
||||
//finalize a qmc section
|
||||
return finalize(block);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,9 @@
|
|||
#include "type_traits/scalar_traits.h"
|
||||
#include "Utilities/RunTimeManager.h"
|
||||
#include "qmc_common.h"
|
||||
#ifdef USE_NVTX_API
|
||||
#include <nvToolsExt.h>
|
||||
#endif
|
||||
|
||||
namespace qmcplusplus
|
||||
{
|
||||
|
@ -126,6 +129,9 @@ bool VMCcuda::run()
|
|||
{
|
||||
if (UseDrift == "yes")
|
||||
return runWithDrift();
|
||||
#ifdef USE_NVTX_API
|
||||
nvtxRangePushA("VMC:run");
|
||||
#endif
|
||||
resetRun();
|
||||
IndexType block = 0;
|
||||
IndexType nAcceptTot = 0;
|
||||
|
@ -242,6 +248,9 @@ bool VMCcuda::run()
|
|||
std::cerr << "At the end of VMC" << std::endl;
|
||||
gpu::cuda_memory_manager.report();
|
||||
}
|
||||
#ifdef USE_NVTX_API
|
||||
nvtxRangePop();
|
||||
#endif
|
||||
return finalize(block);
|
||||
}
|
||||
|
||||
|
@ -331,6 +340,9 @@ void VMCcuda::advanceWalkersWithDrift()
|
|||
|
||||
bool VMCcuda::runWithDrift()
|
||||
{
|
||||
#ifdef USE_NVTX_API
|
||||
nvtxRangePushA("VMC:runWithDrift");
|
||||
#endif
|
||||
resetRun();
|
||||
IndexType block = 0;
|
||||
IndexType nAcceptTot = 0;
|
||||
|
@ -427,6 +439,9 @@ bool VMCcuda::runWithDrift()
|
|||
std::cerr << "At the end of VMC with drift" << std::endl;
|
||||
gpu::cuda_memory_manager.report();
|
||||
}
|
||||
#ifdef USE_NVTX_API
|
||||
nvtxRangePop();
|
||||
#endif
|
||||
return finalize(block);
|
||||
}
|
||||
|
||||
|
|
|
@ -245,5 +245,8 @@
|
|||
/* Use VTune Task API with timers */
|
||||
#cmakedefine USE_VTUNE_TASKS @USE_VTUNE_TASKS@
|
||||
|
||||
/* Enable NVTX regions in CUDA code. */
|
||||
#cmakedefine USE_NVTX_API @USE_NVTX_API@
|
||||
|
||||
#endif // QMCPLUSPLUS_CONFIGURATION_H
|
||||
|
||||
|
|
Loading…
Reference in New Issue