Merge pull request #912 from jefflarkin/addnvtx

Add option for building with NVTX in the CUDA code
This commit is contained in:
Ye Luo 2018-07-06 12:10:53 -05:00 committed by GitHub
commit cc13309a54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 52 additions and 1 deletions

View File

@ -688,6 +688,21 @@ ELSE(QMC_CUDA)
MESSAGE(STATUS "Disabling CUDA")
ENDIF(QMC_CUDA)
# Optional NVTX (NVIDIA Tools Extension) support.
# USE_NVTX_API is a user-settable cache option (default off). When it is
# enabled in a CUDA build, the nvToolsExt library is located and linked into
# every target added after this point; configuration fails if the library
# cannot be found.
SET(USE_NVTX_API 0 CACHE BOOL "Enable/disable NVTX regions in CUDA code.")
IF(USE_NVTX_API)
  IF(HAVE_CUDA)
    # NAMES is the documented keyword for listing candidate library names.
    # The CUDA toolkit root is searched first (lib and lib64 subdirectories).
    FIND_LIBRARY(NVTX_API_LIB
      NAMES nvToolsExt
      HINTS ${CUDA_TOOLKIT_ROOT_DIR}
      PATH_SUFFIXES lib lib64)
    IF(NOT NVTX_API_LIB)
      MESSAGE(FATAL_ERROR "USE_NVTX_API set but NVTX_API_LIB not found")
    ENDIF(NOT NVTX_API_LIB)
    MESSAGE(STATUS "CUDA nvToolsExt library: ${NVTX_API_LIB}")
    LINK_LIBRARIES(${NVTX_API_LIB})
  ELSE(HAVE_CUDA)
    # Do not silently ignore the request: NVTX is only wired up for CUDA builds.
    MESSAGE(WARNING "USE_NVTX_API is set but CUDA is not enabled; the NVTX library will not be linked")
  ENDIF(HAVE_CUDA)
ENDIF(USE_NVTX_API)
#INCLUDE(${PROJECT_CMAKE}/FindPkgConfig.cmake)
##################################################################
# TODO:use profile tools
@ -790,7 +805,6 @@ IF (USE_VTUNE_API)
LINK_LIBRARIES("${VTUNE_ITTNOTIFY_LIBRARY}")
ENDIF()
#include(ExternalProject)
# set(einspline_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/einspline")
# set(einspline_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/einspline")

View File

@ -19,6 +19,16 @@ An example of options to be passed to CMake
-DCMAKE_LIBRARY_PATH=/opt/intel/vtune_amplifier_xe/lib64
\end{shade}
\section{NVIDIA Tools Extensions (NVTX)}
NVIDIA's Tools Extensions (NVTX) API enables programmers to annotate their source code so that the annotated regions can be identified when the application is run under the NVIDIA profilers.
\subsection{NVTX API}
If the CMake variable \texttt{USE\_NVTX\_API} is set in a CUDA build, QMCPACK will link the NVTX library (\texttt{libnvToolsExt.so}) into the qmcpack target. To add NVTX annotations
to a function, it is necessary to include the \texttt{nvToolsExt.h} header file and then make the appropriate calls into the NVTX API. For more information
about the NVTX API, see \url{https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx}. Any additional calls to the NVTX API should be guarded by
the \texttt{USE\_NVTX\_API} compiler define.
\subsection{Timers as Tasks}
To aid in connecting the timers in the code to the profile data, the start/stop of

View File

@ -26,6 +26,9 @@
#include "Utilities/RunTimeManager.h"
#include "Message/CommOperators.h"
#include "type_traits/scalar_traits.h"
#ifdef USE_NVTX_API
#include <nvToolsExt.h>
#endif
namespace qmcplusplus
@ -75,6 +78,9 @@ void DMCcuda::checkBounds (std::vector<PosType> &newpos,
bool DMCcuda::run()
{
#ifdef USE_NVTX_API
nvtxRangePushA("DMC:run");
#endif
bool scaleweight = ScaleWeight == "yes";
if (scaleweight)
app_log() << " Scaling weight per Umrigar/Nightingale.\n";
@ -324,6 +330,9 @@ bool DMCcuda::run()
}
}
while(block<nBlocks && enough_time_for_next_iteration);
#ifdef USE_NVTX_API
nvtxRangePop();
#endif
//finalize a qmc section
return finalize(block);
}

View File

@ -25,6 +25,9 @@
#include "type_traits/scalar_traits.h"
#include "Utilities/RunTimeManager.h"
#include "qmc_common.h"
#ifdef USE_NVTX_API
#include <nvToolsExt.h>
#endif
namespace qmcplusplus
{
@ -126,6 +129,9 @@ bool VMCcuda::run()
{
if (UseDrift == "yes")
return runWithDrift();
#ifdef USE_NVTX_API
nvtxRangePushA("VMC:run");
#endif
resetRun();
IndexType block = 0;
IndexType nAcceptTot = 0;
@ -242,6 +248,9 @@ bool VMCcuda::run()
std::cerr << "At the end of VMC" << std::endl;
gpu::cuda_memory_manager.report();
}
#ifdef USE_NVTX_API
nvtxRangePop();
#endif
return finalize(block);
}
@ -331,6 +340,9 @@ void VMCcuda::advanceWalkersWithDrift()
bool VMCcuda::runWithDrift()
{
#ifdef USE_NVTX_API
nvtxRangePushA("VMC:runWithDrift");
#endif
resetRun();
IndexType block = 0;
IndexType nAcceptTot = 0;
@ -427,6 +439,9 @@ bool VMCcuda::runWithDrift()
std::cerr << "At the end of VMC with drift" << std::endl;
gpu::cuda_memory_manager.report();
}
#ifdef USE_NVTX_API
nvtxRangePop();
#endif
return finalize(block);
}

View File

@ -245,5 +245,8 @@
/* Use VTune Task API with timers */
#cmakedefine USE_VTUNE_TASKS @USE_VTUNE_TASKS@
/* Enable NVTX regions in CUDA code. */
#cmakedefine USE_NVTX_API @USE_NVTX_API@
#endif // QMCPLUSPLUS_CONFIGURATION_H