Merge pull request #912 from jefflarkin/addnvtx

Add option for building with NVTX in the CUDA code
2018-07-06 12:10:53 -05:00 · 2018-07-06 12:10:53 -05:00 · cc13309a54
parent aa0425cda5 7ff291cfd8
commit cc13309a54
5 changed files with 52 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -688,6 +688,21 @@ ELSE(QMC_CUDA)
  MESSAGE(STATUS "Disabling CUDA")
 ENDIF(QMC_CUDA)

+SET(USE_NVTX_API 0 CACHE BOOL "Enable/disable NVTX regions in CUDA code.")  # opt-in cache option, off by default
+IF(USE_NVTX_API)
+  IF(HAVE_CUDA)  # nothing below runs unless HAVE_CUDA is also set
+    FIND_LIBRARY(NVTX_API_LIB
+      NAMES nvToolsExt  # NAMES is the documented find_library keyword (NAME is not one)
+      HINTS ${CUDA_TOOLKIT_ROOT_DIR}
+      PATH_SUFFIXES lib lib64)
+    IF(NOT NVTX_API_LIB)
+      MESSAGE(FATAL_ERROR "USE_NVTX_API set but NVTX_API_LIB not found")
+    ENDIF(NOT NVTX_API_LIB)
+    MESSAGE(STATUS "CUDA nvToolsExt library: ${NVTX_API_LIB}")
+    LINK_LIBRARIES(${NVTX_API_LIB})  # directory-scoped: affects targets created after this point
+  ENDIF(HAVE_CUDA)
+ENDIF(USE_NVTX_API)
+
 #INCLUDE(${PROJECT_CMAKE}/FindPkgConfig.cmake)
 ##################################################################
 # TODO:use profile tools
@ -790,7 +805,6 @@ IF (USE_VTUNE_API)
  LINK_LIBRARIES("${VTUNE_ITTNOTIFY_LIBRARY}")
 ENDIF()

-
 #include(ExternalProject)
 #  set(einspline_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/einspline")
 #  set(einspline_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/einspline")
--- a/manual/external_tools.tex
+++ b/manual/external_tools.tex
@ -19,6 +19,16 @@ An example of options to be passed to CMake
 -DCMAKE_LIBRARY_PATH=/opt/intel/vtune_amplifier_xe/lib64
 \end{shade}

+\section{NVIDIA Tools Extensions (NVTX)}
+
+NVIDIA's Tools Extensions (NVTX) API enables programmers to annotate their source code so that the annotated regions can be identified when the application is run under the NVIDIA profilers.
+
+\subsection{NVTX API}
+
+If the CMake variable \texttt{USE\_NVTX\_API} is set in a CUDA build, QMCPACK will link against the NVTX library (\texttt{libnvToolsExt.so}). To add NVTX annotations
+to a function, it is necessary to include the \texttt{nvToolsExt.h} header file and then make the appropriate calls into the NVTX API. For more information
+about the NVTX API, see \url{https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx}. Any additional calls to the NVTX API should be guarded by
+the \texttt{USE\_NVTX\_API} compiler define.

 \subsection{Timers as Tasks}
 To aid in connecting the timers in the code to the profile data, the start/stop of
--- a/src/QMCDrivers/DMC/DMC_CUDA.cpp
+++ b/src/QMCDrivers/DMC/DMC_CUDA.cpp
@ -26,6 +26,9 @@
 #include "Utilities/RunTimeManager.h"
 #include "Message/CommOperators.h"
 #include "type_traits/scalar_traits.h"
+#ifdef USE_NVTX_API
+#include <nvToolsExt.h>
+#endif


 namespace qmcplusplus
@ -75,6 +78,9 @@ void DMCcuda::checkBounds (std::vector<PosType> &newpos,

 bool DMCcuda::run()
 {
+#ifdef USE_NVTX_API
+  nvtxRangePushA("DMC:run");
+#endif
  bool scaleweight = ScaleWeight == "yes";
  if (scaleweight)
    app_log() << "  Scaling weight per Umrigar/Nightingale.\n";
@ -324,6 +330,9 @@ bool DMCcuda::run()
    }
  }
  while(block<nBlocks && enough_time_for_next_iteration);
+#ifdef USE_NVTX_API
+  nvtxRangePop();
+#endif
  //finalize a qmc section
  return finalize(block);
 }
--- a/src/QMCDrivers/VMC/VMC_CUDA.cpp
+++ b/src/QMCDrivers/VMC/VMC_CUDA.cpp
@ -25,6 +25,9 @@
 #include "type_traits/scalar_traits.h"
 #include "Utilities/RunTimeManager.h"
 #include "qmc_common.h"
+#ifdef USE_NVTX_API
+#include <nvToolsExt.h>
+#endif

 namespace qmcplusplus
 {
@ -126,6 +129,9 @@ bool VMCcuda::run()
 {
  if (UseDrift == "yes")
    return runWithDrift();
+#ifdef USE_NVTX_API
+  nvtxRangePushA("VMC:run");
+#endif
  resetRun();
  IndexType block = 0;
  IndexType nAcceptTot = 0;
@ -242,6 +248,9 @@ bool VMCcuda::run()
    std::cerr << "At the end of VMC" << std::endl;
    gpu::cuda_memory_manager.report();
  }
+#ifdef USE_NVTX_API
+  nvtxRangePop();
+#endif
  return finalize(block);
 }

@ -331,6 +340,9 @@ void VMCcuda::advanceWalkersWithDrift()

 bool VMCcuda::runWithDrift()
 {
+#ifdef USE_NVTX_API
+  nvtxRangePushA("VMC:runWithDrift");
+#endif
  resetRun();
  IndexType block = 0;
  IndexType nAcceptTot = 0;
@ -427,6 +439,9 @@ bool VMCcuda::runWithDrift()
    std::cerr << "At the end of VMC with drift" << std::endl;
    gpu::cuda_memory_manager.report();
  }
+#ifdef USE_NVTX_API
+  nvtxRangePop();
+#endif
  return finalize(block);
 }

--- a/src/config.h.cmake.in
+++ b/src/config.h.cmake.in
@ -245,5 +245,8 @@
 /* Use VTune Task API with timers */
 #cmakedefine USE_VTUNE_TASKS @USE_VTUNE_TASKS@

+/* Enable NVTX regions in CUDA code. */
+#cmakedefine USE_NVTX_API @USE_NVTX_API@
+
 #endif // QMCPLUSPLUS_CONFIGURATION_H