Manage GPU markers with compilation flag rather than input param

GPU markers, such as NVTX (NVIDIA) or ROCtx (AMD), are meant to be used
for profiling purposes, alongside dedicated profiling tools (Nsight Systems,
Radeon profiler, Perfetto UI...).
As such, they are mostly useful for development or benchmarking purposes
and are of no use to end users of ABINIT.

This commit changes the way one enables GPU Markers:
- input parameter "gpu_use_nvtx" is removed
- configure option "with_gpu_markers/--with-gpu-markers" is added
- CMake option "-DABINIT_ENABLE_GPU_MARKERS" is added

The way GPU markers are handled in the ABINIT code does not change, as this
feature was already hidden behind and protected by the HAVE_GPU_MARKERS
preprocessor define.
This commit is contained in:
Marc Sarraute 2024-03-25 15:39:36 +01:00
parent ec48130bec
commit 8f8345b34a
9 changed files with 81 additions and 73 deletions

View File

@ -141,12 +141,6 @@ if(ABINIT_ENABLE_GPU_CUDA)
set(HAVE_GPU 1)
set(HAVE_GPU_SERIAL 1)
# check nvtx library is available
if (TARGET CUDA::nvToolsExt)
set(HAVE_GPU_CUDA10 1)
set(HAVE_GPU_MARKERS 1)
endif()
endif()
option(ABINIT_ENABLE_GPU_HIP "Enable GPU build (using AMD HIP backend, default OFF)" OFF)
@ -164,20 +158,35 @@ if(ABINIT_ENABLE_GPU_HIP)
set(HAVE_GPU 1)
set(HAVE_GPU_SERIAL 1)
# ROCTX: ROC tracer library similar in use to NVTX for CUDA
find_library(ROCTX
NAMES libroctx64.so
HINTS ${ROCM_ROOT}/roctracer/lib ${ROCM_PATH}/roctracer/lib ${ROCM_HOME}/roctracer/lib
REQUIRED)
# check roctx library is available
if (EXISTS ${ROCTX})
set(HAVE_GPU_MARKERS 1)
endif()
add_compile_definitions("__HIP_PLATFORM_AMD__")
endif()
# GPU markers (NVTX for NVIDIA CUDA, ROCtx for AMD ROCm/HIP) annotate code
# regions so that they show up in profiling tools. They are opt-in: when this
# option is ON and the marker library of the enabled GPU backend is available,
# HAVE_GPU_MARKERS is set to 1. Example: cmake -DABINIT_ENABLE_GPU_MARKERS=ON ...
option(ABINIT_ENABLE_GPU_MARKERS "Enable GPU markers for profiling (requires CUDA or ROCM/HIP, default OFF)" OFF)

if(ABINIT_ENABLE_GPU_MARKERS)

  # Markers only make sense on top of a GPU backend; tell the user instead of
  # silently ignoring the request.
  if(NOT ABINIT_ENABLE_GPU_CUDA AND NOT ABINIT_ENABLE_GPU_HIP)
    message(WARNING
      "ABINIT_ENABLE_GPU_MARKERS is ON but neither ABINIT_ENABLE_GPU_CUDA nor "
      "ABINIT_ENABLE_GPU_HIP is enabled: GPU markers will not be available.")
  endif()

  if(ABINIT_ENABLE_GPU_CUDA)
    # check nvtx library is available
    # NOTE(review): HAVE_GPU_CUDA10 is only set on this path, i.e. only when
    # markers are requested -- confirm nothing else relies on it.
    if(TARGET CUDA::nvToolsExt)
      set(HAVE_GPU_CUDA10 1)
      set(HAVE_GPU_MARKERS 1)
    else()
      # Markers were explicitly requested: fail loudly, as the HIP branch
      # (REQUIRED) and the autotools build already do.
      message(FATAL_ERROR
        "ABINIT_ENABLE_GPU_MARKERS is ON but the CUDA::nvToolsExt target (NVTX) "
        "is not available. Disable ABINIT_ENABLE_GPU_MARKERS or provide a CUDA "
        "toolkit that ships nvToolsExt.")
    endif()
  endif()

  if(ABINIT_ENABLE_GPU_HIP)
    # ROCTX: ROC tracer library similar in use to NVTX for CUDA
    # REQUIRED makes the configuration stop if the library cannot be found.
    find_library(ROCTX
      NAMES libroctx64.so
      HINTS ${ROCM_ROOT}/roctracer/lib ${ROCM_PATH}/roctracer/lib ${ROCM_HOME}/roctracer/lib
      REQUIRED)
    # check roctx library is available (path quoted so that an unexpected
    # value cannot split into several arguments)
    if(EXISTS "${ROCTX}")
      set(HAVE_GPU_MARKERS 1)
    endif()
  endif()

endif()
if (ABINIT_ENABLE_GPU_CUDA OR ABINIT_ENABLE_GPU_HIP)
set(DO_BUILD_17_GPU_TOOLBOX TRUE)
else()

View File

@ -89,18 +89,22 @@ AC_DEFUN([_ABI_GPU_CHECK_CUDA],[
]])], [abi_gpu_cuda_version_10="yes"], [abi_gpu_cuda_version_10="no"])
AC_MSG_RESULT([${abi_gpu_cuda_version_10}])
if test "${abi_gpu_cuda_version_10}" = "yes"; then
if test -e "${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext}"; then
# always add link flags to nvtx if available
if test "${GPU_LIBS}" = ""; then
abi_gpu_cuda_libs="-lnvToolsExt ${abi_gpu_cuda_libs}"
if test "${abi_gpu_markers_enable}" = "yes"; then
if test "${abi_gpu_cuda_version_10}" = "yes"; then
if test -e "${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext}"; then
# always add link flags to nvtx if available
if test "${GPU_LIBS}" = ""; then
abi_gpu_cuda_libs="-lnvToolsExt ${abi_gpu_cuda_libs}"
else
abi_gpu_cuda_libs="${abi_gpu_cuda_libs} -lnvToolsExt"
fi
abi_gpu_nvtx_v3="yes"
abi_result="${abi_result} nvtx_v3"
else
abi_gpu_cuda_libs="${abi_gpu_cuda_libs} -lnvToolsExt"
AC_MSG_ERROR([Cuda NVTX: ${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext} not found])
fi
abi_gpu_nvtx_v3="yes"
abi_result="${abi_result} nvtx_v3"
else
AC_MSG_NOTICE([Cuda Nvtx: ${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext} not found])
AC_MSG_ERROR([Cuda NVTX was requested but is not available for CUDA < v10])
fi
fi
@ -357,17 +361,19 @@ AC_DEFUN([_ABI_GPU_CHECK_HIP],[
AC_MSG_WARN([your Fortran compiler does not provide any ISO C binding module])
fi
if test -e "${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext}"; then
# always add link flags to roctx if available
if test "${GPU_LIBS}" = ""; then
abi_gpu_hip_libs="-lroctx64 ${abi_gpu_hip_libs}"
if test "${abi_gpu_markers_enable}" = "yes"; then
if test -e "${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext}"; then
# always add link flags to roctx if available
if test "${GPU_LIBS}" = ""; then
abi_gpu_hip_libs="-lroctx64 ${abi_gpu_hip_libs}"
else
abi_gpu_hip_libs="${abi_gpu_hip_libs} -lroctx64"
fi
abi_gpu_roctx="yes"
abi_result="${abi_result} roctx"
else
abi_gpu_hip_libs="${abi_gpu_hip_libs} -lroctx64"
AC_MSG_ERROR([AMD ROCtx: ${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext} not found])
fi
abi_gpu_roctx="yes"
abi_result="${abi_result} roctx"
else
AC_MSG_NOTICE([AMD ROCtx: ${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext} not found])
fi
# Restore build environment
@ -551,6 +557,7 @@ AC_DEFUN([ABI_GPU_INIT],[
# Init
abi_gpu_complete="unknown"
abi_gpu_enable="${sd_gpu_enable}"
abi_gpu_markers_enable="${sd_gpu_markers_enable}"
abi_gpu_has_cc="no"
abi_gpu_has_fft="no"
abi_gpu_has_incs="no"

View File

@ -22,6 +22,7 @@ AC_DEFUN([SD_GPU_INIT], [
sd_gpu_ldflags=""
sd_gpu_libs=""
sd_gpu_enable=""
sd_gpu_markers_enable=""
sd_gpu_init="unknown"
sd_gpu_ok="unknown"
sd_gpu_prefix=""
@ -35,6 +36,7 @@ AC_DEFUN([SD_GPU_INIT], [
sd_gpu_fcflags_def="$6"
sd_gpu_ldflags_def="$7"
sd_gpu_enable_def=""
sd_gpu_markers_enable_def=""
sd_gpu_policy=""
sd_gpu_status=""
@ -58,6 +60,7 @@ AC_DEFUN([SD_GPU_INIT], [
# Set reasonable defaults if not provided
test -z "${sd_gpu_enable_def}" && sd_gpu_enable_def="no"
test -z "${sd_gpu_markers_enable_def}" && sd_gpu_markers_enable_def="no"
test -z "${sd_gpu_policy}" && sd_gpu_policy="warn"
test -z "${sd_gpu_status}" && sd_gpu_status="optional"
# FIXME: improve the setting mechanism
@ -78,6 +81,18 @@ AC_DEFUN([SD_GPU_INIT], [
fi],
[ sd_gpu_enable="${sd_gpu_enable_def}"; sd_gpu_init="def"])
# Declare the GPU markers option. Only the literal values yes and no are
# taken as given; any other value passed by the user is silently turned
# into no. When the option is not given at all, the default value held in
# sd_gpu_markers_enable_def is used.
# NOTE(review): sd_gpu_markers_init is only assigned in the default branch
# below; confirm whether it is initialized or read anywhere else.
AC_ARG_WITH([gpu_markers],
[AS_HELP_STRING(
[--with-gpu-markers],
[Enable GPU markers such as NVTX for NVIDIA CUDA or ROCTX for AMD ROCm.])],
[ if test "${withval}" = "no" -o "${withval}" = "yes"; then
sd_gpu_markers_enable="${withval}"
else
sd_gpu_markers_enable="no"
fi],
[ sd_gpu_markers_enable="${sd_gpu_markers_enable_def}"; sd_gpu_markers_init="def"])
# Declare flavor option
sd_gpu_flavors_supported="cuda-double cuda-single hip-double"
AC_ARG_WITH([gpu-flavor],
@ -202,6 +217,7 @@ AC_DEFUN([SD_GPU_INIT], [
AC_SUBST(sd_gpu_prefix)
AC_SUBST(sd_gpu_status)
AC_SUBST(sd_gpu_enable)
AC_SUBST(sd_gpu_markers_enable)
AC_SUBST(sd_gpu_init)
AC_SUBST(sd_gpu_ok)
AC_SUBST(sd_gpu_cppflags)

View File

@ -26,8 +26,6 @@ module m_nvtx_data
implicit none
logical :: nvtx_activated = .false.
integer, parameter :: NUMBER_OF_NVTX_REGIONS = 65
character(len=32), dimension(NUMBER_OF_NVTX_REGIONS) :: nvtx_names
integer , dimension(NUMBER_OF_NVTX_REGIONS) :: nvtx_ids
@ -102,15 +100,10 @@ contains
!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
subroutine nvtx_init(activate)
subroutine nvtx_init()
implicit none
! dummy variables
logical :: activate
nvtx_activated = activate
nvtx_names = [character(len=32) :: &
& "MAIN_COMPUTATION", &
& "SCF", &
@ -257,10 +250,8 @@ contains
! dummy variables
integer :: id
if (nvtx_activated) then
if (id .le. NUMBER_OF_NVTX_REGIONS) then
call nvtxStartRange(nvtx_names(id),id)
end if
if (id .le. NUMBER_OF_NVTX_REGIONS) then
call nvtxStartRange(nvtx_names(id),id)
end if
end subroutine abi_nvtx_start_range
@ -271,9 +262,7 @@ contains
implicit none
if (nvtx_activated) then
call nvtxEndRange()
end if
call nvtxEndRange()
end subroutine abi_nvtx_end_range

View File

@ -238,7 +238,6 @@ type, public :: dataset_type
integer :: gpu_nl_distrib = 0
integer :: gpu_nl_splitsize = 1
integer :: gpu_option
integer :: gpu_use_nvtx
integer :: gstore_cplex = 2
integer :: gstore_with_vk = 1
@ -1617,7 +1616,6 @@ type(dataset_type) function dtset_copy(dtin) result(dtout)
dtout%gpu_nl_distrib = dtin%gpu_nl_distrib
dtout%gpu_nl_splitsize = dtin%gpu_nl_splitsize
dtout%gpu_option = dtin%gpu_option
dtout%gpu_use_nvtx = dtin%gpu_use_nvtx
dtout%gstore_cplex = dtin%gstore_cplex
dtout%gstore_with_vk = dtin%gstore_with_vk
@ -3371,7 +3369,7 @@ subroutine chkvars(string)
list_vars=trim(list_vars)//' getvel getwfk getwfk_filepath getwfq getwfq_filepath getxcart getxred'
list_vars=trim(list_vars)//' get1den get1wf goprecon goprecprm'
list_vars=trim(list_vars)//' gpu_devices gpu_kokkos_nthrd gpu_linalg_limit gpu_nl_distrib'
list_vars=trim(list_vars)//' gpu_nl_splitsize gpu_option gpu_use_nvtx'
list_vars=trim(list_vars)//' gpu_nl_splitsize gpu_option'
list_vars=trim(list_vars)//' gwaclowrank gwcalctyp gwcomp gwencomp gwgamma gwmem'
list_vars=trim(list_vars)//' gstore_brange gstore_cplex gstore_erange gstore_kfilter'
list_vars=trim(list_vars)//' gstore_kzone gstore_qzone gstore_with_vk'

View File

@ -556,7 +556,6 @@ subroutine invars0(dtsets, istatr, istatshft, lenstr, msym, mxnatom, mxnimage, m
! GPU related parameters
dtsets(:)%gpu_option=ABI_GPU_DISABLED
dtsets(:)%gpu_use_nvtx=0
#if defined HAVE_GPU
call Get_ndevice(idev)
if (idev>0) then
@ -587,11 +586,6 @@ subroutine invars0(dtsets, istatr, istatshft, lenstr, msym, mxnatom, mxnimage, m
end if
end if
#if defined HAVE_GPU && defined HAVE_GPU_MARKERS
call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'gpu_use_nvtx',tread,'INT')
if(tread==1)dtsets(idtset)%gpu_use_nvtx=intarr(1)
#endif
if (dtsets(idtset)%gpu_option/=ABI_GPU_DISABLED) gpu_option=dtsets(idtset)%gpu_option
end do

View File

@ -1183,9 +1183,6 @@ subroutine outvar_a_h (choice,dmatpuflag,dtsets,iout,&
intarr(1,:)=dtsets(:)%gpu_option
call prttagm(dprarr,intarr,iout,jdtset_,2,marr,1,narrm,ncid,ndtset_alloc,'gpu_option','INT',0,firstchar=firstchar_gpu)
intarr(1,:)=dtsets(:)%gpu_use_nvtx
call prttagm(dprarr,intarr,iout,jdtset_,2,marr,1,narrm,ncid,ndtset_alloc,'gpu_use_nvtx','INT',0,firstchar=firstchar_gpu)
if (any(dtsets(:)%gpu_option/=ABI_GPU_KOKKOS)) then
intarr(1,:)=dtsets(:)%gpu_kokkos_nthrd
call prttagm(dprarr,intarr,iout,jdtset_,2,marr,1,narrm,ncid,ndtset_alloc,'gpu_kokkos_nthrd','INT',0,firstchar=firstchar_gpu)

View File

@ -158,7 +158,6 @@ program abinit
integer :: mu,natom,ncomment,ncomment_paw,ndtset
integer :: ndtset_alloc,nexit,nexit_paw,nfft,nkpt,npsp
integer :: nsppol,nwarning,nwarning_paw,prtvol,timopt,gpu_option
logical :: use_nvtx
integer,allocatable :: nband(:),npwtot(:)
real(dp) :: etotal, tcpui, twalli
real(dp) :: strten(6),tsec(2)
@ -365,14 +364,12 @@ program abinit
!Activate GPU if required
gpu_option=ABI_GPU_DISABLED
use_nvtx=.false.
gpu_devices(:)=-1
do ii=1,ndtset_alloc
if (dtsets(ii)%gpu_option/=ABI_GPU_DISABLED) then
gpu_option=dtsets(ii)%gpu_option
gpu_devices(:)=dtsets(ii)%gpu_devices(:)
end if
if (dtsets(ii)%gpu_use_nvtx==1) use_nvtx=.true.
end do
#ifdef HAVE_GPU
call setdevice_cuda(gpu_devices,gpu_option)
@ -383,8 +380,9 @@ program abinit
end if
#endif
!Enable GPU markers (NVTX/ROCTX) if required
#if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
NVTX_INIT(use_nvtx)
NVTX_INIT()
#endif
!------------------------------------------------------------------------------

View File

@ -6,34 +6,34 @@
* distribution.
*
*/
#ifndef ABINIT_52_MANAGE_CUDA_NVTX_MACRO_H
#define ABINIT_52_MANAGE_CUDA_NVTX_MACRO_H
#ifndef ABINIT_NVTX_MACRO_H
#define ABINIT_NVTX_MACRO_H
#include "config.h"
/*
* Note:
* nvtx_activated is a boolean variable defined in module
* m_nvtx_data (52_manage_gpu/m_nvtx_data.F90).
* m_nvtx_data (44_abitools/m_nvtx_data.F90).
*
* It can only be true if GPU/Cuda is enabled and Cuda version >= 10.
* It can only be true if GPU (NVIDIA CUDA >= v10 or AMD ROCm) is enabled.
*
* We need these macro because subroutine abi_nvtx_start_range and abi_nvtx_end_range
* only exists when GPU is enabled.
* only exists when GPU markers are enabled.
*/
#if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
#define ABI_NVTX_START_RANGE(id) call abi_nvtx_start_range(id)
#define ABI_NVTX_END_RANGE() call abi_nvtx_end_range()
#define NVTX_INIT(value) call nvtx_init(value)
#define NVTX_INIT() call nvtx_init()
#define NVTX_PROFILER_START() call nvtxProfilerStart()
#define NVTX_PROFILER_STOP() call nvtxProfilerStop()
#else
#define ABI_NVTX_START_RANGE(id)
#define ABI_NVTX_END_RANGE()
#define NVTX_INIT(value)
#define NVTX_INIT()
#define NVTX_PROFILER_START()
#define NVTX_PROFILER_STOP()
#endif
#endif /* ABINIT_52_MANAGE_CUDA_NVTX_MACRO_H */
#endif /* ABINIT_NVTX_MACRO_H */