mirror of https://github.com/abinit/abinit.git
Manage GPU markers with compilation flag rather than input param
GPU markers, such as NVTX (NVIDIA) or ROCtx (AMD), are meant for profiling, alongside dedicated profiling tools (Nsight Systems, Radeon profiler, Perfetto UI...). As such, they are mostly used for development or benchmarking and are of no use to the end user of ABINIT. This commit changes the way GPU markers are enabled: - input parameter "gpu_use_nvtx" is removed - configure option "with_gpu_markers/--with-gpu-markers" is added - CMake option "-DABINIT_ENABLE_GPU_MARKERS" is added. The way GPU markers are handled in the ABINIT code doesn't change, as this feature was already hidden behind the preprocessor define HAVE_GPU_MARKERS.
This commit is contained in:
parent
ec48130bec
commit
8f8345b34a
|
@ -141,12 +141,6 @@ if(ABINIT_ENABLE_GPU_CUDA)
|
|||
set(HAVE_GPU 1)
|
||||
set(HAVE_GPU_SERIAL 1)
|
||||
|
||||
# check nvtx library is available
|
||||
if (TARGET CUDA::nvToolsExt)
|
||||
set(HAVE_GPU_CUDA10 1)
|
||||
set(HAVE_GPU_MARKERS 1)
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
option(ABINIT_ENABLE_GPU_HIP "Enable GPU build (using AMD HIP backend, default OFF)" OFF)
|
||||
|
@ -164,20 +158,35 @@ if(ABINIT_ENABLE_GPU_HIP)
|
|||
set(HAVE_GPU 1)
|
||||
set(HAVE_GPU_SERIAL 1)
|
||||
|
||||
# ROCTX: ROC tracer library similar in use to NVTX for CUDA
|
||||
find_library(ROCTX
|
||||
NAMES libroctx64.so
|
||||
HINTS ${ROCM_ROOT}/roctracer/lib ${ROCM_PATH}/roctracer/lib ${ROCM_HOME}/roctracer/lib
|
||||
REQUIRED)
|
||||
|
||||
# check roctx library is available
|
||||
if (EXISTS ${ROCTX})
|
||||
set(HAVE_GPU_MARKERS 1)
|
||||
endif()
|
||||
add_compile_definitions("__HIP_PLATFORM_AMD__")
|
||||
|
||||
endif()
|
||||
|
||||
# GPU markers (NVTX for NVIDIA CUDA, ROCtx for AMD ROCm/HIP) annotate code
# regions for profiling tools. They are opt-in at configure time:
#   cmake -DABINIT_ENABLE_GPU_MARKERS=ON ...
# On success HAVE_GPU_MARKERS is set to 1 (plus HAVE_GPU_CUDA10 for CUDA).
# An explicit request that cannot be honoured is reported instead of being
# silently dropped.
option(ABINIT_ENABLE_GPU_MARKERS "Enable GPU markers for profiling (requires CUDA or ROCM/HIP, default OFF)" OFF)

if(ABINIT_ENABLE_GPU_MARKERS)

  if(NOT ABINIT_ENABLE_GPU_CUDA AND NOT ABINIT_ENABLE_GPU_HIP)
    # Without a GPU backend none of the checks below run; tell the user
    # that the option has no effect.
    message(WARNING
      "ABINIT_ENABLE_GPU_MARKERS=ON is ignored: neither ABINIT_ENABLE_GPU_CUDA "
      "nor ABINIT_ENABLE_GPU_HIP is enabled")
  endif()

  if(ABINIT_ENABLE_GPU_CUDA)
    # check nvtx library is available
    # NOTE(review): CUDA::nvToolsExt is presumably created by an earlier
    # find_package(CUDAToolkit) -- confirm against the CUDA section.
    if(TARGET CUDA::nvToolsExt)
      set(HAVE_GPU_CUDA10 1)
      set(HAVE_GPU_MARKERS 1)
    else()
      message(FATAL_ERROR
        "GPU markers were requested (ABINIT_ENABLE_GPU_MARKERS=ON) but the "
        "NVTX target CUDA::nvToolsExt is not available")
    endif()
  endif()

  if(ABINIT_ENABLE_GPU_HIP)
    # ROCTX: ROC tracer library similar in use to NVTX for CUDA
    find_library(ROCTX
      NAMES libroctx64.so
      HINTS ${ROCM_ROOT}/roctracer/lib ${ROCM_PATH}/roctracer/lib ${ROCM_HOME}/roctracer/lib
      REQUIRED)

    # check roctx library is available
    # Quoted so that an empty or *-NOTFOUND value is a plain "false" rather
    # than a malformed if(); the else() branch also covers CMake versions
    # whose find_library() does not enforce REQUIRED.
    if(EXISTS "${ROCTX}")
      set(HAVE_GPU_MARKERS 1)
    else()
      message(FATAL_ERROR
        "GPU markers were requested (ABINIT_ENABLE_GPU_MARKERS=ON) but the "
        "ROCtx library (libroctx64.so) was not found")
    endif()
  endif()

endif()
|
||||
|
||||
if (ABINIT_ENABLE_GPU_CUDA OR ABINIT_ENABLE_GPU_HIP)
|
||||
set(DO_BUILD_17_GPU_TOOLBOX TRUE)
|
||||
else()
|
||||
|
|
|
@ -89,18 +89,22 @@ AC_DEFUN([_ABI_GPU_CHECK_CUDA],[
|
|||
]])], [abi_gpu_cuda_version_10="yes"], [abi_gpu_cuda_version_10="no"])
|
||||
AC_MSG_RESULT([${abi_gpu_cuda_version_10}])
|
||||
|
||||
if test "${abi_gpu_cuda_version_10}" = "yes"; then
|
||||
if test -e "${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext}"; then
|
||||
# always add link flags to nvtx if available
|
||||
if test "${GPU_LIBS}" = ""; then
|
||||
abi_gpu_cuda_libs="-lnvToolsExt ${abi_gpu_cuda_libs}"
|
||||
if test "${abi_gpu_markers_enable}" = "yes"; then
|
||||
if test "${abi_gpu_cuda_version_10}" = "yes"; then
|
||||
if test -e "${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext}"; then
|
||||
# always add link flags to nvtx if available
|
||||
if test "${GPU_LIBS}" = ""; then
|
||||
abi_gpu_cuda_libs="-lnvToolsExt ${abi_gpu_cuda_libs}"
|
||||
else
|
||||
abi_gpu_cuda_libs="${abi_gpu_cuda_libs} -lnvToolsExt"
|
||||
fi
|
||||
abi_gpu_nvtx_v3="yes"
|
||||
abi_result="${abi_result} nvtx_v3"
|
||||
else
|
||||
abi_gpu_cuda_libs="${abi_gpu_cuda_libs} -lnvToolsExt"
|
||||
AC_MSG_ERROR([Cuda NVTX: ${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext} not found])
|
||||
fi
|
||||
abi_gpu_nvtx_v3="yes"
|
||||
abi_result="${abi_result} nvtx_v3"
|
||||
else
|
||||
AC_MSG_NOTICE([Cuda Nvtx: ${abi_gpu_cuda_libdir}/libnvToolsExt.${abi_so_ext} not found])
|
||||
AC_MSG_ERROR([Cuda NVTX was requested but is not available for CUDA < v10])
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -357,17 +361,19 @@ AC_DEFUN([_ABI_GPU_CHECK_HIP],[
|
|||
AC_MSG_WARN([your Fortran compiler does not provide any ISO C binding module])
|
||||
fi
|
||||
|
||||
if test -e "${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext}"; then
|
||||
# always add link flags to roctx if available
|
||||
if test "${GPU_LIBS}" = ""; then
|
||||
abi_gpu_hip_libs="-lroctx64 ${abi_gpu_hip_libs}"
|
||||
if test "${abi_gpu_markers_enable}" = "yes"; then
|
||||
if test -e "${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext}"; then
|
||||
# always add link flags to roctx if available
|
||||
if test "${GPU_LIBS}" = ""; then
|
||||
abi_gpu_hip_libs="-lroctx64 ${abi_gpu_hip_libs}"
|
||||
else
|
||||
abi_gpu_hip_libs="${abi_gpu_hip_libs} -lroctx64"
|
||||
fi
|
||||
abi_gpu_roctx="yes"
|
||||
abi_result="${abi_result} roctx"
|
||||
else
|
||||
abi_gpu_hip_libs="${abi_gpu_hip_libs} -lroctx64"
|
||||
AC_MSG_ERROR([AMD ROCtx: ${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext} not found])
|
||||
fi
|
||||
abi_gpu_roctx="yes"
|
||||
abi_result="${abi_result} roctx"
|
||||
else
|
||||
AC_MSG_NOTICE([AMD ROCtx: ${abi_gpu_hip_libdir}/libroctx64.${abi_so_ext} not found])
|
||||
fi
|
||||
|
||||
# Restore build environment
|
||||
|
@ -551,6 +557,7 @@ AC_DEFUN([ABI_GPU_INIT],[
|
|||
# Init
|
||||
abi_gpu_complete="unknown"
|
||||
abi_gpu_enable="${sd_gpu_enable}"
|
||||
abi_gpu_markers_enable="${sd_gpu_markers_enable}"
|
||||
abi_gpu_has_cc="no"
|
||||
abi_gpu_has_fft="no"
|
||||
abi_gpu_has_incs="no"
|
||||
|
|
|
@ -22,6 +22,7 @@ AC_DEFUN([SD_GPU_INIT], [
|
|||
sd_gpu_ldflags=""
|
||||
sd_gpu_libs=""
|
||||
sd_gpu_enable=""
|
||||
sd_gpu_markers_enable=""
|
||||
sd_gpu_init="unknown"
|
||||
sd_gpu_ok="unknown"
|
||||
sd_gpu_prefix=""
|
||||
|
@ -35,6 +36,7 @@ AC_DEFUN([SD_GPU_INIT], [
|
|||
sd_gpu_fcflags_def="$6"
|
||||
sd_gpu_ldflags_def="$7"
|
||||
sd_gpu_enable_def=""
|
||||
sd_gpu_markers_enable_def=""
|
||||
sd_gpu_policy=""
|
||||
sd_gpu_status=""
|
||||
|
||||
|
@ -58,6 +60,7 @@ AC_DEFUN([SD_GPU_INIT], [
|
|||
|
||||
# Set reasonable defaults if not provided
|
||||
test -z "${sd_gpu_enable_def}" && sd_gpu_enable_def="no"
|
||||
test -z "${sd_gpu_markers_enable_def}" && sd_gpu_markers_enable_def="no"
|
||||
test -z "${sd_gpu_policy}" && sd_gpu_policy="warn"
|
||||
test -z "${sd_gpu_status}" && sd_gpu_status="optional"
|
||||
# FIXME: improve the setting mechanism
|
||||
|
@ -78,6 +81,18 @@ AC_DEFUN([SD_GPU_INIT], [
|
|||
fi],
|
||||
[ sd_gpu_enable="${sd_gpu_enable_def}"; sd_gpu_init="def"])
|
||||
|
||||
# Declare GPU markers configure option
|
||||
AC_ARG_WITH([gpu_markers],
|
||||
[AS_HELP_STRING(
|
||||
[--with-gpu-markers],
|
||||
[Enable GPU markers such as NVTX for NVIDIA CUDA or ROCTX for AMD ROCm.])],
|
||||
[ if test "${withval}" = "no" -o "${withval}" = "yes"; then
|
||||
sd_gpu_markers_enable="${withval}"
|
||||
else
|
||||
sd_gpu_markers_enable="no"
|
||||
fi],
|
||||
[ sd_gpu_markers_enable="${sd_gpu_markers_enable_def}"; sd_gpu_markers_init="def"])
|
||||
|
||||
# Declare flavor option
|
||||
sd_gpu_flavors_supported="cuda-double cuda-single hip-double"
|
||||
AC_ARG_WITH([gpu-flavor],
|
||||
|
@ -202,6 +217,7 @@ AC_DEFUN([SD_GPU_INIT], [
|
|||
AC_SUBST(sd_gpu_prefix)
|
||||
AC_SUBST(sd_gpu_status)
|
||||
AC_SUBST(sd_gpu_enable)
|
||||
AC_SUBST(sd_gpu_markers_enable)
|
||||
AC_SUBST(sd_gpu_init)
|
||||
AC_SUBST(sd_gpu_ok)
|
||||
AC_SUBST(sd_gpu_cppflags)
|
||||
|
|
|
@ -26,8 +26,6 @@ module m_nvtx_data
|
|||
|
||||
implicit none
|
||||
|
||||
logical :: nvtx_activated = .false.
|
||||
|
||||
integer, parameter :: NUMBER_OF_NVTX_REGIONS = 65
|
||||
character(len=32), dimension(NUMBER_OF_NVTX_REGIONS) :: nvtx_names
|
||||
integer , dimension(NUMBER_OF_NVTX_REGIONS) :: nvtx_ids
|
||||
|
@ -102,15 +100,10 @@ contains
|
|||
|
||||
!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
subroutine nvtx_init(activate)
|
||||
subroutine nvtx_init()
|
||||
|
||||
implicit none
|
||||
|
||||
! dummy variables
|
||||
logical :: activate
|
||||
|
||||
nvtx_activated = activate
|
||||
|
||||
nvtx_names = [character(len=32) :: &
|
||||
& "MAIN_COMPUTATION", &
|
||||
& "SCF", &
|
||||
|
@ -257,10 +250,8 @@ contains
|
|||
! dummy variables
|
||||
integer :: id
|
||||
|
||||
if (nvtx_activated) then
|
||||
if (id .le. NUMBER_OF_NVTX_REGIONS) then
|
||||
call nvtxStartRange(nvtx_names(id),id)
|
||||
end if
|
||||
if (id .le. NUMBER_OF_NVTX_REGIONS) then
|
||||
call nvtxStartRange(nvtx_names(id),id)
|
||||
end if
|
||||
|
||||
end subroutine abi_nvtx_start_range
|
||||
|
@ -271,9 +262,7 @@ contains
|
|||
|
||||
implicit none
|
||||
|
||||
if (nvtx_activated) then
|
||||
call nvtxEndRange()
|
||||
end if
|
||||
call nvtxEndRange()
|
||||
|
||||
end subroutine abi_nvtx_end_range
|
||||
|
||||
|
|
|
@ -238,7 +238,6 @@ type, public :: dataset_type
|
|||
integer :: gpu_nl_distrib = 0
|
||||
integer :: gpu_nl_splitsize = 1
|
||||
integer :: gpu_option
|
||||
integer :: gpu_use_nvtx
|
||||
|
||||
integer :: gstore_cplex = 2
|
||||
integer :: gstore_with_vk = 1
|
||||
|
@ -1617,7 +1616,6 @@ type(dataset_type) function dtset_copy(dtin) result(dtout)
|
|||
dtout%gpu_nl_distrib = dtin%gpu_nl_distrib
|
||||
dtout%gpu_nl_splitsize = dtin%gpu_nl_splitsize
|
||||
dtout%gpu_option = dtin%gpu_option
|
||||
dtout%gpu_use_nvtx = dtin%gpu_use_nvtx
|
||||
|
||||
dtout%gstore_cplex = dtin%gstore_cplex
|
||||
dtout%gstore_with_vk = dtin%gstore_with_vk
|
||||
|
@ -3371,7 +3369,7 @@ subroutine chkvars(string)
|
|||
list_vars=trim(list_vars)//' getvel getwfk getwfk_filepath getwfq getwfq_filepath getxcart getxred'
|
||||
list_vars=trim(list_vars)//' get1den get1wf goprecon goprecprm'
|
||||
list_vars=trim(list_vars)//' gpu_devices gpu_kokkos_nthrd gpu_linalg_limit gpu_nl_distrib'
|
||||
list_vars=trim(list_vars)//' gpu_nl_splitsize gpu_option gpu_use_nvtx'
|
||||
list_vars=trim(list_vars)//' gpu_nl_splitsize gpu_option'
|
||||
list_vars=trim(list_vars)//' gwaclowrank gwcalctyp gwcomp gwencomp gwgamma gwmem'
|
||||
list_vars=trim(list_vars)//' gstore_brange gstore_cplex gstore_erange gstore_kfilter'
|
||||
list_vars=trim(list_vars)//' gstore_kzone gstore_qzone gstore_with_vk'
|
||||
|
|
|
@ -556,7 +556,6 @@ subroutine invars0(dtsets, istatr, istatshft, lenstr, msym, mxnatom, mxnimage, m
|
|||
|
||||
! GPU related parameters
|
||||
dtsets(:)%gpu_option=ABI_GPU_DISABLED
|
||||
dtsets(:)%gpu_use_nvtx=0
|
||||
#if defined HAVE_GPU
|
||||
call Get_ndevice(idev)
|
||||
if (idev>0) then
|
||||
|
@ -587,11 +586,6 @@ subroutine invars0(dtsets, istatr, istatshft, lenstr, msym, mxnatom, mxnimage, m
|
|||
end if
|
||||
end if
|
||||
|
||||
#if defined HAVE_GPU && defined HAVE_GPU_MARKERS
|
||||
call intagm(dprarr,intarr,jdtset,marr,1,string(1:lenstr),'gpu_use_nvtx',tread,'INT')
|
||||
if(tread==1)dtsets(idtset)%gpu_use_nvtx=intarr(1)
|
||||
#endif
|
||||
|
||||
if (dtsets(idtset)%gpu_option/=ABI_GPU_DISABLED) gpu_option=dtsets(idtset)%gpu_option
|
||||
end do
|
||||
|
||||
|
|
|
@ -1183,9 +1183,6 @@ subroutine outvar_a_h (choice,dmatpuflag,dtsets,iout,&
|
|||
intarr(1,:)=dtsets(:)%gpu_option
|
||||
call prttagm(dprarr,intarr,iout,jdtset_,2,marr,1,narrm,ncid,ndtset_alloc,'gpu_option','INT',0,firstchar=firstchar_gpu)
|
||||
|
||||
intarr(1,:)=dtsets(:)%gpu_use_nvtx
|
||||
call prttagm(dprarr,intarr,iout,jdtset_,2,marr,1,narrm,ncid,ndtset_alloc,'gpu_use_nvtx','INT',0,firstchar=firstchar_gpu)
|
||||
|
||||
if (any(dtsets(:)%gpu_option/=ABI_GPU_KOKKOS)) then
|
||||
intarr(1,:)=dtsets(:)%gpu_kokkos_nthrd
|
||||
call prttagm(dprarr,intarr,iout,jdtset_,2,marr,1,narrm,ncid,ndtset_alloc,'gpu_kokkos_nthrd','INT',0,firstchar=firstchar_gpu)
|
||||
|
|
|
@ -158,7 +158,6 @@ program abinit
|
|||
integer :: mu,natom,ncomment,ncomment_paw,ndtset
|
||||
integer :: ndtset_alloc,nexit,nexit_paw,nfft,nkpt,npsp
|
||||
integer :: nsppol,nwarning,nwarning_paw,prtvol,timopt,gpu_option
|
||||
logical :: use_nvtx
|
||||
integer,allocatable :: nband(:),npwtot(:)
|
||||
real(dp) :: etotal, tcpui, twalli
|
||||
real(dp) :: strten(6),tsec(2)
|
||||
|
@ -365,14 +364,12 @@ program abinit
|
|||
|
||||
!Activate GPU if required
|
||||
gpu_option=ABI_GPU_DISABLED
|
||||
use_nvtx=.false.
|
||||
gpu_devices(:)=-1
|
||||
do ii=1,ndtset_alloc
|
||||
if (dtsets(ii)%gpu_option/=ABI_GPU_DISABLED) then
|
||||
gpu_option=dtsets(ii)%gpu_option
|
||||
gpu_devices(:)=dtsets(ii)%gpu_devices(:)
|
||||
end if
|
||||
if (dtsets(ii)%gpu_use_nvtx==1) use_nvtx=.true.
|
||||
end do
|
||||
#ifdef HAVE_GPU
|
||||
call setdevice_cuda(gpu_devices,gpu_option)
|
||||
|
@ -383,8 +380,9 @@ program abinit
|
|||
end if
|
||||
#endif
|
||||
|
||||
!Enable GPU markers (NVTX/ROCTX) if required
|
||||
#if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
|
||||
NVTX_INIT(use_nvtx)
|
||||
NVTX_INIT()
|
||||
#endif
|
||||
|
||||
!------------------------------------------------------------------------------
|
||||
|
|
|
@ -6,34 +6,34 @@
|
|||
* distribution.
|
||||
*
|
||||
*/
|
||||
#ifndef ABINIT_52_MANAGE_CUDA_NVTX_MACRO_H
|
||||
#define ABINIT_52_MANAGE_CUDA_NVTX_MACRO_H
|
||||
#ifndef ABINIT_NVTX_MACRO_H
|
||||
#define ABINIT_NVTX_MACRO_H
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* nvtx_activated is a boolean variable defined in module
|
||||
* m_nvtx_data (52_manage_gpu/m_nvtx_data.F90).
|
||||
* m_nvtx_data (44_abitools/m_nvtx_data.F90).
|
||||
*
|
||||
* It can only be true if GPU/Cuda is enabled and Cuda version >= 10.
|
||||
* It can only be true if GPU (NVIDIA CUDA > v10 or AMD ROCm) is enabled.
|
||||
*
|
||||
* We need these macro because subroutine abi_nvtx_start_range and abi_nvtx_end_range
|
||||
* only exists when GPU is enabled.
|
||||
* only exists when GPU markers are enabled.
|
||||
*/
|
||||
|
||||
#if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
|
||||
#define ABI_NVTX_START_RANGE(id) call abi_nvtx_start_range(id)
|
||||
#define ABI_NVTX_END_RANGE() call abi_nvtx_end_range()
|
||||
#define NVTX_INIT(value) call nvtx_init(value)
|
||||
#define NVTX_INIT() call nvtx_init()
|
||||
#define NVTX_PROFILER_START() call nvtxProfilerStart()
|
||||
#define NVTX_PROFILER_STOP() call nvtxProfilerStop()
|
||||
#else
|
||||
#define ABI_NVTX_START_RANGE(id)
|
||||
#define ABI_NVTX_END_RANGE()
|
||||
#define NVTX_INIT(value)
|
||||
#define NVTX_INIT()
|
||||
#define NVTX_PROFILER_START()
|
||||
#define NVTX_PROFILER_STOP()
|
||||
#endif
|
||||
|
||||
#endif /* ABINIT_52_MANAGE_CUDA_NVTX_MACRO_H */
|
||||
#endif /* ABINIT_NVTX_MACRO_H */
|
||||
|
|
Loading…
Reference in New Issue