mirror of https://gitlab.com/QEF/q-e.git
Merge branch 'simplify_cuda' into 'develop'
Simplify CUDA configure See merge request QEF/q-e!1739
This commit is contained in:
commit
e821a717dc
|
@ -428,12 +428,11 @@ formerly PGI compiler, freely available for download.
|
|||
|
||||
As a rule, \qe\ tries to keep compatibility with older compilers,
|
||||
avoiding nonstandard extensions and newer features that are not
|
||||
widespread or stabilized. If however your compiler is older say
|
||||
than $\sim 5$ years or so it is quite likely that something will
|
||||
not work. The same applies to mathematical and MPI libraries.
|
||||
For GPU compilation, get the most recent NVidia HPC SDK you can:
|
||||
while compilers from v. 17.4 on should work, several problems and
|
||||
limitations are known to exist for old compiler versions.
|
||||
widespread or stabilized. If however your compiler is older than
|
||||
a few ($\sim 5$) years, it is likely that something will not work.
|
||||
The same applies to mathematical and MPI libraries.
|
||||
For GPU compilation, you need v.19.10 or later of the NVidia HPC SDK
|
||||
(previous versions are no longer supported).
|
||||
|
||||
Big computing centers typically provide a Fortran compiler complete
|
||||
with all needed libraries. Workstations or ``commodity'' machines
|
||||
|
@ -621,10 +620,11 @@ and the following optional packages:\\
|
|||
\end{tabular}\\
|
||||
\\
|
||||
In order to compile the code for GPUs you will need a recent version
|
||||
-- the more recent, the better -- of the NVidia HPC software development
|
||||
kit (SDK). OpenMP must be enabled, and you may want to use a CUDA-aware MPI
|
||||
distribution if running on multiple GPUs in order to optimize the
|
||||
interprocess data transfer. The following \configure\ options are
|
||||
(v.19.10 or later: the more recent, the better) of the NVidia HPC software
|
||||
development kit (SDK). OpenMP should be enabled. Enabling faster communications
|
||||
between GPUs, via NVlink or Infiniband RDMA, is essential for optimal
|
||||
performance. If your MPI library is built to be CUDA-aware, then enable it
|
||||
with \texttt{--with-cuda-mpi=yes}. The following \configure\ options are
|
||||
available:\\
|
||||
\begin{tabular}{ll}
|
||||
\texttt{--with-cuda=value}& enable compilation of GPU-accelerated subroutines.\\
|
||||
|
@ -640,10 +640,7 @@ available:\\
|
|||
& \texttt{value} must be consistent with the\\
|
||||
& CUDA Toolkit installed on the workstation \\
|
||||
& or available on the compute nodes of the HPC facility.\\
|
||||
\texttt{--enable-cuda-env-check=[yes]}& if set, sanity checks on the CUDA environment\\
|
||||
& are performed (default: no).
|
||||
\end{tabular}\\
|
||||
|
||||
\texttt{--with-cuda-mpi=value} & enable usage of a CUDA-aware MPI library (default: no).\\
|
||||
|
||||
To modify or extend \configure, see the Wiki pages on GitLab:
|
||||
\texttt{https://gitlab.com/QEF/q-e/-/wikis}.
|
||||
|
@ -662,7 +659,7 @@ libraries (e.g. you need to add \texttt{-D\_\_FFTW} to \texttt{DFLAGS}
|
|||
if you want to link internal FFTW). For a correct choice of preprocessing
|
||||
flags, refer to the documentation in \texttt{include/defs.h.README}.
|
||||
|
||||
Even if \configure\ works, yuo may need to tweak the \texttt{make.inc}
|
||||
Even if \configure\ works, you may need to tweak the \texttt{make.inc}
|
||||
file. It is very simple, but please note that if you change any settings
|
||||
(e.g. preprocessing, compilation flags)
|
||||
after a previous, successful or failed, compilation, you must run
|
||||
|
|
|
@ -808,35 +808,15 @@ END SUBROUTINE setup_para
|
|||
!
|
||||
!----------------------------------------------------------------------------
|
||||
LOGICAL FUNCTION check_gpu_support( )
  !
  !! Report whether this executable was built with GPU (CUDA) support.
  !!
  !! Minimal case: returns true if compiled for GPUs. The answer is fixed
  !! at compile time by the __CUDA preprocessor macro: .TRUE. when it is
  !! defined, .FALSE. otherwise. No environment variable is consulted and
  !! no state is cached, so every call returns the same value.
  !
  IMPLICIT NONE
  !
#if defined(__CUDA)
  check_gpu_support = .TRUE.
#else
  check_gpu_support = .FALSE.
#endif
  RETURN
  !
END FUNCTION check_gpu_support
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
|
|
|
@ -9,9 +9,9 @@ Installation
|
|||
============
|
||||
|
||||
This version requires the nvfortran (previously PGI) compiler from the
|
||||
freely available NVidia HPC SDK. You are advised to use the most recent
|
||||
version of NVidia software you can find. While any version later than 17.4
|
||||
should work, many glitches are known to exist in older versions.
|
||||
NVidia HPC SDK, v.19.10 or later (freely downloadable from NVidia).
|
||||
Earlier versions may or may not work and are no longer supported.
|
||||
You are advised to use the most recent version of NVidia software you can find.
|
||||
The `configure` script checks for the presence of the nvfortran compiler and
|
||||
of a few cuda libraries. For this reason the path pointing to the cuda toolkit
|
||||
must be present in `LD_LIBRARY_PATH`.
|
||||
|
@ -19,7 +19,7 @@ must be present in `LD_LIBRARY_PATH`.
|
|||
A template for the configure command is:
|
||||
|
||||
```
|
||||
./configure --with-cuda=XX --with-cuda-runtime=YY --with-cuda-cc=ZZ --enable-openmp [--enable-openacc] [ --with-scalapack=no ]
|
||||
./configure --with-cuda=XX --with-cuda-runtime=YY --with-cuda-cc=ZZ --enable-openmp [ --with-scalapack=no ][ --with-cuda-mpi=yes ]
|
||||
```
|
||||
|
||||
where `XX` is the location of the CUDA Toolkit (in HPC environments it is
|
||||
|
@ -32,14 +32,21 @@ CUDA Driver Version: 11000
|
|||
Default Target: cc70
|
||||
...
|
||||
```
|
||||
The version is returned as (1000 major + 10 minor). For example, CUDA 9.2
|
||||
would be represented by 9020. For the above case, configure QE with:
|
||||
The version is returned as (1000 × major + 10 × minor). For example, CUDA 11.0
|
||||
is represented by 11000. For the above case, configure QE with:
|
||||
```
|
||||
./configure --with-cuda=$CUDA_HOME --with-cuda-cc=70 --with-cuda-runtime=11.0
|
||||
```
|
||||
Alternatively, you may use the (deprecated) tool `get_device_props.py` in
|
||||
directory `dev-tools/`.
|
||||
|
||||
Enabling faster communications between GPUs, via NVlink or Infiniband RDMA,
|
||||
is essential for optimal performance. If your MPI library is built to be
|
||||
CUDA-aware, then enable `--with-cuda-mpi=yes` (default: no).
|
||||
|
||||
Serial (no MPI) compilation is also supported: use `--disable-parallel`.
|
||||
|
||||
Option `--enable-openacc` is no longer honored: OpenACC is always needed.
|
||||
It is generally a good idea to disable Scalapack when running small test
|
||||
cases since the serial GPU eigensolver outperforms the parallel CPU
|
||||
eigensolver in many circumstances.
|
||||
|
@ -48,8 +55,6 @@ From time to time PGI links to the wrong CUDA libraries and fails reporting a
|
|||
problem in `cusolver` missing `GOmp` (GNU Openmp). This problem can be solved
|
||||
by removing the cuda toolkit from the `LD_LIBRARY_PATH` before compiling.
|
||||
|
||||
Serial compilation is also supported.
|
||||
|
||||
Execution
|
||||
=========
|
||||
|
||||
|
@ -60,16 +65,5 @@ the beginning of the output
|
|||
GPU acceleration is ACTIVE.
|
||||
```
|
||||
|
||||
GPU acceleration can be switched off by setting the following environment
|
||||
variable:
|
||||
|
||||
```
|
||||
$ export USEGPU=no
|
||||
```
|
||||
|
||||
|
||||
Testing
|
||||
=======
|
||||
|
||||
The current GPU version passes all tests with both parallel and serial
|
||||
compilation.
|
||||
|
|
|
@ -108,7 +108,7 @@ export OMP_NUM_THREADS=1
|
|||
LC_ALL=C
|
||||
export LC_ALL
|
||||
|
||||
NETWORK_PSEUDO=http://www.quantum-espresso.org/wp-content/uploads/upf_files/
|
||||
NETWORK_PSEUDO=https://www.quantum-espresso.org/wp-content/uploads/upf_files/
|
||||
|
||||
# wget or curl needed if some PP has to be downloaded from web site
|
||||
# script wizard will surely find a better way to find what is available
|
||||
|
|
|
@ -769,6 +769,7 @@ enable_static
|
|||
with_cuda
|
||||
with_cuda_cc
|
||||
with_cuda_runtime
|
||||
with_cuda_mpi
|
||||
enable_openacc
|
||||
with_libxc
|
||||
with_libxc_prefix
|
||||
|
@ -1438,6 +1439,7 @@ Optional Packages:
|
|||
--with-cuda-cc=VAL GPU architecture (Kepler: 35, Pascal: 60, Volta: 70)
|
||||
[default=35]
|
||||
--with-cuda-runtime=VAL CUDA runtime (Pascal: 8+, Volta: 9+) [default=10.1]
|
||||
--with-cuda-mpi=VAL CUDA-aware MPI (yes|no) [default=no]
|
||||
--with-libxc (yes|no) Use libXC for some XC functionals (default:
|
||||
no)
|
||||
--with-libxc-prefix=DIR Directory where libxc was installed.
|
||||
|
@ -4191,6 +4193,16 @@ else
|
|||
fi
|
||||
|
||||
|
||||
|
||||
# Check whether --with-cuda-mpi was given.
|
||||
if test "${with_cuda_mpi+set}" = set; then :
|
||||
withval=$with_cuda_mpi;
|
||||
else
|
||||
with_cuda_mpi=no
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Check whether --enable-openacc was given.
|
||||
if test "${enable_openacc+set}" = set; then :
|
||||
enableval=$enable_openacc;
|
||||
|
@ -4316,6 +4328,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
|||
# Headers and libraries
|
||||
# -----------------------------------------
|
||||
try_dflags="$try_dflags -D__CUDA"
|
||||
# Define __GPU_MPI only for parallel builds configured with a CUDA-aware MPI.
# Use the POSIX "=" string comparison: "==" is a bashism that makes test fail
# when /bin/sh is a strict POSIX shell such as dash.
if test "$use_parallel" -eq 1 && test "$with_cuda_mpi" = "yes"; then
    try_dflags="$try_dflags -D__GPU_MPI"
fi
|
||||
cuda_extlibs="devxlib"
|
||||
cuda_libs="$mMcudalib=cufft,cublas,cusolver,curand \$(TOPDIR)/external/devxlib/src/libdevXlib.a"
|
||||
|
||||
|
@ -4328,21 +4343,17 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
|||
runtime_major_version=`echo $with_cuda_runtime | cut -d. -f1`
|
||||
runtime_minor_version=`echo $with_cuda_runtime | cut -d. -f2`
|
||||
if test "$runtime_major_version" -lt 10 ||
|
||||
( "$runtime_major_version" -eq 10 && "$runtime_minor_version" -lt 1 )
|
||||
(test "$runtime_major_version" -eq 10 && test "$runtime_minor_version" -lt 1 )
|
||||
then
|
||||
# CUDA toolkit v < 10.1: new solver not available
|
||||
cuda_fflags="$cuda_fflags \$(MOD_FLAG)\$(TOPDIR)/EIGENSOLVER_GPU/lib_eigsolve"
|
||||
cuda_extlibs="$cuda_extlibs eigensolver"
|
||||
cuda_libs="$cuda_libs \$(TOPDIR)/EIGENSOLVER_GPU/lib_eigsolve/lib_eigsolve.a"
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Using legacy custom solver." >&5
|
||||
$as_echo "$as_me: WARNING: Using legacy custom solver." >&2;}
|
||||
# CUDA toolkit v < 10.1: cusolver not available
|
||||
as_fn_error $? "Unsupported CUDA Toolkit, too old" "$LINENO" 5
|
||||
else
|
||||
try_dflags="$try_dflags -D__USE_CUSOLVER"
|
||||
fi
|
||||
# -----------------------------------------
|
||||
# C flags - not sure whether they are suitable for old version as well
|
||||
# C flags
|
||||
# -----------------------------------------
|
||||
cuda_cflags=" -I$with_cuda/include -gpu=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
cuda_cflags=" -I$with_cuda/include $mMcuda=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
ldflags="$ldflags $mMcuda=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
gpu_arch="$with_cuda_cc"
|
||||
cuda_runtime="$with_cuda_runtime"
|
||||
|
@ -4350,6 +4361,8 @@ $as_echo "$as_me: WARNING: Using legacy custom solver." >&2;}
|
|||
ldflags="$ldflags -acc"
|
||||
cuda_fflags="$cuda_fflags -acc"
|
||||
cuda_cflags="$cuda_cflags -acc"
|
||||
else
|
||||
as_fn_error $? "OpenACC must be enabled" "$LINENO" 5
|
||||
fi
|
||||
|
||||
fi
|
||||
|
|
|
@ -48,6 +48,12 @@ AC_ARG_WITH([cuda-runtime],
|
|||
[AS_HELP_STRING([--with-cuda-runtime=VAL],[CUDA runtime (Pascal: 8+, Volta: 9+) @<:@default=10.1@:>@])],
|
||||
[],
|
||||
[with_cuda_runtime=10.1])
|
||||
|
||||
AC_ARG_WITH([cuda-mpi],
|
||||
[AS_HELP_STRING([--with-cuda-mpi=VAL],[CUDA-aware MPI (yes|no) @<:@default=no@:>@])],
|
||||
[],
|
||||
[with_cuda_mpi=no])
|
||||
|
||||
|
||||
AC_ARG_ENABLE([openacc],
|
||||
[AS_HELP_STRING([--enable-openacc],[Enable compilation with OPENACC @<:@default=yes@:>@])],
|
||||
|
@ -81,6 +87,9 @@ then
|
|||
# Headers and libraries
|
||||
# -----------------------------------------
|
||||
try_dflags="$try_dflags -D__CUDA"
|
||||
# Define __GPU_MPI only for parallel builds configured with a CUDA-aware MPI.
# Use the POSIX "=" string comparison: "==" is a bashism that makes test fail
# when /bin/sh is a strict POSIX shell such as dash.
if test "$use_parallel" -eq 1 && test "$with_cuda_mpi" = "yes"; then
    try_dflags="$try_dflags -D__GPU_MPI"
fi
|
||||
cuda_extlibs="devxlib"
|
||||
cuda_libs="$mMcudalib=cufft,cublas,cusolver,curand \$(TOPDIR)/external/devxlib/src/libdevXlib.a"
|
||||
|
||||
|
@ -93,20 +102,17 @@ then
|
|||
runtime_major_version=`echo $with_cuda_runtime | cut -d. -f1`
|
||||
runtime_minor_version=`echo $with_cuda_runtime | cut -d. -f2`
|
||||
if test "$runtime_major_version" -lt 10 ||
|
||||
( "$runtime_major_version" -eq 10 && "$runtime_minor_version" -lt 1 )
|
||||
(test "$runtime_major_version" -eq 10 && test "$runtime_minor_version" -lt 1 )
|
||||
then
|
||||
# CUDA toolkit v < 10.1: new solver not available
|
||||
cuda_fflags="$cuda_fflags \$(MOD_FLAG)\$(TOPDIR)/EIGENSOLVER_GPU/lib_eigsolve"
|
||||
cuda_extlibs="$cuda_extlibs eigensolver"
|
||||
cuda_libs="$cuda_libs \$(TOPDIR)/EIGENSOLVER_GPU/lib_eigsolve/lib_eigsolve.a"
|
||||
AC_MSG_WARN([Using legacy custom solver.])
|
||||
# CUDA toolkit v < 10.1: cusolver not available
|
||||
AC_MSG_ERROR([Unsupported CUDA Toolkit, too old])
|
||||
else
|
||||
try_dflags="$try_dflags -D__USE_CUSOLVER"
|
||||
fi
|
||||
# -----------------------------------------
|
||||
# C flags - not sure whether they are suitable for old version as well
|
||||
# C flags
|
||||
# -----------------------------------------
|
||||
cuda_cflags=" -I$with_cuda/include -gpu=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
cuda_cflags=" -I$with_cuda/include $mMcuda=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
ldflags="$ldflags $mMcuda=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
gpu_arch="$with_cuda_cc"
|
||||
cuda_runtime="$with_cuda_runtime"
|
||||
|
@ -114,6 +120,8 @@ then
|
|||
ldflags="$ldflags -acc"
|
||||
cuda_fflags="$cuda_fflags -acc"
|
||||
cuda_cflags="$cuda_cflags -acc"
|
||||
else
|
||||
AC_MSG_ERROR([OpenACC must be enabled])
|
||||
fi
|
||||
|
||||
fi
|
||||
|
|
Loading…
Reference in New Issue