mirror of https://gitlab.com/QEF/q-e.git
Support for CUDA EIGENSOLVER removed
This commit is contained in:
parent
e821a717dc
commit
d262aca3cb
|
@ -223,11 +223,7 @@ SUBROUTINE laxlib_cdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
!
|
||||
#if defined(__USE_CUSOLVER)
|
||||
USE cusolverdn
|
||||
#else
|
||||
USE zhegvdx_gpu
|
||||
#endif
|
||||
#endif
|
||||
!
|
||||
USE laxlib_parallel_include
|
||||
|
@ -296,7 +292,6 @@ SUBROUTINE laxlib_cdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
ATTRIBUTES( DEVICE ) :: work_d, rwork_d, h_diag_d, s_diag_d
|
||||
#endif
|
||||
INTEGER :: i, j
|
||||
#if defined( __USE_CUSOLVER )
|
||||
INTEGER :: devInfo_d, h_meig
|
||||
ATTRIBUTES( DEVICE ) :: devInfo_d
|
||||
TYPE(cusolverDnHandle), SAVE :: cuSolverHandle
|
||||
|
@ -304,7 +299,6 @@ SUBROUTINE laxlib_cdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
!
|
||||
COMPLEX(DP), VARTYPE :: h_bkp_d(:,:), s_bkp_d(:,:)
|
||||
ATTRIBUTES( DEVICE ) :: h_bkp_d, s_bkp_d
|
||||
#endif
|
||||
#undef VARTYPE
|
||||
!
|
||||
!
|
||||
|
@ -318,9 +312,7 @@ SUBROUTINE laxlib_cdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
!
|
||||
! Keeping compatibility for both CUSolver and CustomEigensolver, CUSolver below
|
||||
!
|
||||
#if defined(__USE_CUSOLVER) && defined(__CUDA)
|
||||
!
|
||||
! vvv __USE_CUSOLVER
|
||||
#if defined(__CUDA)
|
||||
|
||||
#if ! defined(__USE_GLOBAL_BUFFER)
|
||||
ALLOCATE(h_bkp_d(n,n), s_bkp_d(n,n), STAT = info)
|
||||
|
@ -387,96 +379,9 @@ SUBROUTINE laxlib_cdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
CALL dev%release_buffer( h_bkp_d, info )
|
||||
CALL dev%release_buffer( s_bkp_d, info )
|
||||
#endif
|
||||
! ^^^ __USE_CUSOLVER
|
||||
!
|
||||
! Keeping compatibility for both CUSolver and CustomEigensolver, CustomEigensolver below
|
||||
!
|
||||
#elif defined(__CUDA)
|
||||
! vvv not __USE_CUSOLVER
|
||||
#if ! defined(__USE_GLOBAL_BUFFER)
|
||||
! NB: dimension is different!
|
||||
ALLOCATE(v_h(ldh,n), e_h(n))
|
||||
ALLOCATE(h_diag_d(n) , s_diag_d(n))
|
||||
#else
|
||||
CALL pin%lock_buffer( v_h, (/ldh,n/), info )
|
||||
CALL pin%lock_buffer( e_h, n, info )
|
||||
!
|
||||
CALL dev%lock_buffer( h_diag_d, n, info )
|
||||
IF( info /= 0 ) CALL lax_error__( ' cdiaghg_gpu ', ' cannot allocate h_bkp_d ', ABS( info ) )
|
||||
CALL dev%lock_buffer( s_diag_d, n, info )
|
||||
IF( info /= 0 ) CALL lax_error__( ' cdiaghg_gpu ', ' cannot allocate s_bkp_d ', ABS( info ) )
|
||||
#endif
|
||||
!
|
||||
lwork = n
|
||||
lrwork = 1+5*n+2*n*n
|
||||
liwork = 3+5*n
|
||||
!
|
||||
lwork_d = 2*64*64 + 65 * n
|
||||
lrwork_d = n
|
||||
!
|
||||
#if ! defined(__USE_GLOBAL_BUFFER)
|
||||
ALLOCATE(work(lwork), rwork(lrwork), iwork(liwork))
|
||||
!
|
||||
ALLOCATE(work_d(1*lwork_d), STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' cdiaghg_gpu ', ' allocate work_d ', ABS( info ) )
|
||||
!
|
||||
ALLOCATE(rwork_d(1*lrwork_d), STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' cdiaghg_gpu ', ' allocate rwork_d ', ABS( info ) )
|
||||
#else
|
||||
CALL pin%lock_buffer(work, lwork, info)
|
||||
CALL pin%lock_buffer(rwork, lrwork, info)
|
||||
CALL pin%lock_buffer(iwork, liwork, info)
|
||||
CALL dev%lock_buffer( work_d, lwork_d, info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' cdiaghg_gpu ', ' cannot allocate work_d ', ABS( info ) )
|
||||
CALL dev%lock_buffer( rwork_d, lrwork_d, info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' cdiaghg_gpu ', ' cannot allocate rwork_d ', ABS( info ) )
|
||||
#endif
|
||||
!
|
||||
!$cuf kernel do(1) <<<*,*>>>
|
||||
DO i = 1, n
|
||||
h_diag_d(i) = DBLE( h_d(i,i) )
|
||||
s_diag_d(i) = DBLE( s_d(i,i) )
|
||||
END DO
|
||||
CALL zhegvdx_gpu(n, h_d, ldh, s_d, ldh, v_d, ldh, 1, m, e_d, work_d,&
|
||||
lwork_d, rwork_d, lrwork_d, &
|
||||
work, lwork, rwork, lrwork, &
|
||||
iwork, liwork, v_h, SIZE(v_h, 1), e_h, info, .TRUE.)
|
||||
!
|
||||
|
||||
IF( info /= 0 ) CALL lax_error__( ' cdiaghg_gpu ', ' zhegvdx_gpu failed ', ABS( info ) )
|
||||
!
|
||||
!$cuf kernel do(1) <<<*,*>>>
|
||||
DO i = 1, n
|
||||
h_d(i,i) = DCMPLX( h_diag_d(i), 0.0_DP)
|
||||
s_d(i,i) = DCMPLX( s_diag_d(i), 0.0_DP)
|
||||
DO j = i + 1, n
|
||||
h_d(i,j) = DCONJG( h_d(j,i) )
|
||||
s_d(i,j) = DCONJG( s_d(j,i) )
|
||||
END DO
|
||||
DO j = n + 1, ldh
|
||||
h_d(j,i) = ( 0.0_DP, 0.0_DP )
|
||||
s_d(j,i) = ( 0.0_DP, 0.0_DP )
|
||||
END DO
|
||||
END DO
|
||||
#if ! defined(__USE_GLOBAL_BUFFER)
|
||||
DEALLOCATE(h_diag_d, s_diag_d)
|
||||
!
|
||||
DEALLOCATE(work, rwork, iwork)
|
||||
DEALLOCATE(work_d, rwork_d)
|
||||
DEALLOCATE(v_h, e_h)
|
||||
#else
|
||||
CALL dev%release_buffer( h_diag_d, info )
|
||||
CALL dev%release_buffer( s_diag_d, info)
|
||||
!
|
||||
CALL pin%release_buffer(work, info)
|
||||
CALL pin%release_buffer(rwork, info)
|
||||
CALL pin%release_buffer(iwork, info)
|
||||
CALL dev%release_buffer( work_d, info)
|
||||
CALL dev%release_buffer( rwork_d, info)
|
||||
CALL pin%release_buffer(v_h, info)
|
||||
CALL pin%release_buffer(e_h, info)
|
||||
#endif
|
||||
! ^^^ not __USE_CUSOLVER
|
||||
#else
|
||||
CALL lax_error__( 'cdiaghg', 'Called GPU eigensolver without GPU support', 1 )
|
||||
#endif
|
||||
|
|
|
@ -574,13 +574,7 @@ END SUBROUTINE laxlib_multi_init_desc_x
|
|||
SUBROUTINE diagonalize_serial_gpu( m, rhos, rhod, s, info )
|
||||
#if defined(__CUDA)
|
||||
use cudafor
|
||||
#if defined ( __USE_CUSOLVER )
|
||||
USE cusolverDn
|
||||
#else
|
||||
use eigsolve_vars
|
||||
use nvtx_inters
|
||||
use dsyevd_gpu
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
include 'laxlib_kinds.fh'
|
||||
INTEGER, INTENT(IN) :: m
|
||||
|
@ -592,28 +586,13 @@ END SUBROUTINE laxlib_multi_init_desc_x
|
|||
INTEGER :: lwork_d
|
||||
INTEGER :: i, j, lda
|
||||
!
|
||||
#if defined (__USE_CUSOLVER)
|
||||
!
|
||||
INTEGER, DEVICE :: devInfo
|
||||
TYPE(cusolverDnHandle) :: cuSolverHandle
|
||||
REAL(DP), ALLOCATABLE, DEVICE :: work_d(:)
|
||||
!
|
||||
#else
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: work_d(:), a(:,:)
|
||||
ATTRIBUTES( DEVICE ) :: work_d, a
|
||||
REAL(DP), ALLOCATABLE :: b(:,:)
|
||||
REAL(DP), ALLOCATABLE :: work_h(:), w_h(:), z_h(:,:)
|
||||
ATTRIBUTES( PINNED ) :: work_h, w_h, z_h
|
||||
INTEGER, ALLOCATABLE :: iwork_h(:)
|
||||
ATTRIBUTES( PINNED ) :: iwork_h
|
||||
!
|
||||
INTEGER :: lwork_h, liwork_h
|
||||
!
|
||||
#endif
|
||||
! .... Subroutine Body
|
||||
!
|
||||
#if defined (__USE_CUSOLVER)
|
||||
!
|
||||
s = rhos
|
||||
lda = SIZE( rhos, 1 )
|
||||
|
@ -641,49 +620,6 @@ END SUBROUTINE laxlib_multi_init_desc_x
|
|||
DEALLOCATE( work_d )
|
||||
|
||||
!
|
||||
#else
|
||||
!
|
||||
info = 0
|
||||
lwork_d = 2*64*64 + 66*SIZE(rhos,1)
|
||||
lwork_h = 1 + 6*SIZE(rhos,1) + 2*SIZE(rhos,1)*SIZE(rhos,1)
|
||||
liwork_h = 3 + 5*SIZE(rhos,1)
|
||||
ALLOCATE(work_d(lwork_d),STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' laxlib diagonalize_serial_gpu ', ' allocate work_d ', ABS( info ) )
|
||||
ALLOCATE(a(SIZE(rhos,1),SIZE(rhos,2)),STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' laxlib diagonalize_serial_gpu ', ' allocate a ', ABS( info ) )
|
||||
ALLOCATE(work_h(lwork_h),STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' laxlib diagonalize_serial_gpu ', ' allocate work_h ', ABS( info ) )
|
||||
ALLOCATE(iwork_h(liwork_h),STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' laxlib diagonalize_serial_gpu ', ' allocate iwork_h ', ABS( info ) )
|
||||
!
|
||||
ALLOCATE(w_h(SIZE(rhod)),STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' laxlib diagonalize_serial_gpu ', ' allocate w_h ', ABS( info ) )
|
||||
ALLOCATE(z_h(SIZE(s,1),SIZE(s,2)),STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' laxlib diagonalize_serial_gpu ', ' allocate z_h ', ABS( info ) )
|
||||
|
||||
if(initialized == 0) call init_eigsolve_gpu
|
||||
|
||||
info = cudaMemcpy(a, rhos, SIZE(rhos,1)*SIZE(rhos,2), cudaMemcpyDeviceToDevice)
|
||||
lda = SIZE(rhos,1)
|
||||
!$cuf kernel do(2) <<<*,*>>>
|
||||
do j = 1,m
|
||||
do i = 1,m
|
||||
if (i > j) then
|
||||
s(i,j) = a(i,j)
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
|
||||
call dsyevd_gpu('V', 'U', 1, m, m, a, lda, s, lda, rhod, work_d, lwork_d, &
|
||||
work_h, lwork_h, iwork_h, liwork_h, z_h, lda, w_h, info)
|
||||
|
||||
DEALLOCATE(z_h)
|
||||
DEALLOCATE(w_h)
|
||||
DEALLOCATE(iwork_h)
|
||||
DEALLOCATE(work_h)
|
||||
DEALLOCATE(a)
|
||||
DEALLOCATE(work_d)
|
||||
#endif
|
||||
#else
|
||||
IMPLICIT NONE
|
||||
include 'laxlib_kinds.fh'
|
||||
|
|
|
@ -203,11 +203,7 @@ SUBROUTINE laxlib_rdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
USE laxlib_parallel_include
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
#if defined(__USE_CUSOLVER)
|
||||
USE cusolverdn
|
||||
#else
|
||||
USE dsygvdx_gpu
|
||||
#endif
|
||||
#endif
|
||||
!
|
||||
! NB: the flag below can be used to decouple LAXlib from devXlib.
|
||||
|
@ -276,14 +272,12 @@ SUBROUTINE laxlib_rdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
ATTRIBUTES( DEVICE ) :: h_diag_d, s_diag_d
|
||||
#endif
|
||||
!
|
||||
#if defined(__USE_CUSOLVER)
|
||||
INTEGER :: devInfo_d, h_meig
|
||||
ATTRIBUTES( DEVICE ) :: devInfo_d
|
||||
TYPE(cusolverDnHandle), SAVE :: cuSolverHandle
|
||||
LOGICAL, SAVE :: cuSolverInitialized = .FALSE.
|
||||
REAL(DP), VARTYPE :: h_bkp_d(:,:), s_bkp_d(:,:)
|
||||
ATTRIBUTES( DEVICE ) :: h_bkp_d, s_bkp_d
|
||||
#endif
|
||||
#undef VARTYPE
|
||||
!
|
||||
CALL start_clock_gpu( 'rdiaghg' )
|
||||
|
@ -292,61 +286,8 @@ SUBROUTINE laxlib_rdiaghg_gpu( n, m, h_d, s_d, ldh, e_d, v_d, me_bgrp, root_bgrp
|
|||
!
|
||||
IF ( me_bgrp == root_bgrp ) THEN
|
||||
!
|
||||
#if (!defined(__USE_CUSOLVER)) && defined(__CUDA)
|
||||
ALLOCATE(e_h(n), v_h(ldh,n))
|
||||
#if defined(__CUDA)
|
||||
!
|
||||
ALLOCATE(h_diag_d(n), s_diag_d(n))
|
||||
!$cuf kernel do(1) <<<*,*>>>
|
||||
DO i = 1, n
|
||||
h_diag_d(i) = DBLE( h_d(i,i) )
|
||||
s_diag_d(i) = DBLE( s_d(i,i) )
|
||||
END DO
|
||||
!
|
||||
lwork = 1 + 6*n + 2*n*n
|
||||
liwork = 3 + 5*n
|
||||
ALLOCATE(work(lwork), iwork(liwork))
|
||||
!
|
||||
lwork_d = 2*64*64 + 66*n
|
||||
#if ! defined(__USE_GLOBAL_BUFFER)
|
||||
ALLOCATE(work_d(1*lwork_d), STAT = info)
|
||||
#else
|
||||
CALL dev%lock_buffer( work_d, lwork_d, info )
|
||||
IF( info /= 0 ) CALL lax_error__( ' rdiaghg_gpu ', ' cannot allocate work_d ', ABS( info ) )
|
||||
#endif
|
||||
IF( info /= 0 ) CALL lax_error__( ' rdiaghg_gpu ', ' allocate work_d ', ABS( info ) )
|
||||
!
|
||||
CALL dsygvdx_gpu(n, h_d, ldh, s_d, ldh, v_d, ldh, 1, m, e_d, work_d, &
|
||||
lwork_d, work, lwork, iwork, liwork, v_h, size(v_h, 1), &
|
||||
e_h, info, .TRUE.)
|
||||
!
|
||||
IF( info /= 0 ) CALL lax_error__( ' rdiaghg_gpu ', ' dsygvdx_gpu failed ', ABS( info ) )
|
||||
!
|
||||
!$cuf kernel do(1) <<<*,*>>>
|
||||
DO i = 1, n
|
||||
h_d(i,i) = h_diag_d(i)
|
||||
s_d(i,i) = s_diag_d(i)
|
||||
DO j = i + 1, n
|
||||
h_d(i,j) = h_d(j,i)
|
||||
s_d(i,j) = s_d(j,i)
|
||||
END DO
|
||||
! This could be avoided, need to check dsygvdx_gpu implementation
|
||||
DO j = n + 1, ldh
|
||||
h_d(j,i) = 0.0_DP
|
||||
s_d(j,i) = 0.0_DP
|
||||
END DO
|
||||
END DO
|
||||
DEALLOCATE(h_diag_d,s_diag_d)
|
||||
!
|
||||
DEALLOCATE(work, iwork)
|
||||
#if ! defined(__USE_GLOBAL_BUFFER)
|
||||
DEALLOCATE(work_d)
|
||||
#else
|
||||
CALL dev%release_buffer( work_d, info )
|
||||
#endif
|
||||
!
|
||||
DEALLOCATE(v_h, e_h)
|
||||
#elif defined(__USE_CUSOLVER) && defined(__CUDA)
|
||||
! vvv __USE_CUSOLVER
|
||||
#if ! defined(__USE_GLOBAL_BUFFER)
|
||||
ALLOCATE(h_bkp_d(n,n), s_bkp_d(n,n), STAT = info)
|
||||
IF( info /= 0 ) CALL lax_error__( ' rdiaghg_gpu ', ' cannot allocate h_bkp_d or s_bkp_d ', ABS( info ) )
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
include ../../make.inc
|
||||
|
||||
# Stripped version of F90FLAGS to remove GPU details added explicitly below.
|
||||
F90FLAGSS := $(filter-out $(CUDA_F90FLAGS),$(F90FLAGS))
|
||||
|
||||
FLAGS = -O3 -pgf90libs -Mcuda=cc$(GPU_ARCH),cuda$(CUDA_RUNTIME),ptxinfo $(F90FLAGSS)
|
||||
FLAGS2 = -O3 -pgf90libs -Mcuda=cc$(GPU_ARCH),cuda$(CUDA_RUNTIME),ptxinfo,maxregcount:64 $(F90FLAGSS)
|
||||
|
||||
# For performance reasons, cc of FLAGS3 must be <= 60
|
||||
define MIN
|
||||
$(firstword $(sort ${1} ${2}))
|
||||
endef
|
||||
FLAGS3 = -O3 -pgf90libs -Mcuda=cc$(call MIN,${GPU_ARCH},60),cuda$(CUDA_RUNTIME),ptxinfo,nordc,maxregcount:255 $(F90FLAGSS)
|
||||
|
||||
# Uncomment to enable NVTX markers
|
||||
#OPTFLAGS = -DUSE_NVTX
|
||||
|
||||
all: lib_eigsolve.a
|
||||
|
||||
OBJS = cusolverDn_m.o eigsolve_vars.o toolbox.o zhegst_gpu.o zhemv_gpu.o zhetd2_gpu.o zhetrd_gpu.o zheevd_gpu.o zhegvdx_gpu.o \
|
||||
dsygst_gpu.o dsymv_gpu.o dsytd2_gpu.o dsytrd_gpu.o dsyevd_gpu.o dsygvdx_gpu.o
|
||||
|
||||
zhetd2_gpu.o : zhetd2_gpu.F90
|
||||
pgf90 -c ${FLAGS2} ${OPTFLAGS} $*.F90 -o $*.o
|
||||
zhemv_gpu.o : zhemv_gpu.F90
|
||||
pgf90 -c ${FLAGS3} ${OPTFLAGS} $*.F90 -o $*.o
|
||||
dsytd2_gpu.o : dsytd2_gpu.F90
|
||||
pgf90 -c ${FLAGS2} ${OPTFLAGS} $*.F90 -o $*.o
|
||||
dsymv_gpu.o : dsymv_gpu.F90
|
||||
pgf90 -c ${FLAGS3} ${OPTFLAGS} $*.F90 -o $*.o
|
||||
%.o: %.cuf
|
||||
pgf90 -c ${FLAGS} ${OPTFLAGS} $*.cuf -o $*.o
|
||||
%.o: %.F90
|
||||
pgf90 -c ${FLAGS} ${OPTFLAGS} $*.F90 -o $*.o
|
||||
|
||||
lib_eigsolve.a: $(OBJS)
|
||||
ar -cr lib_eigsolve.a $(OBJS)
|
||||
|
||||
clean:
|
||||
rm -f lib_eigsolve.a *.mod *.o
|
||||
|
||||
# Dependencies
|
||||
dsyevd_gpu.o : dsytrd_gpu.o
|
||||
dsyevd_gpu.o : eigsolve_vars.o
|
||||
dsyevd_gpu.o : toolbox.o
|
||||
dsygst_gpu.o : eigsolve_vars.o
|
||||
dsygvdx_gpu.o : dsyevd_gpu.o
|
||||
dsygvdx_gpu.o : dsygst_gpu.o
|
||||
dsygvdx_gpu.o : eigsolve_vars.o
|
||||
dsygvdx_gpu.o : toolbox.o
|
||||
dsytrd_gpu.o : dsymv_gpu.o
|
||||
dsytrd_gpu.o : dsytd2_gpu.o
|
||||
dsytrd_gpu.o : eigsolve_vars.o
|
||||
eigsolve_vars.o : cusolverDn_m.o
|
||||
zheevd_gpu.o : eigsolve_vars.o
|
||||
zheevd_gpu.o : toolbox.o
|
||||
zheevd_gpu.o : zhetrd_gpu.o
|
||||
zhegst_gpu.o : eigsolve_vars.o
|
||||
zhegvdx_gpu.o : eigsolve_vars.o
|
||||
zhegvdx_gpu.o : toolbox.o
|
||||
zhegvdx_gpu.o : zheevd_gpu.o
|
||||
zhegvdx_gpu.o : zhegst_gpu.o
|
||||
zhetrd_gpu.o : eigsolve_vars.o
|
||||
zhetrd_gpu.o : zhemv_gpu.o
|
||||
zhetrd_gpu.o : zhetd2_gpu.o
|
|
@ -11,12 +11,6 @@
|
|||
include ../make.inc
|
||||
include install_utils
|
||||
|
||||
# For NVIDIA Eigensolver
|
||||
EIGENSOLVER_VERSION=0.3.1
|
||||
EIGENSOLVER_GPU=v${EIGENSOLVER_VERSION}.tar.gz
|
||||
EIGENSOLVER_GPU_NAME=Eigensolver_gpu-${EIGENSOLVER_VERSION}
|
||||
EIGENSOLVER_GPU_URL=https://github.com/NVIDIA/Eigensolver_gpu/archive/${EIGENSOLVER_GPU}
|
||||
|
||||
# MAIN target
|
||||
|
||||
all: libcuda
|
||||
|
@ -72,18 +66,6 @@ fox_clean:
|
|||
|
||||
libcuda : $(addprefix libcuda_,$(CUDA_EXTLIBS))
|
||||
|
||||
libcuda_eigensolver :
|
||||
$(call download_and_unpack,$(EIGENSOLVER_GPU_NAME),$(EIGENSOLVER_GPU_URL),EIGENSOLVER_GPU,EIGENSOLVER_GPU)
|
||||
if test ! -e ../EIGENSOLVER_GPU/lib_eigsolve/lib_eigsolve.a ; then \
|
||||
(cp Makefile.lib_eigsolve ../EIGENSOLVER_GPU/lib_eigsolve/Makefile; \
|
||||
cd ../EIGENSOLVER_GPU/lib_eigsolve/; $(MAKE) ); else \
|
||||
(echo "No configuration file found for GPU custom eigensolver"; exit); fi
|
||||
|
||||
libcuda_eigensolver_clean:
|
||||
if test -d ../EIGENSOLVER_GPU; then (cd ../EIGENSOLVER_GPU/lib_eigsolve ; $(MAKE) clean); fi
|
||||
libcuda_eigensolver_veryclean:
|
||||
if test -d ../EIGENSOLVER_GPU; then (rm -R -f ../EIGENSOLVER_GPU ../${EIGENSOLVER_GPU_NAME}); fi
|
||||
|
||||
CUDA_PATH := $(if $(GPU_ARCH),$(CUDA_PATH),no)
|
||||
libcuda_devxlib :
|
||||
$(call update_submodule,external,devxlib)
|
||||
|
@ -136,6 +118,6 @@ libmbd_distclean:
|
|||
# cleaning
|
||||
###################################
|
||||
|
||||
clean: lapack_clean fox_clean libcuda_eigensolver_clean libcuda_devxlib_clean libmbd_clean
|
||||
clean: lapack_clean fox_clean libcuda_devxlib_clean libmbd_clean
|
||||
|
||||
veryclean: fox_clean libcuda_eigensolver_veryclean libcuda_devxlib_veryclean libmbd_distclean
|
||||
veryclean: fox_clean libcuda_devxlib_veryclean libmbd_distclean
|
||||
|
|
Loading…
Reference in New Issue