mirror of https://gitlab.com/QEF/q-e.git
More Vloc*psi cleanup
Variable "vrs" used in the Hamiltonian is now an ACC variable, replaces vrs_d. vrs is copied to device in set_vrs. Obsolete using_vrs* machinery deleted.
This commit is contained in:
parent
7bdae85022
commit
457c7d55ef
|
@ -245,7 +245,6 @@ set(src_pw
|
|||
src/wannier_occ.f90
|
||||
src/d3hess_mod.f90
|
||||
# GPU
|
||||
src/scf_mod_gpu.f90
|
||||
src/g_psi_mod_gpu.f90
|
||||
src/newd_gpu.f90
|
||||
src/add_paw_to_deeq_gpu.f90
|
||||
|
|
|
@ -278,7 +278,6 @@ PWLIBS += \
|
|||
h_psi_gpu.o \
|
||||
vhpsi_gpu.o \
|
||||
vloc_psi_gpu.o \
|
||||
scf_mod_gpu.o \
|
||||
usnldiag_gpu.o \
|
||||
add_vuspsi_gpu.o \
|
||||
newd_gpu.o \
|
||||
|
|
|
@ -27,8 +27,6 @@ SUBROUTINE allocate_fft
|
|||
USE wavefunctions, ONLY : psic, psic_nc
|
||||
USE xc_lib, ONLY : xclib_dft_is
|
||||
!
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
! ... First a bunch of checks
|
||||
|
@ -68,9 +66,7 @@ SUBROUTINE allocate_fft
|
|||
ALLOCATE( rhog_core(ngm) )
|
||||
ALLOCATE( psic(dfftp%nnr) )
|
||||
ALLOCATE( vrs(dfftp%nnr,nspin) )
|
||||
#if defined(__CUDA)
|
||||
CALL using_vrs(2)
|
||||
#endif
|
||||
!$acc enter data create (vrs)
|
||||
!
|
||||
IF (noncolin) ALLOCATE( psic_nc(dfftp%nnr,npol) )
|
||||
!
|
||||
|
|
|
@ -142,8 +142,8 @@ SUBROUTINE clean_pw( lflag )
|
|||
IF ( ALLOCATED( rhog_core ) ) DEALLOCATE( rhog_core )
|
||||
IF ( ALLOCATED( psic ) ) DEALLOCATE( psic )
|
||||
IF ( ALLOCATED( psic_nc ) ) DEALLOCATE( psic_nc )
|
||||
!$acc exit data delete(vrs)
|
||||
IF ( ALLOCATED( vrs ) ) DEALLOCATE( vrs )
|
||||
CALL deallocate_scf_gpu()
|
||||
!
|
||||
! ... arrays allocated in allocate_locpot.f90 ( and never deallocated )
|
||||
!
|
||||
|
|
|
@ -47,8 +47,6 @@ SUBROUTINE electrons()
|
|||
USE loc_scdm, ONLY : use_scdm, localize_orbitals
|
||||
USE loc_scdm_k, ONLY : localize_orbitals_k
|
||||
!
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
!
|
||||
USE add_dmft_occ, ONLY : dmft
|
||||
USE rism_module, ONLY : lrism, rism_calc3d
|
||||
USE makovpayne, ONLY : makov_payne
|
||||
|
@ -145,7 +143,6 @@ SUBROUTINE electrons()
|
|||
ehart, etxc, vtxc, eth, etotefield, charge, v)
|
||||
IF (lrism) CALL rism_calc3d(rho%of_g(:, 1), esol, vsol, v%of_r, tr2)
|
||||
IF (okpaw) CALL PAW_potential(rho%bec, ddd_PAW, epaw,etot_cmp_paw)
|
||||
CALL using_vrs(1)
|
||||
CALL set_vrs( vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, &
|
||||
nspin, doublegrid )
|
||||
!
|
||||
|
@ -242,7 +239,6 @@ SUBROUTINE electrons()
|
|||
IF (lrism) CALL rism_calc3d(rho%of_g(:, 1), esol, vsol, v%of_r, tr2)
|
||||
!
|
||||
IF (okpaw) CALL PAW_potential(rho%bec, ddd_PAW, epaw,etot_cmp_paw)
|
||||
CALL using_vrs(1)
|
||||
CALL set_vrs( vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, &
|
||||
nspin, doublegrid )
|
||||
!
|
||||
|
@ -462,7 +458,6 @@ SUBROUTINE electrons_scf ( printout, exxen )
|
|||
USE libmbd_interface, ONLY : EmbdvdW
|
||||
USE add_dmft_occ, ONLY : dmft, dmft_update, v_dmft, dmft_updated
|
||||
!
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
USE device_fbuff_m, ONLY : dev_buf, pin_buf
|
||||
USE pwcom, ONLY : report_mag
|
||||
USE makovpayne, ONLY : makov_payne
|
||||
|
@ -987,13 +982,8 @@ SUBROUTINE electrons_scf ( printout, exxen )
|
|||
!
|
||||
! ... define the total local potential (external + scf)
|
||||
!
|
||||
CALL using_vrs(1)
|
||||
CALL sum_vrs( dfftp%nnr, nspin, vltot, v%of_r, vrs )
|
||||
!
|
||||
! ... interpolate the total local potential
|
||||
!
|
||||
CALL using_vrs(1) ! redundant
|
||||
CALL interpolate_vrs( dfftp%nnr, nspin, doublegrid, kedtau, v%kin_r, vrs )
|
||||
CALL set_vrs( vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, &
|
||||
nspin, doublegrid )
|
||||
!
|
||||
! ... in the US case we have to recompute the self-consistent
|
||||
! ... term in the nonlocal potential
|
||||
|
|
|
@ -19,7 +19,6 @@ SUBROUTINE h_epsi_her_apply( lda, n, nbande, psi, hpsi, pdir, e_field )
|
|||
USE noncollin_module, ONLY : noncolin, npol, lspinorb
|
||||
USE wvfct, ONLY : npwx, nbnd, ik => current_k
|
||||
USE lsda_mod, ONLY : current_spin, nspin
|
||||
USE scf, ONLY : vrs
|
||||
USE gvect
|
||||
USE uspp, ONLY : okvan, nkb, vkb, qq_so, qq_at
|
||||
USE uspp_param, ONLY : nh, nhm, nbetam
|
||||
|
|
|
@ -19,7 +19,6 @@ SUBROUTINE h_epsi_her_set( pdir, e_field )
|
|||
USE noncollin_module, ONLY: noncolin, npol, lspinorb
|
||||
USE wvfct, ONLY: npwx, nbnd
|
||||
USE lsda_mod, ONLY: current_spin, nspin
|
||||
USE scf, ONLY: vrs
|
||||
USE gvect
|
||||
USE fft_base, ONLY: dfftp
|
||||
USE uspp, ONLY: okvan, nkb, vkb
|
||||
|
|
|
@ -106,7 +106,6 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
USE sci_mod, ONLY: p_psi
|
||||
USE fft_helper_subroutines
|
||||
!
|
||||
USE scf_gpum, ONLY: using_vrs
|
||||
#if defined(__OSCDFT)
|
||||
USE plugin_flags, ONLY : use_oscdft
|
||||
USE oscdft_base, ONLY : oscdft_ctx
|
||||
|
@ -133,10 +132,6 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
!
|
||||
!
|
||||
CALL start_clock( 'h_psi' ); !write (*,*) 'start h_psi';FLUSH(6)
|
||||
|
||||
CALL using_vrs(0) ! vloc_psi_gamma (intent:in)
|
||||
|
||||
|
||||
!
|
||||
! ... Here we set the kinetic energy (k+G)^2 psi and clean up garbage
|
||||
!
|
||||
|
|
|
@ -97,7 +97,6 @@ SUBROUTINE h_psi__gpu( lda, n, m, psi_d, hpsi_d )
|
|||
USE bp, ONLY: lelfield, l3dstring, gdir, efield, efield_cry
|
||||
USE becmod, ONLY: bec_type, becp
|
||||
USE lsda_mod, ONLY: current_spin
|
||||
USE scf_gpum, ONLY: vrs_d, using_vrs_d
|
||||
USE uspp, ONLY: nkb, vkb
|
||||
USE ldaU, ONLY: lda_plus_u, lda_plus_u_kind, Hubbard_projectors
|
||||
USE gvect, ONLY: gstart
|
||||
|
@ -113,6 +112,7 @@ SUBROUTINE h_psi__gpu( lda, n, m, psi_d, hpsi_d )
|
|||
USE fft_helper_subroutines
|
||||
USE device_memcpy_m, ONLY: dev_memcpy, dev_memset
|
||||
!
|
||||
USE scf, ONLY: vrs
|
||||
USE wvfct, ONLY: g2kin
|
||||
#if defined(__OSCDFT)
|
||||
USE plugin_flags, ONLY : use_oscdft
|
||||
|
@ -140,7 +140,6 @@ SUBROUTINE h_psi__gpu( lda, n, m, psi_d, hpsi_d )
|
|||
LOGICAL :: need_host_copy
|
||||
!
|
||||
CALL start_clock_gpu( 'h_psi' ); !write (*,*) 'start h_psi';FLUSH(6)
|
||||
CALL using_vrs_d(0)
|
||||
!
|
||||
! ... Here we add the kinetic energy (k+G)^2 psi and clean up garbage
|
||||
!
|
||||
|
@ -208,13 +207,13 @@ SUBROUTINE h_psi__gpu( lda, n, m, psi_d, hpsi_d )
|
|||
!
|
||||
ELSE
|
||||
! ... usual reciprocal-space algorithm
|
||||
CALL vloc_psi_gamma_gpu ( lda, n, m, psi_d, vrs_d(1,current_spin), hpsi_d )
|
||||
CALL vloc_psi_gamma_gpu ( lda, n, m, psi_d, vrs(1,current_spin), hpsi_d )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
ELSE IF ( noncolin ) THEN
|
||||
!
|
||||
CALL vloc_psi_nc_gpu ( lda, n, m, psi_d, vrs_d, hpsi_d )
|
||||
CALL vloc_psi_nc_gpu ( lda, n, m, psi_d, vrs, hpsi_d )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
|
@ -246,7 +245,7 @@ SUBROUTINE h_psi__gpu( lda, n, m, psi_d, hpsi_d )
|
|||
!
|
||||
ELSE
|
||||
!
|
||||
CALL vloc_psi_k_gpu ( lda, n, m, psi_d, vrs_d(1,current_spin), hpsi_d )
|
||||
CALL vloc_psi_k_gpu ( lda, n, m, psi_d, vrs(1,current_spin), hpsi_d )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
|
|
|
@ -35,7 +35,6 @@ SUBROUTINE hinit1()
|
|||
USE dfunct_gpum, ONLY : newd_gpu
|
||||
USE exx_base, ONLY : coulomb_fac, coulomb_done
|
||||
!
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
USE ener, ONLY : esol, vsol
|
||||
USE rism_module, ONLY : lrism, rism_update_pos, rism_calc3d
|
||||
!
|
||||
|
@ -113,7 +112,6 @@ SUBROUTINE hinit1()
|
|||
!
|
||||
! ... define the total local potential (external+scf)
|
||||
!
|
||||
CALL using_vrs(1)
|
||||
CALL set_vrs( vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, nspin, &
|
||||
doublegrid )
|
||||
!
|
||||
|
|
|
@ -47,7 +47,6 @@ SUBROUTINE orbm_kubo()
|
|||
USE gvecs, ONLY : doublegrid
|
||||
USE mp_pools, ONLY : intra_pool_comm
|
||||
USE mp_world, ONLY : world_comm
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
USE uspp_init, ONLY : init_us_2
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -112,7 +111,6 @@ SUBROUTINE orbm_kubo()
|
|||
ALLOCATE( H_evc(npol*npwx,nbnd) )
|
||||
ALLOCATE( temp(ngm) )
|
||||
!
|
||||
CALL using_vrs(1)
|
||||
CALL set_vrs( vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, nspin, doublegrid )
|
||||
CALL allocate_bec_type( nkb, nbnd, becp )
|
||||
! Initializations
|
||||
|
|
|
@ -55,7 +55,6 @@ SUBROUTINE potinit()
|
|||
USE paw_init, ONLY : PAW_atomic_becsum
|
||||
USE paw_onecenter, ONLY : PAW_potential
|
||||
!
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
USE pwcom, ONLY : report_mag
|
||||
USE rism_module, ONLY : lrism, rism_init3d, rism_calc3d
|
||||
!
|
||||
|
@ -278,9 +277,7 @@ SUBROUTINE potinit()
|
|||
!
|
||||
! ... define the total local potential (external+scf)
|
||||
!
|
||||
CALL using_vrs(1)
|
||||
CALL set_vrs( vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, nspin, doublegrid )
|
||||
!
|
||||
! ... write on output the parameters used in the DFT+U(+V) calculation
|
||||
!
|
||||
IF ( lda_plus_u ) THEN
|
||||
|
|
|
@ -2613,7 +2613,6 @@ MODULE realus
|
|||
USE fft_base, ONLY : dffts
|
||||
USE scf, ONLY : vrs
|
||||
USE lsda_mod, ONLY : current_spin
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -2629,8 +2628,6 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'v_loc_psir' )
|
||||
|
||||
CALL using_vrs(0) ! tg_gather (intent: in)
|
||||
|
||||
IF( dffts%has_task_groups ) THEN
|
||||
IF (ibnd == 1 ) THEN
|
||||
CALL tg_gather( dffts, vrs(:,current_spin), tg_v )
|
||||
|
@ -2672,7 +2669,6 @@ MODULE realus
|
|||
USE fft_base, ONLY : dffts
|
||||
USE scf, ONLY : vrs
|
||||
USE lsda_mod, ONLY : current_spin
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -2688,8 +2684,6 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'v_loc_psir' )
|
||||
|
||||
CALL using_vrs(0) ! tg_gather (intent: in)
|
||||
|
||||
IF( dffts%has_task_groups ) THEN
|
||||
IF (ibnd == 1 ) THEN
|
||||
CALL tg_gather( dffts, vrs(:,current_spin), tg_v )
|
||||
|
|
|
@ -98,9 +98,6 @@ MODULE scf
|
|||
REAL(DP), ALLOCATABLE :: vltot(:)
|
||||
!! the local potential in real space
|
||||
REAL(DP), ALLOCATABLE :: vrs(:,:)
|
||||
#if defined(__CUDA)
|
||||
attributes(pinned) :: vrs
|
||||
#endif
|
||||
!! the total pot. in real space (smooth grid)
|
||||
REAL(DP), ALLOCATABLE :: rho_core(:)
|
||||
!! the core charge in real space
|
||||
|
|
|
@ -1,124 +0,0 @@
|
|||
!
|
||||
! Copyright (C) 2002-2011 Quantum ESPRESSO group
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
#define DIMS1D(arr) lbound(arr,1):ubound(arr,1)
|
||||
#define DIMS2D(arr) lbound(arr,1):ubound(arr,1),lbound(arr,2):ubound(arr,2)
|
||||
#define DIMS3D(arr) lbound(arr,1):ubound(arr,1),lbound(arr,2):ubound(arr,2),lbound(arr,3):ubound(arr,3)
|
||||
#define DIMS4D(arr) lbound(arr,1):ubound(arr,1),lbound(arr,2):ubound(arr,2),lbound(arr,3):ubound(arr,3),lbound(arr,4):ubound(arr,4)
|
||||
#define DIMS5D(arr) lbound(arr,1):ubound(arr,1),lbound(arr,2):ubound(arr,2),lbound(arr,3):ubound(arr,3),lbound(arr,4):ubound(arr,4),lbound(arr,5):ubound(arr,5)
|
||||
!=----------------------------------------------------------------------------=!
|
||||
MODULE scf_gpum
|
||||
!=----------------------------------------------------------------------------=!
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
SAVE
|
||||
INTEGER, PARAMETER :: DP = selected_real_kind(14,200)
|
||||
INTEGER, PARAMETER :: sgl = selected_real_kind(6,30)
|
||||
INTEGER, PARAMETER :: i4b = selected_int_kind(9)
|
||||
INTEGER, PARAMETER :: i8b = selected_int_kind(18)
|
||||
#if defined(__DEBUG)
|
||||
INTEGER :: iverbosity = 1
|
||||
#else
|
||||
INTEGER :: iverbosity = 0
|
||||
#endif
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: vrs_d(:, :)
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
attributes (DEVICE) :: vrs_d
|
||||
#endif
|
||||
|
||||
LOGICAL :: vrs_ood = .false. ! used to flag out of date variables
|
||||
LOGICAL :: vrs_d_ood = .false. ! used to flag out of date variables
|
||||
!
|
||||
CONTAINS
|
||||
!
|
||||
SUBROUTINE using_vrs(intento, debug_info)
|
||||
!
|
||||
! intento is used to specify what the variable will be used for :
|
||||
! 0 -> in , the variable needs to be synchronized but won't be changed
|
||||
! 1 -> inout , the variable needs to be synchronized AND will be changed
|
||||
! 2 -> out , NO NEED to synchronize the variable, everything will be overwritten
|
||||
!
|
||||
USE scf, ONLY : vrs
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
CHARACTER(len=*), INTENT(IN), OPTIONAL :: debug_info
|
||||
#if defined(__CUDA) || defined(__CUDA_GNU)
|
||||
INTEGER :: intento_
|
||||
intento_ = intento
|
||||
!
|
||||
IF (PRESENT(debug_info) ) print *, "using_vrs ", debug_info, vrs_ood
|
||||
!
|
||||
IF (vrs_ood) THEN
|
||||
IF ((.not. allocated(vrs_d)) .and. (intento_ < 2)) THEN
|
||||
CALL errore('using_vrs_d', 'PANIC: sync of vrs from vrs_d with unallocated array. Bye!!', 1)
|
||||
stop
|
||||
END IF
|
||||
IF (.not. allocated(vrs)) THEN
|
||||
IF (intento_ /= 2) THEN
|
||||
print *, "WARNING: sync of vrs with unallocated array and intento /= 2? Changed to 2!"
|
||||
intento_ = 2
|
||||
END IF
|
||||
! IF (intento_ > 0) vrs_d_ood = .true.
|
||||
END IF
|
||||
IF (intento_ < 2) THEN
|
||||
IF ( iverbosity > 0 ) print *, "Really copied vrs D->H"
|
||||
vrs = vrs_d
|
||||
END IF
|
||||
vrs_ood = .false.
|
||||
ENDIF
|
||||
IF (intento_ > 0) vrs_d_ood = .true.
|
||||
#endif
|
||||
END SUBROUTINE using_vrs
|
||||
!
|
||||
SUBROUTINE using_vrs_d(intento, debug_info)
|
||||
!
|
||||
USE scf, ONLY : vrs
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
CHARACTER(len=*), INTENT(IN), OPTIONAL :: debug_info
|
||||
#if defined(__CUDA) || defined(__CUDA_GNU)
|
||||
!
|
||||
IF (PRESENT(debug_info) ) print *, "using_vrs_d ", debug_info, vrs_d_ood
|
||||
!
|
||||
IF (.not. allocated(vrs)) THEN
|
||||
IF (intento /= 2) print *, "WARNING: sync of vrs_d with unallocated array and intento /= 2?"
|
||||
IF (allocated(vrs_d)) DEALLOCATE(vrs_d)
|
||||
vrs_d_ood = .false.
|
||||
RETURN
|
||||
END IF
|
||||
! here we know that vrs is allocated, check if size is 0
|
||||
IF ( SIZE(vrs) == 0 ) THEN
|
||||
print *, "Refusing to allocate 0 dimensional array vrs_d. If used, code will crash."
|
||||
RETURN
|
||||
END IF
|
||||
!
|
||||
IF (vrs_d_ood) THEN
|
||||
IF ( allocated(vrs_d) .and. (SIZE(vrs_d)/=SIZE(vrs))) deallocate(vrs_d)
|
||||
IF (.not. allocated(vrs_d)) ALLOCATE(vrs_d(DIMS2D(vrs))) ! MOLD does not work on all compilers
|
||||
IF (intento < 2) THEN
|
||||
IF ( iverbosity > 0 ) print *, "Really copied vrs H->D"
|
||||
vrs_d = vrs
|
||||
END IF
|
||||
vrs_d_ood = .false.
|
||||
ENDIF
|
||||
IF (intento > 0) vrs_ood = .true.
|
||||
#else
|
||||
CALL errore('using_vrs_d', 'Trying to use device data without device compiled code!', 1)
|
||||
#endif
|
||||
END SUBROUTINE using_vrs_d
|
||||
!
|
||||
SUBROUTINE deallocate_scf_gpu
|
||||
IF( ALLOCATED( vrs_d ) ) DEALLOCATE( vrs_d )
|
||||
vrs_d_ood = .false.
|
||||
END SUBROUTINE deallocate_scf_gpu
|
||||
!=----------------------------------------------------------------------------=!
|
||||
END MODULE scf_gpum
|
||||
!=----------------------------------------------------------------------------=!
|
|
@ -1,5 +1,5 @@
|
|||
!
|
||||
! Copyright (C) 2001 PWSCF group
|
||||
! Copyright (C) 2001-2024 Quantum ESPESSO Foundation
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
|
@ -13,7 +13,9 @@ SUBROUTINE set_vrs( vrs, vltot, vr, kedtau, kedtaur, nrxx, nspin, doublegrid )
|
|||
!! the sum of all the local pseudopotential contributions.
|
||||
!
|
||||
USE kinds
|
||||
USE fft_base, ONLY : dffts
|
||||
USE xc_lib, ONLY : xclib_dft_is
|
||||
USE fft_base, ONLY : dffts, dfftp
|
||||
USE fft_interfaces, ONLY : fft_interpolate
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -33,42 +35,9 @@ SUBROUTINE set_vrs( vrs, vltot, vr, kedtau, kedtaur, nrxx, nspin, doublegrid )
|
|||
!! the kinetic energy density in R-space
|
||||
LOGICAL :: doublegrid
|
||||
! input: true if a doublegrid is used
|
||||
!
|
||||
CALL sum_vrs( nrxx, nspin, vltot, vr, vrs )
|
||||
!
|
||||
CALL interpolate_vrs( nrxx, nspin, doublegrid, kedtau, kedtaur, vrs )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE set_vrs
|
||||
!
|
||||
!
|
||||
!--------------------------------------------------------------------
|
||||
SUBROUTINE sum_vrs( nrxx, nspin, vltot, vr, vrs )
|
||||
!--------------------------------------------------------------------
|
||||
!! Accumulates local potential contributions into vrs (the total local potential).
|
||||
!
|
||||
USE kinds
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER :: nspin
|
||||
!! input: number of spin components: 1 if lda, 2 if lsd, 4 if noncolinear
|
||||
INTEGER :: nrxx
|
||||
!! input: the fft grid dimension
|
||||
REAL(DP) :: vrs(nrxx,nspin)
|
||||
!! output: total local potential on the smooth grid:
|
||||
!! \(\text{vrs\}=\text{vltot}+\text{vr}\)
|
||||
REAL(DP) :: vltot(nrxx)
|
||||
!! input: the total local pseudopotential
|
||||
REAL(DP) :: vr(nrxx,nspin)
|
||||
!! input: the scf(H+xc) part of the local potential
|
||||
!
|
||||
! ... local variable
|
||||
!
|
||||
INTEGER :: is
|
||||
!
|
||||
!
|
||||
!$acc data present(vrs)
|
||||
DO is = 1, nspin
|
||||
!
|
||||
! define the total local potential (external + scf) for each spin ...
|
||||
|
@ -84,39 +53,6 @@ SUBROUTINE sum_vrs( nrxx, nspin, vltot, vr, vrs )
|
|||
!
|
||||
ENDDO
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE sum_vrs
|
||||
!
|
||||
!--------------------------------------------------------------------------
|
||||
SUBROUTINE interpolate_vrs( nrxx, nspin, doublegrid, kedtau, kedtaur, vrs )
|
||||
!--------------------------------------------------------------------------
|
||||
!! Interpolates local potential on the smooth mesh if necessary.
|
||||
!
|
||||
USE kinds
|
||||
USE xc_lib, ONLY : xclib_dft_is
|
||||
USE fft_base, ONLY : dffts, dfftp
|
||||
USE fft_interfaces, ONLY : fft_interpolate
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER :: nspin
|
||||
!! input: number of spin components: 1 if lda, 2 if lsd, 4 if noncolinear
|
||||
INTEGER :: nrxx
|
||||
!! input: the fft grid dimension
|
||||
REAL(DP) :: vrs(nrxx,nspin)
|
||||
!! output: total local potential interpolated on the smooth grid
|
||||
REAL(DP) :: kedtau(dffts%nnr,nspin)
|
||||
!! position dependent kinetic energy enhancement factor
|
||||
REAL(DP) :: kedtaur(nrxx,nspin)
|
||||
!! the kinetic energy density in R-space
|
||||
LOGICAL :: doublegrid
|
||||
!! input: true if a doublegrid is used
|
||||
!
|
||||
! ... local variable
|
||||
!
|
||||
INTEGER :: is
|
||||
!
|
||||
! ... interpolate it on the smooth mesh if necessary
|
||||
!
|
||||
DO is = 1, nspin
|
||||
|
@ -124,6 +60,10 @@ SUBROUTINE interpolate_vrs( nrxx, nspin, doublegrid, kedtau, kedtaur, vrs )
|
|||
IF (xclib_dft_is('meta')) CALL fft_interpolate( dfftp, kedtaur(:,is), dffts, kedtau(:,is) )
|
||||
ENDDO
|
||||
!
|
||||
!$acc update device(vrs)
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE interpolate_vrs
|
||||
END SUBROUTINE set_vrs
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#define tg_gather_gpu tg_gather
|
||||
#endif
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE vloc_psi_gamma_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
||||
SUBROUTINE vloc_psi_gamma_gpu( lda, n, m, psi_d, v, hpsi_d )
|
||||
!-----------------------------------------------------------------------
|
||||
!! Calculation of Vloc*psi using dual-space technique - Gamma point.
|
||||
!
|
||||
|
@ -30,9 +30,9 @@ SUBROUTINE vloc_psi_gamma_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
INTEGER, INTENT(in) :: lda, n, m
|
||||
COMPLEX(DP), INTENT(in) :: psi_d(lda,m)
|
||||
COMPLEX(DP), INTENT(inout):: hpsi_d(lda,m)
|
||||
REAL(DP), INTENT(in) :: v_d(dffts%nnr)
|
||||
REAL(DP), INTENT(in) :: v(dffts%nnr)
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: psi_d, hpsi_d, v_d
|
||||
attributes(DEVICE) :: psi_d, hpsi_d
|
||||
#endif
|
||||
!
|
||||
! ... local variables
|
||||
|
@ -82,10 +82,10 @@ SUBROUTINE vloc_psi_gamma_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
!
|
||||
CALL wave_g2r( psi(:,ibnd:ibnd+group_size-1), psic, dffts, howmany_set=hm_vec )
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
!$acc parallel loop collapse(2) present(v)
|
||||
DO idx = 0, howmany-1
|
||||
DO j = 1, dffts_nnr
|
||||
psic(idx*dffts_nnr+j) = psic(idx*dffts_nnr+j) * v_d(j)
|
||||
psic(idx*dffts_nnr+j) = psic(idx*dffts_nnr+j) * v(j)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
|
@ -122,9 +122,9 @@ SUBROUTINE vloc_psi_gamma_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
!
|
||||
CALL wave_g2r( psi(1:n,ibnd:ebnd), psic, dffts )
|
||||
!
|
||||
!$acc parallel loop
|
||||
!$acc parallel loop present(v)
|
||||
DO j = 1, dffts_nnr
|
||||
psic(j) = psic(j) * v_d(j)
|
||||
psic(j) = psic(j) * v(j)
|
||||
ENDDO
|
||||
!
|
||||
brange=1 ; fac=1.d0
|
||||
|
@ -158,7 +158,7 @@ END SUBROUTINE vloc_psi_gamma_gpu
|
|||
!
|
||||
!@njs: vloc_psi_k
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE vloc_psi_k_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
||||
SUBROUTINE vloc_psi_k_gpu( lda, n, m, psi_d, v, hpsi_d )
|
||||
!-----------------------------------------------------------------------
|
||||
!! Calculation of Vloc*psi using dual-space technique - k-points. GPU double.
|
||||
!
|
||||
|
@ -185,9 +185,9 @@ SUBROUTINE vloc_psi_k_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
INTEGER, INTENT(IN) :: lda, n, m
|
||||
COMPLEX(DP), INTENT(IN) :: psi_d(lda,m)
|
||||
COMPLEX(DP), INTENT(INOUT):: hpsi_d(lda,m)
|
||||
REAL(DP), INTENT(IN) :: v_d(dffts%nnr)
|
||||
REAL(DP), INTENT(IN) :: v(dffts%nnr)
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: psi_d, hpsi_d, v_d
|
||||
attributes(DEVICE) :: psi_d, hpsi_d
|
||||
#endif
|
||||
!
|
||||
! ... local variables
|
||||
|
@ -229,10 +229,10 @@ SUBROUTINE vloc_psi_k_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
CALL wave_g2r( psi(:,ibnd:ebnd), psic, dffts, igk=igk_k(:,current_k), &
|
||||
howmany_set=hm_vec )
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
!$acc parallel loop collapse(2) present(v)
|
||||
DO idx = 0, group_size-1
|
||||
DO j = 1, dffts_nnr
|
||||
psic(idx*dffts_nnr+j) = psic(idx*dffts_nnr+j) * v_d(j)
|
||||
psic(idx*dffts_nnr+j) = psic(idx*dffts_nnr+j) * v(j)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
|
@ -256,9 +256,9 @@ SUBROUTINE vloc_psi_k_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
!
|
||||
CALL wave_g2r( psi(1:n,ibnd:ibnd), psic, dffts, igk=igk_k(:,current_k) )
|
||||
!
|
||||
!$acc parallel loop
|
||||
!$acc parallel loop present(v)
|
||||
DO j = 1, dffts_nnr
|
||||
psic(j) = psic(j) * v_d(j)
|
||||
psic(j) = psic(j) * v(j)
|
||||
ENDDO
|
||||
!
|
||||
CALL wave_r2g( psic, vpsi(1:n,:), dffts, igk=igk_k(:,current_k) )
|
||||
|
@ -289,7 +289,7 @@ END SUBROUTINE vloc_psi_k_gpu
|
|||
!
|
||||
!@njs: vloc_psi_nc
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE vloc_psi_nc_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
||||
SUBROUTINE vloc_psi_nc_gpu( lda, n, m, psi_d, v, hpsi_d )
|
||||
!-----------------------------------------------------------------------
|
||||
!! Calculation of Vloc*psi using dual-space technique - non-collinear -
|
||||
!! GPU version.
|
||||
|
@ -308,11 +308,11 @@ SUBROUTINE vloc_psi_nc_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: lda, n, m
|
||||
REAL(DP), INTENT(IN) :: v_d(dfftp%nnr,4) ! beware dimensions!
|
||||
REAL(DP), INTENT(IN) :: v(dfftp%nnr,4) ! beware dimensions!
|
||||
COMPLEX(DP), INTENT(IN) :: psi_d(lda*npol,m)
|
||||
COMPLEX(DP), INTENT(INOUT):: hpsi_d(lda,npol,m)
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: v_d, psi_d, hpsi_d
|
||||
attributes(DEVICE) :: psi_d, hpsi_d
|
||||
#endif
|
||||
!
|
||||
! ... local variables
|
||||
|
@ -355,19 +355,19 @@ SUBROUTINE vloc_psi_nc_gpu( lda, n, m, psi_d, v_d, hpsi_d )
|
|||
ENDDO
|
||||
!
|
||||
IF (domag) THEN
|
||||
!$acc parallel loop
|
||||
!$acc parallel loop present(v)
|
||||
DO j = 1, dffts_nnr
|
||||
sup = psic_nc(j,1) * (v_d(j,1)+v_d(j,4)) + &
|
||||
psic_nc(j,2) * (v_d(j,2)-(0.d0,1.d0)*v_d(j,3))
|
||||
sdwn = psic_nc(j,2) * (v_d(j,1)-v_d(j,4)) + &
|
||||
psic_nc(j,1) * (v_d(j,2)+(0.d0,1.d0)*v_d(j,3))
|
||||
sup = psic_nc(j,1) * (v(j,1)+v(j,4)) + &
|
||||
psic_nc(j,2) * (v(j,2)-(0.d0,1.d0)*v(j,3))
|
||||
sdwn = psic_nc(j,2) * (v(j,1)-v(j,4)) + &
|
||||
psic_nc(j,1) * (v(j,2)+(0.d0,1.d0)*v(j,3))
|
||||
psic_nc(j,1) = sup
|
||||
psic_nc(j,2) = sdwn
|
||||
ENDDO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
!$acc parallel loop present(v)
|
||||
DO j = 1, dffts_nnr
|
||||
psic_nc(j,:) = psic_nc(j,:) * v_d(j,1)
|
||||
psic_nc(j,:) = psic_nc(j,:) * v(j,1)
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue