mirror of https://gitlab.com/QEF/q-e.git
220 lines
6.1 KiB
Fortran
220 lines
6.1 KiB
Fortran
!
|
|
! Copyright (C) 2021 Quantum ESPRESSO Foundation
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
!
|
|
!----------------------------------------------------------------------
|
|
SUBROUTINE gen_us_dj_gpu_ &
|
|
( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, tpiba, &
|
|
omega, nr1, nr2, nr3, eigts1_d, eigts2_d, eigts3_d, mill_d, g_d, dvkb_d )
|
|
!----------------------------------------------------------------------
|
|
!! Calculates the beta function pseudopotentials with
|
|
!! the derivative of the Bessel functions - GFPU version
|
|
!
|
|
! AF: more gpu-resident variables can be used, avoiding local GPU-alloc
|
|
! and host2dev transfers
|
|
!
|
|
USE upf_kinds, ONLY: dp
|
|
USE upf_const, ONLY: tpi
|
|
USE uspp, ONLY: nkb, indv_d, nhtol_d, nhtolm_d
|
|
USE uspp_data, ONLY: nqx, tab, tab_d, dq
|
|
USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm
|
|
USE device_fbuff_m, ONLY: dev_buf
|
|
!
|
|
IMPLICIT NONE
|
|
!
|
|
INTEGER, INTENT(IN) :: npw
|
|
!! number ok plane waves
|
|
INTEGER, INTENT(IN) :: npwx
|
|
!! max number ok plane waves across k-points
|
|
INTEGER, INTENT(IN) :: igk_d(npw)
|
|
!! indices of plane waves k+G
|
|
REAL(dp), INTENT(IN) :: xk(3)
|
|
!! k-point
|
|
INTEGER, INTENT(IN) :: nat
|
|
!! number of atoms
|
|
INTEGER, INTENT(IN) :: ityp(nat)
|
|
!! index of type per atom
|
|
INTEGER, INTENT(IN) :: ntyp
|
|
!! number of atomic types
|
|
REAL(DP), INTENT(IN) :: tau(3,nat)
|
|
!! atomic positions (cc alat units)
|
|
REAL(DP), INTENT(IN) :: tpiba
|
|
!! rec.lattice units 2pi/a
|
|
REAL(DP), INTENT(IN) :: omega
|
|
!! cell volume
|
|
INTEGER, INTENT(IN) :: nr1,nr2,nr3
|
|
!! fft dims (dense grid)
|
|
COMPLEX(DP), INTENT(IN) :: eigts1_d(-nr1:nr1,nat)
|
|
!! structure factor 1
|
|
COMPLEX(DP), INTENT(IN) :: eigts2_d(-nr2:nr2,nat)
|
|
!! structure factor 2
|
|
COMPLEX(DP), INTENT(IN) :: eigts3_d(-nr3:nr3,nat)
|
|
!! structure factor 3
|
|
INTEGER, INTENT(IN) :: mill_d(3,*)
|
|
!! miller index map
|
|
REAL(DP), INTENT(IN) :: g_d(3,*)
|
|
!! g vectors (2pi/a units)
|
|
COMPLEX(DP), INTENT(OUT) :: dvkb_d(npwx, nkb)
|
|
!! the beta function pseudopotential
|
|
!
|
|
! ... local variables
|
|
!
|
|
INTEGER :: na, nt, nb, ih, l, lm, ikb, iig, i0, i1, i2, &
|
|
i3, ig, nbm, iq, mil1, mil2, mil3, &
|
|
ikb_t, nht, ina, nas(nat), ierr(3)
|
|
REAL(DP) :: px, ux, vx, wx, arg, u_ipol, xk1, xk2, xk3, qt
|
|
COMPLEX(DP) :: pref
|
|
INTEGER, ALLOCATABLE :: ityp_d(:), ih_d(:), na_d(:), nas_d(:)
|
|
!
|
|
REAL(DP), POINTER :: gk_d(:,:), djl_d(:,:,:), ylm_d(:,:)
|
|
REAL(DP), ALLOCATABLE :: q_d(:), tau_d(:,:)
|
|
COMPLEX(DP), ALLOCATABLE :: phase_d(:), sk_d(:,:)
|
|
!
|
|
#if defined(__CUDA)
|
|
attributes(DEVICE) :: igk_d, mill_d, eigts1_d, eigts2_d, eigts3_d, g_d
|
|
attributes(DEVICE) :: gk_d, q_d, sk_d, djl_d, ylm_d, &
|
|
ityp_d, phase_d, ih_d, na_d, tau_d, nas_d
|
|
attributes(DEVICE) :: dvkb_d
|
|
#endif
|
|
!
|
|
IF (nkb == 0) RETURN
|
|
!
|
|
CALL dev_buf%lock_buffer( ylm_d, (/ npw,(lmaxkb+1)**2 /), ierr(1) )
|
|
CALL dev_buf%lock_buffer( djl_d, (/ npw,nbetam,ntyp /), ierr(2) )
|
|
CALL dev_buf%lock_buffer( gk_d, (/ 3,npw /), ierr(3) )
|
|
ALLOCATE( q_d(npw) )
|
|
!
|
|
xk1 = xk(1)
|
|
xk2 = xk(2)
|
|
xk3 = xk(3)
|
|
!
|
|
!$cuf kernel do (1) <<<*,*>>>
|
|
DO ig = 1, npw
|
|
iig = igk_d(ig)
|
|
gk_d(1,ig) = xk1 + g_d(1,iig)
|
|
gk_d(2,ig) = xk2 + g_d(2,iig)
|
|
gk_d(3,ig) = xk3 + g_d(3,iig)
|
|
q_d(ig) = gk_d(1,ig)**2 + gk_d(2,ig)**2 + gk_d(3,ig)**2
|
|
ENDDO
|
|
!
|
|
CALL ylmr2_gpu( (lmaxkb+1)**2, npw, gk_d, q_d, ylm_d )
|
|
!
|
|
DO nt = 1, ntyp
|
|
nbm = upf(nt)%nbeta
|
|
!$cuf kernel do (2) <<<*,*>>>
|
|
DO nb = 1, nbm
|
|
DO ig = 1, npw
|
|
qt = SQRT(q_d(ig)) * tpiba
|
|
px = qt/dq - DBLE(INT(qt/dq))
|
|
ux = 1._DP - px
|
|
vx = 2._DP - px
|
|
wx = 3._DP - px
|
|
i0 = INT(qt/dq) + 1
|
|
i1 = i0 + 1
|
|
i2 = i0 + 2
|
|
i3 = i0 + 3
|
|
djl_d(ig,nb,nt) = (tab_d(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6._DP + &
|
|
tab_d(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2._DP - &
|
|
tab_d(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2._DP + &
|
|
tab_d(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6._DP)/dq
|
|
ENDDO
|
|
ENDDO
|
|
ENDDO
|
|
!
|
|
DEALLOCATE( q_d )
|
|
!
|
|
ALLOCATE( ityp_d(nat), nas_d(nat) )
|
|
ityp_d = ityp
|
|
ALLOCATE( tau_d(3,nat) )
|
|
tau_d = tau
|
|
ALLOCATE( phase_d(nat) )
|
|
!
|
|
ina = 0
|
|
DO nt = 1, ntyp
|
|
DO na = 1, nat
|
|
IF ( ityp(na) == nt ) THEN
|
|
ina = ina + 1
|
|
nas(ina) = na
|
|
ENDIF
|
|
ENDDO
|
|
ENDDO
|
|
nas_d = nas
|
|
!
|
|
!$cuf kernel do (1) <<<*,*>>>
|
|
DO ina = 1, nat
|
|
na = nas_d(ina)
|
|
arg = (xk1 * tau_d(1,na) + xk2 * tau_d(2,na) &
|
|
+ xk3 * tau_d(3,na) ) * tpi
|
|
phase_d(na) = CMPLX( COS(arg), -SIN(arg), KIND=DP )
|
|
ENDDO
|
|
!
|
|
DEALLOCATE( tau_d )
|
|
!
|
|
ALLOCATE( sk_d(npw,nat) )
|
|
!
|
|
!$cuf kernel do (2) <<<*,*>>>
|
|
DO ina = 1, nat
|
|
DO ig = 1, npw
|
|
!
|
|
na = nas_d(ina)
|
|
iig = igk_d(ig)
|
|
mil1 = mill_d(1,iig)
|
|
mil2 = mill_d(2,iig)
|
|
mil3 = mill_d(3,iig)
|
|
sk_d(ig,na) = eigts1_d(mil1,na) * &
|
|
eigts2_d(mil2,na) * &
|
|
eigts3_d(mil3,na) * phase_d(na)
|
|
ENDDO
|
|
ENDDO
|
|
!
|
|
!
|
|
DEALLOCATE( phase_d )
|
|
!
|
|
ALLOCATE( ih_d(nat*nhm), na_d(nat*nhm) )
|
|
!
|
|
ikb_t = 0
|
|
DO ina = 1, nat
|
|
na = nas(ina)
|
|
nht = nh(ityp(na))
|
|
!$cuf kernel do (1) <<<*,*>>>
|
|
DO ih = 1, nht
|
|
ih_d(ikb_t+ih) = ih
|
|
na_d(ikb_t+ih) = na
|
|
ENDDO
|
|
ikb_t = ikb_t + nht
|
|
ENDDO
|
|
!
|
|
!$cuf kernel do (2) <<<*,*>>>
|
|
DO ikb = 1, ikb_t
|
|
DO ig = 1, npw
|
|
ih = ih_d(ikb)
|
|
na = na_d(ikb)
|
|
nt = ityp_d(na)
|
|
nb = indv_d(ih,nt)
|
|
l = nhtol_d(ih,nt)
|
|
lm = nhtolm_d(ih,nt)
|
|
pref = (0._DP,-1._DP)**l
|
|
!
|
|
dvkb_d(ig,ikb) = CMPLX(djl_d(ig,nb,nt)) * sk_d(ig,na) * &
|
|
CMPLX(ylm_d(ig,lm)) * pref
|
|
ENDDO
|
|
ENDDO
|
|
!
|
|
DEALLOCATE( sk_d )
|
|
!
|
|
IF (ikb_t /= nkb) CALL upf_error( 'gen_us_dj', 'unexpected error', 1 )
|
|
!
|
|
CALL dev_buf%release_buffer( ylm_d, ierr(1) )
|
|
CALL dev_buf%release_buffer( djl_d, ierr(2) )
|
|
CALL dev_buf%release_buffer( gk_d, ierr(3) )
|
|
!
|
|
DEALLOCATE( ih_d, na_d, nas_d )
|
|
!
|
|
RETURN
|
|
!
|
|
END SUBROUTINE gen_us_dj_gpu_
|