Merge branch 'final_beta_clean' into 'develop'

Beta function interpolation routines encapsulated into module beta_mod

See merge request QEF/q-e!2221
This commit is contained in:
giannozz 2024-01-11 11:49:37 +00:00
commit 93776a5a90
15 changed files with 291 additions and 311 deletions

View File

@ -14,6 +14,7 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
USE klist, ONLY : ngk
USE gvect, ONLY : mill, eigts1, eigts2, eigts3, g
USE uspp, ONLY : nkb, indv, nhtol, nhtolm
USE beta_mod, ONLY : interp_dbeta
USE uspp_param, ONLY : lmaxkb, nbetam, nh
USE io_global, ONLY : stdout
!
@ -137,6 +138,7 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb)
USE klist, ONLY : ngk, igk_k
USE gvect, ONLY : mill, eigts1, eigts2, eigts3, g
USE uspp, ONLY : nkb, indv, nhtol, nhtolm
USE beta_mod, ONLY : interp_beta
USE uspp_param, ONLY : upf, lmaxkb, nbetam, nh
!
implicit none

View File

@ -150,7 +150,7 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi
USE parallel_include
USE scf, ONLY : rho, rho_core, rhog_core
USE ener, ONLY : etxc, vtxc
USE beta_mod, ONLY : interp_beta, interp_dbeta
USE uspp_param, ONLY : upf, nh, nbetam
USE ions_base, ONLY : ntyp => nsp
USE klist, ONLY : ngk

View File

@ -22,6 +22,7 @@ SUBROUTINE hinit0()
USE klist, ONLY : qnorm
USE gvecw, ONLY : ecutwfc
USE vlocal, ONLY : strf
USE beta_mod, ONLY : init_tab_beta
USE realus, ONLY : generate_qpointlist, betapointlist, &
init_realspace_vars, real_space
USE ldaU, ONLY : lda_plus_U, Hubbard_projectors
@ -42,6 +43,7 @@ SUBROUTINE hinit0()
!
IMPLICIT NONE
REAL (dp) :: alat_old, qmax
INTEGER :: ierr
LOGICAL :: is_tau_read = .FALSE.
!
#if defined (__ENVIRON)
@ -60,10 +62,20 @@ SUBROUTINE hinit0()
IF (tbeta_smoothing) CALL init_us_b0(ecutwfc,intra_bgrp_comm)
IF (tq_smoothing) CALL init_us_0(ecutrho,intra_bgrp_comm)
qmax = (qnorm + sqrt(ecutrho))*cell_factor
! qmax is the maximum needed |q+G|, increased by a factor (20% or so)
! to avoid too frequent reallocations in variable-cell calculations
! (qnorm=max|q| may be needed for hybrid EXX or phonon calculations)
!
! qmax is the maximum |q+G|, for all G needed by the charge density,
! increased by a factor (20% or so) to avoid too frequent reallocations
! in variable-cell calculations ( norm is an estimate of max|q|, that
! may be needed for hybrid EXX or phonon calculations)
!
CALL init_us_1(nat, ityp, omega, qmax, intra_bgrp_comm)
!
! fill interpolation table for beta functions
! qmax as above, for all G needed by wavefunctions
!
qmax = (qnorm + sqrt(ecutwfc))*cell_factor
CALL init_tab_beta ( qmax, omega, intra_bgrp_comm, ierr )
!
IF ( lda_plus_U .AND. ( Hubbard_projectors == 'pseudo' ) ) CALL init_q_aeps()
CALL init_tab_atwfc (omega, intra_bgrp_comm)
!

View File

@ -282,11 +282,14 @@ SUBROUTINE post_xml_init ( )
USE rism_module, ONLY : rism_tobe_alive, rism_pot3d
USE rism3d_facade, ONLY : lrism3d, rism3d_initialize, rism3d_read_to_restart
USE xc_lib, ONLY : xclib_dft_is_libxc, xclib_init_libxc
USE beta_mod, ONLY : init_tab_beta
USE klist, ONLY : qnorm
!
IMPLICIT NONE
!
REAL(DP) :: ehart, etxc, vtxc, etotefield, charge, qmax
CHARACTER(LEN=37) :: dft_name
INTEGER :: ierr
!
! ... initialize Libxc if needed
!
@ -359,9 +362,20 @@ SUBROUTINE post_xml_init ( )
CALL init_vloc()
IF (tbeta_smoothing) CALL init_us_b0(ecutwfc,intra_bgrp_comm)
IF (tq_smoothing) CALL init_us_0(ecutrho,intra_bgrp_comm)
!
! qmax is the maximum |G|, for all G needed by the charge density
!
qmax = sqrt(ecutrho)*cell_factor
CALL init_us_1(nat, ityp, omega, qmax, intra_bgrp_comm)
!
! fill interpolation table for beta functions
! qmax is the maximum |q+G|, for all G needed by the wavefunctions
!
qmax = (qnorm + sqrt(ecutwfc))*cell_factor
CALL init_tab_beta ( qmax, omega, intra_bgrp_comm, ierr )
!
IF ( lda_plus_u .AND. ( Hubbard_projectors == 'pseudo' ) ) CALL init_q_aeps()
!
CALL init_tab_atwfc(omega, intra_bgrp_comm)
!
CALL struc_fact( nat, tau, nsp, ityp, ngm, g, bg, dfftp%nr1, dfftp%nr2,&

View File

@ -29,6 +29,7 @@ SUBROUTINE scale_h
USE rism_module, ONLY : lrism, rism_reinit3d
USE mp, ONLY : mp_max
USE mp_bands, ONLY : intra_bgrp_comm
USE beta_mod, ONLY : scale_tab_beta
USE vloc_mod, ONLY : scale_tab_vloc
USE rhoc_mod, ONLY : scale_tab_rhc
USE rhoat_mod, ONLY : scale_tab_rhoat
@ -86,6 +87,7 @@ SUBROUTINE scale_h
! scale the non-local pseudopotential tables
!
call scale_uspp_data( omega_old/omega )
call scale_tab_beta( omega_old/omega )
CALL scale_tab_rhc( omega_old/omega )
CALL scale_tab_rhoat( omega_old/omega )
CALL scale_tab_qrad( omega_old/omega )

View File

@ -8,7 +8,7 @@ set(src_upflib
init_us_b0.f90
init_us_1.f90
init_tab_atwfc.f90
init_tab_beta.f90
beta_mod.f90
interp_atwfc.f90
init_us_2_acc.f90
gth.f90

View File

@ -10,10 +10,11 @@ MODFLAGS= $(MOD_FLAG)../UtilXlib $(MOD_FLAG).
# OBJS_GPU are GPU-specific
OBJS_DEP= \
vloc_mod.o \
rhoc_mod.o \
rhoat_mod.o \
beta_mod.o \
qrad_mod.o \
rhoat_mod.o \
rhoc_mod.o \
vloc_mod.o \
qvan2.o \
dqvan2.o \
gen_us_dj.o \
@ -22,8 +23,7 @@ init_us_0.o \
init_us_b0.o \
init_us_1.o \
init_us_2_acc.o \
init_tab_atwfc.o \
init_tab_beta.o
init_tab_atwfc.o
OBJS_NODEP= \
atom.o \

236
upflib/beta_mod.f90 Normal file
View File

@ -0,0 +1,236 @@
!
! Copyright (C) 2024 Quantum ESPRESSO Foundation
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!
MODULE beta_mod
!
!! Variables and routines for nonlocal beta functions in numerical form
!! Contains generation of interpolation tables in reciprocal space,
!! interpolation routines and other utility routines
!! Code moved to upflib and restructured by Paolo Giannozzi, 2024
!
USE upf_kinds, ONLY : dp
USE upf_const, ONLY : fpi, e2
!
IMPLICIT NONE
!
PRIVATE
PUBLIC :: init_tab_beta
PUBLIC :: deallocate_tab_beta
PUBLIC :: scale_tab_beta
PUBLIC :: interp_beta
PUBLIC :: interp_dbeta
!
SAVE
!
INTEGER :: nqx = 0
!! size of interpolation table
REAL(DP), PARAMETER:: dq = 0.01_dp
!! grid step for interpolation table
REAL(DP) :: qmax = 0.0_dp
!! max q covered by the interpolation table
REAL(DP), ALLOCATABLE :: tab_beta(:,:,:)
!! interpolation table for numerical beta functions in reciprocal space
!
CONTAINS
!
!----------------------------------------------------------------------
SUBROUTINE init_tab_beta ( qmax_, omega, comm, ierr )
!----------------------------------------------------------------------
!
! Compute interpolation table for beta(G) radial functions
!
USE upf_kinds, ONLY : dp
USE atom, ONLY : rgrid
USE uspp_param, ONLY : upf, lmaxq, nbetam, nsp
USE mp, ONLY : mp_sum
USE m_gth, ONLY : mk_ffnl_gth
!
IMPLICIT NONE
!
REAL(dp), INTENT(IN) :: qmax_
!! Interpolate q up to qmax_ (sqrt(Ry), q^2 is an energy)
REAL(dp), INTENT(IN) :: omega
!! Unit-cell volume
INTEGER, INTENT(IN) :: comm
!! MPI communicator, to split the workload
INTEGER, INTENT(OUT) :: ierr
!! error code: ierr = 0 if interpolation table (IT) was allocated
!! ierr =-1 if IT had insufficient size and was re-allocated
!! ierr =-2 if IT was already present and nothing is done
!
INTEGER :: ndm, startq, lastq, nt, l, nb, iq, ir
REAL(dp) :: qi
! q-point grid for interpolation
REAL(dp) :: pref
! the prefactor of the Q functions
real(DP) :: vqint, d1
!
REAL(dp), allocatable :: aux (:)
! work space
REAL(dp), allocatable :: besr(:)
! work space
!
IF ( .NOT. ALLOCATED(tab_beta) ) THEN
!! table not yet allocated
qmax = qmax_
ierr = 0
ELSE IF ( qmax_ > qmax ) THEN
!! table ìs allocated but dimension insufficient: re-allocate
!! (with some margin so that this does not happen too often)
!$acc exit data delete(tab_beta)
DEALLOCATE ( tab_beta )
qmax = qmax_ + MAX(dq*100,qmax_-qmax)
ierr =-1
ELSE
!! table already computed: exit
ierr =-2
RETURN
END IF
nqx = INT( qmax/dq + 4)
allocate(tab_beta(nqx,nbetam,nsp))
!$acc enter data create(tab_beta)
ndm = MAXVAL ( upf(:)%kkbeta )
allocate( aux (ndm) )
allocate (besr( ndm))
pref = fpi / sqrt (omega)
call divide (comm, nqx, startq, lastq)
tab_beta (:,:,:) = 0.d0
do nt = 1, nsp
do nb = 1, upf(nt)%nbeta
l = upf(nt)%lll (nb)
do iq = startq, lastq
qi = (iq - 1) * dq
if ( upf(nt)%is_gth ) then
CALL mk_ffnl_gth( nt, nb, 1, omega, [ qi ] , tab_beta(iq,nb,nt) )
else
call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr)
do ir = 1, upf(nt)%kkbeta
aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir)
enddo
call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint)
tab_beta (iq, nb, nt) = vqint * pref
end if
enddo
enddo
enddo
deallocate (besr)
deallocate (aux)
!
call mp_sum( tab_beta, comm )
!$acc update device (tab_beta)
!
END SUBROUTINE init_tab_beta
!
!----------------------------------------------------------------------
SUBROUTINE interp_beta( nt, npw_, qg, vq )
!----------------------------------------------------------------------
!
USE upf_kinds, ONLY : dp
USE uspp_param, ONLY : upf, nbetam
!
implicit none
integer, intent(in) :: nt, npw_
real(dp), intent(in ) :: qg(npw_)
real(dp), intent(out) :: vq(npw_,nbetam)
!
integer :: i0, i1, i2, i3, nbnt, nb, ig
real(dp):: qgr, px, ux, vx, wx
!
nbnt = upf(nt)%nbeta
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
!$acc parallel loop collapse(2)
do nb = 1, nbnt
DO ig = 1, npw_
qgr = qg(ig)
px = qgr / dq - DBLE(INT(qgr/dq))
ux = 1.0_dp - px
vx = 2.0_dp - px
wx = 3.0_dp - px
i0 = INT(qgr/dq) + 1
i1 = i0 + 1
i2 = i0 + 2
i3 = i0 + 3
if ( i3 <= nqx ) then
vq(ig,nb) = &
tab_beta(i0,nb,nt) * ux * vx * wx / 6.0_dp + &
tab_beta(i1,nb,nt) * px * vx * wx / 2.0_dp - &
tab_beta(i2,nb,nt) * px * ux * wx / 2.0_dp + &
tab_beta(i3,nb,nt) * px * ux * vx / 6.0_dp
else
!! This case should never happen if tab_beta is properly allocated
!! (setting q_max to be large enough) - for compatibility with GWW
vq(ig,nb) = 0.0_dp
end if
END DO
END DO
!$acc end data
!----------------------------------------------------------------------
END SUBROUTINE interp_beta
!----------------------------------------------------------------------
!
!----------------------------------------------------------------------
SUBROUTINE interp_dbeta( nt, npw, qg, vq )
!----------------------------------------------------------------------
!
USE upf_kinds, ONLY : dp
USE uspp_param, ONLY : upf, nbetam
!
implicit none
integer, intent(in) :: nt, npw
real(dp), intent(in ) :: qg(npw)
real(dp), intent(out) :: vq(npw,nbetam)
!
integer :: i0, i1, i2, i3, nbnt, nb, ig
real(dp):: qgr, px, ux, vx, wx
!
nbnt = upf(nt)%nbeta
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
!$acc parallel loop collapse(2)
DO nb = 1, nbnt
DO ig = 1, npw
qgr = qg(ig)
px = qgr / dq - INT(qgr/dq)
ux = 1.0_dp - px
vx = 2.0_dp - px
wx = 3.0_dp - px
i0 = qgr / dq + 1
i1 = i0 + 1
i2 = i0 + 2
i3 = i0 + 3
IF ( i3 <= nqx ) THEN
vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp )/dq
ELSE
vq(ig,nb) = 0.0_dp
END IF
ENDDO
END DO
!$acc end data
!----------------------------------------------------------------------
END SUBROUTINE interp_dbeta
!
subroutine deallocate_tab_beta ()
implicit none
!
!$acc exit data delete(tab_beta)
if( allocated( tab_beta ) ) deallocate( tab_beta )
!
end subroutine deallocate_tab_beta
!
subroutine scale_tab_beta( vol_ratio_m1 )
! vol_ratio_m1 = omega_old / omega
implicit none
real(DP), intent(in) :: vol_ratio_m1
!
tab_beta(:,:,:) = tab_beta(:,:,:) * SQRT(vol_ratio_m1)
!$acc update device ( tab_beta)
end subroutine scale_tab_beta
!
END MODULE beta_mod

View File

@ -18,6 +18,7 @@ SUBROUTINE gen_us_dj_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
USE upf_const, ONLY: tpi
USE uspp, ONLY: nkb, indv, nhtol, nhtolm
USE uspp_param, ONLY: lmaxkb, nbetam, nh, nhm
USE beta_mod, ONLY: interp_dbeta
!
IMPLICIT NONE
!
@ -202,47 +203,3 @@ SUBROUTINE gen_us_dj_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
RETURN
!
END SUBROUTINE gen_us_dj_base
!
!----------------------------------------------------------------------
SUBROUTINE interp_dbeta( nt, npw, qg, vq )
!----------------------------------------------------------------------
!
USE upf_kinds, ONLY : dp
USE uspp_param, ONLY : upf, nbetam
USE uspp_data, ONLY : nqx, dq, tab_beta
!
implicit none
integer, intent(in) :: nt, npw
real(dp), intent(in ) :: qg(npw)
real(dp), intent(out) :: vq(npw,nbetam)
!
integer :: i0, i1, i2, i3, nbnt, nb, ig
real(dp):: qgr, px, ux, vx, wx
!
nbnt = upf(nt)%nbeta
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
!$acc parallel loop collapse(2)
DO nb = 1, nbnt
DO ig = 1, npw
qgr = qg(ig)
px = qgr / dq - INT(qgr/dq)
ux = 1.0_dp - px
vx = 2.0_dp - px
wx = 3.0_dp - px
i0 = qgr / dq + 1
i1 = i0 + 1
i2 = i0 + 2
i3 = i0 + 3
IF ( i3 <= nqx ) THEN
vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp )/dq
ELSE
vq(ig,nb) = 0.0_dp
END IF
ENDDO
END DO
!$acc end data
!----------------------------------------------------------------------
END SUBROUTINE interp_dbeta

View File

@ -20,6 +20,7 @@ SUBROUTINE gen_us_dy_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
USE upf_const, ONLY: tpi
USE uspp, ONLY: nkb, indv, nhtol, nhtolm
USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm
USE beta_mod, ONLY: interp_beta
!
IMPLICIT NONE
!

View File

@ -1,69 +0,0 @@
!
! Copyright (C) 2021-2023 Quantum ESPRESSO Foundation
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!----------------------------------------------------------------------
SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm )
!----------------------------------------------------------------------
!
! Compute interpolation table for beta(G) radial functions
!
USE upf_kinds, ONLY : dp
USE upf_const, ONLY : fpi
USE atom, ONLY : rgrid
USE uspp_param, ONLY : upf, lmaxq, nbetam, nsp
USE uspp_data, ONLY : nqx, dq, tab_beta
USE mp, ONLY : mp_sum
USE m_gth, ONLY : mk_ffnl_gth
!
IMPLICIT NONE
!
real(DP), intent(in) :: omega
integer, intent(in) :: intra_bgrp_comm
!
INTEGER :: ndm, startq, lastq, nt, l, nb, iq, ir
REAL(dp) :: qi
! q-point grid for interpolation
REAL(dp) :: pref
! the prefactor of the Q functions
real(DP) :: vqint, d1
!
REAL(dp), allocatable :: aux (:)
! work space
REAL(dp), allocatable :: besr(:)
! work space
!
ndm = MAXVAL ( upf(:)%kkbeta )
allocate( aux (ndm) )
allocate (besr( ndm))
pref = fpi / sqrt (omega)
call divide (intra_bgrp_comm, nqx, startq, lastq)
tab_beta (:,:,:) = 0.d0
do nt = 1, nsp
do nb = 1, upf(nt)%nbeta
l = upf(nt)%lll (nb)
do iq = startq, lastq
qi = (iq - 1) * dq
if ( upf(nt)%is_gth ) then
CALL mk_ffnl_gth( nt, nb, 1, omega, [ qi ] , tab_beta(iq,nb,nt) )
else
call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr)
do ir = 1, upf(nt)%kkbeta
aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir)
enddo
call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint)
tab_beta (iq, nb, nt) = vqint * pref
end if
enddo
enddo
enddo
deallocate (besr)
deallocate (aux)
!
call mp_sum( tab_beta, intra_bgrp_comm )
!$acc update device (tab_beta)
!
END SUBROUTINE init_tab_beta

View File

@ -23,7 +23,6 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
! spherical harmonics in the Q expansion
! f) It computes the interpolation table "qrad" for Q(G)
! g) It computes the qq terms which define the S matrix.
! h) It fills the interpolation table "tab_beta" for the beta functions
!
USE upf_kinds, ONLY : DP
USE upf_const, ONLY : fpi, sqrt2
@ -52,7 +51,7 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
real(DP) :: j
! J=L+S (noninteger!)
integer :: n1, m0, m1, n, li, mi, vi, vj, ijs, is1, is2, &
lk, mk, vk, kh, lh, ijkb0, na
lk, mk, kh, lh, ijkb0, na
integer, external :: sph_ind
complex(DP) :: coeff
real(DP) :: ji, jk
@ -170,12 +169,10 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
li = nhtol(ih, nt)
ji = nhtoj(ih, nt)
mi = nhtolm(ih, nt)-li*li
vi = indv (ih, nt)
do kh=1,nh(nt)
lk = nhtol(kh, nt)
jk = nhtoj(kh, nt)
mk = nhtolm(kh, nt)-lk*lk
vk = indv (kh, nt)
if (li == lk .and. abs(ji-jk) < 1.d-7) then
do is1=1,2
do is2=1,2
@ -251,10 +248,6 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
end do
end if
!
! fill interpolation table for beta functions
!
CALL init_tab_beta ( omega, intra_bgrp_comm )
!
#if defined __CUDA
!
! update GPU memory (taking care of zero-dim allocations)

View File

@ -20,6 +20,7 @@ SUBROUTINE init_us_2_acc( npw_, npwx, igk_, q_, nat, tau, ityp, &
USE upf_const, ONLY : tpi
USE uspp, ONLY : nkb, nhtol, nhtolm, indv
USE uspp_param, ONLY : lmaxkb, nbetam, nhm, nh, nsp
USE beta_mod, ONLY : interp_beta
!
implicit none
!
@ -168,52 +169,6 @@ SUBROUTINE init_us_2_acc( npw_, npwx, igk_, q_, nat, tau, ityp, &
deallocate(vkb1)
!
return
!----------------------------------------------------------------------
end subroutine init_us_2_acc
!
!----------------------------------------------------------------------
SUBROUTINE interp_beta( nt, npw_, qg, vq )
!----------------------------------------------------------------------
!
USE upf_kinds, ONLY : dp
USE uspp_param, ONLY : upf, nbetam
USE uspp_data, ONLY : nqx, dq, tab_beta
!
implicit none
integer, intent(in) :: nt, npw_
real(dp), intent(in ) :: qg(npw_)
real(dp), intent(out) :: vq(npw_,nbetam)
!
integer :: i0, i1, i2, i3, nbnt, nb, ig
real(dp):: qgr, px, ux, vx, wx
!
nbnt = upf(nt)%nbeta
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
!$acc parallel loop collapse(2)
do nb = 1, nbnt
DO ig = 1, npw_
qgr = qg(ig)
px = qgr / dq - DBLE(INT(qgr/dq))
ux = 1.0_dp - px
vx = 2.0_dp - px
wx = 3.0_dp - px
i0 = INT(qgr/dq) + 1
i1 = i0 + 1
i2 = i0 + 2
i3 = i0 + 3
if ( i3 <= nqx ) then
vq(ig,nb) = &
tab_beta(i0,nb,nt) * ux * vx * wx / 6.0_dp + &
tab_beta(i1,nb,nt) * px * vx * wx / 2.0_dp - &
tab_beta(i2,nb,nt) * px * ux * wx / 2.0_dp + &
tab_beta(i3,nb,nt) * px * ux * vx / 6.0_dp
else
!! This case should never happen if tab_beta is properly allocated
!! (setting q_max to be large enough) - for compatibility with GWW
vq(ig,nb) = 0.0_dp
end if
END DO
END DO
!$acc end data
!----------------------------------------------------------------------
END SUBROUTINE interp_beta
!----------------------------------------------------------------------

View File

@ -16,7 +16,7 @@ MODULE uspp_data
PRIVATE
!
PUBLIC :: nqxq, nqx, dq
PUBLIC :: tab_beta, tab_at
PUBLIC :: tab_at
!
PUBLIC :: allocate_uspp_data
PUBLIC :: deallocate_uspp_data
@ -27,9 +27,7 @@ MODULE uspp_data
INTEGER :: nqx
!! number of interpolation points
REAL(DP), PARAMETER:: dq = 0.01D0
!! space between points in the pseudopotential tab_beta.
REAL(DP), ALLOCATABLE :: tab_beta(:,:,:)
!! interpolation table for PP projectorss
!! space between interpolation points
REAL(DP), ALLOCATABLE :: tab_at(:,:,:)
!! interpolation table for atomic wfc
!
@ -43,8 +41,6 @@ contains
if (nqxq_/=nqxq) call upf_error("allocate_uspp_data","invalid nqxq_",1)
if (nqx_/=nqx) call upf_error("allocate_uspp_data","invalid nqx_",1)
!
allocate(tab_beta(nqx_,nbetam,nsp))
!$acc enter data create(tab_beta)
allocate(tab_at(nqx_,nwfcm,nsp))
!$acc enter data create(tab_at)
!
@ -52,8 +48,6 @@ contains
!
subroutine deallocate_uspp_data()
implicit none
!$acc exit data delete(tab_beta)
if( allocated( tab_beta ) ) deallocate( tab_beta )
!$acc exit data delete(tab_at)
if( allocated( tab_at ) ) deallocate( tab_at )
!
@ -64,9 +58,8 @@ contains
implicit none
real(DP), intent(in) :: vol_ratio_m1
!
tab_beta(:,:,:) = tab_beta(:,:,:) * SQRT(vol_ratio_m1)
tab_at(:,:,:) = tab_at(:,:,:) * SQRT(vol_ratio_m1)
!$acc update device (tab_at, tab_beta)
!$acc update device (tab_at)
end subroutine scale_uspp_data
!
END MODULE uspp_data

View File

@ -1,16 +1,15 @@
!
! Copyright (C) 2001-2021 Quantum ESPRESSO group
! Copyright (C) 2001-2024 Quantum ESPRESSO Foundation
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#define __CUDA_KERNEL_YLM
! use the CUDA Kernel version instead of the simple CUF version
! that crashes for obscure reasons (at least on nvfortran 22.07)
module ylmr2_gpum
#if defined(__CUDA) && defined(__CUDA_KERNEL_YLM)
#if defined(__CUDA)
! CUDA Kernel version
use cudafor
contains
attributes(global) subroutine ylmr2_gpu_kernel (lmax,lmax2, ng, g, gg, ylm)
@ -120,9 +119,9 @@ subroutine ylmr2_gpu(lmax2, ng, g, gg, ylm)
! Numerical recursive algorithm based on the one given in Numerical
! Recipes but avoiding the calculation of factorials that generate
! overflow for lmax > 11
! Last modified May 2nd, 2021, by PG
! Last modified Jan 2024, by PG
!
#if defined(__CUDA) && defined(__CUDA_KERNEL_YLM)
#if defined(__CUDA)
USE cudafor
USE ylmr2_gpum, ONLY : ylmr2_gpu_kernel
implicit none
@ -155,121 +154,6 @@ subroutine ylmr2_gpu(lmax2, ng, g, gg, ylm)
tBlock = dim3(256,1,1)
grid = dim3(ceiling(real(ng)/tBlock%x),1,1)
call ylmr2_gpu_kernel<<<grid,tBlock>>>(lmax, lmax2, ng, g, gg, ylm)
#else
USE upf_kinds, ONLY : DP
USE upf_const, ONLY : pi, fpi
!
IMPLICIT NONE
!
integer, intent(in) :: lmax2, ng
real(DP), intent(in) :: g (3, ng), gg (ng)
!
! BEWARE: gg = g(1)^2 + g(2)^2 +g(3)^2 is not checked on input
! Incorrect results will ensue if gg != g(1)^2 + g(2)^2 +g(3)^2
!
real(DP), intent(out) :: ylm (ng,lmax2)
#if defined(__CUDA)
attributes (device) :: g, gg, ylm
#endif
!
! local variables
!
real(DP), parameter :: eps = 1.0d-9
real(DP) :: cost , sent, phi
real(DP) :: c, gmod
integer :: lmax, ig, l, m, lm, lm1, lm2
!
if (ng < 1 .or. lmax2 < 1) return
do lmax = 0, 25
if ((lmax+1)**2 == lmax2) go to 10
end do
call upf_error (' ylmr', 'l > 25 or wrong number of Ylm required',lmax2)
10 continue
!
if (lmax == 0) then
!$cuf kernel do
do ig=1, ng
ylm(ig,1) = sqrt (1.d0 / fpi)
end do
return
end if
!
!$cuf kernel do
do ig=1,ng
gmod = sqrt (gg (ig) )
if (gmod < eps) then
cost = 0.d0
else
cost = g(3,ig)/gmod
endif
sent = sqrt(max(0.0_dp,1.0_dp-cost*cost))
!
! cost = cos(theta), sent = sin(theta), with theta = polar angle
!
! The Q(:,l,m) are defined as sqrt ((l-m)!/(l+m)!) * P(:,l,m),
! where P(:,l,m) are the Legendre Polynomials (0 <= m <= l),
! and are currently stored into Ylm(:,lm) where lm = l**2+1+2*m
! (one might also store them into an array Q(l,m) for each ig)
!
ylm (ig,1) = 1.d0
ylm (ig,2) = cost
ylm (ig,4) =-sent/sqrt(2.d0)
do l = 2, lmax
!
! recursion on l for Q(:,l,m)
!
do m = 0, l - 2
lm = (l )**2 + 1 + 2*m
lm1= (l-1)**2 + 1 + 2*m
lm2= (l-2)**2 + 1 + 2*m
ylm(ig,lm) = cost*(2*l-1)/sqrt(DBLE(l*l-m*m))*ylm(ig,lm1) &
- sqrt(DBLE((l-1)*(l-1)-m*m))/sqrt(DBLE(l*l-m*m))*ylm(ig,lm2)
end do
lm = (l )**2 + 1 + 2*l
lm1= (l )**2 + 1 + 2*(l-1)
lm2= (l-1)**2 + 1 + 2*(l-1)
ylm(ig,lm1) = cost * sqrt(DBLE(2*l-1)) * ylm(ig,lm2)
ylm(ig,lm ) = - sqrt(DBLE(2*l-1))/sqrt(DBLE(2*l))*sent*ylm(ig,lm2)
!
end do
!
! now add cos(phi), sin(phi), and other factors to get the true Ylm
! beware the arc tan, it is defined modulo pi
!
if (g(1,ig) > eps) then
phi = atan( g(2,ig)/g(1,ig) )
else if (g(1,ig) < -eps) then
phi = atan( g(2,ig)/g(1,ig) ) + pi
else
phi = sign( pi/2.d0,g(2,ig) )
end if
lm = 1
ylm(ig,1) = ylm(ig,1) / sqrt(fpi)
!
do l = 1, lmax
!
c = sqrt (DBLE(2*l+1) / fpi)
!
! Y_lm, m = 0
!
lm = lm + 1
ylm(ig, lm) = c * ylm(ig,lm)
!
do m = 1, l
!
! Y_lm, m > 0
!
lm = lm + 2
ylm(ig, lm-1) = c * sqrt(2.d0) * ylm(ig,lm) * cos (m*phi)
!
! Y_lm, m < 0
!
ylm(ig, lm ) = c * sqrt(2.d0) * ylm(ig,lm) * sin (m*phi)
!
end do
end do
enddo
!
#endif
return