mirror of https://gitlab.com/QEF/q-e.git
Merge branch 'final_beta_clean' into 'develop'
Beta function interpolation routines encapsulated into module beta_mod See merge request QEF/q-e!2221
This commit is contained in:
commit
93776a5a90
|
@ -14,6 +14,7 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
|
|||
USE klist, ONLY : ngk
|
||||
USE gvect, ONLY : mill, eigts1, eigts2, eigts3, g
|
||||
USE uspp, ONLY : nkb, indv, nhtol, nhtolm
|
||||
USE beta_mod, ONLY : interp_dbeta
|
||||
USE uspp_param, ONLY : lmaxkb, nbetam, nh
|
||||
USE io_global, ONLY : stdout
|
||||
!
|
||||
|
@ -137,6 +138,7 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb)
|
|||
USE klist, ONLY : ngk, igk_k
|
||||
USE gvect, ONLY : mill, eigts1, eigts2, eigts3, g
|
||||
USE uspp, ONLY : nkb, indv, nhtol, nhtolm
|
||||
USE beta_mod, ONLY : interp_beta
|
||||
USE uspp_param, ONLY : upf, lmaxkb, nbetam, nh
|
||||
!
|
||||
implicit none
|
||||
|
|
|
@ -150,7 +150,7 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi
|
|||
USE parallel_include
|
||||
USE scf, ONLY : rho, rho_core, rhog_core
|
||||
USE ener, ONLY : etxc, vtxc
|
||||
|
||||
USE beta_mod, ONLY : interp_beta, interp_dbeta
|
||||
USE uspp_param, ONLY : upf, nh, nbetam
|
||||
USE ions_base, ONLY : ntyp => nsp
|
||||
USE klist, ONLY : ngk
|
||||
|
|
|
@ -22,6 +22,7 @@ SUBROUTINE hinit0()
|
|||
USE klist, ONLY : qnorm
|
||||
USE gvecw, ONLY : ecutwfc
|
||||
USE vlocal, ONLY : strf
|
||||
USE beta_mod, ONLY : init_tab_beta
|
||||
USE realus, ONLY : generate_qpointlist, betapointlist, &
|
||||
init_realspace_vars, real_space
|
||||
USE ldaU, ONLY : lda_plus_U, Hubbard_projectors
|
||||
|
@ -42,6 +43,7 @@ SUBROUTINE hinit0()
|
|||
!
|
||||
IMPLICIT NONE
|
||||
REAL (dp) :: alat_old, qmax
|
||||
INTEGER :: ierr
|
||||
LOGICAL :: is_tau_read = .FALSE.
|
||||
!
|
||||
#if defined (__ENVIRON)
|
||||
|
@ -60,10 +62,20 @@ SUBROUTINE hinit0()
|
|||
IF (tbeta_smoothing) CALL init_us_b0(ecutwfc,intra_bgrp_comm)
|
||||
IF (tq_smoothing) CALL init_us_0(ecutrho,intra_bgrp_comm)
|
||||
qmax = (qnorm + sqrt(ecutrho))*cell_factor
|
||||
! qmax is the maximum needed |q+G|, increased by a factor (20% or so)
|
||||
! to avoid too frequent reallocations in variable-cell calculations
|
||||
! (qnorm=max|q| may be needed for hybrid EXX or phonon calculations)
|
||||
!
|
||||
! qmax is the maximum |q+G|, for all G needed by the charge density,
|
||||
! increased by a factor (20% or so) to avoid too frequent reallocations
|
||||
! in variable-cell calculations ( norm is an estimate of max|q|, that
|
||||
! may be needed for hybrid EXX or phonon calculations)
|
||||
!
|
||||
CALL init_us_1(nat, ityp, omega, qmax, intra_bgrp_comm)
|
||||
!
|
||||
! fill interpolation table for beta functions
|
||||
! qmax as above, for all G needed by wavefunctions
|
||||
!
|
||||
qmax = (qnorm + sqrt(ecutwfc))*cell_factor
|
||||
CALL init_tab_beta ( qmax, omega, intra_bgrp_comm, ierr )
|
||||
!
|
||||
IF ( lda_plus_U .AND. ( Hubbard_projectors == 'pseudo' ) ) CALL init_q_aeps()
|
||||
CALL init_tab_atwfc (omega, intra_bgrp_comm)
|
||||
!
|
||||
|
|
|
@ -282,11 +282,14 @@ SUBROUTINE post_xml_init ( )
|
|||
USE rism_module, ONLY : rism_tobe_alive, rism_pot3d
|
||||
USE rism3d_facade, ONLY : lrism3d, rism3d_initialize, rism3d_read_to_restart
|
||||
USE xc_lib, ONLY : xclib_dft_is_libxc, xclib_init_libxc
|
||||
USE beta_mod, ONLY : init_tab_beta
|
||||
USE klist, ONLY : qnorm
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
REAL(DP) :: ehart, etxc, vtxc, etotefield, charge, qmax
|
||||
CHARACTER(LEN=37) :: dft_name
|
||||
INTEGER :: ierr
|
||||
!
|
||||
! ... initialize Libxc if needed
|
||||
!
|
||||
|
@ -359,9 +362,20 @@ SUBROUTINE post_xml_init ( )
|
|||
CALL init_vloc()
|
||||
IF (tbeta_smoothing) CALL init_us_b0(ecutwfc,intra_bgrp_comm)
|
||||
IF (tq_smoothing) CALL init_us_0(ecutrho,intra_bgrp_comm)
|
||||
!
|
||||
! qmax is the maximum |G|, for all G needed by the charge density
|
||||
!
|
||||
qmax = sqrt(ecutrho)*cell_factor
|
||||
CALL init_us_1(nat, ityp, omega, qmax, intra_bgrp_comm)
|
||||
!
|
||||
! fill interpolation table for beta functions
|
||||
! qmax is the maximum |q+G|, for all G needed by the wavefunctions
|
||||
!
|
||||
qmax = (qnorm + sqrt(ecutwfc))*cell_factor
|
||||
CALL init_tab_beta ( qmax, omega, intra_bgrp_comm, ierr )
|
||||
!
|
||||
IF ( lda_plus_u .AND. ( Hubbard_projectors == 'pseudo' ) ) CALL init_q_aeps()
|
||||
!
|
||||
CALL init_tab_atwfc(omega, intra_bgrp_comm)
|
||||
!
|
||||
CALL struc_fact( nat, tau, nsp, ityp, ngm, g, bg, dfftp%nr1, dfftp%nr2,&
|
||||
|
|
|
@ -29,6 +29,7 @@ SUBROUTINE scale_h
|
|||
USE rism_module, ONLY : lrism, rism_reinit3d
|
||||
USE mp, ONLY : mp_max
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE beta_mod, ONLY : scale_tab_beta
|
||||
USE vloc_mod, ONLY : scale_tab_vloc
|
||||
USE rhoc_mod, ONLY : scale_tab_rhc
|
||||
USE rhoat_mod, ONLY : scale_tab_rhoat
|
||||
|
@ -86,6 +87,7 @@ SUBROUTINE scale_h
|
|||
! scale the non-local pseudopotential tables
|
||||
!
|
||||
call scale_uspp_data( omega_old/omega )
|
||||
call scale_tab_beta( omega_old/omega )
|
||||
CALL scale_tab_rhc( omega_old/omega )
|
||||
CALL scale_tab_rhoat( omega_old/omega )
|
||||
CALL scale_tab_qrad( omega_old/omega )
|
||||
|
|
|
@ -8,7 +8,7 @@ set(src_upflib
|
|||
init_us_b0.f90
|
||||
init_us_1.f90
|
||||
init_tab_atwfc.f90
|
||||
init_tab_beta.f90
|
||||
beta_mod.f90
|
||||
interp_atwfc.f90
|
||||
init_us_2_acc.f90
|
||||
gth.f90
|
||||
|
|
|
@ -10,10 +10,11 @@ MODFLAGS= $(MOD_FLAG)../UtilXlib $(MOD_FLAG).
|
|||
# OBJS_GPU are GPU-specific
|
||||
|
||||
OBJS_DEP= \
|
||||
vloc_mod.o \
|
||||
rhoc_mod.o \
|
||||
rhoat_mod.o \
|
||||
beta_mod.o \
|
||||
qrad_mod.o \
|
||||
rhoat_mod.o \
|
||||
rhoc_mod.o \
|
||||
vloc_mod.o \
|
||||
qvan2.o \
|
||||
dqvan2.o \
|
||||
gen_us_dj.o \
|
||||
|
@ -22,8 +23,7 @@ init_us_0.o \
|
|||
init_us_b0.o \
|
||||
init_us_1.o \
|
||||
init_us_2_acc.o \
|
||||
init_tab_atwfc.o \
|
||||
init_tab_beta.o
|
||||
init_tab_atwfc.o
|
||||
|
||||
OBJS_NODEP= \
|
||||
atom.o \
|
||||
|
|
|
@ -0,0 +1,236 @@
|
|||
!
|
||||
! Copyright (C) 2024 Quantum ESPRESSO Foundation
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!
|
||||
MODULE beta_mod
|
||||
!
|
||||
!! Variables and routines for nonlocal beta functions in numerical form
|
||||
!! Contains generation of interpolation tables in reciprocal space,
|
||||
!! interpolation routines and other utility routines
|
||||
!! Code moved to upflib and restructured by Paolo Giannozzi, 2024
|
||||
!
|
||||
USE upf_kinds, ONLY : dp
|
||||
USE upf_const, ONLY : fpi, e2
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
PRIVATE
|
||||
PUBLIC :: init_tab_beta
|
||||
PUBLIC :: deallocate_tab_beta
|
||||
PUBLIC :: scale_tab_beta
|
||||
PUBLIC :: interp_beta
|
||||
PUBLIC :: interp_dbeta
|
||||
!
|
||||
SAVE
|
||||
!
|
||||
INTEGER :: nqx = 0
|
||||
!! size of interpolation table
|
||||
REAL(DP), PARAMETER:: dq = 0.01_dp
|
||||
!! grid step for interpolation table
|
||||
REAL(DP) :: qmax = 0.0_dp
|
||||
!! max q covered by the interpolation table
|
||||
REAL(DP), ALLOCATABLE :: tab_beta(:,:,:)
|
||||
!! interpolation table for numerical beta functions in reciprocal space
|
||||
!
|
||||
CONTAINS
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
SUBROUTINE init_tab_beta ( qmax_, omega, comm, ierr )
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
! Compute interpolation table for beta(G) radial functions
|
||||
!
|
||||
USE upf_kinds, ONLY : dp
|
||||
USE atom, ONLY : rgrid
|
||||
USE uspp_param, ONLY : upf, lmaxq, nbetam, nsp
|
||||
USE mp, ONLY : mp_sum
|
||||
USE m_gth, ONLY : mk_ffnl_gth
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
REAL(dp), INTENT(IN) :: qmax_
|
||||
!! Interpolate q up to qmax_ (sqrt(Ry), q^2 is an energy)
|
||||
REAL(dp), INTENT(IN) :: omega
|
||||
!! Unit-cell volume
|
||||
INTEGER, INTENT(IN) :: comm
|
||||
!! MPI communicator, to split the workload
|
||||
INTEGER, INTENT(OUT) :: ierr
|
||||
!! error code: ierr = 0 if interpolation table (IT) was allocated
|
||||
!! ierr =-1 if IT had insufficient size and was re-allocated
|
||||
!! ierr =-2 if IT was already present and nothing is done
|
||||
!
|
||||
INTEGER :: ndm, startq, lastq, nt, l, nb, iq, ir
|
||||
REAL(dp) :: qi
|
||||
! q-point grid for interpolation
|
||||
REAL(dp) :: pref
|
||||
! the prefactor of the Q functions
|
||||
real(DP) :: vqint, d1
|
||||
!
|
||||
REAL(dp), allocatable :: aux (:)
|
||||
! work space
|
||||
REAL(dp), allocatable :: besr(:)
|
||||
! work space
|
||||
!
|
||||
IF ( .NOT. ALLOCATED(tab_beta) ) THEN
|
||||
!! table not yet allocated
|
||||
qmax = qmax_
|
||||
ierr = 0
|
||||
ELSE IF ( qmax_ > qmax ) THEN
|
||||
!! table ìs allocated but dimension insufficient: re-allocate
|
||||
!! (with some margin so that this does not happen too often)
|
||||
!$acc exit data delete(tab_beta)
|
||||
DEALLOCATE ( tab_beta )
|
||||
qmax = qmax_ + MAX(dq*100,qmax_-qmax)
|
||||
ierr =-1
|
||||
ELSE
|
||||
!! table already computed: exit
|
||||
ierr =-2
|
||||
RETURN
|
||||
END IF
|
||||
nqx = INT( qmax/dq + 4)
|
||||
allocate(tab_beta(nqx,nbetam,nsp))
|
||||
!$acc enter data create(tab_beta)
|
||||
ndm = MAXVAL ( upf(:)%kkbeta )
|
||||
allocate( aux (ndm) )
|
||||
allocate (besr( ndm))
|
||||
pref = fpi / sqrt (omega)
|
||||
call divide (comm, nqx, startq, lastq)
|
||||
tab_beta (:,:,:) = 0.d0
|
||||
do nt = 1, nsp
|
||||
do nb = 1, upf(nt)%nbeta
|
||||
l = upf(nt)%lll (nb)
|
||||
do iq = startq, lastq
|
||||
qi = (iq - 1) * dq
|
||||
if ( upf(nt)%is_gth ) then
|
||||
CALL mk_ffnl_gth( nt, nb, 1, omega, [ qi ] , tab_beta(iq,nb,nt) )
|
||||
else
|
||||
call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr)
|
||||
do ir = 1, upf(nt)%kkbeta
|
||||
aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir)
|
||||
enddo
|
||||
call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint)
|
||||
tab_beta (iq, nb, nt) = vqint * pref
|
||||
end if
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
deallocate (besr)
|
||||
deallocate (aux)
|
||||
!
|
||||
call mp_sum( tab_beta, comm )
|
||||
!$acc update device (tab_beta)
|
||||
!
|
||||
END SUBROUTINE init_tab_beta
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
SUBROUTINE interp_beta( nt, npw_, qg, vq )
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
USE upf_kinds, ONLY : dp
|
||||
USE uspp_param, ONLY : upf, nbetam
|
||||
!
|
||||
implicit none
|
||||
integer, intent(in) :: nt, npw_
|
||||
real(dp), intent(in ) :: qg(npw_)
|
||||
real(dp), intent(out) :: vq(npw_,nbetam)
|
||||
!
|
||||
integer :: i0, i1, i2, i3, nbnt, nb, ig
|
||||
real(dp):: qgr, px, ux, vx, wx
|
||||
!
|
||||
nbnt = upf(nt)%nbeta
|
||||
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
|
||||
!$acc parallel loop collapse(2)
|
||||
do nb = 1, nbnt
|
||||
DO ig = 1, npw_
|
||||
qgr = qg(ig)
|
||||
px = qgr / dq - DBLE(INT(qgr/dq))
|
||||
ux = 1.0_dp - px
|
||||
vx = 2.0_dp - px
|
||||
wx = 3.0_dp - px
|
||||
i0 = INT(qgr/dq) + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
if ( i3 <= nqx ) then
|
||||
vq(ig,nb) = &
|
||||
tab_beta(i0,nb,nt) * ux * vx * wx / 6.0_dp + &
|
||||
tab_beta(i1,nb,nt) * px * vx * wx / 2.0_dp - &
|
||||
tab_beta(i2,nb,nt) * px * ux * wx / 2.0_dp + &
|
||||
tab_beta(i3,nb,nt) * px * ux * vx / 6.0_dp
|
||||
else
|
||||
!! This case should never happen if tab_beta is properly allocated
|
||||
!! (setting q_max to be large enough) - for compatibility with GWW
|
||||
vq(ig,nb) = 0.0_dp
|
||||
end if
|
||||
END DO
|
||||
END DO
|
||||
!$acc end data
|
||||
!----------------------------------------------------------------------
|
||||
END SUBROUTINE interp_beta
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
SUBROUTINE interp_dbeta( nt, npw, qg, vq )
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
USE upf_kinds, ONLY : dp
|
||||
USE uspp_param, ONLY : upf, nbetam
|
||||
!
|
||||
implicit none
|
||||
integer, intent(in) :: nt, npw
|
||||
real(dp), intent(in ) :: qg(npw)
|
||||
real(dp), intent(out) :: vq(npw,nbetam)
|
||||
!
|
||||
integer :: i0, i1, i2, i3, nbnt, nb, ig
|
||||
real(dp):: qgr, px, ux, vx, wx
|
||||
!
|
||||
nbnt = upf(nt)%nbeta
|
||||
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
|
||||
!$acc parallel loop collapse(2)
|
||||
DO nb = 1, nbnt
|
||||
DO ig = 1, npw
|
||||
qgr = qg(ig)
|
||||
px = qgr / dq - INT(qgr/dq)
|
||||
ux = 1.0_dp - px
|
||||
vx = 2.0_dp - px
|
||||
wx = 3.0_dp - px
|
||||
i0 = qgr / dq + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
IF ( i3 <= nqx ) THEN
|
||||
vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
|
||||
tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
|
||||
tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
|
||||
tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp )/dq
|
||||
ELSE
|
||||
vq(ig,nb) = 0.0_dp
|
||||
END IF
|
||||
ENDDO
|
||||
END DO
|
||||
!$acc end data
|
||||
!----------------------------------------------------------------------
|
||||
END SUBROUTINE interp_dbeta
|
||||
!
|
||||
subroutine deallocate_tab_beta ()
|
||||
implicit none
|
||||
!
|
||||
!$acc exit data delete(tab_beta)
|
||||
if( allocated( tab_beta ) ) deallocate( tab_beta )
|
||||
!
|
||||
end subroutine deallocate_tab_beta
|
||||
!
|
||||
subroutine scale_tab_beta( vol_ratio_m1 )
|
||||
! vol_ratio_m1 = omega_old / omega
|
||||
implicit none
|
||||
real(DP), intent(in) :: vol_ratio_m1
|
||||
!
|
||||
tab_beta(:,:,:) = tab_beta(:,:,:) * SQRT(vol_ratio_m1)
|
||||
!$acc update device ( tab_beta)
|
||||
end subroutine scale_tab_beta
|
||||
!
|
||||
END MODULE beta_mod
|
|
@ -18,6 +18,7 @@ SUBROUTINE gen_us_dj_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
|
|||
USE upf_const, ONLY: tpi
|
||||
USE uspp, ONLY: nkb, indv, nhtol, nhtolm
|
||||
USE uspp_param, ONLY: lmaxkb, nbetam, nh, nhm
|
||||
USE beta_mod, ONLY: interp_dbeta
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -202,47 +203,3 @@ SUBROUTINE gen_us_dj_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
|
|||
RETURN
|
||||
!
|
||||
END SUBROUTINE gen_us_dj_base
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
SUBROUTINE interp_dbeta( nt, npw, qg, vq )
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
USE upf_kinds, ONLY : dp
|
||||
USE uspp_param, ONLY : upf, nbetam
|
||||
USE uspp_data, ONLY : nqx, dq, tab_beta
|
||||
!
|
||||
implicit none
|
||||
integer, intent(in) :: nt, npw
|
||||
real(dp), intent(in ) :: qg(npw)
|
||||
real(dp), intent(out) :: vq(npw,nbetam)
|
||||
!
|
||||
integer :: i0, i1, i2, i3, nbnt, nb, ig
|
||||
real(dp):: qgr, px, ux, vx, wx
|
||||
!
|
||||
nbnt = upf(nt)%nbeta
|
||||
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
|
||||
!$acc parallel loop collapse(2)
|
||||
DO nb = 1, nbnt
|
||||
DO ig = 1, npw
|
||||
qgr = qg(ig)
|
||||
px = qgr / dq - INT(qgr/dq)
|
||||
ux = 1.0_dp - px
|
||||
vx = 2.0_dp - px
|
||||
wx = 3.0_dp - px
|
||||
i0 = qgr / dq + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
IF ( i3 <= nqx ) THEN
|
||||
vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
|
||||
tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
|
||||
tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
|
||||
tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp )/dq
|
||||
ELSE
|
||||
vq(ig,nb) = 0.0_dp
|
||||
END IF
|
||||
ENDDO
|
||||
END DO
|
||||
!$acc end data
|
||||
!----------------------------------------------------------------------
|
||||
END SUBROUTINE interp_dbeta
|
||||
|
|
|
@ -20,6 +20,7 @@ SUBROUTINE gen_us_dy_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
|
|||
USE upf_const, ONLY: tpi
|
||||
USE uspp, ONLY: nkb, indv, nhtol, nhtolm
|
||||
USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm
|
||||
USE beta_mod, ONLY: interp_beta
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
!
|
||||
! Copyright (C) 2021-2023 Quantum ESPRESSO Foundation
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm )
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
! Compute interpolation table for beta(G) radial functions
|
||||
!
|
||||
USE upf_kinds, ONLY : dp
|
||||
USE upf_const, ONLY : fpi
|
||||
USE atom, ONLY : rgrid
|
||||
USE uspp_param, ONLY : upf, lmaxq, nbetam, nsp
|
||||
USE uspp_data, ONLY : nqx, dq, tab_beta
|
||||
USE mp, ONLY : mp_sum
|
||||
USE m_gth, ONLY : mk_ffnl_gth
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
real(DP), intent(in) :: omega
|
||||
integer, intent(in) :: intra_bgrp_comm
|
||||
!
|
||||
INTEGER :: ndm, startq, lastq, nt, l, nb, iq, ir
|
||||
REAL(dp) :: qi
|
||||
! q-point grid for interpolation
|
||||
REAL(dp) :: pref
|
||||
! the prefactor of the Q functions
|
||||
real(DP) :: vqint, d1
|
||||
!
|
||||
REAL(dp), allocatable :: aux (:)
|
||||
! work space
|
||||
REAL(dp), allocatable :: besr(:)
|
||||
! work space
|
||||
!
|
||||
ndm = MAXVAL ( upf(:)%kkbeta )
|
||||
allocate( aux (ndm) )
|
||||
allocate (besr( ndm))
|
||||
pref = fpi / sqrt (omega)
|
||||
call divide (intra_bgrp_comm, nqx, startq, lastq)
|
||||
tab_beta (:,:,:) = 0.d0
|
||||
do nt = 1, nsp
|
||||
do nb = 1, upf(nt)%nbeta
|
||||
l = upf(nt)%lll (nb)
|
||||
do iq = startq, lastq
|
||||
qi = (iq - 1) * dq
|
||||
if ( upf(nt)%is_gth ) then
|
||||
CALL mk_ffnl_gth( nt, nb, 1, omega, [ qi ] , tab_beta(iq,nb,nt) )
|
||||
else
|
||||
call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr)
|
||||
do ir = 1, upf(nt)%kkbeta
|
||||
aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir)
|
||||
enddo
|
||||
call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint)
|
||||
tab_beta (iq, nb, nt) = vqint * pref
|
||||
end if
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
deallocate (besr)
|
||||
deallocate (aux)
|
||||
!
|
||||
call mp_sum( tab_beta, intra_bgrp_comm )
|
||||
!$acc update device (tab_beta)
|
||||
!
|
||||
END SUBROUTINE init_tab_beta
|
|
@ -23,7 +23,6 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
|
|||
! spherical harmonics in the Q expansion
|
||||
! f) It computes the interpolation table "qrad" for Q(G)
|
||||
! g) It computes the qq terms which define the S matrix.
|
||||
! h) It fills the interpolation table "tab_beta" for the beta functions
|
||||
!
|
||||
USE upf_kinds, ONLY : DP
|
||||
USE upf_const, ONLY : fpi, sqrt2
|
||||
|
@ -52,7 +51,7 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
|
|||
real(DP) :: j
|
||||
! J=L+S (noninteger!)
|
||||
integer :: n1, m0, m1, n, li, mi, vi, vj, ijs, is1, is2, &
|
||||
lk, mk, vk, kh, lh, ijkb0, na
|
||||
lk, mk, kh, lh, ijkb0, na
|
||||
integer, external :: sph_ind
|
||||
complex(DP) :: coeff
|
||||
real(DP) :: ji, jk
|
||||
|
@ -170,12 +169,10 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
|
|||
li = nhtol(ih, nt)
|
||||
ji = nhtoj(ih, nt)
|
||||
mi = nhtolm(ih, nt)-li*li
|
||||
vi = indv (ih, nt)
|
||||
do kh=1,nh(nt)
|
||||
lk = nhtol(kh, nt)
|
||||
jk = nhtoj(kh, nt)
|
||||
mk = nhtolm(kh, nt)-lk*lk
|
||||
vk = indv (kh, nt)
|
||||
if (li == lk .and. abs(ji-jk) < 1.d-7) then
|
||||
do is1=1,2
|
||||
do is2=1,2
|
||||
|
@ -251,10 +248,6 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
|
|||
end do
|
||||
end if
|
||||
!
|
||||
! fill interpolation table for beta functions
|
||||
!
|
||||
CALL init_tab_beta ( omega, intra_bgrp_comm )
|
||||
!
|
||||
#if defined __CUDA
|
||||
!
|
||||
! update GPU memory (taking care of zero-dim allocations)
|
||||
|
|
|
@ -20,6 +20,7 @@ SUBROUTINE init_us_2_acc( npw_, npwx, igk_, q_, nat, tau, ityp, &
|
|||
USE upf_const, ONLY : tpi
|
||||
USE uspp, ONLY : nkb, nhtol, nhtolm, indv
|
||||
USE uspp_param, ONLY : lmaxkb, nbetam, nhm, nh, nsp
|
||||
USE beta_mod, ONLY : interp_beta
|
||||
!
|
||||
implicit none
|
||||
!
|
||||
|
@ -168,52 +169,6 @@ SUBROUTINE init_us_2_acc( npw_, npwx, igk_, q_, nat, tau, ityp, &
|
|||
deallocate(vkb1)
|
||||
!
|
||||
return
|
||||
!----------------------------------------------------------------------
|
||||
end subroutine init_us_2_acc
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
SUBROUTINE interp_beta( nt, npw_, qg, vq )
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
USE upf_kinds, ONLY : dp
|
||||
USE uspp_param, ONLY : upf, nbetam
|
||||
USE uspp_data, ONLY : nqx, dq, tab_beta
|
||||
!
|
||||
implicit none
|
||||
integer, intent(in) :: nt, npw_
|
||||
real(dp), intent(in ) :: qg(npw_)
|
||||
real(dp), intent(out) :: vq(npw_,nbetam)
|
||||
!
|
||||
integer :: i0, i1, i2, i3, nbnt, nb, ig
|
||||
real(dp):: qgr, px, ux, vx, wx
|
||||
!
|
||||
nbnt = upf(nt)%nbeta
|
||||
!$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
|
||||
!$acc parallel loop collapse(2)
|
||||
do nb = 1, nbnt
|
||||
DO ig = 1, npw_
|
||||
qgr = qg(ig)
|
||||
px = qgr / dq - DBLE(INT(qgr/dq))
|
||||
ux = 1.0_dp - px
|
||||
vx = 2.0_dp - px
|
||||
wx = 3.0_dp - px
|
||||
i0 = INT(qgr/dq) + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
if ( i3 <= nqx ) then
|
||||
vq(ig,nb) = &
|
||||
tab_beta(i0,nb,nt) * ux * vx * wx / 6.0_dp + &
|
||||
tab_beta(i1,nb,nt) * px * vx * wx / 2.0_dp - &
|
||||
tab_beta(i2,nb,nt) * px * ux * wx / 2.0_dp + &
|
||||
tab_beta(i3,nb,nt) * px * ux * vx / 6.0_dp
|
||||
else
|
||||
!! This case should never happen if tab_beta is properly allocated
|
||||
!! (setting q_max to be large enough) - for compatibility with GWW
|
||||
vq(ig,nb) = 0.0_dp
|
||||
end if
|
||||
END DO
|
||||
END DO
|
||||
!$acc end data
|
||||
!----------------------------------------------------------------------
|
||||
END SUBROUTINE interp_beta
|
||||
!----------------------------------------------------------------------
|
||||
|
|
|
@ -16,7 +16,7 @@ MODULE uspp_data
|
|||
PRIVATE
|
||||
!
|
||||
PUBLIC :: nqxq, nqx, dq
|
||||
PUBLIC :: tab_beta, tab_at
|
||||
PUBLIC :: tab_at
|
||||
!
|
||||
PUBLIC :: allocate_uspp_data
|
||||
PUBLIC :: deallocate_uspp_data
|
||||
|
@ -27,9 +27,7 @@ MODULE uspp_data
|
|||
INTEGER :: nqx
|
||||
!! number of interpolation points
|
||||
REAL(DP), PARAMETER:: dq = 0.01D0
|
||||
!! space between points in the pseudopotential tab_beta.
|
||||
REAL(DP), ALLOCATABLE :: tab_beta(:,:,:)
|
||||
!! interpolation table for PP projectorss
|
||||
!! space between interpolation points
|
||||
REAL(DP), ALLOCATABLE :: tab_at(:,:,:)
|
||||
!! interpolation table for atomic wfc
|
||||
!
|
||||
|
@ -43,8 +41,6 @@ contains
|
|||
if (nqxq_/=nqxq) call upf_error("allocate_uspp_data","invalid nqxq_",1)
|
||||
if (nqx_/=nqx) call upf_error("allocate_uspp_data","invalid nqx_",1)
|
||||
!
|
||||
allocate(tab_beta(nqx_,nbetam,nsp))
|
||||
!$acc enter data create(tab_beta)
|
||||
allocate(tab_at(nqx_,nwfcm,nsp))
|
||||
!$acc enter data create(tab_at)
|
||||
!
|
||||
|
@ -52,8 +48,6 @@ contains
|
|||
!
|
||||
subroutine deallocate_uspp_data()
|
||||
implicit none
|
||||
!$acc exit data delete(tab_beta)
|
||||
if( allocated( tab_beta ) ) deallocate( tab_beta )
|
||||
!$acc exit data delete(tab_at)
|
||||
if( allocated( tab_at ) ) deallocate( tab_at )
|
||||
!
|
||||
|
@ -64,9 +58,8 @@ contains
|
|||
implicit none
|
||||
real(DP), intent(in) :: vol_ratio_m1
|
||||
!
|
||||
tab_beta(:,:,:) = tab_beta(:,:,:) * SQRT(vol_ratio_m1)
|
||||
tab_at(:,:,:) = tab_at(:,:,:) * SQRT(vol_ratio_m1)
|
||||
!$acc update device (tab_at, tab_beta)
|
||||
!$acc update device (tab_at)
|
||||
end subroutine scale_uspp_data
|
||||
!
|
||||
END MODULE uspp_data
|
||||
|
|
|
@ -1,16 +1,15 @@
|
|||
!
|
||||
! Copyright (C) 2001-2021 Quantum ESPRESSO group
|
||||
! Copyright (C) 2001-2024 Quantum ESPRESSO Foundation
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
#define __CUDA_KERNEL_YLM
|
||||
! use the CUDA Kernel version instead of the simple CUF version
|
||||
! that crashes for obscure reasons (at least on nvfortran 22.07)
|
||||
|
||||
module ylmr2_gpum
|
||||
#if defined(__CUDA) && defined(__CUDA_KERNEL_YLM)
|
||||
#if defined(__CUDA)
|
||||
|
||||
! CUDA Kernel version
|
||||
|
||||
use cudafor
|
||||
contains
|
||||
attributes(global) subroutine ylmr2_gpu_kernel (lmax,lmax2, ng, g, gg, ylm)
|
||||
|
@ -120,9 +119,9 @@ subroutine ylmr2_gpu(lmax2, ng, g, gg, ylm)
|
|||
! Numerical recursive algorithm based on the one given in Numerical
|
||||
! Recipes but avoiding the calculation of factorials that generate
|
||||
! overflow for lmax > 11
|
||||
! Last modified May 2nd, 2021, by PG
|
||||
! Last modified Jan 2024, by PG
|
||||
!
|
||||
#if defined(__CUDA) && defined(__CUDA_KERNEL_YLM)
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE ylmr2_gpum, ONLY : ylmr2_gpu_kernel
|
||||
implicit none
|
||||
|
@ -155,121 +154,6 @@ subroutine ylmr2_gpu(lmax2, ng, g, gg, ylm)
|
|||
tBlock = dim3(256,1,1)
|
||||
grid = dim3(ceiling(real(ng)/tBlock%x),1,1)
|
||||
call ylmr2_gpu_kernel<<<grid,tBlock>>>(lmax, lmax2, ng, g, gg, ylm)
|
||||
|
||||
#else
|
||||
USE upf_kinds, ONLY : DP
|
||||
USE upf_const, ONLY : pi, fpi
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
integer, intent(in) :: lmax2, ng
|
||||
real(DP), intent(in) :: g (3, ng), gg (ng)
|
||||
!
|
||||
! BEWARE: gg = g(1)^2 + g(2)^2 +g(3)^2 is not checked on input
|
||||
! Incorrect results will ensue if gg != g(1)^2 + g(2)^2 +g(3)^2
|
||||
!
|
||||
real(DP), intent(out) :: ylm (ng,lmax2)
|
||||
#if defined(__CUDA)
|
||||
attributes (device) :: g, gg, ylm
|
||||
#endif
|
||||
!
|
||||
! local variables
|
||||
!
|
||||
real(DP), parameter :: eps = 1.0d-9
|
||||
real(DP) :: cost , sent, phi
|
||||
real(DP) :: c, gmod
|
||||
integer :: lmax, ig, l, m, lm, lm1, lm2
|
||||
!
|
||||
if (ng < 1 .or. lmax2 < 1) return
|
||||
do lmax = 0, 25
|
||||
if ((lmax+1)**2 == lmax2) go to 10
|
||||
end do
|
||||
call upf_error (' ylmr', 'l > 25 or wrong number of Ylm required',lmax2)
|
||||
10 continue
|
||||
!
|
||||
if (lmax == 0) then
|
||||
!$cuf kernel do
|
||||
do ig=1, ng
|
||||
ylm(ig,1) = sqrt (1.d0 / fpi)
|
||||
end do
|
||||
return
|
||||
end if
|
||||
!
|
||||
!$cuf kernel do
|
||||
do ig=1,ng
|
||||
gmod = sqrt (gg (ig) )
|
||||
if (gmod < eps) then
|
||||
cost = 0.d0
|
||||
else
|
||||
cost = g(3,ig)/gmod
|
||||
endif
|
||||
sent = sqrt(max(0.0_dp,1.0_dp-cost*cost))
|
||||
!
|
||||
! cost = cos(theta), sent = sin(theta), with theta = polar angle
|
||||
!
|
||||
! The Q(:,l,m) are defined as sqrt ((l-m)!/(l+m)!) * P(:,l,m),
|
||||
! where P(:,l,m) are the Legendre Polynomials (0 <= m <= l),
|
||||
! and are currently stored into Ylm(:,lm) where lm = l**2+1+2*m
|
||||
! (one might also store them into an array Q(l,m) for each ig)
|
||||
!
|
||||
ylm (ig,1) = 1.d0
|
||||
ylm (ig,2) = cost
|
||||
ylm (ig,4) =-sent/sqrt(2.d0)
|
||||
do l = 2, lmax
|
||||
!
|
||||
! recursion on l for Q(:,l,m)
|
||||
!
|
||||
do m = 0, l - 2
|
||||
lm = (l )**2 + 1 + 2*m
|
||||
lm1= (l-1)**2 + 1 + 2*m
|
||||
lm2= (l-2)**2 + 1 + 2*m
|
||||
ylm(ig,lm) = cost*(2*l-1)/sqrt(DBLE(l*l-m*m))*ylm(ig,lm1) &
|
||||
- sqrt(DBLE((l-1)*(l-1)-m*m))/sqrt(DBLE(l*l-m*m))*ylm(ig,lm2)
|
||||
end do
|
||||
lm = (l )**2 + 1 + 2*l
|
||||
lm1= (l )**2 + 1 + 2*(l-1)
|
||||
lm2= (l-1)**2 + 1 + 2*(l-1)
|
||||
ylm(ig,lm1) = cost * sqrt(DBLE(2*l-1)) * ylm(ig,lm2)
|
||||
ylm(ig,lm ) = - sqrt(DBLE(2*l-1))/sqrt(DBLE(2*l))*sent*ylm(ig,lm2)
|
||||
!
|
||||
end do
|
||||
!
|
||||
! now add cos(phi), sin(phi), and other factors to get the true Ylm
|
||||
! beware the arc tan, it is defined modulo pi
|
||||
!
|
||||
if (g(1,ig) > eps) then
|
||||
phi = atan( g(2,ig)/g(1,ig) )
|
||||
else if (g(1,ig) < -eps) then
|
||||
phi = atan( g(2,ig)/g(1,ig) ) + pi
|
||||
else
|
||||
phi = sign( pi/2.d0,g(2,ig) )
|
||||
end if
|
||||
lm = 1
|
||||
ylm(ig,1) = ylm(ig,1) / sqrt(fpi)
|
||||
!
|
||||
do l = 1, lmax
|
||||
!
|
||||
c = sqrt (DBLE(2*l+1) / fpi)
|
||||
!
|
||||
! Y_lm, m = 0
|
||||
!
|
||||
lm = lm + 1
|
||||
ylm(ig, lm) = c * ylm(ig,lm)
|
||||
!
|
||||
do m = 1, l
|
||||
!
|
||||
! Y_lm, m > 0
|
||||
!
|
||||
lm = lm + 2
|
||||
ylm(ig, lm-1) = c * sqrt(2.d0) * ylm(ig,lm) * cos (m*phi)
|
||||
!
|
||||
! Y_lm, m < 0
|
||||
!
|
||||
ylm(ig, lm ) = c * sqrt(2.d0) * ylm(ig,lm) * sin (m*phi)
|
||||
!
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
!
|
||||
#endif
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue