mirror of https://gitlab.com/QEF/q-e.git
Removal of spline interpolation
The need of the spline interpolation is anedoctical and lost in a distant past. A few tests show minimal differences between ordinary and spline interpolation. If spline interpolation turns out to be needed in some cases, it can be easily re-implemented instead of the current interpolation (instead of in addition to)
This commit is contained in:
parent
32d755d115
commit
164b9e86c8
|
@ -37,7 +37,6 @@ SUBROUTINE clean_pw( lflag )
|
|||
USE symm_base, ONLY : irt
|
||||
USE symme, ONLY : sym_rho_deallocate
|
||||
USE wavefunctions, ONLY : evc, psic, psic_nc
|
||||
USE uspp_data, ONLY : qrad, tab, tab_at, tab_d2y, spline_ps
|
||||
USE uspp, ONLY : deallocate_uspp
|
||||
USE uspp_data, ONLY : deallocate_uspp_data
|
||||
USE uspp_param, ONLY : upf
|
||||
|
|
|
@ -3770,9 +3770,8 @@ end associate
|
|||
USE constants, ONLY : tpi
|
||||
USE gvect, ONLY : eigts1, eigts2, eigts3, mill, g
|
||||
USE wvfct, ONLY : npwx, nbnd
|
||||
USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps
|
||||
USE uspp_data, ONLY : nqx, dq, tab
|
||||
USE m_gth, ONLY : mk_ffnl_gth
|
||||
USE splinelib
|
||||
USE uspp, ONLY : nkb, nhtol, nhtolm, indv
|
||||
USE uspp_param, ONLY : upf, lmaxkb, nhm, nh
|
||||
USE becmod, ONLY : calbec
|
||||
|
@ -3802,7 +3801,6 @@ end associate
|
|||
COMPLEX(DP) :: phase, pref
|
||||
COMPLEX(DP), ALLOCATABLE :: sk(:)
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: xdata(:)
|
||||
INTEGER :: iq
|
||||
INTEGER :: istart, iend
|
||||
!
|
||||
|
@ -3835,12 +3833,6 @@ end associate
|
|||
qg(ig) = SQRT(qg(ig))*tpiba
|
||||
ENDDO
|
||||
!
|
||||
IF (spline_ps) THEN
|
||||
ALLOCATE( xdata(nqx) )
|
||||
DO iq = 1, nqx
|
||||
xdata(iq) = (iq - 1) * dq
|
||||
ENDDO
|
||||
ENDIF
|
||||
! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q)
|
||||
jkb = 0
|
||||
!
|
||||
|
@ -3852,9 +3844,6 @@ end associate
|
|||
CALL mk_ffnl_gth( nt, nb, npw_, omega, qg, vq )
|
||||
ELSE
|
||||
DO ig = 1, npw_
|
||||
IF (spline_ps) THEN
|
||||
vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig))
|
||||
ELSE
|
||||
px = qg (ig) / dq - INT(qg (ig) / dq)
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
|
@ -3867,7 +3856,6 @@ end associate
|
|||
tab (i1, nb, nt) * px * vx * wx / 2.d0 - &
|
||||
tab (i2, nb, nt) * px * ux * wx / 2.d0 + &
|
||||
tab (i3, nb, nt) * px * ux * vx / 6.d0
|
||||
ENDIF
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
|
|
|
@ -17,9 +17,8 @@ SUBROUTINE gen_us_dj_base &
|
|||
USE upf_kinds, ONLY: dp
|
||||
USE upf_const, ONLY: tpi
|
||||
USE uspp, ONLY: nkb, indv, nhtol, nhtolm
|
||||
USE uspp_data, ONLY: nqx, tab, tab_d2y, dq, spline_ps
|
||||
USE uspp_data, ONLY: nqx, tab, dq
|
||||
USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh
|
||||
USE splinelib
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -79,7 +78,6 @@ SUBROUTINE gen_us_dj_base &
|
|||
REAL(DP), ALLOCATABLE :: djl(:,:,:), ylm(:,:), q(:), gk(:,:)
|
||||
REAL(DP) :: qt
|
||||
COMPLEX(DP), ALLOCATABLE :: sk(:)
|
||||
REAL(DP), ALLOCATABLE :: xdata(:)
|
||||
!
|
||||
IF (nkb == 0) RETURN
|
||||
!
|
||||
|
@ -104,35 +102,23 @@ SUBROUTINE gen_us_dj_base &
|
|||
CALL stop_clock( 'stres_us32' )
|
||||
CALL start_clock( 'stres_us33' )
|
||||
!
|
||||
IF (spline_ps) THEN
|
||||
ALLOCATE( xdata(nqx) )
|
||||
DO iq = 1, nqx
|
||||
xdata(iq) = (iq - 1) * dq
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
DO nb = 1, upf(nt)%nbeta
|
||||
!
|
||||
DO ig = 1, npw
|
||||
qt = SQRT(q (ig)) * tpiba
|
||||
IF (spline_ps) THEN
|
||||
djl(ig,nb,nt) = splint_deriv(xdata, tab(:,nb,nt), &
|
||||
tab_d2y(:,nb,nt), qt)
|
||||
ELSE
|
||||
px = qt / dq - INT(qt/dq)
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = qt / dq + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
djl(ig,nb,nt) = ( tab(i0, nb, nt) * (-vx*wx-ux*wx-ux*vx)/6.d0 + &
|
||||
tab(i1, nb, nt) * (+vx*wx-px*wx-px*vx)/2.d0 - &
|
||||
tab(i2, nb, nt) * (+ux*wx-px*wx-px*ux)/2.d0 + &
|
||||
tab(i3, nb, nt) * (+ux*vx-px*vx-px*ux)/6.d0 )/dq
|
||||
ENDIF
|
||||
px = qt / dq - INT(qt/dq)
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = qt / dq + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
djl(ig,nb,nt) = ( tab(i0, nb, nt) * (-vx*wx-ux*wx-ux*vx)/6.d0 + &
|
||||
tab(i1, nb, nt) * (+vx*wx-px*wx-px*vx)/2.d0 - &
|
||||
tab(i2, nb, nt) * (+ux*wx-px*wx-px*ux)/2.d0 + &
|
||||
tab(i3, nb, nt) * (+ux*vx-px*vx-px*ux)/6.d0 )/dq
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
|
@ -183,7 +169,6 @@ SUBROUTINE gen_us_dj_base &
|
|||
DEALLOCATE( sk )
|
||||
DEALLOCATE( ylm )
|
||||
DEALLOCATE( djl )
|
||||
IF (spline_ps) DEALLOCATE( xdata )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -20,8 +20,7 @@ SUBROUTINE gen_us_dj_gpu_ &
|
|||
USE upf_kinds, ONLY: dp
|
||||
USE upf_const, ONLY: tpi
|
||||
USE uspp, ONLY: nkb, indv_d, nhtol_d, nhtolm_d
|
||||
USE uspp_data, ONLY: nqx, tab, tab_d2y, tab_d, dq, spline_ps
|
||||
USE splinelib
|
||||
USE uspp_data, ONLY: nqx, tab, tab_d, dq
|
||||
USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm
|
||||
USE device_fbuff_m, ONLY: dev_buf
|
||||
!
|
||||
|
@ -70,8 +69,6 @@ SUBROUTINE gen_us_dj_gpu_ &
|
|||
REAL(DP) :: px, ux, vx, wx, arg, u_ipol, xk1, xk2, xk3, qt
|
||||
COMPLEX(DP) :: pref
|
||||
INTEGER, ALLOCATABLE :: ityp_d(:), ih_d(:), na_d(:), nas_d(:)
|
||||
REAL(DP), ALLOCATABLE :: q(:), djl(:,:,:), ylm(:,:)
|
||||
REAL(DP), ALLOCATABLE :: xdata(:)
|
||||
!
|
||||
REAL(DP), POINTER :: gk_d(:,:), djl_d(:,:,:), ylm_d(:,:)
|
||||
REAL(DP), ALLOCATABLE :: q_d(:), tau_d(:,:)
|
||||
|
@ -106,30 +103,7 @@ SUBROUTINE gen_us_dj_gpu_ &
|
|||
!
|
||||
CALL ylmr2_gpu( (lmaxkb+1)**2, npw, gk_d, q_d, ylm_d )
|
||||
!
|
||||
IF ( spline_ps ) THEN
|
||||
ALLOCATE( q(npw), xdata(nqx), djl(npw,nbetam,ntyp) )
|
||||
q = q_d
|
||||
DO iq = 1, nqx
|
||||
xdata(iq) = (iq - 1) * dq
|
||||
ENDDO
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
! calculate beta in G-space using an interpolation table
|
||||
DO nb = 1, upf(nt)%nbeta
|
||||
DO ig = 1, npw
|
||||
qt = SQRT(q(ig)) * tpiba
|
||||
djl(ig,nb,nt) = splint_deriv( xdata, tab(:,nb,nt), &
|
||||
tab_d2y(:,nb,nt), qt )
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
djl_d = djl
|
||||
!
|
||||
DEALLOCATE( q, xdata, djl )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
DO nt = 1, ntyp
|
||||
nbm = upf(nt)%nbeta
|
||||
!$cuf kernel do (2) <<<*,*>>>
|
||||
DO nb = 1, nbm
|
||||
|
@ -149,9 +123,7 @@ SUBROUTINE gen_us_dj_gpu_ &
|
|||
tab_d(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6._DP)/dq
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( q_d )
|
||||
!
|
||||
|
@ -199,7 +171,6 @@ SUBROUTINE gen_us_dj_gpu_ &
|
|||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
|
||||
!
|
||||
DEALLOCATE( phase_d )
|
||||
!
|
||||
|
|
|
@ -17,9 +17,8 @@ SUBROUTINE gen_us_dy_base &
|
|||
USE upf_kinds, ONLY: dp
|
||||
USE upf_const, ONLY: tpi
|
||||
USE uspp, ONLY: nkb, indv, nhtol, nhtolm
|
||||
USE uspp_data, ONLY: nqx, tab, tab_d2y, dq, spline_ps
|
||||
USE uspp_data, ONLY: nqx, tab, dq
|
||||
USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh
|
||||
USE splinelib
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -75,7 +74,6 @@ SUBROUTINE gen_us_dy_base &
|
|||
COMPLEX(DP) :: phase, pref
|
||||
!
|
||||
INTEGER :: iq
|
||||
REAL(DP), ALLOCATABLE :: xdata(:)
|
||||
!
|
||||
dvkb(:,:) = (0.d0, 0.d0)
|
||||
IF (lmaxkb <= 0) RETURN
|
||||
|
@ -103,34 +101,22 @@ SUBROUTINE gen_us_dy_base &
|
|||
q(ig) = SQRT(q(ig)) * tpiba
|
||||
ENDDO
|
||||
!
|
||||
IF ( spline_ps ) THEN
|
||||
ALLOCATE( xdata(nqx) )
|
||||
DO iq = 1, nqx
|
||||
xdata(iq) = (iq - 1) * dq
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
! calculate beta in G-space using an interpolation table
|
||||
DO nb = 1, upf(nt)%nbeta
|
||||
DO ig = 1, npw
|
||||
IF ( spline_ps ) THEN
|
||||
vkb0(ig,nb,nt) = splint( xdata, tab(:,nb,nt), &
|
||||
tab_d2y(:,nb,nt), q(ig) )
|
||||
ELSE
|
||||
px = q(ig)/dq - INT(q(ig)/dq)
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = q(ig)/dq + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
vkb0(ig, nb, nt) = tab(i0, nb, nt) * ux * vx * wx / 6.d0 + &
|
||||
tab(i1, nb, nt) * px * vx * wx / 2.d0 - &
|
||||
tab(i2, nb, nt) * px * ux * wx / 2.d0 + &
|
||||
tab(i3, nb, nt) * px * ux * vx / 6.d0
|
||||
ENDIF
|
||||
px = q(ig)/dq - INT(q(ig)/dq)
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = q(ig)/dq + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
vkb0(ig, nb, nt) = tab(i0, nb, nt) * ux * vx * wx / 6.d0 + &
|
||||
tab(i1, nb, nt) * px * vx * wx / 2.d0 - &
|
||||
tab(i2, nb, nt) * px * ux * wx / 2.d0 + &
|
||||
tab(i3, nb, nt) * px * ux * vx / 6.d0
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
|
@ -173,7 +159,6 @@ SUBROUTINE gen_us_dy_base &
|
|||
!
|
||||
DEALLOCATE( sk )
|
||||
DEALLOCATE( vkb0, dylm_u, gk )
|
||||
IF (spline_ps) DEALLOCATE( xdata )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -19,8 +19,7 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, &
|
|||
USE upf_kinds, ONLY: dp
|
||||
USE upf_const, ONLY: tpi
|
||||
USE uspp, ONLY: nkb, indv_d, nhtol_d, nhtolm_d
|
||||
USE uspp_data, ONLY: nqx, tab, tab_d2y, tab_d, dq, spline_ps
|
||||
USE splinelib
|
||||
USE uspp_data, ONLY: nqx, tab, tab_d, dq
|
||||
USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm
|
||||
USE device_fbuff_m, ONLY: dev_buf
|
||||
!
|
||||
|
@ -72,11 +71,11 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, &
|
|||
!
|
||||
INTEGER, ALLOCATABLE :: ityp_d(:), ih_d(:), na_d(:), nas_d(:)
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: q(:), vkb0(:,:,:), dylm(:,:)
|
||||
REAL(DP), ALLOCATABLE :: xdata(:), tau_d(:,:), q_d(:)
|
||||
REAL(DP), ALLOCATABLE :: q(:), dylm(:,:)
|
||||
!
|
||||
REAL(DP), POINTER :: gk_d(:,:)
|
||||
REAL(DP), POINTER :: vkb0_d(:,:,:), dylm_u_d(:,:), dylm_d(:,:,:)
|
||||
REAL(DP), ALLOCATABLE :: q_d(:), tau_d(:,:)
|
||||
! dylm = d Y_lm/dr_i in cartesian axes
|
||||
! dylm_u as above projected on u
|
||||
COMPLEX(DP), ALLOCATABLE :: phase_d(:), sk_d(:,:)
|
||||
|
@ -140,35 +139,10 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, &
|
|||
ENDDO
|
||||
!
|
||||
!
|
||||
IF ( spline_ps ) THEN
|
||||
!
|
||||
! AF: using splint_eq ??
|
||||
!
|
||||
ALLOCATE( q(npw), xdata(nqx), vkb0(npw,nbetam,ntyp) )
|
||||
q = q_d
|
||||
DO iq = 1, nqx
|
||||
xdata(iq) = (iq - 1) * dq
|
||||
ENDDO
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
! calculate beta in G-space using an interpolation table
|
||||
DO nb = 1, upf(nt)%nbeta
|
||||
DO ig = 1, npw
|
||||
vkb0(ig,nb,nt) = splint( xdata, tab(:,nb,nt), &
|
||||
tab_d2y(:,nb,nt), q(ig) )
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
vkb0_d = vkb0
|
||||
!
|
||||
DEALLOCATE( q, xdata, vkb0 )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
nbm = upf(nt)%nbeta
|
||||
!$cuf kernel do (2) <<<*,*>>>
|
||||
DO nb = 1, nbm
|
||||
DO nt = 1, ntyp
|
||||
nbm = upf(nt)%nbeta
|
||||
!$cuf kernel do (2) <<<*,*>>>
|
||||
DO nb = 1, nbm
|
||||
DO ig = 1, npw
|
||||
px = q_d(ig)/dq - DBLE(INT(q_d(ig)/dq))
|
||||
ux = 1._DP - px
|
||||
|
@ -182,11 +156,9 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, &
|
|||
tab_d(i1,nb,nt) * px * vx * wx / 2._DP - &
|
||||
tab_d(i2,nb,nt) * px * ux * wx / 2._DP + &
|
||||
tab_d(i3,nb,nt) * px * ux * vx / 6._DP
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( q_d )
|
||||
!
|
||||
|
|
|
@ -15,9 +15,8 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm )
|
|||
USE upf_const, ONLY : fpi
|
||||
USE atom, ONLY : rgrid
|
||||
USE uspp_param, ONLY : upf, lmaxq, nbetam, nsp
|
||||
USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps, tab_d, tab_d2y_d
|
||||
USE uspp_data, ONLY : nqx, dq, tab, tab_d
|
||||
USE mp, ONLY : mp_sum
|
||||
USE splinelib, ONLY : spline
|
||||
USE m_gth, ONLY : mk_ffnl_gth
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -32,8 +31,6 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm )
|
|||
! the prefactor of the Q functions
|
||||
real(DP) :: vqint, d1
|
||||
!
|
||||
real(DP), allocatable :: xdata(:)
|
||||
! work space for spline
|
||||
REAL(dp), allocatable :: aux (:)
|
||||
! work space
|
||||
REAL(dp), allocatable :: besr(:)
|
||||
|
@ -68,30 +65,10 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm )
|
|||
!
|
||||
call mp_sum( tab, intra_bgrp_comm )
|
||||
!
|
||||
! initialize spline interpolation
|
||||
!
|
||||
if (spline_ps) then
|
||||
allocate( xdata(nqx) )
|
||||
do iq = 1, nqx
|
||||
xdata(iq) = (iq - 1) * dq
|
||||
enddo
|
||||
do nt = 1, nsp
|
||||
do nb = 1, upf(nt)%nbeta
|
||||
d1 = (tab(2,nb,nt) - tab(1,nb,nt)) / dq
|
||||
call spline(xdata, tab(:,nb,nt), 0.d0, d1, tab_d2y(:,nb,nt))
|
||||
enddo
|
||||
enddo
|
||||
deallocate(xdata)
|
||||
!
|
||||
endif
|
||||
!
|
||||
! update GPU memory (taking care of zero-dim allocations)
|
||||
!
|
||||
#if defined __CUDA
|
||||
if ( nbetam > 0 ) then
|
||||
tab_d=tab
|
||||
if (spline_ps) tab_d2y_d=tab_d2y
|
||||
endif
|
||||
if ( nbetam > 0 ) tab_d=tab
|
||||
#endif
|
||||
!
|
||||
END SUBROUTINE init_tab_beta
|
||||
|
|
|
@ -306,7 +306,7 @@ subroutine init_us_1( nat, ityp, omega, ngm, g, gg, intra_bgrp_comm )
|
|||
end do
|
||||
end if
|
||||
!
|
||||
! fill interpolation table tab (and tab_d2y for spline interpolation)
|
||||
! fill interpolation table tab
|
||||
!
|
||||
CALL init_tab_beta ( omega, intra_bgrp_comm )
|
||||
!
|
||||
|
|
|
@ -15,8 +15,7 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, &
|
|||
!
|
||||
USE upf_kinds, ONLY : DP
|
||||
USE upf_const, ONLY : tpi
|
||||
USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps
|
||||
USE splinelib
|
||||
USE uspp_data, ONLY : nqx, dq, tab
|
||||
USE uspp, ONLY : nkb, nhtol, nhtolm, indv
|
||||
USE uspp_param, ONLY : upf, lmaxkb, nhm, nh, nsp
|
||||
!
|
||||
|
@ -59,7 +58,6 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, &
|
|||
REAL(DP) :: px, ux, vx, wx, arg
|
||||
COMPLEX(DP) :: phase, pref
|
||||
REAL(DP), ALLOCATABLE :: gk(:,:), qg(:), vq(:), ylm(:,:), vkb1(:,:)
|
||||
REAL(DP), ALLOCATABLE :: xdata(:)
|
||||
COMPLEX(DP), ALLOCATABLE :: sk(:)
|
||||
INTEGER :: iq
|
||||
! cache blocking parameters
|
||||
|
@ -73,13 +71,6 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, &
|
|||
! setting cache blocking size
|
||||
numblock = (npw_+blocksize-1)/blocksize
|
||||
!
|
||||
IF (spline_ps) THEN
|
||||
ALLOCATE( xdata(nqx) )
|
||||
DO iq = 1, nqx
|
||||
xdata(iq) = (iq - 1) * dq
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
!$omp parallel private(vkb1, sk, qg, vq, ylm, gk, ig_orig, &
|
||||
!$omp realblocksize, jkb, px, ux, vx, wx, &
|
||||
!$omp i0, i1, i2, i3, lm, arg, phase, pref)
|
||||
|
@ -120,22 +111,18 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, &
|
|||
DO nb = 1, upf(nt)%nbeta
|
||||
!
|
||||
DO ig = 1, realblocksize
|
||||
IF (spline_ps) THEN
|
||||
vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig))
|
||||
ELSE
|
||||
px = qg(ig) / dq - INT( qg(ig)/dq )
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = INT( qg(ig)/dq ) + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
vq(ig) = tab(i0,nb,nt) * ux * vx * wx / 6.d0 + &
|
||||
tab(i1,nb,nt) * px * vx * wx / 2.d0 - &
|
||||
tab(i2,nb,nt) * px * ux * wx / 2.d0 + &
|
||||
tab(i3,nb,nt) * px * ux * vx / 6.d0
|
||||
ENDIF
|
||||
px = qg(ig) / dq - INT( qg(ig)/dq )
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = INT( qg(ig)/dq ) + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
vq(ig) = tab(i0,nb,nt) * ux * vx * wx / 6.d0 + &
|
||||
tab(i1,nb,nt) * px * vx * wx / 2.d0 - &
|
||||
tab(i2,nb,nt) * px * ux * wx / 2.d0 + &
|
||||
tab(i3,nb,nt) * px * ux * vx / 6.d0
|
||||
ENDDO
|
||||
! add spherical harmonic part (Y_lm(q)*f_l(q))
|
||||
DO ih = 1, nh(nt)
|
||||
|
@ -199,8 +186,6 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, &
|
|||
DEALLOCATE( sk )
|
||||
DEALLOCATE( vkb1 )
|
||||
!$omp end parallel
|
||||
!
|
||||
IF (spline_ps) DEALLOCATE( xdata )
|
||||
!
|
||||
CALL stop_clock( 'init_us_2:cpu' )
|
||||
!
|
||||
|
|
|
@ -16,8 +16,7 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, &
|
|||
!
|
||||
USE upf_kinds, ONLY : DP
|
||||
USE upf_const, ONLY : tpi
|
||||
USE uspp_data, ONLY : nqx, dq, spline_ps, tab_d, tab_d2y_d
|
||||
USE splinelib, ONLY : splint_eq
|
||||
USE uspp_data, ONLY : nqx, dq, tab_d
|
||||
USE uspp, ONLY : nkb, nhtol, nhtolm, indv
|
||||
USE uspp_param, ONLY : upf, lmaxkb, nhm, nh, nsp
|
||||
USE device_fbuff_m, ONLY : dev_buf
|
||||
|
@ -121,41 +120,25 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, &
|
|||
qg_d(ig) = sqrt(qg_d(ig))*tpiba
|
||||
enddo
|
||||
|
||||
! JR Don't need this when using splint_eq_gpu
|
||||
!if (spline_ps) then
|
||||
! allocate(xdata(nqx))
|
||||
! do iq = 1, nqx
|
||||
! xdata(iq) = (iq - 1) * dq
|
||||
! enddo
|
||||
!endif
|
||||
|
||||
! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q)
|
||||
jkb = 0
|
||||
do nt = 1, nsp
|
||||
do nb = 1, upf(nt)%nbeta
|
||||
if (spline_ps) then
|
||||
call splint_eq(dq, tab_d(:,nb,nt), tab_d2y_d(:,nb,nt), qg_d, vq_d)
|
||||
else
|
||||
!$cuf kernel do(1) <<<*,*>>>
|
||||
do ig = 1, npw_
|
||||
rv_d = qg_d(ig)
|
||||
px = rv_d / dq - int (rv_d / dq)
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = INT( rv_d / dq ) + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
vq_d (ig) = ux * vx * (wx * tab_d(i0, nb, nt) + px * tab_d(i3, nb, nt)) / 6.d0 + &
|
||||
px * wx * (vx * tab_d(i1, nb, nt) - ux * tab_d(i2, nb, nt)) * 0.5d0
|
||||
!$cuf kernel do(1) <<<*,*>>>
|
||||
do ig = 1, npw_
|
||||
rv_d = qg_d(ig)
|
||||
px = rv_d / dq - int (rv_d / dq)
|
||||
ux = 1.d0 - px
|
||||
vx = 2.d0 - px
|
||||
wx = 3.d0 - px
|
||||
i0 = INT( rv_d / dq ) + 1
|
||||
i1 = i0 + 1
|
||||
i2 = i0 + 2
|
||||
i3 = i0 + 3
|
||||
vq_d (ig) = ux * vx * (wx * tab_d(i0, nb, nt) + px * tab_d(i3, nb, nt)) / 6.d0 + &
|
||||
px * wx * (vx * tab_d(i1, nb, nt) - ux * tab_d(i2, nb, nt)) * 0.5d0
|
||||
|
||||
!vq_d (ig) = tab_d (i0, nb, nt) * ux * vx * wx / 6.d0 + &
|
||||
! tab_d (i1, nb, nt) * px * vx * wx / 2.d0 - &
|
||||
! tab_d (i2, nb, nt) * px * ux * wx / 2.d0 + &
|
||||
! tab_d (i3, nb, nt) * px * ux * vx / 6.d0
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
|
||||
! add spherical harmonic part (Y_lm(q)*f_l(q))
|
||||
do ih = 1, nh (nt)
|
||||
|
|
|
@ -15,13 +15,16 @@ MODULE uspp_data
|
|||
SAVE
|
||||
PRIVATE
|
||||
!
|
||||
PUBLIC :: nqxq, nqx, dq, spline_ps
|
||||
PUBLIC :: qrad, tab, tab_at, tab_d2y
|
||||
PUBLIC :: qrad_d, tab_d, tab_at_d, tab_d2y_d
|
||||
PUBLIC :: nqxq, nqx, dq
|
||||
PUBLIC :: qrad, tab, tab_at
|
||||
PUBLIC :: qrad_d, tab_d, tab_at_d
|
||||
!
|
||||
PUBLIC :: allocate_uspp_data
|
||||
PUBLIC :: deallocate_uspp_data
|
||||
PUBLIC :: scale_uspp_data
|
||||
! Next variables for compatibility only, to be removed
|
||||
LOGICAL, PUBLIC :: spline_ps=.TRUE.
|
||||
REAL(DP), ALLOCATABLE, PUBLIC :: tab_d2y(:,:,:)
|
||||
!
|
||||
INTEGER :: nqxq
|
||||
!! size of interpolation table
|
||||
|
@ -35,19 +38,15 @@ MODULE uspp_data
|
|||
!! interpolation table for PPs
|
||||
REAL(DP), ALLOCATABLE :: tab_at(:,:,:)
|
||||
!! interpolation table for atomic wfc
|
||||
LOGICAL :: spline_ps = .FALSE.
|
||||
REAL(DP), ALLOCATABLE :: tab_d2y(:,:,:)
|
||||
!! for cubic splines
|
||||
!
|
||||
! GPUs vars
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: qrad_d(:,:,:,:)
|
||||
REAL(DP), ALLOCATABLE :: tab_d(:,:,:)
|
||||
REAL(DP), ALLOCATABLE :: tab_at_d(:,:,:)
|
||||
REAL(DP), ALLOCATABLE :: tab_d2y_d(:,:,:)
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
attributes (DEVICE) :: qrad_d, tab_d, tab_at_d, tab_d2y_d
|
||||
attributes (DEVICE) :: qrad_d, tab_d, tab_at_d
|
||||
#endif
|
||||
!
|
||||
contains
|
||||
|
@ -63,7 +62,6 @@ contains
|
|||
if (lmaxq>0) allocate(qrad(nqxq_,nbetam*(nbetam+1)/2, lmaxq, nsp))
|
||||
allocate(tab(nqx_,nbetam,nsp))
|
||||
allocate(tab_at(nqx_,nwfcm,nsp))
|
||||
if (spline_ps) allocate(tab_d2y(nqx_,nbetam,nsp))
|
||||
!
|
||||
IF (use_gpu) then
|
||||
! allocations with zero size protected
|
||||
|
@ -72,7 +70,6 @@ contains
|
|||
allocate(qrad_d(nqxq_,nbetam*(nbetam+1)/2, lmaxq, nsp))
|
||||
if (nbetam>0) allocate(tab_d(nqx_,nbetam,nsp))
|
||||
if (nwfcm>0) allocate(tab_at_d(nqx_,nwfcm,nsp))
|
||||
if (spline_ps) allocate(tab_d2y_d(nqx_,nbetam,nsp))
|
||||
endif
|
||||
!
|
||||
end subroutine allocate_uspp_data
|
||||
|
@ -82,12 +79,10 @@ contains
|
|||
if( allocated( qrad ) ) deallocate( qrad )
|
||||
if( allocated( tab ) ) deallocate( tab )
|
||||
if( allocated( tab_at ) ) deallocate( tab_at )
|
||||
if( allocated( tab_d2y ) ) deallocate( tab_d2y )
|
||||
!
|
||||
if( allocated( qrad_d ) ) deallocate( qrad_d )
|
||||
if( allocated( tab_d ) ) deallocate( tab_d )
|
||||
if( allocated( tab_at_d ) ) deallocate( tab_at_d )
|
||||
if( allocated( tab_d2y_d ) ) deallocate( tab_d2y_d )
|
||||
end subroutine
|
||||
!
|
||||
subroutine scale_uspp_data( vol_ratio_m1 )
|
||||
|
|
Loading…
Reference in New Issue