From e4abd824bb263c27dd3ebcdd243eee8f6c0bcd38 Mon Sep 17 00:00:00 2001 From: Paolo Giannozzi Date: Sun, 9 Jan 2022 21:45:15 +0100 Subject: [PATCH] Cleanup of upflib Get rid of a few special cases of questionable usefulness but very effective in making the code more complex. Case 1: nonlocal terms of Goedecker-Teter-Hutter PPs can be computed on a grid and interpolated like for all other cases --- upflib/gen_us_dj.f90 | 5 -- upflib/gen_us_dj_gpu.f90 | 21 ++--- upflib/gen_us_dy.f90 | 43 ++++------ upflib/gen_us_dy_gpu.f90 | 2 - upflib/gth.f90 | 157 +--------------------------------- upflib/init_tab_beta.f90 | 18 ++-- upflib/init_us_2_base.f90 | 41 ++++----- upflib/init_us_2_base_gpu.f90 | 24 +----- upflib/init_us_b0.f90 | 1 - 9 files changed, 56 insertions(+), 256 deletions(-) diff --git a/upflib/gen_us_dj.f90 b/upflib/gen_us_dj.f90 index 06c80f3d2..e1563a19f 100644 --- a/upflib/gen_us_dj.f90 +++ b/upflib/gen_us_dj.f90 @@ -18,7 +18,6 @@ SUBROUTINE gen_us_dj_base & USE upf_const, ONLY: tpi USE uspp, ONLY: nkb, indv, nhtol, nhtolm USE uspp_data, ONLY: nqx, tab, tab_d2y, dq, spline_ps - USE m_gth, ONLY: mk_dffnl_gth USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh USE splinelib ! @@ -115,10 +114,6 @@ SUBROUTINE gen_us_dj_base & DO nt = 1, ntyp DO nb = 1, upf(nt)%nbeta ! - IF ( upf(nt)%is_gth ) THEN - CALL mk_dffnl_gth( nt, nb, npw, omega, tpiba, q, djl(1,nb,nt) ) - CYCLE - ENDIF DO ig = 1, npw qt = SQRT(q (ig)) * tpiba IF (spline_ps) THEN diff --git a/upflib/gen_us_dj_gpu.f90 b/upflib/gen_us_dj_gpu.f90 index bbae95c23..d2385a861 100644 --- a/upflib/gen_us_dj_gpu.f90 +++ b/upflib/gen_us_dj_gpu.f90 @@ -21,7 +21,6 @@ SUBROUTINE gen_us_dj_gpu_ & USE upf_const, ONLY: tpi USE uspp, ONLY: nkb, indv_d, nhtol_d, nhtolm_d USE uspp_data, ONLY: nqx, tab, tab_d2y, tab_d, dq, spline_ps - USE m_gth, ONLY: mk_dffnl_gth, mk_dffnl_gth_gpu USE splinelib USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm USE device_fbuff_m, ONLY: dev_buf @@ -117,10 +116,6 @@ SUBROUTINE gen_us_dj_gpu_ & DO nt = 1, ntyp ! calculate beta in G-space using an interpolation table DO nb = 1, upf(nt)%nbeta - IF ( upf(nt)%is_gth ) THEN - CALL mk_dffnl_gth( nt, nb, npw, omega, tpiba, q, djl(1,nb,nt) ) - CYCLE - ENDIF DO ig = 1, npw qt = SQRT(q(ig)) * tpiba djl(ig,nb,nt) = splint_deriv( xdata, tab(:,nb,nt), & @@ -136,14 +131,9 @@ SUBROUTINE gen_us_dj_gpu_ & ! DO nt = 1, ntyp nbm = upf(nt)%nbeta - IF ( upf(nt)%is_gth ) THEN - DO nb = 1, nbm - CALL mk_dffnl_gth_gpu( nt, nb, npw, omega, tpiba, q_d, djl_d(:,nb,nt) ) - ENDDO - ELSE - !$cuf kernel do (2) <<<*,*>>> - DO nb = 1, nbm - DO ig = 1, npw + !$cuf kernel do (2) <<<*,*>>> + DO nb = 1, nbm + DO ig = 1, npw qt = SQRT(q_d(ig)) * tpiba px = qt/dq - DBLE(INT(qt/dq)) ux = 1._DP - px @@ -157,9 +147,8 @@ SUBROUTINE gen_us_dj_gpu_ & tab_d(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2._DP - & tab_d(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2._DP + & tab_d(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6._DP)/dq - ENDDO - ENDDO - ENDIF + ENDDO + ENDDO ENDDO ! ENDIF diff --git a/upflib/gen_us_dy.f90 b/upflib/gen_us_dy.f90 index 18d58ff02..b590e9c28 100644 --- a/upflib/gen_us_dy.f90 +++ b/upflib/gen_us_dy.f90 @@ -19,7 +19,6 @@ SUBROUTINE gen_us_dy_base & USE uspp, ONLY: nkb, indv, nhtol, nhtolm USE uspp_data, ONLY: nqx, tab, tab_d2y, dq, spline_ps USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh - USE m_gth, ONLY : mk_ffnl_gth USE splinelib ! IMPLICIT NONE @@ -114,29 +113,25 @@ SUBROUTINE gen_us_dy_base & DO nt = 1, ntyp ! calculate beta in G-space using an interpolation table DO nb = 1, upf(nt)%nbeta - IF ( upf(nt)%is_gth ) THEN - CALL mk_ffnl_gth( nt, nb, npw, omega, q, vkb0(1,nb,nt) ) - ELSE - DO ig = 1, npw - IF ( spline_ps ) THEN - vkb0(ig,nb,nt) = splint( xdata, tab(:,nb,nt), & - tab_d2y(:,nb,nt), q(ig) ) - ELSE - px = q(ig)/dq - INT(q(ig)/dq) - ux = 1.d0 - px - vx = 2.d0 - px - wx = 3.d0 - px - i0 = q(ig)/dq + 1 - i1 = i0 + 1 - i2 = i0 + 2 - i3 = i0 + 3 - vkb0(ig, nb, nt) = tab(i0, nb, nt) * ux * vx * wx / 6.d0 + & - tab(i1, nb, nt) * px * vx * wx / 2.d0 - & - tab(i2, nb, nt) * px * ux * wx / 2.d0 + & - tab(i3, nb, nt) * px * ux * vx / 6.d0 - ENDIF - ENDDO - ENDIF + DO ig = 1, npw + IF ( spline_ps ) THEN + vkb0(ig,nb,nt) = splint( xdata, tab(:,nb,nt), & + tab_d2y(:,nb,nt), q(ig) ) + ELSE + px = q(ig)/dq - INT(q(ig)/dq) + ux = 1.d0 - px + vx = 2.d0 - px + wx = 3.d0 - px + i0 = q(ig)/dq + 1 + i1 = i0 + 1 + i2 = i0 + 2 + i3 = i0 + 3 + vkb0(ig, nb, nt) = tab(i0, nb, nt) * ux * vx * wx / 6.d0 + & + tab(i1, nb, nt) * px * vx * wx / 2.d0 - & + tab(i2, nb, nt) * px * ux * wx / 2.d0 + & + tab(i3, nb, nt) * px * ux * vx / 6.d0 + ENDIF + ENDDO ENDDO ENDDO ! diff --git a/upflib/gen_us_dy_gpu.f90 b/upflib/gen_us_dy_gpu.f90 index d1edd11dc..7dacec233 100644 --- a/upflib/gen_us_dy_gpu.f90 +++ b/upflib/gen_us_dy_gpu.f90 @@ -91,8 +91,6 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, & attributes(DEVICE) :: dvkb_d #endif ! - IF ( ANY(upf(1:ntyp)%is_gth ) ) & - CALL upf_error( 'gen_us_dy_gpu',' GTH not implemented', 1) dvkb_d = (0._DP,0._DP) ! IF (lmaxkb <= 0) RETURN diff --git a/upflib/gth.f90 b/upflib/gth.f90 index f543fc4e8..5ded6da62 100644 --- a/upflib/gth.f90 +++ b/upflib/gth.f90 @@ -13,8 +13,8 @@ module m_gth implicit none ! private - public :: gth_parameters, readgth, vloc_gth, dvloc_gth, dvloc_gth_gpu, setlocq_gth, & - mk_ffnl_gth, mk_dffnl_gth, mk_dffnl_gth_gpu, deallocate_gth + public :: gth_parameters, readgth, vloc_gth, dvloc_gth, & + dvloc_gth_gpu, setlocq_gth, mk_ffnl_gth, mk_dffnl_gth, deallocate_gth ! type gth_parameters integer :: itype, lloc, lmax @@ -267,159 +267,6 @@ contains ! end subroutine mk_dffnl_gth ! - !----------------------------------------------------------------------- - subroutine mk_dffnl_gth_gpu(itype, ibeta, nq, omega, tpiba, qg_d, dvq_d) - !----------------------------------------------------------------------- - ! - USE upf_kinds, ONLY: dp - USE upf_const, ONLY: pi, fpi, e2 - - implicit none - ! - ! I/O - integer, intent(in) :: itype, ibeta, nq - real(dp), intent(in) :: omega - real(dp), intent(in) :: tpiba - real(dp), intent(in) :: qg_d(nq) - real(dp), intent(out) :: dvq_d(nq) - ! - ! Local variables - integer, parameter :: nprj_max(0:3)=[3, 3, 2, 1] - integer :: ii, my_gth, ll, iproj - real(dp) :: rrl, rl2, qt, q1r2, q3r4, q5r6, qr2, qr4, qr6, fact, e_qr2_h -#if defined(__CUDA) - attributes(DEVICE) :: qg_d, dvq_d -#endif - ! - my_gth=0 - do ii=1,size(gth_p) - if (gth_p(ii)%itype==itype) then - my_gth=ii - exit - endif - enddo - if (my_gth==0) call upf_error('mk_dffnl_gth', 'cannot map itype in some gtp param. set', itype) - iproj=gth_p(my_gth)%ipr(ibeta) - ll=gth_p(my_gth)%lll(ibeta) - rrl=gth_p(my_gth)%rrl(ll) - if ( ll<0 .or. ll>3 ) call upf_error('mk_dffnl_gth', 'wrong l:', ll) - if ( iproj>nprj_max(ll) ) call upf_error('mk_dffnl_gth', 'projector exceeds max. n. of projectors', iproj) - ! - fact = e2 * fpi * pi**0.25_dp * sqrt( 2._dp**(ll+1) * rrl**(2*ll+3) & - / omega ) - ! - lif: if (ll==0) then ! s channel - ! - if(iproj==1)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - rl2= rrl**2 - qr2= qt*qt*rl2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=-fact * qt*rl2*e_qr2_h - end do - else if(iproj==2)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - rl2= rrl**2 - q1r2=qt*rl2 - qr2= qt*q1r2 - q3r4=qr2*q1r2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * 2._dp/sqrt(15._dp) * e_qr2_h * (-5._dp*q1r2+q3r4) - end do - else if(iproj==3)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - rl2= rrl**2 - q1r2=qt*rl2 - qr2= qt*q1r2 - q3r4=qr2*q1r2 - q5r6=qr2*q3r4 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * (4._dp/3._dp)/sqrt(105._dp) * e_qr2_h * (-35._dp*q1r2 + 14._dp*q3r4 - q5r6) - end do - end if - ! - else if (ll==1) then lif ! p channel - ! - if(iproj==1)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - qr2=(qt*rrl)**2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * (1._dp/sqrt(3._dp)) * e_qr2_h * (1._dp - qr2) - end do - else if(iproj==2)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - qr2=(qt*rrl)**2 - qr4=qr2**2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * (2._dp/sqrt(105._dp)) * e_qr2_h * (5._dp - 8._dp*qr2 + qr4) - end do - else if(iproj==3)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - qr2=(qt*rrl)**2 - qr4=qr2**2 - qr6=qr4*qr2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * (4._dp/3._dp)/sqrt(1155._dp) * e_qr2_h * (35._dp - 77._dp*qr2 + 19._dp*qr4 - qr6) - end do - end if - ! - else if (ll==2) then lif ! d channel [ ONLY 2 PROJECTORS!! ] - ! - if(iproj==1)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - qr2=(qt*rrl)**2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * (1._dp/sqrt(15._dp)) * e_qr2_h * qt*(2._dp - qr2) - end do - else if(iproj==2)then - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = sqrt(qg_d(ii))*tpiba - qr2=(qt*rrl)**2 - qr4=qr2**2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * (2._dp/3._dp)/sqrt(105._dp) * e_qr2_h * qt*(14._dp - 11._dp*qr2 + qr4) - end do - end if - ! - else if (ll==3) then lif ! f channel [ ONLY 1 PROJECTOR!! ] - ! - !$cuf kernel do (1) <<<*,*>>> - do ii=1,nq - qt = qg_d(ii)*tpiba**2 - qr2=qt*rrl**2 - e_qr2_h=exp(-0.5_dp*qr2) - ! - dvq_d(ii)=fact * e_qr2_h * qt*(3._dp - qr2) / 105.0_dp - end do - ! - end if lif - ! - ! - end subroutine mk_dffnl_gth_gpu - !----------------------------------------------------------------------- subroutine vloc_gth(itype, zion, tpiba2, ngl, gl, omega, vloc) !----------------------------------------------------------------------- diff --git a/upflib/init_tab_beta.f90 b/upflib/init_tab_beta.f90 index 4c7ab96d6..73454a06b 100644 --- a/upflib/init_tab_beta.f90 +++ b/upflib/init_tab_beta.f90 @@ -18,6 +18,7 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm ) USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps, tab_d, tab_d2y_d USE mp, ONLY : mp_sum USE splinelib, ONLY : spline + USE m_gth, ONLY : mk_ffnl_gth ! IMPLICIT NONE ! @@ -45,17 +46,20 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm ) call divide (intra_bgrp_comm, nqx, startq, lastq) tab (:,:,:) = 0.d0 do nt = 1, nsp - if ( upf(nt)%is_gth ) cycle do nb = 1, upf(nt)%nbeta l = upf(nt)%lll (nb) do iq = startq, lastq qi = (iq - 1) * dq - call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr) - do ir = 1, upf(nt)%kkbeta - aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir) - enddo - call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint) - tab (iq, nb, nt) = vqint * pref + if ( upf(nt)%is_gth ) then + CALL mk_ffnl_gth( nt, nb, 1, omega, [ qi ] , tab(iq,nb,nt) ) + else + call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr) + do ir = 1, upf(nt)%kkbeta + aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir) + enddo + call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint) + tab (iq, nb, nt) = vqint * pref + end if enddo enddo enddo diff --git a/upflib/init_us_2_base.f90 b/upflib/init_us_2_base.f90 index f35c40c2e..5db155b9b 100644 --- a/upflib/init_us_2_base.f90 +++ b/upflib/init_us_2_base.f90 @@ -16,7 +16,6 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & USE upf_kinds, ONLY : DP USE upf_const, ONLY : tpi USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps - USE m_gth, ONLY : mk_ffnl_gth USE splinelib USE uspp, ONLY : nkb, nhtol, nhtolm, indv USE uspp_param, ONLY : upf, lmaxkb, nhm, nh, nsp @@ -120,28 +119,24 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & ! f_l(q)=\int _0 ^\infty dr r^2 f_l(r) j_l(q.r) DO nb = 1, upf(nt)%nbeta ! - IF ( upf(nt)%is_gth ) THEN - CALL mk_ffnl_gth( nt, nb, realblocksize, omega, qg, vq ) - ELSE - DO ig = 1, realblocksize - IF (spline_ps) THEN - vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig)) - ELSE - px = qg(ig) / dq - INT( qg(ig)/dq ) - ux = 1.d0 - px - vx = 2.d0 - px - wx = 3.d0 - px - i0 = INT( qg(ig)/dq ) + 1 - i1 = i0 + 1 - i2 = i0 + 2 - i3 = i0 + 3 - vq(ig) = tab(i0,nb,nt) * ux * vx * wx / 6.d0 + & - tab(i1,nb,nt) * px * vx * wx / 2.d0 - & - tab(i2,nb,nt) * px * ux * wx / 2.d0 + & - tab(i3,nb,nt) * px * ux * vx / 6.d0 - ENDIF - ENDDO - ENDIF + DO ig = 1, realblocksize + IF (spline_ps) THEN + vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig)) + ELSE + px = qg(ig) / dq - INT( qg(ig)/dq ) + ux = 1.d0 - px + vx = 2.d0 - px + wx = 3.d0 - px + i0 = INT( qg(ig)/dq ) + 1 + i1 = i0 + 1 + i2 = i0 + 2 + i3 = i0 + 3 + vq(ig) = tab(i0,nb,nt) * ux * vx * wx / 6.d0 + & + tab(i1,nb,nt) * px * vx * wx / 2.d0 - & + tab(i2,nb,nt) * px * ux * wx / 2.d0 + & + tab(i3,nb,nt) * px * ux * vx / 6.d0 + ENDIF + ENDDO ! add spherical harmonic part (Y_lm(q)*f_l(q)) DO ih = 1, nh(nt) IF (nb == indv(ih,nt) ) THEN diff --git a/upflib/init_us_2_base_gpu.f90 b/upflib/init_us_2_base_gpu.f90 index eb6bba1e9..2c025e0c0 100644 --- a/upflib/init_us_2_base_gpu.f90 +++ b/upflib/init_us_2_base_gpu.f90 @@ -17,7 +17,6 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, & USE upf_kinds, ONLY : DP USE upf_const, ONLY : tpi USE uspp_data, ONLY : nqx, dq, spline_ps, tab_d, tab_d2y_d - USE m_gth, ONLY : mk_ffnl_gth USE splinelib, ONLY : splint_eq USE uspp, ONLY : nkb, nhtol, nhtolm, indv USE uspp_param, ONLY : upf, lmaxkb, nhm, nh, nsp @@ -68,13 +67,11 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, & integer :: iv_d real(DP) :: px, ux, vx, wx, arg, q1, q2, q3 real(DP), pointer :: gk_d (:,:), qg_d (:), vq_d(:), ylm_d(:,:), vkb1_d(:,:) - real(DP), allocatable :: qg_h (:), vq_h(:) real(DP) :: rv_d complex(DP) :: phase, pref complex(DP), pointer :: sk_d(:) - logical :: is_gth integer :: iq #if defined(__CUDA) attributes(DEVICE) :: gk_d, qg_d, vq_d, ylm_d, vkb1_d, sk_d @@ -100,17 +97,6 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, & CALL dev_buf%lock_buffer( ylm_d, (/ npw_, (lmaxkb + 1) **2 /), istat(5) ) CALL dev_buf%lock_buffer( gk_d, (/ 3, npw_ /), istat(6) ) IF (ANY(istat /= 0)) CALL upf_error( 'init_us_2_gpu', 'cannot allocate buffers', -1 ) - - is_gth = .false. - do nt = 1, nsp - is_gth = upf(nt)%is_gth - if (is_gth) then - allocate ( qg_h( npw_)) - allocate ( vq_h( npw_)) - is_gth = .true. - exit - end if - end do ! q1 = q_(1) q2 = q_(2) @@ -148,11 +134,7 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, & jkb = 0 do nt = 1, nsp do nb = 1, upf(nt)%nbeta - if ( upf(nt)%is_gth ) then - qg_h = qg_d - CALL mk_ffnl_gth( nt, nb, npw_, omega, qg_h, vq_h ) - vq_d = vq_h - else if (spline_ps) then + if (spline_ps) then call splint_eq(dq, tab_d(:,nb,nt), tab_d2y_d(:,nb,nt), qg_d, vq_d) else !$cuf kernel do(1) <<<*,*>>> @@ -239,10 +221,6 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, & CALL dev_buf%release_buffer( ylm_d, istat(5) ) CALL dev_buf%release_buffer( gk_d, istat(6) ) ! - IF (is_gth) THEN - deallocate ( qg_h, vq_h ) - END IF - ! CALL stop_clock( 'init_us_2:gpu' ) ! return diff --git a/upflib/init_us_b0.f90 b/upflib/init_us_b0.f90 index 9984f5b64..ee36da837 100644 --- a/upflib/init_us_b0.f90 +++ b/upflib/init_us_b0.f90 @@ -88,7 +88,6 @@ SUBROUTINE init_us_b0(ecutwfc,intra_bgrp_comm) !- loop over pseudopotentials DO nt = 1, nsp WRITE( stdout, '(/5X,a,i4)' ) 'Smoothing PSEUDO #', nt - IF ( upf(nt)%is_gth ) CYCLE ! !- compute original beta normalization in real space where grid is sufficient by definition DO nb = 1, upf(nt)%nbeta