diff --git a/GWW/simple/commutator.f90 b/GWW/simple/commutator.f90 index b62d73034..665b6fb73 100644 --- a/GWW/simple/commutator.f90 +++ b/GWW/simple/commutator.f90 @@ -14,9 +14,7 @@ subroutine gen_beta_simple (qk, npw_max, dvkb) USE klist, ONLY : ngk USE gvect, ONLY : mill, eigts1, eigts2, eigts3, g USE uspp, ONLY : nkb, indv, nhtol, nhtolm - USE uspp_data, ONLY : nqx, tab, tab_d2y, dq, spline_ps - USE m_gth, ONLY : mk_dffnl_gth - USE splinelib + USE uspp_data, ONLY : nqx, tab, dq USE uspp_param, ONLY : upf, lmaxkb, nbetam, nh USE io_global, ONLY : stdout ! @@ -47,7 +45,6 @@ subroutine gen_beta_simple (qk, npw_max, dvkb) real(DP), allocatable :: djl (:,:,:), ylm (:,:), q (:), gk (:,:) real(DP) :: qt complex(DP), allocatable :: sk (:) - real(DP), allocatable :: xdata(:) call start_clock('gen_beta1') @@ -73,25 +70,10 @@ subroutine gen_beta_simple (qk, npw_max, dvkb) call stop_clock('stres_us32') call start_clock('stres_us33') - if (spline_ps) then - allocate(xdata(nqx)) - do iq = 1, nqx - xdata(iq) = (iq - 1) * dq - enddo - endif - do nt = 1, ntyp do nb = 1, upf(nt)%nbeta - if ( upf(nt)%is_gth ) then - call mk_dffnl_gth( nt, nb, npw_max, omega, tpiba, q, djl(1,nb,nt) ) - cycle - endif do ig = 1, npw_max qt = sqrt(q (ig)) * tpiba - if (spline_ps) then - djl(ig,nb,nt) = splint_deriv(xdata, tab(:,nb,nt), & - tab_d2y(:,nb,nt), qt) - else px = qt / dq - int (qt / dq) ux = 1.d0 - px vx = 2.d0 - px @@ -108,7 +90,6 @@ subroutine gen_beta_simple (qk, npw_max, dvkb) else djl(ig,nb,nt) = 0.d0 ! Approximation endif - endif enddo enddo enddo @@ -156,7 +137,6 @@ subroutine gen_beta_simple (qk, npw_max, dvkb) deallocate (sk) deallocate (ylm) deallocate (djl) - if (spline_ps) deallocate(xdata) return end subroutine gen_beta_simple @@ -176,8 +156,7 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb) USE klist, ONLY : ngk, igk_k USE gvect, ONLY : mill, eigts1, eigts2, eigts3, g USE uspp, ONLY : nkb, indv, nhtol, nhtolm - USE uspp_data, ONLY : nqx, tab, tab_d2y, dq, spline_ps - USE splinelib + USE uspp_data, ONLY : nqx, tab, dq USE uspp_param, ONLY : upf, lmaxkb, nbetam, nh ! implicit none @@ -200,7 +179,6 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb) complex(DP) :: phase, pref integer :: iq - real(DP), allocatable :: xdata(:) ! ! call start_clock('gen_beta2') @@ -231,21 +209,10 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb) q (ig) = sqrt ( q(ig) ) * tpiba end do - if (spline_ps) then - allocate(xdata(nqx)) - do iq = 1, nqx - xdata(iq) = (iq - 1) * dq - enddo - endif - do nt = 1, ntyp ! calculate beta in G-space using an interpolation table do nb = 1, upf(nt)%nbeta do ig = 1, npw_max - if (spline_ps) then - vkb0(ig,nb,nt) = splint(xdata, tab(:,nb,nt), & - tab_d2y(:,nb,nt), q(ig)) - else px = q (ig) / dq - int (q (ig) / dq) ux = 1.d0 - px vx = 2.d0 - px @@ -262,7 +229,6 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb) else vkb0 (ig, nb, nt) = 0.d0 ! DEBUG endif - endif enddo enddo enddo @@ -308,7 +274,6 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb) deallocate ( sk ) deallocate ( vkb0, dylm_u, gk ) - if (spline_ps) deallocate(xdata) return end subroutine gen_beta_simple_2 diff --git a/GWW/simple/init_us_2_max.f90 b/GWW/simple/init_us_2_max.f90 index 86ac3d9ae..96f2f5a16 100644 --- a/GWW/simple/init_us_2_max.f90 +++ b/GWW/simple/init_us_2_max.f90 @@ -23,9 +23,7 @@ subroutine init_us_2_max (npw_, igk_, q_, vkb_) USE cell_base, ONLY : tpiba, omega USE constants, ONLY : tpi USE gvect, ONLY : eigts1, eigts2, eigts3, mill, g - USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps - USE m_gth, ONLY : mk_ffnl_gth - USE splinelib + USE uspp_data, ONLY : nqx, dq, tab USE uspp, ONLY : nkb, nhtol, nhtolm, indv USE uspp_param, ONLY : upf, lmaxkb, nhm, nh USE io_global, ONLY : stdout @@ -46,7 +44,6 @@ subroutine init_us_2_max (npw_, igk_, q_, vkb_) complex(DP) :: phase, pref complex(DP), allocatable :: sk(:) - real(DP), allocatable :: xdata(:) integer :: iq ! @@ -77,24 +74,12 @@ subroutine init_us_2_max (npw_, igk_, q_, vkb_) qg(ig) = sqrt(qg(ig))*tpiba enddo - if (spline_ps) then - allocate(xdata(nqx)) - do iq = 1, nqx - xdata(iq) = (iq - 1) * dq - enddo - endif ! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q) jkb = 0 do nt = 1, ntyp ! calculate beta in G-space using an interpolation table f_l(q)=\int _0 ^\infty dr r^2 f_l(r) j_l(q.r) do nb = 1, upf(nt)%nbeta - if ( upf(nt)%is_gth ) then - call mk_ffnl_gth( nt, nb, npw_, omega, qg, vq ) - else do ig = 1, npw_ - if (spline_ps) then - vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig)) - else px = qg (ig) / dq - int (qg (ig) / dq) ux = 1.d0 - px vx = 2.d0 - px @@ -111,9 +96,7 @@ subroutine init_us_2_max (npw_, igk_, q_, vkb_) else vq(ig) = 0.0 endif - endif enddo - endif ! add spherical harmonic part (Y_lm(q)*f_l(q)) do ih = 1, nh (nt) if (nb.eq.indv (ih, nt) ) then diff --git a/PP/src/adduscore.f90 b/PP/src/adduscore.f90 index 4b7d35649..25dccea4d 100644 --- a/PP/src/adduscore.f90 +++ b/PP/src/adduscore.f90 @@ -56,7 +56,6 @@ SUBROUTINE US_make_ae_charge(rho) USE fft_base, ONLY : dfftp USE fft_types, ONLY : fft_index_to_3d USE mp_global, ONLY : me_pool - USE splinelib, ONLY : spline, splint USE cell_base, ONLY : at, bg, alat, omega USE io_global, ONLY : stdout, ionode diff --git a/PP/src/pw2gw.f90 b/PP/src/pw2gw.f90 index 40dc870c2..6fda260e2 100644 --- a/PP/src/pw2gw.f90 +++ b/PP/src/pw2gw.f90 @@ -35,7 +35,6 @@ PROGRAM pw2gw USE mp_images, ONLY : intra_image_comm USE mp_pools, ONLY : kunit USE environment,ONLY : environment_start, environment_end - USE uspp_data, ONLY : spline_ps USE kinds, ONLY : DP ! IMPLICIT NONE @@ -98,13 +97,9 @@ PROGRAM pw2gw CALL mp_bcast( DeltaE, ionode_id, intra_image_comm ) ! - - spline_ps = .false. - CALL read_file CALL openfil_pp ! - CALL mp_bcast(spline_ps, ionode_id, intra_image_comm) #if defined __MPI kunittmp = kunit #else @@ -158,7 +153,7 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi USE uspp_param, ONLY : upf, nh USE uspp, ONLY : nhtol - USE uspp_data, ONLY : tab, tab_d2y, spline_ps + USE uspp_data, ONLY : tab USE ions_base, ONLY : ntyp => nsp USE klist, ONLY : ngk @@ -194,8 +189,8 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi INTEGER, ALLOCATABLE :: igk_l2g(:) INTEGER :: npol ! - REAL(kind=DP), ALLOCATABLE :: vkb0(:), djl(:), vec_tab(:), vec_tab_d2y(:) - INTEGER :: nb, nt, size_tab, size_tab_d2y, ipw, l + REAL(kind=DP), ALLOCATABLE :: vkb0(:), djl(:), vec_tab(:) + INTEGER :: nb, nt, size_tab, ipw, l ! ! REAL(kind=DP) :: norma ! Variable needed only for DEBUG ! @@ -485,19 +480,14 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi ALLOCATE(vkb0(1:npw)) size_tab=size(tab,1) - size_tab_d2y=size(tab_d2y,1) ALLOCATE(vec_tab(1:size_tab)) - if(.not.allocated(vec_tab_d2y)) ALLOCATE(vec_tab_d2y(1:size_tab_d2y)) DO nt = 1, ntyp DO nb = 1, upf(nt)%nbeta vkb0(:) = 0.0_dp - vec_tab(:) = 0.0_dp - vec_tab_d2y(:) = 0.0_dp vec_tab(:) = tab(:,nb,nt) - IF(spline_ps) vec_tab_d2y(:) = tab_d2y(:,nb,nt) - CALL gen_us_vkb0(ik,npw,vkb0,size_tab,vec_tab,spline_ps,vec_tab_d2y) + CALL gen_us_vkb0(ik,npw,vkb0,size_tab,vec_tab) WRITE(15,*) "---------------DEBUG-VKB0----------------------" WRITE(15,*) "ik= ", ik WRITE(15,*) "nt= ", nt @@ -511,7 +501,6 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi DEALLOCATE(vkb0) DEALLOCATE(vec_tab) - IF(allocated(vec_tab_d2y)) DEALLOCATE(vec_tab_d2y) ENDDO !--------------------------- @@ -523,20 +512,14 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi ALLOCATE(djl(1:npw)) size_tab=size(tab,1) - size_tab_d2y=size(tab_d2y,1) ALLOCATE(vec_tab(1:size_tab)) - IF(.not. allocated(vec_tab_d2y)) ALLOCATE(vec_tab_d2y(1:size_tab_d2y)) DO nt = 1, ntyp DO nb = 1, upf(nt)%nbeta djl(:) = 0.0_dp - vec_tab(:) = 0.0_dp - vec_tab_d2y(:) = 0.0_dp vec_tab(:) = tab(:,nb,nt) - IF(spline_ps) vec_tab_d2y(:) = tab_d2y(:,nb,nt) - CALL gen_us_djl(ik,npw,djl,size_tab,vec_tab,spline_ps,vec_tab_d2y) + CALL gen_us_djl(ik,npw,djl,size_tab,vec_tab) ! WRITE(0,*) "---------------DEBUG-----------------------" - ! WRITE(0,*) "spline: ", spline_ps ! WRITE(0,*) "ik= ", ik ! WRITE(0,*) "nt= ", nt ! WRITE(0,*) "nb= ", nb @@ -549,7 +532,6 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi DEALLOCATE(djl) DEALLOCATE(vec_tab) - IF(allocated(vec_tab_d2y)) DEALLOCATE(vec_tab_d2y) ENDDO @@ -1154,7 +1136,7 @@ SUBROUTINE diropn_gw (unit, filename, recl, exst, mpime, nd_nmbr_ ) END SUBROUTINE diropn_gw !---------------------------------------------------------------------- -subroutine gen_us_djl (ik,npw,djl,size_tab,vec_tab, spline_ps, vec_tab_d2y) +subroutine gen_us_djl (ik,npw,djl,size_tab,vec_tab) !---------------------------------------------------------------------- ! ! Calculates the kleinman-bylander pseudopotentials with the @@ -1167,7 +1149,6 @@ subroutine gen_us_djl (ik,npw,djl,size_tab,vec_tab, spline_ps, vec_tab_d2y) USE klist, ONLY : xk, igk_k USE gvect, ONLY : g USE uspp_data, ONLY : nqx, dq - USE splinelib, ONLY : splint_deriv USE uspp_param, ONLY : upf ! implicit none @@ -1176,8 +1157,6 @@ subroutine gen_us_djl (ik,npw,djl,size_tab,vec_tab, spline_ps, vec_tab_d2y) real(DP), intent(inout) ::djl(1:npw) integer, intent(in) :: size_tab real(DP), intent(in) :: vec_tab(1:size_tab) - real(DP), intent(in) :: vec_tab_d2y(1:size_tab) - logical :: spline_ps ! integer :: i0, i1, i2, & i3, ig @@ -1186,7 +1165,6 @@ subroutine gen_us_djl (ik,npw,djl,size_tab,vec_tab, spline_ps, vec_tab_d2y) complex(DP), allocatable :: sk (:) integer :: iq - real(DP), allocatable :: xdata(:) real(DP) :: qt @@ -1204,20 +1182,9 @@ subroutine gen_us_djl (ik,npw,djl,size_tab,vec_tab, spline_ps, vec_tab_d2y) q (ig) = sqrt ( q(ig) ) * tpiba end do - if (spline_ps) then - allocate(xdata(nqx)) - do iq = 1, nqx - xdata(iq) = (iq - 1) * dq - enddo - endif - ! calculate beta in G-space using an interpolation table do ig = 1, npw qt = sqrt(q(ig)) * tpiba - if (spline_ps) then - djl(ig) = splint_deriv(xdata, vec_tab(:), & - vec_tab_d2y(:), qt) - else px = qt / dq - int (qt / dq) ux = 1.d0 - px vx = 2.d0 - px @@ -1230,17 +1197,15 @@ subroutine gen_us_djl (ik,npw,djl,size_tab,vec_tab, spline_ps, vec_tab_d2y) vec_tab (i1) * (+vx*wx-px*wx-px*vx) / 2.d0 - & vec_tab (i2) * (+ux*wx-px*wx-px*ux) / 2.d0 + & vec_tab (i3) * (+ux*vx-px*vx-px*ux) / 6.d0 - endif enddo deallocate (q) deallocate ( gk ) - if (spline_ps) deallocate(xdata) return end subroutine gen_us_djl ! !---------------------------------------------------------------------- -subroutine gen_us_vkb0 (ik,npw,vkb0,size_tab,vec_tab, spline_ps, vec_tab_d2y) +subroutine gen_us_vkb0 (ik,npw,vkb0,size_tab,vec_tab) !---------------------------------------------------------------------- ! ! Calculates the kleinman-bylander pseudopotentials with the @@ -1253,7 +1218,6 @@ subroutine gen_us_vkb0 (ik,npw,vkb0,size_tab,vec_tab, spline_ps, vec_tab_d2y) USE klist, ONLY : xk, igk_k USE gvect, ONLY : g USE uspp_data, ONLY : nqx, dq - USE splinelib, ONLY : splint USE uspp_param, ONLY : upf ! implicit none @@ -1262,8 +1226,6 @@ subroutine gen_us_vkb0 (ik,npw,vkb0,size_tab,vec_tab, spline_ps, vec_tab_d2y) real(DP), intent(inout) ::vkb0(1:npw) integer, intent(in) :: size_tab real(DP), intent(in) :: vec_tab(1:size_tab) - real(DP), intent(in) :: vec_tab_d2y(1:size_tab) - logical :: spline_ps ! integer :: na, nt, nb, ikb,i0, i1, i2, & i3, ig @@ -1272,7 +1234,6 @@ subroutine gen_us_vkb0 (ik,npw,vkb0,size_tab,vec_tab, spline_ps, vec_tab_d2y) complex(DP), allocatable :: sk (:) integer :: iq - real(DP), allocatable :: xdata(:) allocate ( gk(3,npw) ) allocate ( q(npw) ) @@ -1288,19 +1249,8 @@ subroutine gen_us_vkb0 (ik,npw,vkb0,size_tab,vec_tab, spline_ps, vec_tab_d2y) q (ig) = sqrt ( q(ig) ) * tpiba end do - if (spline_ps) then - allocate(xdata(nqx)) - do iq = 1, nqx - xdata(iq) = (iq - 1) * dq - enddo - endif - ! calculate beta in G-space using an interpolation table do ig = 1, npw - if (spline_ps) then - vkb0(ig) = splint(xdata, vec_tab(:), & - vec_tab_d2y(:), q(ig)) - else px = q (ig) / dq - int (q (ig) / dq) ux = 1.d0 - px vx = 2.d0 - px @@ -1313,11 +1263,9 @@ subroutine gen_us_vkb0 (ik,npw,vkb0,size_tab,vec_tab, spline_ps, vec_tab_d2y) vec_tab (i1) * px * vx * wx / 2.d0 - & vec_tab (i2) * px * ux * wx / 2.d0 + & vec_tab (i3) * px * ux * vx / 6.d0 - endif enddo deallocate (q) deallocate ( gk ) - if (spline_ps) deallocate(xdata) return end subroutine gen_us_vkb0 diff --git a/PW/src/clean_pw.f90 b/PW/src/clean_pw.f90 index 1b0c403f8..8981995f6 100644 --- a/PW/src/clean_pw.f90 +++ b/PW/src/clean_pw.f90 @@ -37,7 +37,6 @@ SUBROUTINE clean_pw( lflag ) USE symm_base, ONLY : irt USE symme, ONLY : sym_rho_deallocate USE wavefunctions, ONLY : evc, psic, psic_nc - USE uspp_data, ONLY : qrad, tab, tab_at, tab_d2y, spline_ps USE uspp, ONLY : deallocate_uspp USE uspp_data, ONLY : deallocate_uspp_data USE uspp_param, ONLY : upf diff --git a/PW/src/exx.f90 b/PW/src/exx.f90 index 75291790a..e36da196b 100644 --- a/PW/src/exx.f90 +++ b/PW/src/exx.f90 @@ -3784,9 +3784,8 @@ end associate USE constants, ONLY : tpi USE gvect, ONLY : eigts1, eigts2, eigts3, mill, g USE wvfct, ONLY : npwx, nbnd - USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps + USE uspp_data, ONLY : nqx, dq, tab USE m_gth, ONLY : mk_ffnl_gth - USE splinelib USE uspp, ONLY : nkb, nhtol, nhtolm, indv USE uspp_param, ONLY : upf, lmaxkb, nhm, nh USE becmod, ONLY : calbec @@ -3816,7 +3815,6 @@ end associate COMPLEX(DP) :: phase, pref COMPLEX(DP), ALLOCATABLE :: sk(:) ! - REAL(DP), ALLOCATABLE :: xdata(:) INTEGER :: iq INTEGER :: istart, iend ! @@ -3849,12 +3847,6 @@ end associate qg(ig) = SQRT(qg(ig))*tpiba ENDDO ! - IF (spline_ps) THEN - ALLOCATE( xdata(nqx) ) - DO iq = 1, nqx - xdata(iq) = (iq - 1) * dq - ENDDO - ENDIF ! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q) jkb = 0 ! @@ -3866,9 +3858,6 @@ end associate CALL mk_ffnl_gth( nt, nb, npw_, omega, qg, vq ) ELSE DO ig = 1, npw_ - IF (spline_ps) THEN - vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig)) - ELSE px = qg (ig) / dq - INT(qg (ig) / dq) ux = 1.d0 - px vx = 2.d0 - px @@ -3881,7 +3870,6 @@ end associate tab (i1, nb, nt) * px * vx * wx / 2.d0 - & tab (i2, nb, nt) * px * ux * wx / 2.d0 + & tab (i3, nb, nt) * px * ux * vx / 6.d0 - ENDIF ENDDO ENDIF ! diff --git a/upflib/gen_us_dj.f90 b/upflib/gen_us_dj.f90 index e1563a19f..ff4de8d57 100644 --- a/upflib/gen_us_dj.f90 +++ b/upflib/gen_us_dj.f90 @@ -17,9 +17,8 @@ SUBROUTINE gen_us_dj_base & USE upf_kinds, ONLY: dp USE upf_const, ONLY: tpi USE uspp, ONLY: nkb, indv, nhtol, nhtolm - USE uspp_data, ONLY: nqx, tab, tab_d2y, dq, spline_ps + USE uspp_data, ONLY: nqx, tab, dq USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh - USE splinelib ! IMPLICIT NONE ! @@ -79,7 +78,6 @@ SUBROUTINE gen_us_dj_base & REAL(DP), ALLOCATABLE :: djl(:,:,:), ylm(:,:), q(:), gk(:,:) REAL(DP) :: qt COMPLEX(DP), ALLOCATABLE :: sk(:) - REAL(DP), ALLOCATABLE :: xdata(:) ! IF (nkb == 0) RETURN ! @@ -104,35 +102,23 @@ SUBROUTINE gen_us_dj_base & CALL stop_clock( 'stres_us32' ) CALL start_clock( 'stres_us33' ) ! - IF (spline_ps) THEN - ALLOCATE( xdata(nqx) ) - DO iq = 1, nqx - xdata(iq) = (iq - 1) * dq - ENDDO - ENDIF - ! DO nt = 1, ntyp DO nb = 1, upf(nt)%nbeta ! DO ig = 1, npw qt = SQRT(q (ig)) * tpiba - IF (spline_ps) THEN - djl(ig,nb,nt) = splint_deriv(xdata, tab(:,nb,nt), & - tab_d2y(:,nb,nt), qt) - ELSE - px = qt / dq - INT(qt/dq) - ux = 1.d0 - px - vx = 2.d0 - px - wx = 3.d0 - px - i0 = qt / dq + 1 - i1 = i0 + 1 - i2 = i0 + 2 - i3 = i0 + 3 - djl(ig,nb,nt) = ( tab(i0, nb, nt) * (-vx*wx-ux*wx-ux*vx)/6.d0 + & - tab(i1, nb, nt) * (+vx*wx-px*wx-px*vx)/2.d0 - & - tab(i2, nb, nt) * (+ux*wx-px*wx-px*ux)/2.d0 + & - tab(i3, nb, nt) * (+ux*vx-px*vx-px*ux)/6.d0 )/dq - ENDIF + px = qt / dq - INT(qt/dq) + ux = 1.d0 - px + vx = 2.d0 - px + wx = 3.d0 - px + i0 = qt / dq + 1 + i1 = i0 + 1 + i2 = i0 + 2 + i3 = i0 + 3 + djl(ig,nb,nt) = ( tab(i0, nb, nt) * (-vx*wx-ux*wx-ux*vx)/6.d0 + & + tab(i1, nb, nt) * (+vx*wx-px*wx-px*vx)/2.d0 - & + tab(i2, nb, nt) * (+ux*wx-px*wx-px*ux)/2.d0 + & + tab(i3, nb, nt) * (+ux*vx-px*vx-px*ux)/6.d0 )/dq ENDDO ! ENDDO @@ -183,7 +169,6 @@ SUBROUTINE gen_us_dj_base & DEALLOCATE( sk ) DEALLOCATE( ylm ) DEALLOCATE( djl ) - IF (spline_ps) DEALLOCATE( xdata ) ! RETURN ! diff --git a/upflib/gen_us_dj_gpu.f90 b/upflib/gen_us_dj_gpu.f90 index d2385a861..78869c293 100644 --- a/upflib/gen_us_dj_gpu.f90 +++ b/upflib/gen_us_dj_gpu.f90 @@ -20,8 +20,7 @@ SUBROUTINE gen_us_dj_gpu_ & USE upf_kinds, ONLY: dp USE upf_const, ONLY: tpi USE uspp, ONLY: nkb, indv_d, nhtol_d, nhtolm_d - USE uspp_data, ONLY: nqx, tab, tab_d2y, tab_d, dq, spline_ps - USE splinelib + USE uspp_data, ONLY: nqx, tab, tab_d, dq USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm USE device_fbuff_m, ONLY: dev_buf ! @@ -70,8 +69,6 @@ SUBROUTINE gen_us_dj_gpu_ & REAL(DP) :: px, ux, vx, wx, arg, u_ipol, xk1, xk2, xk3, qt COMPLEX(DP) :: pref INTEGER, ALLOCATABLE :: ityp_d(:), ih_d(:), na_d(:), nas_d(:) - REAL(DP), ALLOCATABLE :: q(:), djl(:,:,:), ylm(:,:) - REAL(DP), ALLOCATABLE :: xdata(:) ! REAL(DP), POINTER :: gk_d(:,:), djl_d(:,:,:), ylm_d(:,:) REAL(DP), ALLOCATABLE :: q_d(:), tau_d(:,:) @@ -106,30 +103,7 @@ SUBROUTINE gen_us_dj_gpu_ & ! CALL ylmr2_gpu( (lmaxkb+1)**2, npw, gk_d, q_d, ylm_d ) ! - IF ( spline_ps ) THEN - ALLOCATE( q(npw), xdata(nqx), djl(npw,nbetam,ntyp) ) - q = q_d - DO iq = 1, nqx - xdata(iq) = (iq - 1) * dq - ENDDO - ! - DO nt = 1, ntyp - ! calculate beta in G-space using an interpolation table - DO nb = 1, upf(nt)%nbeta - DO ig = 1, npw - qt = SQRT(q(ig)) * tpiba - djl(ig,nb,nt) = splint_deriv( xdata, tab(:,nb,nt), & - tab_d2y(:,nb,nt), qt ) - ENDDO - ENDDO - ENDDO - djl_d = djl - ! - DEALLOCATE( q, xdata, djl ) - ! - ELSE - ! - DO nt = 1, ntyp + DO nt = 1, ntyp nbm = upf(nt)%nbeta !$cuf kernel do (2) <<<*,*>>> DO nb = 1, nbm @@ -149,9 +123,7 @@ SUBROUTINE gen_us_dj_gpu_ & tab_d(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6._DP)/dq ENDDO ENDDO - ENDDO - ! - ENDIF + ENDDO ! DEALLOCATE( q_d ) ! @@ -199,7 +171,6 @@ SUBROUTINE gen_us_dj_gpu_ & ENDDO ENDDO ! - ! DEALLOCATE( phase_d ) ! diff --git a/upflib/gen_us_dy.f90 b/upflib/gen_us_dy.f90 index b590e9c28..05f5636ac 100644 --- a/upflib/gen_us_dy.f90 +++ b/upflib/gen_us_dy.f90 @@ -17,9 +17,8 @@ SUBROUTINE gen_us_dy_base & USE upf_kinds, ONLY: dp USE upf_const, ONLY: tpi USE uspp, ONLY: nkb, indv, nhtol, nhtolm - USE uspp_data, ONLY: nqx, tab, tab_d2y, dq, spline_ps + USE uspp_data, ONLY: nqx, tab, dq USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh - USE splinelib ! IMPLICIT NONE ! @@ -75,7 +74,6 @@ SUBROUTINE gen_us_dy_base & COMPLEX(DP) :: phase, pref ! INTEGER :: iq - REAL(DP), ALLOCATABLE :: xdata(:) ! dvkb(:,:) = (0.d0, 0.d0) IF (lmaxkb <= 0) RETURN @@ -103,34 +101,22 @@ SUBROUTINE gen_us_dy_base & q(ig) = SQRT(q(ig)) * tpiba ENDDO ! - IF ( spline_ps ) THEN - ALLOCATE( xdata(nqx) ) - DO iq = 1, nqx - xdata(iq) = (iq - 1) * dq - ENDDO - ENDIF - ! DO nt = 1, ntyp ! calculate beta in G-space using an interpolation table DO nb = 1, upf(nt)%nbeta DO ig = 1, npw - IF ( spline_ps ) THEN - vkb0(ig,nb,nt) = splint( xdata, tab(:,nb,nt), & - tab_d2y(:,nb,nt), q(ig) ) - ELSE - px = q(ig)/dq - INT(q(ig)/dq) - ux = 1.d0 - px - vx = 2.d0 - px - wx = 3.d0 - px - i0 = q(ig)/dq + 1 - i1 = i0 + 1 - i2 = i0 + 2 - i3 = i0 + 3 - vkb0(ig, nb, nt) = tab(i0, nb, nt) * ux * vx * wx / 6.d0 + & - tab(i1, nb, nt) * px * vx * wx / 2.d0 - & - tab(i2, nb, nt) * px * ux * wx / 2.d0 + & - tab(i3, nb, nt) * px * ux * vx / 6.d0 - ENDIF + px = q(ig)/dq - INT(q(ig)/dq) + ux = 1.d0 - px + vx = 2.d0 - px + wx = 3.d0 - px + i0 = q(ig)/dq + 1 + i1 = i0 + 1 + i2 = i0 + 2 + i3 = i0 + 3 + vkb0(ig, nb, nt) = tab(i0, nb, nt) * ux * vx * wx / 6.d0 + & + tab(i1, nb, nt) * px * vx * wx / 2.d0 - & + tab(i2, nb, nt) * px * ux * wx / 2.d0 + & + tab(i3, nb, nt) * px * ux * vx / 6.d0 ENDDO ENDDO ENDDO @@ -173,7 +159,6 @@ SUBROUTINE gen_us_dy_base & ! DEALLOCATE( sk ) DEALLOCATE( vkb0, dylm_u, gk ) - IF (spline_ps) DEALLOCATE( xdata ) ! RETURN ! diff --git a/upflib/gen_us_dy_gpu.f90 b/upflib/gen_us_dy_gpu.f90 index 7dacec233..fa7ba14ea 100644 --- a/upflib/gen_us_dy_gpu.f90 +++ b/upflib/gen_us_dy_gpu.f90 @@ -19,8 +19,7 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, & USE upf_kinds, ONLY: dp USE upf_const, ONLY: tpi USE uspp, ONLY: nkb, indv_d, nhtol_d, nhtolm_d - USE uspp_data, ONLY: nqx, tab, tab_d2y, tab_d, dq, spline_ps - USE splinelib + USE uspp_data, ONLY: nqx, tab, tab_d, dq USE uspp_param, ONLY: upf, lmaxkb, nbetam, nh, nhm USE device_fbuff_m, ONLY: dev_buf ! @@ -72,11 +71,11 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, & ! INTEGER, ALLOCATABLE :: ityp_d(:), ih_d(:), na_d(:), nas_d(:) ! - REAL(DP), ALLOCATABLE :: q(:), vkb0(:,:,:), dylm(:,:) - REAL(DP), ALLOCATABLE :: xdata(:), tau_d(:,:), q_d(:) + REAL(DP), ALLOCATABLE :: q(:), dylm(:,:) ! REAL(DP), POINTER :: gk_d(:,:) REAL(DP), POINTER :: vkb0_d(:,:,:), dylm_u_d(:,:), dylm_d(:,:,:) + REAL(DP), ALLOCATABLE :: q_d(:), tau_d(:,:) ! dylm = d Y_lm/dr_i in cartesian axes ! dylm_u as above projected on u COMPLEX(DP), ALLOCATABLE :: phase_d(:), sk_d(:,:) @@ -140,35 +139,10 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, & ENDDO ! ! - IF ( spline_ps ) THEN - ! - ! AF: using splint_eq ?? - ! - ALLOCATE( q(npw), xdata(nqx), vkb0(npw,nbetam,ntyp) ) - q = q_d - DO iq = 1, nqx - xdata(iq) = (iq - 1) * dq - ENDDO - ! - DO nt = 1, ntyp - ! calculate beta in G-space using an interpolation table - DO nb = 1, upf(nt)%nbeta - DO ig = 1, npw - vkb0(ig,nb,nt) = splint( xdata, tab(:,nb,nt), & - tab_d2y(:,nb,nt), q(ig) ) - ENDDO - ENDDO - ENDDO - vkb0_d = vkb0 - ! - DEALLOCATE( q, xdata, vkb0 ) - ! - ELSE - ! - DO nt = 1, ntyp - nbm = upf(nt)%nbeta - !$cuf kernel do (2) <<<*,*>>> - DO nb = 1, nbm + DO nt = 1, ntyp + nbm = upf(nt)%nbeta + !$cuf kernel do (2) <<<*,*>>> + DO nb = 1, nbm DO ig = 1, npw px = q_d(ig)/dq - DBLE(INT(q_d(ig)/dq)) ux = 1._DP - px @@ -182,11 +156,9 @@ SUBROUTINE gen_us_dy_gpu_ ( npw, npwx, igk_d, xk, nat, tau, ityp, ntyp, & tab_d(i1,nb,nt) * px * vx * wx / 2._DP - & tab_d(i2,nb,nt) * px * ux * wx / 2._DP + & tab_d(i3,nb,nt) * px * ux * vx / 6._DP - ENDDO - ENDDO + ENDDO ENDDO - ! - ENDIF + ENDDO ! DEALLOCATE( q_d ) ! diff --git a/upflib/init_tab_beta.f90 b/upflib/init_tab_beta.f90 index 73454a06b..54ee7af9c 100644 --- a/upflib/init_tab_beta.f90 +++ b/upflib/init_tab_beta.f90 @@ -15,9 +15,8 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm ) USE upf_const, ONLY : fpi USE atom, ONLY : rgrid USE uspp_param, ONLY : upf, lmaxq, nbetam, nsp - USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps, tab_d, tab_d2y_d + USE uspp_data, ONLY : nqx, dq, tab, tab_d USE mp, ONLY : mp_sum - USE splinelib, ONLY : spline USE m_gth, ONLY : mk_ffnl_gth ! IMPLICIT NONE @@ -32,8 +31,6 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm ) ! the prefactor of the Q functions real(DP) :: vqint, d1 ! - real(DP), allocatable :: xdata(:) - ! work space for spline REAL(dp), allocatable :: aux (:) ! work space REAL(dp), allocatable :: besr(:) @@ -68,30 +65,10 @@ SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm ) ! call mp_sum( tab, intra_bgrp_comm ) ! - ! initialize spline interpolation - ! - if (spline_ps) then - allocate( xdata(nqx) ) - do iq = 1, nqx - xdata(iq) = (iq - 1) * dq - enddo - do nt = 1, nsp - do nb = 1, upf(nt)%nbeta - d1 = (tab(2,nb,nt) - tab(1,nb,nt)) / dq - call spline(xdata, tab(:,nb,nt), 0.d0, d1, tab_d2y(:,nb,nt)) - enddo - enddo - deallocate(xdata) - ! - endif - ! ! update GPU memory (taking care of zero-dim allocations) ! #if defined __CUDA - if ( nbetam > 0 ) then - tab_d=tab - if (spline_ps) tab_d2y_d=tab_d2y - endif + if ( nbetam > 0 ) tab_d=tab #endif ! END SUBROUTINE init_tab_beta diff --git a/upflib/init_us_1.f90 b/upflib/init_us_1.f90 index ea7500a83..40a174ba6 100644 --- a/upflib/init_us_1.f90 +++ b/upflib/init_us_1.f90 @@ -306,7 +306,7 @@ subroutine init_us_1( nat, ityp, omega, ngm, g, gg, intra_bgrp_comm ) end do end if ! - ! fill interpolation table tab (and tab_d2y for spline interpolation) + ! fill interpolation table tab ! CALL init_tab_beta ( omega, intra_bgrp_comm ) ! diff --git a/upflib/init_us_2_base.f90 b/upflib/init_us_2_base.f90 index 5db155b9b..32f4f5620 100644 --- a/upflib/init_us_2_base.f90 +++ b/upflib/init_us_2_base.f90 @@ -15,8 +15,7 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & ! USE upf_kinds, ONLY : DP USE upf_const, ONLY : tpi - USE uspp_data, ONLY : nqx, dq, tab, tab_d2y, spline_ps - USE splinelib + USE uspp_data, ONLY : nqx, dq, tab USE uspp, ONLY : nkb, nhtol, nhtolm, indv USE uspp_param, ONLY : upf, lmaxkb, nhm, nh, nsp ! @@ -59,7 +58,6 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & REAL(DP) :: px, ux, vx, wx, arg COMPLEX(DP) :: phase, pref REAL(DP), ALLOCATABLE :: gk(:,:), qg(:), vq(:), ylm(:,:), vkb1(:,:) - REAL(DP), ALLOCATABLE :: xdata(:) COMPLEX(DP), ALLOCATABLE :: sk(:) INTEGER :: iq ! cache blocking parameters @@ -73,13 +71,6 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & ! setting cache blocking size numblock = (npw_+blocksize-1)/blocksize ! - IF (spline_ps) THEN - ALLOCATE( xdata(nqx) ) - DO iq = 1, nqx - xdata(iq) = (iq - 1) * dq - ENDDO - ENDIF - ! !$omp parallel private(vkb1, sk, qg, vq, ylm, gk, ig_orig, & !$omp realblocksize, jkb, px, ux, vx, wx, & !$omp i0, i1, i2, i3, lm, arg, phase, pref) @@ -112,6 +103,11 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & qg(ig) = SQRT(qg(ig))*tpiba ENDDO ! + ! This should not happen, but better to check + ! + IF ( INT(qg(realblocksize)/dq)+4 > size(tab,1) ) CALL errore & + ('init_us_2', 'internal error: dimension of interpolation table', 1 ) + ! ! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q) jkb = 0 DO nt = 1, nsp @@ -120,22 +116,18 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & DO nb = 1, upf(nt)%nbeta ! DO ig = 1, realblocksize - IF (spline_ps) THEN - vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig)) - ELSE - px = qg(ig) / dq - INT( qg(ig)/dq ) - ux = 1.d0 - px - vx = 2.d0 - px - wx = 3.d0 - px - i0 = INT( qg(ig)/dq ) + 1 - i1 = i0 + 1 - i2 = i0 + 2 - i3 = i0 + 3 - vq(ig) = tab(i0,nb,nt) * ux * vx * wx / 6.d0 + & - tab(i1,nb,nt) * px * vx * wx / 2.d0 - & - tab(i2,nb,nt) * px * ux * wx / 2.d0 + & - tab(i3,nb,nt) * px * ux * vx / 6.d0 - ENDIF + px = qg(ig) / dq - INT( qg(ig)/dq ) + ux = 1.d0 - px + vx = 2.d0 - px + wx = 3.d0 - px + i0 = INT( qg(ig)/dq ) + 1 + i1 = i0 + 1 + i2 = i0 + 2 + i3 = i0 + 3 + vq(ig) = tab(i0,nb,nt) * ux * vx * wx / 6.d0 + & + tab(i1,nb,nt) * px * vx * wx / 2.d0 - & + tab(i2,nb,nt) * px * ux * wx / 2.d0 + & + tab(i3,nb,nt) * px * ux * vx / 6.d0 ENDDO ! add spherical harmonic part (Y_lm(q)*f_l(q)) DO ih = 1, nh(nt) @@ -199,8 +191,6 @@ SUBROUTINE init_us_2_base( npw_, npwx, igk_, q_, nat, tau, ityp, & DEALLOCATE( sk ) DEALLOCATE( vkb1 ) !$omp end parallel - ! - IF (spline_ps) DEALLOCATE( xdata ) ! CALL stop_clock( 'init_us_2:cpu' ) ! diff --git a/upflib/init_us_2_base_gpu.f90 b/upflib/init_us_2_base_gpu.f90 index 01f95a1be..38b09f704 100644 --- a/upflib/init_us_2_base_gpu.f90 +++ b/upflib/init_us_2_base_gpu.f90 @@ -16,8 +16,7 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, & ! USE upf_kinds, ONLY : DP USE upf_const, ONLY : tpi - USE uspp_data, ONLY : nqx, dq, spline_ps, tab_d, tab_d2y_d - USE splinelib, ONLY : splint_eq + USE uspp_data, ONLY : nqx, dq, tab_d USE uspp, ONLY : nkb, nhtol, nhtolm, indv USE uspp_param, ONLY : upf, lmaxkb, nhm, nh, nsp USE device_fbuff_m, ONLY : dev_buf @@ -121,41 +120,25 @@ SUBROUTINE init_us_2_base_gpu( npw_, npwx, igk__d, q_, nat, tau, ityp, & qg_d(ig) = sqrt(qg_d(ig))*tpiba enddo - ! JR Don't need this when using splint_eq_gpu - !if (spline_ps) then - ! allocate(xdata(nqx)) - ! do iq = 1, nqx - ! xdata(iq) = (iq - 1) * dq - ! enddo - !endif - ! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q) jkb = 0 do nt = 1, nsp do nb = 1, upf(nt)%nbeta - if (spline_ps) then - call splint_eq(dq, tab_d(:,nb,nt), tab_d2y_d(:,nb,nt), qg_d, vq_d) - else - !$cuf kernel do(1) <<<*,*>>> - do ig = 1, npw_ - rv_d = qg_d(ig) - px = rv_d / dq - int (rv_d / dq) - ux = 1.d0 - px - vx = 2.d0 - px - wx = 3.d0 - px - i0 = INT( rv_d / dq ) + 1 - i1 = i0 + 1 - i2 = i0 + 2 - i3 = i0 + 3 - vq_d (ig) = ux * vx * (wx * tab_d(i0, nb, nt) + px * tab_d(i3, nb, nt)) / 6.d0 + & - px * wx * (vx * tab_d(i1, nb, nt) - ux * tab_d(i2, nb, nt)) * 0.5d0 + !$cuf kernel do(1) <<<*,*>>> + do ig = 1, npw_ + rv_d = qg_d(ig) + px = rv_d / dq - int (rv_d / dq) + ux = 1.d0 - px + vx = 2.d0 - px + wx = 3.d0 - px + i0 = INT( rv_d / dq ) + 1 + i1 = i0 + 1 + i2 = i0 + 2 + i3 = i0 + 3 + vq_d (ig) = ux * vx * (wx * tab_d(i0, nb, nt) + px * tab_d(i3, nb, nt)) / 6.d0 + & + px * wx * (vx * tab_d(i1, nb, nt) - ux * tab_d(i2, nb, nt)) * 0.5d0 - !vq_d (ig) = tab_d (i0, nb, nt) * ux * vx * wx / 6.d0 + & - ! tab_d (i1, nb, nt) * px * vx * wx / 2.d0 - & - ! tab_d (i2, nb, nt) * px * ux * wx / 2.d0 + & - ! tab_d (i3, nb, nt) * px * ux * vx / 6.d0 - enddo - endif + enddo ! add spherical harmonic part (Y_lm(q)*f_l(q)) do ih = 1, nh (nt) diff --git a/upflib/uspp_data.f90 b/upflib/uspp_data.f90 index 54a5b6c49..8f637ad12 100644 --- a/upflib/uspp_data.f90 +++ b/upflib/uspp_data.f90 @@ -15,13 +15,16 @@ MODULE uspp_data SAVE PRIVATE ! - PUBLIC :: nqxq, nqx, dq, spline_ps - PUBLIC :: qrad, tab, tab_at, tab_d2y - PUBLIC :: qrad_d, tab_d, tab_at_d, tab_d2y_d + PUBLIC :: nqxq, nqx, dq + PUBLIC :: qrad, tab, tab_at + PUBLIC :: qrad_d, tab_d, tab_at_d ! PUBLIC :: allocate_uspp_data PUBLIC :: deallocate_uspp_data PUBLIC :: scale_uspp_data + ! Next variables for compatibility only, to be removed + LOGICAL, PUBLIC :: spline_ps=.TRUE. + REAL(DP), ALLOCATABLE, PUBLIC :: tab_d2y(:,:,:) ! INTEGER :: nqxq !! size of interpolation table @@ -35,19 +38,15 @@ MODULE uspp_data !! interpolation table for PPs REAL(DP), ALLOCATABLE :: tab_at(:,:,:) !! interpolation table for atomic wfc - LOGICAL :: spline_ps = .FALSE. - REAL(DP), ALLOCATABLE :: tab_d2y(:,:,:) - !! for cubic splines ! ! GPUs vars ! REAL(DP), ALLOCATABLE :: qrad_d(:,:,:,:) REAL(DP), ALLOCATABLE :: tab_d(:,:,:) REAL(DP), ALLOCATABLE :: tab_at_d(:,:,:) - REAL(DP), ALLOCATABLE :: tab_d2y_d(:,:,:) ! #if defined(__CUDA) - attributes (DEVICE) :: qrad_d, tab_d, tab_at_d, tab_d2y_d + attributes (DEVICE) :: qrad_d, tab_d, tab_at_d #endif ! contains @@ -63,7 +62,6 @@ contains if (lmaxq>0) allocate(qrad(nqxq_,nbetam*(nbetam+1)/2, lmaxq, nsp)) allocate(tab(nqx_,nbetam,nsp)) allocate(tab_at(nqx_,nwfcm,nsp)) - if (spline_ps) allocate(tab_d2y(nqx_,nbetam,nsp)) ! IF (use_gpu) then ! allocations with zero size protected @@ -72,7 +70,6 @@ contains allocate(qrad_d(nqxq_,nbetam*(nbetam+1)/2, lmaxq, nsp)) if (nbetam>0) allocate(tab_d(nqx_,nbetam,nsp)) if (nwfcm>0) allocate(tab_at_d(nqx_,nwfcm,nsp)) - if (spline_ps) allocate(tab_d2y_d(nqx_,nbetam,nsp)) endif ! end subroutine allocate_uspp_data @@ -82,12 +79,10 @@ contains if( allocated( qrad ) ) deallocate( qrad ) if( allocated( tab ) ) deallocate( tab ) if( allocated( tab_at ) ) deallocate( tab_at ) - if( allocated( tab_d2y ) ) deallocate( tab_d2y ) ! if( allocated( qrad_d ) ) deallocate( qrad_d ) if( allocated( tab_d ) ) deallocate( tab_d ) if( allocated( tab_at_d ) ) deallocate( tab_at_d ) - if( allocated( tab_d2y_d ) ) deallocate( tab_d2y_d ) end subroutine ! subroutine scale_uspp_data( vol_ratio_m1 )