From e9c66da61189ef9afae8cd7eaa6372d39b01d6ef Mon Sep 17 00:00:00 2001
From: Paolo Giannozzi <p.giannozzi@gmail.com>
Date: Tue, 2 Jan 2024 17:37:01 +0100
Subject: [PATCH] Duplicated code from GWW deleted, small changes done to
 interpolation routines interp_beta and interp_dbeta to ensure that
 GWW/simple/ still work. NO WARRANTY.

---
 GWW/simple/Makefile          |   1 -
 GWW/simple/commutator.f90    |  60 +++-----------
 GWW/simple/init_us_2_max.f90 | 150 -----------------------------------
 GWW/simple/khamiltonian.f90  |   2 +-
 upflib/gen_us_dj.f90         |  12 ++-
 upflib/init_us_2_acc.f90     |  24 +++---
 6 files changed, 34 insertions(+), 215 deletions(-)
 delete mode 100644 GWW/simple/init_us_2_max.f90

diff --git a/GWW/simple/Makefile b/GWW/simple/Makefile
index 581d69db1..e86ddefb3 100644
--- a/GWW/simple/Makefile
+++ b/GWW/simple/Makefile
@@ -18,7 +18,6 @@ SIMPLEOBJS = \
   epe.o  \
   gk_sort_limit.o \
   khamiltonian.o \
-  init_us_2_max.o \
   commutator.o 
 
 
diff --git a/GWW/simple/commutator.f90 b/GWW/simple/commutator.f90
index 905be9764..33f52f466 100644
--- a/GWW/simple/commutator.f90
+++ b/GWW/simple/commutator.f90
@@ -14,8 +14,7 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
   USE klist,      ONLY : ngk
   USE gvect,      ONLY : mill, eigts1, eigts2, eigts3, g
   USE uspp,       ONLY : nkb, indv, nhtol, nhtolm
-  USE uspp_data,  ONLY : nqx, tab_beta, dq
-  USE uspp_param, ONLY : upf, lmaxkb, nbetam, nh
+  USE uspp_param, ONLY : lmaxkb, nbetam, nh
   USE io_global, ONLY : stdout
   !
   implicit none
@@ -26,7 +25,7 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
   !
   ! local variables
   !
-  integer :: ikb, nb, ih, ig, i0, i1, i2, i3 , nt
+  integer :: ikb, nb, ih, ig, nt
   ! counter on beta functions
   ! counter on beta functions
   ! counter on beta functions
@@ -34,7 +33,7 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
   ! index of the first nonzero point in the r
   ! counter on atomic type
 
-  real(DP) :: arg, px, ux, vx, wx
+  real(DP) :: arg
   ! argument of the atomic phase factor
 
   complex(DP) :: phase, pref
@@ -43,7 +42,6 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
 
   integer :: na, l, iig, lm, iq
   real(DP), allocatable :: djl (:,:,:), ylm (:,:), q (:), gk (:,:)
-  real(DP) ::  qt
   complex(DP), allocatable :: sk (:)
 
   call start_clock('gen_beta1')
@@ -70,28 +68,11 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
   call stop_clock('stres_us32')
   call start_clock('stres_us33')
 
+  do ig = 1, npw_max
+     q (ig) = SQRT (q(ig)) * tpiba
+  enddo
   do nt = 1, ntyp
-     do nb = 1, upf(nt)%nbeta
-        do ig = 1, npw_max
-           qt = sqrt(q (ig)) * tpiba
-             px = qt / dq - int (qt / dq)
-             ux = 1.d0 - px
-             vx = 2.d0 - px
-             wx = 3.d0 - px
-             i0 = qt / dq + 1
-             i1 = i0 + 1
-             i2 = i0 + 2
-             i3 = i0 + 3
-             if (i3 <= nqx) then ! Approximation
-                djl(ig,nb,nt) = ( tab_beta (i0, nb, nt) * (-vx*wx-ux*wx-ux*vx)/6.d0 + &
-                               tab_beta (i1, nb, nt) * (+vx*wx-px*wx-px*vx)/2.d0 - &
-                               tab_beta (i2, nb, nt) * (+ux*wx-px*wx-px*ux)/2.d0 + &
-                               tab_beta (i3, nb, nt) * (+ux*vx-px*vx-px*ux)/6.d0 )/dq
-             else
-                djl(ig,nb,nt) = 0.d0  ! Approximation
-             endif
-        enddo
-     enddo
+     CALL interp_dbeta( nt, npw_max, q, djl(:,:,nt) )
   enddo
   call stop_clock('stres_us33')
   call start_clock('stres_us34')
@@ -156,7 +137,6 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb)
   USE klist,      ONLY : ngk, igk_k
   USE gvect,      ONLY : mill, eigts1, eigts2, eigts3, g
   USE uspp,       ONLY : nkb, indv, nhtol, nhtolm
-  USE uspp_data,  ONLY : nqx, tab_beta, dq
   USE uspp_param, ONLY : upf, lmaxkb, nbetam, nh
   !
   implicit none
@@ -166,10 +146,9 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb)
   real(DP) :: u (3)
 
   complex(DP) :: dvkb (npw_max, nkb)
-  integer :: na, nt, nb, ih, l, lm, ikb, iig, ipol, i0, i1, i2, &
-       i3, ig
+  integer :: na, nt, nb, ih, l, lm, ikb, iig, ipol, ig
   real(DP), allocatable :: gk(:,:), q (:)
-  real(DP) :: px, ux, vx, wx, arg
+  real(DP) :: arg
 
   real(DP), allocatable :: vkb0 (:,:,:), dylm (:,:), dylm_u (:,:)
   ! dylm = d Y_lm/dr_i in cartesian axes
@@ -211,26 +190,7 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb)
 
   do nt = 1, ntyp
      ! calculate beta in G-space using an interpolation table
-     do nb = 1, upf(nt)%nbeta
-        do ig = 1, npw_max
-             px = q (ig) / dq - int (q (ig) / dq)
-             ux = 1.d0 - px
-             vx = 2.d0 - px
-             wx = 3.d0 - px
-             i0 = q (ig) / dq + 1
-             i1 = i0 + 1
-             i2 = i0 + 2
-             i3 = i0 + 3
-             if (i3<=nqx) then ! DEBUG
-                vkb0 (ig, nb, nt) = tab_beta (i0, nb, nt) * ux * vx * wx / 6.d0 + &
-                                    tab_beta (i1, nb, nt) * px * vx * wx / 2.d0 - &
-                                    tab_beta (i2, nb, nt) * px * ux * wx / 2.d0 + &
-                                    tab_beta (i3, nb, nt) * px * ux * vx / 6.d0
-             else
-                vkb0 (ig, nb, nt) = 0.d0 ! DEBUG
-             endif
-        enddo
-     enddo
+     CALL interp_beta( nt, npw_max, q, vkb0(:,:,nt) )
   enddo
 
   deallocate (q)
diff --git a/GWW/simple/init_us_2_max.f90 b/GWW/simple/init_us_2_max.f90
deleted file mode 100644
index e5ba80002..000000000
--- a/GWW/simple/init_us_2_max.f90
+++ /dev/null
@@ -1,150 +0,0 @@
-!
-! Copyright (C) 2001-2015 Quantum ESPRESSO group
-! This file is distributed under the terms of the
-! GNU General Public License. See the file `License'
-! in the root directory of the present distribution,
-! or http://www.gnu.org/copyleft/gpl.txt .
-!
-!
-!----------------------------------------------------------------------
-subroutine init_us_2_max (npw_, igk_, q_, vkb_)
-  !----------------------------------------------------------------------
-  !
-  !   Calculates beta functions (Kleinman-Bylander projectors), with
-  !   structure factor, for all atoms, in reciprocal space. On input:
-  !      npw_       : number of PWs 
-  !      igk_(npw_) : indices of G in the list of q+G vectors
-  !      q_(3)      : q vector (2pi/a units)
-  !  On output:
-  !      vkb_(npw_,nkb) : beta functions
-  !
-  USE kinds,      ONLY : DP
-  USE ions_base,  ONLY : nat, ntyp => nsp, ityp, tau
-  USE cell_base,  ONLY : tpiba, omega
-  USE constants,  ONLY : tpi
-  USE gvect,      ONLY : eigts1, eigts2, eigts3, mill, g
-  USE uspp_data,  ONLY : nqx, dq, tab_beta
-  USE uspp,       ONLY : nkb, nhtol, nhtolm, indv
-  USE uspp_param, ONLY : upf, lmaxkb, nhm, nh
-  USE io_global,  ONLY : stdout
-  !
-  implicit none
-  !
-  INTEGER, INTENT (IN) :: npw_, igk_ (npw_)
-  REAL(dp), INTENT(IN) :: q_(3)
-  COMPLEX(dp), INTENT(OUT) :: vkb_ (npw_, nkb)
-  !
-  !     Local variables
-  !
-  integer :: i0,i1,i2,i3, ig, lm, na, nt, nb, ih, jkb
-
-  real(DP) :: px, ux, vx, wx, arg
-  real(DP), allocatable :: gk (:,:), qg (:), vq (:), ylm (:,:), vkb1(:,:)
-
-  complex(DP) :: phase, pref
-  complex(DP), allocatable :: sk(:)
-
-  integer :: iq
-
-  !
-  !
-  if (lmaxkb.lt.0) return
-  call start_clock ('init_us_2_max')
-  allocate (vkb1( npw_,nhm))    
-  allocate (  sk( npw_))    
-  allocate (  qg( npw_))    
-  allocate (  vq( npw_))    
-  allocate ( ylm( npw_, (lmaxkb + 1) **2))    
-  allocate (  gk( 3, npw_))    
-  !
-!   write(*,'(3i4,i5,3f10.5)') size(tab,1), size(tab,2), size(tab,3), size(vq), q_
-
-  do ig = 1, npw_
-     gk (1,ig) = q_(1) + g(1, igk_(ig) )
-     gk (2,ig) = q_(2) + g(2, igk_(ig) )
-     gk (3,ig) = q_(3) + g(3, igk_(ig) )
-     qg (ig) = gk(1, ig)**2 +  gk(2, ig)**2 + gk(3, ig)**2
-  enddo
-  !
-  call ylmr2 ((lmaxkb+1)**2, npw_, gk, qg, ylm)
-  !
-  ! set now qg=|q+G| in atomic units
-  !
-  do ig = 1, npw_
-     qg(ig) = sqrt(qg(ig))*tpiba
-  enddo
-
-  ! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q)
-  jkb = 0
-  do nt = 1, ntyp
-     ! calculate beta in G-space using an interpolation table f_l(q)=\int _0 ^\infty dr r^2 f_l(r) j_l(q.r)
-     do nb = 1, upf(nt)%nbeta
-           do ig = 1, npw_
-                px = qg (ig) / dq - int (qg (ig) / dq)
-                ux = 1.d0 - px
-                vx = 2.d0 - px
-                wx = 3.d0 - px
-                i0 = INT( qg (ig) / dq ) + 1
-                i1 = i0 + 1
-                i2 = i0 + 2
-                i3 = i0 + 3
-                if (i3<=nqx) then   ! WARNING: Here we change from the original subroutine init_us_2.f90
-                    vq (ig) = tab_beta (i0, nb, nt) * ux * vx * wx / 6.d0 + &
-                          tab_beta (i1, nb, nt) * px * vx * wx / 2.d0 - &
-                          tab_beta (i2, nb, nt) * px * ux * wx / 2.d0 + &
-                          tab_beta (i3, nb, nt) * px * ux * vx / 6.d0
-                else
-                   vq(ig) = 0.0
-                endif
-           enddo
-        ! add spherical harmonic part  (Y_lm(q)*f_l(q)) 
-        do ih = 1, nh (nt)
-           if (nb.eq.indv (ih, nt) ) then
-              !l = nhtol (ih, nt)
-              lm =nhtolm (ih, nt)
-              do ig = 1, npw_
-                 vkb1 (ig,ih) = ylm (ig, lm) * vq (ig)
-              enddo
-           endif
-        enddo
-     enddo
-     !
-     ! vkb1 contains all betas including angular part for type nt
-     ! now add the structure factor and factor (-i)^l
-     !
-     do na = 1, nat
-        ! ordering: first all betas for atoms of type 1
-        !           then  all betas for atoms of type 2  and so on
-        if (ityp (na) .eq.nt) then
-           arg = (q_(1) * tau (1, na) + &
-                  q_(2) * tau (2, na) + &
-                  q_(3) * tau (3, na) ) * tpi
-           phase = CMPLX(cos (arg), - sin (arg) ,kind=DP)
-           do ig = 1, npw_
-              sk (ig) = eigts1 (mill(1,igk_(ig)), na) * &
-                        eigts2 (mill(2,igk_(ig)), na) * &
-                        eigts3 (mill(3,igk_(ig)), na)
-           enddo
-           do ih = 1, nh (nt)
-              jkb = jkb + 1
-              pref = (0.d0, -1.d0) **nhtol (ih, nt) * phase
-              do ig = 1, npw_
-                 vkb_(ig, jkb) = vkb1 (ig,ih) * sk (ig) * pref
-              enddo
-           
-           enddo
-        endif
-     enddo
-  enddo
-
-  deallocate (gk)
-  deallocate (ylm)
-  deallocate (vq)
-  deallocate (qg)
-  deallocate (sk)
-  deallocate (vkb1)
-
-  call stop_clock ('init_us_2_max')
-  return
-end subroutine init_us_2_max
-
diff --git a/GWW/simple/khamiltonian.f90 b/GWW/simple/khamiltonian.f90
index 708677ab7..eee0cd23f 100644
--- a/GWW/simple/khamiltonian.f90
+++ b/GWW/simple/khamiltonian.f90
@@ -232,7 +232,7 @@ subroutine khamiltonian
                 !
                 call start_clock('Vnloc')
                 if (nkb>0) then
-                   call init_us_2_max(npw_max,igkk,qk,vkb_max)  ! get the projectors \beta_Ilm (k-dependent)
+                   call init_us_2(npw_max,igkk,qk,vkb_max)  ! get the projectors \beta_Ilm (k-dependent)
                 endif
                 !vkb_max(npwx+1:npw_max,1:nkb) = 0.d0 ! WARNING: HERE I PUT TO ZERO THE ELEMENTS OF BETA WiTH G > npwx
                 !
diff --git a/upflib/gen_us_dj.f90 b/upflib/gen_us_dj.f90
index 92e519a44..ef09b51c1 100644
--- a/upflib/gen_us_dj.f90
+++ b/upflib/gen_us_dj.f90
@@ -233,10 +233,14 @@ SUBROUTINE interp_dbeta( nt, npw, qg, vq )
         i1 = i0 + 1
         i2 = i0 + 2
         i3 = i0 + 3
-        vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
-                      tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
-                      tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
-                      tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp ) / dq
+        IF ( i3 <= nqx ) THEN
+            vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
+                          tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
+                          tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
+                          tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp )/dq
+        ELSE
+            vq(ig,nb) = 0.0_dp 
+        END IF
      ENDDO
   END DO
   !$acc end data
diff --git a/upflib/init_us_2_acc.f90 b/upflib/init_us_2_acc.f90
index 540d7efd5..3f5157041 100644
--- a/upflib/init_us_2_acc.f90
+++ b/upflib/init_us_2_acc.f90
@@ -176,7 +176,7 @@ SUBROUTINE interp_beta( nt, npw_, qg, vq )
   !
   USE upf_kinds,  ONLY : dp
   USE uspp_param, ONLY : upf, nbetam
-  USE uspp_data,  ONLY : dq, tab_beta
+  USE uspp_data,  ONLY : nqx, dq, tab_beta
   !
   implicit none
   integer, intent(in) :: nt, npw_
@@ -193,18 +193,24 @@ SUBROUTINE interp_beta( nt, npw_, qg, vq )
      DO ig = 1, npw_
         qgr = qg(ig)
         px = qgr / dq - DBLE(INT(qgr/dq))
-        ux = 1.d0 - px
-        vx = 2.d0 - px
-        wx = 3.d0 - px
+        ux = 1.0_dp - px
+        vx = 2.0_dp - px
+        wx = 3.0_dp - px
         i0 = INT(qgr/dq) + 1
         i1 = i0 + 1
         i2 = i0 + 2
         i3 = i0 + 3
-        vq(ig,nb) = &
-             tab_beta(i0,nb,nt) * ux * vx * wx / 6.d0 + &
-             tab_beta(i1,nb,nt) * px * vx * wx / 2.d0 - &
-             tab_beta(i2,nb,nt) * px * ux * wx / 2.d0 + &
-             tab_beta(i3,nb,nt) * px * ux * vx / 6.d0
+        if ( i3 <= nqx ) then
+           vq(ig,nb) = &
+             tab_beta(i0,nb,nt) * ux * vx * wx / 6.0_dp + &
+             tab_beta(i1,nb,nt) * px * vx * wx / 2.0_dp - &
+             tab_beta(i2,nb,nt) * px * ux * wx / 2.0_dp + &
+             tab_beta(i3,nb,nt) * px * ux * vx / 6.0_dp
+        else
+           !! This case should never happen if tab_beta is properly allocated
+           !! (setting q_max to be large enough) - for compatibility with GWW
+           vq(ig,nb) = 0.0_dp
+        end if
      END DO
   END DO
   !$acc end data