From d6146e551775ebd93d53f28b3d848a4ba0fc5b45 Mon Sep 17 00:00:00 2001
From: Paolo Giannozzi <p.giannozzi@gmail.com>
Date: Tue, 9 Jan 2024 15:03:28 +0100
Subject: [PATCH] Beta function interpolation routines encapsulated into module
 beta_mod. Important: allocation moved from allocate_nlpot to init_tab_beta

---
 GWW/simple/commutator.f90 |   2 +
 PP/src/pw2gw.f90          |   2 +-
 PW/src/hinit0.f90         |  18 ++-
 PW/src/read_file_new.f90  |  14 +++
 PW/src/scale_h.f90        |   2 +
 upflib/CMakeLists.txt     |   2 +-
 upflib/Makefile           |  10 +-
 upflib/beta_mod.f90       | 236 ++++++++++++++++++++++++++++++++++++++
 upflib/gen_us_dj.f90      |  45 +-------
 upflib/gen_us_dy.f90      |   1 +
 upflib/init_tab_beta.f90  |  69 -----------
 upflib/init_us_1.f90      |   4 -
 upflib/init_us_2_acc.f90  |  49 +-------
 upflib/uspp_data.f90      |  13 +--
 14 files changed, 283 insertions(+), 184 deletions(-)
 create mode 100644 upflib/beta_mod.f90
 delete mode 100644 upflib/init_tab_beta.f90

diff --git a/GWW/simple/commutator.f90 b/GWW/simple/commutator.f90
index 33f52f466..dcada1a97 100644
--- a/GWW/simple/commutator.f90
+++ b/GWW/simple/commutator.f90
@@ -14,6 +14,7 @@ subroutine gen_beta_simple (qk, npw_max, dvkb)
   USE klist,      ONLY : ngk
   USE gvect,      ONLY : mill, eigts1, eigts2, eigts3, g
   USE uspp,       ONLY : nkb, indv, nhtol, nhtolm
+  USE beta_mod,   ONLY : interp_dbeta
   USE uspp_param, ONLY : lmaxkb, nbetam, nh
   USE io_global, ONLY : stdout
   !
@@ -137,6 +138,7 @@ subroutine gen_beta_simple_2 (qk, npw_max, u, dvkb)
   USE klist,      ONLY : ngk, igk_k
   USE gvect,      ONLY : mill, eigts1, eigts2, eigts3, g
   USE uspp,       ONLY : nkb, indv, nhtol, nhtolm
+  USE beta_mod,   ONLY : interp_beta
   USE uspp_param, ONLY : upf, lmaxkb, nbetam, nh
   !
   implicit none
diff --git a/PP/src/pw2gw.f90 b/PP/src/pw2gw.f90
index 3c48f6b6e..7a046bc14 100644
--- a/PP/src/pw2gw.f90
+++ b/PP/src/pw2gw.f90
@@ -150,7 +150,7 @@ SUBROUTINE compute_gw( omegamin, omegamax, d_omega, use_gmaps, qplda, vkb, vxcdi
   USE parallel_include
   USE scf,       ONLY : rho, rho_core, rhog_core
   USE ener,      ONLY : etxc, vtxc
-
+  USE beta_mod,   ONLY : interp_beta, interp_dbeta
   USE uspp_param, ONLY : upf, nh, nbetam
   USE ions_base,  ONLY : ntyp => nsp
   USE klist,      ONLY : ngk
diff --git a/PW/src/hinit0.f90 b/PW/src/hinit0.f90
index 6b71f87e1..ca3b27c3c 100644
--- a/PW/src/hinit0.f90
+++ b/PW/src/hinit0.f90
@@ -22,6 +22,7 @@ SUBROUTINE hinit0()
   USE klist,            ONLY : qnorm
   USE gvecw,            ONLY : ecutwfc
   USE vlocal,           ONLY : strf
+  USE beta_mod,         ONLY : init_tab_beta
   USE realus,           ONLY : generate_qpointlist, betapointlist, &
                                init_realspace_vars, real_space
   USE ldaU,             ONLY : lda_plus_U, Hubbard_projectors
@@ -42,6 +43,7 @@ SUBROUTINE hinit0()
   !
   IMPLICIT NONE
   REAL (dp) :: alat_old, qmax
+  INTEGER   :: ierr
   LOGICAL   :: is_tau_read = .FALSE.
   !
 #if defined (__ENVIRON)
@@ -60,10 +62,20 @@ SUBROUTINE hinit0()
   IF (tbeta_smoothing) CALL init_us_b0(ecutwfc,intra_bgrp_comm)
   IF (tq_smoothing) CALL init_us_0(ecutrho,intra_bgrp_comm)
   qmax = (qnorm + sqrt(ecutrho))*cell_factor
-  ! qmax is the maximum needed |q+G|, increased by a factor (20% or so)
-  ! to avoid too frequent reallocations in variable-cell calculations
-  ! (qnorm=max|q| may be needed for hybrid EXX or phonon calculations)
+  !
+  ! qmax is the maximum |q+G|, for all G needed by the charge density,
+  ! increased by a factor (20% or so) to avoid too frequent reallocations 
+  ! in variable-cell calculations ( norm is an estimate of max|q|, that
+  ! may be needed for hybrid EXX or phonon calculations)
+  !
   CALL init_us_1(nat, ityp, omega, qmax, intra_bgrp_comm)
+  !
+  ! fill interpolation table for beta functions 
+  ! qmax as above, for all G needed by wavefunctions
+  !
+  qmax = (qnorm + sqrt(ecutwfc))*cell_factor
+  CALL init_tab_beta ( qmax, omega, intra_bgrp_comm, ierr )
+  !
   IF ( lda_plus_U .AND. ( Hubbard_projectors == 'pseudo' ) ) CALL init_q_aeps()
   CALL init_tab_atwfc (omega, intra_bgrp_comm)
   !
diff --git a/PW/src/read_file_new.f90 b/PW/src/read_file_new.f90
index 60d6b2c9c..36ca5f6f8 100644
--- a/PW/src/read_file_new.f90
+++ b/PW/src/read_file_new.f90
@@ -282,11 +282,14 @@ SUBROUTINE post_xml_init (  )
   USE rism_module,          ONLY : rism_tobe_alive, rism_pot3d
   USE rism3d_facade,        ONLY : lrism3d, rism3d_initialize, rism3d_read_to_restart
   USE xc_lib,               ONLY : xclib_dft_is_libxc, xclib_init_libxc
+  USE beta_mod,             ONLY : init_tab_beta
+  USE klist,                ONLY : qnorm
   !
   IMPLICIT NONE
   !
   REAL(DP) :: ehart, etxc, vtxc, etotefield, charge, qmax
   CHARACTER(LEN=37) :: dft_name
+  INTEGER :: ierr
   !
   ! ... initialize Libxc if needed
   !
@@ -359,9 +362,20 @@ SUBROUTINE post_xml_init (  )
   CALL init_vloc()
   IF (tbeta_smoothing) CALL init_us_b0(ecutwfc,intra_bgrp_comm)
   IF (tq_smoothing) CALL init_us_0(ecutrho,intra_bgrp_comm)
+  !
+  ! qmax is the maximum |G|, for all G needed by the charge density
+  !
   qmax = sqrt(ecutrho)*cell_factor
   CALL init_us_1(nat, ityp, omega, qmax, intra_bgrp_comm)
+  !
+  ! fill interpolation table for beta functions 
+  ! qmax is the maximum |q+G|, for all G needed by the wavefunctions
+  !
+  qmax = (qnorm + sqrt(ecutwfc))*cell_factor
+  CALL init_tab_beta ( qmax, omega, intra_bgrp_comm, ierr )
+  !
   IF ( lda_plus_u .AND. ( Hubbard_projectors == 'pseudo' ) ) CALL init_q_aeps()
+  !
   CALL init_tab_atwfc(omega, intra_bgrp_comm)
   !
   CALL struc_fact( nat, tau, nsp, ityp, ngm, g, bg, dfftp%nr1, dfftp%nr2,&
diff --git a/PW/src/scale_h.f90 b/PW/src/scale_h.f90
index fa8ae6431..ea599871d 100644
--- a/PW/src/scale_h.f90
+++ b/PW/src/scale_h.f90
@@ -29,6 +29,7 @@ SUBROUTINE scale_h
   USE rism_module,    ONLY : lrism, rism_reinit3d
   USE mp,             ONLY : mp_max
   USE mp_bands,       ONLY : intra_bgrp_comm
+  USE beta_mod,       ONLY : scale_tab_beta
   USE vloc_mod,       ONLY : scale_tab_vloc
   USE rhoc_mod,       ONLY : scale_tab_rhc
   USE rhoat_mod,      ONLY : scale_tab_rhoat
@@ -86,6 +87,7 @@ SUBROUTINE scale_h
   ! scale the non-local pseudopotential tables
   !
   call scale_uspp_data( omega_old/omega )
+  call scale_tab_beta( omega_old/omega )
   CALL scale_tab_rhc( omega_old/omega )
   CALL scale_tab_rhoat( omega_old/omega )
   CALL scale_tab_qrad( omega_old/omega )
diff --git a/upflib/CMakeLists.txt b/upflib/CMakeLists.txt
index 2e68c5b64..e12a2f524 100644
--- a/upflib/CMakeLists.txt
+++ b/upflib/CMakeLists.txt
@@ -8,7 +8,7 @@ set(src_upflib
     init_us_b0.f90
     init_us_1.f90
     init_tab_atwfc.f90
-    init_tab_beta.f90
+    beta_mod.f90
     interp_atwfc.f90
     init_us_2_acc.f90
     gth.f90
diff --git a/upflib/Makefile b/upflib/Makefile
index 1abb76748..98bba3e2a 100644
--- a/upflib/Makefile
+++ b/upflib/Makefile
@@ -10,10 +10,11 @@ MODFLAGS= $(MOD_FLAG)../UtilXlib $(MOD_FLAG).
 # OBJS_GPU   are GPU-specific
 
 OBJS_DEP= \
-vloc_mod.o \
-rhoc_mod.o \
-rhoat_mod.o \
+beta_mod.o \
 qrad_mod.o  \
+rhoat_mod.o \
+rhoc_mod.o \
+vloc_mod.o \
 qvan2.o \
 dqvan2.o \
 gen_us_dj.o \
@@ -22,8 +23,7 @@ init_us_0.o \
 init_us_b0.o \
 init_us_1.o \
 init_us_2_acc.o \
-init_tab_atwfc.o \
-init_tab_beta.o
+init_tab_atwfc.o
 
 OBJS_NODEP= \
 atom.o \
diff --git a/upflib/beta_mod.f90 b/upflib/beta_mod.f90
new file mode 100644
index 000000000..13d5f545a
--- /dev/null
+++ b/upflib/beta_mod.f90
@@ -0,0 +1,236 @@
+!
+! Copyright (C) 2024 Quantum ESPRESSO Foundation
+! This file is distributed under the terms of the
+! GNU General Public License. See the file `License'
+! in the root directory of the present distribution,
+! or http://www.gnu.org/copyleft/gpl.txt .
+!
+!
+MODULE beta_mod
+  !
+  !! Variables and routines for nonlocal beta functions in numerical form
+  !! Contains generation of interpolation tables in reciprocal space,
+  !! interpolation routines and other utility routines
+  !! Code moved to upflib and restructured by Paolo Giannozzi, 2024
+  !
+  USE upf_kinds,    ONLY : dp
+  USE upf_const,    ONLY : fpi, e2
+  !
+  IMPLICIT NONE
+  !
+  PRIVATE
+  PUBLIC :: init_tab_beta
+  PUBLIC :: deallocate_tab_beta
+  PUBLIC :: scale_tab_beta
+  PUBLIC :: interp_beta
+  PUBLIC :: interp_dbeta
+  !
+  SAVE
+  !
+  INTEGER :: nqx = 0
+  !! size of interpolation table
+  REAL(DP), PARAMETER:: dq = 0.01_dp
+  !! grid step for interpolation table
+  REAL(DP) :: qmax = 0.0_dp 
+  !! max q covered by the interpolation table
+  REAL(DP), ALLOCATABLE :: tab_beta(:,:,:)
+  !! interpolation table for numerical beta functions in reciprocal space
+  !
+CONTAINS
+!
+!----------------------------------------------------------------------
+SUBROUTINE init_tab_beta ( qmax_, omega, comm, ierr ) 
+  !----------------------------------------------------------------------
+  !
+  ! Compute interpolation table for beta(G) radial functions
+  !
+  USE upf_kinds,    ONLY : dp
+  USE atom,         ONLY : rgrid
+  USE uspp_param,   ONLY : upf, lmaxq, nbetam, nsp
+  USE mp,           ONLY : mp_sum
+  USE m_gth,        ONLY : mk_ffnl_gth
+  !
+  IMPLICIT NONE
+  !
+  REAL(dp), INTENT(IN) :: qmax_
+  !! Interpolate q up to qmax_ (sqrt(Ry), q^2 is an energy)
+  REAL(dp), INTENT(IN) :: omega
+  !! Unit-cell volume
+  INTEGER, INTENT(IN)  :: comm
+  !! MPI communicator, to split the workload
+  INTEGER, INTENT(OUT) :: ierr
+  !! error code: ierr = 0 if interpolation table (IT) was allocated
+  !!             ierr =-1 if IT had insufficient size and was re-allocated
+  !!             ierr =-2 if IT was already present and nothing is done
+  !
+  INTEGER :: ndm, startq, lastq, nt, l, nb, iq, ir
+  REAL(dp) :: qi
+  ! q-point grid for interpolation
+  REAL(dp) :: pref
+  ! the prefactor of the Q functions
+  real(DP) ::  vqint, d1
+  !
+  REAL(dp), allocatable :: aux (:)
+  ! work space
+  REAL(dp), allocatable :: besr(:)
+  ! work space
+  !
+  IF ( .NOT. ALLOCATED(tab_beta) ) THEN
+     !! table not yet allocated
+     qmax = qmax_
+     ierr = 0
+  ELSE IF ( qmax_ > qmax ) THEN
+     !! table ìs allocated but dimension insufficient: re-allocate
+     !! (with some margin so that this does not happen too often)
+     !$acc exit data delete(tab_beta)
+     DEALLOCATE ( tab_beta )
+     qmax = qmax_ + MAX(dq*100,qmax_-qmax)
+     ierr =-1
+  ELSE
+     !! table already computed: exit
+     ierr =-2
+     RETURN
+  END IF
+  nqx = INT( qmax/dq + 4)
+  allocate(tab_beta(nqx,nbetam,nsp))
+  !$acc enter data create(tab_beta)
+  ndm = MAXVAL ( upf(:)%kkbeta )
+  allocate( aux (ndm) )
+  allocate (besr( ndm))
+  pref = fpi / sqrt (omega)
+  call divide (comm, nqx, startq, lastq)
+  tab_beta (:,:,:) = 0.d0
+  do nt = 1, nsp
+     do nb = 1, upf(nt)%nbeta
+        l = upf(nt)%lll (nb)
+        do iq = startq, lastq
+           qi = (iq - 1) * dq
+           if ( upf(nt)%is_gth ) then
+              CALL mk_ffnl_gth( nt, nb, 1, omega, [ qi ] , tab_beta(iq,nb,nt) )
+           else
+              call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr)
+              do ir = 1, upf(nt)%kkbeta
+                 aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir)
+              enddo
+              call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint)
+              tab_beta (iq, nb, nt) = vqint * pref
+           end if
+        enddo
+     enddo
+  enddo
+  deallocate (besr)
+  deallocate (aux)
+  !
+  call mp_sum(  tab_beta, comm )
+!$acc update device (tab_beta)
+  !
+END SUBROUTINE init_tab_beta
+!
+!----------------------------------------------------------------------
+SUBROUTINE interp_beta( nt, npw_, qg, vq )
+  !----------------------------------------------------------------------
+  !
+  USE upf_kinds,  ONLY : dp
+  USE uspp_param, ONLY : upf, nbetam
+  !
+  implicit none
+  integer, intent(in) :: nt, npw_
+  real(dp), intent(in ) :: qg(npw_)
+  real(dp), intent(out) :: vq(npw_,nbetam)
+  !
+  integer :: i0, i1, i2, i3, nbnt, nb, ig
+  real(dp):: qgr, px, ux, vx, wx
+  !
+  nbnt = upf(nt)%nbeta
+  !$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
+  !$acc parallel loop collapse(2)
+  do nb = 1, nbnt
+     DO ig = 1, npw_
+        qgr = qg(ig)
+        px = qgr / dq - DBLE(INT(qgr/dq))
+        ux = 1.0_dp - px
+        vx = 2.0_dp - px
+        wx = 3.0_dp - px
+        i0 = INT(qgr/dq) + 1
+        i1 = i0 + 1
+        i2 = i0 + 2
+        i3 = i0 + 3
+        if ( i3 <= nqx ) then
+           vq(ig,nb) = &
+             tab_beta(i0,nb,nt) * ux * vx * wx / 6.0_dp + &
+             tab_beta(i1,nb,nt) * px * vx * wx / 2.0_dp - &
+             tab_beta(i2,nb,nt) * px * ux * wx / 2.0_dp + &
+             tab_beta(i3,nb,nt) * px * ux * vx / 6.0_dp
+        else
+           !! This case should never happen if tab_beta is properly allocated
+           !! (setting q_max to be large enough) - for compatibility with GWW
+           vq(ig,nb) = 0.0_dp
+        end if
+     END DO
+  END DO
+  !$acc end data
+  !----------------------------------------------------------------------
+END SUBROUTINE interp_beta
+!----------------------------------------------------------------------
+!
+!----------------------------------------------------------------------
+SUBROUTINE interp_dbeta( nt, npw, qg, vq )
+  !----------------------------------------------------------------------
+  !
+  USE upf_kinds,  ONLY : dp
+  USE uspp_param, ONLY : upf, nbetam
+  !
+  implicit none
+  integer, intent(in) :: nt, npw
+  real(dp), intent(in ) :: qg(npw)
+  real(dp), intent(out) :: vq(npw,nbetam)
+  !
+  integer :: i0, i1, i2, i3, nbnt, nb, ig
+  real(dp):: qgr, px, ux, vx, wx
+  !
+  nbnt = upf(nt)%nbeta
+  !$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
+  !$acc parallel loop collapse(2)
+  DO nb = 1, nbnt
+     DO ig = 1, npw
+        qgr = qg(ig)
+        px = qgr / dq - INT(qgr/dq)
+        ux = 1.0_dp - px
+        vx = 2.0_dp - px
+        wx = 3.0_dp - px
+        i0 = qgr / dq + 1
+        i1 = i0 + 1
+        i2 = i0 + 2
+        i3 = i0 + 3
+        IF ( i3 <= nqx ) THEN
+            vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
+                          tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
+                          tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
+                          tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp )/dq
+        ELSE
+            vq(ig,nb) = 0.0_dp 
+        END IF
+     ENDDO
+  END DO
+  !$acc end data
+  !----------------------------------------------------------------------
+END SUBROUTINE interp_dbeta
+!
+  subroutine deallocate_tab_beta ()
+    implicit none
+    !
+    !$acc exit data delete(tab_beta)
+    if( allocated( tab_beta ) )  deallocate( tab_beta )
+    !
+  end subroutine deallocate_tab_beta
+  !
+  subroutine scale_tab_beta( vol_ratio_m1 )
+    ! vol_ratio_m1 = omega_old / omega
+    implicit none
+    real(DP), intent(in) :: vol_ratio_m1
+    !
+    tab_beta(:,:,:) = tab_beta(:,:,:) * SQRT(vol_ratio_m1)
+!$acc update device ( tab_beta)
+  end subroutine scale_tab_beta
+   !
+END MODULE beta_mod
diff --git a/upflib/gen_us_dj.f90 b/upflib/gen_us_dj.f90
index ef09b51c1..b73892547 100644
--- a/upflib/gen_us_dj.f90
+++ b/upflib/gen_us_dj.f90
@@ -18,6 +18,7 @@ SUBROUTINE gen_us_dj_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
   USE upf_const,  ONLY: tpi
   USE uspp,       ONLY: nkb, indv, nhtol, nhtolm
   USE uspp_param, ONLY: lmaxkb, nbetam, nh, nhm
+  USE beta_mod,   ONLY: interp_dbeta
   !
   IMPLICIT NONE
   !
@@ -202,47 +203,3 @@ SUBROUTINE gen_us_dj_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
   RETURN
   !
 END SUBROUTINE gen_us_dj_base
-!
-!----------------------------------------------------------------------
-SUBROUTINE interp_dbeta( nt, npw, qg, vq )
-  !----------------------------------------------------------------------
-  !
-  USE upf_kinds,  ONLY : dp
-  USE uspp_param, ONLY : upf, nbetam
-  USE uspp_data,  ONLY : nqx, dq, tab_beta
-  !
-  implicit none
-  integer, intent(in) :: nt, npw
-  real(dp), intent(in ) :: qg(npw)
-  real(dp), intent(out) :: vq(npw,nbetam)
-  !
-  integer :: i0, i1, i2, i3, nbnt, nb, ig
-  real(dp):: qgr, px, ux, vx, wx
-  !
-  nbnt = upf(nt)%nbeta
-  !$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
-  !$acc parallel loop collapse(2)
-  DO nb = 1, nbnt
-     DO ig = 1, npw
-        qgr = qg(ig)
-        px = qgr / dq - INT(qgr/dq)
-        ux = 1.0_dp - px
-        vx = 2.0_dp - px
-        wx = 3.0_dp - px
-        i0 = qgr / dq + 1
-        i1 = i0 + 1
-        i2 = i0 + 2
-        i3 = i0 + 3
-        IF ( i3 <= nqx ) THEN
-            vq(ig,nb) = ( tab_beta(i0,nb,nt) * (-vx*wx-ux*wx-ux*vx)/6.0_dp + &
-                          tab_beta(i1,nb,nt) * (+vx*wx-px*wx-px*vx)/2.0_dp - &
-                          tab_beta(i2,nb,nt) * (+ux*wx-px*wx-px*ux)/2.0_dp + &
-                          tab_beta(i3,nb,nt) * (+ux*vx-px*vx-px*ux)/6.0_dp )/dq
-        ELSE
-            vq(ig,nb) = 0.0_dp 
-        END IF
-     ENDDO
-  END DO
-  !$acc end data
-  !----------------------------------------------------------------------
-END SUBROUTINE interp_dbeta
diff --git a/upflib/gen_us_dy.f90 b/upflib/gen_us_dy.f90
index 4dea2bfc6..c7f8f666e 100644
--- a/upflib/gen_us_dy.f90
+++ b/upflib/gen_us_dy.f90
@@ -20,6 +20,7 @@ SUBROUTINE gen_us_dy_base( npw, npwx, igk, xk, nat, tau, ityp, ntyp, tpiba, &
   USE upf_const,   ONLY: tpi
   USE uspp,        ONLY: nkb, indv, nhtol, nhtolm
   USE uspp_param,  ONLY: upf, lmaxkb, nbetam, nh, nhm
+  USE beta_mod,    ONLY: interp_beta
   !
   IMPLICIT NONE
   !
diff --git a/upflib/init_tab_beta.f90 b/upflib/init_tab_beta.f90
deleted file mode 100644
index b34253297..000000000
--- a/upflib/init_tab_beta.f90
+++ /dev/null
@@ -1,69 +0,0 @@
-!
-! Copyright (C) 2021-2023 Quantum ESPRESSO Foundation
-! This file is distributed under the terms of the
-! GNU General Public License. See the file `License'
-! in the root directory of the present distribution,
-! or http://www.gnu.org/copyleft/gpl.txt .
-!
-!----------------------------------------------------------------------
-SUBROUTINE init_tab_beta ( omega, intra_bgrp_comm ) 
-  !----------------------------------------------------------------------
-  !
-  ! Compute interpolation table for beta(G) radial functions
-  !
-  USE upf_kinds,    ONLY : dp
-  USE upf_const,    ONLY : fpi
-  USE atom,         ONLY : rgrid
-  USE uspp_param,   ONLY : upf, lmaxq, nbetam, nsp
-  USE uspp_data,    ONLY : nqx, dq, tab_beta
-  USE mp,           ONLY : mp_sum
-  USE m_gth,        ONLY : mk_ffnl_gth
-  !
-  IMPLICIT NONE
-  !
-  real(DP), intent(in) :: omega
-  integer,  intent(in) :: intra_bgrp_comm
-  !
-  INTEGER :: ndm, startq, lastq, nt, l, nb, iq, ir
-  REAL(dp) :: qi
-  ! q-point grid for interpolation
-  REAL(dp) :: pref
-  ! the prefactor of the Q functions
-  real(DP) ::  vqint, d1
-  !
-  REAL(dp), allocatable :: aux (:)
-  ! work space
-  REAL(dp), allocatable :: besr(:)
-  ! work space
-  !
-  ndm = MAXVAL ( upf(:)%kkbeta )
-  allocate( aux (ndm) )
-  allocate (besr( ndm))
-  pref = fpi / sqrt (omega)
-  call divide (intra_bgrp_comm, nqx, startq, lastq)
-  tab_beta (:,:,:) = 0.d0
-  do nt = 1, nsp
-     do nb = 1, upf(nt)%nbeta
-        l = upf(nt)%lll (nb)
-        do iq = startq, lastq
-           qi = (iq - 1) * dq
-           if ( upf(nt)%is_gth ) then
-              CALL mk_ffnl_gth( nt, nb, 1, omega, [ qi ] , tab_beta(iq,nb,nt) )
-           else
-              call sph_bes (upf(nt)%kkbeta, rgrid(nt)%r, qi, l, besr)
-              do ir = 1, upf(nt)%kkbeta
-                 aux (ir) = upf(nt)%beta (ir, nb) * besr (ir) * rgrid(nt)%r(ir)
-              enddo
-              call simpson (upf(nt)%kkbeta, aux, rgrid(nt)%rab, vqint)
-              tab_beta (iq, nb, nt) = vqint * pref
-           end if
-        enddo
-     enddo
-  enddo
-  deallocate (besr)
-  deallocate (aux)
-  !
-  call mp_sum(  tab_beta, intra_bgrp_comm )
-!$acc update device (tab_beta)
-  !
-END SUBROUTINE init_tab_beta
diff --git a/upflib/init_us_1.f90 b/upflib/init_us_1.f90
index 0fa4947dc..db3d8959c 100644
--- a/upflib/init_us_1.f90
+++ b/upflib/init_us_1.f90
@@ -251,10 +251,6 @@ subroutine init_us_1( nat, ityp, omega, qmax, intra_bgrp_comm )
      end do
   end if
   !
-  ! fill interpolation table for beta functions 
-  !
-  CALL init_tab_beta ( omega, intra_bgrp_comm )
-  !
 #if defined __CUDA
   !
   ! update GPU memory (taking care of zero-dim allocations)
diff --git a/upflib/init_us_2_acc.f90 b/upflib/init_us_2_acc.f90
index 3f5157041..1cec50c87 100644
--- a/upflib/init_us_2_acc.f90
+++ b/upflib/init_us_2_acc.f90
@@ -20,6 +20,7 @@ SUBROUTINE init_us_2_acc( npw_, npwx, igk_, q_, nat, tau, ityp, &
   USE upf_const,    ONLY : tpi
   USE uspp,         ONLY : nkb, nhtol, nhtolm, indv
   USE uspp_param,   ONLY : lmaxkb, nbetam, nhm, nh, nsp
+  USE beta_mod,     ONLY : interp_beta
   !
   implicit none
   !
@@ -168,52 +169,6 @@ SUBROUTINE init_us_2_acc( npw_, npwx, igk_, q_, nat, tau, ityp, &
   deallocate(vkb1)
   !
   return
+  !----------------------------------------------------------------------
 end subroutine init_us_2_acc
-!
-!----------------------------------------------------------------------
-SUBROUTINE interp_beta( nt, npw_, qg, vq )
-  !----------------------------------------------------------------------
-  !
-  USE upf_kinds,  ONLY : dp
-  USE uspp_param, ONLY : upf, nbetam
-  USE uspp_data,  ONLY : nqx, dq, tab_beta
-  !
-  implicit none
-  integer, intent(in) :: nt, npw_
-  real(dp), intent(in ) :: qg(npw_)
-  real(dp), intent(out) :: vq(npw_,nbetam)
-  !
-  integer :: i0, i1, i2, i3, nbnt, nb, ig
-  real(dp):: qgr, px, ux, vx, wx
-  !
-  nbnt = upf(nt)%nbeta
-  !$acc data present (tab_beta) present_or_copyin (qg) present_or_copyout (vq)
-  !$acc parallel loop collapse(2)
-  do nb = 1, nbnt
-     DO ig = 1, npw_
-        qgr = qg(ig)
-        px = qgr / dq - DBLE(INT(qgr/dq))
-        ux = 1.0_dp - px
-        vx = 2.0_dp - px
-        wx = 3.0_dp - px
-        i0 = INT(qgr/dq) + 1
-        i1 = i0 + 1
-        i2 = i0 + 2
-        i3 = i0 + 3
-        if ( i3 <= nqx ) then
-           vq(ig,nb) = &
-             tab_beta(i0,nb,nt) * ux * vx * wx / 6.0_dp + &
-             tab_beta(i1,nb,nt) * px * vx * wx / 2.0_dp - &
-             tab_beta(i2,nb,nt) * px * ux * wx / 2.0_dp + &
-             tab_beta(i3,nb,nt) * px * ux * vx / 6.0_dp
-        else
-           !! This case should never happen if tab_beta is properly allocated
-           !! (setting q_max to be large enough) - for compatibility with GWW
-           vq(ig,nb) = 0.0_dp
-        end if
-     END DO
-  END DO
-  !$acc end data
-  !----------------------------------------------------------------------
-END SUBROUTINE interp_beta
 !----------------------------------------------------------------------
diff --git a/upflib/uspp_data.f90 b/upflib/uspp_data.f90
index a01701dc7..d2daf7269 100644
--- a/upflib/uspp_data.f90
+++ b/upflib/uspp_data.f90
@@ -16,7 +16,7 @@ MODULE uspp_data
   PRIVATE
   !
   PUBLIC :: nqxq, nqx, dq
-  PUBLIC :: tab_beta, tab_at
+  PUBLIC :: tab_at
   !
   PUBLIC :: allocate_uspp_data
   PUBLIC :: deallocate_uspp_data
@@ -27,9 +27,7 @@ MODULE uspp_data
   INTEGER :: nqx
   !! number of interpolation points
   REAL(DP), PARAMETER:: dq = 0.01D0
-  !! space between points in the pseudopotential tab_beta.
-  REAL(DP), ALLOCATABLE :: tab_beta(:,:,:)
-  !! interpolation table for PP projectorss
+  !! space between interpolation points 
   REAL(DP), ALLOCATABLE :: tab_at(:,:,:)
   !! interpolation table for atomic wfc
   !
@@ -43,8 +41,6 @@ contains
      if (nqxq_/=nqxq) call upf_error("allocate_uspp_data","invalid nqxq_",1)
      if (nqx_/=nqx)   call upf_error("allocate_uspp_data","invalid nqx_",1)
      !
-     allocate(tab_beta(nqx_,nbetam,nsp))
-     !$acc enter data create(tab_beta)
      allocate(tab_at(nqx_,nwfcm,nsp))
      !$acc enter data create(tab_at)
      !
@@ -52,8 +48,6 @@ contains
   !
   subroutine deallocate_uspp_data()
      implicit none
-     !$acc exit data delete(tab_beta)
-     if( allocated( tab_beta ) )  deallocate( tab_beta )
      !$acc exit data delete(tab_at)
      if( allocated( tab_at ) )    deallocate( tab_at )
      !
@@ -64,9 +58,8 @@ contains
      implicit none
      real(DP), intent(in) :: vol_ratio_m1
      !
-     tab_beta(:,:,:) = tab_beta(:,:,:) * SQRT(vol_ratio_m1)
      tab_at(:,:,:)   = tab_at(:,:,:) * SQRT(vol_ratio_m1)
-!$acc update device (tab_at, tab_beta)
+!$acc update device (tab_at)
   end subroutine scale_uspp_data
   !
 END MODULE uspp_data