quantum-espresso/PW/sum_band.f90

1034 lines
35 KiB
Fortran

!
! Copyright (C) 2001-2007 Quantum-Espresso group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#include "f_defs.h"
!
!----------------------------------------------------------------------------
SUBROUTINE sum_band()
!----------------------------------------------------------------------------
!
! ... calculates the symmetrized charge density and sum of occupied
! ... eigenvalues.
! ... this version works also for metals (gaussian spreading technique)
!
USE kinds, ONLY : DP
USE ener, ONLY : eband
USE control_flags, ONLY : diago_full_acc, gamma_only, tqr
USE cell_base, ONLY : at, bg, omega, tpiba
USE ions_base, ONLY : nat, ntyp => nsp, ityp
USE gvect, ONLY : nr1, nr2, nr3, nrx1, nrx2, nrx3, nrxx, &
ngm, g, nl, nlm
USE gsmooth, ONLY : nls, nlsm, nr1s, nr2s, nr3s, &
nrx1s, nrx2s, nrx3s, nrxxs, doublegrid
USE klist, ONLY : nks, nkstot, wk, xk, ngk
USE ldaU, ONLY : lda_plus_U
USE lsda_mod, ONLY : lsda, nspin, current_spin, isk
USE scf, ONLY : rho
USE symme, ONLY : nsym, s, ftau
USE io_files, ONLY : iunwfc, nwordwfc, iunigk
USE buffers, ONLY : get_buffer
USE uspp, ONLY : nkb, vkb, becsum, nhtol, nhtoj, indv, okvan
USE uspp_param, ONLY : upf, nh, nhm
USE wavefunctions_module, ONLY : evc, psic, psic_nc
USE noncollin_module, ONLY : noncolin, npol
USE spin_orb, ONLY : lspinorb, domag, fcoef
USE wvfct, ONLY : nbnd, npwx, npw, igk, wg, et, btype
USE mp_global, ONLY : root_image, npool, my_pool_id, inter_pool_comm
USE mp, ONLY : mp_bcast, mp_sum
USE funct, ONLY : dft_is_meta
USE paw_onecenter, ONLY : PAW_symmetrize
USE paw_variables, ONLY : okpaw
USE becmod, ONLY : allocate_bec, deallocate_bec
USE realus, ONLY : real_space, fft_orbital_gamma, initialisation_level,&
bfft_orbital_gamma, calbec_rs_gamma, s_psir_gamma, addusdens_r
USE wvfct, ONLY: nbnd
!
IMPLICIT NONE
!
! ... local variables
!
INTEGER :: ikb, jkb, ijkb0, ih, jh, ijh, na, np
! counters on beta functions, atoms, pseudopotentials
INTEGER :: ir, is, ig, ibnd, ik
! counter on 3D r points
! counter on spin polarizations
! counter on g vectors
! counter on bands
! counter on k points
real (DP), allocatable :: kplusg (:)
!
!
CALL start_clock( 'sum_band' )
!
becsum(:,:,:) = 0.D0
rho%of_r(:,:) = 0.D0
rho%of_g(:,:) = 0.D0
if ( dft_is_meta() ) then
rho%kin_r(:,:) = 0.D0
rho%kin_g(:,:) = 0.D0
allocate (kplusg(npwx))
end if
eband = 0.D0
!
! ... calculates weights of Kohn-Sham orbitals used in calculation of rho
!
CALL weights ( )
!
IF ( diago_full_acc ) THEN
!
! ... for diagonalization purposes all the bands are considered occupied
!
btype(:,:) = 1
!
ELSE
!
! ... for diagonalization purposes a band is considered empty when its
! ... occupation is less than 1.0 %
!
btype(:,:) = 1
!
FORALL( ik = 1:nks, wk(ik) > 0.D0 )
!
WHERE( wg(:,ik) / wk(ik) < 0.01D0 ) btype(:,ik) = 0
!
END FORALL
!
END IF
!
! ... Needed for LDA+U
!
IF ( lda_plus_u ) CALL new_ns(rho%ns)
!
IF ( okvan ) CALL allocate_bec (nkb,nbnd)
!
! ... specific routines are called to sum for each k point the contribution
! ... of the wavefunctions to the charge
!
IF ( gamma_only ) THEN
!
CALL sum_band_gamma()
!
ELSE
!
CALL sum_band_k()
!
END IF
!
IF( okpaw ) THEN
rho%bec(:,:,:) = becsum(:,:,:) ! becsum is filled in sum_band_{k|gamma}
! rho%bec has to be recollected and symmetrized, becsum must not, otherwise
! it will break stress routines.
#ifdef __PARA
CALL mp_sum(rho%bec, inter_pool_comm )
#endif
CALL PAW_symmetrize(rho%bec)
ENDIF
!
IF ( okvan ) CALL deallocate_bec ( )
!
! ... If a double grid is used, interpolate onto the fine grid
!
IF ( doublegrid ) THEN
!
DO is = 1, nspin
!
CALL interpolate( rho%of_r(1,is), rho%of_r(1,is), 1 )
if (dft_is_meta()) CALL interpolate(rho%kin_r(1,is),rho%kin_r(1,is),1)
!
END DO
!
END IF
!
! ... Here we add the Ultrasoft contribution to the charge
!
IF ( okvan ) CALL addusdens()
!
IF ( noncolin .AND. .NOT. domag ) rho%of_r(:,2:4)=0.D0
!
CALL mp_sum( eband, inter_pool_comm )
!
! ... symmetrization of the charge density (and local magnetization)
!
#if defined (__PARA)
!
! ... reduce charge density across pools
!
CALL mp_sum( rho%of_r, inter_pool_comm )
if (dft_is_meta() ) CALL mp_sum( rho%kin_r, inter_pool_comm )
!
IF ( noncolin ) THEN
!
CALL psymrho( rho%of_r(1,1), nrx1, nrx2, nrx3, nr1, nr2, nr3, nsym, s, ftau )
!
IF ( domag ) &
CALL psymrho_mag( rho%of_r(1,2), nrx1, nrx2, nrx3, &
nr1, nr2, nr3, nsym, s, ftau, bg, at )
!
ELSE
!
DO is = 1, nspin
!
CALL psymrho( rho%of_r(1,is), nrx1, nrx2, nrx3, &
nr1, nr2, nr3, nsym, s, ftau )
if (dft_is_meta() ) CALL psymrho( rho%kin_r(1,is), nrx1, nrx2, nrx3, &
nr1, nr2, nr3, nsym, s, ftau )
!
END DO
!
END IF
!
#else
!
IF ( noncolin ) THEN
!
CALL symrho( rho%of_r(1,1), nrx1, nrx2, nrx3, nr1, nr2, nr3, nsym, s, ftau )
!
IF ( domag ) &
CALL symrho_mag( rho%of_r(1,2), nrx1, nrx2, nrx3, &
nr1, nr2, nr3, nsym, s, ftau, bg, at )
!
ELSE
!
DO is = 1, nspin
!
CALL symrho( rho%of_r(1,is), nrx1, nrx2, nrx3, nr1, nr2, nr3, nsym, s, ftau )
if (dft_is_meta() ) CALL symrho( rho%kin_r(1,is), nrx1, nrx2, nrx3, &
nr1, nr2, nr3, nsym, s, ftau )
!
END DO
!
END IF
!
#endif
!
if (dft_is_meta()) deallocate (kplusg)
!
! ... synchronize rho%of_g to the calculated rho%of_r
!
DO is = 1, nspin
!
! use psic as work array
psic(:) = rho%of_r(:,is)
CALL cft3( psic, nr1, nr2, nr3, nrx1, nrx2, nrx3, -1 )
rho%of_g(:,is) = psic(nl(:))
!
IF ( okvan .AND. tqr ) THEN
! ... in case the augmentation charges are computed in real space
! ... we apply an FFT filter to the density in real space to
! ... remove features that are not compatible with the FFT grid.
!
psic(:) = ( 0.D0, 0.D0 )
psic(nl(:)) = rho%of_g(:,is)
IF ( gamma_only ) psic(nlm(:)) = CONJG( rho%of_g(:,is) )
CALL cft3( psic, nr1, nr2, nr3, nrx1, nrx2, nrx3, 1 )
rho%of_r(:,is) = psic(:)
!
END IF
!
END DO
!
! ... the same for rho%kin_r and rho%kin_g
!
IF ( dft_is_meta()) THEN
DO is = 1, nspin
!
! use psic as work array
psic(:) = rho%kin_r(:,is)
CALL cft3( psic, nr1, nr2, nr3, nrx1, nrx2, nrx3, -1 )
rho%kin_g(:,is) = psic(nl(:))
!
IF ( okvan .AND. tqr ) THEN
! ... in case the augmentation charges are computed in real space
! ... we apply an FFT filter to the density in real space to
! ... remove features that are not compatible with the FFT grid.
!
psic(:) = ( 0.D0, 0.D0 )
psic(nl(:)) = rho%kin_g(:,is)
IF ( gamma_only ) psic(nlm(:)) = CONJG( rho%kin_g(:,is) )
CALL cft3( psic, nr1, nr2, nr3, nrx1, nrx2, nrx3, 1 )
rho%kin_r(:,is) = psic(:)
!
END IF
!
END DO
END IF
!
CALL stop_clock( 'sum_band' )
!
RETURN
!
CONTAINS
!
! ... internal procedures
!
!-----------------------------------------------------------------------
SUBROUTINE sum_band_gamma()
!-----------------------------------------------------------------------
!
! ... gamma version
!
USE becmod, ONLY : rbecp, calbec
USE fft_parallel, ONLY : tg_cft3s
USE fft_base, ONLY : dffts
USE control_flags, ONLY : use_task_groups
USE mp_global, ONLY : nogrp, nolist, ogrp_comm, me_pool
USE mp, ONLY : mp_sum
!
IMPLICIT NONE
!
! ... local variables
!
REAL(DP) :: w1, w2
! weights
INTEGER :: idx, ioff, incr, v_siz, j
COMPLEX(DP), ALLOCATABLE :: tg_psi(:)
REAL(DP), ALLOCATABLE :: tg_rho(:)
LOGICAL :: use_tg
!
!
! ... here we sum for each k point the contribution
! ... of the wavefunctions to the charge
!
IF ( nks > 1 ) REWIND( iunigk )
!
use_tg = ( use_task_groups ) .AND. ( nbnd >= nogrp )
!
incr = 2
!
IF( use_tg ) THEN
!
IF( dft_is_meta() ) &
CALL errore( ' sum_band ', ' task groups with meta dft, not yet implemented ', 1 )
!
v_siz = dffts%nnrx * nogrp
!
ALLOCATE( tg_psi( v_siz ) )
ALLOCATE( tg_rho( v_siz ) )
!
tg_rho = 0.0_DP
!
incr = 2 * nogrp
!
END IF
!
k_loop: DO ik = 1, nks
!
IF ( lsda ) current_spin = isk(ik)
npw = ngk(ik)
!
IF ( nks > 1 ) THEN
!
READ( iunigk ) igk
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
!
END IF
!
IF ( nkb > 0 ) &
CALL init_us_2( npw, igk, xk(1,ik), vkb )
!
! ... here we compute the band energy: the sum of the eigenvalues
!
DO ibnd = 1, nbnd
!
! ... the sum of eband and demet is the integral for
! ... e < ef of e n(e) which reduces for degauss=0 to the sum of
! ... the eigenvalues.
!
eband = eband + et(ibnd,ik) * wg(ibnd,ik)
!
END DO
!
DO ibnd = 1, nbnd, incr
!
IF( use_tg ) THEN
!
tg_psi(:) = ( 0.D0, 0.D0 )
ioff = 0
!
DO idx = 1, 2*nogrp, 2
!
! ... 2*nogrp ffts at the same time
!
IF( idx + ibnd - 1 < nbnd ) THEN
DO j = 1, npw
tg_psi(nls (igk(j))+ioff) = evc(j,idx+ibnd-1) + (0.0d0,1.d0) * evc(j,idx+ibnd)
tg_psi(nlsm(igk(j))+ioff) = CONJG( evc(j,idx+ibnd-1) - (0.0d0,1.d0) * evc(j,idx+ibnd) )
END DO
ELSE IF( idx + ibnd - 1 == nbnd ) THEN
DO j = 1, npw
tg_psi(nls (igk(j))+ioff) = evc(j,idx+ibnd-1)
tg_psi(nlsm(igk(j))+ioff) = CONJG( evc(j,idx+ibnd-1) )
END DO
END IF
ioff = ioff + dffts%nnrx
END DO
!
CALL tg_cft3s ( tg_psi, dffts, 2, use_tg )
!
! Now the first proc of the group holds the first two bands
! of the 2*nogrp bands that we are processing at the same time,
! the second proc. holds the third and fourth band
! and so on
!
! Compute the proper factor for each band
!
DO idx = 1, nogrp
IF( nolist( idx ) == me_pool ) EXIT
END DO
!
! Remember two bands are packed in a single array :
! proc 0 has bands ibnd and ibnd+1
! proc 1 has bands ibnd+2 and ibnd+3
! ....
!
idx = 2 * idx - 1
!
IF( idx + ibnd - 1 < nbnd ) THEN
w1 = wg( idx + ibnd - 1, ik) / omega
w2 = wg( idx + ibnd , ik) / omega
ELSE IF( idx + ibnd - 1 == nbnd ) THEN
w1 = wg( idx + ibnd - 1, ik) / omega
w2 = w1
ELSE
w1 = 0.0d0
w2 = w1
END IF
!
DO ir = 1, dffts%tg_npp( me_pool + 1 ) * dffts%nr1x * dffts%nr2x
!
tg_rho(ir) = tg_rho(ir) + &
w1 * DBLE( tg_psi(ir) )**2 + &
w2 * AIMAG( tg_psi(ir) )**2
!
END DO
!
ELSE
!
psic(:) = ( 0.D0, 0.D0 )
!
IF ( ibnd < nbnd ) THEN
!
! ... two ffts at the same time
!
psic(nls(igk(1:npw))) = evc(1:npw,ibnd) + &
( 0.D0, 1.D0 ) * evc(1:npw,ibnd+1)
psic(nlsm(igk(1:npw))) = CONJG( evc(1:npw,ibnd) - &
( 0.D0, 1.D0 ) * evc(1:npw,ibnd+1) )
!
ELSE
!
psic(nls(igk(1:npw))) = evc(1:npw,ibnd)
psic(nlsm(igk(1:npw))) = CONJG( evc(1:npw,ibnd) )
!
END IF
!
CALL cft3s( psic, nr1s, nr2s, nr3s, nrx1s, nrx2s, nrx3s, 2 )
!
w1 = wg(ibnd,ik) / omega
!
! ... increment the charge density ...
!
IF ( ibnd < nbnd ) THEN
!
! ... two ffts at the same time
!
w2 = wg(ibnd+1,ik) / omega
!
ELSE
!
w2 = w1
!
END IF
!
DO ir = 1, nrxxs
!
rho%of_r(ir,current_spin) = rho%of_r(ir,current_spin) + &
w1 * DBLE( psic(ir) )**2 + &
w2 * AIMAG( psic(ir) )**2
!
END DO
!
END IF
!
IF (dft_is_meta()) THEN
DO j=1,3
psic(:) = ( 0.D0, 0.D0 )
!
kplusg (1:npw) = (xk(j,ik)+g(j,igk(1:npw))) * tpiba
IF ( ibnd < nbnd ) THEN
! ... two ffts at the same time
psic(nls(igk(1:npw))) = CMPLX (0d0, kplusg(1:npw)) * &
( evc(1:npw,ibnd) + &
( 0.D0, 1.D0 ) * evc(1:npw,ibnd+1) )
psic(nlsm(igk(1:npw))) = CMPLX (0d0, -kplusg(1:npw)) * &
CONJG( evc(1:npw,ibnd) - &
( 0.D0, 1.D0 ) * evc(1:npw,ibnd+1) )
ELSE
psic(nls(igk(1:npw))) = CMPLX (0d0, kplusg(1:npw)) * &
evc(1:npw,ibnd)
psic(nlsm(igk(1:npw))) = CMPLX (0d0, -kplusg(1:npw)) * &
CONJG( evc(1:npw,ibnd) )
END IF
!
CALL cft3s( psic, nr1s, nr2s, nr3s, nrx1s, nrx2s, nrx3s, 2 )
!
! ... increment the kinetic energy density ...
!
DO ir = 1, nrxxs
rho%kin_r(ir,current_spin) = &
rho%kin_r(ir,current_spin) + &
w1 * DBLE( psic(ir) )**2 + &
w2 * AIMAG( psic(ir) )**2
END DO
!
END DO
END IF
!
!
END DO
!
IF( use_tg ) THEN
!
! reduce the group charge
!
CALL mp_sum( tg_rho, gid = ogrp_comm )
!
ioff = 0
DO idx = 1, nogrp
IF( me_pool == nolist( idx ) ) EXIT
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( nolist( idx ) + 1 )
END DO
!
! copy the charge back to the processor location
!
DO ir = 1, nrxxs
rho%of_r(ir,current_spin) = rho%of_r(ir,current_spin) + tg_rho(ir+ioff)
END DO
END IF
!
! ... If we have a US pseudopotential we compute here the becsum term
!
IF ( .NOT. okvan ) CYCLE k_loop
!
IF ( real_space ) then
!if (.not. initialisation_level == 15) CALL errore ('sum_band', 'improper initialisation of real space routines' , 4)
!print *, "sum band rolling the real space!"
do ibnd = 1 , nbnd , 2
!call check_fft_orbital_gamma(psi,ibnd,m)
call fft_orbital_gamma(evc,ibnd,nbnd) !transform the orbital to real space
call calbec_rs_gamma(ibnd,nbnd,rbecp) !(global rbecp is updated)
enddo
else
CALL calbec( npw, vkb, evc, rbecp )
endif
!
CALL start_clock( 'sum_band:becsum' )
!
DO ibnd = 1, nbnd
!
w1 = wg(ibnd,ik)
ijkb0 = 0
!
DO np = 1, ntyp
!
IF ( upf(np)%tvanp ) THEN
!
DO na = 1, nat
!
IF ( ityp(na) == np ) THEN
!
ijh = 1
!
DO ih = 1, nh(np)
!
ikb = ijkb0 + ih
!
becsum(ijh,na,current_spin) = &
becsum(ijh,na,current_spin) + &
w1 *rbecp(ikb,ibnd) *rbecp(ikb,ibnd)
!
ijh = ijh + 1
!
DO jh = ( ih + 1 ), nh(np)
!
jkb = ijkb0 + jh
!
becsum(ijh,na,current_spin) = &
becsum(ijh,na,current_spin) + &
w1 * 2.D0 *rbecp(ikb,ibnd) *rbecp(jkb,ibnd)
!
ijh = ijh + 1
!
END DO
!
END DO
!
ijkb0 = ijkb0 + nh(np)
!
END IF
!
END DO
!
ELSE
!
DO na = 1, nat
!
IF ( ityp(na) == np ) ijkb0 = ijkb0 + nh(np)
!
END DO
!
END IF
!
END DO
!
END DO
!
CALL stop_clock( 'sum_band:becsum' )
!
END DO k_loop
!
IF( use_tg ) THEN
DEALLOCATE( tg_psi )
DEALLOCATE( tg_rho )
END IF
!
RETURN
!
END SUBROUTINE sum_band_gamma
!
!
!-----------------------------------------------------------------------
SUBROUTINE sum_band_k()
!-----------------------------------------------------------------------
!
! ... k-points version
!
USE becmod, ONLY : becp, becp_nc, calbec
USE fft_parallel, ONLY : tg_cft3s
USE fft_base, ONLY : dffts
USE control_flags, ONLY : use_task_groups
USE mp_global, ONLY : nogrp, nolist, ogrp_comm, me_pool
USE mp, ONLY : mp_sum
!
IMPLICIT NONE
!
! ... local variables
!
REAL(DP) :: w1
! weights
COMPLEX(DP), ALLOCATABLE :: becsum_nc(:,:,:,:)
!
INTEGER :: ipol, js
!
INTEGER :: idx, ioff, incr, v_siz, j
COMPLEX(DP), ALLOCATABLE :: tg_psi(:)
REAL(DP), ALLOCATABLE :: tg_rho(:)
LOGICAL :: use_tg
#ifdef __OPENMP
INTEGER :: mytid, ntids, omp_get_thread_num, omp_get_num_threads, icnt
#endif
!
IF (okvan .AND. noncolin) THEN
ALLOCATE(becsum_nc(nhm*(nhm+1)/2,nat,npol,npol))
becsum_nc=(0.d0, 0.d0)
ENDIF
!
! ... here we sum for each k point the contribution
! ... of the wavefunctions to the charge
!
IF ( nks > 1 ) REWIND( iunigk )
!
use_tg = ( use_task_groups ) .AND. ( nbnd >= nogrp )
use_tg = use_tg .AND. ( .NOT. noncolin )
use_tg = use_tg .AND. ( .NOT. dft_is_meta() )
!
incr = 1
!
IF( use_tg ) THEN
!
v_siz = dffts%nnrx * nogrp
!
ALLOCATE( tg_psi( v_siz ) )
ALLOCATE( tg_rho( v_siz ) )
!
incr = nogrp
!
END IF
!
k_loop: DO ik = 1, nks
!
IF( use_tg ) tg_rho = 0.0_DP
IF ( lsda ) current_spin = isk(ik)
npw = ngk (ik)
!
IF ( nks > 1 ) THEN
!
READ( iunigk ) igk
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
!
END IF
!
IF ( nkb > 0 ) &
CALL init_us_2( npw, igk, xk(1,ik), vkb )
!
! ... here we compute the band energy: the sum of the eigenvalues
!
DO ibnd = 1, nbnd, incr
!
IF( use_tg ) THEN
DO idx = 1, nogrp
IF( idx + ibnd - 1 <= nbnd ) eband = eband + et( idx + ibnd - 1, ik ) * wg( idx + ibnd - 1, ik )
END DO
ELSE
eband = eband + et( ibnd, ik ) * wg( ibnd, ik )
END IF
!
! ... the sum of eband and demet is the integral for e < ef of
! ... e n(e) which reduces for degauss=0 to the sum of the
! ... eigenvalues
w1 = wg(ibnd,ik) / omega
!
IF (noncolin) THEN
psic_nc = (0.D0,0.D0)
DO ig = 1, npw
psic_nc(nls(igk(ig)),1)=evc(ig ,ibnd)
psic_nc(nls(igk(ig)),2)=evc(ig+npwx,ibnd)
END DO
call cft3s (psic_nc(1,1), nr1s, nr2s, nr3s, &
nrx1s,nrx2s,nrx3s, 2)
call cft3s (psic_nc(1,2), nr1s, nr2s, nr3s, &
nrx1s,nrx2s,nrx3s, 2)
!
! increment the charge density ...
!
DO ipol=1,npol
DO ir = 1, nrxxs
rho%of_r (ir, 1) = rho%of_r (ir, 1) + &
w1*( DBLE(psic_nc(ir,ipol))**2+AIMAG(psic_nc(ir,ipol))**2)
END DO
END DO
!
! In this case, calculate also the three
! components of the magnetization (stored in rho%of_r(ir,2-4))
!
IF (domag) THEN
DO ir = 1,nrxxs
rho%of_r(ir,2) = rho%of_r(ir,2) + w1*2.D0* &
(DBLE(psic_nc(ir,1))* DBLE(psic_nc(ir,2)) + &
AIMAG(psic_nc(ir,1))*AIMAG(psic_nc(ir,2)))
rho%of_r(ir,3) = rho%of_r(ir,3) + w1*2.D0* &
(DBLE(psic_nc(ir,1))*AIMAG(psic_nc(ir,2)) - &
DBLE(psic_nc(ir,2))*AIMAG(psic_nc(ir,1)))
rho%of_r(ir,4) = rho%of_r(ir,4) + w1* &
(DBLE(psic_nc(ir,1))**2+AIMAG(psic_nc(ir,1))**2 &
-DBLE(psic_nc(ir,2))**2-AIMAG(psic_nc(ir,2))**2)
END DO
ELSE
rho%of_r(:,2:4)=0.0_DP
END IF
!
ELSE
!
IF( use_tg ) THEN
!
!$omp parallel default(shared), private(j,ioff,idx)
!$omp do
DO j = 1, SIZE( tg_psi )
tg_psi(j) = ( 0.D0, 0.D0 )
END DO
!$omp end do
!
ioff = 0
!
DO idx = 1, nogrp
!
! ... nogrp ffts at the same time
!
IF( idx + ibnd - 1 <= nbnd ) THEN
!$omp do
DO j = 1, npw
tg_psi( nls( igk( j ) ) + ioff ) = evc( j, idx+ibnd-1 )
END DO
!$omp end do
END IF
ioff = ioff + dffts%nnrx
END DO
!$omp end parallel
!
CALL tg_cft3s ( tg_psi, dffts, 2, use_tg )
!
! Now the first proc of the group holds the first band
! of the nogrp bands that we are processing at the same time,
! the second proc. holds the second and so on
!
! Compute the proper factor for each band
!
DO idx = 1, nogrp
IF( nolist( idx ) == me_pool ) EXIT
END DO
!
! Remember
! proc 0 has bands ibnd
! proc 1 has bands ibnd+1
! ....
!
IF( idx + ibnd - 1 <= nbnd ) THEN
w1 = wg( idx + ibnd - 1, ik) / omega
ELSE
w1 = 0.0d0
END IF
!
!$omp parallel do
DO ir = 1, dffts%tg_npp( me_pool + 1 ) * dffts%nr1x * dffts%nr2x
!
tg_rho(ir) = tg_rho(ir) + w1 * ( DBLE( tg_psi(ir) )**2 + AIMAG( tg_psi(ir) )**2 )
!
END DO
!$omp end parallel do
!
ELSE
!
psic(:) = ( 0.D0, 0.D0 )
!
psic(nls(igk(1:npw))) = evc(1:npw,ibnd)
!
CALL cft3s( psic, nr1s, nr2s, nr3s, nrx1s, nrx2s, nrx3s, 2 )
!
! ... increment the charge density ...
!
!$omp parallel do
DO ir = 1, nrxxs
!
rho%of_r(ir,current_spin) = rho%of_r(ir,current_spin) + &
w1 * ( DBLE( psic(ir) )**2 + &
AIMAG( psic(ir) )**2 )
!
END DO
!$omp end parallel do
END IF
!
IF (dft_is_meta()) THEN
DO j=1,3
psic(:) = ( 0.D0, 0.D0 )
!
kplusg (1:npw) = (xk(j,ik)+g(j,igk(1:npw))) * tpiba
psic(nls(igk(1:npw))) = CMPLX (0d0, kplusg(1:npw)) * &
evc(1:npw,ibnd)
!
CALL cft3s( psic, nr1s, nr2s, nr3s, nrx1s, nrx2s, nrx3s, 2 )
!
! ... increment the kinetic energy density ...
!
DO ir = 1, nrxxs
rho%kin_r(ir,current_spin) = &
rho%kin_r(ir,current_spin) + &
w1 * ( DBLE( psic(ir) )**2 + &
AIMAG( psic(ir) )**2 )
END DO
!
END DO
END IF
!
END IF
!
END DO
!
IF( use_tg ) THEN
!
! reduce the group charge
!
CALL mp_sum( tg_rho, gid = ogrp_comm )
!
ioff = 0
DO idx = 1, nogrp
IF( me_pool == nolist( idx ) ) EXIT
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( nolist( idx ) + 1 )
END DO
!
! copy the charge back to the proper processor location
!
!$omp parallel do
DO ir = 1, nrxxs
rho%of_r(ir,current_spin) = rho%of_r(ir,current_spin) + tg_rho(ir+ioff)
END DO
!$omp end parallel do
!
END IF
!
! ... If we have a US pseudopotential we compute here the becsum term
!
IF ( .NOT. okvan ) CYCLE k_loop
!
IF (noncolin) THEN
CALL calbec( npw, vkb, evc, becp_nc )
ELSE
CALL calbec( npw, vkb, evc, becp )
ENDIF
!
CALL start_clock( 'sum_band:becsum' )
!
!$omp parallel default(shared), private(ibnd,w1,ijkb0,np,na,ijh,ih,jh,ikb,jkb,is,js,mytid,ntids,icnt)
#ifdef __OPENMP
mytid = omp_get_thread_num() ! take the thread ID
ntids = omp_get_num_threads() ! take the number of threads
icnt = 0
#endif
!
DO ibnd = 1, nbnd
!
w1 = wg(ibnd,ik)
ijkb0 = 0
!
DO np = 1, ntyp
!
IF ( upf(np)%tvanp ) THEN
!
DO na = 1, nat
!
IF (ityp(na)==np) THEN
!
#ifdef __OPENMP
! distribute atoms round robin to threads
!
icnt = icnt + 1
!
IF( MOD( icnt, ntids ) /= mytid ) THEN
ijkb0 = ijkb0 + nh(np)
CYCLE
END IF
#endif
!
ijh = 1
!
DO ih = 1, nh(np)
!
ikb = ijkb0 + ih
!
IF (noncolin) THEN
!
DO is=1,npol
!
DO js=1,npol
becsum_nc(ijh,na,is,js) = &
becsum_nc(ijh,na,is,js)+w1 * &
CONJG(becp_nc(ikb,is,ibnd)) * &
becp_nc(ikb,js,ibnd)
END DO
!
END DO
!
ELSE
!
becsum(ijh,na,current_spin) = &
becsum(ijh,na,current_spin) + &
w1 * DBLE( CONJG( becp(ikb,ibnd) ) * &
becp(ikb,ibnd) )
!
END IF
!
ijh = ijh + 1
!
DO jh = ( ih + 1 ), nh(np)
!
jkb = ijkb0 + jh
!
IF (noncolin) THEN
!
DO is=1,npol
!
DO js=1,npol
becsum_nc(ijh,na,is,js) = &
becsum_nc(ijh,na,is,js) + w1 * &
CONJG(becp_nc(ikb,is,ibnd)) * &
becp_nc(jkb,js,ibnd)
END DO
!
END DO
!
ELSE
!
becsum(ijh,na,current_spin) = &
becsum(ijh,na,current_spin) + w1 * 2.D0 * &
DBLE( CONJG( becp(ikb,ibnd) ) * &
becp(jkb,ibnd) )
ENDIF
!
ijh = ijh + 1
!
END DO
!
END DO
!
ijkb0 = ijkb0 + nh(np)
!
END IF
!
END DO
!
ELSE
!
DO na = 1, nat
!
IF ( ityp(na) == np ) ijkb0 = ijkb0 + nh(np)
!
END DO
!
END IF
!
!$omp barrier
!
END DO
!
END DO
!
!$omp end parallel
!
CALL stop_clock( 'sum_band:becsum' )
!
END DO k_loop
IF( use_tg ) THEN
DEALLOCATE( tg_psi )
DEALLOCATE( tg_rho )
END IF
IF (noncolin.and.okvan) THEN
DO np = 1, ntyp
IF ( upf(np)%tvanp ) THEN
DO na = 1, nat
IF (ityp(na)==np) THEN
IF (upf(np)%has_so) THEN
CALL transform_becsum_so(becsum_nc,becsum,na)
ELSE
CALL transform_becsum_nc(becsum_nc,becsum,na)
END IF
END IF
END DO
END IF
END DO
END IF
!
IF ( ALLOCATED (becsum_nc) ) DEALLOCATE( becsum_nc )
!
RETURN
!
END SUBROUTINE sum_band_k
!
END SUBROUTINE sum_band