mirror of https://gitlab.com/QEF/q-e.git
708 lines
22 KiB
Fortran
708 lines
22 KiB
Fortran
!
|
|
! Copyright (C) 2002-2005 FPMD-CPV groups
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
#include "f_defs.h"
|
|
|
|
|
|
SUBROUTINE dforce1_x( co, ce, dco, dce, fio, fie, hg, v, psi_stored )
|
|
|
|
USE kinds, ONLY: DP
|
|
USE fft_base, ONLY: dffts
|
|
USE gvecw, ONLY: ngw
|
|
USE cp_interfaces, ONLY: fwfft, invfft
|
|
USE cell_base, ONLY: tpiba2
|
|
|
|
IMPLICIT NONE
|
|
|
|
! ... declare subroutine arguments
|
|
COMPLEX(DP), INTENT(OUT) :: dco(:), dce(:)
|
|
COMPLEX(DP), INTENT(IN) :: co(:), ce(:)
|
|
REAL(DP), INTENT(IN) :: fio, fie
|
|
REAL(DP), INTENT(IN) :: v(:)
|
|
REAL(DP), INTENT(IN) :: hg(:)
|
|
COMPLEX(DP), OPTIONAL :: psi_stored(:)
|
|
|
|
! ... declare other variables
|
|
!
|
|
COMPLEX(DP), ALLOCATABLE :: psi(:)
|
|
COMPLEX(DP) :: fp, fm, aro, are
|
|
REAL(DP) :: fioby2, fieby2, arg
|
|
INTEGER :: ig
|
|
|
|
! end of declarations
|
|
|
|
ALLOCATE( psi( SIZE(v) ) )
|
|
|
|
IF( PRESENT( psi_stored ) ) THEN
|
|
psi = psi_stored * CMPLX(v, 0.0d0)
|
|
ELSE
|
|
CALL c2psi( psi, dffts%nnr, co, ce, ngw, 2 )
|
|
CALL invfft( 'Wave', psi, dffts%nr1, dffts%nr2, dffts%nr3, dffts%nr1x, dffts%nr2x, dffts%nr3x )
|
|
psi = psi * CMPLX(v, 0.0d0)
|
|
END IF
|
|
|
|
CALL fwfft( 'Wave', psi, dffts%nr1, dffts%nr2, dffts%nr3, dffts%nr1x, dffts%nr2x, dffts%nr3x )
|
|
CALL psi2c( psi, dffts%nnr, dco, dce, ngw, 2 )
|
|
|
|
DEALLOCATE(psi)
|
|
|
|
fioby2 = fio * 0.5
|
|
fieby2 = fie * 0.5
|
|
|
|
DO ig = 1, SIZE(co)
|
|
fp = dco(ig) + dce(ig)
|
|
fm = dco(ig) - dce(ig)
|
|
aro = CMPLX( DBLE(fp), AIMAG(fm) )
|
|
are = CMPLX( AIMAG(fp), -DBLE(fm))
|
|
arg = tpiba2 * hg(ig)
|
|
dco(ig) = -fioby2 * (arg * co(ig) + aro)
|
|
dce(ig) = -fieby2 * (arg * ce(ig) + are)
|
|
END DO
|
|
|
|
RETURN
|
|
END SUBROUTINE dforce1_x
|
|
|
|
|
|
!=----------------------------------------------------------------------------=!
|
|
|
|
|
|
SUBROUTINE dforce2_x( fio, fie, df, da, vkb, beco, bece )
|
|
|
|
! this routine computes:
|
|
! the generalized force df=CMPLX(dfr,dfi) acting on the i-th
|
|
! electron state at the ik-th point of the Brillouin zone
|
|
! represented by the vector c=CMPLX(cr,ci)
|
|
! ----------------------------------------------
|
|
|
|
USE kinds, ONLY: DP
|
|
USE ions_base, ONLY: na
|
|
USE read_pseudo_module_fpmd, ONLY: nspnl
|
|
USE uspp_param, ONLY: nh
|
|
USE uspp, ONLY: nhtol, nhtolm, indv, beta, dvan, nkb
|
|
use cvan, ONLY: ish
|
|
!
|
|
|
|
IMPLICIT NONE
|
|
|
|
! ... declare subroutine arguments
|
|
COMPLEX(DP), INTENT(IN) :: vkb(:,:)
|
|
REAL(DP), INTENT(IN) :: fio, fie
|
|
COMPLEX(DP) :: df(:), da(:)
|
|
REAL(DP), INTENT(IN) :: beco(:)
|
|
REAL(DP), INTENT(IN) :: bece(:)
|
|
|
|
! ... declare other variables
|
|
REAL(DP) :: to, te
|
|
INTEGER :: l, is, ig, ngw, iv, inl, isa
|
|
|
|
! ----------------------------------------------
|
|
|
|
ngw = SIZE(df)
|
|
|
|
isa = 1
|
|
|
|
DO is = 1, nspnl
|
|
!
|
|
DO iv = 1, nh( is )
|
|
!
|
|
inl = ish(is) + (iv-1) * na(is) + 1
|
|
|
|
to = - fio * dvan( iv, iv, is )
|
|
!
|
|
te = - fie * dvan( iv, iv, is )
|
|
|
|
CALL DGEMV('N', 2*ngw, na(is), to, vkb( 1, inl ), &
|
|
2*SIZE(vkb,1), beco( inl ), 1, 1.0d0, df, 1)
|
|
!
|
|
CALL DGEMV('N', 2*ngw, na(is), te, vkb( 1, inl ), &
|
|
2*SIZE(vkb,1), bece( inl ), 1, 1.0d0, da, 1)
|
|
!
|
|
END DO
|
|
!
|
|
isa = isa + na( is )
|
|
!
|
|
END DO
|
|
!
|
|
|
|
RETURN
|
|
END SUBROUTINE dforce2_x
|
|
|
|
|
|
|
|
!=----------------------------------------------------------------------------=!
|
|
|
|
|
|
|
|
SUBROUTINE dforce_fpmd( ib, c, f, df, da, v, vkb, bec, n, noffset )
|
|
!
|
|
USE kinds, ONLY: DP
|
|
USE reciprocal_vectors, ONLY: ggp, g, gx
|
|
USE cp_interfaces
|
|
!
|
|
IMPLICIT NONE
|
|
!
|
|
INTEGER, INTENT(IN) :: ib ! band index
|
|
COMPLEX(DP), INTENT(IN) :: c(:,:)
|
|
COMPLEX(DP), INTENT(OUT) :: df(:), da(:)
|
|
REAL (DP), INTENT(IN) :: v(:), bec(:,:), f(:)
|
|
COMPLEX(DP), INTENT(IN) :: vkb(:,:)
|
|
INTEGER, INTENT(IN) :: n, noffset ! number of bands, and band index offset
|
|
!
|
|
COMPLEX(DP), ALLOCATABLE :: dum( : )
|
|
!
|
|
INTEGER :: in
|
|
!
|
|
IF( ib > n ) CALL errore( ' dforce ', ' ib out of range ', 1 )
|
|
!
|
|
in = noffset + ib - 1
|
|
!
|
|
IF( ib == n ) THEN
|
|
!
|
|
ALLOCATE( dum( SIZE( df ) ) )
|
|
!
|
|
CALL dforce1( c( :, in ), c( :, in ), df, dum, f( in ), f( in ), ggp, v )
|
|
!
|
|
CALL dforce2( f( in ), f( in ), df , dum , vkb, bec( :, in ), bec( :, in ) )
|
|
!
|
|
DEALLOCATE( dum )
|
|
!
|
|
ELSE
|
|
!
|
|
CALL dforce1( c( :, in ), c( :, in+1 ), df, da, f( in ), f(in+1), ggp, v )
|
|
!
|
|
CALL dforce2( f(in), f(in+1), df, da, vkb, bec( :, in ), bec( :, in+1 ) )
|
|
!
|
|
END IF
|
|
!
|
|
RETURN
|
|
END SUBROUTINE dforce_fpmd
|
|
|
|
|
|
! ----------------------------------------------
|
|
|
|
|
|
SUBROUTINE dforce_all( c, f, cgrad, vpot, vkb, bec, n, noffset )
|
|
!
|
|
USE kinds, ONLY: DP
|
|
USE cp_interfaces
|
|
|
|
IMPLICIT NONE
|
|
|
|
COMPLEX(DP), INTENT(INOUT) :: c(:,:)
|
|
REAL(DP), INTENT(IN) :: vpot(:), f(:)
|
|
COMPLEX(DP), INTENT(OUT) :: cgrad(:,:)
|
|
COMPLEX(DP), INTENT(IN) :: vkb(:,:)
|
|
REAL(DP), INTENT(IN) :: bec(:,:)
|
|
INTEGER, INTENT(IN) :: n, noffset
|
|
|
|
INTEGER :: ib, in
|
|
!
|
|
IF( n > 0 ) THEN
|
|
!
|
|
! Process two states at the same time
|
|
!
|
|
DO ib = 1, n-1, 2
|
|
!
|
|
in = ib + noffset - 1
|
|
!
|
|
CALL dforce( ib, c, f, cgrad(:,in), cgrad(:,in+1), vpot, vkb, bec, n, noffset )
|
|
!
|
|
END DO
|
|
!
|
|
! and now process the last state in case that n is odd
|
|
!
|
|
IF( MOD( n, 2 ) /= 0 ) THEN
|
|
!
|
|
in = n + noffset - 1
|
|
!
|
|
CALL dforce( n, c, f, cgrad(:,in), cgrad(:,in), vpot, vkb, bec, n, noffset )
|
|
!
|
|
END IF
|
|
!
|
|
END IF
|
|
!
|
|
RETURN
|
|
END SUBROUTINE dforce_all
|
|
|
|
|
|
|
|
!
|
|
!-------------------------------------------------------------------------
|
|
SUBROUTINE dforce ( bec, betae, i, c, ca, df, da, v, ispin, f, n, nspin )
|
|
!-----------------------------------------------------------------------
|
|
!computes: the generalized force df=CMPLX(dfr,dfi) acting on the i-th
|
|
! electron state at the gamma point of the brillouin zone
|
|
! represented by the vector c=CMPLX(cr,ci)
|
|
!
|
|
! d_n(g) = f_n { 0.5 g^2 c_n(g) + [vc_n](g) +
|
|
! sum_i,ij d^q_i,ij (-i)**l beta_i,i(g)
|
|
! e^-ig.r_i < beta_i,j | c_n >}
|
|
USE kinds, ONLY: dp
|
|
USE control_flags, ONLY: iprint
|
|
USE gvecs
|
|
USE gvecw, ONLY: ngw
|
|
USE cvan, ONLY: ish
|
|
USE uspp, ONLY: nhsa=>nkb, dvan, deeq
|
|
USE uspp_param, ONLY: nhm, nh
|
|
USE smooth_grid_dimensions, ONLY: nr1s, nr2s, nr3s, &
|
|
nr1sx, nr2sx, nr3sx, nnrsx
|
|
USE constants, ONLY: pi, fpi
|
|
USE ions_base, ONLY: nsp, na, nat
|
|
USE gvecw, ONLY: ggp
|
|
USE cell_base, ONLY: tpiba2
|
|
USE ensemble_dft, ONLY: tens
|
|
USE funct, ONLY: dft_is_meta
|
|
USE cp_interfaces, ONLY: fwfft, invfft
|
|
!
|
|
IMPLICIT NONE
|
|
!
|
|
INTEGER, INTENT(IN) :: i
|
|
INTEGER, INTENT(IN) :: n, nspin
|
|
INTEGER, INTENT(IN) :: ispin( n )
|
|
REAL(DP), INTENT(IN) :: f( n )
|
|
!
|
|
COMPLEX(DP) betae(ngw,nhsa), c(ngw), ca(ngw), df(ngw), da(ngw)
|
|
REAL(DP) bec(nhsa,n), v(nnrsx,nspin)
|
|
!
|
|
! local variables
|
|
!
|
|
INTEGER iv, jv, ia, is, isa, ism, ios, iss1, iss2, ir, ig, inl, jnl
|
|
REAL(DP) fi, fip, dd
|
|
COMPLEX(DP) fp,fm,ci
|
|
REAL(DP) af(nhsa), aa(nhsa) ! automatic arrays
|
|
COMPLEX(DP), ALLOCATABLE :: dtemp(:)
|
|
COMPLEX(DP), ALLOCATABLE :: psi(:)
|
|
!
|
|
!
|
|
CALL start_clock( 'dforce' )
|
|
!
|
|
ALLOCATE( psi( nnrsx ) )
|
|
ALLOCATE( dtemp( ngw ) )
|
|
!
|
|
! important: if n is odd => c(*,n+1)=0.
|
|
!
|
|
IF (MOD(n,2).NE.0.AND.i.EQ.n) THEN
|
|
DO ig=1,ngw
|
|
ca(ig)=(0.,0.)
|
|
END DO
|
|
ENDIF
|
|
!
|
|
ci=(0.0,1.0)
|
|
!
|
|
psi (:) = (0.d0, 0.d0)
|
|
DO ig=1,ngw
|
|
psi(nms(ig))=CONJG(c(ig)-ci*ca(ig))
|
|
psi(nps(ig))=c(ig)+ci*ca(ig)
|
|
END DO
|
|
|
|
CALL invfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
|
|
!
|
|
iss1=ispin(i)
|
|
!
|
|
! the following avoids a potential out-of-bounds error
|
|
!
|
|
IF (i.NE.n) THEN
|
|
iss2=ispin(i+1)
|
|
ELSE
|
|
iss2=iss1
|
|
END IF
|
|
!
|
|
DO ir=1,nnrsx
|
|
psi(ir)=CMPLX(v(ir,iss1)* DBLE(psi(ir)), v(ir,iss2)*AIMAG(psi(ir)) )
|
|
END DO
|
|
!
|
|
CALL fwfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
|
|
!
|
|
! note : the factor 0.5 appears
|
|
! in the kinetic energy because it is defined as 0.5*g**2
|
|
! in the potential part because of the logics
|
|
!
|
|
|
|
IF (tens) THEN
|
|
fi =-0.5
|
|
fip=-0.5
|
|
ELSE
|
|
fi =- f(i)*0.5
|
|
fip=-f(i+1)*0.5
|
|
END IF
|
|
|
|
DO ig=1,ngw
|
|
fp= psi(nps(ig)) + psi(nms(ig))
|
|
fm= psi(nps(ig)) - psi(nms(ig))
|
|
df(ig)= fi*(tpiba2*ggp(ig)* c(ig)+CMPLX(DBLE(fp), AIMAG(fm)))
|
|
da(ig)=fip*(tpiba2*ggp(ig)*ca(ig)+CMPLX(AIMAG(fp),-DBLE(fm)))
|
|
END DO
|
|
|
|
IF(dft_is_meta()) CALL dforce_meta(c,ca,df,da,psi,iss1,iss2,fi,fip) !METAGGA
|
|
!
|
|
! aa_i,i,n = sum_j d_i,ij <beta_i,j|c_n>
|
|
!
|
|
IF(nhsa.GT.0)THEN
|
|
DO inl=1,nhsa
|
|
af(inl)=0.
|
|
aa(inl)=0.
|
|
END DO
|
|
!
|
|
DO is=1,nsp
|
|
DO iv=1,nh(is)
|
|
DO jv=1,nh(is)
|
|
isa=0
|
|
DO ism=1,is-1
|
|
isa=isa+na(ism)
|
|
END DO
|
|
DO ia=1,na(is)
|
|
inl=ish(is)+(iv-1)*na(is)+ia
|
|
jnl=ish(is)+(jv-1)*na(is)+ia
|
|
isa=isa+1
|
|
dd = deeq(iv,jv,isa,iss1)+dvan(iv,jv,is)
|
|
IF(tens) THEN
|
|
af(inl)=af(inl)-dd*bec(jnl, i)
|
|
ELSE
|
|
af(inl)=af(inl)- f(i)*dd*bec(jnl, i)
|
|
END IF
|
|
dd = deeq(iv,jv,isa,iss2)+dvan(iv,jv,is)
|
|
IF(tens) THEN
|
|
IF (i.NE.n) aa(inl)=aa(inl)-dd*bec(jnl,i+1)
|
|
ELSE
|
|
IF (i.NE.n) aa(inl)=aa(inl)-f(i+1)*dd*bec(jnl,i+1)
|
|
END IF
|
|
END DO
|
|
END DO
|
|
END DO
|
|
END DO
|
|
!
|
|
dtemp = 0.0d0
|
|
CALL MXMA &
|
|
& (betae,1,2*ngw,af,1,nhsa,dtemp,1,2*ngw,2*ngw,nhsa,1)
|
|
DO ig=1,ngw
|
|
df(ig)=df(ig)+dtemp(ig)
|
|
END DO
|
|
!
|
|
dtemp = 0.0d0
|
|
CALL MXMA &
|
|
& (betae,1,2*ngw,aa,1,nhsa,dtemp,1,2*ngw,2*ngw,nhsa,1)
|
|
DO ig=1,ngw
|
|
da(ig)=da(ig)+dtemp(ig)
|
|
END DO
|
|
ENDIF
|
|
|
|
DEALLOCATE( dtemp )
|
|
DEALLOCATE( psi )
|
|
!
|
|
CALL stop_clock( 'dforce' )
|
|
!
|
|
RETURN
|
|
END SUBROUTINE dforce
|
|
!
|
|
!=========================================================================
|
|
!C. Bekas, IBM Research Zurich.
|
|
! dforce with Task Groups parallelization
|
|
!=========================================================================
|
|
!-------------------------------------------------------------------------
|
|
subroutine dforce_bgl (bec,betae,i,c,ca,df,da,v)
|
|
!-----------------------------------------------------------------------
|
|
!computes: the generalized force df=CMPLX(dfr,dfi) acting on the i-th
|
|
! electron state at the gamma point of the brillouin zone
|
|
! represented by the vector c=CMPLX(cr,ci)
|
|
!
|
|
! d_n(g) = f_n { 0.5 g^2 c_n(g) + [vc_n](g) +
|
|
! sum_i,ij d^q_i,ij (-i)**l beta_i,i(g)
|
|
! e^-ig.r_i < beta_i,j | c_n >}
|
|
use kinds, only: dp
|
|
use control_flags, only: iprint
|
|
use gvecs
|
|
use gvecw, only: ngw
|
|
use cvan, only: ish
|
|
use uspp, only: nhsa=>nkb, dvan, deeq
|
|
use uspp_param, only: nhm, nh
|
|
use smooth_grid_dimensions, only: nr1s, nr2s, nr3s, &
|
|
nr1sx, nr2sx, nr3sx, nnrsx
|
|
use electrons_base, only: n => nbsp, ispin, f, nspin
|
|
use constants, only: pi, fpi
|
|
use ions_base, only: nsp, na, nat
|
|
use gvecw, only: ggp
|
|
use cell_base, only: tpiba2
|
|
use ensemble_dft, only: tens
|
|
use funct, only: dft_is_meta
|
|
USE task_groups
|
|
use fft_base, only : dffts
|
|
use mp_global, only : nogrp, me_image, me_ogrp
|
|
USE cp_interfaces, ONLY: fwfft, invfft
|
|
use parallel_include
|
|
!
|
|
implicit none
|
|
!
|
|
!--------
|
|
!C. Bekas
|
|
! c and ca hold the coefficients for the input eigenvalues
|
|
! originaly they are vectors of length ngw
|
|
! In the task-groups version they are matrices with
|
|
! ngw rows and NOGRP columns
|
|
!-----------------------------------------------------------
|
|
|
|
!--------
|
|
!C. Bekas
|
|
!--------
|
|
!Observe the increased sizes for Task Groups
|
|
!C. Bekas: Increased size for matrix v
|
|
complex(DP) :: betae(ngw,nhsa), c(ngw,2*NOGRP), ca(ngw,2*NOGRP), df(ngw*(NOGRP+1)), da(ngw*(NOGRP+1))
|
|
real(DP) :: bec(nhsa,n), v( ( NOGRP + 1 ) * nr1sx * nr2sx * nr3sx, nspin )
|
|
integer i
|
|
! local variables
|
|
integer iv, jv, ia, is, isa, ism, ios, iss1, iss2, ir, ig, inl, jnl
|
|
real(DP) fi, fip, dd
|
|
complex(DP) fp,fm,ci
|
|
real(DP), ALLOCATABLE :: af(:,:), aa(:,:)
|
|
! C. Bekas: increased size for automatic arrays
|
|
complex(DP), ALLOCATABLE :: dtemp(:,:)
|
|
complex(DP), ALLOCATABLE :: psi(:)
|
|
COMPLEX(DP), ALLOCATABLE :: temp_psi( : )
|
|
|
|
!--------
|
|
!C. Bekas
|
|
!--------
|
|
INTEGER :: eig_index, index, index_df_da, ierr
|
|
INTEGER, DIMENSION(NOGRP) :: local_send_cnt, local_send_displ, local_recv_cnt, local_recv_displ
|
|
!
|
|
call start_clock( 'dforce' )
|
|
!
|
|
#ifdef __BGL
|
|
|
|
ALLOCATE( psi( strd * ( NOGRP+1 ) ))
|
|
ALLOCATE( temp_psi( 2 * (NOGRP+1) * dffts%nsw(1) * nr3sx ) )
|
|
ALLOCATE( af( nhsa, NOGRP ), aa( nhsa, NOGRP ), dtemp( ngw, NOGRP ) )
|
|
!
|
|
! important: if n is odd => c(*,n+1)=0.
|
|
!
|
|
if ( MOD(n,2) .ne. 0 .and. i .eq. n ) then
|
|
do ig = 1, ngw
|
|
ca(ig,:) = 0.0d0
|
|
end do
|
|
end if
|
|
!
|
|
ci = ( 0.0d0, 1.0d0 )
|
|
!
|
|
|
|
psi(:) = (0D0,0D0)
|
|
index = 1
|
|
eig_offset = 0
|
|
do eig_index = 1, 2*NOGRP, 2! Outer loop for eigenvalues
|
|
!The eig_index loop is executed only ONCE when NOGRP=1.
|
|
!Equivalent to the case with no task-groups
|
|
!dfft%nsw(me) holds the number of z-sticks for the current processor per wave-function
|
|
!We can either send these in the group with an mpi_allgather...or put the
|
|
!in the PSIS vector (in special positions) and send them with them.
|
|
!Otherwise we can do this once at the beginning, before the loop.
|
|
!we choose to do the latter one.
|
|
|
|
!---------------------------------------------
|
|
!strd is defined earlier in the rhoofr routine
|
|
!---------------------------------------------
|
|
|
|
do ig=1,ngw
|
|
psi(nms(ig)+eig_offset*strd)=conjg( c(ig,index) - ci*ca(ig,index) )
|
|
psi(nps(ig)+eig_offset*strd)=c(ig,index)+ci*ca(ig,index)
|
|
end do
|
|
eig_offset = eig_offset + 1
|
|
index = index + 2
|
|
end do
|
|
|
|
CALL invfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
|
|
|
|
! task group is managed inside the fft driver
|
|
|
|
!
|
|
!==================================================================
|
|
!C. Bekas
|
|
!This logic is altered in the TG case, see below
|
|
!------------------------------------------------------------------
|
|
iss1=ispin(i)
|
|
|
|
!
|
|
if (i.ne.n) then
|
|
iss2=ispin(i+1)
|
|
else
|
|
iss2=iss1
|
|
end if
|
|
!==================================================================
|
|
|
|
|
|
!------------------------------------------------------------------
|
|
!Each wave function is multiplied term - to - term by the local
|
|
!potential, which is always the same for all eigenvalues
|
|
!The length of psi is so that it holds all parts of it in the
|
|
!plane-wave group
|
|
!------------------------------------------------------------------
|
|
do ir=1, nr1sx*nr2sx*tmp_npp(me_image+1)
|
|
psi(ir)=cmplx(v(ir,iss1)* DBLE(psi(ir)), &
|
|
& v(ir,iss2)*AIMAG(psi(ir)) )
|
|
end do
|
|
!
|
|
|
|
!-----------------------------------------------
|
|
!CALL TASK GROUP PARALLEL FORWARD FFT
|
|
!Note that the wavefunctions are already
|
|
!distributed according to the TASK-GROUPS
|
|
!scheme
|
|
!-----------------------------------------------
|
|
|
|
CALL fwfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
|
|
|
|
|
|
!-------------------------------------------------
|
|
!Bring pencils back to their original distribution
|
|
!-------------------------------------------------
|
|
local_send_cnt(1) = nr3sx*dffts%nsw(NOLIST(1)+1)
|
|
local_send_displ(1) = 0
|
|
local_recv_cnt(1) = nr3sx*dffts%nsw(me_image+1)
|
|
local_recv_displ(1) = 0
|
|
DO index=2, NOGRP
|
|
local_send_cnt(index) = nr3sx*dffts%nsw(NOLIST(index)+1)
|
|
local_send_displ(index) = local_send_displ(index-1) + local_send_cnt(index-1)
|
|
|
|
local_recv_cnt(index) = nr3sx*dffts%nsw(me_image+1)
|
|
local_recv_displ(index) = local_recv_displ(index-1) + local_recv_cnt(index-1)
|
|
ENDDO
|
|
|
|
CALL start_clock('DFORCE_ALL')
|
|
#if defined __MPI
|
|
CALL MPI_Alltoallv(psi, &
|
|
local_send_cnt, local_send_displ, MPI_DOUBLE_COMPLEX, temp_psi, &
|
|
local_recv_cnt, local_recv_displ, MPI_DOUBLE_COMPLEX, ME_OGRP, IERR)
|
|
#endif
|
|
CALL stop_clock('DFORCE_ALL')
|
|
|
|
|
|
!
|
|
! note : the factor 0.5 appears
|
|
! in the kinetic energy because it is defined as 0.5*g**2
|
|
! in the potential part because of the logics
|
|
!
|
|
|
|
|
|
!--------------------------------------------------------------
|
|
!Each processor will treat its own part of the eigenstate
|
|
!assigned to its ORBITAL group
|
|
!--------------------------------------------------------------
|
|
eig_offset = 0
|
|
index_df_da = 1
|
|
DO index = 1, 2*NOGRP, 2
|
|
do ig=1,ngw
|
|
if (tens) then
|
|
fi = -0.5
|
|
fip = -0.5
|
|
else
|
|
fi = -0.5*f(i+index-1)
|
|
fip = -0.5*f(i+index)
|
|
endif
|
|
fp= temp_psi(nps(ig)+eig_offset) + temp_psi(nms(ig)+eig_offset)
|
|
fm= temp_psi(nps(ig)+eig_offset) - temp_psi(nms(ig)+eig_offset)
|
|
df(index_df_da)= fi*(tpiba2 * ggp(ig) * c(ig,index)+cmplx(real(fp), aimag(fm)))
|
|
da(index_df_da)= fip*(tpiba2 * ggp(ig) * ca(ig,index)+cmplx(aimag(fp),-real(fm)))
|
|
index_df_da = index_df_da + 1
|
|
enddo
|
|
eig_offset = eig_offset + nr3sx * dffts%nsw(me_image+1)
|
|
!We take into account the number of elements received from other members of the orbital group
|
|
ENDDO
|
|
|
|
!--------------------------------------------------------------------------------------------
|
|
!C. Bekas: I am not sure whether this is implemented correctly...need to check this carefully
|
|
if(dft_is_meta()) call dforce_meta(c,ca,df,da,psi,iss1,iss2,fi,fip) !METAGGA
|
|
!--------------------------------------------------------------------------------------------
|
|
!
|
|
! aa_i,i,n = sum_j d_i,ij <beta_i,j|c_n>
|
|
!
|
|
IF( nhsa > 0 ) THEN
|
|
|
|
do inl=1,nhsa
|
|
af(inl,:)=0.0d0
|
|
aa(inl,:)=0.0d0
|
|
end do
|
|
!
|
|
do is=1,nsp
|
|
do iv=1,nh(is)
|
|
do jv=1,nh(is)
|
|
isa=0
|
|
do ism=1,is-1
|
|
isa=isa+na(ism)
|
|
end do
|
|
do ia=1,na(is)
|
|
inl=ish(is)+(iv-1)*na(is)+ia
|
|
jnl=ish(is)+(jv-1)*na(is)+ia
|
|
isa=isa+1
|
|
dd = deeq(iv,jv,isa,iss1)+dvan(iv,jv,is)
|
|
|
|
!-------------------------------------------------
|
|
!C. Bekas
|
|
!Work on all currently treated (NOGRP) eigenvalues
|
|
!-------------------------------------------------
|
|
ig = 1
|
|
DO index = 1, 2*NOGRP, 2
|
|
if (tens) then
|
|
af(inl,ig) = af(inl,ig) - dd*bec(jnl, i+index-1 )
|
|
else
|
|
af(inl,ig) = af(inl,ig) - f(i+index-1)*dd*bec(jnl, i+index-1 )
|
|
endif
|
|
|
|
dd = deeq(iv,jv,isa,iss2)+dvan(iv,jv,is)
|
|
|
|
if (tens) then
|
|
if ((i+index-1).ne.n) aa(inl,ig) = aa(inl,ig) - dd*bec(jnl,i+index)
|
|
else
|
|
if ((i+index-1).ne.n) aa(inl,ig) = aa(inl,ig) - f(i+index)*dd*bec(jnl,i+index)
|
|
endif
|
|
ig = ig + 1
|
|
ENDDO
|
|
end do
|
|
end do
|
|
end do
|
|
end do
|
|
!
|
|
dtemp(:,:) = 0.0d0
|
|
|
|
call MXMA (betae, 1, 2*ngw, af, 1, nhsa, dtemp, 1, 2*ngw, 2*ngw, nhsa, NOGRP)
|
|
|
|
! DO index = 1, NOGRP
|
|
! DO ig = 1+(index-1)*ngw, index*ngw
|
|
! df(ig) = df(ig) + dtemp(ig,index)
|
|
! END DO
|
|
! ENDDO
|
|
|
|
DO ig = 1, NOGRP
|
|
df(1+(ig-1)*ngw:ig*ngw) = df(1+(ig-1)*ngw:ig*ngw) + dtemp(:,ig)
|
|
ENDDO
|
|
|
|
|
|
dtemp(:,:) = 0.0d0
|
|
|
|
call MXMA (betae, 1, 2*ngw, aa, 1, nhsa, dtemp, 1, 2*ngw, 2*ngw, nhsa, NOGRP)
|
|
|
|
! DO index = 1, NOGRP
|
|
! DO ig = 1+(index-1)*ngw, index*ngw
|
|
! da(ig) = da(ig) + dtemp(ig,index)
|
|
! ENDDO
|
|
! ENDDO
|
|
|
|
DO ig = 1, NOGRP
|
|
da(1+(ig-1)*ngw:ig*ngw) = da(1+(ig-1)*ngw:ig*ngw) + dtemp(:,ig)
|
|
ENDDO
|
|
|
|
END IF
|
|
|
|
|
|
DEALLOCATE( psi )
|
|
DEALLOCATE( temp_psi )
|
|
DEALLOCATE( af, aa, dtemp )
|
|
|
|
#endif
|
|
!
|
|
call stop_clock( 'dforce' )
|
|
!
|
|
return
|
|
end subroutine dforce_bgl
|
|
!
|