mirror of https://gitlab.com/QEF/q-e.git
1751 lines
66 KiB
Fortran
1751 lines
66 KiB
Fortran
!
|
|
! Copyright (C) 2001-2013 Quantum ESPRESSO group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
!
|
|
|
|
!routines for rapid evaluation of Fock operators
|
|
|
|
MODULE exchange_custom
|
|
|
|
USE kinds, ONLY: DP
|
|
USE fft_custom_gwl
|
|
|
|
IMPLICIT NONE
|
|
|
|
TYPE exchange_cus
|
|
!data structure for general Fock operator
|
|
REAL(kind=DP), DIMENSION(:,:,:), POINTER :: wfc!valence wfcs in real space
|
|
REAL(kind=DP) :: dual!dual for defining the grid on real space
|
|
REAL(kind=DP) :: cutoff!G space cutoff in Ryberg
|
|
REAL(kind=DP) :: truncation_radius!for Coulomb interaction in Bohr
|
|
REAL(kind=DP), DIMENSION(:), POINTER :: fac!factors on G space of Coulomb interaction
|
|
INTEGER :: nbndv(2)!number of valence states
|
|
TYPE(fft_cus) :: fft_g2r!from wfcs to real space
|
|
TYPE(fft_cus) :: fft_r2g!from real space to restricted G space
|
|
TYPE(fft_cus) :: fft_small!for periodic calculations
|
|
!the following for small
|
|
REAL(kind=DP), DIMENSION(:), POINTER :: fac_s
|
|
INTEGER, DIMENSION(:,:,:), POINTER :: r2s_xy
|
|
INTEGER, DIMENSION(:,:,:), POINTER :: s2r_xy
|
|
INTEGER :: n(3),m(3)!I have the relation n*edge=diameter*m
|
|
LOGICAL :: l_localized!if true consider valence wfcs as localized
|
|
REAL(kind=DP) :: thrs_loc!threshold for localized valence wfcs
|
|
INTEGER, DIMENSION (:,:), POINTER :: n_loc!number of points above thrs
|
|
INTEGER, DIMENSION (:,:,:), POINTER :: tab_loc!table for points above threshold
|
|
INTEGER :: nspin!number of spin channels
|
|
REAL(kind=DP), DIMENSION(:,:,:,:,:),POINTER :: wfc_red!valence wfcs in real space ready for the reduced grid
|
|
END TYPE exchange_cus
|
|
|
|
SAVE
|
|
|
|
!to be use in pw.x
|
|
|
|
LOGICAL :: l_exchange_fast=.false.
|
|
REAL(kind=DP) :: exchange_fast_dual=2.d0
|
|
REAL(kind=DP) :: exchange_fast_cutoff=40.d0
|
|
REAL(kind=DP) :: exchange_fast_radius=10.d0
|
|
INTEGER :: exchange_fast_nbndv(2)
|
|
LOGICAL :: l_exchange_turbo=.false.
|
|
INTEGER :: exchange_m(3)
|
|
INTEGER :: exchange_n(3)
|
|
LOGICAL :: l_exchange_localized=.false.
|
|
REAL(kind=DP) :: exchange_thrs_loc
|
|
CONTAINS
|
|
|
|
|
|
SUBROUTINE periodic_fock_cus(ispin,psi,xpsi,exx_cus)
|
|
!apply Fock operator to a wavefunction
|
|
!experimental version work just with factor 1/2
|
|
|
|
USE io_global, ONLY : stdout, ionode,ionode_id
|
|
USE mp_pools, ONLY : me_pool,intra_pool_comm
|
|
USE cell_base, ONLY: at, alat, tpiba, omega, tpiba2,bg
|
|
USE constants, ONLY : e2, pi, tpi, fpi, RYTOEV
|
|
USE wavefunctions, ONLY : psic
|
|
USE mp, ONLY : mp_sum, mp_max
|
|
USE mp_world, ONLY : nproc
|
|
USE wvfct, ONLY : npwx, npw, wg
|
|
USE gvect
|
|
USE mp_wave, ONLY : mergewf,splitwf
|
|
|
|
implicit none
|
|
|
|
INTEGER, INTENT(in) :: ispin! spin channel
|
|
COMPLEX(kind=DP), INTENT(in) :: psi(npw)
|
|
COMPLEX(kind=DP), INTENT(inout) :: xpsi(npw)
|
|
|
|
TYPE(exchange_cus), INTENT(in) :: exx_cus
|
|
|
|
INTEGER, ALLOCATABLE :: r2s_xy(:) !real to small XY index
|
|
|
|
|
|
INTEGER :: i,j,k,n,ii,jj,kk,ig,iv,jv
|
|
INTEGER :: ix,iy,iz, ix_s,iy_s,iz_s,iz_eff
|
|
INTEGER :: rz_start,rz_end,iqq,iqq_s,rz_start_s,rz_end_s
|
|
REAL(kind=DP), ALLOCATABLE :: fac(:),prodr(:),prods(:,:),planes(:),vexc(:)
|
|
REAL(kind=DP) :: qq_fact, sca
|
|
INTEGER :: iorig, idest
|
|
INTEGER,ALLOCATABLE :: z2proc_s(:),z2proc(:)
|
|
INTEGER :: req, ierr
|
|
#if defined(__MPI)
|
|
INTEGER :: istatus(MPI_STATUS_SIZE)
|
|
#endif
|
|
COMPLEX(kind=DP), ALLOCATABLE :: prods_g(:,:)
|
|
COMPLEX(kind=DP), ALLOCATABLE :: psi_t(:),evc_g(:),vexc_g(:)
|
|
REAL(kind=DP), ALLOCATABLE :: psi_r(:),psi_r_red(:,:,:),plane(:)
|
|
|
|
INTEGER :: i_mod, j_mod,k_mod,iplane
|
|
INTEGER :: jd,jdmax,ir,nr3small,nr3small_max,nplane
|
|
REAL(kind=DP), ALLOCATABLE :: prod_r_red(:,:,:)
|
|
REAL(kind=DP), ALLOCATABLE :: vexc_red(:,:,:)
|
|
INTEGER :: ip_todo,token,ip_delta,tag,ip
|
|
REAL(kind=DP), ALLOCATABLE :: b_plane(:,:)
|
|
INTEGER, ALLOCATABLE :: b_iplane(:),b_z(:)
|
|
INTEGER, ALLOCATABLE :: proc_list(:)
|
|
INTEGER :: offset
|
|
|
|
!write(stdout,*) 'periodic_fock'
|
|
!FLUSH(stdout)
|
|
|
|
CALL start_clock('periodic_fock')
|
|
|
|
!setup correspondence grids
|
|
|
|
rz_start=0
|
|
rz_end =0
|
|
do ii=1,me_pool + 1
|
|
rz_start=rz_end+1
|
|
rz_end=rz_end+exx_cus%fft_g2r%dfftt%nr3p(ii)
|
|
end do
|
|
|
|
rz_start_s=0
|
|
rz_end_s=0
|
|
do ii=1,me_pool + 1
|
|
rz_start_s=rz_end_s+1
|
|
rz_end_s=rz_end_s+exx_cus%fft_small%dfftt%nr3p(ii)
|
|
end do
|
|
|
|
nr3small=rz_end_s-rz_start_s+1
|
|
|
|
allocate(z2proc_s(exx_cus%fft_small%nr3t))
|
|
allocate(z2proc(exx_cus%fft_g2r%nr3t))
|
|
allocate(vexc(exx_cus%fft_g2r%nrxxt))
|
|
allocate( evc_g( exx_cus%fft_g2r%ngmt_g ) )
|
|
allocate(vexc_g(exx_cus%fft_g2r%npwt))
|
|
j=0
|
|
k=0
|
|
do ii=1,nproc
|
|
j=k+1
|
|
k=k+exx_cus%fft_small%dfftt%nr3p(ii)
|
|
z2proc_s(j:k)=ii-1
|
|
end do
|
|
|
|
j=0
|
|
k=0
|
|
do ii=1,nproc
|
|
j=k+1
|
|
k=k+exx_cus%fft_g2r%dfftt%nr3p(ii)
|
|
z2proc(j:k)=ii-1
|
|
end do
|
|
|
|
|
|
|
|
|
|
|
|
allocate(fac(exx_cus%fft_small%ngmt))
|
|
|
|
!setup fac
|
|
do ig=1,exx_cus%fft_small%ngmt
|
|
|
|
qq_fact = exx_cus%fft_small%gt(1,ig)**2.d0 + exx_cus%fft_small%gt(2,ig)**2.d0 + exx_cus%fft_small%gt(3,ig)**2.d0
|
|
|
|
if (qq_fact > 1.d-8) then
|
|
fac(ig)=(e2*fpi/(exx_cus%fft_small%tpiba2_t*qq_fact))*(1.d0-dcos(dsqrt(qq_fact)*&
|
|
&(exx_cus%truncation_radius*dble(exx_cus%n(1))/dble(exx_cus%m(1)))*exx_cus%fft_small%tpiba_t))
|
|
else
|
|
fac(ig)=e2*fpi*((exx_cus%truncation_radius*dble(exx_cus%n(1))/dble(exx_cus%m(1)))**2.d0/2.d0)
|
|
|
|
endif
|
|
|
|
end do
|
|
fac(:)=fac(:)/omega/(dble(exx_cus%n(1)*exx_cus%n(2)*exx_cus%n(3)))
|
|
|
|
|
|
|
|
allocate(prodr(exx_cus%fft_g2r%nrxxt))
|
|
allocate(prods(exx_cus%fft_small%nrxxt,2))
|
|
allocate(planes(exx_cus%fft_small%nrx1t*exx_cus%fft_small%nrx2t))
|
|
allocate(prods_g(exx_cus%fft_small%ngmt,2))
|
|
allocate(plane(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t))
|
|
|
|
|
|
allocate(psi_t(exx_cus%fft_g2r%npwt))
|
|
allocate(psi_r(exx_cus%fft_g2r%nrxxt))
|
|
allocate(psi_r_red(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t,nr3small,exx_cus%m(3)))
|
|
allocate(prod_r_red(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t,nr3small,exx_cus%m(3)))
|
|
allocate(vexc_red(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t,nr3small,exx_cus%m(3)))
|
|
|
|
|
|
!loop on KS states
|
|
|
|
vexc(:)=0.d0
|
|
vexc_red=0.d0
|
|
|
|
CALL start_clock('pf_mergesplit')
|
|
if(exx_cus%fft_g2r%dual_t==4.d0) then
|
|
psi_t(1:exx_cus%fft_g2r%npwt)=psi(1:exx_cus%fft_g2r%npwt)
|
|
else
|
|
call mergewf(psi(:),evc_g,npw,ig_l2g,me_pool,nproc,ionode_id,intra_pool_comm)
|
|
call splitwf(psi_t(:),evc_g,exx_cus%fft_g2r%npwt,exx_cus%fft_g2r%ig_l2gt,&
|
|
&me_pool,nproc,ionode_id,intra_pool_comm)
|
|
endif
|
|
CALL stop_clock('pf_mergesplit')
|
|
|
|
psic(:)=(0.d0,0.d0)
|
|
psic(exx_cus%fft_g2r%nlt(1:exx_cus%fft_g2r%npwt)) = psi_t(1:exx_cus%fft_g2r%npwt)
|
|
psic(exx_cus%fft_g2r%nltm(1:exx_cus%fft_g2r%npwt)) = CONJG( psi_t(1:exx_cus%fft_g2r%npwt) )
|
|
|
|
CALL start_clock('pf_fftext')
|
|
CALL cft3t( exx_cus%fft_g2r, psic, exx_cus%fft_g2r%nr1t, exx_cus%fft_g2r%nr2t, exx_cus%fft_g2r%nr3t, &
|
|
&exx_cus%fft_g2r%nrx1t, exx_cus%fft_g2r%nrx2t, exx_cus%fft_g2r%nrx3t, 2 )
|
|
psi_r(1:exx_cus%fft_g2r%nrxxt)= DBLE(psic(1:exx_cus%fft_g2r%nrxxt))
|
|
CALL stop_clock('pf_fftext')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!put the psi wavefunction already on the small z distribution among processor
|
|
!!!!!!!!!!!
|
|
!first the internal case
|
|
do k=1,exx_cus%n(3)
|
|
do iz=rz_start,rz_end
|
|
iz_s=mod(iz-1+(k-1)*exx_cus%fft_g2r%nr3t,exx_cus%fft_small%nr3t)+1
|
|
iplane=(iz-1+(k-1)*exx_cus%fft_g2r%nr3t)/exx_cus%fft_small%nr3t+1
|
|
idest=z2proc_s(iz_s)
|
|
!put plane on small plane
|
|
if(me_pool==idest) then
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
psi_r_red(iqq,iz_s-rz_start_s+1,iplane)=&
|
|
&psi_r((iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+iqq)
|
|
enddo
|
|
endif
|
|
|
|
enddo
|
|
enddo
|
|
|
|
nr3small_max=nr3small
|
|
CALL mp_max( nr3small_max, intra_pool_comm )
|
|
|
|
allocate(b_plane(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t,nr3small_max))
|
|
allocate(b_iplane(nr3small_max),b_z(nr3small_max))
|
|
allocate(proc_list(nproc))
|
|
!loop on task delta
|
|
do ip_delta=1,nproc-1
|
|
|
|
if(mod(ip_delta,2)==0) then
|
|
!if(mod(me_pool+1,2)==0) then!even
|
|
! if(mod((me_pool+1)/2,2)==0) then
|
|
! token=0
|
|
! else
|
|
! token=1
|
|
! endif
|
|
|
|
! else
|
|
!
|
|
! if(mod((me_pool+2)/2,2)==0) then
|
|
! token=0
|
|
! else
|
|
! token=1
|
|
! endif
|
|
!
|
|
! endif
|
|
proc_list=0
|
|
do ip=1,nproc
|
|
if(proc_list(ip)==0) then
|
|
if(proc_list(mod(ip+ip_delta-1,nproc)+1)==0) then
|
|
proc_list(ip)=-1
|
|
proc_list(mod(ip+ip_delta-1,nproc)+1)=1
|
|
else
|
|
|
|
endif
|
|
endif
|
|
enddo
|
|
|
|
if(proc_list(me_pool+1) ==-1) then
|
|
token=0
|
|
else
|
|
token=1
|
|
endif
|
|
else
|
|
if(mod(me_pool+1,2)==0) then
|
|
token=0
|
|
else
|
|
token=1
|
|
endif
|
|
|
|
endif
|
|
do ip_todo=1,2
|
|
if(mod(ip_todo+token,2)==0) then
|
|
!if I am a sender
|
|
!loop on my data to see if and what I have to send
|
|
nplane=0
|
|
do k=1,exx_cus%n(3)
|
|
do iz=rz_start,rz_end
|
|
iz_s=mod(iz-1+(k-1)*exx_cus%fft_g2r%nr3t,exx_cus%fft_small%nr3t)+1
|
|
iplane=(iz-1+(k-1)*exx_cus%fft_g2r%nr3t)/exx_cus%fft_small%nr3t+1
|
|
idest=z2proc_s(iz_s)
|
|
if(idest==mod(me_pool+ip_delta,nproc)) then
|
|
nplane=nplane+1
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
b_plane(iqq,nplane)=&
|
|
&psi_r((iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+iqq)
|
|
enddo
|
|
b_z(nplane)=iz_s
|
|
b_iplane(nplane)=iplane
|
|
|
|
endif
|
|
enddo
|
|
enddo
|
|
!send nplane
|
|
#if defined(__MPI)
|
|
idest=mod(me_pool+ip_delta,nproc)
|
|
CALL MPI_ISEND( nplane,1, MPI_INTEGER, idest, 0, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
|
|
if(nplane>0) then
|
|
CALL MPI_ISEND( b_plane,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t*nplane, MPI_DOUBLE_PRECISION, &
|
|
&idest, 1, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
CALL MPI_ISEND( b_z,nplane, MPI_INTEGER,idest, 2, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
CALL MPI_ISEND( b_iplane,nplane, MPI_INTEGER,idest, 3, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
endif
|
|
#endif
|
|
|
|
else
|
|
!if I am receiver
|
|
!see if and what I have to receive
|
|
#if defined(__MPI)
|
|
iorig=me_pool-ip_delta
|
|
if(iorig<0) iorig=iorig+nproc
|
|
CALL MPI_RECV( nplane,1, MPI_INTEGER, iorig, 0, intra_pool_comm, istatus,IERR )
|
|
|
|
if(nplane>0) then
|
|
CALL MPI_RECV( b_plane,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t*nplane, MPI_DOUBLE_PRECISION, &
|
|
&iorig, 1, intra_pool_comm, istatus,IERR )
|
|
CALL MPI_RECV( b_z,nplane, MPI_INTEGER,iorig, 2, intra_pool_comm, istatus,IERR )
|
|
CALL MPI_RECV( b_iplane,nplane, MPI_INTEGER,iorig, 3, intra_pool_comm, istatus,IERR )
|
|
|
|
|
|
|
|
do ii=1,nplane
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
psi_r_red(iqq,b_z(ii)-rz_start_s+1,b_iplane(ii))=b_plane(iqq,ii)
|
|
enddo
|
|
enddo
|
|
endif
|
|
#endif
|
|
endif
|
|
enddo
|
|
enddo
|
|
deallocate(b_plane,b_iplane,b_z,proc_list)
|
|
|
|
!!!!!!!!!
|
|
! do k=1,exx_cus%n(3)
|
|
! do iz=1,exx_cus%fft_g2r%nr3t
|
|
! if(iz >= rz_start .and. iz <= rz_end) then
|
|
! !if Z is mine determine owner and send it
|
|
! iz_s=mod(iz-1+(k-1)*exx_cus%fft_g2r%nr3t,exx_cus%fft_small%nr3t)+1
|
|
! iplane=(iz-1+(k-1)*exx_cus%fft_g2r%nr3t)/exx_cus%fft_small%nr3t+1
|
|
! idest=z2proc_s(iz_s)
|
|
! !put plane on small plane
|
|
! if(me_pool==idest) then
|
|
! do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
! psi_r_red(iqq,iz_s-rz_start_s+1,iplane)=&
|
|
! &psi_r((iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+iqq)
|
|
! enddo
|
|
! else
|
|
! do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
! plane(iqq)=psi_r((iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+iqq)
|
|
! enddo
|
|
! CALL MPI_ISEND( plane,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
! &idest, iz, intra_pool_comm, req,IERR )
|
|
! CALL MPI_WAIT(req,istatus,ierr)
|
|
!
|
|
! endif
|
|
!
|
|
! else
|
|
! iz_s=mod(iz-1+(k-1)*exx_cus%fft_g2r%nr3t,exx_cus%fft_small%nr3t)+1
|
|
! iplane=(iz-1+(k-1)*exx_cus%fft_g2r%nr3t)/exx_cus%fft_small%nr3t+1
|
|
! !if Z o small cell is mine receive it
|
|
! if(z2proc_s(iz_s)==me_pool) then
|
|
! iorig=z2proc(iz)
|
|
! CALL MPI_RECV( plane, exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
! &iorig, iz, intra_pool_comm, istatus, IERR )
|
|
! do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
! psi_r_red(iqq,iz_s-rz_start_s+1,iplane)=plane(iqq)
|
|
! enddo
|
|
!
|
|
!
|
|
!
|
|
! endif
|
|
! endif
|
|
! enddo
|
|
! enddo
|
|
|
|
|
|
|
|
!loop on KS valence states
|
|
|
|
CALL start_clock('pf_inner')
|
|
do k=1,exx_cus%n(3)
|
|
do j=1,exx_cus%n(2)
|
|
do i=1,exx_cus%n(1)
|
|
do jv=1,exx_cus%nbndv(ispin),2!loop on bands
|
|
!do product
|
|
if(jv<exx_cus%nbndv(ispin)) then
|
|
jdmax=1
|
|
else
|
|
jdmax=0
|
|
endif
|
|
prods=0.d0
|
|
do jd=0,jdmax
|
|
!!!!!!!!!!!!!!!
|
|
CALL start_clock('pf_product')
|
|
if(.not.exx_cus%l_localized) then
|
|
do ii=1,exx_cus%m(3)
|
|
do iz=1,nr3small
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
prod_r_red(iqq,iz,ii)=psi_r_red(iqq,iz,ii)*exx_cus%wfc_red(iqq,iz,ii,jv+jd,ispin)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
else
|
|
write(stdout,*) 'to be implmented yet'
|
|
FLUSH(stdout)
|
|
stop
|
|
endif
|
|
CALL stop_clock('pf_product')
|
|
|
|
|
|
CALL start_clock('pf_plane')
|
|
do iz_s=1,nr3small
|
|
planes(:)=0.d0
|
|
do ii=1,exx_cus%m(3)
|
|
!do iy=1,exx_cus%fft_g2r%nr2t
|
|
! do ix=1,exx_cus%fft_g2r%nr1t
|
|
! iqq=(iy-1)*exx_cus%fft_g2r%nrx1t+ix!ATTENZIONE TO BE VERIFY
|
|
do iqq=1,exx_cus%fft_g2r%nr1t*exx_cus%fft_g2r%nr2t
|
|
planes(exx_cus%r2s_xy(iqq,i,j))=planes(exx_cus%r2s_xy(iqq,i,j))+&
|
|
&prod_r_red(iqq,iz_s,ii)
|
|
enddo
|
|
! enddo
|
|
!enddo
|
|
enddo
|
|
offset=(iz_s-1)*(exx_cus%fft_small%nrx1t*exx_cus%fft_small%nrx2t)
|
|
do iqq_s=1,exx_cus%fft_small%nrx1t*exx_cus%fft_small%nrx2t
|
|
prods(offset+iqq_s,jd+1)=&
|
|
& planes(iqq_s)
|
|
enddo
|
|
|
|
|
|
enddo
|
|
CALL stop_clock('pf_plane')
|
|
|
|
|
|
|
|
enddo!on jd
|
|
|
|
! do fft
|
|
CALL start_clock('pf_cache')
|
|
psic(1:exx_cus%fft_small%nrxxt)=cmplx(prods(1:exx_cus%fft_small%nrxxt,1),prods(1:exx_cus%fft_small%nrxxt,2))
|
|
CALL stop_clock('pf_cache')
|
|
CALL start_clock('pf_fftinner')
|
|
CALL cft3t( exx_cus%fft_small, psic, exx_cus%fft_small%nr1t, exx_cus%fft_small%nr2t, exx_cus%fft_small%nr3t, &
|
|
&exx_cus%fft_small%nrx1t, exx_cus%fft_small%nrx2t, exx_cus%fft_small%nrx3t, -1 )
|
|
CALL stop_clock('pf_fftinner')
|
|
if(jdmax==0) then
|
|
CALL start_clock('pf_cache')
|
|
prods_g(1:exx_cus%fft_small%ngmt,1) = psic(exx_cus%fft_small%nlt(1:exx_cus%fft_small%ngmt))
|
|
CALL stop_clock('pf_cache')
|
|
!apply fac
|
|
CALL start_clock('pf_product')
|
|
prods_g(1:exx_cus%fft_small%ngmt,1)=fac(1:exx_cus%fft_small%ngmt)*prods_g(1:exx_cus%fft_small%ngmt,1)
|
|
CALL stop_clock('pf_product')
|
|
else
|
|
CALL start_clock('pf_cache')
|
|
prods_g(1:exx_cus%fft_small%ngmt,1)=0.5d0*(psic(exx_cus%fft_small%nlt(1:exx_cus%fft_small%ngmt))+&
|
|
&conjg( psic(exx_cus%fft_small%nltm(1:exx_cus%fft_small%ngmt))))
|
|
prods_g(1:exx_cus%fft_small%ngmt,2)=(0.d0,-0.5d0)*(psic(exx_cus%fft_small%nlt(1:exx_cus%fft_small%ngmt))-&
|
|
&conjg( psic(exx_cus%fft_small%nltm(1:exx_cus%fft_small%ngmt))))
|
|
CALL stop_clock('pf_cache')
|
|
CALL start_clock('pf_product')
|
|
prods_g(1:exx_cus%fft_small%ngmt,1)=fac(1:exx_cus%fft_small%ngmt)*prods_g(1:exx_cus%fft_small%ngmt,1)
|
|
prods_g(1:exx_cus%fft_small%ngmt,2)=fac(1:exx_cus%fft_small%ngmt)*prods_g(1:exx_cus%fft_small%ngmt,2)
|
|
CALL stop_clock('pf_product')
|
|
endif
|
|
!put back on large cell
|
|
psic=0.d0
|
|
CALL start_clock('pf_cache')
|
|
if(jdmax==0) then
|
|
psic(exx_cus%fft_small%nlt(1:exx_cus%fft_small%ngmt)) = prods_g(1:exx_cus%fft_small%ngmt,1)
|
|
psic(exx_cus%fft_small%nltm(1:exx_cus%fft_small%ngmt)) = CONJG( prods_g(1:exx_cus%fft_small%ngmt,1))
|
|
else
|
|
psic(exx_cus%fft_small%nlt(1:exx_cus%fft_small%ngmt))= prods_g(1:exx_cus%fft_small%ngmt,1)+&
|
|
&(0.d0,1.d0)*prods_g(1:exx_cus%fft_small%ngmt,2)
|
|
psic(exx_cus%fft_small%nltm(1:exx_cus%fft_small%ngmt)) = CONJG( prods_g(1:exx_cus%fft_small%ngmt,1) )+&
|
|
&(0.d0,1.d0)*CONJG( prods_g(1:exx_cus%fft_small%ngmt,2) )
|
|
|
|
endif
|
|
CALL stop_clock('pf_cache')
|
|
CALL start_clock('pf_fftinner')
|
|
CALL cft3t( exx_cus%fft_small, psic, exx_cus%fft_small%nr1t, exx_cus%fft_small%nr2t, exx_cus%fft_small%nr3t, &
|
|
&exx_cus%fft_small%nrx1t, exx_cus%fft_small%nrx2t, exx_cus%fft_small%nrx3t, +1 )
|
|
CALL stop_clock('pf_fftinner')
|
|
CALL start_clock('pf_cache')
|
|
prods(1:exx_cus%fft_small%nrxxt,1)=dble(psic(1:exx_cus%fft_small%nrxxt))
|
|
if(jdmax==1) then
|
|
prods(1:exx_cus%fft_small%nrxxt,2)=dimag(psic(1:exx_cus%fft_small%nrxxt))
|
|
endif
|
|
CALL stop_clock('pf_cache')
|
|
!!!!!!!!!!
|
|
|
|
do jd=0,jdmax
|
|
CALL start_clock('pf_plane2')
|
|
do iz_s=1,nr3small
|
|
plane(:)=0.d0
|
|
do jj=1,exx_cus%m(2)
|
|
do kk=1,exx_cus%m(1)
|
|
! do iy_s=1,exx_cus%fft_small%nr2t
|
|
! do ix_s=1,exx_cus%fft_small%nr1t
|
|
! iy=mod(iy_s+exx_cus%fft_small%nr2t*(jj-1)-1,exx_cus%fft_g2r%nr2t)+1
|
|
! ix=mod(ix_s+exx_cus%fft_small%nr1t*(kk-1)-1,exx_cus%fft_g2r%nr1t)+1
|
|
! iqq=(iy-1)*exx_cus%fft_g2r%nrx1t+ix
|
|
! iqq_s=(iy_s-1)*exx_cus%fft_small%nrx1t+ix_s
|
|
! plane(iqq)=prods((iz_s-1)*(exx_cus%fft_small%nrx1t*exx_cus%fft_small%nrx2t)+iqq_s,jd+1)
|
|
!
|
|
! enddo
|
|
! enddo
|
|
do iqq_s=1,exx_cus%fft_small%nrx1t*exx_cus%fft_small%nrx2t!problem if nrx1t /= nr1t
|
|
plane(exx_cus%s2r_xy(iqq_s,kk,jj))=&
|
|
&prods((iz_s-1)*(exx_cus%fft_small%nrx1t*exx_cus%fft_small%nrx2t)+iqq_s,jd+1)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
do ii=1,exx_cus%m(3)
|
|
do iqq=1,exx_cus%fft_g2r%nr1t*exx_cus%fft_g2r%nr2t
|
|
prod_r_red(iqq,iz_s,ii)=plane(iqq)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
CALL stop_clock('pf_plane2')
|
|
CALL start_clock('pf_product')
|
|
if(.not.exx_cus%l_localized) then
|
|
do ii=1,exx_cus%m(3)
|
|
do iz_s=1,nr3small
|
|
do iqq=1,exx_cus%fft_g2r%nr1t*exx_cus%fft_g2r%nr2t
|
|
vexc_red(iqq,iz_s,ii)=vexc_red(iqq,iz_s,ii)+prod_r_red(iqq,iz_s,ii)*&
|
|
&exx_cus%wfc_red(iqq,iz_s,ii,jv+jd,ispin)*wg(jv+jd,ispin)*dble(exx_cus%nspin)/2.d0
|
|
enddo
|
|
enddo
|
|
enddo
|
|
else
|
|
write(stdout,*) 'Not implemented yet'
|
|
stop
|
|
endif
|
|
CALL stop_clock('pf_product')
|
|
enddo
|
|
|
|
|
|
enddo!on nbndv
|
|
enddo
|
|
!end loop
|
|
enddo
|
|
enddo
|
|
CALL stop_clock('pf_inner')
|
|
!!!!!!!!!!!!!
|
|
!!!!!!!!!!!
|
|
!first the internal case
|
|
|
|
do iz_s=rz_start_s,rz_end_s
|
|
do ii=1,exx_cus%m(3)
|
|
iz=mod(iz_s+exx_cus%fft_small%nr3t*(ii-1)-1,exx_cus%fft_g2r%nrx3t)+1
|
|
idest=z2proc(iz)
|
|
if(me_pool==idest) then
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
vexc((iz-rz_start)*exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t+iqq)=vexc_red(iqq,iz_s-rz_start_s+1,ii)
|
|
enddo
|
|
endif
|
|
enddo
|
|
enddo
|
|
allocate(proc_list(nproc))
|
|
|
|
allocate(b_plane(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t,nr3small_max*exx_cus%m(3)))
|
|
allocate(b_z(nr3small_max*exx_cus%m(3)))
|
|
!loop on task delta
|
|
do ip_delta=1,nproc-1
|
|
if(mod(ip_delta,2)==0) then
|
|
!if(mod(me_pool+1,2)==0) then!even
|
|
! if(mod((me_pool+1)/2,2)==0) then
|
|
! token=0
|
|
! else
|
|
! token=1
|
|
! endif
|
|
!
|
|
!else
|
|
!
|
|
! if(mod((me_pool+2)/2,2)==0) then
|
|
! token=0
|
|
! else
|
|
! token=1
|
|
! endif
|
|
!
|
|
!endif
|
|
proc_list=0
|
|
do ip=1,nproc
|
|
if(proc_list(ip)==0) then
|
|
if(proc_list(mod(ip+ip_delta-1,nproc)+1)==0) then
|
|
proc_list(ip)=-1
|
|
proc_list(mod(ip+ip_delta-1,nproc)+1)=1
|
|
else
|
|
|
|
endif
|
|
endif
|
|
enddo
|
|
|
|
if(proc_list(me_pool+1) ==-1) then
|
|
token=0
|
|
else
|
|
token=1
|
|
endif
|
|
|
|
|
|
else
|
|
if(mod(me_pool+1,2)==0) then
|
|
token=0
|
|
else
|
|
token=1
|
|
endif
|
|
|
|
endif
|
|
do ip_todo=1,2
|
|
if(mod(ip_todo+token,2)==0) then
|
|
!if I am a sender
|
|
!loop on my data to see if and what I have to send
|
|
nplane=0
|
|
do iz_s=rz_start_s,rz_end_s
|
|
do ii=1,exx_cus%m(3)
|
|
iz=mod(iz_s+exx_cus%fft_small%nr3t*(ii-1)-1,exx_cus%fft_g2r%nrx3t)+1
|
|
idest=z2proc(iz)
|
|
if(idest==mod(me_pool+ip_delta,nproc)) then
|
|
nplane=nplane+1
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
b_plane(iqq,nplane)=vexc_red(iqq,iz_s-rz_start_s+1,ii)
|
|
enddo
|
|
b_z(nplane)=iz
|
|
endif
|
|
enddo
|
|
enddo
|
|
!send nplane
|
|
#if defined(__MPI)
|
|
idest=mod(me_pool+ip_delta,nproc)
|
|
CALL MPI_ISEND( nplane,1, MPI_INTEGER, idest, 0, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
|
|
if(nplane>0) then
|
|
CALL MPI_ISEND( b_plane,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t*nplane, MPI_DOUBLE_PRECISION, &
|
|
&idest, 1, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
CALL MPI_ISEND( b_z,nplane, MPI_INTEGER,idest, 2, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
|
|
endif
|
|
#endif
|
|
|
|
else
|
|
!if I am receiver
|
|
!see if and what I have to receive
|
|
#if defined(__MPI)
|
|
iorig=me_pool-ip_delta
|
|
if(iorig<0) iorig=iorig+nproc
|
|
CALL MPI_RECV( nplane,1, MPI_INTEGER, iorig, 0, intra_pool_comm, istatus,IERR )
|
|
|
|
if(nplane>0) then
|
|
CALL MPI_RECV( b_plane,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t*nplane, MPI_DOUBLE_PRECISION, &
|
|
&iorig, 1, intra_pool_comm, istatus,IERR )
|
|
CALL MPI_RECV( b_z,nplane, MPI_INTEGER,iorig, 2, intra_pool_comm, istatus,IERR )
|
|
|
|
do ii=1,nplane
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
vexc((b_z(ii)-rz_start)*exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t+iqq)=&
|
|
&b_plane(iqq,ii)
|
|
enddo
|
|
enddo
|
|
endif
|
|
#endif
|
|
endif
|
|
enddo
|
|
enddo
|
|
deallocate(b_plane,b_z,proc_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!!!!!!!!!!!!!
|
|
!now find vexc from vexc_red
|
|
! do iz_s=1,exx_cus%fft_small%nr3t
|
|
! !send and receive z and z+alat
|
|
! do ii=1,exx_cus%m(3)
|
|
! iz=mod(iz_s+exx_cus%fft_small%nr3t*(ii-1)-1,exx_cus%fft_g2r%nrx3t)+1
|
|
!
|
|
!
|
|
! !do periodic replica
|
|
! if(iz_s >= rz_start_s .and. iz_s <= rz_end_s) then
|
|
! !if Z is mine determine owner and send
|
|
! idest=z2proc(iz)
|
|
! !put plane on small plane
|
|
! if(me_pool==idest) then
|
|
! do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
! vexc((iz-rz_start)*exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t+iqq)=vexc_red(iqq,iz_s-rz_start_s+1,ii)
|
|
! enddo
|
|
! else
|
|
! do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
! plane(iqq)=vexc_red(iqq,iz_s-rz_start_s+1,ii)
|
|
! enddo
|
|
! CALL MPI_ISEND( plane, exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
! &idest, iz, intra_pool_comm, req,IERR )
|
|
! CALL MPI_WAIT(req,istatus,ierr)
|
|
!
|
|
! endif
|
|
! else
|
|
! !if Z on large cell is mine receive it
|
|
! if(z2proc(iz)==me_pool) then
|
|
! iorig=z2proc_s(iz_s)
|
|
! CALL MPI_RECV( plane, exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
! &iorig, iz, intra_pool_comm, istatus, IERR )
|
|
! do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
! vexc((iz-rz_start)*exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t+iqq)=plane(iqq)
|
|
! enddo
|
|
! endif
|
|
! endif
|
|
!
|
|
! enddo
|
|
! enddo
|
|
|
|
!!!!!!!!!
|
|
|
|
!do fft and back to standard ordering
|
|
!do scalar producs
|
|
psic(1:exx_cus%fft_g2r%nrxxt)=dcmplx(vexc(1:exx_cus%fft_g2r%nrxxt),0.d0)
|
|
CALL start_clock('pf_fftext')
|
|
CALL cft3t( exx_cus%fft_g2r, psic, exx_cus%fft_g2r%nr1t, exx_cus%fft_g2r%nr2t, exx_cus%fft_g2r%nr3t, &
|
|
&exx_cus%fft_g2r%nrx1t, exx_cus%fft_g2r%nrx2t, exx_cus%fft_g2r%nrx3t, -2 )
|
|
CALL stop_clock('pf_fftext')
|
|
vexc_g(1:exx_cus%fft_g2r%npwt) = psic(exx_cus%fft_g2r%nlt(1:exx_cus%fft_g2r%npwt))
|
|
|
|
|
|
|
|
!put in the order or wfcs
|
|
CALL start_clock('pf_mergesplit')
|
|
if(exx_cus%fft_g2r%dual_t==4.d0) then
|
|
xpsi(1:exx_cus%fft_g2r%npwt)=vexc_g(1:exx_cus%fft_g2r%npwt)
|
|
else
|
|
call mergewf(vexc_g,evc_g,exx_cus%fft_g2r%npwt,exx_cus%fft_g2r%ig_l2gt,me_pool,nproc,ionode_id,intra_pool_comm)
|
|
call splitwf(xpsi,evc_g,npw,ig_l2g,me_pool,nproc,ionode_id,intra_pool_comm)
|
|
endif
|
|
CALL stop_clock('pf_mergesplit')
|
|
|
|
|
|
|
|
deallocate(fac)
|
|
deallocate(prodr,prods)
|
|
deallocate(planes)
|
|
deallocate(z2proc_s)
|
|
deallocate(z2proc)
|
|
deallocate(prods_g)
|
|
deallocate(vexc)
|
|
deallocate(psi_t,psi_r,evc_g)
|
|
deallocate(psi_r_red,plane,prod_r_red)
|
|
deallocate(vexc_red)
|
|
CALL stop_clock('periodic_fock')
|
|
return
|
|
END SUBROUTINE periodic_fock_cus
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SUBROUTINE periodic_dft_exchange(nbnds,psi,exx_cus)
|
|
!experimental version work just with factor 1/2
|
|
|
|
USE io_global, ONLY : stdout, ionode,ionode_id
|
|
USE mp_pools, ONLY : me_pool,intra_pool_comm
|
|
USE cell_base, ONLY : at, alat, tpiba, omega, tpiba2,bg
|
|
USE constants, ONLY : e2, pi, tpi, fpi, RYTOEV
|
|
USE wavefunctions, ONLY : psic
|
|
USE mp, ONLY : mp_sum
|
|
USE mp_world, ONLY : world_comm, nproc
|
|
USE wvfct, ONLY : npwx, npw
|
|
USE gvect
|
|
USE mp_wave, ONLY : mergewf,splitwf
|
|
|
|
implicit none
|
|
|
|
INTEGER, INTENT(in) :: nbnds!total number of states
|
|
COMPLEX(kind=DP), INTENT(in) :: psi(npwx,nbnds)!wavefunctions
|
|
TYPE(exchange_cus), INTENT(in) :: exx_cus
|
|
|
|
TYPE(fft_cus) :: fft_small
|
|
INTEGER, ALLOCATABLE :: r2s_xy(:) !real to small XY index
|
|
INTEGER, ALLOCATABLE :: r2s_z(:)!real to small Z index (large)
|
|
|
|
INTEGER :: i,j,k,n,ii,jj,kk,ig,iv,jv
|
|
INTEGER :: ix,iy,iz, ix_s,iy_s,iz_s,iz_eff
|
|
INTEGER :: rz_start,rz_end,iqq,iqq_s,rz_start_s,rz_end_s
|
|
REAL(kind=DP), ALLOCATABLE :: fac(:),prodr(:),prods(:),planes(:),vexc(:)
|
|
REAL(kind=DP) :: qq_fact, sca
|
|
INTEGER :: iorig, idest
|
|
INTEGER,ALLOCATABLE :: z2proc_s(:),z2proc(:)
|
|
INTEGER :: req, ierr
|
|
#if defined(__MPI)
|
|
INTEGER :: istatus(MPI_STATUS_SIZE)
|
|
#endif
|
|
COMPLEX(kind=DP), ALLOCATABLE :: prods_g(:)
|
|
COMPLEX(kind=DP), ALLOCATABLE :: psi_t(:),evc_g(:)
|
|
REAL(kind=DP), ALLOCATABLE :: psi_r(:)
|
|
|
|
|
|
!setup small grid
|
|
fft_small%at_t(1:3,1:3)=exx_cus%fft_g2r%at_t(1:3,1:3)
|
|
fft_small%bg_t(1:3,1:3)=exx_cus%fft_g2r%bg_t(1:3,1:3)
|
|
fft_small%alat_t=exx_cus%fft_g2r%alat_t/2.d0
|
|
fft_small%omega_t=exx_cus%fft_g2r%omega_t/8.d0
|
|
fft_small%tpiba_t=exx_cus%fft_g2r%tpiba_t*2.d0
|
|
fft_small%tpiba2_t=exx_cus%fft_g2r%tpiba2_t*4.d0
|
|
|
|
fft_small%ecutt=exx_cus%fft_g2r%ecutt
|
|
fft_small%dual_t=exx_cus%fft_g2r%dual_t
|
|
|
|
call initialize_fft_custom_cell(fft_small)
|
|
|
|
write(stdout,*) 'Dimensions of real cell'
|
|
write(stdout,*) exx_cus%fft_g2r%nr1t,exx_cus%fft_g2r%nr2t,exx_cus%fft_g2r%nr3t
|
|
write(stdout,*) exx_cus%fft_g2r%nrx1t,exx_cus%fft_g2r%nrx2t,exx_cus%fft_g2r%nrx3t
|
|
|
|
write(stdout,*) 'Dimensions of small cell'
|
|
|
|
write(stdout,*) fft_small%nr1t,fft_small%nr2t,fft_small%nr3t
|
|
write(stdout,*) fft_small%nrx1t,fft_small%nrx2t,fft_small%nrx3t
|
|
|
|
|
|
|
|
allocate(r2s_xy(exx_cus%fft_g2r%nrxxt))
|
|
|
|
allocate(r2s_z(exx_cus%fft_g2r%nrxxt))
|
|
!setup correspondence grids
|
|
|
|
rz_start=0
|
|
rz_end =0
|
|
do ii=1,me_pool + 1
|
|
rz_start=rz_end+1
|
|
rz_end=rz_end+exx_cus%fft_g2r%dfftt%nr3p(ii)
|
|
end do
|
|
|
|
rz_start_s=0
|
|
rz_end_s=0
|
|
do ii=1,me_pool + 1
|
|
rz_start_s=rz_end_s+1
|
|
rz_end_s=rz_end_s+fft_small%dfftt%nr3p(ii)
|
|
end do
|
|
|
|
allocate(z2proc_s(fft_small%nr3t))
|
|
allocate(z2proc(exx_cus%fft_g2r%nr3t))
|
|
allocate(vexc(exx_cus%fft_g2r%nrxxt))
|
|
allocate( evc_g( exx_cus%fft_g2r%ngmt_g ) )
|
|
j=0
|
|
k=0
|
|
do ii=1,nproc
|
|
j=k+1
|
|
k=k+fft_small%dfftt%nr3p(ii)
|
|
z2proc_s(j:k)=ii-1
|
|
end do
|
|
|
|
j=0
|
|
k=0
|
|
do ii=1,nproc
|
|
j=k+1
|
|
k=k+exx_cus%fft_g2r%dfftt%nr3p(ii)
|
|
z2proc(j:k)=ii-1
|
|
end do
|
|
|
|
|
|
|
|
r2s_xy(:)=0
|
|
do iz=1,exx_cus%fft_g2r%dfftt%nr3p(me_pool+1)
|
|
do iy=1,exx_cus%fft_g2r%nr2t
|
|
do ix=1,exx_cus%fft_g2r%nr1t
|
|
iqq=(iz-1)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+(iy-1)*exx_cus%fft_g2r%nrx1t+ix
|
|
iy_s=mod(iy-1,fft_small%nr2t)+1
|
|
ix_s=mod(ix-1,fft_small%nr1t)+1
|
|
iqq_s=(iy_s-1)*fft_small%nrx1t+ix_s
|
|
r2s_xy(iqq)=iqq_s!XY correspondance only
|
|
iz_eff=iz+rz_start-1
|
|
iz_s=mod(iz_eff-1,fft_small%nr3t)+1
|
|
enddo
|
|
enddo
|
|
enddo
|
|
|
|
allocate(fac(fft_small%ngmt))
|
|
|
|
!setup fac
|
|
do ig=1,fft_small%ngmt
|
|
|
|
qq_fact = fft_small%gt(1,ig)**2.d0 + fft_small%gt(2,ig)**2.d0 + fft_small%gt(3,ig)**2.d0
|
|
|
|
if (qq_fact > 1.d-8) then
|
|
fac(ig)=(e2*fpi/(fft_small%tpiba2_t*qq_fact))*(1.d0-dcos(dsqrt(qq_fact)*&
|
|
&(exx_cus%truncation_radius/2.d0)*fft_small%tpiba_t))
|
|
else
|
|
fac(ig)=e2*fpi*((exx_cus%truncation_radius/2.d0)**2.d0/2.d0)
|
|
|
|
endif
|
|
|
|
end do
|
|
fac(:)=fac(:)/omega
|
|
|
|
|
|
allocate(prodr(exx_cus%fft_g2r%nrxxt))
|
|
allocate(prods(fft_small%nrxxt))
|
|
allocate(planes(fft_small%nrx1t*fft_small%nrx2t))
|
|
allocate(prods_g(fft_small%ngmt))
|
|
|
|
|
|
allocate(psi_t(exx_cus%fft_g2r%npwt))
|
|
allocate(psi_r(exx_cus%fft_g2r%nrxxt))
|
|
|
|
!loop on KS states
|
|
do iv=1,nbnds
|
|
vexc(:)=0.d0
|
|
|
|
|
|
if(exx_cus%fft_g2r%dual_t==4.d0) then
|
|
psi_t(1:exx_cus%fft_g2r%npwt)=psi(1:exx_cus%fft_g2r%npwt,iv)
|
|
else
|
|
call mergewf(psi(:,iv),evc_g,npw,ig_l2g,me_pool,nproc,ionode_id,intra_pool_comm)
|
|
call splitwf(psi_t(:),evc_g,exx_cus%fft_g2r%npwt,exx_cus%fft_g2r%ig_l2gt,&
|
|
&me_pool,nproc,ionode_id,intra_pool_comm)
|
|
endif
|
|
|
|
psic(:)=(0.d0,0.d0)
|
|
psic(exx_cus%fft_g2r%nlt(1:exx_cus%fft_g2r%npwt)) = psi_t(1:exx_cus%fft_g2r%npwt)
|
|
psic(exx_cus%fft_g2r%nltm(1:exx_cus%fft_g2r%npwt)) = CONJG( psi_t(1:exx_cus%fft_g2r%npwt) )
|
|
|
|
CALL cft3t( exx_cus%fft_g2r, psic, exx_cus%fft_g2r%nr1t, exx_cus%fft_g2r%nr2t, exx_cus%fft_g2r%nr3t, &
|
|
&exx_cus%fft_g2r%nrx1t, exx_cus%fft_g2r%nrx2t, exx_cus%fft_g2r%nrx3t, 2 )
|
|
psi_r(1:exx_cus%fft_g2r%nrxxt)= DBLE(psic(1:exx_cus%fft_g2r%nrxxt))
|
|
|
|
|
|
!loop on KS valence states
|
|
|
|
do jv=1,exx_cus%nbndv(1)
|
|
!do product
|
|
prodr(1:exx_cus%fft_g2r%nrxxt)=psi_r(1:exx_cus%fft_g2r%nrxxt)*exx_cus%wfc(1:exx_cus%fft_g2r%nrxxt,jv,1)
|
|
!put on small cell
|
|
!loop on real Z direction
|
|
prods(:)=0.d0
|
|
do iz=1,exx_cus%fft_g2r%nr3t
|
|
if(iz >= rz_start .and. iz <= rz_end) then
|
|
planes(:)=0.d0
|
|
do iy=1,exx_cus%fft_g2r%nr2t
|
|
do ix=1,exx_cus%fft_g2r%nr1t
|
|
iqq=(iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+(iy-1)*exx_cus%fft_g2r%nrx1t+ix
|
|
planes(r2s_xy(iqq))=planes(r2s_xy(iqq))+prodr(iqq)
|
|
enddo
|
|
enddo
|
|
!if Z is mine determine owner and send it
|
|
iz_s=mod(iz-1,fft_small%nr3t)+1
|
|
idest=z2proc_s(iz_s)
|
|
!put plane on small plane
|
|
if(me_pool==idest) then
|
|
do iqq_s=1,fft_small%nrx1t*fft_small%nrx2t
|
|
prods((iz_s-rz_start_s)*(fft_small%nrx1t*fft_small%nrx2t)+iqq_s)=&
|
|
& prods((iz_s-rz_start_s)*(fft_small%nrx1t*fft_small%nrx2t)+iqq_s)+planes(iqq_s)
|
|
enddo
|
|
else
|
|
#if defined(__MPI)
|
|
CALL MPI_ISEND( planes, fft_small%nrx1t*fft_small%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
&idest, iz, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
#endif
|
|
endif
|
|
|
|
else
|
|
iz_s=mod(iz-1,fft_small%nr3t)+1
|
|
!if Z o small cell is mine receive it
|
|
if(z2proc_s(iz_s)==me_pool) then
|
|
iorig=z2proc(iz)
|
|
#if defined(__MPI)
|
|
CALL MPI_RECV( planes, fft_small%nrx1t*fft_small%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
&iorig, iz, intra_pool_comm, istatus, IERR )
|
|
#endif
|
|
do iqq_s=1,fft_small%nrx1t*fft_small%nrx2t
|
|
prods((iz_s-rz_start_s)*(fft_small%nrx1t*fft_small%nrx2t)+iqq_s)=&
|
|
&prods((iz_s-rz_start_s)*(fft_small%nrx1t*fft_small%nrx2t)+iqq_s)+planes(iqq_s)
|
|
enddo
|
|
|
|
endif
|
|
endif
|
|
enddo
|
|
! do fft
|
|
psic(1:fft_small%nrxxt)=cmplx(prods(1:fft_small%nrxxt),0.d0)
|
|
CALL cft3t( fft_small, psic, fft_small%nr1t, fft_small%nr2t, fft_small%nr3t, &
|
|
&fft_small%nrx1t, fft_small%nrx2t, fft_small%nrx3t, -1 )
|
|
prods_g(1:fft_small%ngmt) = psic(fft_small%nlt(1:fft_small%ngmt))
|
|
|
|
!apply fac
|
|
prods_g(1:fft_small%ngmt)=fac(1:fft_small%ngmt)*prods_g(1:fft_small%ngmt)
|
|
!put back on large cell
|
|
psic=0.d0
|
|
psic(fft_small%nlt(1:fft_small%ngmt)) = prods_g(1:fft_small%ngmt)
|
|
psic(fft_small%nltm(1:fft_small%ngmt)) = CONJG( prods_g(1:fft_small%ngmt))
|
|
CALL cft3t( fft_small, psic, fft_small%nr1t, fft_small%nr2t, fft_small%nr3t, &
|
|
&fft_small%nrx1t, fft_small%nrx2t, fft_small%nrx3t, +1 )
|
|
prods(1:fft_small%nrxxt)=dble(psic(1:fft_small%nrxxt))
|
|
!loop on small z grid
|
|
do iz_s=1,fft_small%nr3t
|
|
!send and receive z and z+alat
|
|
do ii=1,2
|
|
iz=iz_s+fft_small%nr3t*(ii-1)
|
|
|
|
!do periodic replica
|
|
if(iz_s >= rz_start_s .and. iz_s <= rz_end_s) then
|
|
|
|
!if Z is mine determine owner and send
|
|
idest=z2proc(iz)
|
|
!put plane on small plane
|
|
if(me_pool==idest) then
|
|
do iqq_s=1,fft_small%nrx1t*fft_small%nrx2t
|
|
planes(iqq_s)=prods((iz_s-rz_start_s)*(fft_small%nrx1t*fft_small%nrx2t)+iqq_s)
|
|
enddo
|
|
!do replicas
|
|
do jj=1,2
|
|
do kk=1,2
|
|
do iy_s=1,fft_small%nr2t
|
|
do ix_s=1,fft_small%nr1t
|
|
iy=iy_s+fft_small%nr2t*(jj-1)
|
|
ix=ix_s+fft_small%nr1t*(kk-1)
|
|
iqq=(iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+(iy-1)*exx_cus%fft_g2r%nrx1t+ix
|
|
prodr(iqq)=planes(r2s_xy(iqq))
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
else
|
|
do iqq_s=1,fft_small%nrx1t*fft_small%nrx2t
|
|
planes(iqq_s)=prods((iz_s-rz_start_s)*(fft_small%nrx1t*fft_small%nrx2t)+iqq_s)
|
|
enddo
|
|
#if defined(__MPI)
|
|
CALL MPI_ISEND( planes, fft_small%nrx1t*fft_small%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
&idest, iz, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
#endif
|
|
endif
|
|
|
|
else
|
|
!if Z on large cell is mine receive it
|
|
if(z2proc(iz)==me_pool) then
|
|
iorig=z2proc_s(iz_s)
|
|
#if defined(__MPI)
|
|
CALL MPI_RECV( planes, fft_small%nrx1t*fft_small%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
&iorig, iz, intra_pool_comm, istatus, IERR )
|
|
#endif
|
|
do jj=1,2
|
|
do kk=1,2
|
|
do iy_s=1,fft_small%nr2t
|
|
do ix_s=1,fft_small%nr1t
|
|
iy=iy_s+fft_small%nr2t*(jj-1)
|
|
ix=ix_s+fft_small%nr1t*(kk-1)
|
|
iqq=(iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+(iy-1)*exx_cus%fft_g2r%nrx1t+ix
|
|
prodr(iqq)=planes(r2s_xy(iqq))
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
|
|
endif
|
|
|
|
endif
|
|
enddo!ii
|
|
|
|
enddo
|
|
!do product
|
|
vexc(1:exx_cus%fft_g2r%nrxxt)=vexc(1:exx_cus%fft_g2r%nrxxt)+prodr(1:exx_cus%fft_g2r%nrxxt)*&
|
|
&exx_cus%wfc(1:exx_cus%fft_g2r%nrxxt,jv,1)
|
|
!sum up result terms
|
|
|
|
!end loop
|
|
enddo
|
|
!do scalar producs
|
|
sca=0.d0
|
|
do i=1,exx_cus%fft_g2r%nrxxt
|
|
sca=sca+psi_r(i)*vexc(i)
|
|
enddo
|
|
call mp_sum(sca,world_comm)
|
|
sca=sca/dble(exx_cus%fft_g2r%nr1t*exx_cus%fft_g2r%nr2t*exx_cus%fft_g2r%nr3t)
|
|
write(stdout,*) 'PERIODIC EXCHANGE', iv, sca
|
|
FLUSH(stdout)
|
|
!end loop
|
|
enddo
|
|
deallocate(r2s_xy,r2s_z)
|
|
deallocate(fac)
|
|
deallocate(prodr,prods)
|
|
deallocate(planes)
|
|
deallocate(z2proc_s)
|
|
deallocate(z2proc)
|
|
deallocate(prods_g)
|
|
deallocate(vexc)
|
|
deallocate(psi_t,psi_r,evc_g)
|
|
return
|
|
END SUBROUTINE periodic_dft_exchange
|
|
|
|
|
|
SUBROUTINE setup_exx_cus(nspin,num_nbndv_max,num_nbndv,ks_wfcs, exx_cus, dual, cutoff, truncation_radius)
|
|
!ATTENZIONE now only for cubic cell to be extended
|
|
|
|
|
|
USE io_global, ONLY : stdout, ionode, ionode_id
|
|
USE wvfct, ONLY : npwx, npw, nbnd
|
|
USE gvecw, ONLY : ecutwfc
|
|
USE cell_base, ONLY: at, alat, tpiba, omega, tpiba2,bg
|
|
USE constants, ONLY : e2, pi, tpi, fpi, RYTOEV
|
|
USE wavefunctions, ONLY : psic
|
|
USE mp_pools, ONLY : intra_pool_comm, me_pool
|
|
USE gvect
|
|
USE mp_wave, ONLY : mergewf,splitwf
|
|
USE mp, ONLY : mp_barrier, mp_sum
|
|
USE mp_world, ONLY : world_comm, mpime, nproc
|
|
|
|
IMPLICIT NONE
|
|
|
|
INTEGER, INTENT(in) :: nspin!spin multiplicity
|
|
INTEGER, INTENT(in) ::num_nbndv_max!max number of valence states for both spins
|
|
INTEGER, INTENT(in) :: num_nbndv(2)!number of valence states
|
|
COMPLEX(kind=DP), INTENT(in) :: ks_wfcs(npwx, num_nbndv_max,2)!KS valence wfcs
|
|
TYPE(exchange_cus), INTENT(out) :: exx_cus!the structure to be created
|
|
REAL(kind=DP), INTENT(in) :: dual!for defining the real space grid
|
|
REAL(kind=DP), INTENT(in) :: cutoff !for the defining the G space grid
|
|
REAL(kind=DP), INTENT(in) :: truncation_radius!Bohr
|
|
|
|
REAL(kind=DP) :: qq_fact
|
|
INTEGER :: ig,ii,i,j,iv,ir,n_max,is,k,jj,kk
|
|
COMPLEX(kind=DP), ALLOCATABLE :: state_fc_t(:,:),evc_g(:)
|
|
INTEGER :: rz_start,rz_end,ix,iy,iz,iqq,iqq_s,ix_s,iy_s
|
|
INTEGER :: rz_start_s,rz_end_s,nr3small
|
|
INTEGER :: iorig, idest,iz_s,jv
|
|
INTEGER,ALLOCATABLE :: z2proc_s(:),z2proc(:)
|
|
INTEGER iplane
|
|
REAL(kind=DP), ALLOCATABLE :: plane(:)
|
|
INTEGER :: req, ierr
|
|
#if defined(__MPI)
|
|
INTEGER :: istatus(MPI_STATUS_SIZE)
|
|
#endif
|
|
|
|
CALL start_clock('setup_exx')
|
|
!setup parameters
|
|
|
|
exx_cus%nbndv(1:2)=num_nbndv(1:2)
|
|
exx_cus%dual=dual
|
|
exx_cus%cutoff=cutoff
|
|
exx_cus%truncation_radius=truncation_radius
|
|
|
|
exx_cus%l_localized=l_exchange_localized
|
|
exx_cus%thrs_loc=exchange_thrs_loc
|
|
exx_cus%nspin=nspin
|
|
|
|
!define grids
|
|
|
|
exx_cus%fft_g2r%ecutt=ecutwfc
|
|
exx_cus%fft_g2r%dual_t=dual
|
|
call mp_barrier( world_comm )
|
|
write(stdout,*) 'Before initialize_fft_custom',exx_cus%fft_g2r%ecutt,exx_cus%fft_g2r%dual_t
|
|
FLUSH(stdout)
|
|
call initialize_fft_custom(exx_cus%fft_g2r)
|
|
write(stdout,*) "GRID G to R", exx_cus%fft_g2r%nr1t, exx_cus%fft_g2r%nr2t, exx_cus%fft_g2r%nr3t
|
|
write(stdout,*) "GRID G to R",exx_cus%fft_g2r%npwt
|
|
FLUSH(stdout)
|
|
|
|
exx_cus%fft_r2g%ecutt=cutoff
|
|
exx_cus%fft_r2g%dual_t=ecutwfc*dual/cutoff
|
|
call initialize_fft_custom(exx_cus%fft_r2g)
|
|
write(stdout,*) "GRID R to G", exx_cus%fft_r2g%nr1t, exx_cus%fft_r2g%nr2t, exx_cus%fft_r2g%nr3t
|
|
write(stdout,*) "GRID R to G",exx_cus%fft_r2g%npwt
|
|
FLUSH(stdout)
|
|
|
|
if(l_exchange_turbo) then
|
|
!setup small grid
|
|
exx_cus%m(1:3)=exchange_m(1:3)
|
|
exx_cus%n(1:3)=exchange_n(1:3)
|
|
do i=1,3
|
|
exx_cus%fft_small%at_t(1:3,i)=exx_cus%fft_g2r%at_t(1:3,i)
|
|
exx_cus%fft_small%bg_t(1:3,i)=exx_cus%fft_g2r%bg_t(1:3,i)
|
|
enddo
|
|
exx_cus%fft_small%alat_t=exx_cus%fft_g2r%alat_t*dble(exchange_n(1))/dble(exchange_m(1))
|
|
exx_cus%fft_small%omega_t=exx_cus%fft_g2r%omega_t*(dble(exchange_n(1))/dble(exchange_m(1)))**3.d0
|
|
exx_cus%fft_small%tpiba_t=exx_cus%fft_g2r%tpiba_t/(dble(exchange_n(1))/dble(exchange_m(1)))
|
|
exx_cus%fft_small%tpiba2_t=exx_cus%fft_g2r%tpiba2_t/(dble(exchange_n(1))/dble(exchange_m(1)))**2.d0
|
|
|
|
exx_cus%fft_small%ecutt=exx_cus%fft_g2r%ecutt
|
|
exx_cus%fft_small%dual_t=exx_cus%fft_g2r%dual_t
|
|
|
|
call initialize_fft_custom_cell(exx_cus%fft_small)
|
|
|
|
allocate(exx_cus%r2s_xy(exx_cus%fft_g2r%nrxxt,exx_cus%n(1),exx_cus%n(2)))
|
|
allocate(exx_cus%fac_s(exx_cus%fft_small%ngmt))
|
|
!setup correspondence grids
|
|
|
|
rz_start=0
|
|
rz_end =0
|
|
do ii=1,me_pool + 1
|
|
rz_start=rz_end+1
|
|
rz_end=rz_end+exx_cus%fft_g2r%dfftt%nr3p(ii)
|
|
end do
|
|
|
|
exx_cus%r2s_xy(:,:,:)=0
|
|
do i=1,exx_cus%n(1)
|
|
do j=1,exx_cus%n(2)
|
|
do iz=1,exx_cus%fft_g2r%dfftt%my_nr3p
|
|
do iy=1,exx_cus%fft_g2r%nr2t
|
|
do ix=1,exx_cus%fft_g2r%nr1t
|
|
iqq=(iz-1)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+(iy-1)*exx_cus%fft_g2r%nrx1t+ix
|
|
iy_s=mod(iy-1+(j-1)*exx_cus%fft_g2r%nr2t,exx_cus%fft_small%nr2t)+1
|
|
ix_s=mod(ix-1+(i-1)*exx_cus%fft_g2r%nr1t,exx_cus%fft_small%nr1t)+1
|
|
iqq_s=(iy_s-1)*exx_cus%fft_small%nrx1t+ix_s
|
|
exx_cus%r2s_xy(iqq,i,j)=iqq_s!XY correspondance only
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
|
|
allocate(exx_cus%s2r_xy(exx_cus%fft_small%nrx1t*exx_cus%fft_small%nrx2t,exx_cus%m(1),exx_cus%m(2)))
|
|
do jj=1,exx_cus%m(2)
|
|
do kk=1,exx_cus%m(1)
|
|
do iy_s=1,exx_cus%fft_small%nr2t
|
|
do ix_s=1,exx_cus%fft_small%nr1t
|
|
iy=mod(iy_s+exx_cus%fft_small%nr2t*(jj-1)-1,exx_cus%fft_g2r%nr2t)+1
|
|
ix=mod(ix_s+exx_cus%fft_small%nr1t*(kk-1)-1,exx_cus%fft_g2r%nr1t)+1
|
|
iqq=(iy-1)*exx_cus%fft_g2r%nrx1t+ix
|
|
iqq_s=(iy_s-1)*exx_cus%fft_small%nrx1t+ix_s
|
|
exx_cus%s2r_xy(iqq_s,kk,jj)=iqq
|
|
enddo
|
|
enddo
|
|
enddo
|
|
enddo
|
|
endif
|
|
!setup fac
|
|
|
|
allocate(exx_cus%fac(exx_cus%fft_r2g%ngmt))
|
|
|
|
do ig=1,exx_cus%fft_r2g%ngmt
|
|
|
|
qq_fact = exx_cus%fft_r2g%gt(1,ig)**2.d0 + exx_cus%fft_r2g%gt(2,ig)**2.d0 + exx_cus%fft_r2g%gt(3,ig)**2.d0
|
|
|
|
if (qq_fact > 1.d-8) then
|
|
exx_cus%fac(ig)=(e2*fpi/(tpiba2*qq_fact))*(1.d0-dcos(dsqrt(qq_fact)*exx_cus%truncation_radius*tpiba))
|
|
else
|
|
exx_cus%fac(ig)=e2*fpi*(exx_cus%truncation_radius**2.d0/2.d0)
|
|
|
|
endif
|
|
|
|
end do
|
|
exx_cus%fac(:)=exx_cus%fac(:)/omega
|
|
|
|
!put wfcs in real space
|
|
allocate(exx_cus%wfc(exx_cus%fft_g2r%nrxxt,num_nbndv_max,nspin))
|
|
|
|
allocate(state_fc_t(exx_cus%fft_g2r%npwt,num_nbndv_max))
|
|
allocate( evc_g( exx_cus%fft_g2r%ngmt_g ) )
|
|
|
|
do is=1,nspin
|
|
if(exx_cus%fft_g2r%dual_t==4.d0) then
|
|
state_fc_t(1:exx_cus%fft_g2r%npwt,1:exx_cus%nbndv(is))=ks_wfcs(1:exx_cus%fft_g2r%npwt,1:exx_cus%nbndv(is),is)
|
|
else
|
|
do ii=1,exx_cus%nbndv(is)
|
|
call mergewf(ks_wfcs(:,ii,is),evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
|
|
call splitwf(state_fc_t(:,ii),evc_g,exx_cus%fft_g2r%npwt,exx_cus%fft_g2r%ig_l2gt,&
|
|
&mpime,nproc,ionode_id,intra_pool_comm)
|
|
enddo
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
do ii=1,exx_cus%nbndv(is),2
|
|
psic(:)=(0.d0,0.d0)
|
|
if(ii==exx_cus%nbndv(is)) then
|
|
psic(exx_cus%fft_g2r%nlt(1:exx_cus%fft_g2r%npwt)) = state_fc_t(1:exx_cus%fft_g2r%npwt,ii)
|
|
psic(exx_cus%fft_g2r%nltm(1:exx_cus%fft_g2r%npwt)) = CONJG( state_fc_t(1:exx_cus%fft_g2r%npwt,ii) )
|
|
else
|
|
psic(exx_cus%fft_g2r%nlt(1:exx_cus%fft_g2r%npwt))=state_fc_t(1:exx_cus%fft_g2r%npwt,ii)+&
|
|
&(0.d0,1.d0)*state_fc_t(1:exx_cus%fft_g2r%npwt,ii+1)
|
|
psic(exx_cus%fft_g2r%nltm(1:exx_cus%fft_g2r%npwt)) = CONJG( state_fc_t(1:exx_cus%fft_g2r%npwt,ii) )+&
|
|
&(0.d0,1.d0)*CONJG( state_fc_t(1:exx_cus%fft_g2r%npwt,ii+1) )
|
|
endif
|
|
CALL cft3t( exx_cus%fft_g2r, psic, exx_cus%fft_g2r%nr1t, exx_cus%fft_g2r%nr2t, exx_cus%fft_g2r%nr3t, &
|
|
&exx_cus%fft_g2r%nrx1t, exx_cus%fft_g2r%nrx2t, exx_cus%fft_g2r%nrx3t, 2 )
|
|
exx_cus%wfc(1:exx_cus%fft_g2r%nrxxt,ii,is)= DBLE(psic(1:exx_cus%fft_g2r%nrxxt))
|
|
if(ii/=exx_cus%nbndv(1)) exx_cus%wfc(1:exx_cus%fft_g2r%nrxxt,ii+1,is)=&
|
|
&DIMAG(psic(1:exx_cus%fft_g2r%nrxxt))
|
|
enddo
|
|
enddo
|
|
!now put on the reduced grid
|
|
if(l_exchange_turbo) then
|
|
!find maximum
|
|
rz_start=0
|
|
rz_end =0
|
|
do ii=1,me_pool + 1
|
|
rz_start=rz_end+1
|
|
rz_end=rz_end+exx_cus%fft_g2r%dfftt%nr3p(ii)
|
|
end do
|
|
|
|
rz_start_s=0
|
|
rz_end_s=0
|
|
do ii=1,me_pool + 1
|
|
rz_start_s=rz_end_s+1
|
|
rz_end_s=rz_end_s+exx_cus%fft_small%dfftt%nr3p(ii)
|
|
end do
|
|
nr3small=rz_end_s-rz_start_s+1
|
|
allocate(exx_cus%wfc_red(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t,nr3small,exx_cus%m(3),num_nbndv_max,exx_cus%nspin))
|
|
|
|
allocate(z2proc_s(exx_cus%fft_small%nr3t))
|
|
allocate(z2proc(exx_cus%fft_g2r%nr3t))
|
|
|
|
j=0
|
|
k=0
|
|
do ii=1,nproc
|
|
j=k+1
|
|
k=k+exx_cus%fft_small%dfftt%nr3p(ii)
|
|
z2proc_s(j:k)=ii-1
|
|
end do
|
|
|
|
j=0
|
|
k=0
|
|
do ii=1,nproc
|
|
j=k+1
|
|
k=k+exx_cus%fft_g2r%dfftt%nr3p(ii)
|
|
z2proc(j:k)=ii-1
|
|
end do
|
|
|
|
|
|
allocate(plane(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t))
|
|
|
|
do k=1,exx_cus%n(3)
|
|
do iz=1,exx_cus%fft_g2r%nr3t
|
|
if(iz >= rz_start .and. iz <= rz_end) then
|
|
!if Z is mine determine owner and send it
|
|
iz_s=mod(iz-1+(k-1)*exx_cus%fft_g2r%nr3t,exx_cus%fft_small%nr3t)+1
|
|
iplane=(iz-1+(k-1)*exx_cus%fft_g2r%nr3t)/exx_cus%fft_small%nr3t+1
|
|
idest=z2proc_s(iz_s)
|
|
!put plane on small plane
|
|
if(me_pool==idest) then
|
|
do is=1,exx_cus%nspin
|
|
do jv=1,exx_cus%nbndv(is)
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
exx_cus%wfc_red(iqq,iz_s-rz_start_s+1,iplane,jv,is)=&
|
|
&exx_cus%wfc((iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+iqq,jv,is)
|
|
enddo
|
|
enddo
|
|
enddo
|
|
else
|
|
do is=1,exx_cus%nspin
|
|
do jv=1,exx_cus%nbndv(is)
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
plane(iqq)=exx_cus%wfc((iz-rz_start)*(exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t)+iqq,jv,is)
|
|
enddo
|
|
#if defined(__MPI)
|
|
CALL MPI_ISEND( plane,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
&idest, iz, intra_pool_comm, req,IERR )
|
|
CALL MPI_WAIT(req,istatus,ierr)
|
|
#endif
|
|
enddo
|
|
enddo
|
|
endif
|
|
|
|
else
|
|
iz_s=mod(iz-1+(k-1)*exx_cus%fft_g2r%nr3t,exx_cus%fft_small%nr3t)+1
|
|
iplane=(iz-1+(k-1)*exx_cus%fft_g2r%nr3t)/exx_cus%fft_small%nr3t+1
|
|
!if Z o small cell is mine receive it
|
|
if(z2proc_s(iz_s)==me_pool) then
|
|
iorig=z2proc(iz)
|
|
do is=1,exx_cus%nspin
|
|
do jv=1,exx_cus%nbndv(is)
|
|
#if defined(__MPI)
|
|
CALL MPI_RECV( plane, exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t, MPI_DOUBLE_PRECISION, &
|
|
&iorig, iz, intra_pool_comm, istatus, IERR )
|
|
#endif
|
|
do iqq=1,exx_cus%fft_g2r%nrx1t*exx_cus%fft_g2r%nrx2t
|
|
exx_cus%wfc_red(iqq,iz_s-rz_start_s+1,iplane,jv,is)=plane(iqq)
|
|
enddo
|
|
|
|
enddo
|
|
enddo
|
|
|
|
endif
|
|
endif
|
|
enddo
|
|
enddo
|
|
deallocate(z2proc,z2proc_s,plane)
|
|
endif
|
|
deallocate(state_fc_t,evc_g)
|
|
|
|
if(exx_cus%l_localized) then
|
|
allocate(exx_cus%n_loc(num_nbndv_max,nspin))
|
|
allocate(exx_cus%tab_loc(exx_cus%fft_g2r%nrxxt,num_nbndv_max,nspin))!memory could be reduce here
|
|
do is=1,nspin
|
|
do iv=1,exx_cus%nbndv(is)
|
|
exx_cus%n_loc(iv,is)=0
|
|
do ir=1,exx_cus%fft_g2r%nrxxt
|
|
if((exx_cus%wfc(ir,iv,is)**2.d0>exx_cus%thrs_loc) )then
|
|
exx_cus%n_loc(iv,is)=exx_cus%n_loc(iv,is)+1
|
|
exx_cus%tab_loc(exx_cus%n_loc(iv,is),iv,is)=ir
|
|
endif
|
|
enddo
|
|
n_max=exx_cus%n_loc(iv,is)
|
|
call mp_sum(n_max,world_comm)
|
|
write(stdout,*) 'Using localized wfcs for exchange:',is,iv,n_max,&
|
|
&exx_cus%fft_g2r%nr1t*exx_cus%fft_g2r%nr2t*exx_cus%fft_g2r%nr3t
|
|
enddo
|
|
enddo
|
|
endif
|
|
|
|
CALL stop_clock('setup_exx')
|
|
return
|
|
END SUBROUTINE setup_exx_cus
|
|
|
|
SUBROUTINE free_memory_exx_cus(exx_cus)
|
|
|
|
IMPLICIT NONE
|
|
|
|
TYPE(exchange_cus) :: exx_cus
|
|
|
|
deallocate(exx_cus%wfc)
|
|
deallocate(exx_cus%fac)
|
|
if(l_exchange_turbo) then
|
|
deallocate(exx_cus%fac_s)
|
|
deallocate(exx_cus%r2s_xy)
|
|
deallocate(exx_cus%wfc_red)
|
|
deallocate(exx_cus%s2r_xy)
|
|
endif
|
|
if(exx_cus%l_localized) then
|
|
deallocate(exx_cus%n_loc)
|
|
deallocate(exx_cus%tab_loc)
|
|
endif
|
|
|
|
return
|
|
|
|
END SUBROUTINE free_memory_exx_cus
|
|
|
|
|
|
SUBROUTINE fock_cus(psi,xpsi,exx_cus)
|
|
!apply Fock operator to a wavefunction
|
|
|
|
USE io_global, ONLY : stdout, ionode, ionode_id
|
|
USE wvfct, ONLY : npwx, npw, nbnd
|
|
USE cell_base, ONLY: at, alat, tpiba, omega, tpiba2,bg
|
|
USE constants, ONLY : e2, pi, tpi, fpi, RYTOEV
|
|
USE wavefunctions, ONLY : psic
|
|
USE gvect
|
|
USE mp_pools, ONLY : intra_pool_comm
|
|
USE mp_world, ONLY : mpime, nproc
|
|
USE mp_wave, ONLY : mergewf,splitwf
|
|
|
|
|
|
IMPLICIT NONE
|
|
|
|
COMPLEX(kind=DP), INTENT(in) :: psi(npw)
|
|
COMPLEX(kind=DP), INTENT(inout) :: xpsi(npw)
|
|
TYPE(exchange_cus), INTENT(in) :: exx_cus
|
|
|
|
|
|
REAL(kind=DP), ALLOCATABLE :: prods_r(:,:),psi_r(:),prod_tot(:)
|
|
COMPLEX(kind=DP), ALLOCATABLE :: prods_g(:,:),evc_g(:), psi_t(:), prod_tot_g(:)
|
|
INTEGER :: iv,ig
|
|
INTEGER, ALLOCATABLE :: igkt(:)
|
|
|
|
CALL start_clock('fock')
|
|
|
|
allocate(prods_r(exx_cus%fft_g2r%nrxxt,exx_cus%nbndv(1)))
|
|
allocate(prod_tot(exx_cus%fft_g2r%nrxxt))
|
|
allocate(prods_g(exx_cus%fft_r2g%npwt,exx_cus%nbndv(1)))
|
|
allocate(prod_tot_g(exx_cus%fft_g2r%npwt))
|
|
allocate(psi_t(exx_cus%fft_g2r%npwt))
|
|
allocate(psi_r(exx_cus%fft_g2r%nrxxt))
|
|
|
|
!put psi on the g2r G grid
|
|
allocate( evc_g( exx_cus%fft_g2r%ngmt_g ) )
|
|
|
|
|
|
allocate( igkt( exx_cus%fft_g2r%npwt ) )
|
|
do ig=1,exx_cus%fft_g2r%npwt
|
|
igkt(ig)=ig
|
|
enddo
|
|
|
|
|
|
if(exx_cus%fft_g2r%dual_t==4.d0) then
|
|
psi_t(1:exx_cus%fft_g2r%npwt)=psi(1:exx_cus%fft_g2r%npwt)
|
|
else
|
|
call mergewf(psi,evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
|
|
call splitwf(psi_t(:),evc_g,exx_cus%fft_g2r%npwt,exx_cus%fft_g2r%ig_l2gt,&
|
|
&mpime,nproc,ionode_id,intra_pool_comm)
|
|
endif
|
|
|
|
|
|
|
|
!trasform psi to R space
|
|
|
|
psic(:)=(0.d0,0.d0)
|
|
|
|
psic(exx_cus%fft_g2r%nlt(1:exx_cus%fft_g2r%npwt)) = psi_t(1:exx_cus%fft_g2r%npwt)
|
|
psic(exx_cus%fft_g2r%nltm(1:exx_cus%fft_g2r%npwt)) = CONJG( psi_t(1:exx_cus%fft_g2r%npwt) )
|
|
|
|
CALL cft3t( exx_cus%fft_g2r, psic, exx_cus%fft_g2r%nr1t, exx_cus%fft_g2r%nr2t, exx_cus%fft_g2r%nr3t, &
|
|
&exx_cus%fft_g2r%nrx1t, exx_cus%fft_g2r%nrx2t, exx_cus%fft_g2r%nrx3t, 2 )
|
|
psi_r(1:exx_cus%fft_g2r%nrxxt)= DBLE(psic(1:exx_cus%fft_g2r%nrxxt))
|
|
|
|
|
|
|
|
!products with \Psi_v
|
|
do iv=1,exx_cus%nbndv(1)
|
|
prods_r(1:exx_cus%fft_g2r%nrxxt,iv)=psi_r(1:exx_cus%fft_g2r%nrxxt)*&
|
|
&exx_cus%wfc(1:exx_cus%fft_g2r%nrxxt,iv,1)
|
|
enddo
|
|
|
|
!to G r2G grid
|
|
do iv=1,exx_cus%nbndv(1),2
|
|
if(iv==exx_cus%nbndv(1)) then
|
|
psic(1:exx_cus%fft_r2g%nrxxt)=dcmplx(prods_r(1:exx_cus%fft_r2g%nrxxt,iv),0.d0)
|
|
else
|
|
psic(1:exx_cus%fft_r2g%nrxxt)=dcmplx(prods_r(1:exx_cus%fft_r2g%nrxxt,iv),prods_r(1:exx_cus%fft_r2g%nrxxt,iv+1))
|
|
endif
|
|
|
|
CALL cft3t( exx_cus%fft_r2g, psic, exx_cus%fft_r2g%nr1t, exx_cus%fft_r2g%nr2t, exx_cus%fft_r2g%nr3t, &
|
|
&exx_cus%fft_r2g%nrx1t, exx_cus%fft_r2g%nrx2t, exx_cus%fft_r2g%nrx3t, -2 )
|
|
if(iv==exx_cus%nbndv(1)) then
|
|
prods_g(1:exx_cus%fft_r2g%npwt, iv) = psic(exx_cus%fft_r2g%nlt(igkt(1:exx_cus%fft_r2g%npwt)))
|
|
else
|
|
prods_g(1:exx_cus%fft_r2g%npwt, iv)= 0.5d0*(psic(exx_cus%fft_r2g%nlt(igkt(1:exx_cus%fft_r2g%npwt)))+&
|
|
&conjg( psic(exx_cus%fft_r2g%nltm(igkt(1:exx_cus%fft_r2g%npwt)))))
|
|
prods_g(1:exx_cus%fft_r2g%npwt, iv+1)= (0.d0,-0.5d0)*(psic(exx_cus%fft_r2g%nlt(igkt(1:exx_cus%fft_r2g%npwt))) - &
|
|
&conjg(psic(exx_cus%fft_r2g%nltm(igkt(1:exx_cus%fft_r2g%npwt)))))
|
|
endif
|
|
enddo
|
|
|
|
|
|
|
|
!multiply with fac
|
|
do iv=1,exx_cus%nbndv(1)
|
|
prods_g(1:exx_cus%fft_r2g%npwt,iv)=prods_g(1:exx_cus%fft_r2g%npwt,iv)*exx_cus%fac(1:exx_cus%fft_r2g%npwt)
|
|
enddo
|
|
!to R r2G grid
|
|
|
|
do iv=1,exx_cus%nbndv(1),2
|
|
psic(:)=(0.d0,0.d0)
|
|
if(iv==exx_cus%nbndv(1)) then
|
|
psic(exx_cus%fft_r2g%nlt(1:exx_cus%fft_r2g%npwt)) = prods_g(1:exx_cus%fft_r2g%npwt,iv)
|
|
psic(exx_cus%fft_r2g%nltm(1:exx_cus%fft_r2g%npwt)) = CONJG( prods_g(1:exx_cus%fft_r2g%npwt,iv) )
|
|
else
|
|
psic(exx_cus%fft_r2g%nlt(1:exx_cus%fft_r2g%npwt))=prods_g(1:exx_cus%fft_r2g%npwt,iv)+&
|
|
&(0.d0,1.d0)*prods_g(1:exx_cus%fft_r2g%npwt,iv+1)
|
|
psic(exx_cus%fft_r2g%nltm(1:exx_cus%fft_r2g%npwt)) = CONJG( prods_g(1:exx_cus%fft_r2g%npwt,iv) )+&
|
|
&(0.d0,1.d0)*CONJG( prods_g(1:exx_cus%fft_r2g%npwt,iv+1) )
|
|
endif
|
|
CALL cft3t( exx_cus%fft_r2g, psic, exx_cus%fft_r2g%nr1t, exx_cus%fft_r2g%nr2t, exx_cus%fft_r2g%nr3t, &
|
|
&exx_cus%fft_r2g%nrx1t, exx_cus%fft_r2g%nrx2t, exx_cus%fft_r2g%nrx3t, 2 )
|
|
prods_r(1:exx_cus%fft_r2g%nrxxt,iv)= DBLE(psic(1:exx_cus%fft_r2g%nrxxt))
|
|
if(iv/=exx_cus%nbndv(1)) prods_r(1:exx_cus%fft_r2g%nrxxt,iv+1)=&
|
|
&DIMAG(psic(1:exx_cus%fft_r2g%nrxxt))
|
|
enddo
|
|
|
|
|
|
|
|
!products with \Psi_v
|
|
|
|
do iv=1,exx_cus%nbndv(1)
|
|
prods_r(1:exx_cus%fft_g2r%nrxxt,iv)= prods_r(1:exx_cus%fft_g2r%nrxxt,iv)*exx_cus%wfc(1:exx_cus%fft_g2r%nrxxt,iv,1)
|
|
enddo
|
|
|
|
!sum up
|
|
|
|
prod_tot=0.d0
|
|
do iv=1,exx_cus%nbndv(1)
|
|
prod_tot(1:exx_cus%fft_g2r%nrxxt)=prod_tot(1:exx_cus%fft_g2r%nrxxt)+prods_r(1:exx_cus%fft_g2r%nrxxt,iv)
|
|
enddo
|
|
|
|
!transform to G space g2r grid
|
|
|
|
|
|
psic(1:exx_cus%fft_g2r%nrxxt)=dcmplx(prod_tot(1:exx_cus%fft_g2r%nrxxt),0.d0)
|
|
|
|
CALL cft3t( exx_cus%fft_g2r, psic, exx_cus%fft_g2r%nr1t, exx_cus%fft_g2r%nr2t, exx_cus%fft_g2r%nr3t, &
|
|
&exx_cus%fft_g2r%nrx1t, exx_cus%fft_g2r%nrx2t, exx_cus%fft_g2r%nrx3t, -2 )
|
|
|
|
prod_tot_g(1:exx_cus%fft_g2r%npwt) = psic(exx_cus%fft_g2r%nlt(igkt(1:exx_cus%fft_g2r%npwt)))
|
|
|
|
|
|
|
|
!put in the order or wfcs
|
|
if(exx_cus%fft_g2r%dual_t==4.d0) then
|
|
xpsi(1:exx_cus%fft_g2r%npwt)=prod_tot_g(1:exx_cus%fft_g2r%npwt)
|
|
else
|
|
call mergewf(prod_tot_g,evc_g,exx_cus%fft_g2r%npwt,exx_cus%fft_g2r%ig_l2gt,mpime,nproc,ionode_id,intra_pool_comm)
|
|
call splitwf(xpsi,evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
|
|
endif
|
|
|
|
|
|
deallocate(prods_r,prods_g,prod_tot)
|
|
deallocate(evc_g,psi_t,psi_r,prod_tot_g)
|
|
deallocate(igkt)
|
|
|
|
CALL stop_clock('fock')
|
|
return
|
|
|
|
END SUBROUTINE fock_cus
|
|
|
|
SUBROUTINE fast_vexx(lda, n, m, psi, hpsi,exx_cus,exxalpha,ispin)
|
|
|
|
|
|
IMPLICIT NONE
|
|
|
|
INTEGER :: lda, n, m, nqi, myrank, mysize
|
|
COMPLEX(DP) :: psi(lda,m)
|
|
COMPLEX(DP) :: hpsi(lda,m)
|
|
TYPE(exchange_cus) :: exx_cus
|
|
REAL(kind=DP) :: exxalpha
|
|
INTEGER, INTENT(in) :: ispin
|
|
|
|
COMPLEX(kind=DP), ALLOCATABLE :: xpsi(:)
|
|
INTEGER :: ii
|
|
|
|
allocate(xpsi(lda))
|
|
do ii=1,m
|
|
if(.not.l_exchange_turbo) then
|
|
call fock_cus(psi(1,ii),xpsi,exx_cus)
|
|
else
|
|
call periodic_fock_cus(ispin,psi(1,ii),xpsi,exx_cus)
|
|
endif
|
|
hpsi(1:n,ii)=hpsi(1:n,ii)-exxalpha*xpsi(1:n)
|
|
enddo
|
|
deallocate(xpsi)
|
|
return
|
|
|
|
END SUBROUTINE fast_vexx
|
|
|
|
FUNCTION exchange_energy_fast(exx_cus,exxalpha)
|
|
|
|
USE wvfct, ONLY : npwx, npw, nbnd
|
|
USE wavefunctions, ONLY : evc
|
|
USE mp, ONLY : mp_sum
|
|
USE mp_world, ONLY : world_comm
|
|
USE gvect, ONLY : gstart
|
|
USE io_files, ONLY : prefix, tmp_dir, nwordwfc,iunwfc
|
|
|
|
IMPLICIT NONE
|
|
|
|
REAL(kind=DP) :: exchange_energy_fast
|
|
TYPE(exchange_cus) :: exx_cus
|
|
REAL(kind=DP) :: exxalpha
|
|
|
|
INTEGER :: ii,ig,is
|
|
COMPLEX(kind=DP), ALLOCATABLE :: psi(:,:),xpsi(:)
|
|
|
|
exchange_energy_fast=0.d0
|
|
allocate(xpsi(npwx),psi(npwx,nbnd))
|
|
|
|
do is=1,exx_cus%nspin
|
|
if(exx_cus%nspin==1) then
|
|
psi(1:npw,1:exx_cus%nbndv(is))=evc(1:npw,1:exx_cus%nbndv(is))
|
|
else
|
|
CALL davcio(psi,2*nwordwfc,iunwfc,is,-1)
|
|
endif
|
|
do ii=1,exx_cus%nbndv(is)
|
|
if(.not.l_exchange_turbo) then
|
|
call fock_cus(psi(:,ii),xpsi,exx_cus)
|
|
else
|
|
call periodic_fock_cus(is,psi(:,ii),xpsi,exx_cus)
|
|
endif
|
|
do ig=1,npw
|
|
exchange_energy_fast=exchange_energy_fast+2.d0*dble(psi(ig,ii)*conjg(xpsi(ig)))
|
|
enddo
|
|
if(gstart==2) exchange_energy_fast=exchange_energy_fast-dble(psi(1,ii)*conjg(xpsi(1)))
|
|
enddo
|
|
enddo
|
|
deallocate(xpsi,psi)
|
|
|
|
call mp_sum(exchange_energy_fast,world_comm)
|
|
if(exx_cus%nspin==1) then
|
|
exchange_energy_fast=-exchange_energy_fast*exxalpha*2.d0!the 2 is for spin ATTENZIONE
|
|
else
|
|
exchange_energy_fast=-exchange_energy_fast*exxalpha
|
|
endif
|
|
return
|
|
END FUNCTION exchange_energy_fast
|
|
|
|
subroutine dft_exchange_fast(ispin,nbnd_s,psi,exx_cus)
|
|
USE io_global, ONLY : stdout, ionode, ionode_id
|
|
USE wvfct, ONLY : npwx, npw, nbnd, wg
|
|
USE gvect
|
|
USE mp, ONLY : mp_sum
|
|
USE mp_world, ONLY : world_comm
|
|
|
|
implicit none
|
|
|
|
INTEGER, INTENT(in) :: ispin!spin channel
|
|
INTEGER, INTENT(in) :: nbnd_s!number of states
|
|
COMPLEX(kind=DP), INTENT(in) :: psi(npwx,nbnd_s)!wavefunctions
|
|
TYPE(exchange_cus), INTENT(in) :: exx_cus!descriptor of exchange
|
|
|
|
INTEGER :: ii,ig
|
|
COMPLEX(kind=DP), ALLOCATABLE :: xpsi(:,:)
|
|
REAL(kind=DP) :: sca
|
|
|
|
allocate(xpsi(npwx,nbnd_s))
|
|
!loop on states
|
|
|
|
do ii=1,nbnd_s
|
|
!apply X operator
|
|
if(.not.l_exchange_turbo) then
|
|
call fock_cus(psi(:,ii),xpsi(:,ii),exx_cus)
|
|
else
|
|
call periodic_fock_cus(ispin,psi(:,ii),xpsi(:,ii),exx_cus)
|
|
endif
|
|
enddo
|
|
|
|
!calculate overlap
|
|
do ii=1,nbnd_s
|
|
sca=0.d0
|
|
do ig=1,npw
|
|
sca=sca+2.d0*dble(psi(ig,ii)*conjg(xpsi(ig,ii)))
|
|
enddo
|
|
if(gstart==2) sca=sca-dble(psi(1,ii)*conjg(xpsi(1,ii)))
|
|
call mp_sum(sca,world_comm)
|
|
write(stdout,*) 'EXCHANGE FAST',ii, sca
|
|
enddo
|
|
FLUSH(stdout)
|
|
deallocate(xpsi)
|
|
return
|
|
end subroutine dft_exchange_fast
|
|
END MODULE exchange_custom
|