quantum-espresso/GWW/pw4gww/pola_lanczos.f90

2701 lines
85 KiB
Fortran

!
! Copyright (C) 2001-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!
!routines for the calculation of the polarization
!lanczos-style
!ONLY FOR NORMCONSERVING PSEUDOS !!!!!
subroutine pola_basis_lanczos(n_set,nstates,numpw, nsteps,ispin)
!this subroutine calculates the basis for every v
!the minimal orthonormal basis for the w_v(r)*w^P'_i(r) products
USE io_global, ONLY : stdout, ionode, ionode_id
USE io_files, ONLY : prefix, tmp_dir, diropn
USE kinds, ONLY : DP
USE wannier_gw
USE gvect
USE constants, ONLY : e2, pi, tpi, fpi
USE cell_base, ONLY: at, alat, tpiba, omega, tpiba2
USE wvfct, ONLY : npwx, npw, nbnd
USE gvecw, ONLY : ecutwfc
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm, mpime, nproc
USE mp_pools, ONLY : intra_pool_comm
USE gvecs, ONLY : doublegrid
USE fft_custom_gwl
USE mp_wave, ONLY : mergewf,splitwf
USE fft_base, ONLY : dfftp, dffts
USE fft_interfaces, ONLY : fwfft, invfft
implicit none
INTEGER, EXTERNAL :: find_free_unit
INTEGER, INTENT(in) :: n_set !defines the number of states to be read from disk at the same tim\e
INTEGER, INTENT(in) :: nstates!number of orthonormal states to retain
INTEGER, INTENT(in) :: numpw!dimension of polarization basis
INTEGER, INTENT(in) :: nsteps!number of lanczos steps
INTEGER, INTENT(in) :: ispin! spin channel
INTEGER :: iv,iw,ig,ii,jj
REAL(kind=DP), ALLOCATABLE :: wv_real(:),tmp_r(:),tmp_r2(:)
COMPLEX(kind=DP), ALLOCATABLE :: tmp_g(:), wp_prod(:,:,:)
INTEGER :: iungprod,iunrprod, iungresult,iuntmat
LOGICAL :: exst
REAL(kind=DP), ALLOCATABLE :: omat(:,:),omat_hold(:,:)
REAL(kind=DP), ALLOCATABLE :: eigen(:),work(:)
INTEGER :: lwork,info,liwork
COMPLEX(kind=DP), ALLOCATABLE :: wp_g(:,:)!product terms in g wfc grid
COMPLEX(kind=DP), ALLOCATABLE :: wp_g_t(:,:)!
REAL(kind=DP), ALLOCATABLE :: t_mat(:,:),t_mat_hold(:,:), t_mat_hold2(:,:)
CHARACTER(4) :: nfile
LOGICAL :: l_reduce_io=.true.!if true reduces io
COMPLEX(kind=DP), ALLOCATABLE :: p_basis(:,:)!polarizability basis
LOGICAL :: l_dsyevr=.true.!if true uses dsyevr
REAL(kind=DP), ALLOCATABLE :: vectors(:,:)!for dsyevr
INTEGER, ALLOCATABLE :: iwork(:), ifail(:)
INTEGER, ALLOCATABLE :: isuppz(:)
INTEGER :: n_found
LOGICAL :: l_fft_custom=.false.!if true uses custom fft grid
COMPLEX(kind=DP), ALLOCATABLE :: evc_t(:,:),p_basis_t(:,:)
COMPLEX(kind=DP), ALLOCATABLE :: evc_g(:)
LOGICAL :: l_sumrule=.false.!if true imposes the sum rule over the norm of Pc|\Phi_\mu\Psi_v> for each of them
REAL(kind=DP), ALLOCATABLE :: norms(:)
REAL(kind=DP) :: norm_t, c_norm,norm
REAL(kind=DP), ALLOCATABLE :: p_basis_r(:,:) !polarizabilty basis in real custom space
INTEGER :: ivv,nbuf
REAL(kind=DP) :: vl,vu
INTEGER :: il,iu
REAL(kind=DP), ALLOCATABLE :: t_eigen_hold(:)
TYPE(fft_cus) :: fc
write(stdout,*) 'Routine pola_basis_lanczos'
FLUSH(stdout)
fc%ecutt=ecutwfc
fc%dual_t=dual_vt
if(l_verbose) write(stdout,*) 'Call initialize_fft_custom'
FLUSH(stdout)
call initialize_fft_custom(fc)
allocate(evc_g(fc%ngmt_g))
allocate(wv_real(fc%nrxxt))
allocate(norms(numpw))
!read w^P'_i on file on real space
!open product of wanniers filed
iungprod = find_free_unit()
CALL diropn( iungprod, 'wiwjwfc_red', max_ngm*2, exst )
if(.not.l_reduce_io) then
iunrprod = find_free_unit()
CALL diropn( iunrprod, 'wiwjwfc_red_r', dfftp%nnr, exst )
endif
iungresult = find_free_unit()
CALL diropn( iungresult, 'vw_lanczos',npw*2, exst)
if(.not.l_reduce_io) then
allocate(tmp_g(max_ngm),tmp_r(dfftp%nnr))
do iw=1,numpw
call davcio(tmp_g,max_ngm*2,iungprod,iw,-1)
!trasform to r-space
psic(:)=(0.d0,0.d0)
do ig=1,max_ngm
psic(dfftp%nl(ig))=tmp_g(ig)
psic(dfftp%nlm(ig))=CONJG(tmp_g(ig))
enddo
CALL invfft ('Rho', psic, dfftp)
tmp_r(:)=dble(psic(:))
call davcio(tmp_r,dfftp%nnr,iunrprod,iw,1)
enddo
deallocate(tmp_g,tmp_r)
close(iunrprod)
else
!read polarizability basis functions
allocate(p_basis(max_ngm,numpw))
do iw=1,numpw
call davcio(p_basis(:,iw),max_ngm*2,iungprod,iw,-1)
enddo
endif
close(iungprod)
if(l_verbose) write(stdout,*) 'pola_basis_lanczos 1'
FLUSH(stdout)
!now polarizability basis are put on the ordering of the redueced grid, if required
allocate(p_basis_t(fc%npwt,numpw))
if(fc%dual_t==4.d0) then
p_basis_t(:,:)=p_basis(:,:)
else
call reorderwfp_col(numpw,npw,fc%npwt,p_basis(1,1),p_basis_t(1,1), npw,fc%npwt, &
& ig_l2g,fc%ig_l2gt,fc%ngmt_g,mpime, nproc,intra_pool_comm )
!do ii=1,numpw
! call mergewf(p_basis(:,ii),evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
! call splitwf(p_basis_t(:,ii),evc_g,fc%npwt,fc%ig_l2gt,mpime,nproc,ionode_id,intra_pool_comm)
!enddo
endif
!trasform to real space
allocate(p_basis_r(fc%nrxxt,numpw))
do ii=1,numpw,2
psic(:)=(0.d0,0.d0)
if(ii==numpw) then
psic(fc%nlt(1:fc%npwt)) = p_basis_t(1:fc%npwt,ii)
psic(fc%nltm(1:fc%npwt)) = CONJG( p_basis_t(1:fc%npwt,ii) )
else
psic(fc%nlt(1:fc%npwt))=p_basis_t(1:fc%npwt,ii)+(0.d0,1.d0)*p_basis_t(1:fc%npwt,ii+1)
psic(fc%nltm(1:fc%npwt)) = CONJG( p_basis_t(1:fc%npwt,ii) )+(0.d0,1.d0)*CONJG( p_basis_t(1:fc%npwt,ii+1) )
endif
CALL cft3t(fc, psic, fc%nr1t, fc%nr2t, fc%nr3t, fc%nrx1t, fc%nrx2t, fc%nrx3t, 2 )
p_basis_r(1:fc%nrxxt,ii)= DBLE(psic(1:fc%nrxxt))
if(ii/=numpw) p_basis_r(1:fc%nrxxt,ii+1)= DIMAG(psic(1:fc%nrxxt))
enddo
!now valence wavefunctions are put on the ordering of the reduced grid
allocate(evc_t(fc%npwt,num_nbndv(ispin)))
if(fc%dual_t==4.d0) then
evc_t(:,1:num_nbndv(ispin))=evc(:,1:num_nbndv(ispin))
else
call reorderwfp_col(num_nbndv(ispin),npw,fc%npwt,evc(1,1),evc_t(1,1), npw,fc%npwt, &
& ig_l2g,fc%ig_l2gt,fc%ngmt_g,mpime, nproc,intra_pool_comm )
endif
!loop on v
allocate(tmp_r(fc%nrxxt),tmp_r2(fc%nrxxt))
allocate(omat(numpw,numpw),omat_hold(numpw,numpw))
allocate(t_mat(numpw,nstates), t_mat_hold(numpw,nstates), t_mat_hold2(numpw,nstates))
allocate(wp_g(npw,nstates))
allocate(wp_g_t(fc%npwt,nstates))
allocate(t_eigen_hold(nstates))
nbuf=min(5,nproc)
allocate(wp_prod(fc%npwt,numpw,nbuf))
do ivv=1,num_nbndv(ispin),nbuf
!put iv on real space
do iv=ivv,min(ivv+nbuf-1,num_nbndv(ispin))
psic(:)=(0.d0,0.d0)
psic(fc%nlt(1:fc%npwt)) = evc_t(1:fc%npwt,iv)
psic(fc%nltm(1:fc%npwt)) = CONJG( evc_t(1:fc%npwt,iv) )
CALL cft3t(fc, psic, fc%nr1t, fc%nr2t, fc%nr3t, fc%nrx1t, fc%nrx2t, fc%nrx3t, 2 )
wv_real(1:fc%nrxxt)= DBLE(psic(1:fc%nrxxt))
!!loop on products of wanniers
if(.not.l_reduce_io) then
iunrprod = find_free_unit()
CALL diropn( iunrprod, 'wiwjwfc_red_r', dfftp%nnr, exst )
endif
! allocate(tmp_r(fc%nrxxt))
if(l_verbose) write(stdout,*) 'do fft'
FLUSH(stdout)
do ii=1,numpw,2
!!read n_set w^P'_i from disk
if(.not.l_reduce_io) then
call davcio(tmp_r,dfftp%nnr,iunrprod,ii,-1)
write(stdout,*) 'ERROR l_reduce_io must be true'
FLUSH(stdout)
stop
endif
tmp_r(1:fc%nrxxt)=p_basis_r(1:fc%nrxxt,ii)*wv_real(1:fc%nrxxt)
if(ii/=numpw) then
tmp_r2(1:fc%nrxxt)=p_basis_r(1:fc%nrxxt,ii+1)*wv_real(1:fc%nrxxt)
else
tmp_r2(1:fc%nrxxt)=0.d0
endif
!!form products with w_v and trasfrom in G space
psic(1:fc%nrxxt)=dcmplx(tmp_r(1:fc%nrxxt),tmp_r2(1:fc%nrxxt))
CALL cft3t(fc, psic, fc%nr1t, fc%nr2t, fc%nr3t, fc%nrx1t, fc%nrx2t, fc%nrx3t, -2 )
if(ii==numpw) then
wp_prod(1:fc%npwt, ii,iv-ivv+1) = psic(fc%nlt(1:fc%npwt))
else
wp_prod(1:fc%npwt, ii,iv-ivv+1)= 0.5d0*(psic(fc%nlt(1:fc%npwt))+conjg( psic(fc%nltm(1:fc%npwt))))
wp_prod(1:fc%npwt, ii+1,iv-ivv+1)= (0.d0,-0.5d0)*(psic(fc%nlt(1:fc%npwt)) - conjg(psic(fc%nltm(1:fc%npwt))))
endif
enddo
if(l_verbose) write(stdout,*) 'do pc_operator'
FLUSH(stdout)
call pc_operator_t_m(numpw,wp_prod(1,1,iv-ivv+1),evc_t,ispin, fc)
if(l_verbose) write(stdout,*) 'calculate omat'
FLUSH(stdout)
if(.not.l_reduce_io) close(iunrprod)
!!calculate overlap matrix
call dgemm('T','N',numpw,numpw,2*fc%npwt,2.d0,wp_prod(1,1,iv-ivv+1),2*fc%npwt,&
&wp_prod(1,1,iv-ivv+1),2*fc%npwt,0.d0,omat,numpw)
if(fc%gstart_t==2) then
do ii=1,numpw
do jj=1,numpw
omat(jj,ii)=omat(jj,ii)-dble(conjg(wp_prod(1,jj,iv-ivv+1))*wp_prod(1,ii,iv-ivv+1))
enddo
enddo
endif
do ii=1,numpw
call mp_sum(omat(1:numpw,ii), world_comm)
enddo
!set up norms
! do ii=1,numpw
! norms(ii)=omat(ii,ii)
! enddo
if(iv-ivv==mpime) then
omat_hold(:,:)=omat(:,:)
endif
enddo
!!
!!solve eigen/values vector problem
!!
if(l_verbose) write(stdout,*) 'solve eig'
FLUSH(stdout)
FLUSH(stdout)
do iv=ivv,min(ivv+nbuf-1,num_nbndv(ispin))
if(l_verbose) write(stdout,*) 'solve eig', iv
FLUSH(stdout)
if(iv-ivv==mpime) then
if(.not.l_dsyevr) then
allocate(eigen(numpw))
allocate(work(1))
call DSYEV( 'V', 'U', numpw, omat_hold, numpw, eigen, work, -1, info )
lwork=work(1)
deallocate(work)
allocate(work(lwork))
call DSYEV( 'V', 'U', numpw, omat_hold, numpw, eigen, work, lwork, info )
deallocate(work)
if(info/=0) then
write(stdout,*) 'ROUTINE pola_basis_lanczos, INFO:', info
stop
endif
! do iw=1,numpw
! write(stdout,*) 'EIGEN:',iv,iw, eigen(iw)
! enddo
! FLUSH(stdout)
else
if(l_verbose) write(stdout,*) 'ATT1'
FLUSH(stdout)
allocate(eigen(numpw))
allocate(vectors(numpw,nstates))
allocate(isuppz(2*nstates))
allocate(work(1),iwork(1))
if(l_verbose) write(stdout,*) 'ATT2'
FLUSH(stdout)
call DSYEVR('V','I','U',numpw,omat_hold,numpw,0.d0,0.d0,numpw-nstates+1,numpw,0.d0,n_found,eigen,&
& vectors,numpw,isuppz,work, -1,iwork,-1, info)
lwork=work(1)
liwork=iwork(1)
deallocate(work,iwork)
allocate(work(lwork))
allocate(iwork(liwork))
if(l_verbose) write(stdout,*) 'ATT3',numpw,nstates,size(omat_hold(:,1)),size(omat_hold(1,:)),lwork,liwork
FLUSH(stdout)
vl=0.d0
vu=0.d0
il=numpw-nstates+1
iu=numpw
call DSYEVR('V','I','U',numpw,omat_hold,numpw,vl,vu,il,iu,0.d0,n_found,eigen,&
& vectors,numpw,isuppz,work,lwork,iwork,liwork, info)
if(info/=0) then
write(stdout,*) 'ROUTINE pola_lanczos DSYEVR, INFO:', info
stop
endif
if(l_verbose) write(stdout,*) 'ATT4'
FLUSH(stdout)
deallocate(isuppz)
deallocate(work,iwork)
do iw=1,nstates,nstates-1
write(stdout,*) 'EIGEN T LOCAL:',iv,iw, eigen(iw)
enddo
FLUSH(stdout)
endif
!!find transformation matrix and write on disk
!
if(l_verbose) write(stdout,*) 'pola_basis_lanczos t_mat'
FLUSH(stdout)
if(.not.l_dsyevr) then
do ii=1,nstates
do jj=1,numpw
t_mat_hold(jj,ii)=omat_hold(jj,numpw-ii+1)*(dsqrt(eigen(numpw-ii+1)))
enddo
t_eigen_hold(ii)=eigen(numpw-ii+1)
enddo
else
do ii=1,nstates
do jj=1,numpw
t_mat_hold(jj,ii)=vectors(jj,ii)*(dsqrt(eigen(ii)))
enddo
t_eigen_hold(ii)=eigen(ii)
enddo
endif
!!find liner dependent products
if(.not.l_dsyevr) then
do ii=1,nstates
t_mat_hold2(:,ii)=omat_hold(:,numpw-ii+1)*(1.d0/dsqrt(eigen(numpw-ii+1)))
enddo
else
do ii=1,nstates
t_mat_hold2(:,ii)=vectors(:,ii)*(1.d0/dsqrt(eigen(ii)))
enddo
endif
deallocate(eigen)
if(l_dsyevr) deallocate(vectors)
endif
enddo
allocate(eigen(nstates))
do iv=ivv,min(ivv+nbuf-1,num_nbndv(ispin))
if(iv-ivv == mpime) then
t_mat(:,:)=t_mat_hold(:,:)
eigen(1:nstates)=t_eigen_hold(1:nstates)
endif
call mp_bcast(t_mat,iv-ivv,world_comm)
call mp_bcast(eigen(1:nstates),iv-ivv,world_comm)
!if required imposes sum rule
! if(l_sumrule) then
! norm_t=0.d0
! do jj=1,numpw
! do ii=1,nstates
! norm_t=norm_t+t_mat(jj,ii)**2.d0
! enddo
! enddo
! norm=0.d0
! do jj=1,numpw
! norm=norm+norms(jj)
! enddo
! c_norm=dsqrt(norm/norm_t)
! write(stdout,*) 'Sum rule:',c_norm
! t_mat(:,:)=t_mat(:,:)*c_norm
! endif
if(ionode) then
iuntmat = find_free_unit()
write(nfile,'(4i1)') iv/1000,mod(iv,1000)/100,mod(iv,100)/10,mod(iv,10)
if(ispin==1) then
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_mat_lanczos'//nfile, status='unknown',form='unformatted')
else
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_mat_lanczos2'//nfile, status='unknown',form='unformatted')
endif
write(iuntmat) iv
write(iuntmat) num_nbndv(ispin)
write(iuntmat) numpw
write(iuntmat) nstates
do ii=1,nstates
write(iuntmat) t_mat(1:numpw,ii)
enddo
close(iuntmat)
endif
!write on disk file with eigen values
if(ionode) then
iuntmat = find_free_unit()
write(nfile,'(4i1)') iv/1000,mod(iv,1000)/100,mod(iv,100)/10,mod(iv,10)
if(ispin==1) then
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos'//nfile, status='unknown',form='unformatted')
else
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos2'//nfile, status='unknown',form='unformatted')
endif
write(iuntmat) nstates
write(iuntmat) eigen(1:nstates)
close(iuntmat)
endif
if(l_verbose) write(stdout,*) 'pola_basis update wp_g'
FLUSH(stdout)
!!find liner dependent products
if(iv-ivv == mpime) then
t_mat(:,:)=t_mat_hold2(:,:)
endif
call mp_bcast(t_mat,iv-ivv,world_comm)
if(l_verbose) write(stdout,*) 'pola_basis update wp_g dgemm'
FLUSH(stdout)
call dgemm('N','N',2*fc%npwt,nstates,numpw,1.d0,wp_prod(1,1,iv-ivv+1),2*fc%npwt,t_mat,numpw,0.d0,wp_g_t,2*fc%npwt)
write(stdout,*) 'pola_basis update merge-split',iv,ivv
FLUSH(stdout)
!put the correct order
if(fc%dual_t==4.d0) then
wp_g(:,:)=wp_g_t(:,:)
else
call reorderwfp_col(nstates,fc%npwt,npw,wp_g_t(1,1),wp_g(1,1),fc%npwt,npw, &
& fc%ig_l2gt,ig_l2g,fc%ngmt_g,mpime, nproc,intra_pool_comm )
!do ii=1,nstates
! call mergewf(wp_g_t(:,ii),evc_g,fc%npwt,fc%ig_l2gt,mpime,nproc,ionode_id,intra_pool_comm)
! call splitwf(wp_g(:,ii),evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
!enddo
endif
if(l_verbose) write(stdout,*) 'pola_basis update davcio',iv
FLUSH(stdout)
!!write on disk
do ii=1,nstates
call davcio(wp_g(:,ii),npw*2,iungresult,ii+(iv-1)*nstates,1)
enddo
if(l_verbose) write(stdout,*) 'pola_basis update done'
FLUSH(stdout)
enddo
deallocate(eigen)
enddo
deallocate(t_mat,t_mat_hold,t_mat_hold2)
deallocate(omat,omat_hold,tmp_r,tmp_r2,p_basis_r)
deallocate(wp_g,wp_g_t)
close(iungresult)
deallocate(norms)
deallocate(wv_real,wp_prod)
deallocate(t_eigen_hold)
if(l_verbose) write(stdout,*) 'Exiting pola_basis_lanczos'
FLUSH(stdout)
if(l_reduce_io) deallocate(p_basis)
deallocate(p_basis_t,evc_t)
if(l_verbose) write(stdout,*) 'Call deallocate_fft_custom'
FLUSH(stdout)
deallocate(evc_g)
call deallocate_fft_custom(fc)
return
end subroutine pola_basis_lanczos
subroutine pc_operator(state,ispin,l_cond)
!this operator project the wavefunction state on the conduction
!subspace, the valence wavefunction are in evc
!ONLY FOR GAMMA POINT NOW!!!!
USE io_global, ONLY : stdout
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wannier_gw, ONLY : num_nbndv,num_nbnds
implicit none
COMPLEX(kind=DP), INTENT(inout) :: state(npw)!state to be projected
INTEGER, INTENT(in) :: ispin!spin channel
LOGICAL :: l_cond!if true project out alson conduction states till num_nbnds
INTEGER :: iv,ig
REAL(kind=DP), ALLOCATABLE :: prod(:)
if(.not.l_cond) then
if(num_nbndv(ispin)==0) return
allocate(prod(num_nbndv(ispin)))
call dgemm('T','N', num_nbndv(ispin),1,2*npw,2.d0,evc,2*npwx,state,2*npw,&
& 0.d0,prod,num_nbndv(ispin))
do iv=1,num_nbndv(ispin)
if(gstart==2) prod(iv)=prod(iv)-dble(conjg(evc(1,iv))*state(1))
enddo
call mp_sum(prod(:), world_comm)
call dgemm('N','N',2*npw,1,num_nbndv(ispin),-1.d0,evc,2*npwx,prod,&
&num_nbndv(ispin),1.d0,state,2*npw)
else
allocate(prod(num_nbnds))
call dgemm('T','N', num_nbnds,1,2*npw,2.d0,evc,2*npwx,state,2*npw,&
& 0.d0,prod,num_nbnds)
do iv=1,num_nbnds
if(gstart==2) prod(iv)=prod(iv)-dble(conjg(evc(1,iv))*state(1))
enddo
call mp_sum(prod(:), world_comm)
call dgemm('N','N',2*npw,1,num_nbnds,-1.d0,evc,2*npwx,prod,&
&num_nbnds,1.d0,state,2*npw)
endif
deallocate(prod)
return
end subroutine pc_operator
subroutine pc_operator_t(state,evc_t,ispin, fc)
!this operator project the wavefunction state on the conduction
!subspace, the valence wavefunction are in evc
!ONLY FOR GAMMA POINT NOW!!!!
USE io_global, ONLY : stdout
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wannier_gw, ONLY : num_nbndv
USE fft_custom_gwl
USE fft_base, ONLY : dfftp, dffts
USE fft_interfaces, ONLY : fwfft, invfft
implicit none
TYPE(fft_cus), INTENT(in) :: fc
COMPLEX(kind=DP), INTENT(inout) :: state(fc%npwt)!state to be projected
! COMPLEX(kind=DP), INTENT(inout) :: evc_t(fc%npwt,num_nbndv(ispin))
! above syntax not accepted by all compilers
COMPLEX(kind=DP), INTENT(inout) :: evc_t(fc%npwt,*)!valence states
INTEGER, INTENT(in) :: ispin!spin channel
INTEGER :: iv,ig
REAL(kind=DP), ALLOCATABLE :: prod(:)
allocate(prod(num_nbndv(ispin)))
call dgemm('T','N', num_nbndv(ispin),1,2*fc%npwt,2.d0,evc_t,2*fc%npwt,state,2*fc%npwt,&
& 0.d0,prod,num_nbndv(ispin))
do iv=1,num_nbndv(ispin)
if(fc%gstart_t==2) prod(iv)=prod(iv)-dble(conjg(evc_t(1,iv))*state(1))
enddo
call mp_sum(prod(:), world_comm)
call dgemm('N','N',2*fc%npwt,1,num_nbndv(ispin),-1.d0,evc_t,2*fc%npwt,prod,&
&num_nbndv(ispin),1.d0,state,2*fc%npwt)
deallocate(prod)
return
end subroutine pc_operator_t
subroutine lanczos_state(zstates, nstates, itype, nsteps,istate,ispin)
!this subroutine perform nsteps collective lanczos iterations
!on orthonormal zstates state
!GAMMA POINT ONLY!!!
USE io_global, ONLY : stdout, ionode, ionode_id
USE io_files, ONLY : prefix, tmp_dir
USE kinds, ONLY : DP
USE wannier_gw
USE gvect
USE constants, ONLY : e2, pi, tpi, fpi
USE cell_base, ONLY: at, alat, tpiba, omega, tpiba2
USE wvfct, ONLY : g2kin, npwx, npw, nbnd
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : mpime, nproc, world_comm
USE gvecs, ONLY : doublegrid
USE g_psi_mod, ONLY : h_diag, s_diag
USE becmod, ONLY : becp,allocate_bec_type,deallocate_bec_type
USE uspp, ONLY : vkb, nkb, okvan
USE klist, ONLY : xk,igk_k
USE noncollin_module, ONLY : noncolin, npol
USE fft_base, ONLY : dfftp, dffts
USE fft_interfaces, ONLY : fwfft, invfft
implicit none
INTEGER, EXTERNAL :: find_free_unit
COMPLEX(kind=DP), INTENT(in) :: zstates(npw,nstates)!states for starting lanczos chains
INTEGER, INTENT(in) :: nstates!number of states
INTEGER, INTENT(in) :: itype!matrices to be saved: 0 for polarization; 1 for self-energy; 2 for other uses
INTEGER, INTENT(in) :: nsteps!number of Lanczos iteration to be performed
INTEGER, INTENT(in) :: istate!corresponding KS state(for labelling output files)
INTEGER, INTENT(in) :: ispin!spin channel 1,2
COMPLEX(kind=DP), ALLOCATABLE :: psi_1(:,:),psi_2(:,:),psi_3(:,:)
COMPLEX(kind=DP), ALLOCATABLE :: u_0(:,:),u_1(:,:)
REAL(kind=DP), ALLOCATABLE :: alpha(:),beta(:),gamma(:), n_1(:)
REAL(kind=DP), ALLOCATABLE :: d(:,:),f(:,:),c(:)
REAL(kind=DP), ALLOCATABLE :: omat(:,:)!overlap with intermediate lanczos states to be saved on disk for each iteration!
CHARACTER(4) :: nfile
INTEGER :: is,ig,ii,jj,it
INTEGER :: iunlan
LOGICAL :: omat_div = .false.
INTEGER :: l_blk,nbegin,nend,nsize
INTEGER :: ip,nbegin_ip, nend_ip, nsize_ip
REAL(kind=DP), ALLOCATABLE :: omat_tot(:,:,:)
INTEGER :: iv
! allocate(alpha(nstates),beta(nstates),gamma(nstates),n_1(nstates))
! allocate(d(nsteps,nstates),f(nsteps,nstates),c(nstates))
!if omat_div is true distribute memory on MPI tasks, but it is slower
if(nstates < 1600) then
omat_div=.false.
else
omat_div=.true.
endif
call start_clock('lanczos_state')
!to decide whether to calculate the overlap matrix column by column or distributed on processors
if(omat_div) then
l_blk= (nstates)/nproc
if(l_blk*nproc < (nstates)) l_blk = l_blk+1
nbegin=mpime*l_blk+1
nend=nbegin+l_blk-1
if(nend > nstates) nend=nstates
nsize=nend-nbegin+1
allocate(omat(nstates,l_blk))
allocate(omat_tot(nstates,l_blk,nsteps))
else
l_blk=nstates
allocate(omat(nstates,nstates))
endif
allocate(psi_1(npw,l_blk),psi_2(npw,l_blk),psi_3(npw,l_blk))
allocate(u_0(npw,l_blk),u_1(npw,l_blk))
allocate(alpha(l_blk),beta(l_blk),gamma(l_blk),n_1(l_blk))
allocate(d(nsteps,nstates),f(nsteps,nstates),c(l_blk))
d(:,:)=0.d0
f(:,:)=0.d0
!loop on l_blk
ALLOCATE( h_diag( npwx,npol ) )
ALLOCATE( s_diag( npwx,npol ) )
call allocate_bec_type ( nkb, l_blk, becp)
IF ( nkb > 0 ) CALL init_us_2( npw, igk_k(1,1), xk(1,1), vkb )
g2kin(1:npw) = ( (g(1,igk_k(1:npw,1)) )**2 + &
( g(2,igk_k(1:npw,1)) )**2 + &
( g(3,igk_k(1:npw,1)) )**2 ) * tpiba2
ip=0
do iv=1,nstates,l_blk
nbegin_ip=iv
nend_ip=min(nbegin_ip+l_blk-1,nstates)
nsize_ip=nend_ip-nbegin_ip+1
!first step
psi_1(1:npw,1:nsize_ip)=zstates(1:npw,nbegin_ip:nend_ip)
!for h_psi allocations are required
write(stdout,*) 'lanczos_state:', istate,ispin
FLUSH(stdout)
!calculate H|\phi_i>
call h_psi( npw, npw, nsize_ip,psi_1(1,1), u_0 )
if(l_selfconsistent) call h_psi_self( npw, npw, nsize_ip,psi_1(1,1), u_0 )
if(l_scissor) call h_psi_scissor(ispin, npw, npw, nsize_ip,psi_1(1,1), u_0 )
!calculate n_1
n_1(1:nsize_ip)=0.d0
do is=1,nsize_ip
do ig=1,npw
n_1(is)=n_1(is)+2.d0*dble(conjg(u_0(ig,is))*u_0(ig,is))
enddo
if(gstart==2) n_1(is)=n_1(is)-dble(conjg(u_0(1,is))*u_0(1,is))
enddo
call mp_sum(n_1(1:nsize_ip), world_comm)
n_1(1:nsize_ip)=dsqrt(n_1(1:nsize_ip))
!calculate alpha
alpha(1:nsize_ip)=0.d0
do is=1,nsize_ip
do ig=1,npw
alpha(is)=alpha(is)+2.d0*dble(conjg(psi_1(ig,is))*u_0(ig,is))
enddo
if(gstart==2) alpha(is)=alpha(is)-dble(conjg(psi_1(1,is))*u_0(1,is))
enddo
call mp_sum(alpha(1:nsize_ip), world_comm)
alpha(1:nsize_ip)=alpha(1:nsize_ip)/n_1(1:nsize_ip)
!calculate psi_2 and beta
do is=1,nsize_ip
psi_2(:,is)=u_0(:,is)/n_1(is)-alpha(is)*psi_1(:,is)
enddo
beta(1:nsize_ip)=0.d0
do is=1,nsize_ip
do ig=1,npw
beta(is)=beta(is)+2.d0*dble(conjg(psi_2(ig,is))*psi_2(ig,is))
enddo
if(gstart==2) beta(is)=beta(is)-dble(conjg(psi_2(1,is))*psi_2(1,is))
enddo
call mp_sum(beta(1:nsize_ip), world_comm)
beta(1:nsize_ip)=dsqrt(beta(1:nsize_ip))
do is=1,nsize_ip
psi_2(:,is)=psi_2(:,is)/beta(is)
enddo
!calculate d
do is=1,nsize_ip
do ig=1,npw
d(1,is+nbegin_ip-1)=d(1,is+nbegin_ip-1)+2.d0*dble(conjg(psi_1(ig,is))*u_0(ig,is))
enddo
if(gstart==2) d(1,is+nbegin_ip-1)=d(1,is+nbegin_ip-1)-dble(conjg(psi_1(1,is))*u_0(1,is))
enddo
call mp_sum(d(1,nbegin_ip:nend_ip), world_comm)
if(l_verbose) write(stdout,*) 'Lanczos Diagonal 1', d(1,nbegin_ip:nend_ip)
FLUSH(stdout)
!calculate f
do is=1,nsize_ip
do ig=1,npw
f(1,is+nbegin_ip-1)=f(1,is+nbegin_ip-1)+2.d0*dble(conjg(psi_2(ig,is))*u_0(ig,is))
enddo
if(gstart==2) f(1,is+nbegin_ip-1)=f(1,is+nbegin_ip-1)-dble(conjg(psi_2(1,is))*u_0(1,is))
enddo
call mp_sum(f(1,nbegin_ip:nend_ip), world_comm)
if(l_verbose) write(stdout,*) 'ATTENZIONE1'
FLUSH(stdout)
!calculate overlaps and write on output file
if(omat_div) then
call dgemm('T','N',nstates,nsize_ip,2*npw,2.d0,zstates,2*npw,psi_1(1,1),2*npw,0.d0,omat,nstates)
if(gstart==2) then
do ii=1,nsize_ip
do jj=1,nstates
omat(jj,ii)=omat(jj,ii)-dble(conjg(zstates(1,jj))*psi_1(1,ii))
enddo
enddo
endif
do ii=1,nsize_ip
call mp_sum(omat(:,ii), world_comm)
enddo
if(ip==mpime) omat_tot(1:nstates,1:nsize_ip,1)=omat(1:nstates,1:nsize_ip)
else
omat(:,:)=0.d0
call dgemm('T','N',nstates,nstates,2*npw,2.d0,zstates,2*npw,psi_1,2*npw,0.d0,omat,nstates)
if(gstart==2) then
do ii=1,nstates
do jj=1,nstates
omat(jj,ii)=omat(jj,ii)-dble(conjg(zstates(1,jj))*psi_1(1,ii))
enddo
enddo
endif
if(l_verbose) write(stdout,*) 'ATTENZIONE2'
FLUSH(stdout)
do ii=1,nstates
call mp_sum(omat(:,ii), world_comm)
enddo
if(ionode) then
iunlan=find_free_unit()
write(nfile,'(4i1)') istate/1000,mod(istate,1000)/100,mod(istate,100)/10,mod(istate,10)
if(ispin==1) then
if(itype==0) then
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.p_iter_lanczos',&
&status='unknown',form='unformatted')
else if(itype==1) then
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.s_iter_lanczos'//'_'//nfile,&
&status='unknown',form='unformatted')
else
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.o_iter_lanczos',&
&status='unknown',form='unformatted')
endif
else
if(itype==0) then
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.p_iter_lanczos2', &
&status='unknown',form='unformatted')
else if(itype==1) then
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.s_iter_lanczos2'//'_'//nfile, &
&status='unknown',form='unformatted')
else
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.o_iter_lanczos2', &
&status='unknown',form='unformatted')
endif
endif
write(iunlan) nstates
write(iunlan) istate
write(iunlan) nsteps
do is=1,nstates
write(iunlan) omat(1:nstates,is)
enddo
endif
endif
if(l_verbose) write(stdout,*) 'ATTENZIONE3'
FLUSH(stdout)
!calculate second overlap
if(omat_div) then
call dgemm('T','N',nstates,nsize_ip,2*npw,2.d0,zstates,2*npw,psi_2(1,1),2*npw,0.d0,omat,nstates)
if(gstart==2) then
do ii=1,nsize_ip
do jj=1,nstates
omat(jj,ii)=omat(jj,ii)-dble(conjg(zstates(1,jj))*psi_2(1,ii))
enddo
enddo
endif
do ii=1,nsize_ip
call mp_sum(omat(:,ii), world_comm)
enddo
if(ip==mpime) omat_tot(1:nstates,1:nsize_ip,2)=omat(1:nstates,1:nsize_ip)
else
omat(:,:)=0.d0
call dgemm('T','N',nstates,nstates,2*npw,2.d0,zstates,2*npw,psi_2,2*npw,0.d0,omat,nstates)
if(gstart==2) then
do ii=1,nstates
do jj=1,nstates
omat(jj,ii)=omat(jj,ii)-dble(conjg(zstates(1,jj))*psi_2(1,ii))
enddo
enddo
endif
if(l_verbose) write(stdout,*) 'ATTENZIONE4'
FLUSH(stdout)
do ii=1,nstates
call mp_sum(omat(:,ii), world_comm)
enddo
if(ionode) then
do is=1,nstates
write(iunlan) omat(1:nstates,is)
enddo
endif
endif
!do iterate
do it=2,nsteps
if(l_verbose) write(stdout,*) 'lanczos h_psi'
FLUSH(stdout)
!calculate H|\phi_i+1>
call h_psi( npw, npw, nsize_ip,psi_2(1,1), u_1 )
if(l_selfconsistent) call h_psi_self( npw, npw, nsize_ip,psi_2(1,1), u_1 )
if(l_scissor) call h_psi_scissor( ispin,npw, npw, nsize_ip,psi_2(1,1), u_1 )
if(l_verbose) write(stdout,*) 'lanczos alfa beta gamma'
FLUSH(stdout)
!calculate n_1
n_1(1:nsize_ip)=0.d0
do is=1,nsize_ip
do ig=1,npw
n_1(is)=n_1(is)+2.d0*dble(conjg(u_1(ig,is))*u_1(ig,is))
enddo
if(gstart==2) n_1(is)=n_1(is)-dble(conjg(u_1(1,is))*u_1(1,is))
enddo
call mp_sum(n_1(1:nsize_ip), world_comm)
n_1(1:nsize_ip)=dsqrt(n_1(1:nsize_ip))
!calculate alpha
alpha(1:nsize_ip)=0.d0
do is=1,nsize_ip
do ig=1,npw
alpha(is)=alpha(is)+2.d0*dble(conjg(psi_1(ig,is))*u_1(ig,is))
enddo
if(gstart==2) alpha(is)=alpha(is)-dble(conjg(psi_1(1,is))*u_1(1,is))
enddo
call mp_sum(alpha(1:nsize_ip), world_comm)
alpha(1:nsize_ip)=alpha(1:nsize_ip)/n_1(1:nsize_ip)
!calculate beta
beta(1:nsize_ip)=0.d0
do is=1,nsize_ip
do ig=1,npw
beta(is)=beta(is)+2.d0*dble(conjg(psi_2(ig,is))*u_1(ig,is))
enddo
if(gstart==2) beta(is)=beta(is)-dble(conjg(psi_2(1,is))*u_1(1,is))
enddo
call mp_sum(beta(1:nsize_ip), world_comm)
beta(1:nsize_ip)=beta(1:nsize_ip)/n_1(1:nsize_ip)
!calculate psi_3 and gamma
do is=1,nsize_ip
psi_3(:,is)=u_1(:,is)/n_1(is)-alpha(is)*psi_1(:,is)-beta(is)*psi_2(:,is)
enddo
gamma(1:nsize_ip)=0.d0
do is=1,nsize_ip
do ig=1,npw
gamma(is)=gamma(is)+2.d0*dble(conjg(psi_3(ig,is))*psi_3(ig,is))
enddo
if(gstart==2) gamma(is)=gamma(is)-dble(conjg(psi_3(1,is))*psi_3(1,is))
enddo
call mp_sum(gamma(1:nsize_ip), world_comm)
gamma(1:nsize_ip)=dsqrt(gamma(1:nsize_ip))
do is=1,nsize_ip
psi_3(:,is)=psi_3(:,is)/gamma(is)
enddo
if(l_verbose) write(stdout,*) 'lanczos d f omat'
FLUSH(stdout)
!calculate d
do is=1,nsize_ip
do ig=1,npw
d(it,is+nbegin_ip-1)=d(it,is+nbegin_ip-1)+2.d0*dble(conjg(psi_2(ig,is))*u_1(ig,is))
enddo
if(gstart==2) d(it,is+nbegin_ip-1)=d(it,is+nbegin_ip-1)-dble(conjg(psi_2(1,is))*u_1(1,is))
enddo
call mp_sum(d(it,nbegin_ip:nend_ip), world_comm)
!calculate f
do is=1,nsize_ip
do ig=1,npw
f(it,is+nbegin_ip-1)=f(it,is+nbegin_ip-1)+2.d0*dble(conjg(psi_3(ig,is))*u_1(ig,is))
enddo
if(gstart==2) f(it,is+nbegin_ip-1)=f(it,is+nbegin_ip-1)-dble(conjg(psi_3(1,is))*u_1(1,is))
enddo
call mp_sum(f(it,nbegin_ip:nend_ip), world_comm)
!calculate overlap
if(it /=nsteps) then
if(omat_div) then
call dgemm('T','N',nstates,nsize_ip,2*npw,2.d0,zstates,2*npw,psi_3(1,1),2*npw,0.d0,omat,nstates)
if(gstart==2) then
do ii=1,nsize_ip
do jj=1,nstates
omat(jj,ii)=omat(jj,ii)-dble(conjg(zstates(1,jj))*psi_3(1,ii))
enddo
enddo
endif
do ii=1,nsize_ip
call mp_sum(omat(:,ii), world_comm)
enddo
if(ip==mpime) omat_tot(1:nstates,1:nsize_ip,it+1)=omat(1:nstates,1:nsize_ip)
else
omat(:,:)=0.d0
call dgemm('T','N',nstates,nstates,2*npw,2.d0,zstates,2*npw,psi_3,2*npw,0.d0,omat,nstates)
if(gstart==2) then
do ii=1,nstates
do jj=1,nstates
omat(jj,ii)=omat(jj,ii)-dble(conjg(zstates(1,jj))*psi_3(1,ii))
enddo
enddo
endif
do ii=1,nstates
call mp_sum(omat(:,ii), world_comm)
enddo
if(ionode) then
do is=1,nstates
write(iunlan) omat(1:nstates,is)
enddo
endif
endif
endif
!update arrays
psi_1(1:npw,1:nsize_ip)=psi_2(1:npw,1:nsize_ip)
psi_2(1:npw,1:nsize_ip)=psi_3(1:npw,1:nsize_ip)
u_0(1:npw,1:nsize_ip)=u_1(1:npw,1:nsize_ip)
enddo
ip=ip+1
enddo
!if omat is distribute here writes onfile
if(omat_div) then
if(ionode) then
iunlan=find_free_unit()
write(nfile,'(4i1)') istate/1000,mod(istate,1000)/100,mod(istate,100)/10,mod(istate,10)
if(ispin==1) then
if(itype==0) then
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.p_iter_lanczos', &
&status='unknown',form='unformatted')
else
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.s_iter_lanczos'//'_'//nfile, &
&status='unknown',form='unformatted')
endif
else
if(itype==0) then
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.p_iter_lanczos2', &
&status='unknown',form='unformatted')
else
open( unit= iunlan, file=trim(tmp_dir)//trim(prefix)//'.s_iter_lanczos2'//'_'//nfile, &
&status='unknown',form='unformatted')
endif
endif
write(iunlan) nstates
write(iunlan) istate
write(iunlan) nsteps
endif
do it=1,nsteps
do ip=0,nproc-1
nbegin_ip=ip*l_blk+1
nend_ip=min(nbegin_ip+l_blk-1,nstates)
nsize_ip=nend_ip-nbegin_ip+1
if(nsize_ip >=1) then
if(mpime==ip) omat(1:nstates,1:nsize_ip)=omat_tot(1:nstates,1:nsize_ip,it)
do is=1,nsize_ip
call mp_bcast(omat(1:nstates,is),ip,world_comm)
if(ionode) write(iunlan) omat(1:nstates,is)
enddo
endif
enddo
enddo
endif
!write tridiagonal matrix on disk
if(ionode) then
do is=1,nstates
write(iunlan) d(1:nsteps,is)
enddo
do is=1,nstates
write(iunlan) f(1:nsteps,is)
enddo
endif
if(ionode) close(iunlan)
deallocate(psi_1,psi_2,psi_3)
deallocate(u_0,u_1)
deallocate(alpha,beta,gamma,n_1)
deallocate(f,d,omat,c)
deallocate(h_diag,s_diag)
call deallocate_bec_type(becp)
call stop_clock('lanczos_state')
return
end subroutine lanczos_state
subroutine orthonormalize_two_manifolds( state1, n1,state2, n2, threshold, state_out, n_out)
!this subroutine form am orthormal basis set from 2 manifold (with orthonormal basis sets)
!ONLY FOR NORM_CONSERVING CASE
USE io_global, ONLY : stdout, ionode, ionode_id
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE fft_base, ONLY : dfftp, dffts
USE fft_interfaces, ONLY : fwfft, invfft
USE wannier_gw, ONLY : l_verbose
implicit none
COMPLEX(kind=DP), INTENT(in) :: state1(npw,n1)!1st orthonormal basis
INTEGER, INTENT(in) :: n1!number of 1st basis elements
COMPLEX(kind=DP), INTENT(in) :: state2(npw,n2)!2nd orthonormal basis
INTEGER, INTENT(in) :: n2!number of 2nd basis elements
REAL(kind=DP), INTENT(in) :: threshold!threshold for orthonormality
COMPLEX(kind=DP), INTENT(out) :: state_out(npw,n1+n2)!output basis set
INTEGER, INTENT(out) :: n_out!number of output states
INTEGER :: ii,jj
REAL(kind=DP), ALLOCATABLE :: omat(:,:),tmp_mat(:,:)
REAL(kind=DP), ALLOCATABLE :: eigen(:),work(:)
INTEGER :: lwork,info,liwork
REAL(kind=DP), ALLOCATABLE :: omat1(:,:),omat2(:,:)
!buid overlap matrix
if(l_verbose) write(stdout,*) 'orthonormalize dgemm'
FLUSH(stdout)
allocate(omat(n1+n2,n1+n2))
omat(:,:)=0.d0
do ii=1,n1+n2
omat(ii,ii)=1.d0
enddo
allocate(tmp_mat(n1,n2))
call dgemm('T','N',n1,n2,2*npw,2.d0,state1,2*npw,state2,2*npw,0.d0,tmp_mat,n1)
if(gstart==2) then
do ii=1,n2
do jj=1,n1
tmp_mat(jj,ii)=tmp_mat(jj,ii)-dble(conjg(state1(1,jj))*state2(1,ii))
enddo
enddo
endif
if(l_verbose) write(stdout,*) 'orthonormalize mp_sum'
FLUSH(stdout)
do ii=1,n2
call mp_sum(tmp_mat(:,ii), world_comm)
enddo
if(l_verbose) write(stdout,*) 'orthonormalize copy array'
FLUSH(stdout)
omat(1:n1,n1+1:n1+n2)=tmp_mat(1:n1,1:n2)
deallocate(tmp_mat)
!diagonalize
allocate(eigen(n1+n2))
if(l_verbose) write(stdout,*) 'orthonormalize dsyev'
FLUSH(stdout)
if(ionode) then
allocate(work(1))
call DSYEV( 'V', 'U', n1+n2, omat, n1+n2, eigen, work, -1, info )
lwork=work(1)
deallocate(work)
allocate(work(lwork))
call DSYEV( 'V', 'U', n1+n2, omat, n1+n2, eigen, work, lwork, info )
deallocate(work)
if(info/=0) then
write(stdout,*) 'ROUTINE orthonormalize_two_manifolds, INFO:', info
stop
endif
else
eigen(:)=0.d0
omat(:,:)=0.d0
endif
if(l_verbose) write(stdout,*) 'orthonormalize mp_bcast now mp_sum'
FLUSH(stdout)
do ii=1,n1+n2
!call mp_bcast(omat(:,ii), ionode_id,world_comm)
call mp_sum(omat(:,ii), world_comm)
enddo
!call mp_bcast(eigen(:), ionode_id,world_comm)
call mp_sum(eigen(:), world_comm)
do ii=1,n1+n2
if(l_verbose) write(stdout,*) 'EIGEN:',ii, eigen(ii)
enddo
FLUSH(stdout)
if(l_verbose) write(stdout,*) 'orthonormalize copy'
FLUSH(stdout)
!construct orthonormal basis set
! state_out(:,:)=(0.d0,0.d0)
n_out=0
do ii=1,n1+n2
if(eigen(ii) >= threshold) then
n_out=n_out+1
endif
enddo
allocate(omat1(n1,n_out),omat2(n2, n_out))
do ii=1,n_out
omat1(1:n1,ii)=omat(1:n1,n1+n2-n_out+ii)/dsqrt(eigen(n1+n2-n_out+ii))
enddo
do ii=1,n_out
omat2(1:n2,ii)=omat(n1+1:n1+n2,n1+n2-n_out+ii)/dsqrt(eigen(n1+n2-n_out+ii))
enddo
call dgemm('N','N',2*npw,n_out,n1,1.d0,state1,2*npw,omat1,n1,0.d0,state_out,2*npw)
call dgemm('N','N',2*npw,n_out,n2,1.d0,state2,2*npw,omat2,n2,1.d0,state_out,2*npw)
deallocate(omat1,omat2)
! n_out=0
! do ii=1,n1+n2
! if(eigen(ii) >= threshold) then
! n_out=n_out+1
! do jj=1,n1
! state_out(:,n_out)=state_out(:,n_out)+omat(jj,ii)*state1(:,jj)/dsqrt(eigen(ii))
! enddo
! do jj=1,n2
! state_out(:,n_out)=state_out(:,n_out)+omat(jj+n1,ii)*state2(:,jj)/dsqrt(eigen(ii))
! enddo
! endif
! enddo
write(stdout,*) 'orthonormalize_two_manifolds: basis dimension:', n_out
FLUSH(stdout)
deallocate (omat)
return
end subroutine orthonormalize_two_manifolds
subroutine global_pola_lanczos(nstates,nstates_eff,threshold,nglobal,nsteps,numpw,ispin,l_eigen)
!this subroutine from the orthonormal basis at each v
!construct a global basis for the lanczos calculation of the
!polarization
USE io_global, ONLY : stdout, ionode, ionode_id
USE io_files, ONLY : prefix, tmp_dir, diropn
USE kinds, ONLY : DP
USE wannier_gw, ONLY : num_nbndv,max_ngm,l_pmatrix
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wavefunctions, ONLY : evc, psic
USE gvect
USE gvecs, ONLY : doublegrid
USE fft_base, ONLY : dfftp, dffts
USE fft_interfaces, ONLY : fwfft, invfft
USE wannier_gw, ONLY : l_verbose
USE klist, ONLY : igk_k
implicit none
INTEGER, EXTERNAL :: find_free_unit
INTEGER, INTENT(in) :: nstates!number of orthonormal states for each v
INTEGER, INTENT(in) :: nstates_eff!effective number of orthonormal states for each v
REAL(kind=DP),INTENT(in) :: threshold!threshold for orthonormalization algorithm
INTEGER, INTENT(out) :: nglobal!total number of final orthonormal states
INTEGER, INTENT(in) :: nsteps!number of lanczos steps
INTEGER, INTENT(in) :: numpw!number of wannier products for testing
INTEGER, INTENT(in) :: ispin!spin channel 1,2
LOGICAL, INTENT(in) :: l_eigen!if true partial t states are scaled with the corresponding eigenvalue
INTEGER :: iunv,iuntmat
LOGICAL :: exst
INTEGER :: ii,jj,iv,ic
COMPLEX(kind=DP), ALLOCATABLE :: old_basis(:,:), new_basis(:,:),v_basis(:,:)
INTEGER :: nglobal_old
REAL(kind=DP), ALLOCATABLE :: t_mat(:,:)
CHARACTER(4) :: nfile
!for test:
REAL(kind=DP) :: sca,sca1
INTEGER :: iungprod,ig,iw
REAL(kind=DP), ALLOCATABLE :: wv_real(:),tmp_r(:)
COMPLEX(kind=DP), ALLOCATABLE :: tmp_g(:),wp_prod(:)
LOGICAL :: l_test=.false.
REAL(kind=DP)::proj_tot
INTEGER :: nbuffer,ndelta!for avoiding nested allocation/deallocation cycles
LOGICAL :: l_update_memory
INTEGER, PARAMETER :: offset=0!ATTENZIONE THEN PUT 0!!!!!!!
REAL(kind=DP), ALLOCATABLE :: eigen(:)
INTEGER :: idumm
if(num_nbndv(ispin) == 0) return
nbuffer=6*numpw
ndelta=numpw
!set first basis from first valence state
!if required read eigenvectors too
allocate(eigen(nstates))
if(l_eigen) then
if(ionode) then
iv=1
iuntmat = find_free_unit()
write(nfile,'(4i1)') iv/1000,mod(iv,1000)/100,mod(iv,100)/10,mod(iv,10)
if(ispin==1) then
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos'//nfile, &
&status='old',form='unformatted')
else
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos2'//nfile, &
&status='old',form='unformatted')
endif
read(iuntmat) idumm
read(iuntmat) eigen(1:nstates)
close(iuntmat)
endif
call mp_bcast(eigen, ionode_id,world_comm)
endif
allocate(old_basis(npw,nbuffer))
iunv = find_free_unit()
CALL diropn( iunv, 'vw_lanczos',npw*2, exst)
if(.not.l_eigen) then
do ii=1,nstates_eff
call davcio(old_basis(:,ii),npw*2,iunv,ii+offset,-1)
enddo
nglobal=nstates_eff
else
nglobal=1
call davcio(old_basis(:,nglobal),npw*2,iunv,1+offset,-1)
do ii=2,nstates_eff
if(eigen(ii) > threshold) then
nglobal=nglobal+1
call davcio(old_basis(:,nglobal),npw*2,iunv,ii+offset,-1)
endif
enddo
endif
!loop on valence states (from 2nd)
allocate(v_basis(npw,nstates_eff))
allocate(new_basis(npw,nbuffer))
do iv=2,num_nbndv(ispin)
!!read from disk
do ii=1,nstates_eff
call davcio(v_basis(:,ii),npw*2,iunv,ii+offset+(iv-1)*(nstates+offset),-1)
enddo
if(l_eigen) then
if(ionode) then
iuntmat = find_free_unit()
write(nfile,'(4i1)') iv/1000,mod(iv,1000)/100,mod(iv,100)/10,mod(iv,10)
if(ispin==1) then
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos'//nfile, &
&status='old',form='unformatted')
else
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos2'//nfile, &
&status='old',form='unformatted')
endif
read(iuntmat) idumm
read(iuntmat) eigen(1:nstates)
close(iuntmat)
endif
call mp_bcast(eigen, ionode_id,world_comm)
endif
if(nglobal+nstates_eff >nbuffer) then
deallocate(new_basis)
allocate(new_basis(npw,nbuffer+ndelta))
l_update_memory=.true.
else
l_update_memory=.false.
endif
!!calculate basis
nglobal_old=nglobal
if(l_verbose) write(stdout,*) 'Call orthonormalize_two_manifolds'
FLUSH(stdout)
if(.not.l_pmatrix) then
!call orthonormalize_two_manifolds( old_basis, nglobal_old,v_basis, nstates, threshold, new_basis, nglobal)
call orthonormalize_two_manifolds_prj( old_basis, nglobal_old,v_basis, nstates_eff, threshold, new_basis, nglobal,&
l_eigen,eigen)
else
call orthonormalize_two_manifolds_scalapack(old_basis, nglobal_old,v_basis, nstates_eff, threshold, new_basis, nglobal)
endif
if(l_verbose) write(stdout,*) 'Done orthonormalize_two_manifolds',ispin
FLUSH(stdout)
!!set arrays for next iteration
if(l_update_memory) then
deallocate(old_basis)
allocate(old_basis(npw,nbuffer+ndelta))
nbuffer=nbuffer+ndelta
endif
old_basis(:,1:nglobal)=new_basis(:,1:nglobal)
!deallocate(new_basis)
enddo
deallocate(new_basis)
write(stdout,*) 'TOTAL NUMBER OF GLOBAL T VECTORS: ', nglobal
!call lanczos chain routine
call lanczos_state(old_basis, nglobal, 0, nsteps,1,ispin)
!calculate matrix element and write on disk
allocate(t_mat(nglobal,nstates_eff))
do iv=1,num_nbndv(ispin)
write(nfile,'(4i1)') iv/1000,mod(iv,1000)/100,mod(iv,100)/10,mod(iv,10)
if(ionode) then
iuntmat=find_free_unit()
if(ispin==1) then
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.pt_mat_lanczos'//nfile, &
&status='unknown',form='unformatted')
else
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.pt_mat_lanczos2'//nfile, &
&status='unknown',form='unformatted')
endif
endif
do ii=1,nstates_eff
call davcio(v_basis(:,ii),npw*2,iunv,ii+offset+(iv-1)*(nstates+offset),-1)
enddo
call dgemm('T','N',nglobal,nstates_eff,2*npw,2.d0,old_basis,2*npw,v_basis,2*npw,0.d0,t_mat,nglobal)
if(gstart==2) then
do ii=1,nstates_eff
do jj=1,nglobal
t_mat(jj,ii)=t_mat(jj,ii)-dble(conjg(old_basis(1,jj))*v_basis(1,ii))
enddo
enddo
endif
call mp_sum(t_mat(:,:), world_comm)
if(ionode) then
write(iuntmat) nglobal
write(iuntmat) nstates_eff
write(iuntmat) iv
do ii=1,nstates_eff
write(iuntmat) t_mat(1:nglobal,ii)
enddo
close(iuntmat)
endif
enddo
close(iunv)
!THE FOLLOWING PART IS FOR TESTING POURPOSES
!test that the basis {t_i} is orthonormal
deallocate(t_mat)
if(l_test) then
write(stdout,*) 'TEST1'
FLUSH(stdout)
allocate(t_mat(nglobal,nglobal))
write(stdout,*) 'TEST2'
FLUSH(stdout)
call dgemm('T','N',nglobal,nglobal,2*npw,2.d0,old_basis,2*npw,old_basis,2*npw,0.d0,t_mat,nglobal)
if(gstart==2) then
do ii=1,nglobal
do jj=1,nglobal
t_mat(jj,ii)=t_mat(jj,ii)-dble(conjg(old_basis(1,jj))*old_basis(1,ii))
enddo
enddo
endif
write(stdout,*) 'TEST3'
FLUSH(stdout)
call mp_sum(t_mat(:,:), world_comm)
!!write diagonal terms
do ii=1,nglobal
sca=0.d0
do jj=1,nglobal
if(ii/=jj) sca=sca+abs(t_mat(ii,jj))
enddo
write(stdout,*) 'Diagonal',ii,t_mat(ii,ii),sca
FLUSH(stdout)
enddo
deallocate(t_mat)
allocate(t_mat(nglobal,1))
!test for representability
iungprod = find_free_unit()
CALL diropn( iungprod, 'wiwjwfc_red', max_ngm*2, exst )
allocate(wv_real(dfftp%nnr),tmp_r(dfftp%nnr),tmp_g(ngm),wp_prod(npw))
proj_tot=0.d0
do iv=1,num_nbndv(ispin),num_nbndv(ispin)-1!ATTENZIONE
!put iv on real space
psic(:)=(0.d0,0.d0)
psic(dffts%nl(igk_k(1:npw,1))) = evc(1:npw,iv)
psic(dffts%nlm(igk_k(1:npw,1))) = CONJG( evc(1:npw,iv) )
CALL invfft ('Wave', psic, dffts)
wv_real(:)= DBLE(psic(:))
!loop on wannier_products
do iw=1,numpw
call davcio(tmp_g,max_ngm*2,iungprod,iw,-1)
!trasform to r-space
psic(:)=(0.d0,0.d0)
do ig=1,max_ngm
psic(dfftp%nl(ig))=tmp_g(ig)
psic(dfftp%nlm(ig))=CONJG(tmp_g(ig))
enddo
CALL invfft ('Rho', psic, dfftp)
tmp_r(:)=dble(psic(:))
!!form products with w_v and trasfrom in G space
psic(:)=cmplx(tmp_r(:)*wv_real(:),0.d0)
CALL fwfft ('Wave', psic, dffts)
wp_prod(1:npw) = psic(dffts%nl(igk_k(1:npw,1)))
!!project on conduction subspace
call pc_operator(wp_prod(:),ispin, .false.)
!!do scalar product
call dgemm('T','N',nglobal,1,2*npw,2.d0,old_basis,2*npw,wp_prod,2*npw,0.d0,t_mat,nglobal)
if(gstart==2) then
do jj=1,nglobal
t_mat(jj,1)=t_mat(jj,1)-dble(conjg(old_basis(1,jj))*wp_prod(1))
enddo
endif
call mp_sum(t_mat(:,1), world_comm)
sca=0.d0
do ii=1,nglobal
sca=sca+t_mat(ii,1)**2.d0
end do
!calculate norm
sca1=0.d0
do ig=1,npw
sca1=sca1+2.d0*dble(conjg(wp_prod(ig))*wp_prod(ig))
enddo
if(gstart==2) sca1=sca1-dble(conjg(wp_prod(1))*wp_prod(1))
call mp_sum(sca1, world_comm)
write(stdout,*) 'Projection',iv,iw,sca/sca1
proj_tot=proj_tot+sca/sca1
! do ii=1,nglobal,50
! write(stdout,*) 'Q terms',iv,iw,ii, t_mat(ii,1)
! enddo
FLUSH(stdout)
enddo
enddo
write(stdout,*) 'Average projection', proj_tot/dble(numpw*2)
FLUSH(stdout)
deallocate(t_mat)
deallocate(wv_real,tmp_g,tmp_r,wp_prod)
close(iungprod)
!END OF TESTING PART
endif
deallocate(old_basis)
deallocate(v_basis)
deallocate(eigen)
return
end subroutine global_pola_lanczos
subroutine orthonormalize_two_manifolds_scalapack( state1, n1,state2, n2, threshold, state_out, n_out)
!this subroutine form am orthormal basis set from 2 manifold (with orthonormal basis sets)
!ONLY FOR NORM_CONSERVING CASE
USE io_global, ONLY : stdout, ionode, ionode_id
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wannier_gw, ONLY : p_mpime,p_nproc, npcol, nprow,icontxt,myrow,mycol
implicit none
COMPLEX(kind=DP), INTENT(in) :: state1(npw,n1)!1st orthonormal basis
INTEGER, INTENT(in) :: n1!number of 1st basis elements
COMPLEX(kind=DP), INTENT(in) :: state2(npw,n2)!2nd orthonormal basis
INTEGER, INTENT(in) :: n2!number of 2nd basis elements
REAL(kind=DP), INTENT(in) :: threshold!threshold for orthonormality
COMPLEX(kind=DP), INTENT(out) :: state_out(npw,n1+n2)!output basis set
INTEGER, INTENT(out) :: n_out!number of output states
INTEGER :: ii,jj
REAL(kind=DP), ALLOCATABLE :: omat(:,:),tmp_mat(:,:)
REAL(kind=DP), ALLOCATABLE :: eigen(:),work(:)
INTEGER :: lwork,info,liwork
INTEGER, ALLOCATABLE :: iwork(:)
INTEGER :: n, n_r,n_c,n_dimr,n_dimc,n1_r,n1_dimr,n2_c,n2_dimc
INTEGER :: icrow, iccol, iproc,ilrow,ilcol
INTEGER, EXTERNAL :: indxg2l,indxg2p
REAL(kind=DP), EXTERNAL :: ddot
REAL(kind=DP) :: sca
INTEGER :: desc_a(9),desc_b(9)
#if defined(__SCALAPACK)
!buid overlap matrix
n=n1+n2
n_r=ceiling(real(n)/real(max(nprow,npcol)))
n_c=ceiling(real(n)/real(max(nprow,npcol)))
n_dimr=ceiling (real(n)/real(n_r*nprow))*n_r
n_dimc=ceiling (real(n)/real(n_c*npcol))*n_c
n1_r=ceiling(real(n1)/real(max(nprow,npcol)))
n1_dimr=ceiling (real(n1)/real(n1_r*nprow))*n1_r
n2_c=ceiling(real(n2)/real(max(nprow,npcol)))
n2_dimc=ceiling (real(n2)/real(n2_c*npcol))*n2_c
allocate(omat(n_dimr,n_dimc))
omat(:,:)=0.d0
do ii=1,n
icrow = indxg2p(ii,n_r,0,0,nprow)
iccol = indxg2p(ii,n_c,0,0,npcol)
iproc=icrow*npcol+iccol
if(myrow==icrow .and. mycol==iccol) then
ilrow=indxg2l(ii,n_r,0,0,nprow)
ilcol=indxg2l(ii,n_c,0,0,npcol)
omat(ilrow,ilcol)=1.d0
endif
enddo
write(stdout,*) 'orthonormalize para1'
FLUSH(stdout)
do ii=1,n1
do jj=1,n2
sca=2.d0*ddot(2*npw,state1(:,ii),1,state2(:,jj),1)
if(gstart==2) sca=sca-dble(conjg(state1(1,ii))*state2(1,jj))
call mp_sum(sca, world_comm)
icrow = indxg2p(ii,n_r,0,0,nprow)
iccol = indxg2p(jj+n1,n_c,0,0,npcol)
iproc=icrow*npcol+iccol
if(myrow==icrow .and. mycol==iccol) then
ilrow=indxg2l(ii,n_r,0,0,nprow)
ilcol=indxg2l(jj+n1,n_c,0,0,npcol)
omat(ilrow,ilcol)=sca
endif
enddo
enddo
allocate(tmp_mat(n_dimr,n_dimc))
! A = omat
desc_a(1)=1
desc_a(2)=icontxt
desc_a(3)=n
desc_a(4)=n
desc_a(5)=n_r
desc_a(6)=n_c
desc_a(7)=0
desc_a(8)=0
desc_a(9)=n_dimr
!B = tmp_mat
desc_b(1)=1
desc_b(2)=icontxt
desc_b(3)=n
desc_b(4)=n
desc_b(5)=n_r
desc_b(6)=n_c
desc_b(7)=0
desc_b(8)=0
desc_b(9)=n_dimr
!diagonalize
allocate(work(1))
allocate(eigen(n))
write(stdout,*) 'orthonormalize para2'
FLUSH(stdout)
call pdsyev('V','U',n,omat,1,1,desc_a,eigen,tmp_mat,1,1,desc_b,work,-1,info)
lwork=work(1)
deallocate(work)
allocate(work(lwork))
call pdsyev('V','U',n,omat,1,1,desc_a,eigen,tmp_mat,1,1,desc_b,work,lwork,info)
deallocate(work)
if(info/=0) then
write(stdout,*) 'ROUTINE orthonormalize_two_manifolds_scalapack, INFO:', info
stop
endif
write(stdout,*) 'orthonormalize para3'
FLUSH(stdout)
do ii=1,n,n
write(stdout,*) 'EIGEN:',ii, eigen(ii)
enddo
FLUSH(stdout)
state_out(:,:)=(0.d0,0.d0)
n_out=0
do ii=1,n
if(eigen(ii) >= threshold) then
n_out=n_out+1
do jj=1,n1
icrow = indxg2p(jj,n_r,0,0,nprow)
iccol = indxg2p(ii,n_c,0,0,npcol)
iproc=icrow*npcol+iccol
if(myrow==icrow .and. mycol==iccol) then
ilrow=indxg2l(jj,n_r,0,0,nprow)
ilcol=indxg2l(ii,n_c,0,0,npcol)
sca=tmp_mat(ilrow,ilcol)
endif
call mp_bcast(sca, iproc,world_comm)
state_out(:,n_out)=state_out(:,n_out)+sca*state1(:,jj)/dsqrt(eigen(ii))
enddo
do jj=1,n2
icrow = indxg2p(jj+n1,n_r,0,0,nprow)
iccol = indxg2p(ii,n_c,0,0,npcol)
iproc=icrow*npcol+iccol
if(myrow==icrow .and. mycol==iccol) then
ilrow=indxg2l(jj+n1,n_r,0,0,nprow)
ilcol=indxg2l(ii,n_c,0,0,npcol)
sca=tmp_mat(ilrow,ilcol)
endif
call mp_bcast(sca, iproc,world_comm)
state_out(:,n_out)=state_out(:,n_out)+sca*state2(:,jj)/dsqrt(eigen(ii))
enddo
endif
enddo
write(stdout,*) 'orthonormalize para4'
FLUSH(stdout)
write(stdout,*) 'orthonormalize_two_manifolds: basis dimension:', n_out
FLUSH(stdout)
deallocate (omat,tmp_mat,eigen)
#endif
return
end subroutine orthonormalize_two_manifolds_scalapack
subroutine orthonormalize_two_manifolds_prj( state1, n1,state2, n2, threshold, state_out, n_out,l_w,weight)
!this subroutine form am orthormal basis set from 2 manifold (with orthonormal basis sets)
!ONLY FOR NORM_CONSERVING CASE
!first projects out of the second manifold the first one
!the orthonormalizes the second manifold
USE io_global, ONLY : stdout, ionode, ionode_id
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wannier_gw, ONLY : l_verbose
implicit none
COMPLEX(kind=DP), INTENT(in) :: state1(npw,n1)!1st orthonormal basis
INTEGER, INTENT(in) :: n1!number of 1st basis elements
COMPLEX(kind=DP), INTENT(inout) :: state2(npw,n2)!2nd orthonormal basis
INTEGER, INTENT(in) :: n2!number of 2nd basis elements
REAL(kind=DP), INTENT(in) :: threshold!threshold for orthonormality
COMPLEX(kind=DP), INTENT(out) :: state_out(npw,n1+n2)!output basis set
INTEGER, INTENT(out) :: n_out!number of output states
LOGICAL, INTENT(in) :: l_w!if true considere the weigth in eig
REAL(kind=DP), INTENT(in) :: weight(n2)!weigths
INTEGER :: ii,jj
REAL(kind=DP), ALLOCATABLE :: omat(:,:),tmp_mat(:,:)
REAL(kind=DP), ALLOCATABLE :: eigen(:),work(:)
INTEGER :: lwork,info,liwork
REAL(kind=DP), ALLOCATABLE :: omat1(:,:),omat2(:,:)
!buid overlap matrix
if(l_verbose) write(stdout,*) 'orthonormalize dgemm'
FLUSH(stdout)
allocate(tmp_mat(n1,n2))
call dgemm('T','N',n1,n2,2*npw,2.d0,state1,2*npw,state2,2*npw,0.d0,tmp_mat,n1)
if(gstart==2) then
do ii=1,n2
do jj=1,n1
tmp_mat(jj,ii)=tmp_mat(jj,ii)-dble(conjg(state1(1,jj))*state2(1,ii))
enddo
enddo
endif
if(l_verbose) write(stdout,*) 'orthonormalize mp_sum'
FLUSH(stdout)
do ii=1,n2
call mp_sum(tmp_mat(:,ii), world_comm)
enddo
call dgemm('N','N',2*npw, n2,n1,-1.d0,state1,2*npw,tmp_mat,n1,1.d0,state2,2*npw)
deallocate(tmp_mat)
if(l_w) then
do ii=1,n2
state2(1:npw,ii)=state2(1:npw,ii)*weight(ii)
enddo
endif
allocate(omat(n2,n2))
if(gstart==2) state2(1,1:n2)=dcmplx(dble(state2(1,1:n2)),0.d0)
call dgemm('T','N',n2,n2,2*npw,2.d0,state2,2*npw,state2,2*npw,0.d0,omat,n2)
if(gstart==2) then
do ii=1,n2
do jj=1,n2
omat(jj,ii)=omat(jj,ii)-dble(conjg(state2(1,jj))*state2(1,ii))
enddo
enddo
endif
if(l_verbose) write(stdout,*) 'orthonormalize mp_sum'
FLUSH(stdout)
do ii=1,n2
call mp_sum(omat(:,ii), world_comm)
enddo
!diagonalize
allocate(eigen(n2))
if(l_verbose) write(stdout,*) 'orthonormalize dsyev'
FLUSH(stdout)
if(ionode) then
allocate(work(1))
if(l_w) omat(1:n2,1:n2)=omat(1:n2,1:n2)/weight(1)!to avoid numerical instabilities in DSYEV
call DSYEV( 'V', 'U', n2, omat,n2, eigen, work, -1, info )
lwork=work(1)
deallocate(work)
allocate(work(lwork))
call DSYEV( 'V', 'U', n2, omat, n2, eigen, work, lwork, info )
deallocate(work)
if(info/=0) then
write(stdout,*) 'ROUTINE orthonormalize_two_manifolds, INFO:', info
stop
endif
if(l_w) eigen(1:n2)=eigen(1:n2)*weight(1)!to avoid numerical instabilities in DSYEV
else
eigen(:)=0.d0
omat(:,:)=0.d0
endif
if(l_verbose) write(stdout,*) 'orthonormalize mp_bcast now mp_sum'
FLUSH(stdout)
do ii=1,n2
!call mp_bcast(omat(:,ii), ionode_id,world_comm)
call mp_sum(omat(:,ii), world_comm)
enddo
!call mp_bcast(eigen(:), ionode_id,world_comm)
call mp_sum(eigen(:), world_comm)
! do ii=1,n2
do ii=1,n2,n2-1
write(stdout,*) 'EIGEN GLOBAL:',ii, eigen(ii)
enddo
FLUSH(stdout)
if(l_verbose) write(stdout,*) 'orthonormalize copy'
FLUSH(stdout)
!construct orthonormal basis set
! state_out(:,:)=(0.d0,0.d0)
n_out=0
do ii=1,n2
if(eigen(ii) >= threshold) then
n_out=n_out+1
endif
enddo
do ii=n2-n_out+1,n2
omat(1:n2,ii)=omat(1:n2,ii)/dsqrt(eigen(ii))
enddo
call dgemm('N','N',2*npw,n_out,n2,1.d0,state2,2*npw,omat(:,n2-n_out+1:n2),n2,0.d0,state_out,2*npw)
state_out(:,n_out+1:n_out+n1)=state1(:,1:n1)
n_out=n_out+n1
write(stdout,*) 'orthonormalize_two_manifolds: basis dimension:', n_out
FLUSH(stdout)
deallocate (omat,eigen)
return
end subroutine orthonormalize_two_manifolds_prj
subroutine pc_operator_test(state)
!this operator project the wavefunction state on the conduction
!subspace, the valence wavefunction are in evc
!ONLY FOR GAMMA POINT NOW!!!!
USE io_global, ONLY : stdout
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wannier_gw, ONLY : num_nbndv
implicit none
COMPLEX(kind=DP), INTENT(inout) :: state(npw)!state to be projected
INTEGER :: iv,ig
REAL(kind=DP), ALLOCATABLE :: prod(:)
allocate(prod(nbnd-num_nbndv(1)))
prod(:)=0.d0
call dgemm('T','N', nbnd-num_nbndv(1),1,2*npw,2.d0,evc(:,num_nbndv(1)+1:nbnd),2*npwx,state,2*npw,0.d0,prod,nbnd-num_nbndv(1))
do iv=num_nbndv(1)+1,nbnd
if(gstart==2) prod(iv-num_nbndv(1))=prod(iv-num_nbndv(1))-dble(conjg(evc(1,iv))*state(1))
enddo
call mp_sum(prod(:), world_comm)
call dgemm('N','N',2*npw,1,nbnd-num_nbndv(1),1.d0,evc(:,num_nbndv(1)+1:nbnd),2*npwx,prod,nbnd-num_nbndv(1),0.d0,state,2*npw)
deallocate(prod)
return
end subroutine pc_operator_test
subroutine pc_operator_t_m(numpw,state,evc_t,ispin,fc)
!this operator project the wavefunction state on the conduction
!subspace, the valence wavefunction are in evc
!it works on an arry of states
!ONLY FOR GAMMA POINT NOW!!!!
USE io_global, ONLY : stdout
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wannier_gw, ONLY : num_nbndv
USE fft_custom_gwl
implicit none
TYPE(fft_cus), INTENT(in) :: fc
INTEGER, INTENT(in) :: numpw!number of vectors
INTEGER, INTENT(in) :: ispin!spin channel
COMPLEX(kind=DP), INTENT(inout) :: state(fc%npwt,numpw)!state to be projected
! COMPLEX(kind=DP), INTENT(inout) :: evc_t(fc%npwt,num_nbndv(ispin))
! above syntax not accepted by all compilers
COMPLEX(kind=DP), INTENT(inout) :: evc_t(fc%npwt,*)!valence states
INTEGER :: ii,iv,ig
REAL(kind=DP), ALLOCATABLE :: prod(:,:)
allocate(prod(num_nbndv(ispin),numpw))
call dgemm('T','N', num_nbndv(ispin),numpw,2*fc%npwt,2.d0,evc_t,2*fc%npwt,state,2*fc%npwt,&
& 0.d0,prod,num_nbndv(ispin))
if(fc%gstart_t==2) then
do ii=1,numpw
do iv=1,num_nbndv(ispin)
prod(iv,ii)=prod(iv,ii)-dble(conjg(evc_t(1,iv))*state(1,ii))
enddo
enddo
endif
do ii=1,numpw
call mp_sum(prod(:,ii), world_comm)
enddo
call dgemm('N','N',2*fc%npwt,numpw,num_nbndv(ispin),-1.d0,evc_t,2*fc%npwt,prod,&
&num_nbndv(ispin),1.d0,state,2*fc%npwt)
deallocate(prod)
return
end subroutine pc_operator_t_m
subroutine pc_operator_t_r(numpw,state,evc_r,ispin,fc)
!NOT_TO_BE_INCLUDED_START
!this operator project the wavefunction state on the conduction
!subspace, the valence wavefunction are in evc
USE io_global, ONLY : stdout
USE kinds, ONLY : DP
USE gvect
USE wvfct, ONLY : npwx, npw, nbnd
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_world, ONLY : world_comm
USE wannier_gw, ONLY : num_nbndv
USE fft_custom_gwl
implicit none
TYPE(fft_cus), INTENT(in) :: fc
INTEGER, INTENT(in) :: numpw!number of vectors
INTEGER, INTENT(in) :: ispin!spin channel
REAL(kind=DP), INTENT(inout) :: state(fc%nrxxt,numpw)!state to be projected
! REAL(kind=DP), INTENT(inout) :: evc_r(fc%nrxxt,num_nbndv(ispin))
! above syntax not accepted by all compilers
REAL(kind=DP), INTENT(inout) :: evc_r(fc%nrxxt,*)!valence states
INTEGER :: ii,iv,ig
REAL(kind=DP), ALLOCATABLE :: prod(:,:)
allocate(prod(num_nbndv(ispin),numpw))
call dgemm('T','N', num_nbndv(ispin),numpw,fc%nrxxt,1.d0,evc_r,fc%nrxxt,state,fc%nrxxt,&
& 0.d0,prod,num_nbndv(ispin))
do ii=1,numpw
call mp_sum(prod(:,ii), world_comm)
prod(:,ii)=prod(:,ii)/dble(fc%nr1t*fc%nr2t*fc%nr3t)
enddo
call dgemm('N','N',fc%nrxxt,numpw,num_nbndv(ispin),-1.d0,evc_r,fc%nrxxt,prod,&
&num_nbndv(ispin),1.d0,state,fc%nrxxt)
deallocate(prod)
return
!NOT_TO_BE_INCLUDED_END
end subroutine pc_operator_t_r
subroutine h_psi_self( lda, n, m, psi, hpsi )
!NOT_TO_BE_INCLUDED_START
!add to hpsi part dur to self-consistent GW calculation
! ... input:
! ... lda leading dimension of arrays psi, spsi, hpsi
! ... n true dimension of psi, spsi, hpsi
! ... m number of states psi
! ... psi
!
! ... output:
! ... hpsi H*psi
!
USE kinds, ONLY : DP
USE gvect, ONLY : gstart
USE wvfct, ONLY : npwx, npw, nbnd,et
USE wavefunctions, ONLY : evc
USE wannier_gw, ONLY : n_gw_states, ene_gw, delta_self
USE mp, ONLY : mp_sum
USE mp_world, ONLY : world_comm
!
implicit none
INTEGER, INTENT(IN) :: lda, n, m
COMPLEX(kind=DP), INTENT(IN) :: psi(lda,m)
COMPLEX(kind=DP), INTENT(OUT) :: hpsi(lda,m)
INTEGER :: ii,jj
REAL(kind=DP), ALLOCATABLE :: prod(:,:)
!apply \Delta1
hpsi(1:n,1:m)=hpsi(1:n,1:m)+delta_self*psi(1:n,1:m)
!apply \Sum_i (e^GW_i-e^DFT_i-Delta)|\psi_i><\psi_i|
allocate(prod(n_gw_states,m))
prod(:,:)=0.d0
call dgemm('T','N', n_gw_states,m,2*npw,2.d0,evc,2*npwx,psi,2*lda,0.d0,prod,n_gw_states)
do ii=1,n_gw_states
do jj=1,m
if(gstart==2) prod(ii,jj)=prod(ii,jj)-dble(conjg(evc(1,ii))*psi(1,jj))
enddo
enddo
call mp_sum(prod,world_comm)
do jj=1,m
do ii=1,n_gw_states
prod(ii,jj)=prod(ii,jj)*(ene_gw(ii,1)-et(ii,1)-delta_self)
enddo
enddo
call dgemm('N','N',2*npw,m,n_gw_states,1.d0,evc,2*npwx,prod,n_gw_states,1.d0,hpsi,2*lda)
deallocate(prod)
return
!NOT_TO_BE_INCLUDED_END
end subroutine h_psi_self
subroutine h_psi_scissor( ispin,lda, n, m, psi, hpsi )
!NOT_TO_BE_INCLUDED_START
!add to hpsi part dur to self-consistent GW calculation
! ... input:
! ... lda leading dimension of arrays psi, spsi, hpsi
! ... n true dimension of psi, spsi, hpsi
! ... m number of states psi
! ... psi
! ... output:
! ... hpsi H*psi
USE kinds, ONLY : DP
USE gvect, ONLY : gstart
USE wvfct, ONLY : npwx, npw, nbnd,et
USE wavefunctions, ONLY : evc
USE wannier_gw, ONLY : num_nbndv,scissor
USE mp, ONLY : mp_sum
USE mp_world, ONLY : world_comm
USE constants, ONLY : rytoev
implicit none
INTEGER, INTENT(in) :: ispin!spin channel
INTEGER, INTENT(IN) :: lda, n, m
COMPLEX(kind=DP), INTENT(IN) :: psi(lda,m)
COMPLEX(kind=DP), INTENT(OUT) :: hpsi(lda,m)
INTEGER :: ii,jj
REAL(kind=DP), ALLOCATABLE :: prod(:,:)
allocate(prod(num_nbndv(ispin),m))
prod=0.d0
call dgemm('T','N', num_nbndv(ispin),m,2*npw,2.d0,evc,2*npwx,psi,2*lda,0.d0,prod,num_nbndv(ispin))
do ii=1,num_nbndv(ispin)
do jj=1,m
if(gstart==2) prod(ii,jj)=prod(ii,jj)-dble(conjg(evc(1,ii))*psi(1,jj))
enddo
enddo
call mp_sum(prod,world_comm)
do jj=1,m
do ii=1,num_nbndv(ispin)
prod(ii,jj)=prod(ii,jj)*(scissor(1)-scissor(2))/rytoev
enddo
enddo
call dgemm('N','N',2*npw,m,num_nbndv(ispin),1.d0,evc,2*npwx,prod,num_nbndv(ispin),&
&1.d0+scissor(2)/rytoev,hpsi,2*lda)
deallocate(prod)
return
!NOT_TO_BE_INCLUDED_END
end subroutine h_psi_scissor
subroutine pola_basis_lanczos_real(n_set,nstates,numpw, nsteps,ispin)
!NOT_TO_BE_INCLUDED_START
!this subroutine calculates the basis for every v
!the minimal orthonormal basis for the w_v(r)*w^P'_i(r) products
USE io_global, ONLY : stdout, ionode, ionode_id
USE io_files, ONLY : prefix, tmp_dir, diropn
USE kinds, ONLY : DP
USE wannier_gw
USE gvect
USE constants, ONLY : e2, pi, tpi, fpi
USE cell_base, ONLY: at, alat, tpiba, omega, tpiba2
USE wvfct, ONLY : npwx, npw, nbnd
USE gvecw, ONLY : ecutwfc
USE wavefunctions, ONLY : evc, psic
USE mp, ONLY : mp_sum, mp_barrier, mp_bcast
USE mp_pools, ONLY : intra_pool_comm
USE mp_world, ONLY : world_comm, mpime, nproc
USE gvecs, ONLY : doublegrid
USE fft_custom_gwl
USE mp_wave, ONLY : mergewf,splitwf
USE fft_base, ONLY : dfftp, dffts
USE fft_interfaces, ONLY : fwfft, invfft
implicit none
INTEGER, EXTERNAL :: find_free_unit
INTEGER, INTENT(in) :: n_set !defines the number of states to be read from disk at the same tim\e
INTEGER, INTENT(in) :: nstates!number of orthonormal states to retain
INTEGER, INTENT(in) :: numpw!dimension of polarization basis
INTEGER, INTENT(in) :: nsteps!number of lanczos steps
INTEGER, INTENT(in) :: ispin! spin channel
INTEGER :: iv,iw,ig,ii,jj,ir
REAL(kind=DP), ALLOCATABLE :: wv_real(:),tmp_r(:),tmp_r2(:)
COMPLEX(kind=DP), ALLOCATABLE :: tmp_g(:)
REAL(kind=DP), ALLOCATABLE :: wp_prod(:,:,:)
INTEGER :: iungprod,iunrprod, iungresult,iuntmat
LOGICAL :: exst
REAL(kind=DP), ALLOCATABLE :: omat(:,:),omat_hold(:,:)
REAL(kind=DP), ALLOCATABLE :: eigen(:),work(:)
INTEGER :: lwork,info,liwork
COMPLEX(kind=DP), ALLOCATABLE :: wp_g(:,:),wp_g_t2(:,:)!product terms in g wfc grid
REAL(kind=DP), ALLOCATABLE :: wp_g_t(:,:)!
REAL(kind=DP), ALLOCATABLE :: t_mat(:,:),t_mat_hold(:,:), t_mat_hold2(:,:)
CHARACTER(4) :: nfile
COMPLEX(kind=DP), ALLOCATABLE :: p_basis(:,:)!polarizability basis
LOGICAL :: l_dsyevr=.true.!if true uses dsyevr
REAL(kind=DP), ALLOCATABLE :: vectors(:,:)!for dsyevr
INTEGER, ALLOCATABLE :: iwork(:), ifail(:)
INTEGER, ALLOCATABLE :: isuppz(:)
INTEGER :: n_found
LOGICAL :: l_fft_custom=.false.!if true uses custom fft grid
COMPLEX(kind=DP), ALLOCATABLE :: evc_t(:,:),p_basis_t(:,:)
REAL(kind=DP), ALLOCATABLE :: evc_r(:,:)
COMPLEX(kind=DP), ALLOCATABLE :: evc_g(:)
LOGICAL :: l_sumrule=.false.!if true imposes the sum rule over the norm of Pc|\Phi_\mu\Psi_v> for each of them
REAL(kind=DP), ALLOCATABLE :: norms(:)
REAL(kind=DP) :: norm_t, c_norm,norm
REAL(kind=DP), ALLOCATABLE :: p_basis_r(:,:) !polarizabilty basis in real custom space
INTEGER :: ivv,nbuf
REAL(kind=DP) :: vl,vu
INTEGER :: il,iu
REAL(kind=DP), ALLOCATABLE :: t_eigen_hold(:)
REAL(kind=DP) :: sca
TYPE(fft_cus) :: fc
write(stdout,*) 'Routine pola_basis_lanczos_real'
FLUSH(stdout)
fc%ecutt=ecutwfc
fc%dual_t=dual_vt
if(l_verbose) write(stdout,*) 'Call initialize_fft_custom'
FLUSH(stdout)
call initialize_fft_custom(fc)
! allocate(evc_g(fc%ngmt_g))
! allocate(wv_real(dfftp%nnr))
! allocate(wp_prod(npw,numpw))
allocate(wv_real(fc%nrxxt))
! allocate(wp_g_t2(fc%npwt))
allocate(norms(numpw))
!read w^P'_i on file on real space
!open product of wanniers filed
iungprod = find_free_unit()
CALL diropn( iungprod, 'wiwjwfc_red', max_ngm*2, exst )
iungresult = find_free_unit()
CALL diropn( iungresult, 'vw_lanczos',npw*2, exst)
!read polarizability basis functions
allocate(p_basis(max_ngm,numpw))
do iw=1,numpw
call davcio(p_basis(:,iw),max_ngm*2,iungprod,iw,-1)
enddo
close(iungprod)
if(l_verbose) write(stdout,*) 'pola_basis_lanczos 1'
FLUSH(stdout)
!now polarizability basis are put on the ordering of the redueced grid, if required
allocate(p_basis_t(fc%npwt,numpw))
! if(fc%dual_t==4.d0) then
! p_basis_t(:,:)=p_basis(:,:)
! else
call reorderwfp (numpw,npw, fc%npwt,p_basis(:,:),p_basis_t(:,:), &
&npw,fc%npwt, ig_l2g,fc%ig_l2gt, fc%ngmt_g , mpime, nproc,ionode_id, intra_pool_comm )
! do ii=1,numpw
! call mergewf(p_basis(:,ii),evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
! call splitwf(p_basis_t(:,ii),evc_g,fc%npwt,fc%ig_l2gt,mpime,nproc,ionode_id,intra_pool_comm)
! enddo
!trasform to real space
allocate(p_basis_r(fc%nrxxt,numpw))
do ii=1,numpw,2
psic(:)=(0.d0,0.d0)
if(ii==numpw) then
psic(fc%nlt(1:fc%npwt)) = p_basis_t(1:fc%npwt,ii)
psic(fc%nltm(1:fc%npwt)) = CONJG( p_basis_t(1:fc%npwt,ii) )
else
psic(fc%nlt(1:fc%npwt))=p_basis_t(1:fc%npwt,ii)+(0.d0,1.d0)*p_basis_t(1:fc%npwt,ii+1)
psic(fc%nltm(1:fc%npwt)) = CONJG( p_basis_t(1:fc%npwt,ii) )+(0.d0,1.d0)*CONJG( p_basis_t(1:fc%npwt,ii+1) )
endif
CALL cft3t(fc, psic, fc%nr1t, fc%nr2t, fc%nr3t, fc%nrx1t, fc%nrx2t, fc%nrx3t, 2 )
p_basis_r(1:fc%nrxxt,ii)= DBLE(psic(1:fc%nrxxt))
if(ii/=numpw) p_basis_r(1:fc%nrxxt,ii+1)= DIMAG(psic(1:fc%nrxxt))
enddo
! endif
!now valence wavefunctions are put on the ordering of the reduced grid
allocate(evc_t(fc%npwt,num_nbndv(ispin)))
allocate(evc_r(fc%nrxxt,num_nbndv(ispin)))
if(fc%dual_t==4.d0) then
evc_t(:,1:num_nbndv(ispin))=evc(:,1:num_nbndv(ispin))
else
call reorderwfp (num_nbndv(ispin),npw, fc%npwt,evc(:,:),evc_t(:,:), &
&npw,fc%npwt, ig_l2g,fc%ig_l2gt, fc%ngmt_g , mpime, nproc,ionode_id, intra_pool_comm )
! do iv=1,num_nbndv(ispin)
! call mergewf(evc(:,iv),evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
! call splitwf(evc_t(:,iv),evc_g,fc%npwt,fc%ig_l2gt,mpime,nproc,ionode_id,intra_pool_comm)
! enddo
endif
evc_r=0.d0
do iv=1,num_nbndv(ispin)
psic(:)=(0.d0,0.d0)
psic(fc%nlt(1:fc%npwt)) = evc_t(1:fc%npwt,iv)
psic(fc%nltm(1:fc%npwt)) = CONJG( evc_t(1:fc%npwt,iv) )
CALL cft3t(fc, psic, fc%nr1t, fc%nr2t, fc%nr3t, fc%nrx1t, fc%nrx2t, fc%nrx3t, 2 )
evc_r(1:fc%nrxxt,iv)= DBLE(psic(1:fc%nrxxt))
enddo
!loop on v
allocate(tmp_r(fc%nrxxt),tmp_r2(fc%nrxxt))
allocate(omat(numpw,numpw),omat_hold(numpw,numpw))
allocate(t_mat(numpw,nstates), t_mat_hold(numpw,nstates), t_mat_hold2(numpw,nstates))
allocate(wp_g(npw,nstates))
allocate(wp_g_t(fc%nrxxt,nstates))
allocate(t_eigen_hold(nstates))
!set the number of products to be distributed
nbuf=min(12,nproc)
allocate(wp_prod(fc%nrxxt,numpw,nbuf))
wp_prod=0.d0
wp_g_t=0.d0
do ivv=1,num_nbndv(ispin),nbuf
!put iv on real space
do iv=ivv,min(ivv+nbuf-1,num_nbndv(ispin))
wv_real(1:fc%nrxxt)= evc_r(1:fc%nrxxt,iv)
!!loop on products of wanniers
! allocate(tmp_r(fc%nrxxt))
if(l_verbose) write(stdout,*) 'do fft',fc%nrxxt, numpw,iv-ivv+1
FLUSH(stdout)
do ii=1,numpw
wp_prod(1:fc%nrxxt, ii,iv-ivv+1)=p_basis_r(1:fc%nrxxt,ii)*wv_real(1:fc%nrxxt)
enddo
call pc_operator_t_r(numpw,wp_prod(1,1,iv-ivv+1),evc_r,ispin, fc)
if(l_verbose) write(stdout,*) 'calculate omat'
FLUSH(stdout)
!!calculate overlap matrix
call dgemm('T','N',numpw,numpw,fc%nrxxt,1.d0,wp_prod(1,1,iv-ivv+1),fc%nrxxt,&
&wp_prod(1,1,iv-ivv+1),fc%nrxxt,0.d0,omat,numpw)
do ii=1,numpw
call mp_sum(omat(1:numpw,ii),world_comm)
omat(1:numpw,ii)=omat(1:numpw,ii)/dble(fc%nr1t*fc%nr2t*fc%nr3t)
enddo
!set up norms
! do ii=1,numpw
! norms(ii)=omat(ii,ii)
! enddo
if(iv-ivv==mpime) then
omat_hold(:,:)=omat(:,:)
endif
enddo
!!
!!solve eigen/values vector problem
!!
if(l_verbose) write(stdout,*) 'solve eig'
FLUSH(stdout)
FLUSH(stdout)
do iv=ivv,min(ivv+nbuf-1,num_nbndv(ispin))
if(l_verbose) write(stdout,*) 'solve eig', iv
FLUSH(stdout)
if(iv-ivv==mpime) then
if(.not.l_dsyevr) then
allocate(eigen(numpw))
allocate(work(1))
call DSYEV( 'V', 'U', numpw, omat_hold, numpw, eigen, work, -1, info )
lwork=work(1)
deallocate(work)
allocate(work(lwork))
call DSYEV( 'V', 'U', numpw, omat_hold, numpw, eigen, work, lwork, info )
deallocate(work)
if(info/=0) then
write(stdout,*) 'ROUTINE pola_basis_lanczos, INFO:', info
stop
endif
! do iw=1,numpw
! write(stdout,*) 'EIGEN:',iv,iw, eigen(iw)
! enddo
! FLUSH(stdout)
else
if(l_verbose) write(stdout,*) 'ATT1'
FLUSH(stdout)
allocate(eigen(numpw))
allocate(vectors(numpw,nstates))
allocate(isuppz(2*nstates))
allocate(work(1),iwork(1))
if(l_verbose) write(stdout,*) 'ATT2'
FLUSH(stdout)
call DSYEVR('V','I','U',numpw,omat_hold,numpw,0.d0,0.d0,numpw-nstates+1,numpw,0.d0,n_found,eigen,&
& vectors,numpw,isuppz,work, -1,iwork,-1, info)
lwork=work(1)
liwork=iwork(1)
deallocate(work,iwork)
allocate(work(lwork))
allocate(iwork(liwork))
if(l_verbose) write(stdout,*) 'ATT3',numpw,nstates,size(omat_hold(:,1)),size(omat_hold(1,:)),lwork,liwork
FLUSH(stdout)
vl=0.d0
vu=0.d0
il=numpw-nstates+1
iu=numpw
call DSYEVR('V','I','U',numpw,omat_hold,numpw,vl,vu,il,iu,0.d0,n_found,eigen,&
& vectors,numpw,isuppz,work,lwork,iwork,liwork, info)
if(info/=0) then
write(stdout,*) 'ROUTINE pola_lanczos DSYEVR, INFO:', info
stop
endif
if(l_verbose) write(stdout,*) 'ATT4'
FLUSH(stdout)
deallocate(isuppz)
deallocate(work,iwork)
do iw=1,nstates
write(stdout,*) 'EIGEN:',iv,iw, eigen(iw)
enddo
FLUSH(stdout)
endif
!!find transformation matrix and write on disk
!
if(l_verbose) write(stdout,*) 'pola_basis_lanczos t_mat'
FLUSH(stdout)
if(.not.l_dsyevr) then
do ii=1,nstates
do jj=1,numpw
t_mat_hold(jj,ii)=omat_hold(jj,numpw-ii+1)*(dsqrt(eigen(numpw-ii+1)))
enddo
t_eigen_hold(ii)=eigen(numpw-ii+1)
enddo
else
do ii=1,nstates
do jj=1,numpw
t_mat_hold(jj,ii)=vectors(jj,ii)*(dsqrt(eigen(ii)))
enddo
t_eigen_hold(ii)=eigen(ii)
enddo
endif
!!find liner dependent products
if(.not.l_dsyevr) then
do ii=1,nstates
t_mat_hold2(:,ii)=omat_hold(:,numpw-ii+1)*(1.d0/dsqrt(eigen(numpw-ii+1)))
enddo
else
do ii=1,nstates
t_mat_hold2(:,ii)=vectors(:,ii)*(1.d0/dsqrt(eigen(ii)))
enddo
endif
deallocate(eigen)
if(l_dsyevr) deallocate(vectors)
endif
enddo
allocate(eigen(nstates))
do iv=ivv,min(ivv+nbuf-1,num_nbndv(ispin))
if(iv-ivv == mpime) then
t_mat(:,:)=t_mat_hold(:,:)
eigen(1:nstates)=t_eigen_hold(1:nstates)
endif
call mp_bcast(t_mat,iv-ivv,world_comm)
call mp_bcast(eigen(1:nstates),iv-ivv,world_comm)
!if required imposes sum rule
! if(l_sumrule) then
! norm_t=0.d0
! do jj=1,numpw
! do ii=1,nstates
! norm_t=norm_t+t_mat(jj,ii)**2.d0
! enddo
! enddo
! norm=0.d0
! do jj=1,numpw
! norm=norm+norms(jj)
! enddo
! c_norm=dsqrt(norm/norm_t)
! write(stdout,*) 'Sum rule:',c_norm
! t_mat(:,:)=t_mat(:,:)*c_norm
! endif
if(ionode) then
iuntmat = find_free_unit()
write(nfile,'(4i1)') iv/1000,mod(iv,1000)/100,mod(iv,100)/10,mod(iv,10)
if(ispin==1) then
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_mat_lanczos'//nfile, &
&status='unknown',form='unformatted')
else
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_mat_lanczos2'//nfile, &
&status='unknown',form='unformatted')
endif
write(iuntmat) iv
write(iuntmat) num_nbndv(ispin)
write(iuntmat) numpw
write(iuntmat) nstates
do ii=1,nstates
write(iuntmat) t_mat(1:numpw,ii)
enddo
close(iuntmat)
endif
!write on disk file with eigen values
if(ionode) then
iuntmat = find_free_unit()
write(nfile,'(4i1)') iv/1000,mod(iv,1000)/100,mod(iv,100)/10,mod(iv,10)
if(ispin==1) then
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos'//nfile, &
&status='unknown',form='unformatted')
else
open( unit= iuntmat, file=trim(tmp_dir)//trim(prefix)//'.p_eig_lanczos2'//nfile, &
&status='unknown',form='unformatted')
endif
write(iuntmat) nstates
write(iuntmat) eigen(1:nstates)
close(iuntmat)
endif
if(l_verbose) write(stdout,*) 'pola_basis update wp_g'
FLUSH(stdout)
!!find liner dependent products
if(iv-ivv == mpime) then
t_mat(:,:)=t_mat_hold2(:,:)
endif
call mp_bcast(t_mat,iv-ivv,world_comm)
if(l_verbose) write(stdout,*) 'pola_basis update wp_g dgemm'
FLUSH(stdout)
call dgemm('N','N',fc%nrxxt,nstates,numpw,1.d0,wp_prod(1,1,iv-ivv+1),fc%nrxxt,t_mat,numpw,0.d0,wp_g_t,fc%nrxxt)
write(stdout,*) 'pola_basis update merge-split',iv,ivv
FLUSH(stdout)
!put the correct order
psic=0.d0
allocate(wp_g_t2(fc%npwt,nstates))
do ii=1,nstates,2
if(ii==nstates) then
psic(1:fc%nrxxt)=wp_g_t(1:fc%nrxxt,ii)
else
psic(1:fc%nrxxt)=cmplx(wp_g_t(1:fc%nrxxt,ii),wp_g_t(1:fc%nrxxt,ii+1))
endif
CALL cft3t( fc, psic, fc%nr1t, fc%nr2t, fc%nr3t, fc%nrx1t, fc%nrx2t, fc%nrx3t, -2 )
if(ii==nstates) then
wp_g_t2(1:fc%npwt,ii) = psic(fc%nlt(1:fc%npwt))
!project on conduction manifold
call pc_operator_t(wp_g_t2(:,ii),evc_t,ispin,fc)
else
wp_g_t2(1:fc%npwt, ii)= 0.5d0*(psic(fc%nlt(1:fc%npwt))+conjg( psic(fc%nltm(1:fc%npwt))))
wp_g_t2(1:fc%npwt, ii+1)= (0.d0,-0.5d0)*(psic(fc%nlt(1:fc%npwt)) - conjg(psic(fc%nltm(1:fc%npwt))))
call pc_operator_t(wp_g_t2(:,ii),evc_t,ispin,fc)
call pc_operator_t(wp_g_t2(:,ii+1),evc_t,ispin,fc)
endif
enddo
if(fc%dual_t==4.d0) then
wp_g(1:npw,1:nstates)=wp_g_t2(1:fc%npwt,1:nstates)
else
call reorderwfp (nstates,fc%npwt, npw,wp_g_t2,wp_g, &
&fc%npwt,npw, fc%ig_l2gt,ig_l2g, fc%ngmt_g , mpime, nproc,ionode_id, intra_pool_comm )
! call mergewf(wp_g_t2(:),evc_g,fc%npwt,fc%ig_l2gt,mpime,nproc,ionode_id,intra_pool_comm)
! call splitwf(wp_g(:,ii),evc_g,npw,ig_l2g,mpime,nproc,ionode_id,intra_pool_comm)
endif
deallocate(wp_g_t2)
if(l_verbose) write(stdout,*) 'pola_basis update davcio',iv
FLUSH(stdout)
!!write on disk
do ii=1,nstates
call davcio(wp_g(:,ii),npw*2,iungresult,ii+(iv-1)*nstates,1)
enddo
if(l_verbose) write(stdout,*) 'pola_basis update done'
FLUSH(stdout)
enddo
deallocate(eigen)
enddo
deallocate(evc_r)
deallocate(t_mat,t_mat_hold,t_mat_hold2)
deallocate(omat,omat_hold,tmp_r,tmp_r2,p_basis_r)
deallocate(wp_g,wp_g_t)
close(iungresult)
deallocate(norms)
deallocate(wv_real,wp_prod)
deallocate(t_eigen_hold)
if(l_verbose) write(stdout,*) 'Exiting pola_basis_lanczos'
FLUSH(stdout)
deallocate(p_basis)
deallocate(p_basis_t,evc_t)
if(l_verbose) write(stdout,*) 'Call deallocate_fft_custom'
FLUSH(stdout)
!deallocate(evc_g)
call deallocate_fft_custom(fc)
return
!NOT_TO_BE_INCLUDED_END
end subroutine pola_basis_lanczos_real