mirror of https://gitlab.com/QEF/q-e.git
480 lines
16 KiB
Fortran
480 lines
16 KiB
Fortran
!
|
|
! Copyright (C) 2001 PWSCF group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
!
|
|
!----------------------------------------------------------------------
|
|
subroutine cdiisg (ndim, ndmx, nvec, nvecx, buflen, btype, psi, &
|
|
ethr, e, notcnv, iter, keep_flag)
|
|
!----------------------------------------------------------------------
|
|
!
|
|
! iterative solution of the eigenvalue problem:
|
|
!
|
|
! ( H - e S ) * psi = 0
|
|
!
|
|
! where h is a complex hermitean matrix, e is a real scalar,
|
|
! and S is a complex hermitean matrix and psi is a complex vector
|
|
!
|
|
! Gabriele Cipriani 3/00
|
|
!
|
|
#include "machine.h"
|
|
use parameters, only : DP
|
|
implicit none
|
|
logical :: keep_flag
|
|
! if to keep the old wfc in the subspac
|
|
|
|
integer :: ndim, ndmx, nvec, nvecx, buflen
|
|
! dimension of the matrix to be
|
|
! diagonalized
|
|
! leading dimension of matrix psi,
|
|
! as declared in the calling pgm unit
|
|
! number of sought eigeipairs
|
|
! max ""
|
|
! wfc buffer lenght
|
|
complex(kind=DP) :: psi (ndmx, nvecx)
|
|
! wfc
|
|
real(kind=DP) :: ethr
|
|
! energy threshold for convergence.
|
|
! root improvement is stopped, when two
|
|
! consecutive estimates of the root dif
|
|
! by less than ethr.
|
|
integer :: btype (nvecx)
|
|
! on OUTPUT
|
|
! complex*16 psi ! the first nvec columns contain the
|
|
! ! refined estimates of the eigenvectors
|
|
real(kind=DP) :: e (nvec)
|
|
real(kind=DP), allocatable :: et (:)
|
|
! energies
|
|
! idem
|
|
|
|
integer :: iter, notcnv
|
|
! number of iterations performed.
|
|
! number of unconverged (valence) roots
|
|
! LOCAL variables
|
|
!
|
|
! parameters
|
|
!
|
|
integer :: maxter, diis_steps, cbnd_steps
|
|
! maximum number of iterations
|
|
! maximum number diis refinements
|
|
! maximum number diis refinements for co
|
|
real(kind=DP) :: pi, min_tstep, max_tstep, small, big, shift, cfact
|
|
!
|
|
! minimum diis step lenght
|
|
! maximum diis step lenght
|
|
! diagonal shift for rmat
|
|
! convergence factor for the norm of the
|
|
|
|
parameter (maxter = 1, diis_steps = 4, cbnd_steps = 2, pi = &
|
|
3.14159265359d0, min_tstep = 0.1d0, max_tstep = 1.d0, small = &
|
|
1.d-10, big = 1.d10, shift = 1.d-12, cfact = 0.01d0)
|
|
integer :: kter
|
|
integer, allocatable :: conv (:)
|
|
integer ::nc, n, nb, ns, wfoff, &
|
|
bufoff, nopt, bufwfc, i, j, ib, nvectot, nbuf, ibuf, jbuf, ipos
|
|
integer, allocatable :: buf2wfc (:), wfc2buf (:,:)
|
|
! counter on iterations
|
|
! conv flag
|
|
! bnd type
|
|
! counter on wfc buffers
|
|
! counter on wfc
|
|
! counter on wfc in the buffer
|
|
! counter on diis steps
|
|
! first wfc of a given buffer
|
|
! buffer offset
|
|
! number of wfc still to optimize
|
|
! number of wfc in the buffer
|
|
! dimension of the iterative subspace
|
|
! indexing table for the buffe
|
|
! idem
|
|
complex(kind=DP), allocatable :: vc (:,:), vcc (:,:),&
|
|
rmat (:,:,:), smat (:,:,:), psip (:,:), psis (:,:), &
|
|
pres (:,:), dwfc (:,:), dres (:,:), eau(:), cstep(:)
|
|
complex(kind=DP) :: ZDOTC, hh (2, 2), ss (2, 2), hv (2, 2)
|
|
!
|
|
!
|
|
! residues matrix
|
|
! overlap matrix
|
|
! the product of H and
|
|
! the product of S and
|
|
! preconditioned residu
|
|
! iterative space
|
|
! residues
|
|
! scalar product routin
|
|
! auxiliary complex var
|
|
! diis trial step leng
|
|
|
|
real(kind=DP), allocatable :: nrm (:), ew (:), ep (:), np (:), ec (:)
|
|
real(kind=DP) :: h (2), tstep
|
|
|
|
external ZDOTC
|
|
!
|
|
! allocate the work arrays
|
|
!
|
|
|
|
call start_clock ('cdiisg')
|
|
allocate (vc( diis_steps , diis_steps))
|
|
allocate (vcc( diis_steps , buflen))
|
|
allocate (psip( ndmx , buflen))
|
|
allocate (psis( ndmx , buflen))
|
|
allocate (pres( ndmx , buflen))
|
|
allocate (dwfc( ndmx , diis_steps * buflen))
|
|
allocate (dres( ndmx , diis_steps * buflen))
|
|
allocate (ec( diis_steps))
|
|
allocate (ep( nvec))
|
|
allocate (ew( buflen))
|
|
allocate (et( 2 * nvec))
|
|
allocate (np( nvec))
|
|
allocate (eau( buflen))
|
|
allocate (nrm( buflen))
|
|
allocate (rmat( diis_steps, diis_steps, buflen))
|
|
allocate (smat( diis_steps, diis_steps, buflen))
|
|
allocate (conv( nvec))
|
|
allocate (cstep( buflen))
|
|
allocate (buf2wfc( buflen * diis_steps))
|
|
allocate (wfc2buf( buflen , diis_steps))
|
|
|
|
if (diis_steps.lt.cbnd_steps) call error ('cdiisg', 'wrong diis_steps', 1)
|
|
!
|
|
do n = 1, nvec
|
|
conv (n) = 0
|
|
ep (n) = 0.d0
|
|
np (n) = 0.d0
|
|
enddo
|
|
do n = 1, buflen
|
|
ew (n) = 0.d0
|
|
enddo
|
|
do n = 1, 2 * nvec
|
|
et (n) = 0.d0
|
|
enddo
|
|
do j = 1, diis_steps
|
|
do i = 1, buflen
|
|
buf2wfc (i * j) = 0
|
|
wfc2buf (i, j) = 0
|
|
enddo
|
|
enddo
|
|
! number of wfc buffers
|
|
nbuf = nvec / buflen
|
|
|
|
if (mod (nvec, buflen) .ne.0) nbuf = nbuf + 1
|
|
! DIIS
|
|
|
|
do kter = 1, maxter
|
|
iter = kter
|
|
|
|
if (kter.eq.1) nvectot = nvec
|
|
|
|
call rotate_wfc (ndim, ndmx, nvectot, nvectot, et, psi)
|
|
! write(*,'(4f12.6)')(et(n),n=1,nvec)
|
|
! save bands
|
|
if (keep_flag) then
|
|
call ZCOPY (ndmx * nvec, psi (1, 1), 1, psi (1, nvec + 1), &
|
|
1)
|
|
nvectot = 2 * nvec
|
|
else
|
|
nvectot = nvec
|
|
endif
|
|
! buffers' loop
|
|
|
|
do nc = 1, nbuf
|
|
wfoff = (nc - 1) * buflen + 1
|
|
bufwfc = min (nvec - wfoff + 1, buflen)
|
|
! starting indexing
|
|
nopt = bufwfc
|
|
bufoff = 1
|
|
|
|
ipos = bufwfc
|
|
do ib = 1, bufwfc
|
|
buf2wfc (ib) = ib
|
|
wfc2buf (ib, 1) = ib
|
|
enddo
|
|
! workspaces:
|
|
call setv (2 * diis_steps * diis_steps * buflen, 0.d0, rmat, 1)
|
|
call setv (2 * diis_steps * diis_steps * buflen, 0.d0, smat, 1)
|
|
call setv (2 * ndmx * diis_steps * buflen, 0.d0, dwfc, 1)
|
|
call setv (2 * ndmx * diis_steps * buflen, 0.d0, dres, 1)
|
|
call setv (2 * ndmx * buflen, 0.d0, psip, 1)
|
|
call setv (2 * ndmx * buflen, 0.d0, psis, 1)
|
|
|
|
call ZCOPY (ndmx * nopt, psi (1, wfoff), 1, dwfc (1, 1), 1)
|
|
! diis optimization:
|
|
|
|
do ns = 1, diis_steps
|
|
! residues
|
|
call h_psi (ndmx, ndim, nopt, dwfc (1, bufoff), psip, psis)
|
|
!
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nrm (ib) = 0.d0
|
|
nrm (ib) = DREAL (ZDOTC (ndim, dwfc (1,ibuf),1,psis (1,ib),1) )
|
|
enddo
|
|
#ifdef PARA
|
|
call reduce (nopt, nrm)
|
|
#endif
|
|
! rescale
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nrm (ib) = 1.d0 / dsqrt (nrm (ib) )
|
|
call DSCAL (2 * ndmx, nrm (ib), dwfc (1, ibuf), 1)
|
|
call DSCAL (2 * ndmx, nrm (ib), psip (1, ib), 1)
|
|
call DSCAL (2 * ndmx, nrm (ib), psis (1, ib), 1)
|
|
eau (ib) = - ZDOTC (ndim, dwfc (1, ibuf), 1, psip (1, ib),1)
|
|
enddo
|
|
#ifdef PARA
|
|
call reduce (2 * nopt, eau)
|
|
#endif
|
|
! energies
|
|
do ib = 1, nopt
|
|
ew (ib) = - DREAL (eau (ib) )
|
|
enddo
|
|
! residues
|
|
call ZCOPY (ndmx * nopt, psip, 1, dres (1, bufoff), 1)
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
call ZAXPY (ndmx, eau (ib), psis (1,ib),1,dres (1,ibuf),1)
|
|
enddo
|
|
! update rmat/smat
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
do j = 1, ns
|
|
jbuf = wfc2buf (nb, j)
|
|
rmat (j, ns, nb) = ZDOTC (ndim,dres(1,jbuf),1,dres(1,ibuf),1)
|
|
#ifdef PARA
|
|
call reduce (2, rmat (j, ns, nb) )
|
|
#endif
|
|
rmat (ns, j, nb) = conjg (rmat (j, ns, nb) )
|
|
if (ns.eq.1) then
|
|
smat (ns, ns, nb) = (1.d0, 0.d0)
|
|
else
|
|
smat (ns, j, nb) = ZDOTC(ndim,psis(1,ib),1,dwfc(1,jbuf),1)
|
|
#ifdef PARA
|
|
call reduce (2, smat (ns, j, nb) )
|
|
#endif
|
|
smat (j, ns, nb) = conjg (smat (ns, j, nb) )
|
|
endif
|
|
enddo
|
|
enddo
|
|
! diagonal shift
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
rmat (ns, ns, nb) = rmat (ns, ns, nb) + shift
|
|
smat (ns, ns, nb) = smat (ns, ns, nb) + shift
|
|
enddo
|
|
! dump
|
|
! write(6,*)
|
|
! do ib=1,nopt
|
|
! ibuf = bufoff+ib-1
|
|
! nb = buf2wfc(ibuf)
|
|
! n = wfoff+nb-1
|
|
! write(6,"('n ns rmat ew de ',2i5,3e14.6)")n,ns,
|
|
! + DREAL(rmat(ns,ns,nb)),ew(ib),
|
|
! + dabs(ew(ib)-ep(n))
|
|
! enddo
|
|
! write(6,*)
|
|
|
|
if (ns.eq.1) then
|
|
! first iteration
|
|
! ep np
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
n = wfoff + nb - 1
|
|
ep (n) = ew (ib)
|
|
np (n) = rmat (ns, ns, nb)
|
|
enddo
|
|
! preconditioned residue
|
|
call ZCOPY (ndmx * nopt, dres (1, 1), 1, pres, 1)
|
|
call g_psi (ndmx, ndim, nopt, pres, ew)
|
|
! minimize rayleigh quotient
|
|
call h_psi (ndmx, ndim, nopt, pres, psip, psis)
|
|
if (.true.) then
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
|
|
n = wfoff + nb - 1
|
|
call setv (8, 0.d0, hh, 1)
|
|
call setv (8, 0.d0, ss, 1)
|
|
hh (1, 1) = DCMPLX (ew (ib), 0.d0)
|
|
hh (1, 2) = ZDOTC (ndim, dwfc (1, ib), 1, psip (1, ib),1)
|
|
hh (2, 1) = conjg (hh (1, 2) )
|
|
hh (2, 2) = ZDOTC (ndim, pres (1, ib), 1, psip (1, ib),1)
|
|
ss (1, 1) = smat (1, 1, ib)
|
|
ss (1, 2) = ZDOTC (ndim, dwfc (1, ib), 1, psis (1, ib),1)
|
|
ss (2, 1) = conjg (ss (1, 2) )
|
|
ss (2, 2) = ZDOTC (ndim, pres (1, ib), 1, psis (1, ib),1)
|
|
#ifdef PARA
|
|
call reduce (8, hh)
|
|
call reduce (8, ss)
|
|
hh (1, 1) = DCMPLX (ew (ib), 0.d0)
|
|
ss (1, 1) = smat (1, 1, ib)
|
|
#endif
|
|
|
|
call cdiaghg (2, 2, hh, ss, 2, h, hv)
|
|
if (abs (hv (1, 1) ) .lt.small) then
|
|
cstep (ib) = DCMPLX(min_tstep, 0.d0)
|
|
else
|
|
cstep (ib) = hv (2, 1) / hv (1, 1)
|
|
! write(6,*)'cstep0 ',n,cstep(ib)
|
|
tstep = abs (cstep (ib) )
|
|
cstep (ib) = cstep (ib) / tstep
|
|
tstep = max (tstep, min_tstep)
|
|
tstep = min (tstep, max_tstep)
|
|
cstep (ib) = cstep (ib) * tstep
|
|
endif
|
|
! write(6,*)'cstep1 ',n,cstep(ib)
|
|
enddo
|
|
else
|
|
do ib = 1, nopt
|
|
cstep (ib) = ( - 1.d0, 0.d0)
|
|
enddo
|
|
endif
|
|
! next dwfc
|
|
if (diis_steps.eq.1) then
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
n = wfoff + nb - 1
|
|
call ZCOPY (ndmx, dwfc (1, ib), 1, psi (1, n), 1)
|
|
call ZAXPY (ndmx, cstep (ib), pres(1,ib),1,psi (1, n),1)
|
|
enddo
|
|
else
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
ipos = ipos + 1
|
|
call ZCOPY (ndmx, dwfc (1, ib), 1, dwfc (1, ipos), 1)
|
|
call ZAXPY (ndmx,cstep(ib),pres(1,ib),1,dwfc(1,ipos),1)
|
|
wfc2buf (nb, ns + 1) = ipos
|
|
buf2wfc (ipos) = nb
|
|
enddo
|
|
endif
|
|
!
|
|
bufoff = bufoff + nopt
|
|
|
|
nopt = ipos - bufoff + 1
|
|
! ns > 1
|
|
else
|
|
! check convergence
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
n = wfoff + nb - 1
|
|
if (dabs (ew (ib) - ep (n) ) .lt.ethr .or. &
|
|
DREAL (rmat (ns, ns, nb) ) .lt.cfact * np (n) .or. &
|
|
((btype (n).eq.1) .and. (ns.eq.cbnd_steps)) ) conv(n)=1
|
|
ep (n) = ew (ib)
|
|
enddo
|
|
! solve diis eigenproblem
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
call cdiaghg (ns, ns, rmat (1, 1, nb), smat (1, 1, nb), &
|
|
diis_steps, ec, vc)
|
|
if (ec (1) .lt.0.d0.and. (dabs (ec (1) ) .gt.small) ) then
|
|
write ( * , * ) ' *********** ec *********** '
|
|
write ( *, * ) (ec (i), i = 1, diis_steps)
|
|
call error ('cdiisg', 'ec(1) < 0', 1)
|
|
endif
|
|
do j = 1, ns
|
|
vcc (j, nb) = vc (j, 1)
|
|
enddo
|
|
enddo
|
|
! new residue
|
|
call setv (2 * ndmx * nopt, 0.d0, pres, 1)
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
do j = 1, ns
|
|
jbuf = wfc2buf (nb, j)
|
|
call ZAXPY (ndmx,vcc(j,nb),dres(1,jbuf),1,pres(1,ib),1)
|
|
enddo
|
|
enddo
|
|
!
|
|
call g_psi (ndmx, ndim, nopt, pres, ew)
|
|
! new wfc
|
|
do ib = 1, nopt
|
|
ibuf = bufoff + ib - 1
|
|
nb = buf2wfc (ibuf)
|
|
n = wfoff + nb - 1
|
|
if (conv (n) .eq.1.or.ns.eq.diis_steps) then
|
|
call setv (2 * ndmx, 0.d0, psi (1, n), 1)
|
|
do j = 1, ns
|
|
jbuf = wfc2buf (nb, j)
|
|
call ZAXPY (ndmx,vcc(j,nb),dwfc(1,jbuf),1,psi(1,n),1)
|
|
enddo
|
|
call ZAXPY (ndmx,cstep(ib),pres(1,ib),1,psi(1,n),1)
|
|
else
|
|
ipos = ipos + 1
|
|
do j = 1, ns
|
|
jbuf = wfc2buf (nb, j)
|
|
call ZAXPY (ndmx, vcc(j,nb), dwfc(1,jbuf), 1, &
|
|
dwfc(1,ipos),1)
|
|
enddo
|
|
call ZAXPY (ndmx, cstep (ib), pres (1, ib), 1, &
|
|
dwfc (1,ipos), 1)
|
|
wfc2buf (nb, ns + 1) = ipos
|
|
buf2wfc (ipos) = nb
|
|
endif
|
|
enddo
|
|
!
|
|
bufoff = bufoff + nopt
|
|
!
|
|
nopt = ipos - bufoff + 1
|
|
if (nopt.eq.0) goto 10
|
|
endif
|
|
! ns loop
|
|
enddo
|
|
!
|
|
10 continue
|
|
! nc loop
|
|
enddo
|
|
! update convergence flags
|
|
notcnv = 0
|
|
do n = 1, nvec
|
|
if (btype (n) .eq.0.and.conv (n) .ne.1) notcnv = notcnv + 1
|
|
enddo
|
|
! exit?
|
|
if (notcnv.eq.0) goto 100
|
|
! kter loop
|
|
enddo
|
|
|
|
100 continue
|
|
!
|
|
call rotate_wfc (ndim, ndmx, nvectot, nvectot, et, psi)
|
|
!
|
|
do n = 1, nvec
|
|
e (n) = et (n)
|
|
enddo
|
|
|
|
deallocate (wfc2buf)
|
|
deallocate (buf2wfc)
|
|
deallocate (cstep)
|
|
deallocate (conv)
|
|
deallocate (smat)
|
|
deallocate (rmat)
|
|
deallocate (nrm)
|
|
deallocate (eau)
|
|
deallocate (np)
|
|
deallocate (et)
|
|
deallocate (ew)
|
|
deallocate (ep)
|
|
deallocate (ec)
|
|
deallocate (dres)
|
|
deallocate (dwfc)
|
|
deallocate (pres)
|
|
deallocate (psis)
|
|
deallocate (psip)
|
|
deallocate (vcc)
|
|
deallocate (vc)
|
|
|
|
call stop_clock ('cdiisg')
|
|
return
|
|
end subroutine cdiisg
|
|
|