quantum-espresso/PW/regterg.f90

1392 lines
38 KiB
Fortran
Raw Normal View History

!
! Copyright (C) 2003-2006 Quantum-ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#define ZERO ( 0.D0, 0.D0 )
#define ONE ( 1.D0, 0.D0 )
!
#include "f_defs.h"
!
!----------------------------------------------------------------------------
SUBROUTINE regterg( npw, npwx, nvec, nvecx, evc, ethr, &
uspp, gstart, e, btype, notcnv, lrot, dav_iter )
!----------------------------------------------------------------------------
!
! ... iterative solution of the eigenvalue problem:
!
! ... ( H - e S ) * evc = 0
!
! ... where H is an hermitean operator, e is a real scalar,
! ... S is an uspp matrix, evc is a complex vector
! ... (real wavefunctions with only half plane waves stored)
!
USE kinds, ONLY : DP
USE io_global, ONLY : stdout
USE mp_global, ONLY : intra_pool_comm
This is the first iteration in trying to implement a real space treatment of projectors in USPPs. Hopefully this will allow one to study larger systems. The modifications are done primarily keeping TDDFPT code in mind (a branch of QE, you may see detailed explanation in qe-forge which I am trying to keep tightly integrated). Please do not modify/beautify/make more elegant the corresponding subroutines without prior notice, due to their dependencies. I have tested that the current modifications do not alter the behaviour of pw.x other than designed with a number of small tests in HG1. Some Pointers: -All the new subroutines reside in PW/realus.f90 -A new flag real_space in &electrons control the implementation -tqr flag is treated seperately. -The implementation works only for (serial) gamma point single point calculations. ToDo: -I have written K point and task groups implementations of most of the corresponding routines, but did not have time to implement. -Parallelism issues are still to be checked. -The discrepancy in total energy is <0.002 eV for cutoff of 55Ry/550Ry however, there are some strange force components. I do not know how this will effect a possible optimization scheme. Other: Trying the compile CVS version in HG1 of sissa, using the "default" compiler sets, I encountered a very strange compiler bug. Please have a look at Modules/read_cards.f90 for details. Remove the stupid workaround to your liking. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@5493 c92efa57-630b-4861-b058-cf58834340f0
2009-04-03 00:05:09 +08:00
USE mp, ONLY : mp_sum
USE realus, ONLY : real_space, fft_orbital_gamma, initialisation_level,&
bfft_orbital_gamma, calbec_rs_gamma, s_psir_gamma
USE wvfct, ONLY : nbnd
USE control_flags, ONLY : gamma_only
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx, gstart
! dimension of the matrix to be diagonalized
! leading dimension of matrix evc, as declared in the calling pgm unit
! integer number of searched low-lying roots
! maximum dimension of the reduced basis set
! (the basis set is refreshed when its dimension would exceed nvecx)
COMPLEX(DP), INTENT(INOUT) :: evc(npwx,nvec)
! evc contains the refined estimates of the eigenvectors
REAL(DP), INTENT(IN) :: ethr
! energy threshold for convergence: root improvement is stopped,
! when two consecutive estimates of the root differ by less than ethr.
LOGICAL, INTENT(IN) :: uspp
! if .FALSE. : S|psi> not needed
INTEGER, INTENT(IN) :: btype(nvec)
! band type ( 1 = occupied, 0 = empty )
LOGICAL, INTENT(IN) :: lrot
! .TRUE. if the wfc have already be rotated
REAL(DP), INTENT(OUT) :: e(nvec)
! contains the estimated roots.
INTEGER, INTENT(OUT) :: dav_iter, notcnv
! integer number of iterations performed
! number of unconverged roots
!
! ... LOCAL variables
!
INTEGER, PARAMETER :: maxter = 20
! maximum number of iterations
!
This is the first iteration in trying to implement a real space treatment of projectors in USPPs. Hopefully this will allow one to study larger systems. The modifications are done primarily keeping TDDFPT code in mind (a branch of QE, you may see detailed explanation in qe-forge which I am trying to keep tightly integrated). Please do not modify/beautify/make more elegant the corresponding subroutines without prior notice, due to their dependencies. I have tested that the current modifications do not alter the behaviour of pw.x other than designed with a number of small tests in HG1. Some Pointers: -All the new subroutines reside in PW/realus.f90 -A new flag real_space in &electrons control the implementation -tqr flag is treated seperately. -The implementation works only for (serial) gamma point single point calculations. ToDo: -I have written K point and task groups implementations of most of the corresponding routines, but did not have time to implement. -Parallelism issues are still to be checked. -The discrepancy in total energy is <0.002 eV for cutoff of 55Ry/550Ry however, there are some strange force components. I do not know how this will effect a possible optimization scheme. Other: Trying the compile CVS version in HG1 of sissa, using the "default" compiler sets, I encountered a very strange compiler bug. Please have a look at Modules/read_cards.f90 for details. Remove the stupid workaround to your liking. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@5493 c92efa57-630b-4861-b058-cf58834340f0
2009-04-03 00:05:09 +08:00
INTEGER :: kter, nbase, np, n, m, nb1, ibnd
! counter on iterations
! dimension of the reduced basis
! counter on the reduced basis vectors
! do-loop counters
This is the first iteration in trying to implement a real space treatment of projectors in USPPs. Hopefully this will allow one to study larger systems. The modifications are done primarily keeping TDDFPT code in mind (a branch of QE, you may see detailed explanation in qe-forge which I am trying to keep tightly integrated). Please do not modify/beautify/make more elegant the corresponding subroutines without prior notice, due to their dependencies. I have tested that the current modifications do not alter the behaviour of pw.x other than designed with a number of small tests in HG1. Some Pointers: -All the new subroutines reside in PW/realus.f90 -A new flag real_space in &electrons control the implementation -tqr flag is treated seperately. -The implementation works only for (serial) gamma point single point calculations. ToDo: -I have written K point and task groups implementations of most of the corresponding routines, but did not have time to implement. -Parallelism issues are still to be checked. -The discrepancy in total energy is <0.002 eV for cutoff of 55Ry/550Ry however, there are some strange force components. I do not know how this will effect a possible optimization scheme. Other: Trying the compile CVS version in HG1 of sissa, using the "default" compiler sets, I encountered a very strange compiler bug. Please have a look at Modules/read_cards.f90 for details. Remove the stupid workaround to your liking. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@5493 c92efa57-630b-4861-b058-cf58834340f0
2009-04-03 00:05:09 +08:00
! counter on the bands
REAL(DP), ALLOCATABLE :: hr(:,:), sr(:,:), vr(:,:), ew(:)
! Hamiltonian on the reduced basis
! S matrix on the reduced basis
! eigenvectors of the Hamiltonian
! eigenvalues of the reduced hamiltonian
COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:)
! work space, contains psi
! the product of H and psi
! the product of S and psi
LOGICAL, ALLOCATABLE :: conv(:)
! true if the root is converged
REAL(DP) :: empty_ethr
! threshold for empty bands
INTEGER :: npw2, npwx2
!
REAL(DP), EXTERNAL :: DDOT
!
EXTERNAL h_psi, s_psi, g_psi
! h_psi(npwx,npw,nvec,psi,hpsi)
! calculates H|psi>
! s_psi(npwx,npw,nvec,psi,spsi)
! calculates S|psi> (if needed)
! Vectors psi,hpsi,spsi are dimensioned (npwx,nvec)
! g_psi(npwx,npw,notcnv,psi,e)
! calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi
! the first nvec columns contain the trial eigenvectors
!
CALL start_clock( 'regterg' )
!
IF ( nvec > nvecx / 2 ) CALL errore( 'regter', 'nvecx is too small', 1 )
!
! ... threshold for empty bands
!
empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 )
!
ALLOCATE( psi( npwx, nvecx ) )
ALLOCATE( hpsi( npwx, nvecx ) )
!
IF ( uspp ) ALLOCATE( spsi( npwx, nvecx ) )
!
ALLOCATE( sr( nvecx, nvecx ) )
ALLOCATE( hr( nvecx, nvecx ) )
ALLOCATE( vr( nvecx, nvecx ) )
ALLOCATE( ew( nvecx ) )
ALLOCATE( conv( nvec ) )
!
npw2 = 2*npw
npwx2 = 2*npwx
notcnv = nvec
nbase = nvec
conv = .FALSE.
!
IF ( uspp ) spsi = ZERO
!
hpsi = ZERO
psi = ZERO
psi(:,1:nvec) = evc(:,1:nvec)
! ... set Im[ psi(G=0) ] - needed for numerical stability
IF ( gstart == 2 ) psi(1,1:nvec) = CMPLX( DBLE( psi(1,1:nvec) ), 0.D0 )
!
! ... hpsi contains h times the basis vectors
!
CALL h_psi( npwx, npw, nvec, psi, hpsi )
!
This is the first iteration in trying to implement a real space treatment of projectors in USPPs. Hopefully this will allow one to study larger systems. The modifications are done primarily keeping TDDFPT code in mind (a branch of QE, you may see detailed explanation in qe-forge which I am trying to keep tightly integrated). Please do not modify/beautify/make more elegant the corresponding subroutines without prior notice, due to their dependencies. I have tested that the current modifications do not alter the behaviour of pw.x other than designed with a number of small tests in HG1. Some Pointers: -All the new subroutines reside in PW/realus.f90 -A new flag real_space in &electrons control the implementation -tqr flag is treated seperately. -The implementation works only for (serial) gamma point single point calculations. ToDo: -I have written K point and task groups implementations of most of the corresponding routines, but did not have time to implement. -Parallelism issues are still to be checked. -The discrepancy in total energy is <0.002 eV for cutoff of 55Ry/550Ry however, there are some strange force components. I do not know how this will effect a possible optimization scheme. Other: Trying the compile CVS version in HG1 of sissa, using the "default" compiler sets, I encountered a very strange compiler bug. Please have a look at Modules/read_cards.f90 for details. Remove the stupid workaround to your liking. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@5493 c92efa57-630b-4861-b058-cf58834340f0
2009-04-03 00:05:09 +08:00
IF ( uspp ) then
if (real_space ) then
!print *, "regterg in real space"
if (gamma_only) then
do ibnd = 1 , nvec , 2
call fft_orbital_gamma(psi,ibnd,nvec) !transform the orbital to real space
call s_psir_gamma(ibnd,nvec)
call bfft_orbital_gamma(spsi,ibnd,nvec)
enddo
else
CALL errore( 'regter', 'k-point real space implementation under development', 1 )
endif
else
CALL s_psi( npwx, npw, nvec, psi, spsi )
endif
endif
!
! ... hr contains the projection of the hamiltonian onto the reduced
! ... space vr contains the eigenvectors of hr
!
hr(:,:) = 0.D0
sr(:,:) = 0.D0
vr(:,:) = 0.D0
!
CALL DGEMM( 'T', 'N', nbase, nbase, npw2, 2.D0 , &
psi, npwx2, hpsi, npwx2, 0.D0, hr, nvecx )
!
IF ( gstart == 2 ) &
CALL DGER( nbase, nbase, -1.D0, psi, npwx2, hpsi, npwx2, hr, nvecx )
!
CALL mp_sum( hr( :, 1:nbase ), intra_pool_comm )
!
IF ( uspp ) THEN
!
CALL DGEMM( 'T', 'N', nbase, nbase, npw2, 2.D0, &
psi, npwx2, spsi, npwx2, 0.D0, sr, nvecx )
!
IF ( gstart == 2 ) &
CALL DGER( nbase, nbase, -1.D0, psi, npwx2, spsi, npwx2, sr, nvecx )
!
ELSE
!
CALL DGEMM( 'T', 'N', nbase, nbase, npw2, 2.D0, &
psi, npwx2, psi, npwx2, 0.D0, sr, nvecx )
!
IF ( gstart == 2 ) &
CALL DGER( nbase, nbase, -1.D0, psi, npwx2, psi, npwx2, sr, nvecx )
!
END IF
!
CALL mp_sum( sr( :, 1:nbase ), intra_pool_comm )
!
IF ( lrot ) THEN
!
DO n = 1, nbase
!
e(n) = hr(n,n)
vr(n,n) = 1.D0
!
END DO
!
ELSE
!
! ... diagonalize the reduced hamiltonian
!
CALL rdiaghg( nbase, nvec, hr, sr, nvecx, ew, vr )
!
e(1:nvec) = ew(1:nvec)
!
END IF
!
! ... iterate
!
iterate: DO kter = 1, maxter
!
dav_iter = kter
!
CALL start_clock( 'regterg:update' )
!
np = 0
!
DO n = 1, nvec
!
IF ( .NOT. conv(n) ) THEN
!
! ... this root not yet converged ...
!
np = np + 1
!
! ... reorder eigenvectors so that coefficients for unconverged
! ... roots come first. This allows to use quick matrix-matrix
! ... multiplications to set a new basis vector (see below)
!
IF ( np /= n ) vr(:,np) = vr(:,n)
!
! ... for use in g_psi
!
ew(nbase+np) = e(n)
!
END IF
!
END DO
!
nb1 = nbase + 1
!
! ... expand the basis set with new basis vectors ( H - e*S )|psi> ...
!
IF ( uspp ) THEN
!
CALL DGEMM( 'N', 'N', npw2, notcnv, nbase, 1.D0, &
spsi, npwx2, vr, nvecx, 0.D0, psi(1,nb1), npwx2 )
!
ELSE
!
CALL DGEMM( 'N', 'N', npw2, notcnv, nbase, 1.D0, &
psi, npwx2, vr, nvecx, 0.D0, psi(1,nb1), npwx2 )
!
END IF
!
DO np = 1, notcnv
!
psi(:,nbase+np) = - ew(nbase+np) * psi(:,nbase+np)
!
END DO
!
CALL DGEMM( 'N', 'N', npw2, notcnv, nbase, 1.D0, &
hpsi, npwx2, vr, nvecx, 1.D0, psi(1,nb1), npwx2 )
!
CALL stop_clock( 'regterg:update' )
!
! ... approximate inverse iteration
!
CALL g_psi( npwx, npw, notcnv, 1, psi(1,nb1), ew(nb1) )
!
! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in
! ... order to improve numerical stability of subspace diagonalization
! ... (cdiaghg) ew is used as work array :
!
! ... ew = <psi_i|psi_i>, i = nbase + 1, nbase + notcnv
!
DO n = 1, notcnv
!
ew(n) = 2.D0 * DDOT( npw2, psi(1,nbase+n), 1, psi(1,nbase+n), 1 )
!
IF ( gstart == 2 ) ew(n) = ew(n) - psi(1,nbase+n) * psi(1,nbase+n)
!
END DO
!
CALL mp_sum( ew( 1:notcnv ), intra_pool_comm )
!
DO n = 1, notcnv
!
psi(:,nbase+n) = psi(:,nbase+n) / SQRT( ew(n) )
! ... set Im[ psi(G=0) ] - needed for numerical stability
IF ( gstart == 2 ) psi(1,nbase+n) = CMPLX( DBLE(psi(1,nbase+n)), 0.D0 )
!
END DO
!
! ... here compute the hpsi and spsi of the new functions
!
CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) )
!
This is the first iteration in trying to implement a real space treatment of projectors in USPPs. Hopefully this will allow one to study larger systems. The modifications are done primarily keeping TDDFPT code in mind (a branch of QE, you may see detailed explanation in qe-forge which I am trying to keep tightly integrated). Please do not modify/beautify/make more elegant the corresponding subroutines without prior notice, due to their dependencies. I have tested that the current modifications do not alter the behaviour of pw.x other than designed with a number of small tests in HG1. Some Pointers: -All the new subroutines reside in PW/realus.f90 -A new flag real_space in &electrons control the implementation -tqr flag is treated seperately. -The implementation works only for (serial) gamma point single point calculations. ToDo: -I have written K point and task groups implementations of most of the corresponding routines, but did not have time to implement. -Parallelism issues are still to be checked. -The discrepancy in total energy is <0.002 eV for cutoff of 55Ry/550Ry however, there are some strange force components. I do not know how this will effect a possible optimization scheme. Other: Trying the compile CVS version in HG1 of sissa, using the "default" compiler sets, I encountered a very strange compiler bug. Please have a look at Modules/read_cards.f90 for details. Remove the stupid workaround to your liking. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@5493 c92efa57-630b-4861-b058-cf58834340f0
2009-04-03 00:05:09 +08:00
IF ( uspp ) then
if (real_space ) then
!print *, "regterg in real space"
if (gamma_only) then
do ibnd = 1 , notcnv , 2
call fft_orbital_gamma(psi(1:,nb1:),ibnd,notcnv) !transform the orbital to real space
call s_psir_gamma(ibnd,notcnv)
call bfft_orbital_gamma(spsi(1:,nb1:),ibnd,notcnv)
enddo
else
CALL errore( 'regter', 'k-point real space implementation under development', 1 )
endif
else
CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) )
endif
endif
!
! ... update the reduced hamiltonian
!
CALL start_clock( 'regterg:overlap' )
!
CALL DGEMM( 'T', 'N', nbase+notcnv, notcnv, npw2, 2.D0, psi, &
npwx2, hpsi(1,nb1), npwx2, 0.D0, hr(1,nb1), nvecx )
!
IF ( gstart == 2 ) &
CALL DGER( nbase+notcnv, notcnv, -1.D0, psi, &
npwx2, hpsi(1,nb1), npwx2, hr(1,nb1), nvecx )
!
CALL mp_sum( hr( :, nb1 : nb1+notcnv-1 ), intra_pool_comm )
!
IF ( uspp ) THEN
!
CALL DGEMM( 'T', 'N', nbase+notcnv, notcnv, npw2, 2.D0, psi, &
npwx2, spsi(1,nb1), npwx2, 0.D0, sr(1,nb1), nvecx )
!
IF ( gstart == 2 ) &
CALL DGER( nbase+notcnv, notcnv, -1.D0, psi, &
npwx2, spsi(1,nb1), npwx2, sr(1,nb1), nvecx )
!
ELSE
!
CALL DGEMM( 'T', 'N', nbase+notcnv, notcnv, npw2, 2.D0, psi, &
npwx2, psi(1,nb1), npwx2, 0.D0, sr(1,nb1) , nvecx )
!
IF ( gstart == 2 ) &
CALL DGER( nbase+notcnv, notcnv, -1.D0, psi, &
npwx2, psi(1,nb1), npwx2, sr(1,nb1), nvecx )
!
END IF
!
CALL mp_sum( sr( :, nb1 : nb1+notcnv-1 ), intra_pool_comm )
!
CALL stop_clock( 'regterg:overlap' )
!
nbase = nbase + notcnv
!
DO n = 1, nbase
!
DO m = n + 1, nbase
!
hr(m,n) = hr(n,m)
sr(m,n) = sr(n,m)
!
END DO
!
END DO
!
! ... diagonalize the reduced hamiltonian
!
CALL rdiaghg( nbase, nvec, hr, sr, nvecx, ew, vr )
!
! ... test for convergence
!
WHERE( btype(1:nvec) == 1 )
!
conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) )
!
ELSEWHERE
!
conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) )
!
END WHERE
!
notcnv = COUNT( .NOT. conv(:) )
!
e(1:nvec) = ew(1:nvec)
!
! ... if overall convergence has been achieved, or the dimension of
! ... the reduced basis set is becoming too large, or in any case if
! ... we are at the last iteration refresh the basis set. i.e. replace
! ... the first nvec elements with the current estimate of the
! ... eigenvectors; set the basis dimension to nvec.
!
IF ( notcnv == 0 .OR. &
nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN
!
CALL start_clock( 'regterg:last' )
!
CALL DGEMM( 'N', 'N', npw2, nvec, nbase, 1.D0, &
psi, npwx2, vr, nvecx, 0.D0, evc, npwx2 )
!
IF ( notcnv == 0 ) THEN
!
! ... all roots converged: return
!
CALL stop_clock( 'regterg:last' )
!
EXIT iterate
!
ELSE IF ( dav_iter == maxter ) THEN
!
! ... last iteration, some roots not converged: return
!
WRITE( stdout, '(5X,"WARNING: ",I5, &
& " eigenvalues not converged")' ) notcnv
!
CALL stop_clock( 'regterg:last' )
!
EXIT iterate
!
END IF
!
! ... refresh psi, H*psi and S*psi
!
psi(:,1:nvec) = evc(:,1:nvec)
!
IF ( uspp ) THEN
!
CALL DGEMM( 'N', 'N', npw2, nvec, nbase, 1.D0, spsi, &
npwx2, vr, nvecx, 0.D0, psi(1,nvec+1), npwx2 )
!
spsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
!
END IF
!
CALL DGEMM( 'N', 'N', npw2, nvec, nbase, 1.D0, hpsi, &
npwx2, vr, nvecx, 0.D0, psi(1,nvec+1), npwx2 )
!
hpsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
!
! ... refresh the reduced hamiltonian
!
nbase = nvec
!
hr(:,1:nbase) = 0.D0
sr(:,1:nbase) = 0.D0
vr(:,1:nbase) = 0.D0
!
DO n = 1, nbase
!
hr(n,n) = e(n)
sr(n,n) = 1.D0
vr(n,n) = 1.D0
!
END DO
!
CALL stop_clock( 'regterg:last' )
!
END IF
!
END DO iterate
!
DEALLOCATE( conv )
DEALLOCATE( ew )
DEALLOCATE( vr )
DEALLOCATE( hr )
DEALLOCATE( sr )
!
IF ( uspp ) DEALLOCATE( spsi )
!
DEALLOCATE( hpsi )
DEALLOCATE( psi )
!
CALL stop_clock( 'regterg' )
!
RETURN
!
END SUBROUTINE regterg
!
!
! Subroutine with distributed matrixes
! (written by Carlo Cavazzoni)
!
!----------------------------------------------------------------------------
SUBROUTINE pregterg( npw, npwx, nvec, nvecx, evc, ethr, &
uspp, gstart, e, btype, notcnv, lrot, dav_iter )
!----------------------------------------------------------------------------
!
! ... iterative solution of the eigenvalue problem:
!
! ... ( H - e S ) * evc = 0
!
! ... where H is an hermitean operator, e is a real scalar,
! ... S is an uspp matrix, evc is a complex vector
! ... (real wavefunctions with only half plane waves stored)
!
USE kinds, ONLY : DP
USE io_global, ONLY : stdout
USE mp_global, ONLY : npool, nproc_pool, me_pool, root_pool, &
intra_pool_comm, init_ortho_group, &
ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho
USE descriptors, ONLY : descla_siz_ , descla_init , lambda_node_ , la_nx_ , la_nrl_ , la_n_ , &
ilac_ , ilar_ , nlar_ , nlac_ , la_npc_ , la_npr_ , la_me_ , la_comm_ , &
la_myr_ , la_myc_ , nlax_ , descla_local_dims
USE parallel_toolkit, ONLY : dsqmdst, dsqmcll, dsqmred, dsqmsym
USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx, gstart
! dimension of the matrix to be diagonalized
! leading dimension of matrix evc, as declared in the calling pgm unit
! integer number of searched low-lying roots
! maximum dimension of the reduced basis set
! (the basis set is refreshed when its dimension would exceed nvecx)
COMPLEX(DP), INTENT(INOUT) :: evc(npwx,nvec)
! evc contains the refined estimates of the eigenvectors
REAL(DP), INTENT(IN) :: ethr
! energy threshold for convergence: root improvement is stopped,
! when two consecutive estimates of the root differ by less than ethr.
LOGICAL, INTENT(IN) :: uspp
! if .FALSE. : S|psi> not needed
INTEGER, INTENT(IN) :: btype(nvec)
! band type ( 1 = occupied, 0 = empty )
LOGICAL, INTENT(IN) :: lrot
! .TRUE. if the wfc have already be rotated
REAL(DP), INTENT(OUT) :: e(nvec)
! contains the estimated roots.
INTEGER, INTENT(OUT) :: dav_iter, notcnv
! integer number of iterations performed
! number of unconverged roots
!
! ... LOCAL variables
!
INTEGER, PARAMETER :: maxter = 20
! maximum number of iterations
!
INTEGER :: kter, nbase, np, n, m, nb1
! counter on iterations
! dimension of the reduced basis
! counter on the reduced basis vectors
! do-loop counters
REAL(DP), ALLOCATABLE :: ew(:)
REAL(DP), ALLOCATABLE :: hl(:,:), sl(:,:), vl(:,:)
! Hamiltonian on the reduced basis
! S matrix on the reduced basis
! eigenvectors of the Hamiltonian
! eigenvalues of the reduced hamiltonian
COMPLEX(DP), ALLOCATABLE :: psi(:,:), hpsi(:,:), spsi(:,:)
! work space, contains psi
! the product of H and psi
! the product of S and psi
LOGICAL, ALLOCATABLE :: conv(:)
! true if the root is converged
REAL(DP) :: empty_ethr
! threshold for empty bands
INTEGER :: npw2, npwx2
INTEGER :: desc( descla_siz_ ), desc_old( descla_siz_ )
INTEGER, ALLOCATABLE :: irc_ip( : )
INTEGER, ALLOCATABLE :: nrc_ip( : )
INTEGER, ALLOCATABLE :: rank_ip( :, : )
! matrix distribution descriptors
INTEGER :: nx
! maximum local block dimension
LOGICAL :: la_proc
! flag to distinguish procs involved in linear algebra
INTEGER, ALLOCATABLE :: notcnv_ip( : )
INTEGER, ALLOCATABLE :: ic_notcnv( : )
!
REAL(DP), EXTERNAL :: DDOT
!
EXTERNAL h_psi, s_psi, g_psi
! h_psi(npwx,npw,nvec,psi,hpsi)
! calculates H|psi>
! s_psi(npwx,npw,nvec,psi,spsi)
! calculates S|psi> (if needed)
! Vectors psi,hpsi,spsi are dimensioned (npwx,nvec)
! g_psi(npwx,npw,notcnv,psi,e)
! calculates (diag(h)-e)^-1 * psi, diagonal approx. to (h-e)^-1*psi
! the first nvec columns contain the trial eigenvectors
!
!
CALL start_clock( 'regterg' )
!
IF ( nvec > nvecx / 2 ) CALL errore( 'regter', 'nvecx is too small', 1 )
!
! ... threshold for empty bands
!
empty_ethr = MAX( ( ethr * 5.D0 ), 1.D-5 )
!
ALLOCATE( psi( npwx, nvecx ) )
ALLOCATE( hpsi( npwx, nvecx ) )
!
IF ( uspp ) ALLOCATE( spsi( npwx, nvecx ) )
!
! ... Initialize the matrix descriptor
!
ALLOCATE( ic_notcnv( np_ortho(2) ) )
ALLOCATE( notcnv_ip( np_ortho(2) ) )
ALLOCATE( irc_ip( np_ortho(1) ) )
ALLOCATE( nrc_ip( np_ortho(1) ) )
ALLOCATE( rank_ip( np_ortho(1), np_ortho(2) ) )
!
CALL desc_init( nvec, desc, irc_ip, nrc_ip )
!
IF( la_proc ) THEN
!
! only procs involved in the diagonalization need to allocate local
! matrix block.
!
ALLOCATE( vl( nx , nx ) )
ALLOCATE( sl( nx , nx ) )
ALLOCATE( hl( nx , nx ) )
!
ELSE
!
ALLOCATE( vl( 1 , 1 ) )
ALLOCATE( sl( 1 , 1 ) )
ALLOCATE( hl( 1 , 1 ) )
!
END IF
!
ALLOCATE( ew( nvecx ) )
ALLOCATE( conv( nvec ) )
!
npw2 = 2*npw
npwx2 = 2*npwx
notcnv = nvec
nbase = nvec
conv = .FALSE.
!
IF ( uspp ) spsi = ZERO
!
hpsi = ZERO
psi = ZERO
psi(:,1:nvec) = evc(:,1:nvec)
! ... set Im[ psi(G=0) ] - needed for numerical stability
IF ( gstart == 2 ) psi(1,1:nvec) = CMPLX( DBLE( psi(1,1:nvec) ), 0.D0 )
!
! ... hpsi contains h times the basis vectors
!
CALL h_psi( npwx, npw, nvec, psi, hpsi )
!
IF ( uspp ) CALL s_psi( npwx, npw, nvec, psi, spsi )
!
! ... hl contains the projection of the hamiltonian onto the reduced
! ... space, vl contains the eigenvectors of hl. Remember hl, vl and sl
! ... are all distributed across processors, global replicated matrixes
! ... here are never allocated
!
CALL compute_distmat( hl, psi, hpsi )
!
IF ( uspp ) THEN
!
CALL compute_distmat( sl, psi, spsi )
!
ELSE
!
CALL compute_distmat( sl, psi, psi )
!
END IF
!
IF ( lrot ) THEN
!
CALL set_e_from_h()
!
CALL set_to_identity( vl, desc )
!
ELSE
!
! ... diagonalize the reduced hamiltonian
! Calling block parallel algorithm
!
CALL prdiaghg( nbase, hl, sl, nx, ew, vl, desc )
!
e(1:nvec) = ew(1:nvec)
!
END IF
!
! ... iterate
!
iterate: DO kter = 1, maxter
!
dav_iter = kter
!
CALL start_clock( 'regterg:update' )
!
CALL reorder_v()
!
nb1 = nbase + 1
!
! ... expand the basis set with new basis vectors ( H - e*S )|psi> ...
!
CALL hpsi_dot_v()
!
CALL stop_clock( 'regterg:update' )
!
! ... approximate inverse iteration
!
CALL g_psi( npwx, npw, notcnv, 1, psi(1,nb1), ew(nb1) )
!
! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in
! ... order to improve numerical stability of subspace diagonalization
! ... (cdiaghg) ew is used as work array :
!
! ... ew = <psi_i|psi_i>, i = nbase + 1, nbase + notcnv
!
DO n = 1, notcnv
!
ew(n) = 2.D0 * DDOT( npw2, psi(1,nbase+n), 1, psi(1,nbase+n), 1 )
!
IF ( gstart == 2 ) ew(n) = ew(n) - psi(1,nbase+n) * psi(1,nbase+n)
!
END DO
!
CALL mp_sum( ew( 1:notcnv ), intra_pool_comm )
!
DO n = 1, notcnv
!
psi(:,nbase+n) = psi(:,nbase+n) / SQRT( ew(n) )
! ... set Im[ psi(G=0) ] - needed for numerical stability
IF ( gstart == 2 ) psi(1,nbase+n) = CMPLX( DBLE(psi(1,nbase+n)), 0.D0 )
!
END DO
!
! ... here compute the hpsi and spsi of the new functions
!
CALL h_psi( npwx, npw, notcnv, psi(1,nb1), hpsi(1,nb1) )
!
IF ( uspp ) CALL s_psi( npwx, npw, notcnv, psi(1,nb1), spsi(1,nb1) )
!
! ... update the reduced hamiltonian
!
! we need to save the old descriptor in order to redistribute marixes
!
desc_old = desc
!
! ... RE-Initialize the matrix descriptor
!
CALL desc_init( nbase+notcnv, desc, irc_ip, nrc_ip )
!
IF( la_proc ) THEN
! redistribute hl and sl (see dsqmred), since the dimension of the subspace has changed
!
vl = hl
DEALLOCATE( hl )
ALLOCATE( hl( nx , nx ) )
CALL dsqmred( nbase, vl, desc_old( nlax_ ), desc_old, nbase+notcnv, hl, nx, desc )
vl = sl
DEALLOCATE( sl )
ALLOCATE( sl( nx , nx ) )
CALL dsqmred( nbase, vl, desc_old( nlax_ ), desc_old, nbase+notcnv, sl, nx, desc )
DEALLOCATE( vl )
ALLOCATE( vl( nx , nx ) )
END IF
!
CALL start_clock( 'regterg:overlap' )
!
CALL update_distmat( hl, psi, hpsi )
!
IF ( uspp ) THEN
!
CALL update_distmat( sl, psi, spsi )
!
ELSE
!
CALL update_distmat( sl, psi, psi )
!
END IF
!
CALL stop_clock( 'regterg:overlap' )
!
nbase = nbase + notcnv
!
! ... diagonalize the reduced hamiltonian
! Call block parallel algorithm
!
CALL prdiaghg( nbase, hl, sl, nx, ew, vl, desc )
!
! ... test for convergence
!
WHERE( btype(1:nvec) == 1 )
!
conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < ethr ) )
!
ELSEWHERE
!
conv(1:nvec) = ( ( ABS( ew(1:nvec) - e(1:nvec) ) < empty_ethr ) )
!
END WHERE
!
notcnv = COUNT( .NOT. conv(:) )
!
e(1:nvec) = ew(1:nvec)
!
! ... if overall convergence has been achieved, or the dimension of
! ... the reduced basis set is becoming too large, or in any case if
! ... we are at the last iteration refresh the basis set. i.e. replace
! ... the first nvec elements with the current estimate of the
! ... eigenvectors; set the basis dimension to nvec.
!
IF ( notcnv == 0 .OR. nbase+notcnv > nvecx .OR. dav_iter == maxter ) THEN
!
CALL start_clock( 'regterg:last' )
!
CALL refresh_evc()
!
IF ( notcnv == 0 ) THEN
!
! ... all roots converged: return
!
CALL stop_clock( 'regterg:last' )
!
EXIT iterate
!
ELSE IF ( dav_iter == maxter ) THEN
!
! ... last iteration, some roots not converged: return
!
WRITE( stdout, '(5X,"WARNING: ",I5, &
& " eigenvalues not converged")' ) notcnv
!
CALL stop_clock( 'regterg:last' )
!
EXIT iterate
!
END IF
!
! ... refresh psi, H*psi and S*psi
!
psi(:,1:nvec) = evc(:,1:nvec)
!
IF ( uspp ) THEN
!
CALL refresh_spsi()
!
END IF
!
CALL refresh_hpsi()
!
! ... refresh the reduced hamiltonian
!
nbase = nvec
!
CALL desc_init( nvec, desc, irc_ip, nrc_ip )
!
IF( la_proc ) THEN
!
! note that nx has been changed by desc_init
! we need to re-alloc with the new size.
!
DEALLOCATE( vl, hl, sl )
ALLOCATE( vl( nx, nx ) )
ALLOCATE( hl( nx, nx ) )
ALLOCATE( sl( nx, nx ) )
!
END IF
!
CALL set_h_from_e( )
!
CALL set_to_identity( vl, desc )
CALL set_to_identity( sl, desc )
!
CALL stop_clock( 'regterg:last' )
!
END IF
!
END DO iterate
!
DEALLOCATE( vl, hl, sl )
!
DEALLOCATE( rank_ip )
DEALLOCATE( ic_notcnv )
DEALLOCATE( irc_ip )
DEALLOCATE( nrc_ip )
DEALLOCATE( notcnv_ip )
DEALLOCATE( conv )
DEALLOCATE( ew )
!
IF ( uspp ) DEALLOCATE( spsi )
!
DEALLOCATE( hpsi )
DEALLOCATE( psi )
!
CALL stop_clock( 'regterg' )
!
RETURN
!
!
CONTAINS
!
!
SUBROUTINE desc_init( nsiz, desc, irc_ip, nrc_ip )
!
INTEGER, INTENT(IN) :: nsiz
INTEGER, INTENT(OUT) :: desc(:)
INTEGER, INTENT(OUT) :: irc_ip(:)
INTEGER, INTENT(OUT) :: nrc_ip(:)
INTEGER :: i, j, rank
!
CALL descla_init( desc, nsiz, nsiz, np_ortho, me_ortho, ortho_comm, ortho_comm_id )
!
nx = desc( nlax_ )
!
DO j = 0, desc( la_npc_ ) - 1
CALL descla_local_dims( irc_ip( j + 1 ), nrc_ip( j + 1 ), desc( la_n_ ), desc( la_nx_ ), np_ortho(1), j )
DO i = 0, desc( la_npr_ ) - 1
CALL GRID2D_RANK( 'R', desc( la_npr_ ), desc( la_npc_ ), i, j, rank )
rank_ip( i+1, j+1 ) = rank * leg_ortho
END DO
END DO
!
la_proc = .FALSE.
IF( desc( lambda_node_ ) > 0 ) la_proc = .TRUE.
!
RETURN
END SUBROUTINE desc_init
!
!
SUBROUTINE set_to_identity( distmat, desc )
INTEGER, INTENT(IN) :: desc(:)
REAL(DP), INTENT(OUT) :: distmat(:,:)
INTEGER :: i
distmat = 0_DP
IF( desc( la_myc_ ) == desc( la_myr_ ) .AND. desc( lambda_node_ ) > 0 ) THEN
DO i = 1, desc( nlac_ )
distmat( i, i ) = 1_DP
END DO
END IF
RETURN
END SUBROUTINE set_to_identity
!
!
SUBROUTINE reorder_v()
!
INTEGER :: ipc, ipr
INTEGER :: nc, ic
INTEGER :: nl, npl
!
np = 0
!
notcnv_ip = 0
!
n = 0
!
DO ipc = 1, desc( la_npc_ )
!
nc = nrc_ip( ipc )
ic = irc_ip( ipc )
!
npl = 0
!
IF( ic <= nvec ) THEN
!
DO nl = 1, min( nvec - ic + 1, nc )
!
n = n + 1
!
IF ( .NOT. conv(n) ) THEN
!
! ... this root not yet converged ...
!
np = np + 1
npl = npl + 1
IF( npl == 1 ) ic_notcnv( ipc ) = np
!
! ... reorder eigenvectors so that coefficients for unconverged
! ... roots come first. This allows to use quick matrix-matrix
! ... multiplications to set a new basis vector (see below)
!
notcnv_ip( ipc ) = notcnv_ip( ipc ) + 1
!
IF ( npl /= nl ) THEN
IF( la_proc .AND. desc( la_myc_ ) == ipc-1 ) THEN
vl( :, npl) = vl( :, nl )
END IF
END IF
!
! ... for use in g_psi
!
ew(nbase+np) = e(n)
!
END IF
!
END DO
!
END IF
!
END DO
!
END SUBROUTINE reorder_v
!
!
SUBROUTINE hpsi_dot_v()
!
INTEGER :: ipc, ipr
INTEGER :: nr, nc, ir, ic, notcl, root, np
REAL(DP), ALLOCATABLE :: vtmp( :, : )
COMPLEX(DP), ALLOCATABLE :: ptmp( :, : )
REAL(DP) :: beta
ALLOCATE( vtmp( nx, nx ) )
ALLOCATE( ptmp( npwx, nx ) )
DO ipc = 1, desc( la_npc_ )
!
IF( notcnv_ip( ipc ) > 0 ) THEN
notcl = notcnv_ip( ipc )
ic = ic_notcnv( ipc )
ptmp = 0.0d0
beta = 0.0d0
DO ipr = 1, desc( la_npr_ )
!
nr = nrc_ip( ipr )
ir = irc_ip( ipr )
!
root = rank_ip( ipr, ipc )
IF( ipr-1 == desc( la_myr_ ) .AND. ipc-1 == desc( la_myc_ ) .AND. la_proc ) THEN
vtmp(:,1:notcl) = vl(:,1:notcl)
END IF
CALL mp_bcast( vtmp(:,1:notcl), root, intra_pool_comm )
!
IF ( uspp ) THEN
!
CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, &
spsi( 1, ir ), npwx2, vtmp, nx, beta, psi(1,nb1+ic-1), npwx2 )
!
ELSE
!
CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, &
psi( 1, ir ), npwx2, vtmp, nx, beta, psi(1,nb1+ic-1), npwx2 )
!
END IF
!
CALL DGEMM( 'N', 'N', npw2, notcl, nr, 1.D0, &
hpsi( 1, ir ), npwx2, vtmp, nx, 1.D0, ptmp, npwx2 )
beta = 1.0d0
END DO
DO np = 1, notcl
!
psi(:,nbase+np+ic-1) = ptmp(:,np) - ew(nbase+np+ic-1) * psi(:,nbase+np+ic-1)
!
END DO
!
END IF
!
END DO
DEALLOCATE( vtmp )
DEALLOCATE( ptmp )
RETURN
END SUBROUTINE hpsi_dot_v
!
!
SUBROUTINE refresh_evc( )
!
INTEGER :: ipc, ipr
INTEGER :: nr, nc, ir, ic, root
REAL(DP), ALLOCATABLE :: vtmp( :, : )
REAL(DP) :: beta
ALLOCATE( vtmp( nx, nx ) )
!
DO ipc = 1, desc( la_npc_ )
!
nc = nrc_ip( ipc )
ic = irc_ip( ipc )
!
IF( ic <= nvec ) THEN
!
nc = min( nc, nvec - ic + 1 )
!
beta = 0.0d0
DO ipr = 1, desc( la_npr_ )
!
nr = nrc_ip( ipr )
ir = irc_ip( ipr )
!
root = rank_ip( ipr, ipc )
IF( ipr-1 == desc( la_myr_ ) .AND. ipc-1 == desc( la_myc_ ) .AND. la_proc ) THEN
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_pool_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
psi(1,ir), npwx2, vl, nx, beta, evc(1,ic), npwx2 )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_pool_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
psi(1,ir), npwx2, vtmp, nx, beta, evc(1,ic), npwx2 )
END IF
!
beta = 1.0d0
END DO
!
END IF
!
END DO
!
DEALLOCATE( vtmp )
RETURN
END SUBROUTINE refresh_evc
!
!
SUBROUTINE refresh_spsi( )
!
INTEGER :: ipc, ipr
INTEGER :: nr, nc, ir, ic, root
REAL(DP), ALLOCATABLE :: vtmp( :, : )
REAL(DP) :: beta
ALLOCATE( vtmp( nx, nx ) )
!
DO ipc = 1, desc( la_npc_ )
!
nc = nrc_ip( ipc )
ic = irc_ip( ipc )
!
IF( ic <= nvec ) THEN
!
nc = min( nc, nvec - ic + 1 )
!
beta = 0_DP
!
DO ipr = 1, desc( la_npr_ )
!
nr = nrc_ip( ipr )
ir = irc_ip( ipr )
!
root = rank_ip( ipr, ipc )
IF( ipr-1 == desc( la_myr_ ) .AND. ipc-1 == desc( la_myc_ ) .AND. la_proc ) THEN
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_pool_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
spsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_pool_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
spsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 )
END IF
!
beta = 1_DP
END DO
!
END IF
!
END DO
!
spsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
!
DEALLOCATE( vtmp )
RETURN
END SUBROUTINE refresh_spsi
!
!
!
SUBROUTINE refresh_hpsi( )
!
INTEGER :: ipc, ipr
INTEGER :: nr, nc, ir, ic, root
REAL(DP), ALLOCATABLE :: vtmp( :, : )
REAL(DP) :: beta
ALLOCATE( vtmp( nx, nx ) )
!
DO ipc = 1, desc( la_npc_ )
!
nc = nrc_ip( ipc )
ic = irc_ip( ipc )
!
IF( ic <= nvec ) THEN
!
nc = min( nc, nvec - ic + 1 )
!
beta = 0.0d0
!
DO ipr = 1, desc( la_npr_ )
!
nr = nrc_ip( ipr )
ir = irc_ip( ipr )
!
root = rank_ip( ipr, ipc )
IF( ipr-1 == desc( la_myr_ ) .AND. ipc-1 == desc( la_myc_ ) .AND. la_proc ) THEN
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_pool_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
hpsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_pool_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
hpsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 )
END IF
!
beta = 1.0d0
END DO
!
END IF
!
END DO
!
DEALLOCATE( vtmp )
hpsi(:,1:nvec) = psi(:,nvec+1:nvec+nvec)
RETURN
END SUBROUTINE refresh_hpsi
!
!
SUBROUTINE compute_distmat( dm, v, w )
!
! This subroutine compute <vi|wj> and store the
! result in distributed matrix dm
!
INTEGER :: ipc, ipr
INTEGER :: nr, nc, ir, ic, root
REAL(DP), INTENT(OUT) :: dm( :, : )
COMPLEX(DP) :: v(:,:), w(:,:)
REAL(DP), ALLOCATABLE :: work( :, : )
!
ALLOCATE( work( nx, nx ) )
!
work = 0.0d0
!
DO ipc = 1, desc( la_npc_ ) ! loop on column procs
!
nc = nrc_ip( ipc )
ic = irc_ip( ipc )
!
DO ipr = 1, ipc ! use symmetry for the loop on row procs
!
nr = nrc_ip( ipr )
ir = irc_ip( ipr )
!
! rank of the processor for which this block (ipr,ipc) is destinated
!
root = rank_ip( ipr, ipc )
! use blas subs. on the matrix block
CALL DGEMM( 'T', 'N', nr, nc, npw2, 2.D0 , &
v(1,ir), npwx2, w(1,ic), npwx2, 0.D0, work, nx )
IF ( gstart == 2 ) &
CALL DGER( nr, nc, -1.D0, v(1,ir), npwx2, w(1,ic), npwx2, work, nx )
! accumulate result on dm of root proc.
CALL mp_root_sum( work, dm, root, intra_pool_comm )
END DO
!
END DO
!
CALL dsqmsym( nbase, dm, nx, desc )
!
DEALLOCATE( work )
!
RETURN
END SUBROUTINE compute_distmat
!
!
SUBROUTINE update_distmat( dm, v, w )
!
INTEGER :: ipc, ipr
INTEGER :: nr, nc, ir, ic, root, icc, ii
REAL(DP) :: dm( :, : )
COMPLEX(DP) :: v(:,:), w(:,:)
REAL(DP), ALLOCATABLE :: vtmp( :, : )
ALLOCATE( vtmp( nx, nx ) )
!
vtmp = 0.0d0
!
DO ipc = 1, desc( la_npc_ )
!
nc = nrc_ip( ipc )
ic = irc_ip( ipc )
!
IF( ic+nc-1 >= nb1 ) THEN
nc = MIN( nc, ic+nc-1 - nb1 + 1 )
IF( ic >= nb1 ) THEN
ii = ic
icc = 1
ELSE
ii = nb1
icc = nb1-ic+1
END IF
DO ipr = 1, ipc ! desc( la_npr_ ) use symmetry
!
nr = nrc_ip( ipr )
ir = irc_ip( ipr )
!
root = rank_ip( ipr, ipc )
CALL DGEMM( 'T', 'N', nr, nc, npw2, 2.D0, v( 1, ir ), &
npwx2, w(1,ii), npwx2, 0.D0, vtmp, nx )
!
IF ( gstart == 2 ) &
CALL DGER( nr, nc, -1.D0, v( 1, ir ), npwx2, w(1,ii), npwx2, vtmp, nx )
IF( (desc( lambda_node_ ) > 0) .AND. (ipr-1 == desc( la_myr_ )) .AND. (ipc-1 == desc( la_myc_ )) ) THEN
CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, intra_pool_comm )
ELSE
CALL mp_root_sum( vtmp(:,1:nc), dm, root, intra_pool_comm )
END IF
END DO
!
END IF
!
END DO
!
CALL dsqmsym( nbase+notcnv, dm, nx, desc )
!
DEALLOCATE( vtmp )
RETURN
END SUBROUTINE update_distmat
!
!
!
SUBROUTINE set_e_from_h()
INTEGER :: nc, ic, i
e(1:nbase) = 0.0d0
IF( desc( la_myc_ ) == desc( la_myr_ ) .AND. la_proc ) THEN
nc = desc( nlac_ )
ic = desc( ilac_ )
DO i = 1, nc
e( i + ic - 1 ) = hl( i, i )
END DO
END IF
CALL mp_sum( e(1:nbase), intra_pool_comm )
RETURN
END SUBROUTINE set_e_from_h
!
SUBROUTINE set_h_from_e()
INTEGER :: nc, ic, i
IF( la_proc ) THEN
hl = 0.0d0
IF( desc( la_myc_ ) == desc( la_myr_ ) ) THEN
nc = desc( nlac_ )
ic = desc( ilac_ )
DO i = 1, nc
hl(i,i) = e( i + ic - 1 )
END DO
END IF
END IF
RETURN
END SUBROUTINE set_h_from_e
!
END SUBROUTINE pregterg