mirror of https://gitlab.com/QEF/q-e.git
198 lines
9.0 KiB
Fortran
198 lines
9.0 KiB
Fortran
! Copyright (C) 2015-2016 Aihui Zhou's group
|
|
!
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
!
|
|
!-------------------------------------------------------------------------------
|
|
!
|
|
! We propose some parallel orbital updating based plane wave basis methods
|
|
! for electronic structure calculations, which aims to the solution of the corresponding eigenvalue
|
|
! problems. Compared to the traditional plane wave methods, our methods have the feature of two level
|
|
! parallelization, which make them have great advantage in large-scale parallelization.
|
|
!
|
|
! The approach following Algorithm is the parallel orbital updating algorithm:
|
|
! 1. Choose initial $E_{\mathrm{cut}}^{(0)}$ and then obtain $V_{N_G^{0}}$, use the SCF method to solve
|
|
! the Kohn-Sham equation in $V_{G_0}$ and get the initial $(\lambda_i^{0},u_i^{0}), i=1, \cdots, N$
|
|
! and let $n=0$.
|
|
! 2. For $i=1,2,\ldots,N$, find $e_i^{n+1/2}\in V_{G_n}$ satisfying
|
|
! $$a(\rho_{in}^{n}; e_i^{n+1/2}, v) = -[(a(\rho_{in}^{n}; u_i^{n}, v) - \lambda_i^{n} (u_i^{n}, v))] $$
|
|
! in parallel , where $\rho_{in}^{n}$ is the input charge density obtained by the orbits obtained in the
|
|
! $n$-th iteration or the former iterations.
|
|
! 3. Find $\{\lambda_i^{n+1},u_i^{n+1}\} \in \mathbf{R}\times \tilde{V}_N$ satisfying
|
|
! $$a(\tilde{\rho}; u_i^{n+1}, v) = ( \lambda_i^{n+1}u_i^{n+1}, v) \quad \forall v \in \tilde{V}_N$$
|
|
! where $\tilde{V}_N = \mathrm{span}\{e_1^{n+1/2},\ldots,e_N^{n+1/2},u_1^{n},\ldots,u_N^{n}\}$,
|
|
! $\tilde{\rho}(x)$ is the input charge density obtained from the previous orbits.
|
|
! 4. Convergence check: if not converged, set $n=n+1$, go to step 2; else, stop.
|
|
!
|
|
! You can see the detailed information through
|
|
! X. Dai, X. Gong, A. Zhou, J. Zhu,
|
|
! A parallel orbital-updating approach for electronic structure calculations, arXiv:1405.0260 (2014).
|
|
! X. Dai, Z. Liu, X. Zhang, A. Zhou,
|
|
! A Parallel Orbital-updating Based Optimization Method for Electronic Structure Calculations,
|
|
! arXiv:1510.07230 (2015).
|
|
! Yan Pan, Xiaoying Dai, Xingao Gong, Stefano de Gironcoli, Gian-Marco Rignanese, and Aihui Zhou,
|
|
! A Parallel Orbital-updating Based Plane Wave Basis Method. J. Comp. Phys. 348, 482-492 (2017).
|
|
!
|
|
! The file is written mainly by Stefano de Gironcoli and Yan Pan.
|
|
!
|
|
! The following file is for solving step 2 of the parallel orbital updating algorithm.
|
|
!
|
|
#define ZERO ( 0.D0, 0.D0 )
|
|
#define ONE ( 1.D0, 0.D0 )
|
|
!
|
|
!----------------------------------------------------------------------------
|
|
SUBROUTINE pcg_k( hs_1psi_ptr, g_1psi_ptr, psi0, spsi0, npw, npwx, nbnd, npol, psi, ethr, iter, e, nhpsi )
|
|
!----------------------------------------------------------------------------
|
|
!
|
|
! ... solve the linear system
|
|
!
|
|
! [ H - e S + lambda Pv ]|\tilde\psi> = [e S - H ] |psi>
|
|
! Pc [ H - e S ]|\tilde\psi> = Pc [ e S - H ] |psi>
|
|
!
|
|
! the solution is sought until the residual norm is a fixed fraction of the RHS norm
|
|
! in this way the more accurate is the original problem the more accuratly the correction is computed
|
|
!
|
|
USE util_param, ONLY : DP, stdout
|
|
USE mp_bands_util, ONLY : intra_bgrp_comm
|
|
USE mp, ONLY : mp_sum
|
|
!
|
|
IMPLICIT NONE
|
|
!
|
|
! Following variables are temporary
|
|
|
|
COMPLEX(DP),INTENT(IN) :: psi0(npwx*npol,nbnd) ! psi0 needed to compute the Pv projection
|
|
COMPLEX(DP),INTENT(IN) :: spsi0(npwx*npol,nbnd) ! Spsi0 needed to compute the Pv projection
|
|
INTEGER, INTENT(IN) :: npw, npwx, nbnd, npol, iter ! input dimensions and iteration count
|
|
REAL(DP), INTENT(IN) :: ethr ! threshold for convergence.
|
|
REAL(DP), INTENT(IN) :: e ! current estimate of the target eigenvalue
|
|
COMPLEX(DP),INTENT(INOUT) :: psi(npwx*npol) ! input: the current estimate of the eigenvector,
|
|
! output: the estimated correction vector
|
|
INTEGER, INTENT(INOUT) :: nhpsi ! (updated) number of Hpsi evaluations
|
|
!
|
|
! ... LOCAL variables
|
|
!
|
|
INTEGER, PARAMETER :: maxter = 5 ! maximum number of CG iterations
|
|
!
|
|
COMPLEX(DP), ALLOCATABLE :: hpsi(:), & ! the product of H and psi
|
|
spsi(:), & ! the product of S and psi
|
|
b(:), & ! RHS for testing
|
|
r(:), p(:), sp(:), w(:),z(:) ! additional working vetors
|
|
|
|
COMPLEX(DP), ALLOCATABLE :: spsi0vec (:) ! the product of spsi0 and a trial vector
|
|
|
|
REAL(DP) :: g0, g1, g2, beta, alpha, gamma, ethr_cg, ff, ff0
|
|
INTEGER :: kdim, kdmx, cg_iter, ibnd
|
|
!
|
|
REAL(DP), EXTERNAL :: DDOT
|
|
|
|
EXTERNAL hs_1psi_ptr, g_1psi_ptr
|
|
! hs_1psi_ptr( npwx, npw, psi, hpsi, spsi )
|
|
!
|
|
CALL start_clock( 'pcg' )
|
|
!write (6,*) ' enter pcg' , e, 'npol = ', npol
|
|
!
|
|
kdim = npwx*(npol-1) + npw
|
|
kdmx = npwx* npol
|
|
!
|
|
ALLOCATE( hpsi( kdmx ), spsi( kdmx ), r( kdmx ), z( kdmx ), b( kdmx ) )
|
|
ALLOCATE( spsi0vec(nbnd) )
|
|
!
|
|
CALL start_clock( 'pcg:hs_1psi' )
|
|
CALL hs_1psi_ptr( npwx, npw, psi, hpsi, spsi ) ! apply H and S to a single wavefunction (no bgrp parallelization inside)
|
|
CALL stop_clock( 'pcg:hs_1psi' )
|
|
! define CG algorithm RHS and initial solution
|
|
r(:) = e * spsi(:) - hpsi(:) ! initial gradient
|
|
z(:) = r(:) ; call g_1psi_ptr(npwx,npw,z,e) ! initial preconditioned gradient
|
|
!- project on conduction bands
|
|
CALL start_clock( 'pcg:ortho' )
|
|
CALL ZGEMV( 'C', kdim, nbnd, ONE, spsi0, kdmx, z, 1, ZERO, spsi0vec, 1 )
|
|
CALL mp_sum( spsi0vec, intra_bgrp_comm )
|
|
CALL ZGEMV( 'N', kdim, nbnd, (-1.D0,0.D0), psi0, kdmx, spsi0vec, 1, ONE, z, 1 )
|
|
CALL stop_clock( 'pcg:ortho' )
|
|
!-
|
|
|
|
g0 = DDOT( 2*kdim, z ,1 ,r ,1)
|
|
CALL mp_sum( g0, intra_bgrp_comm ) ! g0 = < initial z | initial r >
|
|
|
|
ff = 0.d0 ; ff0 = ff
|
|
!write (6,*) 0, g0, ff
|
|
|
|
ALLOCATE( p( kdmx ), sp( kdmx ), w( kdmx ) )
|
|
|
|
! ethr_cg = ethr ! CG convergence threshold could be set from input but it is not ...
|
|
ethr_cg = 1.0D-2 ! it makes more sense to fix the convergence of the CG solution to a
|
|
! fixed function of the RHS (see ethr_cg update later).
|
|
ethr_cg = max ( 0.01*ethr, ethr_cg * g0 ) ! here we set the convergence of the correction
|
|
|
|
! save RHS for later
|
|
b(:) = r(:)
|
|
! zero the trial solution: comment next line is you are looking for |\psi_new> = |\psi> + |\tilde \psi>
|
|
psi(:) = ZERO
|
|
! initial search direction
|
|
p(:) = z(:)
|
|
|
|
iterate: &
|
|
DO cg_iter = 1, maxter
|
|
|
|
CALL start_clock( 'pcg:hs_1psi' )
|
|
CALL hs_1psi_ptr( npwx, npw, p, w, sp ) ! apply H to a single wavefunction (no bgrp parallelization here!)
|
|
CALL stop_clock( 'pcg:hs_1psi' )
|
|
w = w - e* sp
|
|
|
|
gamma = DDOT( 2*kdim, p ,1 ,w ,1)
|
|
CALL mp_sum( gamma, intra_bgrp_comm )
|
|
alpha = g0/gamma
|
|
|
|
psi(:) = psi(:) + alpha * p(:) ! updated solution
|
|
r(:) = r(:) - alpha * w(:) ! updated gradient
|
|
g2 = DDOT ( 2*kdim, z ,1 ,r ,1)
|
|
CALL mp_sum( g2, intra_bgrp_comm ) ! g2 = < old z | new r >
|
|
z(:) = r(:) ; call g_1psi_ptr(npwx,npw,z,e) ! updated preconditioned gradient
|
|
!- project on conduction bands
|
|
CALL start_clock( 'pcg:ortho' )
|
|
CALL ZGEMV( 'C', kdim, nbnd, ONE, spsi0, kdmx, z, 1, ZERO, spsi0vec, 1 )
|
|
CALL mp_sum( spsi0vec, intra_bgrp_comm )
|
|
CALL ZGEMV( 'N', kdim, nbnd, (-1.D0,0.D0), psi0, kdmx, spsi0vec, 1, ONE, z, 1 )
|
|
CALL stop_clock( 'pcg:ortho' )
|
|
!-
|
|
g1 = DDOT ( 2*kdim, z, 1, r ,1)
|
|
CALL mp_sum( g1, intra_bgrp_comm ) ! g1 = < new z | new r >
|
|
! evaluate the function
|
|
ff = - 0.5_DP * (DDOT(2*kdim, psi, 1, r ,1) + DDOT(2*kdim, psi, 1, b ,1) )
|
|
CALL mp_sum( ff, intra_bgrp_comm )
|
|
!write (6,*) cg_iter, g1, ff, gamma
|
|
if ( ff > ff0 .AND. ff0 < 0.d0 ) psi(:) = psi(:) - alpha * p(:) ! fallback solution if last iteration failed to improve the function... exit and hope next time it'll be better
|
|
IF ( ABS ( g1 ) < ethr_cg .OR. ( ff > ff0 ) ) EXIT iterate
|
|
|
|
beta = (g1-g2)/g0 ! Polak - Ribiere style update
|
|
g0 = g1 ! < new z | new r > -> < old z | old r >
|
|
p(:) = z(:) + beta * p(:) ! updated search direction
|
|
|
|
ff0 = ff ! updated minimum value reached by the function
|
|
|
|
END DO iterate
|
|
!write (6,*) ' exit pcg loop'
|
|
! orthogonalize to psi0 ...
|
|
! actually we are not doing that.. it would require both psi0 AND spsi0 to be computed and would
|
|
! remove an occupied orb contribution which is taken care of by the following rotate_wfc routine anyway
|
|
|
|
!if ( cg_iter == maxter.and. ABS(g1) > ethr_cg) &
|
|
! write (*,*) 'CG not converged maxter exceeded', cg_iter, g1, g0, ethr_cg
|
|
!IF ( ABS ( g1 ) < ethr_cg) write (*,*) 'CG correction converged ', cg_iter, g1, g0, ethr_cg
|
|
!IF ( ABS ( g1 ) > ethr_cg) write (*,*) 'CG not converged ', cg_iter, g1, g0, ethr_cg
|
|
|
|
nhpsi = nhpsi + cg_iter + 1
|
|
!
|
|
DEALLOCATE( spsi0vec )
|
|
DEALLOCATE( r, p, sp, w, z )
|
|
DEALLOCATE( hpsi, spsi )
|
|
!
|
|
CALL stop_clock( 'pcg' )
|
|
!
|
|
RETURN
|
|
!
|
|
END SUBROUTINE pcg_k
|