From 7ca961f9bc636b459136a1452223339e8249f782 Mon Sep 17 00:00:00 2001 From: Jae-Mo Lihm Date: Wed, 14 Jul 2021 14:46:20 +0900 Subject: [PATCH] Modularize noninteracting density response calculation A new subroutine nonint_rho_response is added. Currently, only used for solve_e. --- PHonon/PH/Makefile | 1 + PHonon/PH/rho_response.f90 | 224 ++++++++++++++++++++++++++++++++++ PHonon/PH/solve_e.f90 | 238 ++++++++++--------------------------- 3 files changed, 285 insertions(+), 178 deletions(-) create mode 100644 PHonon/PH/rho_response.f90 diff --git a/PHonon/PH/Makefile b/PHonon/PH/Makefile index a43b438d1..fab27701a 100644 --- a/PHonon/PH/Makefile +++ b/PHonon/PH/Makefile @@ -8,6 +8,7 @@ MODFLAGS= $(BASEMOD_FLAGS) \ $(MOD_FLAG)../../dft-d3 \ $(MOD_FLAG)../../LR_Modules PHOBJS = \ +rho_response.o \ acfdtest.o \ add_dkmds.o \ add_for_charges.o \ diff --git a/PHonon/PH/rho_response.f90 b/PHonon/PH/rho_response.f90 new file mode 100644 index 000000000..3875022e5 --- /dev/null +++ b/PHonon/PH/rho_response.f90 @@ -0,0 +1,224 @@ +! +! Copyright (C) 2001-2018 Quantum ESPRESSO group +! This file is distributed under the terms of the +! GNU General Public License. See the file `License' +! in the root directory of the present distribution, +! or http://www.gnu.org/copyleft/gpl.txt . +! +! +!------------------------------------------------------------------------------ +MODULE rho_response +CONTAINS +SUBROUTINE nonint_rho_response(first_iter, npert, lrdvpsi, iudvpsi, thresh, dvscfins, & + avg_iter, drhoout, dbecsum, dbecsum_nc) + !---------------------------------------------------------------------------- + !! Compute the density response to the perturbation dV = dV_bare + dV_ind by the + !! non-interacting susceptibility. Solve Sternheimer equation + !! (H - e) * dpsi = dvpsi = -P_c+ * (dV_bare + dV_ind) * psi. + !! + !! dV_bare * psi is read from buffer iudvpsi, so they must be already calculated. + !! dV_ind is given by input dvscfins, and dV_ind * psi is calculated in apply_dpot_bands. + !! + !! For USPPs, adddvscf is called, so relevant arrays must be already calculated. + !! For DFT+U, adddvhubscf is called, so relevant arrays must be already calculated. + !! + !! Input: + !! - first_iter: true if the first iteration, where dvscfins = 0 + !! - npert: number of perturbations + !! - lrdvpsi: record length for the buffer storing dV_bare * psi + !! - iudvpsi: unit for the buffer storing dV_bare * psi + !! - thresh: threshold for solving Sternheimer equation + !! - dvscfins: dV_ind calculated in the previous iteration + !! + !! Output: + !! - avg_iter: average number of iterations for the linear equation solver + !! - drhoout: induced charge density + !! - dbecsum: becsum with dpsi + !! - dbecsum_nc: becsum with dpsi. Optional, used if noncolin is true. + !---------------------------------------------------------------------------- + USE kinds, ONLY : DP + USE io_global, ONLY : stdout + USE buffers, ONLY : get_buffer, save_buffer + USE fft_base, ONLY : dfftp + USE ions_base, ONLY : nat + USE klist, ONLY : xk, wk, ngk, igk_k + USE lsda_mod, ONLY : lsda, nspin, current_spin, isk + USE wvfct, ONLY : nbnd, npwx, et + USE wavefunctions, ONLY : evc + USE noncollin_module, ONLY : noncolin, npol, nspin_mag + USE uspp, ONLY : vkb + USE uspp_param, ONLY : nhm + USE ldaU, ONLY : lda_plus_u + USE units_ph, ONLY : lrdwf, iudwf + USE units_lr, ONLY : iuwfc, lrwfc + USE control_lr, ONLY : nbnd_occ, lgamma + USE qpoint, ONLY : nksq, ikks, ikqs + USE eqv, ONLY : dpsi, dvpsi, evq + USE apply_dpot_mod, ONLY : apply_dpot_bands + ! + IMPLICIT NONE + ! + LOGICAL, INTENT(IN) :: first_iter + !! true if the first iteration. + INTEGER, INTENT(IN) :: npert + !! number of perturbations + INTEGER, INTENT(IN) :: lrdvpsi + !! record length for the buffer storing dV_bare * psi + INTEGER, INTENT(IN) :: iudvpsi + !! unit for the buffer storing dV_bare * psi + REAL(DP), INTENT(IN) :: thresh + !! threshold for solving the linear equation + REAL(DP), INTENT(OUT) :: avg_iter + !! average number of iterations for the linear equation solver + COMPLEX(DP), POINTER, INTENT(IN) :: dvscfins(:, :, :) + !! dV_ind calculated in the previous iteration + COMPLEX(DP), INTENT(INOUT) :: drhoout(dfftp%nnr, nspin_mag, npert) + !! induced charge density + COMPLEX(DP), INTENT(INOUT) :: dbecsum(nhm*(nhm+1)/2, nat, nspin_mag, npert) + !! becsum with dpsi + COMPLEX(DP), INTENT(INOUT), OPTIONAL :: dbecsum_nc(nhm, nhm, nat, nspin, npert) + !! becsum with dpsi. Used if noncolin is true. + ! + LOGICAL :: conv_root + !! true if linear system is converged + INTEGER :: ikk, ikq, npw, npwq, ipert, num_iter, ik, nrec + !! counters + INTEGER :: tot_num_iter + !! total number of iterations in cgsolve_all + INTEGER :: tot_cg_calls + !! total number of cgsolve_all calls + REAL(DP) :: anorm + !! the norm of the error of the conjugate gradient solution + REAL(DP), ALLOCATABLE :: h_diag(:, :) + !! diagonal part of the Hamiltonian, used for preconditioning + COMPLEX(DP) , ALLOCATABLE :: aux2(:, :) + !! temporary storage used in apply_dpot_bands + ! + EXTERNAL ch_psi_all, cg_psi + !! functions passed to cgsolve_all + ! + ! Initialization + ! + ALLOCATE(h_diag(npwx*npol, nbnd)) + ALLOCATE(aux2(npwx*npol, nbnd)) + ! + tot_num_iter = 0 + tot_cg_calls = 0 + drhoout = (0.d0, 0.d0) + dbecsum = (0.d0, 0.d0) + IF (noncolin) dbecsum_nc = (0.d0, 0.d0) + ! + DO ik = 1, nksq + ikk = ikks(ik) + ikq = ikqs(ik) + npw = ngk(ikk) + npwq = ngk(ikq) + ! + IF (lsda) current_spin = isk(ikk) + ! + ! reads unperturbed wavefunctions psi_k in G_space, for all bands + ! if q=0, evq is a pointer to evc + ! + IF (nksq > 1) THEN + IF (lgamma) THEN + CALL get_buffer(evc, lrwfc, iuwfc, ikk) + ELSE + CALL get_buffer(evc, lrwfc, iuwfc, ikk) + CALL get_buffer(evq, lrwfc, iuwfc, ikq) + ENDIF + ENDIF + ! + ! compute beta functions and kinetic energy for k-point ik + ! needed by h_psi, called by ch_psi_all, called by cgsolve_all + ! + CALL init_us_2(npwq, igk_k(1, ikq), xk(1, ikq), vkb) + CALL g2_kin(ikq) + ! + ! compute preconditioning matrix h_diag used by cgsolve_all + ! + CALL h_prec(ik, evq, h_diag) + ! + DO ipert = 1, npert + ! + ! read P_c^+ x psi_kpoint into dvpsi. + ! + nrec = (ipert - 1) * nksq + ik + CALL get_buffer(dvpsi, lrdvpsi, iudvpsi, nrec) + ! + IF (.NOT. first_iter) THEN + ! + ! calculates dvscf_q*psi_k in G_space, for all bands, k=kpoint + ! dvscf_q from previous iteration (mix_potential) + ! + CALL apply_dpot_bands(ik, nbnd_occ(ikk), dvscfins(:, :, ipert), evc, aux2) + dvpsi = dvpsi + aux2 + ! + ! In the case of US pseudopotentials there is an additional + ! selfconsist term which comes from the dependence of D on + ! V_{eff} on the bare change of the potential + ! + CALL adddvscf(ipert, ik) + ! + ! DFPT+U: add to dvpsi the scf part of the response + ! Hubbard potential dV_hub + ! + IF (lda_plus_u) CALL adddvhubscf(ipert, ik) + ! + ENDIF + ! + ! Orthogonalize dvpsi to valence states + ! + CALL orthogonalize(dvpsi, evq, ikk, ikq, dpsi, npwq, .FALSE.) + ! + ! Initial guess for dpsi + ! + IF (first_iter) THEN + ! + ! At the first iteration dpsi is set to zero + ! + dpsi(:, :) = (0.d0,0.d0) + ELSE + ! starting value for delta_psi is read from iudwf + ! + nrec = (ipert - 1) * nksq + ik + CALL get_buffer(dpsi, lrdwf, iudwf, nrec) + ENDIF + ! + ! iterative solution of the linear system (H-e)*dpsi=dvpsi + ! dvpsi=-P_c+ (dvbare+dvscf)*psi , dvscf fixed. + ! + conv_root = .TRUE. + ! + CALL cgsolve_all(ch_psi_all, cg_psi, et(1, ikk), dvpsi, dpsi, h_diag, & + npwx, npwq, thresh, ik, num_iter, conv_root, anorm, nbnd_occ(ikk), npol) + ! + tot_num_iter = tot_num_iter + num_iter + tot_cg_calls = tot_cg_calls + 1 + ! + IF (.NOT. conv_root) WRITE( stdout, "(5x, 'kpoint', i4, & + & ' solve_e: root not converged ', es10.3)") ik, anorm + ! + ! writes delta_psi on iunit iudwf, k=kpoint, + ! + nrec = (ipert - 1) * nksq + ik + CALL save_buffer(dpsi, lrdwf, iudwf, nrec) + ! + ! calculates dvscf, sum over k => dvscf_q_ipert + ! + IF (noncolin) THEN + CALL incdrhoscf_nc(drhoout(1,1,ipert), wk(ikk), ik, & + dbecsum_nc(1,1,1,1,ipert), dpsi, 1.0d0) + ELSE + CALL incdrhoscf(drhoout(1,current_spin,ipert), wk(ikk), & + ik, dbecsum(1,1,current_spin,ipert), dpsi) + ENDIF + ENDDO ! ipert + ENDDO ! ik + ! + avg_iter = REAL(tot_num_iter, DP) / REAL(tot_cg_calls, DP) + ! +!---------------------------------------------------------------------------- +END SUBROUTINE nonint_rho_response +!------------------------------------------------------------------------------ +END MODULE rho_response +!------------------------------------------------------------------------------ diff --git a/PHonon/PH/solve_e.f90 b/PHonon/PH/solve_e.f90 index 1f081d506..f8568b605 100644 --- a/PHonon/PH/solve_e.f90 +++ b/PHonon/PH/solve_e.f90 @@ -22,31 +22,33 @@ subroutine solve_e ! e) computes Delta rho, Delta V_{SCF} and symmetrizes them ! f) If lda_plus_u=.true. compute also the response occupation ! matrices dnsscf + ! Step b, c, d are done in nonint_rho_response. ! USE kinds, ONLY : DP USE ions_base, ONLY : nat USE io_global, ONLY : stdout, ionode - USE io_files, ONLY : prefix, diropn - USE cell_base, ONLY : tpiba2 - USE klist, ONLY : ltetra, lgauss, xk, wk, ngk, igk_k - USE gvect, ONLY : g + USE io_files, ONLY : diropn + USE mp, ONLY : mp_sum + USE mp_pools, ONLY : inter_pool_comm + USE mp_bands, ONLY : intra_bgrp_comm + USE klist, ONLY : ltetra, lgauss, xk, ngk, igk_k USE gvecs, ONLY : doublegrid USE fft_base, ONLY : dfftp, dffts - USE lsda_mod, ONLY : lsda, nspin, current_spin, isk + USE lsda_mod, ONLY : nspin USE spin_orb, ONLY : domag - USE wvfct, ONLY : nbnd, npwx, et + USE wvfct, ONLY : nbnd, npwx USE check_stop, ONLY : check_stop_now - USE buffers, ONLY : get_buffer, save_buffer + USE buffers, ONLY : get_buffer USE wavefunctions, ONLY : evc USE uspp, ONLY : okvan, vkb - USE uspp_param, ONLY : upf, nhm + USE uspp_param, ONLY : nhm USE noncollin_module, ONLY : noncolin, npol, nspin_mag USE scf, ONLY : rho USE paw_variables, ONLY : okpaw USE paw_onecenter, ONLY : paw_dpotential USE paw_symmetry, ONLY : paw_desymmetrize - USE units_ph, ONLY : lrdwf, iudwf, lrdrho, iudrho, lrebar, iuebar + USE units_ph, ONLY : lrdrho, iudrho, lrebar, iuebar USE units_lr, ONLY : iuwfc, lrwfc USE output, ONLY : fildrho USE control_ph, ONLY : ext_recover, rec_code, & @@ -54,56 +56,43 @@ subroutine solve_e alpha_mix, lgamma_gamma, niter_ph, & flmixdpot, rec_code_read USE recover_mod, ONLY : read_rec, write_rec - USE mp_pools, ONLY : inter_pool_comm - USE mp_bands, ONLY : intra_bgrp_comm, ntask_groups - USE mp, ONLY : mp_sum USE lrus, ONLY : int3_paw - USE qpoint, ONLY : nksq, ikks, ikqs - USE eqv, ONLY : dpsi, dvpsi - USE control_lr, ONLY : nbnd_occ, lgamma - USE dv_of_drho_lr - USE fft_helper_subroutines + USE qpoint, ONLY : nksq, ikks + USE control_lr, ONLY : lgamma + USE dv_of_drho_lr, ONLY : dv_of_drho USE fft_interfaces, ONLY : fft_interpolate USE ldaU, ONLY : lda_plus_u - USE apply_dpot_mod, ONLY : apply_dpot_allocate, apply_dpot_deallocate, & - apply_dpot_bands - - implicit none - - real(DP) :: thresh, anorm, averlt, dr2 - ! thresh: convergence threshold - ! anorm : the norm of the error - ! averlt: average number of iterations - ! dr2 : self-consistency error - real(DP), allocatable :: h_diag (:,:) - ! h_diag: diagonal part of the Hamiltonian - - complex(DP) , allocatable, target :: & - dvscfin (:,:,:) ! change of the scf potential (input) - complex(DP) , pointer :: & - dvscfins (:,:,:) ! change of the scf potential (smooth) - complex(DP) , allocatable :: & - dvscfout (:,:,:), & ! change of the scf potential (output) - dbecsum(:,:,:,:), & ! the becsum with dpsi - dbecsum_nc(:,:,:,:,:), & ! the becsum with dpsi - mixin(:), mixout(:), & ! auxiliary for paw mixing - ps (:,:), & - aux2(:,:) - - logical :: conv_root, exst - ! conv_root: true if linear system is converged - - integer :: npw, npwq - integer :: kter, iter0, ipol, ibnd, iter, lter, ik, ig, is, nrec, ndim, ios - ! counters - integer :: ltaver, lintercall - integer :: ikk, ikq - - real(DP) :: tcpu, get_clock - ! timing variables - - external ch_psi_all, cg_psi + USE apply_dpot_mod, ONLY : apply_dpot_allocate, apply_dpot_deallocate + USE rho_response, ONLY : nonint_rho_response + ! + IMPLICIT NONE + ! + LOGICAL :: exst + !! + INTEGER :: ikk, npw, kter, iter0, ipol, iter, ik, is, ndim + !! counters + REAL(DP) :: thresh + !! convergence threshold + REAL(DP) :: averlt + !! average number of iterations + REAL(DP) :: dr2 + !! self-consistency error + REAL(DP) :: tcpu, get_clock + !! timing variables + COMPLEX(DP), ALLOCATABLE, TARGET :: dvscfin (:,:,:) + !! change of the scf potential (input) + COMPLEX(DP), POINTER :: dvscfins (:,:,:) + !! change of the scf potential (smooth) + COMPLEX(DP), ALLOCATABLE :: dvscfout (:,:,:) + !! change of the scf potential (output) + COMPLEX(DP), ALLOCATABLE :: dbecsum(:,:,:,:) + !! the becsum with dpsi + COMPLEX(DP), ALLOCATABLE :: dbecsum_nc(:,:,:,:,:) + !! the becsum with dpsi + COMPLEX(DP), ALLOCATABLE :: mixin(:), mixout(:) + !! auxiliary for paw mixing + ! call start_clock ('solve_e') ! ! This routine is task group aware @@ -119,12 +108,10 @@ subroutine solve_e IF (okpaw) THEN ALLOCATE (mixin(dfftp%nnr*nspin_mag*3+(nhm*(nhm+1)*nat*nspin_mag*3)/2) ) ALLOCATE (mixout(dfftp%nnr*nspin_mag*3+(nhm*(nhm+1)*nat*nspin_mag*3)/2) ) + mixin=(0.0_DP,0.0_DP) ENDIF allocate (dbecsum( nhm*(nhm+1)/2, nat, nspin_mag, 3)) IF (noncolin) allocate (dbecsum_nc (nhm, nhm, nat, nspin, 3)) - allocate (h_diag(npwx*npol, nbnd)) - allocate (aux2(npwx*npol, nbnd)) - IF (okpaw) mixin=(0.0_DP,0.0_DP) CALL apply_dpot_allocate() if (rec_code_read == -20.AND.ext_recover) then @@ -158,7 +145,7 @@ subroutine solve_e if ( (lgauss .or. ltetra) .or..not.lgamma) call errore ('solve_e', & 'called in the wrong case', 1) ! - ! Compute P_c^+ x psi for all polarization and k points. + ! Compute P_c^+ x psi for all polarization and k points and store in buffer ! DO ik = 1, nksq DO ipol = 1, 3 @@ -186,8 +173,6 @@ subroutine solve_e ! FLUSH( stdout ) iter = kter + iter0 - ltaver = 0 - lintercall = 0 ! dvscfout(:,:,:)=(0.d0,0.d0) dbecsum(:,:,:,:)=(0.d0,0.d0) @@ -196,120 +181,21 @@ subroutine solve_e ! DFPT+U: at each iteration calculate dnsscf, ! i.e. the scf variation of the occupation matrix ns. ! - IF (lda_plus_u .AND. (iter.NE.1)) & - CALL dnsq_scf (3, .false., 0, 1, .false.) + IF (lda_plus_u .AND. (iter /= 1)) CALL dnsq_scf(3, .false., 0, 1, .false.) ! - do ik = 1, nksq - ikk=ikks(ik) - ikq=ikqs(ik) - ! - npw = ngk(ikk) - npwq= npw ! q=0 always in this routine - ! - if (lsda) current_spin = isk (ikk) - ! - ! reads unperturbed wavefunctions psi_k in G_space, for all bands - ! - if (nksq > 1) THEN - call get_buffer (evc, lrwfc, iuwfc, ikk) - end if - ! - ! compute beta functions and kinetic energy for k-point ik - ! needed by h_psi, called by ch_psi_all, called by cgsolve_all - ! - CALL init_us_2 (npw, igk_k(1,ikk), xk (1, ikk), vkb) - CALL g2_kin(ikk) - ! - ! compute preconditioning matrix h_diag used by cgsolve_all - ! - CALL h_prec (ik, evc, h_diag) - ! - do ipol = 1, 3 - ! - ! read P_c^+ x psi_kpoint into dvpsi. - ! - nrec = (ipol - 1) * nksq + ik - CALL get_buffer(dvpsi, lrebar, iuebar, nrec) - ! - if (iter > 1) then - ! - ! calculates dvscf_q*psi_k in G_space, for all bands, k=kpoint - ! dvscf_q from previous iteration (mix_potential) - ! - CALL apply_dpot_bands(ik, nbnd_occ(ikk), dvscfins(:, :, ipol), evc, aux2) - dvpsi = dvpsi + aux2 - ! - ! In the case of US pseudopotentials there is an additional - ! selfconsist term which comes from the dependence of D on - ! V_{eff} on the bare change of the potential - ! - call adddvscf(ipol,ik) - ! - ! DFPT+U: add to dvpsi the scf part of the response - ! Hubbard potential dV_hub - ! - if (lda_plus_u) call adddvhubscf (ipol, ik) - ! - endif - ! - ! Orthogonalize dvpsi to valence states: ps = - ! - CALL orthogonalize(dvpsi, evc, ikk, ikk, dpsi, npwq, .false.) - ! - if (iter == 1) then - ! - ! At the first iteration dpsi and dvscfin are set to zero, - ! - dpsi(:,:)=(0.d0,0.d0) - dvscfin(:,:,:)=(0.d0,0.d0) - ! - ! starting threshold for the iterative solution of the linear - ! system - ! - thresh = 1.d-2 - if (lnoloc) thresh = 1.d-5 - else - ! starting value for delta_psi is read from iudwf - ! - nrec = (ipol - 1) * nksq + ik - call get_buffer (dpsi, lrdwf, iudwf, nrec) - ! - ! threshold for iterative solution of the linear system - ! - thresh = min (0.1d0 * sqrt (dr2), 1.0d-2) - endif - ! - ! iterative solution of the linear system (H-e)*dpsi=dvpsi - ! dvpsi=-P_c+ (dvbare+dvscf)*psi , dvscf fixed. - ! - - conv_root = .true. - - call cgsolve_all (ch_psi_all,cg_psi,et(1,ikk),dvpsi,dpsi, & - h_diag,npwx,npw,thresh,ik,lter,conv_root,anorm,nbnd_occ(ikk),npol) - - ltaver = ltaver + lter - lintercall = lintercall + 1 - if (.not.conv_root) WRITE( stdout, "(5x,'kpoint',i4,' ibnd',i4, & - & ' solve_e: root not converged ',es10.3)") ik & - &, ibnd, anorm - ! - ! writes delta_psi on iunit iudwf, k=kpoint, - ! - nrec = (ipol - 1) * nksq + ik - call save_buffer(dpsi, lrdwf, iudwf, nrec) - ! - ! calculates dvscf, sum over k => dvscf_q_ipert - ! - IF (noncolin) THEN - call incdrhoscf_nc(dvscfout(1,1,ipol),wk(ikk),ik, & - dbecsum_nc(1,1,1,1,ipol), dpsi, 1.0d0) - ELSE - call incdrhoscf (dvscfout(1,current_spin,ipol), wk(ikk), & - ik, dbecsum(1,1,current_spin,ipol), dpsi) - ENDIF - enddo ! on polarizations - enddo ! on k points + ! set threshold for the iterative solution of the linear system + ! + IF (iter == 1) THEN + thresh = 1.d-2 + IF (lnoloc) thresh = 1.d-5 + ELSE + thresh = MIN(0.1d0 * SQRT(dr2), 1.0d-2) + ENDIF + ! + ! Compute dvscfout, the charge density response to the total potential + ! + CALL nonint_rho_response(iter==1, 3, lrebar, iuebar, thresh, dvscfins, & + averlt, dvscfout, dbecsum, dbecsum_nc) ! ! The calculation of dbecsum is distributed across processors ! (see addusdbec) - we sum over processors the contributions @@ -396,8 +282,6 @@ subroutine solve_e call newdq(dvscfin,3) - averlt = DBLE (ltaver) / DBLE (lintercall) - tcpu = get_clock ('PHONON') WRITE( stdout, '(/,5x," iter # ",i3," total cpu time :",f8.1, & & " secs av.it.: ",f5.1)') iter, tcpu, averlt @@ -426,7 +310,6 @@ subroutine solve_e 155 continue ! CALL apply_dpot_deallocate() - deallocate (h_diag) deallocate (dbecsum) deallocate (dvscfout) IF (okpaw) THEN @@ -436,7 +319,6 @@ subroutine solve_e if (doublegrid) deallocate (dvscfins) deallocate (dvscfin) if (noncolin) deallocate(dbecsum_nc) - deallocate(aux2) call stop_clock ('solve_e') return