mirror of https://gitlab.com/QEF/q-e.git
More addusdens sepeedup, at the price of some more memory usage; minor cleanup
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@11470 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
parent
16b632e877
commit
ccf81750f8
8
Makefile
8
Makefile
|
@ -24,7 +24,6 @@ default :
|
|||
@echo ' want Quantum Transport with Wannier functions'
|
||||
@echo ' yambo electronic excitations with plane waves'
|
||||
@echo ' plumed Metadynamics plugin for pw or cp'
|
||||
@echo ' epw Electron-Phonon Coupling with wannier functions, EPW package'
|
||||
@echo ' gpu Download the latest QE-GPU package'
|
||||
@echo ' couple Library interface for coupling to external codes'
|
||||
@echo ' clean remove executables and objects'
|
||||
|
@ -34,6 +33,9 @@ default :
|
|||
echo ' tar-gui create a standalone PWgui tarball from the GUI sources'; fi
|
||||
@echo ' doc build documentation'
|
||||
@echo ' links create links to all executables in bin/'
|
||||
# @echo ' epw Electron-Phonon Coupling with wannier functions, EPW package'
|
||||
gww:
|
||||
@echo '"make gww" is obsolete, use "make gwl" instead '
|
||||
|
||||
###########################################################
|
||||
# Main targets
|
||||
|
@ -153,8 +155,8 @@ yambo: touch-dummy
|
|||
plumed: touch-dummy
|
||||
( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )
|
||||
|
||||
epw: touch-dummy
|
||||
( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )
|
||||
#epw: touch-dummy
|
||||
# ( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )
|
||||
|
||||
gpu: touch-dummy
|
||||
( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )
|
||||
|
|
|
@ -19,7 +19,7 @@ SUBROUTINE addusdens(rho)
|
|||
IMPLICIT NONE
|
||||
!
|
||||
!
|
||||
REAL(kind=dp), intent(inout) :: rho(dfftp%nnr,nspin_mag)
|
||||
REAL(kind=dp), INTENT(inout) :: rho(dfftp%nnr,nspin_mag)
|
||||
!
|
||||
IF ( tqr ) THEN
|
||||
CALL addusdens_r(rho,.true.)
|
||||
|
@ -29,14 +29,14 @@ SUBROUTINE addusdens(rho)
|
|||
#else
|
||||
CALL addusdens_g(rho)
|
||||
#endif
|
||||
END IF
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE addusdens
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
subroutine addusdens_g(rho)
|
||||
SUBROUTINE addusdens_g(rho)
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
! This routine adds to the charge density the part which is due to
|
||||
|
@ -54,106 +54,113 @@ subroutine addusdens_g(rho)
|
|||
USE control_flags, ONLY : gamma_only
|
||||
USE wavefunctions_module, ONLY : psic
|
||||
!
|
||||
implicit none
|
||||
IMPLICIT NONE
|
||||
!
|
||||
REAL(kind=dp), intent(inout) :: rho(dfftp%nnr,nspin_mag)
|
||||
REAL(kind=dp), INTENT(inout) :: rho(dfftp%nnr,nspin_mag)
|
||||
!
|
||||
! here the local variables
|
||||
!
|
||||
|
||||
integer :: ig, na, nt, ih, jh, ijh, is, nab, nij
|
||||
INTEGER :: ig, na, nt, ih, jh, ijh, is, nab, nb, nij
|
||||
! counters
|
||||
|
||||
real(DP), allocatable :: tbecsum(:,:)
|
||||
real(DP), allocatable :: qmod (:), ylmk0 (:,:)
|
||||
! the modulus of G
|
||||
! the spherical harmonics
|
||||
REAL(DP), ALLOCATABLE :: tbecsum(:,:,:)
|
||||
! \sum_kv <\psi_kv|\beta_l><beta_m|\psi_kv> for each species of atoms
|
||||
REAL(DP), ALLOCATABLE :: qmod (:), ylmk0 (:,:)
|
||||
! modulus of G, spherical harmonics
|
||||
COMPLEX(DP), ALLOCATABLE :: skk(:,:), aux2(:,:)
|
||||
! structure factors, US contribution to rho
|
||||
COMPLEX(DP), ALLOCATABLE :: aux (:,:), qgm(:)
|
||||
! work space for rho(G,nspin), Fourier transform of q
|
||||
|
||||
complex(DP), allocatable :: skk(:), aux2(:,:)
|
||||
complex(DP), allocatable :: aux (:,:), qgm(:,:)
|
||||
! work space for rho(G,nspin)
|
||||
! Fourier transform of q
|
||||
IF (.not.okvan) RETURN
|
||||
|
||||
if (.not.okvan) return
|
||||
CALL start_clock ('addusdens')
|
||||
|
||||
call start_clock ('addusdens')
|
||||
|
||||
allocate (aux ( ngm, nspin_mag))
|
||||
allocate (qmod( ngm))
|
||||
allocate (ylmk0( ngm, lmaxq * lmaxq))
|
||||
ALLOCATE ( skk(ngm), aux2(ngm,nspin_mag) )
|
||||
ALLOCATE (aux ( ngm, nspin_mag) )
|
||||
ALLOCATE (qmod( ngm), qgm( ngm) )
|
||||
ALLOCATE (ylmk0( ngm, lmaxq * lmaxq) )
|
||||
|
||||
aux (:,:) = (0.d0, 0.d0)
|
||||
call ylmr2 (lmaxq * lmaxq, ngm, g, gg, ylmk0)
|
||||
do ig = 1, ngm
|
||||
CALL ylmr2 (lmaxq * lmaxq, ngm, g, gg, ylmk0)
|
||||
DO ig = 1, ngm
|
||||
qmod (ig) = sqrt (gg (ig) )
|
||||
enddo
|
||||
ENDDO
|
||||
!
|
||||
do nt = 1, ntyp
|
||||
if ( upf(nt)%tvanp ) then
|
||||
DO nt = 1, ntyp
|
||||
IF ( upf(nt)%tvanp ) THEN
|
||||
!
|
||||
! nij = max number of (ih,jh) pairs per atom type nt
|
||||
!
|
||||
nij = nh(nt)*(nh(nt)+1)/2
|
||||
!
|
||||
allocate (qgm(ngm,nij), tbecsum(nij,nspin_mag) )
|
||||
ijh = 0
|
||||
do ih = 1, nh (nt)
|
||||
do jh = ih, nh (nt)
|
||||
ijh = ijh + 1
|
||||
call qvan2 (ngm, ih, jh, nt, qmod, qgm(1,ijh), ylmk0)
|
||||
end do
|
||||
end do
|
||||
! count max number of atoms of type nt
|
||||
!
|
||||
do na = 1, nat
|
||||
nab = 0
|
||||
DO na = 1, nat
|
||||
IF ( ityp(na) == nt ) nab = nab + 1
|
||||
ENDDO
|
||||
!
|
||||
ALLOCATE ( skk(ngm,nab), tbecsum(nij,nab,nspin_mag), aux2(ngm,nij) )
|
||||
!
|
||||
nb = 0
|
||||
DO na = 1, nat
|
||||
IF ( ityp(na) == nt ) THEN
|
||||
!
|
||||
tbecsum(:,:) = becsum(1:nij,na,1:nspin_mag)
|
||||
!
|
||||
nb = nb + 1
|
||||
tbecsum(:,nb,:) = becsum(1:nij,na,:)
|
||||
!$omp parallel do default(shared) private(ig)
|
||||
do ig = 1, ngm
|
||||
skk(ig) = eigts1 (mill (1,ig), na) * &
|
||||
eigts2 (mill (2,ig), na) * &
|
||||
eigts3 (mill (3,ig), na)
|
||||
end do
|
||||
DO ig = 1, ngm
|
||||
skk(ig,nb) = eigts1 (mill (1,ig), na) * &
|
||||
eigts2 (mill (2,ig), na) * &
|
||||
eigts3 (mill (3,ig), na)
|
||||
ENDDO
|
||||
!$omp end parallel do
|
||||
CALL dgemm( 'N', 'N', 2*ngm, nspin_mag, nij, 1.0_dp, qgm, 2*ngm,&
|
||||
tbecsum, nij, 0.0_dp, aux2, 2*ngm )
|
||||
do is = 1, nspin_mag
|
||||
ENDIF
|
||||
ENDDO
|
||||
|
||||
DO is = 1, nspin_mag
|
||||
! sum over atoms
|
||||
CALL dgemm( 'N', 'T', 2*ngm, nij, nab, 1.0_dp, skk, 2*ngm,&
|
||||
tbecsum(1,1,is), nij, 0.0_dp, aux2, 2*ngm )
|
||||
! sum over lm indices of Q_{lm}
|
||||
ijh = 0
|
||||
DO ih = 1, nh (nt)
|
||||
DO jh = ih, nh (nt)
|
||||
ijh = ijh + 1
|
||||
CALL qvan2 (ngm, ih, jh, nt, qmod, qgm, ylmk0)
|
||||
!$omp parallel do default(shared) private(ig)
|
||||
do ig = 1, ngm
|
||||
aux(ig,is)=aux(ig,is) + aux2(ig,is)*skk(ig)
|
||||
enddo
|
||||
DO ig = 1, ngm
|
||||
aux(ig,is) = aux(ig,is) + aux2(ig,ijh)*qgm(ig)
|
||||
ENDDO
|
||||
!$omp end parallel do
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
deallocate (tbecsum, qgm)
|
||||
endif
|
||||
enddo
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDIF
|
||||
DEALLOCATE (aux2, tbecsum, skk )
|
||||
ENDDO
|
||||
!
|
||||
deallocate (aux2, skk)
|
||||
deallocate (ylmk0)
|
||||
deallocate (qmod)
|
||||
DEALLOCATE (ylmk0)
|
||||
DEALLOCATE (qgm, qmod)
|
||||
!
|
||||
! convert aux to real space and add to the charge density
|
||||
!
|
||||
#ifdef DEBUG_ADDUSDENS
|
||||
call start_clock ('addus:fft')
|
||||
CALL start_clock ('addus:fft')
|
||||
#endif
|
||||
do is = 1, nspin_mag
|
||||
DO is = 1, nspin_mag
|
||||
psic(:) = (0.d0, 0.d0)
|
||||
psic( nl(:) ) = aux(:,is)
|
||||
if (gamma_only) psic( nlm(:) ) = CONJG(aux(:,is))
|
||||
IF (gamma_only) psic( nlm(:) ) = CONJG (aux(:,is))
|
||||
CALL invfft ('Dense', psic, dfftp)
|
||||
rho(:, is) = rho(:, is) + DBLE (psic (:) )
|
||||
enddo
|
||||
ENDDO
|
||||
#ifdef DEBUG_ADDUSDENS
|
||||
call stop_clock ('addus:fft')
|
||||
CALL stop_clock ('addus:fft')
|
||||
#endif
|
||||
deallocate (aux)
|
||||
DEALLOCATE (aux)
|
||||
|
||||
call stop_clock ('addusdens')
|
||||
return
|
||||
end subroutine addusdens_g
|
||||
CALL stop_clock ('addusdens')
|
||||
RETURN
|
||||
END SUBROUTINE addusdens_g
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ SUBROUTINE addusstres (sigmanlc)
|
|||
COMPLEX(DP), ALLOCATABLE :: aux(:), aux1(:,:), aux2(:,:), vg(:,:), qgm(:,:)
|
||||
! work space (complex)
|
||||
COMPLEX(DP) :: cfac
|
||||
REAL(dp) :: fac(3,nspin), sus(3,3), DDOT
|
||||
REAL(dp) :: fac(3,nspin), sus(3,3)
|
||||
! auxiliary variables
|
||||
REAL(DP) , ALLOCATABLE :: qmod(:), ylmk0(:,:), dylmk0(:,:), tbecsum(:,:)
|
||||
! work space (real)
|
||||
|
@ -120,7 +120,6 @@ SUBROUTINE addusstres (sigmanlc)
|
|||
DO jpol = 1, ipol
|
||||
sus (ipol, jpol) = sus (ipol, jpol) - omega * &
|
||||
fac (jpol, is)
|
||||
!!! DDOT ( 2*ngm, aux1(1,jpol), 1, aux2(1,is), 1 )
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDIF
|
||||
|
|
Loading…
Reference in New Issue