More addusdens sepeedup, at the price of some more memory usage; minor cleanup

git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@11470 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
giannozz 2015-04-06 20:36:31 +00:00
parent 16b632e877
commit ccf81750f8
3 changed files with 80 additions and 72 deletions

View File

@ -24,7 +24,6 @@ default :
@echo ' want Quantum Transport with Wannier functions'
@echo ' yambo electronic excitations with plane waves'
@echo ' plumed Metadynamics plugin for pw or cp'
@echo ' epw Electron-Phonon Coupling with wannier functions, EPW package'
@echo ' gpu Download the latest QE-GPU package'
@echo ' couple Library interface for coupling to external codes'
@echo ' clean remove executables and objects'
@ -34,6 +33,9 @@ default :
echo ' tar-gui create a standalone PWgui tarball from the GUI sources'; fi
@echo ' doc build documentation'
@echo ' links create links to all executables in bin/'
# @echo ' epw Electron-Phonon Coupling with wannier functions, EPW package'
gww:
@echo '"make gww" is obsolete, use "make gwl" instead '
###########################################################
# Main targets
@ -153,8 +155,8 @@ yambo: touch-dummy
plumed: touch-dummy
( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )
epw: touch-dummy
( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )
#epw: touch-dummy
# ( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )
gpu: touch-dummy
( cd install ; $(MAKE) -f plugins_makefile $@ || exit 1 )

View File

@ -19,7 +19,7 @@ SUBROUTINE addusdens(rho)
IMPLICIT NONE
!
!
REAL(kind=dp), intent(inout) :: rho(dfftp%nnr,nspin_mag)
REAL(kind=dp), INTENT(inout) :: rho(dfftp%nnr,nspin_mag)
!
IF ( tqr ) THEN
CALL addusdens_r(rho,.true.)
@ -29,14 +29,14 @@ SUBROUTINE addusdens(rho)
#else
CALL addusdens_g(rho)
#endif
END IF
ENDIF
!
RETURN
!
END SUBROUTINE addusdens
!
!----------------------------------------------------------------------
subroutine addusdens_g(rho)
SUBROUTINE addusdens_g(rho)
!----------------------------------------------------------------------
!
! This routine adds to the charge density the part which is due to
@ -54,106 +54,113 @@ subroutine addusdens_g(rho)
USE control_flags, ONLY : gamma_only
USE wavefunctions_module, ONLY : psic
!
implicit none
IMPLICIT NONE
!
REAL(kind=dp), intent(inout) :: rho(dfftp%nnr,nspin_mag)
REAL(kind=dp), INTENT(inout) :: rho(dfftp%nnr,nspin_mag)
!
! here the local variables
!
integer :: ig, na, nt, ih, jh, ijh, is, nab, nij
INTEGER :: ig, na, nt, ih, jh, ijh, is, nab, nb, nij
! counters
real(DP), allocatable :: tbecsum(:,:)
real(DP), allocatable :: qmod (:), ylmk0 (:,:)
! the modulus of G
! the spherical harmonics
REAL(DP), ALLOCATABLE :: tbecsum(:,:,:)
! \sum_kv <\psi_kv|\beta_l><beta_m|\psi_kv> for each species of atoms
REAL(DP), ALLOCATABLE :: qmod (:), ylmk0 (:,:)
! modulus of G, spherical harmonics
COMPLEX(DP), ALLOCATABLE :: skk(:,:), aux2(:,:)
! structure factors, US contribution to rho
COMPLEX(DP), ALLOCATABLE :: aux (:,:), qgm(:)
! work space for rho(G,nspin), Fourier transform of q
complex(DP), allocatable :: skk(:), aux2(:,:)
complex(DP), allocatable :: aux (:,:), qgm(:,:)
! work space for rho(G,nspin)
! Fourier transform of q
IF (.not.okvan) RETURN
if (.not.okvan) return
CALL start_clock ('addusdens')
call start_clock ('addusdens')
allocate (aux ( ngm, nspin_mag))
allocate (qmod( ngm))
allocate (ylmk0( ngm, lmaxq * lmaxq))
ALLOCATE ( skk(ngm), aux2(ngm,nspin_mag) )
ALLOCATE (aux ( ngm, nspin_mag) )
ALLOCATE (qmod( ngm), qgm( ngm) )
ALLOCATE (ylmk0( ngm, lmaxq * lmaxq) )
aux (:,:) = (0.d0, 0.d0)
call ylmr2 (lmaxq * lmaxq, ngm, g, gg, ylmk0)
do ig = 1, ngm
CALL ylmr2 (lmaxq * lmaxq, ngm, g, gg, ylmk0)
DO ig = 1, ngm
qmod (ig) = sqrt (gg (ig) )
enddo
ENDDO
!
do nt = 1, ntyp
if ( upf(nt)%tvanp ) then
DO nt = 1, ntyp
IF ( upf(nt)%tvanp ) THEN
!
! nij = max number of (ih,jh) pairs per atom type nt
!
nij = nh(nt)*(nh(nt)+1)/2
!
allocate (qgm(ngm,nij), tbecsum(nij,nspin_mag) )
ijh = 0
do ih = 1, nh (nt)
do jh = ih, nh (nt)
ijh = ijh + 1
call qvan2 (ngm, ih, jh, nt, qmod, qgm(1,ijh), ylmk0)
end do
end do
! count max number of atoms of type nt
!
do na = 1, nat
nab = 0
DO na = 1, nat
IF ( ityp(na) == nt ) nab = nab + 1
ENDDO
!
ALLOCATE ( skk(ngm,nab), tbecsum(nij,nab,nspin_mag), aux2(ngm,nij) )
!
nb = 0
DO na = 1, nat
IF ( ityp(na) == nt ) THEN
!
tbecsum(:,:) = becsum(1:nij,na,1:nspin_mag)
!
nb = nb + 1
tbecsum(:,nb,:) = becsum(1:nij,na,:)
!$omp parallel do default(shared) private(ig)
do ig = 1, ngm
skk(ig) = eigts1 (mill (1,ig), na) * &
eigts2 (mill (2,ig), na) * &
eigts3 (mill (3,ig), na)
end do
DO ig = 1, ngm
skk(ig,nb) = eigts1 (mill (1,ig), na) * &
eigts2 (mill (2,ig), na) * &
eigts3 (mill (3,ig), na)
ENDDO
!$omp end parallel do
CALL dgemm( 'N', 'N', 2*ngm, nspin_mag, nij, 1.0_dp, qgm, 2*ngm,&
tbecsum, nij, 0.0_dp, aux2, 2*ngm )
do is = 1, nspin_mag
ENDIF
ENDDO
DO is = 1, nspin_mag
! sum over atoms
CALL dgemm( 'N', 'T', 2*ngm, nij, nab, 1.0_dp, skk, 2*ngm,&
tbecsum(1,1,is), nij, 0.0_dp, aux2, 2*ngm )
! sum over lm indices of Q_{lm}
ijh = 0
DO ih = 1, nh (nt)
DO jh = ih, nh (nt)
ijh = ijh + 1
CALL qvan2 (ngm, ih, jh, nt, qmod, qgm, ylmk0)
!$omp parallel do default(shared) private(ig)
do ig = 1, ngm
aux(ig,is)=aux(ig,is) + aux2(ig,is)*skk(ig)
enddo
DO ig = 1, ngm
aux(ig,is) = aux(ig,is) + aux2(ig,ijh)*qgm(ig)
ENDDO
!$omp end parallel do
enddo
endif
enddo
deallocate (tbecsum, qgm)
endif
enddo
ENDDO
ENDDO
ENDDO
ENDIF
DEALLOCATE (aux2, tbecsum, skk )
ENDDO
!
deallocate (aux2, skk)
deallocate (ylmk0)
deallocate (qmod)
DEALLOCATE (ylmk0)
DEALLOCATE (qgm, qmod)
!
! convert aux to real space and add to the charge density
!
#ifdef DEBUG_ADDUSDENS
call start_clock ('addus:fft')
CALL start_clock ('addus:fft')
#endif
do is = 1, nspin_mag
DO is = 1, nspin_mag
psic(:) = (0.d0, 0.d0)
psic( nl(:) ) = aux(:,is)
if (gamma_only) psic( nlm(:) ) = CONJG(aux(:,is))
IF (gamma_only) psic( nlm(:) ) = CONJG (aux(:,is))
CALL invfft ('Dense', psic, dfftp)
rho(:, is) = rho(:, is) + DBLE (psic (:) )
enddo
ENDDO
#ifdef DEBUG_ADDUSDENS
call stop_clock ('addus:fft')
CALL stop_clock ('addus:fft')
#endif
deallocate (aux)
DEALLOCATE (aux)
call stop_clock ('addusdens')
return
end subroutine addusdens_g
CALL stop_clock ('addusdens')
RETURN
END SUBROUTINE addusdens_g

View File

@ -35,7 +35,7 @@ SUBROUTINE addusstres (sigmanlc)
COMPLEX(DP), ALLOCATABLE :: aux(:), aux1(:,:), aux2(:,:), vg(:,:), qgm(:,:)
! work space (complex)
COMPLEX(DP) :: cfac
REAL(dp) :: fac(3,nspin), sus(3,3), DDOT
REAL(dp) :: fac(3,nspin), sus(3,3)
! auxiliary variables
REAL(DP) , ALLOCATABLE :: qmod(:), ylmk0(:,:), dylmk0(:,:), tbecsum(:,:)
! work space (real)
@ -120,7 +120,6 @@ SUBROUTINE addusstres (sigmanlc)
DO jpol = 1, ipol
sus (ipol, jpol) = sus (ipol, jpol) - omega * &
fac (jpol, is)
!!! DDOT ( 2*ngm, aux1(1,jpol), 1, aux2(1,is), 1 )
ENDDO
ENDDO
ENDIF