Merge branch 'myqe' into 'develop'

Ewald parallelization + xlf and misc fixes

See merge request QEF/q-e!567
This commit is contained in:
giannozz 2019-08-22 09:04:24 +00:00
commit 4d8e1feda5
9 changed files with 60 additions and 43 deletions

View File

@ -259,7 +259,7 @@
! poor-man parallelization over bands
! - if nproc_pool=1 : nb_s=1, nb_e=n, mykey=0
! - if nproc_pool<=nbnd:each processor calculates band nb_s to nb_e; mykey=0
! - if nproc_pool>nbnd :each processor takes care of band na_s=nb_e;
! - if nproc_pool>nbnd :each processor takes care of band nb_s=nb_e;
! mykey labels how many times each band appears (mykey=0 first time etc.)
!
CALL block_distribute( n, me_pool, nproc_pool, nb_s, nb_e, mykey )

View File

@ -56,7 +56,7 @@ SUBROUTINE loadqmesh_para
!
ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid
IF (mp_mesh_q) THEN
IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1)
IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1)
! get size of the mp_mesh in the irr wedge
WRITE (stdout, '(a,3i4)') ' Using uniform MP q-mesh: ', nqf1, nqf2, nqf3
call set_sym_bl ( )
@ -226,7 +226,7 @@ SUBROUTINE loadqmesh_serial
!
ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid
IF (mp_mesh_q) THEN
IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1)
IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1)
! get size of the mp_mesh in the irr wedge
WRITE (stdout, '(a,3i4)') ' Using uniform q-mesh: ', nqf1, nqf2, nqf3
call set_sym_bl ( )

View File

@ -47,7 +47,9 @@ cgsolve_all_gamma.o \
realus.o
QEMODS = ../../Modules/libqemod.a ../../KS_Solvers/libks_solvers.a \
../../FFTXlib/libqefft.a ../../LAXlib/libqela.a ../../UtilXlib/libutil.a
../../FFTXlib/libqefft.a ../../LAXlib/libqela.a \
../../UtilXlib/libutil.a ../../dft-d3/libdftd3qe.a
# dft-d3 required by xlf for obscure reasons
PWOBJS = ../../PW/src/libpw.a
LIBMIN=

View File

@ -24,7 +24,9 @@ SIMPLEOBJS = \
QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \
../../KS_Solvers/libks_solvers.a \
../../LAXlib/libqela.a ../../UtilXlib/libutil.a
../../LAXlib/libqela.a ../../UtilXlib/libutil.a \
../../dft-d3/libdftd3qe.a
# dft-d3 required by xlf for obscure reasons
PWOBJS = ../../PW/src/libpw.a
GWWOBJ = ../gww/libgww.a

View File

@ -17,7 +17,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
!
USE kinds
USE constants, ONLY : tpi, e2
USE mp_bands, ONLY : intra_bgrp_comm
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
USE mp, ONLY : mp_sum
USE martyna_tuckerman, ONLY : wg_corr_ewald, do_comp_mt
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_ewald
@ -25,17 +25,16 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
!
! first the dummy variables
!
integer :: nat, ntyp, ityp (nat), ngm, gstart
integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart
! input: number of atoms in the unit cell
! input: number of different types of atoms
! input: the type of each atom
! input: number of plane waves for G sum
! input: first non-zero G vector
logical :: gamma_only
logical, intent(in) :: gamma_only
real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
at (3, 3), bg (3, 3), omega, alat, gcutm
! input: the positions of the atoms in the cell
! input: the coordinates of G vectors
@ -46,7 +45,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
! input: the volume of the unit cell
! input: lattice parameter
! input: cut-off of g vectors
complex(DP) :: strf (ngm, ntyp)
complex(DP), intent(in) :: strf (ngm, ntyp)
! input: structure factor
real(DP) :: ewald
! output: the ewald energy
@ -62,7 +61,9 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
! counter on atoms
! counter on atomic types
! number of R vectors included in r sum
integer :: na_s, na_e, mykey
! for parallelization of real-space sums
real(DP) :: charge, tpiba2, ewaldg, ewaldr, dtau (3), alpha, &
r (3, mxr), r2 (mxr), rmax, rr, upperbound, fact
! total ionic charge in the cell
@ -131,17 +132,24 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
endif
ENDIF
!
! R-space sum here (only for the processor that contains G=0)
! R-space sum here
!
! poor-man parallelization over atoms
! - if nproc_bgrp=1 : na_s=1, na_e=nat, mykey=0
! - if nproc_bgrp<=nat: each processor calculates atoms na_s to na_e; mykey=0
! - if nproc_bgrp>nat : each processor takes care of atom na_s=na_e;
! mykey labels how many times each atom appears (mykey=0 first time etc.)
!
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
ewaldr = 0.d0
if (gstart.eq.2) then
IF ( mykey == 0 ) THEN
rmax = 4.d0 / sqrt (alpha) / alat
!
! with this choice terms up to ZiZj*erfc(4) are counted (erfc(4)=2x10^-8
!
do na = 1, nat
do na = na_s, na_e
do nb = 1, nat
dtau (:) = tau (:, na) - tau (:, nb)
dtau (:) = tau (:, na) - tau (:, nb)
!
! generates nearest-neighbors shells
!

View File

@ -16,23 +16,22 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
!
USE kinds
USE constants, ONLY : tpi, e2
USE mp_bands, ONLY : intra_bgrp_comm
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
USE mp, ONLY : mp_sum
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_force_ew
implicit none
!
! First the dummy variables
!
integer :: nat, ntyp, ngm, ityp (nat), gstart
integer, intent(in) :: nat, ntyp, ngm, ityp (nat), gstart
! input: the number of atoms
! input: the number of types of atom
! input: the number of G vectors
! input: the type of each atom
! input: first non-zero G vector
logical :: gamma_only
logical, intent(in) :: gamma_only
real(DP) :: factor, tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
at (3, 3), bg (3, 3), omega, gcutm, alat
! input: the coordinates of the atoms
! input: the G vectors
@ -44,10 +43,10 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! input: cut-off of g vectors
! input: the edge of the cell
!
complex(DP) :: strf (ngm, ntyp)
complex(DP), intent(in) :: strf (ngm, ntyp)
! input: the structure factor on the potential
!
real(DP) :: forceion (3, nat)
real(DP), intent(out) :: forceion (3, nat)
! output: the ewald part of the forces
!
integer, parameter :: mxr=50
@ -61,6 +60,7 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! counter on atomic types
! the number of R vectors for real space su
! counter on polarization
integer :: na_s, na_e, mykey
real(DP) :: sumnb, arg, tpiba2, alpha, dtau (3), r (3, mxr), &
r2 (mxr), rmax, rr, charge, upperbound, fact
@ -135,29 +135,30 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
enddo
enddo
deallocate (aux)
if (gstart == 1) goto 100
!
! R-space sum here (only for the processor that contains G=0)
! R-space sum here (see ewald.f90 for details on parallelization)
!
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
IF ( mykey > 0 ) GO TO 100
rmax = 5.d0 / (sqrt (alpha) * alat)
!
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
!
do na = 1, nat
do na = na_s, na_e
do nb = 1, nat
if (nb.eq.na) goto 50
dtau (:) = tau (:, na) - tau (:, nb)
dtau (:) = tau (:, na) - tau (:, nb)
!
! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
!
call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm)
do n = 1, nrm
rr = sqrt (r2 (n) ) * alat
factor = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * &
fact = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * &
(qe_erfc (sqrt (alpha) * rr) / rr + &
sqrt (8.0d0 * alpha / tpi) * exp ( - alpha * rr**2) ) * alat
do ipol = 1, 3
forceion (ipol, na) = forceion (ipol, na) - factor * r (ipol, n)
forceion (ipol, na) = forceion (ipol, na) - fact * r (ipol, n)
enddo
enddo
50 continue

View File

@ -15,7 +15,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
!
USE kinds
USE constants, only : tpi, e2, eps6
USE mp_bands, ONLY : intra_bgrp_comm
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
USE mp, ONLY : mp_sum
USE Coul_cut_2D, ONLY: do_cutoff_2D, cutoff_stres_sigmaewa
@ -23,18 +23,17 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
!
! first the dummy variables
!
integer :: nat, ntyp, ityp (nat), ngm, gstart
integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart
! input: number of atoms in the unit cell
! input: number of different types of atoms
! input: the type of each atom
! input: number of plane waves for G sum
! input: first nonzero g vector
logical :: gamma_only
logical, intent(in) :: gamma_only
real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
at (3, 3), bg (3, 3), omega, alat, gcutm, sigmaewa (3, 3)
real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
at (3, 3), bg (3, 3), omega, alat, gcutm
! input: the positions of the atoms in the cell
! input: the coordinates of G vectors
! input: the square moduli of G vectors
@ -44,6 +43,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! input: the volume of the unit cell
! input: measure of length
! input: cut-off of g vectors
real(DP), intent(out) :: sigmaewa (3, 3)
! output: the ewald stress
!
! here the local variables
@ -57,6 +57,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! counter on atoms
! counter on atoms
! number of R vectors included in r sum
integer :: na_s, na_e, mykey
real(DP) :: charge, arg, tpiba2, dtau (3), alpha, r (3, mxr), &
r2 (mxr), rmax, rr, upperbound, fact, fac, g2, g2a, sdewald, sewald
@ -137,25 +138,26 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
sigmaewa (l, l) = sigmaewa (l, l) + sdewald
enddo
!
! R-space sum here (only for the processor that contains G=0)
! R-space sum here (see ewald.f90 for details on parallelization)
!
if (gstart.eq.2) then
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
IF ( mykey == 0 ) THEN
rmax = 4.0d0 / sqrt (alpha) / alat
!
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
!
do na = 1, nat
do na = na_s, na_e
do nb = 1, nat
dtau (:) = tau (:, na) - tau (:, nb)
dtau (:) = tau (:, na) - tau (:, nb)
!
! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
!
call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm)
do nr = 1, nrm
rr = sqrt (r2 (nr) ) * alat
fac = - e2 / 2.0d0 / omega * alat**2 * zv (ityp (na) ) * &
fac = - e2 / 2.0_dp/ omega * alat**2 * zv (ityp (na) ) * &
zv ( ityp (nb) ) / rr**3 * (qe_erfc (sqrt (alpha) * rr) + &
rr * sqrt (8 * alpha / tpi) * exp ( - alpha * rr**2) )
rr * sqrt (8.0_dp * alpha / tpi) * exp ( - alpha * rr**2) )
do l = 1, 3
do m = 1, l
sigmaewa (l, m) = sigmaewa (l, m) + fac * r(l,nr) * r(m,nr)

View File

@ -7,7 +7,8 @@ MODFLAGS= $(BASEMOD_FLAGS) \
$(MOD_FLAG)../src
QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \
../../KS_Solvers/libks_solvers.a \
../../UtilXlib/libutil.a
../../UtilXlib/libutil.a ../../LAXlib/libqela.a
# libqela required by xlf for obscure reasons
PWOBJS = ../src/libpw.a
TLDEPS= pwlibs

View File

@ -52,7 +52,8 @@ subroutine dvex(nu,dvy)
! write (*,*) mu, oc(mu), ocs
if ( mu == nu ) then
doc = 0.d0
if(AND((l1 /= 0), (ocs > 0.d0))) then
!if(AND((l1 /= 0), (ocs > 0.d0))) then
if((l1 /= 0).AND.(ocs > 0.d0)) then
i = int(ocs)
doc = (i*(2.d0*ocs-i-1.d0)/(half-1.d0) - ocs*ocs/half) * half/ocs
end if