Merge branch 'myqe' into 'develop'

Ewald parallelization + xlf and misc fixes

See merge request QEF/q-e!567
This commit is contained in:
giannozz 2019-08-22 09:04:24 +00:00
commit 4d8e1feda5
9 changed files with 60 additions and 43 deletions

View File

@ -259,7 +259,7 @@
! poor-man parallelization over bands ! poor-man parallelization over bands
! - if nproc_pool=1 : nb_s=1, nb_e=n, mykey=0 ! - if nproc_pool=1 : nb_s=1, nb_e=n, mykey=0
! - if nproc_pool<=nbnd:each processor calculates band nb_s to nb_e; mykey=0 ! - if nproc_pool<=nbnd:each processor calculates band nb_s to nb_e; mykey=0
! - if nproc_pool>nbnd :each processor takes care of band na_s=nb_e; ! - if nproc_pool>nbnd :each processor takes care of band nb_s=nb_e;
! mykey labels how many times each band appears (mykey=0 first time etc.) ! mykey labels how many times each band appears (mykey=0 first time etc.)
! !
CALL block_distribute( n, me_pool, nproc_pool, nb_s, nb_e, mykey ) CALL block_distribute( n, me_pool, nproc_pool, nb_s, nb_e, mykey )

View File

@ -56,7 +56,7 @@ SUBROUTINE loadqmesh_para
! !
ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid
IF (mp_mesh_q) THEN IF (mp_mesh_q) THEN
IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1) IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1)
! get size of the mp_mesh in the irr wedge ! get size of the mp_mesh in the irr wedge
WRITE (stdout, '(a,3i4)') ' Using uniform MP q-mesh: ', nqf1, nqf2, nqf3 WRITE (stdout, '(a,3i4)') ' Using uniform MP q-mesh: ', nqf1, nqf2, nqf3
call set_sym_bl ( ) call set_sym_bl ( )
@ -226,7 +226,7 @@ SUBROUTINE loadqmesh_serial
! !
ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid
IF (mp_mesh_q) THEN IF (mp_mesh_q) THEN
IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1) IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1)
! get size of the mp_mesh in the irr wedge ! get size of the mp_mesh in the irr wedge
WRITE (stdout, '(a,3i4)') ' Using uniform q-mesh: ', nqf1, nqf2, nqf3 WRITE (stdout, '(a,3i4)') ' Using uniform q-mesh: ', nqf1, nqf2, nqf3
call set_sym_bl ( ) call set_sym_bl ( )

View File

@ -47,7 +47,9 @@ cgsolve_all_gamma.o \
realus.o realus.o
QEMODS = ../../Modules/libqemod.a ../../KS_Solvers/libks_solvers.a \ QEMODS = ../../Modules/libqemod.a ../../KS_Solvers/libks_solvers.a \
../../FFTXlib/libqefft.a ../../LAXlib/libqela.a ../../UtilXlib/libutil.a ../../FFTXlib/libqefft.a ../../LAXlib/libqela.a \
../../UtilXlib/libutil.a ../../dft-d3/libdftd3qe.a
# dft-d3 required by xlf for obscure reasons
PWOBJS = ../../PW/src/libpw.a PWOBJS = ../../PW/src/libpw.a
LIBMIN= LIBMIN=

View File

@ -24,7 +24,9 @@ SIMPLEOBJS = \
QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \ QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \
../../KS_Solvers/libks_solvers.a \ ../../KS_Solvers/libks_solvers.a \
../../LAXlib/libqela.a ../../UtilXlib/libutil.a ../../LAXlib/libqela.a ../../UtilXlib/libutil.a \
../../dft-d3/libdftd3qe.a
# dft-d3 required by xlf for obscure reasons
PWOBJS = ../../PW/src/libpw.a PWOBJS = ../../PW/src/libpw.a
GWWOBJ = ../gww/libgww.a GWWOBJ = ../gww/libgww.a

View File

@ -17,7 +17,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
! !
USE kinds USE kinds
USE constants, ONLY : tpi, e2 USE constants, ONLY : tpi, e2
USE mp_bands, ONLY : intra_bgrp_comm USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
USE mp, ONLY : mp_sum USE mp, ONLY : mp_sum
USE martyna_tuckerman, ONLY : wg_corr_ewald, do_comp_mt USE martyna_tuckerman, ONLY : wg_corr_ewald, do_comp_mt
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_ewald USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_ewald
@ -25,17 +25,16 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
! !
! first the dummy variables ! first the dummy variables
! !
integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart
integer :: nat, ntyp, ityp (nat), ngm, gstart
! input: number of atoms in the unit cell ! input: number of atoms in the unit cell
! input: number of different types of atoms ! input: number of different types of atoms
! input: the type of each atom ! input: the type of each atom
! input: number of plane waves for G sum ! input: number of plane waves for G sum
! input: first non-zero G vector ! input: first non-zero G vector
logical :: gamma_only logical, intent(in) :: gamma_only
real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
at (3, 3), bg (3, 3), omega, alat, gcutm at (3, 3), bg (3, 3), omega, alat, gcutm
! input: the positions of the atoms in the cell ! input: the positions of the atoms in the cell
! input: the coordinates of G vectors ! input: the coordinates of G vectors
@ -46,7 +45,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
! input: the volume of the unit cell ! input: the volume of the unit cell
! input: lattice parameter ! input: lattice parameter
! input: cut-off of g vectors ! input: cut-off of g vectors
complex(DP) :: strf (ngm, ntyp) complex(DP), intent(in) :: strf (ngm, ntyp)
! input: structure factor ! input: structure factor
real(DP) :: ewald real(DP) :: ewald
! output: the ewald energy ! output: the ewald energy
@ -62,7 +61,9 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
! counter on atoms ! counter on atoms
! counter on atomic types ! counter on atomic types
! number of R vectors included in r sum ! number of R vectors included in r sum
integer :: na_s, na_e, mykey
! for parallelization of real-space sums
real(DP) :: charge, tpiba2, ewaldg, ewaldr, dtau (3), alpha, & real(DP) :: charge, tpiba2, ewaldg, ewaldr, dtau (3), alpha, &
r (3, mxr), r2 (mxr), rmax, rr, upperbound, fact r (3, mxr), r2 (mxr), rmax, rr, upperbound, fact
! total ionic charge in the cell ! total ionic charge in the cell
@ -131,17 +132,24 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
endif endif
ENDIF ENDIF
! !
! R-space sum here (only for the processor that contains G=0) ! R-space sum here
! !
! poor-man parallelization over atoms
! - if nproc_bgrp=1 : na_s=1, na_e=nat, mykey=0
! - if nproc_bgrp<=nat: each processor calculates atoms na_s to na_e; mykey=0
! - if nproc_bgrp>nat : each processor takes care of atom na_s=na_e;
! mykey labels how many times each atom appears (mykey=0 first time etc.)
!
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
ewaldr = 0.d0 ewaldr = 0.d0
if (gstart.eq.2) then IF ( mykey == 0 ) THEN
rmax = 4.d0 / sqrt (alpha) / alat rmax = 4.d0 / sqrt (alpha) / alat
! !
! with this choice terms up to ZiZj*erfc(4) are counted (erfc(4)=2x10^-8 ! with this choice terms up to ZiZj*erfc(4) are counted (erfc(4)=2x10^-8
! !
do na = 1, nat do na = na_s, na_e
do nb = 1, nat do nb = 1, nat
dtau (:) = tau (:, na) - tau (:, nb) dtau (:) = tau (:, na) - tau (:, nb)
! !
! generates nearest-neighbors shells ! generates nearest-neighbors shells
! !

View File

@ -16,23 +16,22 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! !
USE kinds USE kinds
USE constants, ONLY : tpi, e2 USE constants, ONLY : tpi, e2
USE mp_bands, ONLY : intra_bgrp_comm USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
USE mp, ONLY : mp_sum USE mp, ONLY : mp_sum
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_force_ew USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_force_ew
implicit none implicit none
! !
! First the dummy variables ! First the dummy variables
! !
integer, intent(in) :: nat, ntyp, ngm, ityp (nat), gstart
integer :: nat, ntyp, ngm, ityp (nat), gstart
! input: the number of atoms ! input: the number of atoms
! input: the number of types of atom ! input: the number of types of atom
! input: the number of G vectors ! input: the number of G vectors
! input: the type of each atom ! input: the type of each atom
! input: first non-zero G vector ! input: first non-zero G vector
logical :: gamma_only logical, intent(in) :: gamma_only
real(DP) :: factor, tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
at (3, 3), bg (3, 3), omega, gcutm, alat at (3, 3), bg (3, 3), omega, gcutm, alat
! input: the coordinates of the atoms ! input: the coordinates of the atoms
! input: the G vectors ! input: the G vectors
@ -44,10 +43,10 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! input: cut-off of g vectors ! input: cut-off of g vectors
! input: the edge of the cell ! input: the edge of the cell
! !
complex(DP) :: strf (ngm, ntyp) complex(DP), intent(in) :: strf (ngm, ntyp)
! input: the structure factor on the potential ! input: the structure factor on the potential
! !
real(DP) :: forceion (3, nat) real(DP), intent(out) :: forceion (3, nat)
! output: the ewald part of the forces ! output: the ewald part of the forces
! !
integer, parameter :: mxr=50 integer, parameter :: mxr=50
@ -61,6 +60,7 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! counter on atomic types ! counter on atomic types
! the number of R vectors for real space su ! the number of R vectors for real space su
! counter on polarization ! counter on polarization
integer :: na_s, na_e, mykey
real(DP) :: sumnb, arg, tpiba2, alpha, dtau (3), r (3, mxr), & real(DP) :: sumnb, arg, tpiba2, alpha, dtau (3), r (3, mxr), &
r2 (mxr), rmax, rr, charge, upperbound, fact r2 (mxr), rmax, rr, charge, upperbound, fact
@ -135,29 +135,30 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
enddo enddo
enddo enddo
deallocate (aux) deallocate (aux)
if (gstart == 1) goto 100
! !
! R-space sum here (only for the processor that contains G=0) ! R-space sum here (see ewald.f90 for details on parallelization)
! !
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
IF ( mykey > 0 ) GO TO 100
rmax = 5.d0 / (sqrt (alpha) * alat) rmax = 5.d0 / (sqrt (alpha) * alat)
! !
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1 ! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
! !
do na = 1, nat do na = na_s, na_e
do nb = 1, nat do nb = 1, nat
if (nb.eq.na) goto 50 if (nb.eq.na) goto 50
dtau (:) = tau (:, na) - tau (:, nb) dtau (:) = tau (:, na) - tau (:, nb)
! !
! generates nearest-neighbors shells r(i)=R(i)-dtau(i) ! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
! !
call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm) call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm)
do n = 1, nrm do n = 1, nrm
rr = sqrt (r2 (n) ) * alat rr = sqrt (r2 (n) ) * alat
factor = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * & fact = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * &
(qe_erfc (sqrt (alpha) * rr) / rr + & (qe_erfc (sqrt (alpha) * rr) / rr + &
sqrt (8.0d0 * alpha / tpi) * exp ( - alpha * rr**2) ) * alat sqrt (8.0d0 * alpha / tpi) * exp ( - alpha * rr**2) ) * alat
do ipol = 1, 3 do ipol = 1, 3
forceion (ipol, na) = forceion (ipol, na) - factor * r (ipol, n) forceion (ipol, na) = forceion (ipol, na) - fact * r (ipol, n)
enddo enddo
enddo enddo
50 continue 50 continue

View File

@ -15,7 +15,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! !
USE kinds USE kinds
USE constants, only : tpi, e2, eps6 USE constants, only : tpi, e2, eps6
USE mp_bands, ONLY : intra_bgrp_comm USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
USE mp, ONLY : mp_sum USE mp, ONLY : mp_sum
USE Coul_cut_2D, ONLY: do_cutoff_2D, cutoff_stres_sigmaewa USE Coul_cut_2D, ONLY: do_cutoff_2D, cutoff_stres_sigmaewa
@ -23,18 +23,17 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! !
! first the dummy variables ! first the dummy variables
! !
integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart
integer :: nat, ntyp, ityp (nat), ngm, gstart
! input: number of atoms in the unit cell ! input: number of atoms in the unit cell
! input: number of different types of atoms ! input: number of different types of atoms
! input: the type of each atom ! input: the type of each atom
! input: number of plane waves for G sum ! input: number of plane waves for G sum
! input: first nonzero g vector ! input: first nonzero g vector
logical :: gamma_only logical, intent(in) :: gamma_only
real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
at (3, 3), bg (3, 3), omega, alat, gcutm, sigmaewa (3, 3) at (3, 3), bg (3, 3), omega, alat, gcutm
! input: the positions of the atoms in the cell ! input: the positions of the atoms in the cell
! input: the coordinates of G vectors ! input: the coordinates of G vectors
! input: the square moduli of G vectors ! input: the square moduli of G vectors
@ -44,6 +43,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! input: the volume of the unit cell ! input: the volume of the unit cell
! input: measure of length ! input: measure of length
! input: cut-off of g vectors ! input: cut-off of g vectors
real(DP), intent(out) :: sigmaewa (3, 3)
! output: the ewald stress ! output: the ewald stress
! !
! here the local variables ! here the local variables
@ -57,6 +57,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
! counter on atoms ! counter on atoms
! counter on atoms ! counter on atoms
! number of R vectors included in r sum ! number of R vectors included in r sum
integer :: na_s, na_e, mykey
real(DP) :: charge, arg, tpiba2, dtau (3), alpha, r (3, mxr), & real(DP) :: charge, arg, tpiba2, dtau (3), alpha, r (3, mxr), &
r2 (mxr), rmax, rr, upperbound, fact, fac, g2, g2a, sdewald, sewald r2 (mxr), rmax, rr, upperbound, fact, fac, g2, g2a, sdewald, sewald
@ -137,25 +138,26 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
sigmaewa (l, l) = sigmaewa (l, l) + sdewald sigmaewa (l, l) = sigmaewa (l, l) + sdewald
enddo enddo
! !
! R-space sum here (only for the processor that contains G=0) ! R-space sum here (see ewald.f90 for details on parallelization)
! !
if (gstart.eq.2) then CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
IF ( mykey == 0 ) THEN
rmax = 4.0d0 / sqrt (alpha) / alat rmax = 4.0d0 / sqrt (alpha) / alat
! !
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1 ! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
! !
do na = 1, nat do na = na_s, na_e
do nb = 1, nat do nb = 1, nat
dtau (:) = tau (:, na) - tau (:, nb) dtau (:) = tau (:, na) - tau (:, nb)
! !
! generates nearest-neighbors shells r(i)=R(i)-dtau(i) ! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
! !
call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm) call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm)
do nr = 1, nrm do nr = 1, nrm
rr = sqrt (r2 (nr) ) * alat rr = sqrt (r2 (nr) ) * alat
fac = - e2 / 2.0d0 / omega * alat**2 * zv (ityp (na) ) * & fac = - e2 / 2.0_dp/ omega * alat**2 * zv (ityp (na) ) * &
zv ( ityp (nb) ) / rr**3 * (qe_erfc (sqrt (alpha) * rr) + & zv ( ityp (nb) ) / rr**3 * (qe_erfc (sqrt (alpha) * rr) + &
rr * sqrt (8 * alpha / tpi) * exp ( - alpha * rr**2) ) rr * sqrt (8.0_dp * alpha / tpi) * exp ( - alpha * rr**2) )
do l = 1, 3 do l = 1, 3
do m = 1, l do m = 1, l
sigmaewa (l, m) = sigmaewa (l, m) + fac * r(l,nr) * r(m,nr) sigmaewa (l, m) = sigmaewa (l, m) + fac * r(l,nr) * r(m,nr)

View File

@ -7,7 +7,8 @@ MODFLAGS= $(BASEMOD_FLAGS) \
$(MOD_FLAG)../src $(MOD_FLAG)../src
QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \ QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \
../../KS_Solvers/libks_solvers.a \ ../../KS_Solvers/libks_solvers.a \
../../UtilXlib/libutil.a ../../UtilXlib/libutil.a ../../LAXlib/libqela.a
# libqela required by xlf for obscure reasons
PWOBJS = ../src/libpw.a PWOBJS = ../src/libpw.a
TLDEPS= pwlibs TLDEPS= pwlibs

View File

@ -52,7 +52,8 @@ subroutine dvex(nu,dvy)
! write (*,*) mu, oc(mu), ocs ! write (*,*) mu, oc(mu), ocs
if ( mu == nu ) then if ( mu == nu ) then
doc = 0.d0 doc = 0.d0
if(AND((l1 /= 0), (ocs > 0.d0))) then !if(AND((l1 /= 0), (ocs > 0.d0))) then
if((l1 /= 0).AND.(ocs > 0.d0)) then
i = int(ocs) i = int(ocs)
doc = (i*(2.d0*ocs-i-1.d0)/(half-1.d0) - ocs*ocs/half) * half/ocs doc = (i*(2.d0*ocs-i-1.d0)/(half-1.d0) - ocs*ocs/half) * half/ocs
end if end if