mirror of https://gitlab.com/QEF/q-e.git
Merge branch 'myqe' into 'develop'
Ewald parallelization + xlf and misc fixes See merge request QEF/q-e!567
This commit is contained in:
commit
4d8e1feda5
|
@ -259,7 +259,7 @@
|
|||
! poor-man parallelization over bands
|
||||
! - if nproc_pool=1 : nb_s=1, nb_e=n, mykey=0
|
||||
! - if nproc_pool<=nbnd:each processor calculates band nb_s to nb_e; mykey=0
|
||||
! - if nproc_pool>nbnd :each processor takes care of band na_s=nb_e;
|
||||
! - if nproc_pool>nbnd :each processor takes care of band nb_s=nb_e;
|
||||
! mykey labels how many times each band appears (mykey=0 first time etc.)
|
||||
!
|
||||
CALL block_distribute( n, me_pool, nproc_pool, nb_s, nb_e, mykey )
|
||||
|
|
|
@ -56,7 +56,7 @@ SUBROUTINE loadqmesh_para
|
|||
!
|
||||
ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid
|
||||
IF (mp_mesh_q) THEN
|
||||
IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1)
|
||||
IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1)
|
||||
! get size of the mp_mesh in the irr wedge
|
||||
WRITE (stdout, '(a,3i4)') ' Using uniform MP q-mesh: ', nqf1, nqf2, nqf3
|
||||
call set_sym_bl ( )
|
||||
|
@ -226,7 +226,7 @@ SUBROUTINE loadqmesh_serial
|
|||
!
|
||||
ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid
|
||||
IF (mp_mesh_q) THEN
|
||||
IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1)
|
||||
IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1)
|
||||
! get size of the mp_mesh in the irr wedge
|
||||
WRITE (stdout, '(a,3i4)') ' Using uniform q-mesh: ', nqf1, nqf2, nqf3
|
||||
call set_sym_bl ( )
|
||||
|
|
|
@ -47,7 +47,9 @@ cgsolve_all_gamma.o \
|
|||
realus.o
|
||||
|
||||
QEMODS = ../../Modules/libqemod.a ../../KS_Solvers/libks_solvers.a \
|
||||
../../FFTXlib/libqefft.a ../../LAXlib/libqela.a ../../UtilXlib/libutil.a
|
||||
../../FFTXlib/libqefft.a ../../LAXlib/libqela.a \
|
||||
../../UtilXlib/libutil.a ../../dft-d3/libdftd3qe.a
|
||||
# dft-d3 required by xlf for obscure reasons
|
||||
PWOBJS = ../../PW/src/libpw.a
|
||||
|
||||
LIBMIN=
|
||||
|
|
|
@ -24,7 +24,9 @@ SIMPLEOBJS = \
|
|||
|
||||
QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \
|
||||
../../KS_Solvers/libks_solvers.a \
|
||||
../../LAXlib/libqela.a ../../UtilXlib/libutil.a
|
||||
../../LAXlib/libqela.a ../../UtilXlib/libutil.a \
|
||||
../../dft-d3/libdftd3qe.a
|
||||
# dft-d3 required by xlf for obscure reasons
|
||||
|
||||
PWOBJS = ../../PW/src/libpw.a
|
||||
GWWOBJ = ../gww/libgww.a
|
||||
|
|
|
@ -17,7 +17,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
|
|||
!
|
||||
USE kinds
|
||||
USE constants, ONLY : tpi, e2
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
|
||||
USE mp, ONLY : mp_sum
|
||||
USE martyna_tuckerman, ONLY : wg_corr_ewald, do_comp_mt
|
||||
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_ewald
|
||||
|
@ -25,17 +25,16 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
|
|||
!
|
||||
! first the dummy variables
|
||||
!
|
||||
|
||||
integer :: nat, ntyp, ityp (nat), ngm, gstart
|
||||
integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart
|
||||
! input: number of atoms in the unit cell
|
||||
! input: number of different types of atoms
|
||||
! input: the type of each atom
|
||||
! input: number of plane waves for G sum
|
||||
! input: first non-zero G vector
|
||||
|
||||
logical :: gamma_only
|
||||
logical, intent(in) :: gamma_only
|
||||
|
||||
real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
|
||||
real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
|
||||
at (3, 3), bg (3, 3), omega, alat, gcutm
|
||||
! input: the positions of the atoms in the cell
|
||||
! input: the coordinates of G vectors
|
||||
|
@ -46,7 +45,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
|
|||
! input: the volume of the unit cell
|
||||
! input: lattice parameter
|
||||
! input: cut-off of g vectors
|
||||
complex(DP) :: strf (ngm, ntyp)
|
||||
complex(DP), intent(in) :: strf (ngm, ntyp)
|
||||
! input: structure factor
|
||||
real(DP) :: ewald
|
||||
! output: the ewald energy
|
||||
|
@ -62,7 +61,9 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
|
|||
! counter on atoms
|
||||
! counter on atomic types
|
||||
! number of R vectors included in r sum
|
||||
|
||||
integer :: na_s, na_e, mykey
|
||||
! for parallelization of real-space sums
|
||||
|
||||
real(DP) :: charge, tpiba2, ewaldg, ewaldr, dtau (3), alpha, &
|
||||
r (3, mxr), r2 (mxr), rmax, rr, upperbound, fact
|
||||
! total ionic charge in the cell
|
||||
|
@ -131,17 +132,24 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, &
|
|||
endif
|
||||
ENDIF
|
||||
!
|
||||
! R-space sum here (only for the processor that contains G=0)
|
||||
! R-space sum here
|
||||
!
|
||||
! poor-man parallelization over atoms
|
||||
! - if nproc_bgrp=1 : na_s=1, na_e=nat, mykey=0
|
||||
! - if nproc_bgrp<=nat: each processor calculates atoms na_s to na_e; mykey=0
|
||||
! - if nproc_bgrp>nat : each processor takes care of atom na_s=na_e;
|
||||
! mykey labels how many times each atom appears (mykey=0 first time etc.)
|
||||
!
|
||||
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
|
||||
ewaldr = 0.d0
|
||||
if (gstart.eq.2) then
|
||||
IF ( mykey == 0 ) THEN
|
||||
rmax = 4.d0 / sqrt (alpha) / alat
|
||||
!
|
||||
! with this choice terms up to ZiZj*erfc(4) are counted (erfc(4)=2x10^-8
|
||||
!
|
||||
do na = 1, nat
|
||||
do na = na_s, na_e
|
||||
do nb = 1, nat
|
||||
dtau (:) = tau (:, na) - tau (:, nb)
|
||||
dtau (:) = tau (:, na) - tau (:, nb)
|
||||
!
|
||||
! generates nearest-neighbors shells
|
||||
!
|
||||
|
|
|
@ -16,23 +16,22 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
!
|
||||
USE kinds
|
||||
USE constants, ONLY : tpi, e2
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
|
||||
USE mp, ONLY : mp_sum
|
||||
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_force_ew
|
||||
implicit none
|
||||
!
|
||||
! First the dummy variables
|
||||
!
|
||||
|
||||
integer :: nat, ntyp, ngm, ityp (nat), gstart
|
||||
integer, intent(in) :: nat, ntyp, ngm, ityp (nat), gstart
|
||||
! input: the number of atoms
|
||||
! input: the number of types of atom
|
||||
! input: the number of G vectors
|
||||
! input: the type of each atom
|
||||
! input: first non-zero G vector
|
||||
logical :: gamma_only
|
||||
logical, intent(in) :: gamma_only
|
||||
|
||||
real(DP) :: factor, tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
|
||||
real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
|
||||
at (3, 3), bg (3, 3), omega, gcutm, alat
|
||||
! input: the coordinates of the atoms
|
||||
! input: the G vectors
|
||||
|
@ -44,10 +43,10 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
! input: cut-off of g vectors
|
||||
! input: the edge of the cell
|
||||
!
|
||||
complex(DP) :: strf (ngm, ntyp)
|
||||
complex(DP), intent(in) :: strf (ngm, ntyp)
|
||||
! input: the structure factor on the potential
|
||||
!
|
||||
real(DP) :: forceion (3, nat)
|
||||
real(DP), intent(out) :: forceion (3, nat)
|
||||
! output: the ewald part of the forces
|
||||
!
|
||||
integer, parameter :: mxr=50
|
||||
|
@ -61,6 +60,7 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
! counter on atomic types
|
||||
! the number of R vectors for real space su
|
||||
! counter on polarization
|
||||
integer :: na_s, na_e, mykey
|
||||
|
||||
real(DP) :: sumnb, arg, tpiba2, alpha, dtau (3), r (3, mxr), &
|
||||
r2 (mxr), rmax, rr, charge, upperbound, fact
|
||||
|
@ -135,29 +135,30 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
enddo
|
||||
enddo
|
||||
deallocate (aux)
|
||||
if (gstart == 1) goto 100
|
||||
!
|
||||
! R-space sum here (only for the processor that contains G=0)
|
||||
! R-space sum here (see ewald.f90 for details on parallelization)
|
||||
!
|
||||
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
|
||||
IF ( mykey > 0 ) GO TO 100
|
||||
rmax = 5.d0 / (sqrt (alpha) * alat)
|
||||
!
|
||||
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
|
||||
!
|
||||
do na = 1, nat
|
||||
do na = na_s, na_e
|
||||
do nb = 1, nat
|
||||
if (nb.eq.na) goto 50
|
||||
dtau (:) = tau (:, na) - tau (:, nb)
|
||||
dtau (:) = tau (:, na) - tau (:, nb)
|
||||
!
|
||||
! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
|
||||
!
|
||||
call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm)
|
||||
do n = 1, nrm
|
||||
rr = sqrt (r2 (n) ) * alat
|
||||
factor = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * &
|
||||
fact = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * &
|
||||
(qe_erfc (sqrt (alpha) * rr) / rr + &
|
||||
sqrt (8.0d0 * alpha / tpi) * exp ( - alpha * rr**2) ) * alat
|
||||
do ipol = 1, 3
|
||||
forceion (ipol, na) = forceion (ipol, na) - factor * r (ipol, n)
|
||||
forceion (ipol, na) = forceion (ipol, na) - fact * r (ipol, n)
|
||||
enddo
|
||||
enddo
|
||||
50 continue
|
||||
|
|
|
@ -15,7 +15,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
!
|
||||
USE kinds
|
||||
USE constants, only : tpi, e2, eps6
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
|
||||
USE mp, ONLY : mp_sum
|
||||
USE Coul_cut_2D, ONLY: do_cutoff_2D, cutoff_stres_sigmaewa
|
||||
|
||||
|
@ -23,18 +23,17 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
!
|
||||
! first the dummy variables
|
||||
!
|
||||
|
||||
integer :: nat, ntyp, ityp (nat), ngm, gstart
|
||||
integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart
|
||||
! input: number of atoms in the unit cell
|
||||
! input: number of different types of atoms
|
||||
! input: the type of each atom
|
||||
! input: number of plane waves for G sum
|
||||
! input: first nonzero g vector
|
||||
|
||||
logical :: gamma_only
|
||||
logical, intent(in) :: gamma_only
|
||||
|
||||
real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
|
||||
at (3, 3), bg (3, 3), omega, alat, gcutm, sigmaewa (3, 3)
|
||||
real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), &
|
||||
at (3, 3), bg (3, 3), omega, alat, gcutm
|
||||
! input: the positions of the atoms in the cell
|
||||
! input: the coordinates of G vectors
|
||||
! input: the square moduli of G vectors
|
||||
|
@ -44,6 +43,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
! input: the volume of the unit cell
|
||||
! input: measure of length
|
||||
! input: cut-off of g vectors
|
||||
real(DP), intent(out) :: sigmaewa (3, 3)
|
||||
! output: the ewald stress
|
||||
!
|
||||
! here the local variables
|
||||
|
@ -57,6 +57,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
! counter on atoms
|
||||
! counter on atoms
|
||||
! number of R vectors included in r sum
|
||||
integer :: na_s, na_e, mykey
|
||||
|
||||
real(DP) :: charge, arg, tpiba2, dtau (3), alpha, r (3, mxr), &
|
||||
r2 (mxr), rmax, rr, upperbound, fact, fac, g2, g2a, sdewald, sewald
|
||||
|
@ -137,25 +138,26 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
sigmaewa (l, l) = sigmaewa (l, l) + sdewald
|
||||
enddo
|
||||
!
|
||||
! R-space sum here (only for the processor that contains G=0)
|
||||
! R-space sum here (see ewald.f90 for details on parallelization)
|
||||
!
|
||||
if (gstart.eq.2) then
|
||||
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
|
||||
IF ( mykey == 0 ) THEN
|
||||
rmax = 4.0d0 / sqrt (alpha) / alat
|
||||
!
|
||||
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
|
||||
!
|
||||
do na = 1, nat
|
||||
do na = na_s, na_e
|
||||
do nb = 1, nat
|
||||
dtau (:) = tau (:, na) - tau (:, nb)
|
||||
dtau (:) = tau (:, na) - tau (:, nb)
|
||||
!
|
||||
! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
|
||||
!
|
||||
call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm)
|
||||
do nr = 1, nrm
|
||||
rr = sqrt (r2 (nr) ) * alat
|
||||
fac = - e2 / 2.0d0 / omega * alat**2 * zv (ityp (na) ) * &
|
||||
fac = - e2 / 2.0_dp/ omega * alat**2 * zv (ityp (na) ) * &
|
||||
zv ( ityp (nb) ) / rr**3 * (qe_erfc (sqrt (alpha) * rr) + &
|
||||
rr * sqrt (8 * alpha / tpi) * exp ( - alpha * rr**2) )
|
||||
rr * sqrt (8.0_dp * alpha / tpi) * exp ( - alpha * rr**2) )
|
||||
do l = 1, 3
|
||||
do m = 1, l
|
||||
sigmaewa (l, m) = sigmaewa (l, m) + fac * r(l,nr) * r(m,nr)
|
||||
|
|
|
@ -7,7 +7,8 @@ MODFLAGS= $(BASEMOD_FLAGS) \
|
|||
$(MOD_FLAG)../src
|
||||
QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \
|
||||
../../KS_Solvers/libks_solvers.a \
|
||||
../../UtilXlib/libutil.a
|
||||
../../UtilXlib/libutil.a ../../LAXlib/libqela.a
|
||||
# libqela required by xlf for obscure reasons
|
||||
PWOBJS = ../src/libpw.a
|
||||
|
||||
TLDEPS= pwlibs
|
||||
|
|
|
@ -52,7 +52,8 @@ subroutine dvex(nu,dvy)
|
|||
! write (*,*) mu, oc(mu), ocs
|
||||
if ( mu == nu ) then
|
||||
doc = 0.d0
|
||||
if(AND((l1 /= 0), (ocs > 0.d0))) then
|
||||
!if(AND((l1 /= 0), (ocs > 0.d0))) then
|
||||
if((l1 /= 0).AND.(ocs > 0.d0)) then
|
||||
i = int(ocs)
|
||||
doc = (i*(2.d0*ocs-i-1.d0)/(half-1.d0) - ocs*ocs/half) * half/ocs
|
||||
end if
|
||||
|
|
Loading…
Reference in New Issue