diff --git a/CPV/src/ldaU.f90 b/CPV/src/ldaU.f90 index 9272fed0f..e8ab65524 100644 --- a/CPV/src/ldaU.f90 +++ b/CPV/src/ldaU.f90 @@ -259,7 +259,7 @@ ! poor-man parallelization over bands ! - if nproc_pool=1 : nb_s=1, nb_e=n, mykey=0 ! - if nproc_pool<=nbnd:each processor calculates band nb_s to nb_e; mykey=0 - ! - if nproc_pool>nbnd :each processor takes care of band na_s=nb_e; + ! - if nproc_pool>nbnd :each processor takes care of band nb_s=nb_e; ! mykey labels how many times each band appears (mykey=0 first time etc.) ! CALL block_distribute( n, me_pool, nproc_pool, nb_s, nb_e, mykey ) diff --git a/EPW/src/loadqmesh.f90 b/EPW/src/loadqmesh.f90 index dc13e7c71..af095e50a 100644 --- a/EPW/src/loadqmesh.f90 +++ b/EPW/src/loadqmesh.f90 @@ -56,7 +56,7 @@ SUBROUTINE loadqmesh_para ! ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid IF (mp_mesh_q) THEN - IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1) + IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1) ! get size of the mp_mesh in the irr wedge WRITE (stdout, '(a,3i4)') ' Using uniform MP q-mesh: ', nqf1, nqf2, nqf3 call set_sym_bl ( ) @@ -226,7 +226,7 @@ SUBROUTINE loadqmesh_serial ! ELSEIF ( (nqf1.ne.0) .and. (nqf2.ne.0) .and. (nqf3.ne.0) ) THEN ! generate grid IF (mp_mesh_q) THEN - IF (lscreen) CALL errore ('If lscreen=.true. do not use mp_mesh_q',1) + IF (lscreen) CALL errore ('loadqmesh','If lscreen=.true. do not use mp_mesh_q',1) ! get size of the mp_mesh in the irr wedge WRITE (stdout, '(a,3i4)') ' Using uniform q-mesh: ', nqf1, nqf2, nqf3 call set_sym_bl ( ) diff --git a/GWW/pw4gww/Makefile b/GWW/pw4gww/Makefile index 5458471fb..c62eb9387 100644 --- a/GWW/pw4gww/Makefile +++ b/GWW/pw4gww/Makefile @@ -47,7 +47,9 @@ cgsolve_all_gamma.o \ realus.o QEMODS = ../../Modules/libqemod.a ../../KS_Solvers/libks_solvers.a \ - ../../FFTXlib/libqefft.a ../../LAXlib/libqela.a ../../UtilXlib/libutil.a + ../../FFTXlib/libqefft.a ../../LAXlib/libqela.a \ + ../../UtilXlib/libutil.a ../../dft-d3/libdftd3qe.a +# dft-d3 required by xlf for obscure reasons PWOBJS = ../../PW/src/libpw.a LIBMIN= diff --git a/GWW/simple/Makefile b/GWW/simple/Makefile index 1ffa6c878..50ad6872f 100644 --- a/GWW/simple/Makefile +++ b/GWW/simple/Makefile @@ -24,7 +24,9 @@ SIMPLEOBJS = \ QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \ ../../KS_Solvers/libks_solvers.a \ - ../../LAXlib/libqela.a ../../UtilXlib/libutil.a + ../../LAXlib/libqela.a ../../UtilXlib/libutil.a \ + ../../dft-d3/libdftd3qe.a +# dft-d3 required by xlf for obscure reasons PWOBJS = ../../PW/src/libpw.a GWWOBJ = ../gww/libgww.a diff --git a/PW/src/ewald.f90 b/PW/src/ewald.f90 index 4db17ab9f..429dca2be 100644 --- a/PW/src/ewald.f90 +++ b/PW/src/ewald.f90 @@ -17,7 +17,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, & ! USE kinds USE constants, ONLY : tpi, e2 - USE mp_bands, ONLY : intra_bgrp_comm + USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp USE mp, ONLY : mp_sum USE martyna_tuckerman, ONLY : wg_corr_ewald, do_comp_mt USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_ewald @@ -25,17 +25,16 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, & ! ! first the dummy variables ! - - integer :: nat, ntyp, ityp (nat), ngm, gstart + integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart ! input: number of atoms in the unit cell ! input: number of different types of atoms ! input: the type of each atom ! input: number of plane waves for G sum ! input: first non-zero G vector - logical :: gamma_only + logical, intent(in) :: gamma_only - real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & + real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & at (3, 3), bg (3, 3), omega, alat, gcutm ! input: the positions of the atoms in the cell ! input: the coordinates of G vectors @@ -46,7 +45,7 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, & ! input: the volume of the unit cell ! input: lattice parameter ! input: cut-off of g vectors - complex(DP) :: strf (ngm, ntyp) + complex(DP), intent(in) :: strf (ngm, ntyp) ! input: structure factor real(DP) :: ewald ! output: the ewald energy @@ -62,7 +61,9 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, & ! counter on atoms ! counter on atomic types ! number of R vectors included in r sum - + integer :: na_s, na_e, mykey + ! for parallelization of real-space sums + real(DP) :: charge, tpiba2, ewaldg, ewaldr, dtau (3), alpha, & r (3, mxr), r2 (mxr), rmax, rr, upperbound, fact ! total ionic charge in the cell @@ -131,17 +132,24 @@ function ewald (alat, nat, ntyp, ityp, zv, at, bg, tau, omega, g, & endif ENDIF ! - ! R-space sum here (only for the processor that contains G=0) + ! R-space sum here ! + ! poor-man parallelization over atoms + ! - if nproc_bgrp=1 : na_s=1, na_e=nat, mykey=0 + ! - if nproc_bgrp<=nat: each processor calculates atoms na_s to na_e; mykey=0 + ! - if nproc_bgrp>nat : each processor takes care of atom na_s=na_e; + ! mykey labels how many times each atom appears (mykey=0 first time etc.) + ! + CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey ) ewaldr = 0.d0 - if (gstart.eq.2) then + IF ( mykey == 0 ) THEN rmax = 4.d0 / sqrt (alpha) / alat ! ! with this choice terms up to ZiZj*erfc(4) are counted (erfc(4)=2x10^-8 ! - do na = 1, nat + do na = na_s, na_e do nb = 1, nat - dtau (:) = tau (:, na) - tau (:, nb) + dtau (:) = tau (:, na) - tau (:, nb) ! ! generates nearest-neighbors shells ! diff --git a/PW/src/force_ew.f90 b/PW/src/force_ew.f90 index 029ab88e0..e79cc5b8c 100644 --- a/PW/src/force_ew.f90 +++ b/PW/src/force_ew.f90 @@ -16,23 +16,22 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, & ! USE kinds USE constants, ONLY : tpi, e2 - USE mp_bands, ONLY : intra_bgrp_comm + USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp USE mp, ONLY : mp_sum USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_force_ew implicit none ! ! First the dummy variables ! - - integer :: nat, ntyp, ngm, ityp (nat), gstart + integer, intent(in) :: nat, ntyp, ngm, ityp (nat), gstart ! input: the number of atoms ! input: the number of types of atom ! input: the number of G vectors ! input: the type of each atom ! input: first non-zero G vector - logical :: gamma_only + logical, intent(in) :: gamma_only - real(DP) :: factor, tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & + real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & at (3, 3), bg (3, 3), omega, gcutm, alat ! input: the coordinates of the atoms ! input: the G vectors @@ -44,10 +43,10 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, & ! input: cut-off of g vectors ! input: the edge of the cell ! - complex(DP) :: strf (ngm, ntyp) + complex(DP), intent(in) :: strf (ngm, ntyp) ! input: the structure factor on the potential ! - real(DP) :: forceion (3, nat) + real(DP), intent(out) :: forceion (3, nat) ! output: the ewald part of the forces ! integer, parameter :: mxr=50 @@ -61,6 +60,7 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, & ! counter on atomic types ! the number of R vectors for real space su ! counter on polarization + integer :: na_s, na_e, mykey real(DP) :: sumnb, arg, tpiba2, alpha, dtau (3), r (3, mxr), & r2 (mxr), rmax, rr, charge, upperbound, fact @@ -135,29 +135,30 @@ subroutine force_ew (alat, nat, ntyp, ityp, zv, at, bg, tau, & enddo enddo deallocate (aux) - if (gstart == 1) goto 100 ! - ! R-space sum here (only for the processor that contains G=0) + ! R-space sum here (see ewald.f90 for details on parallelization) ! + CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey ) + IF ( mykey > 0 ) GO TO 100 rmax = 5.d0 / (sqrt (alpha) * alat) ! ! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1 ! - do na = 1, nat + do na = na_s, na_e do nb = 1, nat if (nb.eq.na) goto 50 - dtau (:) = tau (:, na) - tau (:, nb) + dtau (:) = tau (:, na) - tau (:, nb) ! ! generates nearest-neighbors shells r(i)=R(i)-dtau(i) ! call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm) do n = 1, nrm rr = sqrt (r2 (n) ) * alat - factor = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * & + fact = zv (ityp (na) ) * zv (ityp (nb) ) * e2 / rr**2 * & (qe_erfc (sqrt (alpha) * rr) / rr + & sqrt (8.0d0 * alpha / tpi) * exp ( - alpha * rr**2) ) * alat do ipol = 1, 3 - forceion (ipol, na) = forceion (ipol, na) - factor * r (ipol, n) + forceion (ipol, na) = forceion (ipol, na) - fact * r (ipol, n) enddo enddo 50 continue diff --git a/PW/src/stres_ewa.f90 b/PW/src/stres_ewa.f90 index fd54feaa9..8e7b15335 100644 --- a/PW/src/stres_ewa.f90 +++ b/PW/src/stres_ewa.f90 @@ -15,7 +15,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, & ! USE kinds USE constants, only : tpi, e2, eps6 - USE mp_bands, ONLY : intra_bgrp_comm + USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp USE mp, ONLY : mp_sum USE Coul_cut_2D, ONLY: do_cutoff_2D, cutoff_stres_sigmaewa @@ -23,18 +23,17 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, & ! ! first the dummy variables ! - - integer :: nat, ntyp, ityp (nat), ngm, gstart + integer, intent(in) :: nat, ntyp, ityp (nat), ngm, gstart ! input: number of atoms in the unit cell ! input: number of different types of atoms ! input: the type of each atom ! input: number of plane waves for G sum ! input: first nonzero g vector - logical :: gamma_only + logical, intent(in) :: gamma_only - real(DP) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & - at (3, 3), bg (3, 3), omega, alat, gcutm, sigmaewa (3, 3) + real(DP), intent(in) :: tau (3, nat), g (3, ngm), gg (ngm), zv (ntyp), & + at (3, 3), bg (3, 3), omega, alat, gcutm ! input: the positions of the atoms in the cell ! input: the coordinates of G vectors ! input: the square moduli of G vectors @@ -44,6 +43,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, & ! input: the volume of the unit cell ! input: measure of length ! input: cut-off of g vectors + real(DP), intent(out) :: sigmaewa (3, 3) ! output: the ewald stress ! ! here the local variables @@ -57,6 +57,7 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, & ! counter on atoms ! counter on atoms ! number of R vectors included in r sum + integer :: na_s, na_e, mykey real(DP) :: charge, arg, tpiba2, dtau (3), alpha, r (3, mxr), & r2 (mxr), rmax, rr, upperbound, fact, fac, g2, g2a, sdewald, sewald @@ -137,25 +138,26 @@ subroutine stres_ewa (alat, nat, ntyp, ityp, zv, at, bg, tau, & sigmaewa (l, l) = sigmaewa (l, l) + sdewald enddo ! - ! R-space sum here (only for the processor that contains G=0) + ! R-space sum here (see ewald.f90 for details on parallelization) ! - if (gstart.eq.2) then + CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey ) + IF ( mykey == 0 ) THEN rmax = 4.0d0 / sqrt (alpha) / alat ! ! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1 ! - do na = 1, nat + do na = na_s, na_e do nb = 1, nat - dtau (:) = tau (:, na) - tau (:, nb) + dtau (:) = tau (:, na) - tau (:, nb) ! ! generates nearest-neighbors shells r(i)=R(i)-dtau(i) ! call rgen (dtau, rmax, mxr, at, bg, r, r2, nrm) do nr = 1, nrm rr = sqrt (r2 (nr) ) * alat - fac = - e2 / 2.0d0 / omega * alat**2 * zv (ityp (na) ) * & + fac = - e2 / 2.0_dp/ omega * alat**2 * zv (ityp (na) ) * & zv ( ityp (nb) ) / rr**3 * (qe_erfc (sqrt (alpha) * rr) + & - rr * sqrt (8 * alpha / tpi) * exp ( - alpha * rr**2) ) + rr * sqrt (8.0_dp * alpha / tpi) * exp ( - alpha * rr**2) ) do l = 1, 3 do m = 1, l sigmaewa (l, m) = sigmaewa (l, m) + fac * r(l,nr) * r(m,nr) diff --git a/PW/tools/Makefile b/PW/tools/Makefile index b9d1be2b4..8af7a6394 100644 --- a/PW/tools/Makefile +++ b/PW/tools/Makefile @@ -7,7 +7,8 @@ MODFLAGS= $(BASEMOD_FLAGS) \ $(MOD_FLAG)../src QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a \ ../../KS_Solvers/libks_solvers.a \ - ../../UtilXlib/libutil.a + ../../UtilXlib/libutil.a ../../LAXlib/libqela.a +# libqela required by xlf for obscure reasons PWOBJS = ../src/libpw.a TLDEPS= pwlibs diff --git a/atomic/src/dvex.f90 b/atomic/src/dvex.f90 index 56a1cc049..7d746cc43 100644 --- a/atomic/src/dvex.f90 +++ b/atomic/src/dvex.f90 @@ -52,7 +52,8 @@ subroutine dvex(nu,dvy) ! write (*,*) mu, oc(mu), ocs if ( mu == nu ) then doc = 0.d0 - if(AND((l1 /= 0), (ocs > 0.d0))) then + !if(AND((l1 /= 0), (ocs > 0.d0))) then + if((l1 /= 0).AND.(ocs > 0.d0)) then i = int(ocs) doc = (i*(2.d0*ocs-i-1.d0)/(half-1.d0) - ocs*ocs/half) * half/ocs end if