From 390a836b5ce6f73188a6d974d649d442cde4c778 Mon Sep 17 00:00:00 2001
From: Paolo Giannozzi
Date: Wed, 7 Mar 2018 18:06:27 +0100
Subject: [PATCH] More EXX restructuring: routines specific for band
parallelization moved to file exx_band.f90; make.depend updated accordingly.
Module exx now needs modules exx_base and exx_band. Next: all general
variables and routines moved to exx_base.
---
Doc/release-notes | 2 +-
GWW/pw4gww/make.depend | 3 +-
Makefile | 6 +-
PP/src/make.depend | 1 +
PW/src/Makefile | 1 +
PW/src/exx.f90 | 1652 ++--------------------------------------
PW/src/exx_band.f90 | 1558 +++++++++++++++++++++++++++++++++++++
PW/src/make.depend | 28 +-
TDDFPT/src/make.depend | 2 +-
9 files changed, 1651 insertions(+), 1602 deletions(-)
create mode 100644 PW/src/exx_band.f90
diff --git a/Doc/release-notes b/Doc/release-notes
index f87fc725c..07689e8aa 100644
--- a/Doc/release-notes
+++ b/Doc/release-notes
@@ -8,7 +8,7 @@ New in development version:
Localized exchange can deal with virtual orbitals (empty bands)
(Ivan Carnimeo)
- * Spin-polarized B3LYP (experimental)
+ * Spin-polarized X3LYP (experimental)
Problems fixed in development version:
diff --git a/GWW/pw4gww/make.depend b/GWW/pw4gww/make.depend
index 5dafe82c3..e9e26e640 100644
--- a/GWW/pw4gww/make.depend
+++ b/GWW/pw4gww/make.depend
@@ -275,6 +275,7 @@ produce_wannier_gamma.o : ../../Modules/wannier_gw.o
produce_wannier_gamma.o : ../../Modules/wavefunctions.o
produce_wannier_gamma.o : ../../PW/src/extfield.o
produce_wannier_gamma.o : ../../PW/src/exx.o
+produce_wannier_gamma.o : ../../PW/src/exx_base.o
produce_wannier_gamma.o : ../../PW/src/ldaU.o
produce_wannier_gamma.o : ../../PW/src/pwcom.o
produce_wannier_gamma.o : ../../PW/src/scf_mod.o
@@ -385,7 +386,7 @@ v_basis.o : ../../Modules/kind.o
v_basis.o : ../../Modules/mp_world.o
v_basis.o : ../../Modules/recvec.o
v_basis.o : ../../Modules/wannier_gw.o
-v_basis.o : ../../PW/src/exx.o
+v_basis.o : ../../PW/src/exx_base.o
v_basis.o : ../../PW/src/pwcom.o
v_basis.o : ../../UtilXlib/mp.o
wannier.o : ../../Modules/cell_base.o
diff --git a/Makefile b/Makefile
index cb08661c5..3317c799e 100644
--- a/Makefile
+++ b/Makefile
@@ -378,10 +378,6 @@ doc_clean :
( if test -f $$dir/Makefile ; then \
( cd $$dir; $(MAKE) TLDEPS= clean ) ; fi ) ; done
-depend: libiotk version
+depend: libiotk
@echo 'Checking dependencies...'
- ( if test -x install/makedeps.sh ; then install/makedeps.sh ; fi)
-# update file containing version number before looking for dependencies
-
-version:
- - ( cd Modules; make version )
diff --git a/PP/src/make.depend b/PP/src/make.depend
index 7024ae6f7..234f2d8b8 100644
--- a/PP/src/make.depend
+++ b/PP/src/make.depend
@@ -380,6 +380,7 @@ open_grid.o : ../../Modules/recvec.o
open_grid.o : ../../Modules/wavefunctions.o
open_grid.o : ../../PW/src/buffers.o
open_grid.o : ../../PW/src/exx.o
+open_grid.o : ../../PW/src/exx_base.o
open_grid.o : ../../PW/src/io_rho_xml.o
open_grid.o : ../../PW/src/pwcom.o
open_grid.o : ../../PW/src/scf_mod.o
diff --git a/PW/src/Makefile b/PW/src/Makefile
index e9aeee9d0..c73b169d0 100644
--- a/PW/src/Makefile
+++ b/PW/src/Makefile
@@ -73,6 +73,7 @@ ewald.o \
ewald_dipole.o \
extfield.o \
exx_base.o \
+exx_band.o \
exx.o \
fcp.o \
find_group.o \
diff --git a/PW/src/exx.f90 b/PW/src/exx.f90
index 4cceb1f59..49e54ce63 100644
--- a/PW/src/exx.f90
+++ b/PW/src/exx.f90
@@ -1,4 +1,4 @@
-! Copyright (C) 2005-2015 Quantum ESPRESSO group
+! Copyrigh(C) 2005-2018 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@@ -15,44 +15,40 @@ MODULE exx
! Computer Physics Communications 2017, dx.doi.org/10.1016/j.cpc.2017.01.008
!
USE kinds, ONLY : DP
- USE coulomb_vcut_module, ONLY : vcut_init, vcut_type, vcut_info, &
- vcut_get, vcut_spheric_get
USE noncollin_module, ONLY : noncolin, npol
- USE io_global, ONLY : ionode
+ USE io_global, ONLY : ionode, stdout
!
USE control_flags, ONLY : gamma_only, tqr
USE fft_types, ONLY : fft_type_descriptor
USE stick_base, ONLY : sticks_map, sticks_map_deallocate
!
- USE io_global, ONLY : stdout
-
IMPLICIT NONE
SAVE
- COMPLEX(DP), ALLOCATABLE :: psi_exx(:,:), hpsi_exx(:,:)
- COMPLEX(DP), ALLOCATABLE :: evc_exx(:,:), psic_exx(:)
- INTEGER :: lda_original, n_original
- integer :: ibnd_buff_start, ibnd_buff_end
!
! general purpose vars
!
- REAL(DP):: exxalfa=0._dp ! 1 if exx, 0 elsewhere
- !
+ ! exxalfa is the parameter multiplying the exact-exchange part
+ REAL(DP):: exxalfa=0._dp
+ ! x_occupation(nbnd,nkstot) the weight of
+ ! auxiliary functions in the density matrix
REAL(DP), ALLOCATABLE :: x_occupation(:,:)
- ! x_occupation(nbnd,nkstot) the weight of
- ! auxiliary functions in the density matrix
-
- INTEGER :: x_nbnd_occ ! number of bands of auxiliary functions with
- ! at least some x_occupation > eps_occ
+ ! number of bands of auxiliary functions with
+ ! at least some x_occupation > eps_occ
+ INTEGER :: x_nbnd_occ
REAL(DP), PARAMETER :: eps_occ = 1.d-8
- INTEGER :: ibnd_start = 0 ! starting band index used in bgrp parallelization
- INTEGER :: ibnd_end = 0 ! ending band index used in bgrp parallelization
-
+ !
+ ! Buffers: temporary (complex) buffer for wfc storage
COMPLEX(DP), ALLOCATABLE :: exxbuff(:,:,:)
- ! temporary (complex) buffer for wfc storage
+ ! temporary (real) buffer for wfc storage
REAL(DP), ALLOCATABLE :: locbuff(:,:,:)
- ! temporary (real) buffer for wfc storage
+ ! buffer for matrix of localization integrals
REAL(DP), ALLOCATABLE :: locmat(:,:,:)
- ! buffer for matrix of localization integrals
+ !
+#if defined(__USE_INTEL_HBM_DIRECTIVES)
+!DIR$ ATTRIBUTES FASTMEM :: exxbuff
+#elif defined(__USE_CRAY_HBM_DIRECTIVES)
+!DIR$ memory(bandwidth) exxbuff
+#endif
!
LOGICAL :: use_ace ! true: Use Lin Lin's ACE method
! false: do not use ACE, use old algorithm instead
@@ -64,14 +60,6 @@ MODULE exx
! discard contribution to V_x if overlap between
! localized orbitals is smaller than "local_thr"
!
- !
-#if defined(__USE_INTEL_HBM_DIRECTIVES)
-!DIR$ ATTRIBUTES FASTMEM :: exxbuff
-#elif defined(__USE_CRAY_HBM_DIRECTIVES)
-!DIR$ memory(bandwidth) exxbuff
-#endif
- !
- !
! energy related variables
!
REAL(DP) :: fock0 = 0.0_DP, & ! sum
@@ -97,57 +85,11 @@ MODULE exx
! energy cutoff for custom grid
REAL(DP) :: ecutfock
!
- ! mapping for the data structure conversion
+ ! starting and ending band index used in bgrp parallelization
+ INTEGER :: ibnd_start = 0, ibnd_end = 0
+ ! starting and ending buffer index used in bgrp parallelization
+ integer :: ibnd_buff_start, ibnd_buff_end
!
- TYPE comm_packet
- INTEGER :: size
- INTEGER, ALLOCATABLE :: indices(:)
- COMPLEX(DP), ALLOCATABLE :: msg(:,:,:)
- END TYPE comm_packet
- TYPE(comm_packet), ALLOCATABLE :: comm_recv(:,:), comm_send(:,:)
- TYPE(comm_packet), ALLOCATABLE :: comm_recv_reverse(:,:)
- TYPE(comm_packet), ALLOCATABLE :: comm_send_reverse(:,:,:)
- INTEGER, ALLOCATABLE :: lda_local(:,:)
- INTEGER, ALLOCATABLE :: lda_exx(:,:)
- INTEGER, ALLOCATABLE :: ngk_local(:), ngk_exx(:)
- INTEGER, ALLOCATABLE :: igk_exx(:,:)
- INTEGER :: npwx_local = 0
- INTEGER :: npwx_exx = 0
- INTEGER :: npw_local = 0
- INTEGER :: npw_exx = 0
- INTEGER :: n_local = 0
- INTEGER :: nwordwfc_exx
- LOGICAL :: first_data_structure_change = .TRUE.
-
- INTEGER :: ngm_loc, ngm_g_loc, gstart_loc
- INTEGER, ALLOCATABLE :: ig_l2g_loc(:)
- REAL(DP), ALLOCATABLE :: g_loc(:,:), gg_loc(:)
- INTEGER, ALLOCATABLE :: mill_loc(:,:), nl_loc(:)
- INTEGER :: ngms_loc, ngms_g_loc
- INTEGER, ALLOCATABLE :: nls_loc(:)
- INTEGER, ALLOCATABLE :: nlm_loc(:)
- INTEGER, ALLOCATABLE :: nlsm_loc(:)
-
- !gcutm
- INTEGER :: ngm_exx, ngm_g_exx, gstart_exx
- INTEGER, ALLOCATABLE :: ig_l2g_exx(:)
- REAL(DP), ALLOCATABLE :: g_exx(:,:), gg_exx(:)
- INTEGER, ALLOCATABLE :: mill_exx(:,:), nl_exx(:)
- INTEGER :: ngms_exx, ngms_g_exx
- INTEGER, ALLOCATABLE :: nls_exx(:)
- INTEGER, ALLOCATABLE :: nlm_exx(:)
- INTEGER, ALLOCATABLE :: nlsm_exx(:)
- !gcutms
-
- !list of which coulomb factors have been calculated already
- LOGICAL, ALLOCATABLE :: coulomb_done(:,:)
-
- TYPE(fft_type_descriptor) :: dfftp_loc, dffts_loc
- TYPE(fft_type_descriptor) :: dfftp_exx, dffts_exx
- TYPE (sticks_map) :: smap_exx ! Stick map descriptor
- INTEGER :: ngw_loc, ngs_loc
- INTEGER :: ngw_exx, ngs_exx
-
CONTAINS
#define _CX(A) CMPLX(A,0._dp,kind=DP)
#define _CY(A) CMPLX(0._dp,-A,kind=DP)
@@ -170,6 +112,7 @@ MODULE exx
!
USE control_flags,ONLY : tqr
USE realus, ONLY : qpointlist, tabxx, tabp
+ USE exx_band, ONLY : smap_exx
IMPLICIT NONE
INTEGER :: ik, ngmt
@@ -342,7 +285,7 @@ MODULE exx
! DoLoc = .true. ... Real Array locbuff(ir, nbnd, nkqs)
! .false. ... Complex Array exxbuff(ir, nbnd/2, nkqs)
!
- USE wavefunctions_module, ONLY : evc, psic
+ USE wavefunctions_module, ONLY : evc
USE io_files, ONLY : nwordwfc, iunwfc_exx
USE buffers, ONLY : get_buffer
USE wvfct, ONLY : nbnd, npwx, wg, current_k
@@ -366,6 +309,8 @@ MODULE exx
USE exx_base, ONLY : nkqs, xkq_collect, index_xk, index_sym, &
exx_set_symm, rir, working_pool, exxdiv, erfc_scrlen, gau_scrlen, &
exx_divergence
+ USE exx_band, ONLY : change_data_structure, nwordwfc_exx, &
+ transform_evc_to_exx, igk_exx, evc_exx
!
IMPLICIT NONE
INTEGER :: ik,ibnd, i, j, k, ir, isym, ikq, ig
@@ -380,6 +325,7 @@ MODULE exx
!DIR$ memory(bandwidth) temppsic
#endif
COMPLEX(DP),ALLOCATABLE :: temppsic_nc(:,:), psic_nc(:,:)
+ COMPLEX(DP),ALLOCATABLE :: psic_exx(:)
INTEGER :: nxxs, nrxxs
#if defined(__MPI)
COMPLEX(DP),ALLOCATABLE :: temppsic_all(:), psic_all(:)
@@ -417,11 +363,6 @@ MODULE exx
! Note that nxxs is not the same as nrxxs in parallel case
nxxs = dfftt%nr1x *dfftt%nr2x *dfftt%nr3x
nrxxs= dfftt%nnr
- !allocate psic_exx
- IF(.not.allocated(psic_exx))THEN
- ALLOCATE(psic_exx(nrxxs))
- psic_exx = 0.0_DP
- END IF
#if defined(__MPI)
IF (noncolin) THEN
ALLOCATE(psic_all_nc(nxxs,npol), temppsic_all_nc(nxxs,npol) )
@@ -434,6 +375,7 @@ MODULE exx
ELSEIF ( .not. gamma_only ) THEN
ALLOCATE(temppsic(nrxxs))
ENDIF
+ ALLOCATE(psic_exx(nrxxs))
!
IF (.not.exx_is_active()) THEN
!
@@ -528,7 +470,6 @@ MODULE exx
ENDDO
ENDDO
END IF
-
!
! This is parallelized over pools. Each pool computes only its k-points
!
@@ -744,6 +685,7 @@ MODULE exx
ENDDO&
KPOINTS_LOOP
!
+ DEALLOCATE (psic_exx)
IF (noncolin) THEN
DEALLOCATE(temppsic_nc, psic_nc)
#if defined(__MPI)
@@ -809,6 +751,8 @@ MODULE exx
USE us_exx, ONLY : becxx
USE mp_exx, ONLY : negrp, inter_egrp_comm, init_index_over_band
USE wvfct, ONLY : nbnd
+ USE exx_band, ONLY : transform_psi_to_exx, transform_hpsi_to_local,&
+ psi_exx, hpsi_exx, igk_exx
!
IMPLICIT NONE
!
@@ -886,6 +830,7 @@ MODULE exx
USE paw_exx, ONLY : PAW_newdxx
USE exx_base, ONLY : nqs, index_xkq, index_xk, xkq_collect, &
coulomb_fac, g2_convolution_all
+ USE exx_band, ONLY : exxbuff_comm_gamma, result_sum, igk_exx
!
!
IMPLICIT NONE
@@ -909,7 +854,6 @@ MODULE exx
INTEGER :: h_ibnd, nrxxs
REAL(DP) :: x1, x2, xkp(3)
REAL(DP) :: xkq(3)
- ! temp array for vcut_spheric
INTEGER, EXTERNAL :: global_kpoint_index
LOGICAL :: l_fft_doubleband
LOGICAL :: l_fft_singleband
@@ -973,7 +917,8 @@ MODULE exx
index_end = min(index_start+jblock-1,nbnd/2)
!
!gather exxbuff for jblock_start:jblock_end
- call exxbuff_comm_gamma(exxtemp,current_k,nrxxs*npol,jblock_start,jblock_end,jblock)
+ call exxbuff_comm_gamma( exxbuff, exxtemp, current_k,nrxxs*npol,&
+ jblock_start,jblock_end,jblock)
!
LOOP_ON_PSI_BANDS : &
DO ii = 1, nibands(my_egrp_id+1)
@@ -1246,6 +1191,7 @@ MODULE exx
USE paw_exx, ONLY : PAW_newdxx
USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, &
coulomb_fac, g2_convolution_all
+ USE exx_band, ONLY : exxbuff_comm, result_sum, igk_exx
USE io_global, ONLY : stdout
!
!
@@ -1285,7 +1231,6 @@ MODULE exx
INTEGER :: h_ibnd, nrxxs
REAL(DP) :: x1, x2, xkp(3), omega_inv, nqs_inv
REAL(DP) :: xkq(3)
- ! temp array for vcut_spheric
INTEGER, EXTERNAL :: global_kpoint_index
DOUBLE PRECISION :: max, tempx
COMPLEX(DP), ALLOCATABLE :: big_result(:,:)
@@ -1380,18 +1325,10 @@ MODULE exx
!
END DO
!
- !
- !
- !
- !
- !
- !
!precompute these guys
omega_inv = 1.0 / omega
nqs_inv = 1.0 / nqs
!
- !
- !
!------------------------------------------------------------------------!
! Beginning of main loop
!------------------------------------------------------------------------!
@@ -1421,7 +1358,8 @@ MODULE exx
jblock_end = min(jblock_start+jblock-1,nbnd)
!
!gather exxbuff for jblock_start:jblock_end
- call exxbuff_comm(exxtemp,ikq,nrxxs*npol,jblock_start,jblock_end)
+ call exxbuff_comm( exxbuff, exxtemp, ikq, nrxxs*npol, &
+ jblock_start,jblock_end)
!
DO ii=1, nibands(my_egrp_id+1)
!
@@ -1711,6 +1649,7 @@ MODULE exx
USE becmod, ONLY : bec_type, allocate_bec_type, &
deallocate_bec_type, calbec
USE uspp, ONLY : okvan,nkb,vkb
+ USE exx_band, ONLY : nwordwfc_exx, igk_exx
IMPLICIT NONE
@@ -1830,6 +1769,9 @@ MODULE exx
addusxx_r, qvan_init, qvan_clean
USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, &
coulomb_fac, g2_convolution_all
+ USE exx_band, ONLY : exxbuff_comm_gamma, igk_exx, &
+ change_data_structure, transform_evc_to_exx, nwordwfc_exx, &
+ evc_exx
!
IMPLICIT NONE
!
@@ -1845,7 +1787,6 @@ MODULE exx
INTEGER :: h_ibnd, nrxxs, current_ik, ibnd_loop_start
REAL(DP) :: x1, x2
REAL(DP) :: xkq(3), xkp(3), vc
- ! temp array for vcut_spheric
INTEGER, EXTERNAL :: global_kpoint_index
!
TYPE(bec_type) :: becpsi
@@ -1936,7 +1877,8 @@ MODULE exx
index_end = min(index_start+jblock-1,nbnd/2)
!
!gather exxbuff for jblock_start:jblock_end
- call exxbuff_comm_gamma(exxtemp,ikk,nrxxs*npol,jblock_start,jblock_end,jblock)
+ call exxbuff_comm_gamma( exxbuff, exxtemp, ikk, nrxxs*npol,&
+ jblock_start,jblock_end,jblock)
!
JBND_LOOP : &
DO ii = 1, nibands(my_egrp_id+1)
@@ -2158,6 +2100,8 @@ MODULE exx
addusxx_r, qvan_init, qvan_clean
USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, &
coulomb_fac, g2_convolution_all
+ USE exx_band, ONLY : exxbuff_comm, change_data_structure, &
+ transform_evc_to_exx, nwordwfc_exx, igk_exx, evc_exx
!
IMPLICIT NONE
!
@@ -2176,7 +2120,6 @@ MODULE exx
INTEGER :: h_ibnd, nrxxs, current_ik, ibnd_loop_start, nblock, nrt, irt, ir_start, ir_end
REAL(DP) :: x1, x2
REAL(DP) :: xkq(3), xkp(3), vc, omega_inv
- ! temp array for vcut_spheric
INTEGER, EXTERNAL :: global_kpoint_index
!
TYPE(bec_type) :: becpsi
@@ -2290,7 +2233,8 @@ MODULE exx
jblock_end = min(jblock_start+jblock-1,nbnd)
!
!gather exxbuff for jblock_start:jblock_end
- call exxbuff_comm(exxtemp,ikq,nrxxs*npol,jblock_start,jblock_end)
+ call exxbuff_comm( exxbuff, exxtemp, ikq, nrxxs*npol, &
+ jblock_start,jblock_end)
!
JBND_LOOP : &
DO ii=1, nibands(my_egrp_id+1)
@@ -2469,10 +2413,14 @@ MODULE exx
USE fft_interfaces, ONLY : fwfft, invfft
USE uspp, ONLY : okvan
!
- USE exx_base, ONLY : nq1, nq2, nq3, nqs, eps, exxdiv, &
- x_gamma_extrapolation, on_double_grid, grid_factor, yukawa, &
- erfc_scrlen, use_coulomb_vcut_ws, use_coulomb_vcut_spheric, &
+ USE exx_base, ONLY : nq1, nq2, nq3, nqs, eps, exxdiv, &
+ x_gamma_extrapolation, on_double_grid, grid_factor, yukawa, &
+ erfc_scrlen, use_coulomb_vcut_ws, use_coulomb_vcut_spheric, &
gau_scrlen, vcut, index_xkq, index_xk, index_sym
+ USE exx_band, ONLY : exxbuff_comm, exxbuff_comm_gamma, &
+ change_data_structure, transform_evc_to_exx, g_exx, igk_exx, &
+ nwordwfc_exx, evc_exx
+ USE coulomb_vcut_module, ONLY : vcut_get, vcut_spheric_get
!
! ---- local variables -------------------------------------------------
!
@@ -2491,7 +2439,6 @@ MODULE exx
INTEGER :: ibnd_loop_start
REAL(DP) :: x1, x2
REAL(DP) :: qq, xk_cryst(3), sxk(3), xkq(3), vc(3,3), x, q(3)
- ! temp array for vcut_spheric
REAL(DP) :: delta(3,3)
COMPLEX(DP), ALLOCATABLE :: exxtemp(:,:)
INTEGER :: jstart, jend, ipair, jblock_start, jblock_end
@@ -2639,14 +2586,15 @@ MODULE exx
jblock_start = (ijt - 1) * (2*jblock) + 1
jblock_end = min(jblock_start+(2*jblock)-1,nbnd)
!
- call exxbuff_comm_gamma(exxtemp,ikq,nrxxs*npol,jblock_start,&
- jblock_end,jblock)
+ call exxbuff_comm_gamma(exxbuff, exxtemp, ikq, nrxxs*npol, &
+ jblock_start, jblock_end, jblock)
ELSE
!
jblock_start = (ijt - 1) * jblock + 1
jblock_end = min(jblock_start+jblock-1,nbnd)
!
- call exxbuff_comm(exxtemp,ikq,nrxxs*npol,jblock_start,jblock_end)
+ call exxbuff_comm(exxbuff, exxtemp, ikq, nrxxs*npol, &
+ jblock_start, jblock_end)
END IF
!
DO ii = 1, nibands(my_egrp_id+1)
@@ -2971,1488 +2919,6 @@ SUBROUTINE compute_becpsi (npw_, igk_, q_, evc_exx, becpsi_k)
return
END SUBROUTINE compute_becpsi
!-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE transform_evc_to_exx(type)
- !-----------------------------------------------------------------------
- USE mp_exx, ONLY : negrp
- USE wvfct, ONLY : npwx, nbnd
- USE io_files, ONLY : nwordwfc, iunwfc, iunwfc_exx
- USE klist, ONLY : nks, ngk, igk_k
- USE uspp, ONLY : nkb, vkb
- USE wavefunctions_module, ONLY : evc
- USE control_flags, ONLY : io_level
- USE buffers, ONLY : open_buffer, get_buffer, save_buffer
- USE mp_exx, ONLY : max_ibands
- !
- !
- IMPLICIT NONE
- !
- INTEGER, intent(in) :: type
- INTEGER :: lda, n, ik
- LOGICAL :: exst_mem, exst_file
- !
- IF (negrp.eq.1) THEN
- !
- ! no change in data structure is necessary
- ! just copy all of the required data
- !
- ! get evc_exx
- !
- IF(.not.allocated(evc_exx))THEN
- ALLOCATE(evc_exx(npwx*npol,nbnd))
- END IF
- evc_exx = evc
- !
- ! get igk_exx
- !
- IF(.not.allocated(igk_exx)) THEN
- ALLOCATE( igk_exx( npwx, nks ) )
- igk_exx = igk_k
- END IF
- !
- ! get the wfc buffer is used
- !
- iunwfc_exx = iunwfc
- nwordwfc_exx = nwordwfc
- !
- RETURN
- !
- END IF
- !
- ! change the data structure of evc and igk
- !
- lda = npwx
- n = npwx
- npwx_local = npwx
- IF( .not.allocated(ngk_local) ) allocate(ngk_local(nks))
- ngk_local = ngk
- !
- IF ( .not.allocated(comm_recv) ) THEN
- !
- ! initialize all of the conversion maps and change the data structure
- !
- CALL initialize_local_to_exact_map(lda, nbnd)
- ELSE
- !
- ! change the data structure
- !
- CALL change_data_structure(.TRUE.)
- END IF
- !
- lda = npwx
- n = npwx
- npwx_exx = npwx
- IF( .not.allocated(ngk_exx) ) allocate(ngk_exx(nks))
- ngk_exx = ngk
- !
- ! get evc_exx
- !
- IF(.not.allocated(evc_exx))THEN
- ALLOCATE(evc_exx(lda*npol,max_ibands+2))
- !
- ! ... open files/buffer for wavefunctions (nwordwfc set in openfil)
- ! ... io_level > 1 : open file, otherwise: open buffer
- !
- nwordwfc_exx = size(evc_exx)
- CALL open_buffer( iunwfc_exx, 'wfc_exx', nwordwfc_exx, io_level, &
- exst_mem, exst_file )
- END IF
- !
- DO ik=1, nks
- !
- ! read evc for the local data structure
- !
- IF ( nks > 1 ) CALL get_buffer(evc, nwordwfc, iunwfc, ik)
- !
- ! transform evc to the EXX data structure
- !
- CALL transform_to_exx(lda, n, nbnd, nbnd, ik, evc, evc_exx, type)
- !
- ! save evc to a buffer
- !
- IF ( nks > 1 ) CALL save_buffer ( evc_exx, nwordwfc_exx, iunwfc_exx, ik )
- END DO
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE transform_evc_to_exx
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE transform_psi_to_exx(lda, n, m, psi)
- !-----------------------------------------------------------------------
- USE wvfct, ONLY : current_k, npwx, nbnd
- USE mp_exx, ONLY : negrp, nibands, my_egrp_id, max_ibands
- !
- !
- IMPLICIT NONE
- !
- Integer, INTENT(in) :: lda
- INTEGER, INTENT(in) :: m
- INTEGER, INTENT(inout) :: n
- COMPLEX(DP), INTENT(in) :: psi(lda*npol,m)
- !
- ! change to the EXX data strucutre
- !
- npwx_local = npwx
- n_local = n
- IF ( .not.allocated(comm_recv) ) THEN
- !
- ! initialize all of the conversion maps and change the data structure
- !
- CALL initialize_local_to_exact_map(lda, m)
- ELSE
- !
- ! change the data structure
- !
- CALL change_data_structure(.TRUE.)
- END IF
- npwx_exx = npwx
- n = ngk_exx(current_k)
- !
- ! get igk
- !
- CALL update_igk(.TRUE.)
- !
- ! get psi_exx
- !
- CALL transform_to_exx(lda, n, m, max_ibands, &
- current_k, psi, psi_exx, 0)
- !
- ! zero hpsi_exx
- !
- hpsi_exx = 0.d0
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE transform_psi_to_exx
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE transform_hpsi_to_local(lda, n, m, hpsi)
- !-----------------------------------------------------------------------
- USE mp_exx, ONLY : iexx_istart, iexx_iend, my_egrp_id
- !
- IMPLICIT NONE
- !
- INTEGER, INTENT(in) :: lda
- INTEGER, INTENT(in) :: m
- INTEGER, INTENT(inout) :: n
- COMPLEX(DP), INTENT(out) :: hpsi(lda_original*npol,m)
- INTEGER :: m_exx
- !
- ! change to the local data structure
- !
- CALL change_data_structure(.FALSE.)
- !
- ! get igk
- !
- CALL update_igk(.FALSE.)
- n = n_local
- !
- ! transform hpsi_exx to the local data structure
- !
- m_exx = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1
- CALL transform_to_local(m,m_exx,hpsi_exx,hpsi)
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE transform_hpsi_to_local
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE initialize_local_to_exact_map(lda, m)
- !-----------------------------------------------------------------------
- !
- ! determine the mapping between the local and EXX data structures
- !
- USE wvfct, ONLY : npwx, nbnd
- USE klist, ONLY : nks, igk_k
- USE mp_exx, ONLY : nproc_egrp, negrp, my_egrp_id, me_egrp, &
- intra_egrp_comm, inter_egrp_comm, &
- ibands, nibands, init_index_over_band, &
- iexx_istart, iexx_iend, max_ibands
- USE mp_pools, ONLY : nproc_pool, me_pool, intra_pool_comm
- USE mp, ONLY : mp_sum
- USE gvect, ONLY : ig_l2g
- USE uspp, ONLY : nkb
- !
- !
- IMPLICIT NONE
- !
- Integer :: lda
- INTEGER :: n, m
- INTEGER, ALLOCATABLE :: local_map(:,:), exx_map(:,:)
- INTEGER, ALLOCATABLE :: l2e_map(:,:), e2l_map(:,:,:)
- INTEGER, ALLOCATABLE :: psi_source(:), psi_source_exx(:,:)
- INTEGER :: current_index
- INTEGER :: i, j, k, ik, im, ig, count, iproc, prev, iegrp
- INTEGER :: total_lda(nks), prev_lda(nks)
- INTEGER :: total_lda_exx(nks), prev_lda_exx(nks)
- INTEGER :: n_local
- INTEGER :: lda_max_local, lda_max_exx
- INTEGER :: max_lda_egrp
- INTEGER :: request_send(nproc_egrp), request_recv(nproc_egrp)
- INTEGER :: ierr
- INTEGER :: egrp_base, total_lda_egrp(nks), prev_lda_egrp(nks)
- INTEGER :: igk_loc(npwx)
- !
- ! initialize the pair assignments
- !
- CALL init_index_over_band(inter_egrp_comm,nbnd,m)
- !
- ! allocate bookeeping arrays
- !
- IF ( .not.allocated(comm_recv) ) THEN
- ALLOCATE(comm_recv(nproc_egrp,nks),comm_send(nproc_egrp,nks))
- END IF
- IF ( .not.allocated(lda_local) ) THEN
- ALLOCATE(lda_local(nproc_pool,nks))
- ALLOCATE(lda_exx(nproc_egrp,nks))
- END IF
- !
- ! store the original values of lda and n
- !
- lda_original = lda
- n_original = n
- !
- ! construct the local map
- !
- lda_local = 0
- DO ik = 1, nks
- igk_loc = igk_k(:,ik)
- n = 0
- DO i = 1, size(igk_loc)
- IF(igk_loc(i).gt.0)n = n + 1
- END DO
- lda_local(me_pool+1,ik) = n
- CALL mp_sum(lda_local(:,ik),intra_pool_comm)
- total_lda(ik) = sum(lda_local(:,ik))
- prev_lda(ik) = sum(lda_local(1:me_pool,ik))
- END DO
- ALLOCATE(local_map(maxval(total_lda),nks))
- local_map = 0
- DO ik = 1, nks
- local_map(prev_lda(ik)+1:prev_lda(ik)+lda_local(me_pool+1,ik),ik) = &
- ig_l2g(igk_k(1:lda_local(me_pool+1,ik),ik))
- END DO
- CALL mp_sum(local_map,intra_pool_comm)
- !
- !-----------------------------------------!
- ! Switch to the exx data structure !
- !-----------------------------------------!
- !
- CALL change_data_structure(.TRUE.)
- !
- ! construct the exx map
- !
- lda_exx = 0
- DO ik = 1, nks
- n = 0
- DO i = 1, size(igk_exx(:,ik))
- IF(igk_exx(i,ik).gt.0)n = n + 1
- END DO
- lda_exx(me_egrp+1,ik) = n
- CALL mp_sum(lda_exx(:,ik),intra_egrp_comm)
- total_lda_exx(ik) = sum(lda_exx(:,ik))
- prev_lda_exx(ik) = sum(lda_exx(1:me_egrp,ik))
- END DO
- ALLOCATE(exx_map(maxval(total_lda_exx),nks))
- exx_map = 0
- DO ik = 1, nks
- exx_map(prev_lda_exx(ik)+1:prev_lda_exx(ik)+lda_exx(me_egrp+1,ik),ik) = &
- ig_l2g(igk_exx(1:lda_exx(me_egrp+1,ik),ik))
- END DO
- CALL mp_sum(exx_map,intra_egrp_comm)
- !
- ! construct the l2e_map
- !
- allocate( l2e_map(maxval(total_lda_exx),nks) )
- l2e_map = 0
- DO ik = 1, nks
- DO ig = 1, lda_exx(me_egrp+1,ik)
- DO j=1, total_lda(ik)
- IF( local_map(j,ik).EQ.exx_map(ig+prev_lda_exx(ik),ik) ) exit
- END DO
- l2e_map(ig+prev_lda_exx(ik),ik) = j
- END DO
- END DO
- CALL mp_sum(l2e_map,intra_egrp_comm)
- !
- ! plan communication for the data structure change
- !
- lda_max_local = maxval(lda_local)
- lda_max_exx = maxval(lda_exx)
- allocate(psi_source(maxval(total_lda_exx)))
- !
- DO ik = 1, nks
- !
- ! determine which task each value will come from
- !
- psi_source = 0
- DO ig = 1, lda_exx(me_egrp+1,ik)
- j = 1
- DO i = 1, nproc_pool
- j = j + lda_local(i,ik)
- IF( j.gt.l2e_map(ig+prev_lda_exx(ik),ik) ) exit
- END DO
- psi_source(ig+prev_lda_exx(ik)) = i-1
- END DO
- CALL mp_sum(psi_source,intra_egrp_comm)
- !
- ! allocate communication packets to receive psi and hpsi
- !
- DO iproc=0, nproc_egrp-1
- !
- ! determine how many values need to come from iproc
- !
- count = 0
- DO ig=1, lda_exx(me_egrp+1,ik)
- IF ( MODULO(psi_source(ig+prev_lda_exx(ik)),nproc_egrp)&
- .eq.iproc ) THEN
- count = count + 1
- END IF
- END DO
- !
- ! allocate the communication packet
- !
- comm_recv(iproc+1,ik)%size = count
- IF (count.gt.0) THEN
- IF (.not.ALLOCATED(comm_recv(iproc+1,ik)%msg)) THEN
- ALLOCATE(comm_recv(iproc+1,ik)%indices(count))
- ALLOCATE(comm_recv(iproc+1,ik)%msg(count,npol,max_ibands+2))
- END IF
- END IF
- !
- ! determine which values need to come from iproc
- !
- count = 0
- DO ig=1, lda_exx(me_egrp+1,ik)
- IF ( MODULO(psi_source(ig+prev_lda_exx(ik)),nproc_egrp)&
- .eq.iproc ) THEN
- count = count + 1
- comm_recv(iproc+1,ik)%indices(count) = ig
- END IF
- END DO
- !
- END DO
- !
- ! allocate communication packets to send psi and hpsi
- !
- prev = 0
- DO iproc=0, nproc_egrp-1
- !
- ! determine how many values need to be sent to iproc
- !
- count = 0
- DO ig=1, lda_exx(iproc+1,ik)
- IF ( MODULO(psi_source(ig+prev),nproc_egrp).eq.me_egrp ) THEN
- count = count + 1
- END IF
- END DO
- !
- ! allocate the communication packet
- !
- comm_send(iproc+1,ik)%size = count
- IF (count.gt.0) THEN
- IF (.not.ALLOCATED(comm_send(iproc+1,ik)%msg)) THEN
- ALLOCATE(comm_send(iproc+1,ik)%indices(count))
- ALLOCATE(comm_send(iproc+1,ik)%msg(count,npol,max_ibands+2))
- END IF
- END IF
- !
- ! determine which values need to be sent to iproc
- !
- count = 0
- DO ig=1, lda_exx(iproc+1,ik)
- IF ( MODULO(psi_source(ig+prev),nproc_egrp).eq.me_egrp ) THEN
- count = count + 1
- comm_send(iproc+1,ik)%indices(count) = l2e_map(ig+prev,ik)
- END IF
- END DO
- !
- prev = prev + lda_exx(iproc+1,ik)
- !
- END DO
- !
- END DO
- !
- ! allocate psi_exx and hpsi_exx
- !
- IF(allocated(psi_exx))DEALLOCATE(psi_exx)
- ALLOCATE(psi_exx(npwx*npol, max_ibands ))
- IF(allocated(hpsi_exx))DEALLOCATE(hpsi_exx)
- ALLOCATE(hpsi_exx(npwx*npol, max_ibands ))
- !
- ! allocate communication arrays for the exx to local transformation
- !
- IF ( .not.allocated(comm_recv_reverse) ) THEN
- ALLOCATE(comm_recv_reverse(nproc_egrp,nks))
- ALLOCATE(comm_send_reverse(nproc_egrp,negrp,nks))
- END IF
- !
- egrp_base = my_egrp_id*nproc_egrp
- DO ik = 1, nks
- total_lda_egrp(ik) = &
- sum( lda_local(egrp_base+1:(egrp_base+nproc_egrp),ik) )
- prev_lda_egrp(ik) = &
- sum( lda_local(egrp_base+1:(egrp_base+me_egrp),ik) )
- END DO
- !
- max_lda_egrp = 0
- DO j = 1, negrp
- DO ik = 1, nks
- max_lda_egrp = max(max_lda_egrp, &
- sum( lda_local((j-1)*nproc_egrp+1:j*nproc_egrp,ik) ) )
- END DO
- END DO
- !
- ! determine the e2l_map
- !
- allocate( e2l_map(max_lda_egrp,nks,negrp) )
- e2l_map = 0
- DO ik = 1, nks
- DO ig = 1, lda_local(me_pool+1,ik)
- DO j=1, total_lda_exx(ik)
- IF( local_map(ig+prev_lda(ik),ik).EQ.exx_map(j,ik) ) exit
- END DO
- e2l_map(ig+prev_lda_egrp(ik),ik,my_egrp_id+1) = j
- END DO
- END DO
- CALL mp_sum(e2l_map(:,:,my_egrp_id+1),intra_egrp_comm)
- CALL mp_sum(e2l_map,inter_egrp_comm)
- !
- ! plan communication for the local to EXX data structure transformation
- !
- allocate(psi_source_exx( max_lda_egrp, negrp ))
- DO ik = 1, nks
- !
- ! determine where each value is coming from
- !
- psi_source_exx = 0
- DO ig = 1, lda_local(me_pool+1,ik)
- j = 1
- DO i = 1, nproc_egrp
- j = j + lda_exx(i,ik)
- IF( j.gt.e2l_map(ig+prev_lda_egrp(ik),ik,my_egrp_id+1) ) exit
- END DO
- psi_source_exx(ig+prev_lda_egrp(ik),my_egrp_id+1) = i-1
- END DO
- CALL mp_sum(psi_source_exx(:,my_egrp_id+1),intra_egrp_comm)
- CALL mp_sum(psi_source_exx,inter_egrp_comm)
- !
- ! allocate communication packets to receive psi and hpsi (reverse)
- !
- DO iegrp=my_egrp_id+1, my_egrp_id+1
- DO iproc=0, nproc_egrp-1
- !
- ! determine how many values need to come from iproc
- !
- count = 0
- DO ig=1, lda_local(me_pool+1,ik)
- IF ( psi_source_exx(ig+prev_lda_egrp(ik),iegrp).eq.iproc ) THEN
- count = count + 1
- END IF
- END DO
- !
- ! allocate the communication packet
- !
- comm_recv_reverse(iproc+1,ik)%size = count
- IF (count.gt.0) THEN
- IF (.not.ALLOCATED(comm_recv_reverse(iproc+1,ik)%msg)) THEN
- ALLOCATE(comm_recv_reverse(iproc+1,ik)%indices(count))
- ALLOCATE(comm_recv_reverse(iproc+1,ik)%msg(count,npol,m+2))
- END IF
- END IF
- !
- ! determine which values need to come from iproc
- !
- count = 0
- DO ig=1, lda_local(me_pool+1,ik)
- IF ( psi_source_exx(ig+prev_lda_egrp(ik),iegrp).eq.iproc ) THEN
- count = count + 1
- comm_recv_reverse(iproc+1,ik)%indices(count) = ig
- END IF
- END DO
-
- END DO
- END DO
- !
- ! allocate communication packets to send psi and hpsi
- !
- DO iegrp=1, negrp
- prev = 0
- DO iproc=0, nproc_egrp-1
- !
- ! determine how many values need to be sent to iproc
- !
- count = 0
- DO ig=1, lda_local(iproc+(iegrp-1)*nproc_egrp+1,ik)
- IF ( psi_source_exx(ig+prev,iegrp).eq.me_egrp ) THEN
- count = count + 1
- END IF
- END DO
- !
- ! allocate the communication packet
- !
- comm_send_reverse(iproc+1,iegrp,ik)%size = count
- IF (count.gt.0) THEN
- IF (.not.ALLOCATED(comm_send_reverse(iproc+1,iegrp,ik)%msg))THEN
- ALLOCATE(comm_send_reverse(iproc+1,iegrp,ik)%indices(count))
- ALLOCATE(comm_send_reverse(iproc+1,iegrp,ik)%msg(count,npol,&
- iexx_iend(my_egrp_id+1)-iexx_istart(my_egrp_id+1)+3))
- END IF
- END IF
- !
- ! determine which values need to be sent to iproc
- !
- count = 0
- DO ig=1, lda_local(iproc+(iegrp-1)*nproc_egrp+1,ik)
- IF ( psi_source_exx(ig+prev,iegrp).eq.me_egrp ) THEN
- count = count + 1
- comm_send_reverse(iproc+1,iegrp,ik)%indices(count) = &
- e2l_map(ig+prev,ik,iegrp)
- END IF
- END DO
- !
- prev = prev + lda_local( (iegrp-1)*nproc_egrp+iproc+1, ik )
- !
- END DO
- END DO
- !
- END DO
- !
- ! deallocate arrays
- !
- DEALLOCATE( local_map, exx_map )
- DEALLOCATE( l2e_map, e2l_map )
- DEALLOCATE( psi_source, psi_source_exx )
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE initialize_local_to_exact_map
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE transform_to_exx(lda, n, m, m_out, ik, psi, psi_out, type)
- !-----------------------------------------------------------------------
- !
- ! transform psi into the EXX data structure, and place the result in psi_out
- !
- USE wvfct, ONLY : nbnd
- USE mp, ONLY : mp_sum
- USE mp_pools, ONLY : nproc_pool, me_pool
- USE mp_exx, ONLY : intra_egrp_comm, inter_egrp_comm, &
- nproc_egrp, me_egrp, negrp, my_egrp_id, nibands, ibands, &
- max_ibands, all_start, all_end
- USE parallel_include
- !
- !
- IMPLICIT NONE
- !
- Integer :: lda
- INTEGER :: n, m, m_out
- COMPLEX(DP) :: psi(npwx_local*npol,m)
- COMPLEX(DP) :: psi_out(npwx_exx*npol,m_out)
- INTEGER, INTENT(in) :: type
-
- COMPLEX(DP), ALLOCATABLE :: psi_work(:,:,:,:), psi_gather(:,:)
- INTEGER :: i, j, im, iproc, ig, ik, iegrp
- INTEGER :: prev, lda_max_local
-
- INTEGER :: request_send(nproc_egrp), request_recv(nproc_egrp)
- INTEGER :: ierr
- INTEGER :: current_ik
- !INTEGER, EXTERNAL :: find_current_k
- INTEGER :: recvcount(negrp)
- INTEGER :: displs(negrp)
-#if defined(__MPI)
- INTEGER :: istatus(MPI_STATUS_SIZE)
-#endif
- INTEGER :: requests(max_ibands+2,negrp)
- !
- INTEGER :: ipol, my_lda, lda_offset, count
- lda_max_local = maxval(lda_local)
- current_ik = ik
- !
- !-------------------------------------------------------!
- !Communication Part 1
- !-------------------------------------------------------!
- !
- allocate(psi_work(lda_max_local,npol,m,negrp))
- allocate(psi_gather(lda_max_local*npol,m))
- DO im=1, m
- my_lda = lda_local(me_pool+1,current_ik)
- DO ipol=1, npol
- lda_offset = lda_max_local*(ipol-1)
- DO ig=1, my_lda
- psi_gather(lda_offset+ig,im) = psi(npwx_local*(ipol-1)+ig,im)
- END DO
- END DO
- END DO
- recvcount = lda_max_local*npol
- count = lda_max_local*npol
-#if defined (__MPI_NONBLOCKING)
- IF ( type.eq.0 ) THEN
- DO iegrp=1, negrp
- displs(iegrp) = (iegrp-1)*(count*m)
- END DO
- DO iegrp=1, negrp
- DO im=1, nibands(iegrp)
- IF ( my_egrp_id.eq.(iegrp-1) ) THEN
- DO j=1, negrp
- displs(j) = (j-1)*(count*m) + count*(ibands(im,iegrp)-1)
- END DO
- END IF
-#if defined(__MPI)
- CALL MPI_IGATHERV( psi_gather(:, ibands(im,iegrp) ), &
- count, MPI_DOUBLE_COMPLEX, &
- psi_work, &
- recvcount, displs, MPI_DOUBLE_COMPLEX, &
- iegrp-1, &
- inter_egrp_comm, requests(im,iegrp), ierr )
-#endif
- END DO
- END DO
-
- ELSE IF(type.eq.1) THEN
-#elif defined(__MPI)
- CALL MPI_ALLGATHER( psi_gather, &
- count*m, MPI_DOUBLE_COMPLEX, &
- psi_work, &
- count*m, MPI_DOUBLE_COMPLEX, &
- inter_egrp_comm, ierr )
-#endif
-
-#if defined (__MPI_NONBLOCKING)
- ELSE IF(type.eq.2) THEN !evc2
-
- DO iegrp=1, negrp
- displs(iegrp) = (iegrp-1)*(count*m)
- END DO
- DO iegrp=1, negrp
- DO im=1, all_end(iegrp) - all_start(iegrp) + 1
- !
- IF(all_start(iegrp).eq.0) CYCLE
- !
- IF ( my_egrp_id.eq.(iegrp-1) ) THEN
- DO j=1, negrp
- displs(j) = (j-1)*(count*m) + count*(im+all_start(iegrp)-2)
- END DO
- END IF
-#if defined(__MPI)
- CALL MPI_IGATHERV( psi_gather(:, im+all_start(iegrp)-1 ), &
- count, MPI_DOUBLE_COMPLEX, &
- psi_work, &
- recvcount, displs, MPI_DOUBLE_COMPLEX, &
- iegrp-1, &
- inter_egrp_comm, requests(im,iegrp), ierr )
-#endif
- END DO
- END DO
-
- END IF
- !
- IF(type.eq.0)THEN
- DO iegrp=1, negrp
- DO im=1, nibands(iegrp)
- CALL MPI_WAIT(requests(im,iegrp), istatus, ierr)
- END DO
- END DO
- ELSEIF(type.eq.2)THEN
- DO iegrp=1, negrp
- DO im=1, all_end(iegrp) - all_start(iegrp) + 1
- IF(all_start(iegrp).eq.0) CYCLE
- CALL MPI_WAIT(requests(im,iegrp), istatus, ierr)
- END DO
- END DO
- END IF
-#endif
- !
- !-------------------------------------------------------!
- !Communication Part 2
- !-------------------------------------------------------!
- !
- !send communication packets
- !
- DO iproc=0, nproc_egrp-1
- IF ( comm_send(iproc+1,current_ik)%size.gt.0) THEN
- DO i=1, comm_send(iproc+1,current_ik)%size
- ig = comm_send(iproc+1,current_ik)%indices(i)
- !
- !determine which egrp this corresponds to
- !
- prev = 0
- DO j=1, nproc_pool
- IF ((prev+lda_local(j,current_ik)).ge.ig) THEN
- ig = ig - prev
- exit
- END IF
- prev = prev + lda_local(j,current_ik)
- END DO
- !
- ! prepare the message
- !
- DO ipol=1, npol
- IF ( type.eq.0 ) THEN !psi or hpsi
- DO im=1, nibands(my_egrp_id+1)
- comm_send(iproc+1,current_ik)%msg(i,ipol,im) = &
- psi_work(ig,ipol,ibands(im,my_egrp_id+1),1+(j-1)/nproc_egrp)
- END DO
- ELSE IF (type.eq.1) THEN !evc
- DO im=1, m
- comm_send(iproc+1,current_ik)%msg(i,ipol,im) = &
- psi_work(ig,ipol,im,1+(j-1)/nproc_egrp)
- END DO
- ELSE IF ( type.eq.2 ) THEN !evc2
- IF(all_start(my_egrp_id+1).gt.0) THEN
- DO im=1, all_end(my_egrp_id+1) - all_start(my_egrp_id+1) + 1
- comm_send(iproc+1,current_ik)%msg(i,ipol,im) = &
- psi_work(ig,ipol,im+all_start(my_egrp_id+1)-1,1+(j-1)/nproc_egrp)
- END DO
- END IF
- END IF
- END DO
- !
- END DO
- !
- ! send the message
- !
-#if defined(__MPI)
- IF ( type.eq.0 ) THEN !psi or hpsi
- CALL MPI_ISEND( comm_send(iproc+1,current_ik)%msg, &
- comm_send(iproc+1,current_ik)%size*npol*nibands(my_egrp_id+1), &
- MPI_DOUBLE_COMPLEX, &
- iproc, 100+iproc*nproc_egrp+me_egrp, &
- intra_egrp_comm, request_send(iproc+1), ierr )
- ELSE IF (type.eq.1) THEN !evc
- CALL MPI_ISEND( comm_send(iproc+1,current_ik)%msg, &
- comm_send(iproc+1,current_ik)%size*npol*m, MPI_DOUBLE_COMPLEX, &
- iproc, 100+iproc*nproc_egrp+me_egrp, &
- intra_egrp_comm, request_send(iproc+1), ierr )
- ELSE IF (type.eq.2) THEN !evc2
- CALL MPI_ISEND( comm_send(iproc+1,current_ik)%msg, &
- comm_send(iproc+1,current_ik)%size*npol*(all_end(my_egrp_id+1)-all_start(my_egrp_id+1)+1), &
- MPI_DOUBLE_COMPLEX, &
- iproc, 100+iproc*nproc_egrp+me_egrp, &
- intra_egrp_comm, request_send(iproc+1), ierr )
- END IF
-#endif
- !
- END IF
- END DO
- !
- ! begin receiving the messages
- !
- DO iproc=0, nproc_egrp-1
- IF ( comm_recv(iproc+1,current_ik)%size.gt.0) THEN
- !
- ! receive the message
- !
-#if defined(__MPI)
- IF (type.eq.0) THEN !psi or hpsi
- CALL MPI_IRECV( comm_recv(iproc+1,current_ik)%msg, &
- comm_recv(iproc+1,current_ik)%size*npol*nibands(my_egrp_id+1), &
- MPI_DOUBLE_COMPLEX, &
- iproc, 100+me_egrp*nproc_egrp+iproc, &
- intra_egrp_comm, request_recv(iproc+1), ierr )
- ELSE IF (type.eq.1) THEN !evc
- CALL MPI_IRECV( comm_recv(iproc+1,current_ik)%msg, &
- comm_recv(iproc+1,current_ik)%size*npol*m, MPI_DOUBLE_COMPLEX, &
- iproc, 100+me_egrp*nproc_egrp+iproc, &
- intra_egrp_comm, request_recv(iproc+1), ierr )
- ELSE IF (type.eq.2) THEN !evc2
- CALL MPI_IRECV( comm_recv(iproc+1,current_ik)%msg, &
- comm_recv(iproc+1,current_ik)%size*npol*(all_end(my_egrp_id+1)-all_start(my_egrp_id+1)+1), &
- MPI_DOUBLE_COMPLEX, &
- iproc, 100+me_egrp*nproc_egrp+iproc, &
- intra_egrp_comm, request_recv(iproc+1), ierr )
- END IF
-#endif
- !
- END IF
- END DO
- !
- ! assign psi_out
- !
- DO iproc=0, nproc_egrp-1
- IF ( comm_recv(iproc+1,current_ik)%size.gt.0 ) THEN
- !
- ! wait for the message to be received
- !
-#if defined(__MPI)
- CALL MPI_WAIT(request_recv(iproc+1), istatus, ierr)
-#endif
- !
- DO i=1, comm_recv(iproc+1,current_ik)%size
- ig = comm_recv(iproc+1,current_ik)%indices(i)
- !
- ! place the message into the correct elements of psi_out
- !
- IF (type.eq.0) THEN !psi or hpsi
- DO im=1, nibands(my_egrp_id+1)
- DO ipol=1, npol
- psi_out(ig+npwx_exx*(ipol-1),im) = &
- comm_recv(iproc+1,current_ik)%msg(i,ipol,im)
- END DO
- END DO
- ELSE IF (type.eq.1) THEN !evc
- DO im=1, m
- DO ipol=1, npol
- psi_out(ig+npwx_exx*(ipol-1),im) = &
- comm_recv(iproc+1,current_ik)%msg(i,ipol,im)
- END DO
- END DO
- ELSE IF (type.eq.2) THEN !evc2
- DO im=1, all_end(my_egrp_id+1) - all_start(my_egrp_id+1) + 1
- DO ipol=1, npol
- psi_out(ig+npwx_exx*(ipol-1),im) = comm_recv(iproc+1,current_ik)%msg(i,ipol,im)
- END DO
- END DO
- END IF
- !
- END DO
- !
- END IF
- END DO
- !
- ! wait for everything to finish sending
- !
-#if defined(__MPI)
- DO iproc=0, nproc_egrp-1
- IF ( comm_send(iproc+1,current_ik)%size.gt.0 ) THEN
- CALL MPI_WAIT(request_send(iproc+1), istatus, ierr)
- END IF
- END DO
-#endif
- !
- ! deallocate arrays
- !
- DEALLOCATE( psi_work, psi_gather )
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE transform_to_exx
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE change_data_structure(is_exx)
- !-----------------------------------------------------------------------
- !
- ! change between the local and EXX data structures
- ! is_exx = .TRUE. - change to the EXX data structure
- ! is_exx = .FALSE. - change to the local data strucutre
- !
- USE cell_base, ONLY : at, bg, tpiba2
- USE cellmd, ONLY : lmovecell
- USE wvfct, ONLY : npwx
- USE gvect, ONLY : gcutm, ig_l2g, g, gg, ngm, ngm_g, mill, &
- gstart, gvect_init, deallocate_gvect_exx
- USE gvecs, ONLY : gcutms, ngms, ngms_g, gvecs_init
- USE gvecw, ONLY : gkcut, ecutwfc, gcutw
- USE klist, ONLY : xk, nks, ngk
- USE mp_bands, ONLY : intra_bgrp_comm, ntask_groups, nyfft
- USE mp_exx, ONLY : intra_egrp_comm, me_egrp, exx_mode, nproc_egrp, &
- negrp, root_egrp
- USE io_global, ONLY : stdout
- USE fft_base, ONLY : dfftp, dffts, smap, fft_base_info
- USE fft_types, ONLY : fft_type_init
- USE recvec_subs, ONLY : ggen, ggens
-! USE task_groups, ONLY : task_groups_init
- !
- !
- IMPLICIT NONE
- !
- LOGICAL, intent(in) :: is_exx
- INTEGER, EXTERNAL :: n_plane_waves
- COMPLEX(DP), ALLOCATABLE :: work_space(:)
- INTEGER :: ik, i
- INTEGER :: ngm_, ngs_, ngw_
- LOGICAL exst
-#if defined (__MPI)
- LOGICAL :: lpara = .true.
-#else
- LOGICAL :: lpara = .false.
-#endif
- !
- IF (negrp.eq.1) RETURN
- !
- IF (first_data_structure_change) THEN
- allocate( ig_l2g_loc(ngm), g_loc(3,ngm), gg_loc(ngm) )
- allocate( mill_loc(3,ngm), nl_loc(ngm) )
- allocate( nls_loc(size(dffts%nl)) )
- IF ( gamma_only) THEN
- allocate( nlm_loc(size(dfftp%nlm)) )
- allocate( nlsm_loc(size(dffts%nlm)) )
- END IF
- ig_l2g_loc = ig_l2g
- g_loc = g
- gg_loc = gg
- mill_loc = mill
- nl_loc = dfftp%nl
- nls_loc = dffts%nl
- IF ( gamma_only) THEN
- nlm_loc = dfftp%nlm
- nlsm_loc = dffts%nlm
- END IF
- ngm_loc = ngm
- ngm_g_loc = ngm_g
- gstart_loc = gstart
- ngms_loc = ngms
- ngms_g_loc = ngms_g
- END IF
- !
- ! generate the gvectors for the new data structure
- !
- IF (is_exx) THEN
- exx_mode = 1
- IF(first_data_structure_change)THEN
- dfftp_loc = dfftp
- dffts_loc = dffts
-
- CALL fft_type_init( dffts_exx, smap_exx, "wave", gamma_only, &
- lpara, intra_egrp_comm, at, bg, gkcut, gcutms/gkcut, &
- nyfft=ntask_groups )
- CALL fft_type_init( dfftp_exx, smap_exx, "rho", gamma_only, &
- lpara, intra_egrp_comm, at, bg, gcutm, nyfft=nyfft )
- CALL fft_base_info( ionode, stdout )
- ngs_ = dffts_exx%ngl( dffts_exx%mype + 1 )
- ngm_ = dfftp_exx%ngl( dfftp_exx%mype + 1 )
- IF( gamma_only ) THEN
- ngs_ = (ngs_ + 1)/2
- ngm_ = (ngm_ + 1)/2
- END IF
- dfftp = dfftp_exx
- dffts = dffts_exx
- ngm = ngm_
- ngms = ngs_
- ELSE
- dfftp = dfftp_exx
- dffts = dffts_exx
- ngm = ngm_exx
- ngms = ngms_exx
- END IF
- call deallocate_gvect_exx()
- call gvect_init( ngm , intra_egrp_comm )
- call gvecs_init( ngms , intra_egrp_comm )
- ELSE
- exx_mode = 2
- dfftp = dfftp_loc
- dffts = dffts_loc
- ngm = ngm_loc
- ngms = ngms_loc
- call deallocate_gvect_exx()
- call gvect_init( ngm , intra_bgrp_comm )
- call gvecs_init( ngms , intra_bgrp_comm )
- exx_mode = 0
- END IF
- !
- IF (first_data_structure_change) THEN
- CALL ggen ( dfftp, gamma_only, at, bg, gcutm, ngm_g, ngm, &
- g, gg, mill, ig_l2g, gstart )
- CALL ggens( dffts, gamma_only, at, g, gg, mill, gcutms, ngms )
- allocate( ig_l2g_exx(ngm), g_exx(3,ngm), gg_exx(ngm) )
- allocate( mill_exx(3,ngm), nl_exx(ngm) )
- allocate( nls_exx(size(dffts%nl)) )
- allocate( nlm_exx(size(dfftp%nlm) ) )
- allocate( nlsm_exx(size(dffts%nlm) ) )
- ig_l2g_exx = ig_l2g
- g_exx = g
- gg_exx = gg
- mill_exx = mill
- nl_exx = dfftp%nl
- nls_exx = dffts%nl
- IF( gamma_only ) THEN
- nlm_exx = dfftp%nlm
- nlsm_exx = dffts%nlm
- END IF
- ngm_exx = ngm
- ngm_g_exx = ngm_g
- gstart_exx = gstart
- ngms_exx = ngms
- ngms_g_exx = ngms_g
- ELSE IF ( is_exx ) THEN
- ig_l2g = ig_l2g_exx
- g = g_exx
- gg = gg_exx
- mill = mill_exx
- ! workaround: here dfft?%nl* are unallocated
- ! some compilers go on and allocate, some others crash
- IF ( .NOT. ALLOCATED(dfftp%nl) ) ALLOCATE (dfftp%nl(size(nl_exx)))
- IF ( .NOT. ALLOCATED(dffts%nl) ) ALLOCATE (dffts%nl(size(nls_exx)))
- IF ( gamma_only .AND. .NOT.ALLOCATED(dfftp%nlm) ) ALLOCATE (dfftp%nlm(size(nlm_exx)))
- IF ( gamma_only .AND. .NOT.ALLOCATED(dffts%nlm) ) ALLOCATE (dffts%nlm(size(nlsm_exx)))
- ! end workaround. FIXME: this part of code must disappear ASAP
- dfftp%nl = nl_exx
- dffts%nl = nls_exx
- IF( gamma_only ) THEN
- dfftp%nlm = nlm_exx
- dffts%nlm = nlsm_exx
- ENDIF
- ngm = ngm_exx
- ngm_g = ngm_g_exx
- gstart = gstart_exx
- ngms = ngms_exx
- ngms_g = ngms_g_exx
- ELSE ! not is_exx
- ig_l2g = ig_l2g_loc
- g = g_loc
- gg = gg_loc
- mill = mill_loc
- dfftp%nl = nl_loc
- dffts%nl = nls_loc
- IF( gamma_only ) THEN
- dfftp%nlm = nlm_loc
- dffts%nlm = nlsm_loc
- END IF
- ngm = ngm_loc
- ngm_g = ngm_g_loc
- gstart = gstart_loc
- ngms = ngms_loc
- ngms_g = ngms_g_loc
- END IF
- !
- ! get npwx and ngk
- !
- IF ( is_exx.and.npwx_exx.gt.0 ) THEN
- npwx = npwx_exx
- ngk = ngk_exx
- ELSE IF ( .not.is_exx.and.npwx_local.gt.0 ) THEN
- npwx = npwx_local
- ngk = ngk_local
- ELSE
- npwx = n_plane_waves (gcutw, nks, xk, g, ngm)
- END IF
- !
- ! get igk
- !
- IF( first_data_structure_change ) THEN
- allocate(igk_exx(npwx,nks),work_space(npwx))
- first_data_structure_change = .FALSE.
- IF ( nks.eq.1 ) THEN
- CALL gk_sort( xk, ngm, g, ecutwfc / tpiba2, ngk, igk_exx, work_space )
- END IF
- IF ( nks > 1 ) THEN
- !
- DO ik = 1, nks
- CALL gk_sort( xk(1,ik), ngm, g, ecutwfc / tpiba2, ngk(ik), &
- igk_exx(1,ik), work_space )
- END DO
- !
- END IF
- DEALLOCATE( work_space )
- END IF
- !
- ! generate ngl and igtongl
- !
- CALL gshells( lmovecell )
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE change_data_structure
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE update_igk(is_exx)
- !-----------------------------------------------------------------------
- USE cell_base, ONLY : tpiba2
- USE gvect, ONLY : ngm, g
- USE gvecw, ONLY : ecutwfc
- USE wvfct, ONLY : npwx, current_k
- USE klist, ONLY : xk, igk_k
- USE mp_exx, ONLY : negrp
- !
- !
- IMPLICIT NONE
- !
- LOGICAL, intent(in) :: is_exx
- COMPLEX(DP), ALLOCATABLE :: work_space(:)
- INTEGER :: comm
- INTEGER :: npw, ik, i
- LOGICAL exst
- !
- IF (negrp.eq.1) RETURN
- !
- ! get igk
- !
- allocate(work_space(npwx))
- ik = current_k
- IF(is_exx) THEN
- CALL gk_sort( xk(1,ik), ngm, g, ecutwfc / tpiba2, npw, igk_exx(1,ik), &
- work_space )
- ELSE
- CALL gk_sort( xk(1,ik), ngm, g, ecutwfc / tpiba2, npw, igk_k(1,ik), &
- work_space )
- END IF
- !
- DEALLOCATE( work_space )
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE update_igk
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE result_sum (n, m, data)
- !-----------------------------------------------------------------------
- USE parallel_include
- USE mp_exx, ONLY : iexx_start, iexx_end, inter_egrp_comm, &
- intra_egrp_comm, my_egrp_id, negrp, &
- max_pairs, egrp_pairs, max_contributors, &
- contributed_bands, all_end, &
- iexx_istart, iexx_iend, band_roots
- USE mp, ONLY : mp_sum, mp_bcast
-
- INTEGER, INTENT(in) :: n, m
- COMPLEX(DP), INTENT(inout) :: data(n,m)
-#if defined(__MPI)
- INTEGER :: istatus(MPI_STATUS_SIZE)
- COMPLEX(DP), ALLOCATABLE :: recvbuf(:,:)
- COMPLEX(DP) :: data_sum(n,m), test(negrp)
- INTEGER :: im, iegrp, ibuf, i, j, nsending(m)
- INTEGER :: ncontributing(m)
- INTEGER :: contrib_this(negrp,m), displs(negrp,m)
-
- INTEGER sendcount, sendtype, ierr, root, request(m)
- INTEGER sendc(negrp), sendd(negrp)
-#endif
- !
- IF (negrp.eq.1) RETURN
- !
-#if defined(__MPI)
- ! gather data onto the correct nodes
- !
- ALLOCATE( recvbuf( n*max_contributors, max(1,iexx_end-iexx_start+1) ) )
- displs = 0
- ibuf = 0
- nsending = 0
- contrib_this = 0
- !
- DO im=1, m
- !
- IF(contributed_bands(im,my_egrp_id+1)) THEN
- sendcount = n
- ELSE
- sendcount = 0
- END IF
- !
- root = band_roots(im)
- !
- IF(my_egrp_id.eq.root) THEN
- !
- ! determine the number of sending processors
- !
- ibuf = ibuf + 1
- ncontributing(im) = 0
- DO iegrp=1, negrp
- IF(contributed_bands(im,iegrp)) THEN
- ncontributing(im) = ncontributing(im) + 1
- contrib_this(iegrp,im) = n
- IF(iegrp.lt.negrp) displs(iegrp+1,im) = displs(iegrp,im) + n
- nsending(im) = nsending(im) + 1
- ELSE
- contrib_this(iegrp,im) = 0
- IF(iegrp.lt.negrp) displs(iegrp+1,im) = displs(iegrp,im)
- END IF
- END DO
- END IF
- !
-#if defined(__MPI_NONBLOCKING)
- CALL MPI_IGATHERV(data(:,im), sendcount, MPI_DOUBLE_COMPLEX, &
- recvbuf(:,max(1,ibuf)), contrib_this(:,im), &
- displs(:,im), MPI_DOUBLE_COMPLEX, &
- root, inter_egrp_comm, request(im), ierr)
-#else
- CALL MPI_GATHERV(data(:,im), sendcount, MPI_DOUBLE_COMPLEX, &
- recvbuf(:,max(1,ibuf)), contrib_this(:,im), &
- displs(:,im), MPI_DOUBLE_COMPLEX, &
- root, inter_egrp_comm, ierr)
-#endif
- !
- END DO
- !
-#if defined(__MPI_NONBLOCKING)
- DO im=1, m
- CALL MPI_WAIT(request(im), istatus, ierr)
- END DO
-#endif
- !
- ! perform the sum
- !
- DO im=iexx_istart(my_egrp_id+1), iexx_iend(my_egrp_id+1)
- IF(im.eq.0)exit
- data(:,im) = 0._dp
- ibuf = im - iexx_istart(my_egrp_id+1) + 1
- DO j=1, nsending(im)
- DO i=1, n
- data(i,im) = data(i,im) + recvbuf(i+n*(j-1),ibuf)
- END DO
- END DO
- END DO
-#endif
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE result_sum
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE transform_to_local(m, m_exx, psi, psi_out)
- !-----------------------------------------------------------------------
- USE mp, ONLY : mp_sum
- USE mp_pools, ONLY : nproc_pool, me_pool, intra_pool_comm
- USE mp_exx, ONLY : intra_egrp_comm, inter_egrp_comm, &
- nproc_egrp, me_egrp, negrp, my_egrp_id, iexx_istart, iexx_iend
- USE parallel_include
- USE wvfct, ONLY : current_k
- !
- !
- IMPLICIT NONE
- !
- INTEGER :: m, m_exx
- COMPLEX(DP) :: psi(npwx_exx*npol,m_exx)
- COMPLEX(DP) :: psi_out(npwx_local*npol,m)
- !
- INTEGER :: i, j, im, iproc, ig, ik, current_ik, iegrp
- INTEGER :: prev, lda_max_local, prev_lda_exx
- INTEGER :: my_bands, recv_bands, tag
- !
- INTEGER :: request_send(nproc_egrp,negrp), request_recv(nproc_egrp,negrp)
- INTEGER :: ierr
- INTEGER :: ipol
-#if defined(__MPI)
- INTEGER :: istatus(MPI_STATUS_SIZE)
-#endif
- !INTEGER, EXTERNAL :: find_current_k
- !
- current_ik = current_k
- prev_lda_exx = sum( lda_exx(1:me_egrp,current_ik) )
- !
- my_bands = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1
- !
- ! send communication packets
- !
- IF ( iexx_istart(my_egrp_id+1).gt.0 ) THEN
- DO iegrp=1, negrp
- DO iproc=0, nproc_egrp-1
- IF ( comm_send_reverse(iproc+1,iegrp,current_ik)%size.gt.0) THEN
- DO i=1, comm_send_reverse(iproc+1,iegrp,current_ik)%size
- ig = comm_send_reverse(iproc+1,iegrp,current_ik)%indices(i)
- ig = ig - prev_lda_exx
- !
- DO im=1, my_bands
- DO ipol=1, npol
- comm_send_reverse(iproc+1,iegrp,current_ik)%msg(i,ipol,im) = &
- psi(ig+npwx_exx*(ipol-1),im)
- END DO
- END DO
- !
- END DO
- !
- ! send the message
- !
- tag = 0
-#if defined(__MPI)
- CALL MPI_ISEND( comm_send_reverse(iproc+1,iegrp,current_ik)%msg, &
- comm_send_reverse(iproc+1,iegrp,current_ik)%size*npol*my_bands, &
- MPI_DOUBLE_COMPLEX, &
- iproc+(iegrp-1)*nproc_egrp, &
- tag, &
- intra_pool_comm, request_send(iproc+1,iegrp), ierr )
-#endif
- !
- END IF
- END DO
- END DO
- END IF
- !
- ! begin receiving the communication packets
- !
- DO iegrp=1, negrp
- !
- IF ( iexx_istart(iegrp).le.0 ) CYCLE
- !
- recv_bands = iexx_iend(iegrp) - iexx_istart(iegrp) + 1
- !
- DO iproc=0, nproc_egrp-1
- IF ( comm_recv_reverse(iproc+1,current_ik)%size.gt.0) THEN
- !
- !receive the message
- !
- tag = 0
-#if defined(__MPI)
- CALL MPI_IRECV( comm_recv_reverse(iproc+1,current_ik)%msg(:,:,iexx_istart(iegrp)), &
- comm_recv_reverse(iproc+1,current_ik)%size*npol*recv_bands, &
- MPI_DOUBLE_COMPLEX, &
- iproc+(iegrp-1)*nproc_egrp, &
- tag, &
- intra_pool_comm, request_recv(iproc+1,iegrp), ierr )
-#endif
- !
- END IF
- END DO
- END DO
- !
- ! assign psi
- !
- DO iproc=0, nproc_egrp-1
- IF ( comm_recv_reverse(iproc+1,current_ik)%size.gt.0 ) THEN
- !
-#if defined(__MPI)
- DO iegrp=1, negrp
- IF ( iexx_istart(iegrp).le.0 ) CYCLE
- CALL MPI_WAIT(request_recv(iproc+1,iegrp), istatus, ierr)
- END DO
-#endif
- !
- DO i=1, comm_recv_reverse(iproc+1,current_ik)%size
- ig = comm_recv_reverse(iproc+1,current_ik)%indices(i)
- !
- ! set psi_out
- !
- DO im=1, m
- DO ipol=1, npol
- psi_out(ig+npwx_local*(ipol-1),im) = psi_out(ig+npwx_local*(ipol-1),im) + &
- comm_recv_reverse(iproc+1,current_ik)%msg(i,ipol,im)
- END DO
- END DO
- !
- END DO
- !
- END IF
- END DO
- !
- ! wait for everything to finish sending
- !
-#if defined(__MPI)
- IF ( iexx_istart(my_egrp_id+1).gt.0 ) THEN
- DO iproc=0, nproc_egrp-1
- DO iegrp=1, negrp
- IF ( comm_send_reverse(iproc+1,iegrp,current_ik)%size.gt.0 ) THEN
- CALL MPI_WAIT(request_send(iproc+1,iegrp), istatus, ierr)
- END IF
- END DO
- END DO
- END IF
-#endif
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE transform_to_local
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE exxbuff_comm(exxtemp,ikq,lda,jstart,jend)
- !-----------------------------------------------------------------------
- USE mp_exx, ONLY : my_egrp_id, inter_egrp_comm, jblock, &
- all_end, negrp
- USE mp, ONLY : mp_bcast
- USE parallel_include
- COMPLEX(DP), intent(inout) :: exxtemp(lda,jend-jstart+1)
- INTEGER, intent(in) :: ikq, lda, jstart, jend
- INTEGER :: jbnd, iegrp, ierr, request_exxbuff(jend-jstart+1)
-#if defined(__MPI)
- INTEGER :: istatus(MPI_STATUS_SIZE)
-#endif
-
- DO jbnd=jstart, jend
- !determine which band group has this value of exxbuff
- DO iegrp=1, negrp
- IF(all_end(iegrp) >= jbnd) exit
- END DO
-
- IF (iegrp == my_egrp_id+1) THEN
- exxtemp(:,jbnd-jstart+1) = exxbuff(:,jbnd,ikq)
- END IF
-
-#if defined(__MPI_NONBLOCKING)
- CALL MPI_IBCAST(exxtemp(:,jbnd-jstart+1), &
- lda, &
- MPI_DOUBLE_COMPLEX, &
- iegrp-1, &
- inter_egrp_comm, &
- request_exxbuff(jbnd-jstart+1), ierr)
-#elif defined(__MPI)
- CALL mp_bcast(exxtemp(:,jbnd-jstart+1),iegrp-1,inter_egrp_comm)
-#endif
- END DO
-
- DO jbnd=jstart, jend
-#if defined(__MPI_NONBLOCKING)
- CALL MPI_WAIT(request_exxbuff(jbnd-jstart+1), istatus, ierr)
-#endif
- END DO
-
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE exxbuff_comm
- !-----------------------------------------------------------------------
- !
- !-----------------------------------------------------------------------
- SUBROUTINE exxbuff_comm_gamma(exxtemp,ikq,lda,jstart,jend,jlength)
- !-----------------------------------------------------------------------
- USE mp_exx, ONLY : my_egrp_id, inter_egrp_comm, jblock, &
- all_end, negrp
- USE mp, ONLY : mp_bcast
- USE parallel_include
- COMPLEX(DP), intent(inout) :: exxtemp(lda*npol,jlength)
- INTEGER, intent(in) :: ikq, lda, jstart, jend, jlength
- INTEGER :: jbnd, iegrp, ierr, request_exxbuff(jend-jstart+1), ir
-#if defined(__MPI)
- INTEGER :: istatus(MPI_STATUS_SIZE)
-#endif
- REAL(DP), ALLOCATABLE :: work(:,:)
-
- CALL start_clock ('comm_buff')
-
- ALLOCATE ( work(lda*npol,jend-jstart+1) )
- DO jbnd=jstart, jend
- !determine which band group has this value of exxbuff
- DO iegrp=1, negrp
- IF(all_end(iegrp) >= jbnd) exit
- END DO
-
- IF (iegrp == my_egrp_id+1) THEN
-! exxtemp(:,jbnd-jstart+1) = exxbuff(:,jbnd,ikq)
- IF( MOD(jbnd,2) == 1 ) THEN
- work(:,jbnd-jstart+1) = REAL(exxbuff(:,jbnd/2+1,ikq))
- ELSE
- work(:,jbnd-jstart+1) = AIMAG(exxbuff(:,jbnd/2,ikq))
- END IF
- END IF
-
-#if defined(__MPI_NONBLOCKING)
- CALL MPI_IBCAST(work(:,jbnd-jstart+1), &
- lda*npol, &
- MPI_DOUBLE_PRECISION, &
- iegrp-1, &
- inter_egrp_comm, &
- request_exxbuff(jbnd-jstart+1), ierr)
-#elif defined(__MPI)
- CALL mp_bcast(work(:,jbnd-jstart+1),iegrp-1,inter_egrp_comm)
-#endif
- END DO
-
- DO jbnd=jstart, jend
-#if defined(__MPI_NONBLOCKING)
- CALL MPI_WAIT(request_exxbuff(jbnd-jstart+1), istatus, ierr)
-#endif
- END DO
- DO jbnd=jstart, jend, 2
- IF (jbnd < jend) THEN
- ! two real bands can be coupled into a single complex one
- DO ir=1, lda*npol
- exxtemp(ir,1+(jbnd-jstart+1)/2) = CMPLX( work(ir,jbnd-jstart+1),&
- work(ir,jbnd-jstart+2),&
- KIND=dp )
- END DO
- ELSE
- ! case of lone last band
- DO ir=1, lda*npol
- exxtemp(ir,1+(jbnd-jstart+1)/2) = CMPLX( work(ir,jbnd-jstart+1),&
- 0.0_dp, KIND=dp )
- END DO
- ENDIF
- END DO
- DEALLOCATE ( work )
-
- CALL stop_clock ('comm_buff')
- !
- !-----------------------------------------------------------------------
- END SUBROUTINE exxbuff_comm_gamma
- !-----------------------------------------------------------------------
SUBROUTINE aceinit( exex )
!
diff --git a/PW/src/exx_band.f90 b/PW/src/exx_band.f90
new file mode 100644
index 000000000..98c271281
--- /dev/null
+++ b/PW/src/exx_band.f90
@@ -0,0 +1,1558 @@
+! Copyright (C) 2005-2018 Quantum ESPRESSO group
+! This file is distributed under the terms of the
+! GNU General Public License. See the file `License'
+! in the root directory of the present distribution,
+! or http://www.gnu.org/copyleft/gpl.txt .
+!
+!--------------------------------------
+MODULE exx_band
+ !--------------------------------------
+ !
+ ! Variables and subroutines for band parallelization over pairs of bands:
+ ! see T. Barnes, T. Kurth, P. Carrier, N. Wichmann, D. Prendergast,
+ ! P.R.C. Kent, J. Deslippe, Computer Physics Communications 2017,
+ ! dx.doi.org/10.1016/j.cpc.2017.01.008
+ !
+ USE kinds, ONLY : DP
+ USE noncollin_module, ONLY : npol
+ USE io_global, ONLY : ionode, stdout
+ !
+ USE control_flags, ONLY : gamma_only
+ USE fft_types, ONLY : fft_type_descriptor
+ USE stick_base, ONLY : sticks_map
+ !
+ IMPLICIT NONE
+ SAVE
+ !!
+ COMPLEX(DP), ALLOCATABLE :: evc_exx(:,:)
+ COMPLEX(DP), ALLOCATABLE :: psi_exx(:,:), hpsi_exx(:,:)
+ INTEGER :: lda_original, n_original
+ INTEGER :: nwordwfc_exx
+ INTEGER, ALLOCATABLE :: igk_exx(:,:)
+ !
+ ! mapping for the data structure conversion
+ !
+ TYPE comm_packet
+ INTEGER :: size
+ INTEGER, ALLOCATABLE :: indices(:)
+ COMPLEX(DP), ALLOCATABLE :: msg(:,:,:)
+ END TYPE comm_packet
+ TYPE(comm_packet), ALLOCATABLE :: comm_recv(:,:), comm_send(:,:)
+ TYPE(comm_packet), ALLOCATABLE :: comm_recv_reverse(:,:)
+ TYPE(comm_packet), ALLOCATABLE :: comm_send_reverse(:,:,:)
+ INTEGER, ALLOCATABLE :: lda_local(:,:)
+ INTEGER, ALLOCATABLE :: lda_exx(:,:)
+ INTEGER, ALLOCATABLE :: ngk_local(:), ngk_exx(:)
+ INTEGER :: npwx_local = 0
+ INTEGER :: npwx_exx = 0
+ INTEGER :: n_local = 0
+ LOGICAL :: first_data_structure_change = .TRUE.
+
+ INTEGER :: ngm_loc, ngm_g_loc, gstart_loc
+ INTEGER, ALLOCATABLE :: ig_l2g_loc(:)
+ REAL(DP), ALLOCATABLE :: g_loc(:,:), gg_loc(:)
+ INTEGER, ALLOCATABLE :: mill_loc(:,:), nl_loc(:)
+ INTEGER :: ngms_loc, ngms_g_loc
+ INTEGER, ALLOCATABLE :: nls_loc(:)
+ INTEGER, ALLOCATABLE :: nlm_loc(:)
+ INTEGER, ALLOCATABLE :: nlsm_loc(:)
+
+ INTEGER :: ngm_exx, ngm_g_exx, gstart_exx
+ INTEGER, ALLOCATABLE :: ig_l2g_exx(:)
+ REAL(DP), ALLOCATABLE :: g_exx(:,:), gg_exx(:)
+ INTEGER, ALLOCATABLE :: mill_exx(:,:), nl_exx(:)
+ INTEGER :: ngms_exx, ngms_g_exx
+ INTEGER, ALLOCATABLE :: nls_exx(:)
+ INTEGER, ALLOCATABLE :: nlm_exx(:)
+ INTEGER, ALLOCATABLE :: nlsm_exx(:)
+
+ TYPE(fft_type_descriptor) :: dfftp_loc, dffts_loc
+ TYPE(fft_type_descriptor) :: dfftp_exx, dffts_exx
+ TYPE (sticks_map) :: smap_exx ! Stick map descriptor
+
+ CONTAINS
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE transform_evc_to_exx(type)
+ !-----------------------------------------------------------------------
+ !
+ USE wvfct, ONLY : npwx, nbnd
+ USE io_files, ONLY : nwordwfc, iunwfc, iunwfc_exx
+ USE klist, ONLY : nks, ngk, igk_k
+ USE uspp, ONLY : nkb, vkb
+ USE wavefunctions_module, ONLY : evc
+ USE control_flags, ONLY : io_level
+ USE buffers, ONLY : open_buffer, get_buffer, save_buffer
+ USE mp_exx, ONLY : max_ibands, negrp
+ !
+ !
+ IMPLICIT NONE
+ !
+ INTEGER, intent(in) :: type
+ INTEGER :: lda, n, ik
+ LOGICAL :: exst_mem, exst_file
+ !
+ IF (negrp.eq.1) THEN
+ !
+ ! no change in data structure is necessary
+ ! just copy all of the required data
+ !
+ ! get evc_exx
+ !
+ IF(.not.allocated(evc_exx))THEN
+ ALLOCATE(evc_exx(npwx*npol,nbnd))
+ END IF
+ evc_exx = evc
+ !
+ ! get igk_exx
+ !
+ IF(.not.allocated(igk_exx)) THEN
+ ALLOCATE( igk_exx( npwx, nks ) )
+ igk_exx = igk_k
+ END IF
+ !
+ ! get the wfc buffer is used
+ !
+ iunwfc_exx = iunwfc
+ nwordwfc_exx = nwordwfc
+ !
+ RETURN
+ !
+ END IF
+ !
+ ! change the data structure of evc and igk
+ !
+ lda = npwx
+ n = npwx
+ npwx_local = npwx
+ IF( .not.allocated(ngk_local) ) allocate(ngk_local(nks))
+ ngk_local = ngk
+ !
+ IF ( .not.allocated(comm_recv) ) THEN
+ !
+ ! initialize all of the conversion maps and change the data structure
+ !
+ CALL initialize_local_to_exact_map(lda, nbnd)
+ ELSE
+ !
+ ! change the data structure
+ !
+ CALL change_data_structure(.TRUE.)
+ END IF
+ !
+ lda = npwx
+ n = npwx
+ npwx_exx = npwx
+ IF( .not.allocated(ngk_exx) ) allocate(ngk_exx(nks))
+ ngk_exx = ngk
+ !
+ ! get evc_exx
+ !
+ IF(.not.allocated(evc_exx))THEN
+ ALLOCATE(evc_exx(lda*npol,max_ibands+2))
+ !
+ ! ... open files/buffer for wavefunctions (nwordwfc set in openfil)
+ ! ... io_level > 1 : open file, otherwise: open buffer
+ !
+ nwordwfc_exx = size(evc_exx)
+ CALL open_buffer( iunwfc_exx, 'wfc_exx', nwordwfc_exx, io_level, &
+ exst_mem, exst_file )
+ END IF
+ !
+ DO ik=1, nks
+ !
+ ! read evc for the local data structure
+ !
+ IF ( nks > 1 ) CALL get_buffer(evc, nwordwfc, iunwfc, ik)
+ !
+ ! transform evc to the EXX data structure
+ !
+ CALL transform_to_exx(lda, n, nbnd, nbnd, ik, evc, evc_exx, type)
+ !
+ ! save evc to a buffer
+ !
+ IF ( nks > 1 ) CALL save_buffer ( evc_exx, nwordwfc_exx, iunwfc_exx, ik )
+ END DO
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE transform_evc_to_exx
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE transform_psi_to_exx(lda, n, m, psi)
+ !-----------------------------------------------------------------------
+ USE wvfct, ONLY : current_k, npwx, nbnd
+ USE mp_exx, ONLY : negrp, nibands, my_egrp_id, max_ibands
+ !
+ !
+ IMPLICIT NONE
+ !
+ Integer, INTENT(in) :: lda
+ INTEGER, INTENT(in) :: m
+ INTEGER, INTENT(inout) :: n
+ COMPLEX(DP), INTENT(in) :: psi(lda*npol,m)
+ !
+ ! change to the EXX data strucutre
+ !
+ npwx_local = npwx
+ n_local = n
+ IF ( .not.allocated(comm_recv) ) THEN
+ !
+ ! initialize all of the conversion maps and change the data structure
+ !
+ CALL initialize_local_to_exact_map(lda, m)
+ ELSE
+ !
+ ! change the data structure
+ !
+ CALL change_data_structure(.TRUE.)
+ END IF
+ npwx_exx = npwx
+ n = ngk_exx(current_k)
+ !
+ ! get igk
+ !
+ CALL update_igk(.TRUE.)
+ !
+ ! get psi_exx
+ !
+ CALL transform_to_exx(lda, n, m, max_ibands, &
+ current_k, psi, psi_exx, 0)
+ !
+ ! zero hpsi_exx
+ !
+ hpsi_exx = 0.d0
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE transform_psi_to_exx
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE transform_hpsi_to_local(lda, n, m, hpsi)
+ !-----------------------------------------------------------------------
+ USE mp_exx, ONLY : iexx_istart, iexx_iend, my_egrp_id
+ !
+ IMPLICIT NONE
+ !
+ INTEGER, INTENT(in) :: lda
+ INTEGER, INTENT(in) :: m
+ INTEGER, INTENT(inout) :: n
+ COMPLEX(DP), INTENT(out) :: hpsi(lda_original*npol,m)
+ INTEGER :: m_exx
+ !
+ ! change to the local data structure
+ !
+ CALL change_data_structure(.FALSE.)
+ !
+ ! get igk
+ !
+ CALL update_igk(.FALSE.)
+ n = n_local
+ !
+ ! transform hpsi_exx to the local data structure
+ !
+ m_exx = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1
+ CALL transform_to_local(m,m_exx,hpsi_exx,hpsi)
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE transform_hpsi_to_local
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE initialize_local_to_exact_map(lda, m)
+ !-----------------------------------------------------------------------
+ !
+ ! determine the mapping between the local and EXX data structures
+ !
+ USE wvfct, ONLY : npwx, nbnd
+ USE klist, ONLY : nks, igk_k
+ USE mp_exx, ONLY : nproc_egrp, negrp, my_egrp_id, me_egrp, &
+ intra_egrp_comm, inter_egrp_comm, &
+ ibands, nibands, init_index_over_band, &
+ iexx_istart, iexx_iend, max_ibands
+ USE mp_pools, ONLY : nproc_pool, me_pool, intra_pool_comm
+ USE mp, ONLY : mp_sum
+ USE gvect, ONLY : ig_l2g
+ USE uspp, ONLY : nkb
+ !
+ !
+ IMPLICIT NONE
+ !
+ Integer :: lda
+ INTEGER :: n, m
+ INTEGER, ALLOCATABLE :: local_map(:,:), exx_map(:,:)
+ INTEGER, ALLOCATABLE :: l2e_map(:,:), e2l_map(:,:,:)
+ INTEGER, ALLOCATABLE :: psi_source(:), psi_source_exx(:,:)
+ INTEGER :: current_index
+ INTEGER :: i, j, k, ik, im, ig, count, iproc, prev, iegrp
+ INTEGER :: total_lda(nks), prev_lda(nks)
+ INTEGER :: total_lda_exx(nks), prev_lda_exx(nks)
+ INTEGER :: n_local
+ INTEGER :: lda_max_local, lda_max_exx
+ INTEGER :: max_lda_egrp
+ INTEGER :: request_send(nproc_egrp), request_recv(nproc_egrp)
+ INTEGER :: ierr
+ INTEGER :: egrp_base, total_lda_egrp(nks), prev_lda_egrp(nks)
+ INTEGER :: igk_loc(npwx)
+ !
+ ! initialize the pair assignments
+ !
+ CALL init_index_over_band(inter_egrp_comm,nbnd,m)
+ !
+ ! allocate bookeeping arrays
+ !
+ IF ( .not.allocated(comm_recv) ) THEN
+ ALLOCATE(comm_recv(nproc_egrp,nks),comm_send(nproc_egrp,nks))
+ END IF
+ IF ( .not.allocated(lda_local) ) THEN
+ ALLOCATE(lda_local(nproc_pool,nks))
+ ALLOCATE(lda_exx(nproc_egrp,nks))
+ END IF
+ !
+ ! store the original values of lda and n
+ !
+ lda_original = lda
+ n_original = n
+ !
+ ! construct the local map
+ !
+ lda_local = 0
+ DO ik = 1, nks
+ igk_loc = igk_k(:,ik)
+ n = 0
+ DO i = 1, size(igk_loc)
+ IF(igk_loc(i).gt.0)n = n + 1
+ END DO
+ lda_local(me_pool+1,ik) = n
+ CALL mp_sum(lda_local(:,ik),intra_pool_comm)
+ total_lda(ik) = sum(lda_local(:,ik))
+ prev_lda(ik) = sum(lda_local(1:me_pool,ik))
+ END DO
+ ALLOCATE(local_map(maxval(total_lda),nks))
+ local_map = 0
+ DO ik = 1, nks
+ local_map(prev_lda(ik)+1:prev_lda(ik)+lda_local(me_pool+1,ik),ik) = &
+ ig_l2g(igk_k(1:lda_local(me_pool+1,ik),ik))
+ END DO
+ CALL mp_sum(local_map,intra_pool_comm)
+ !
+ !-----------------------------------------!
+ ! Switch to the exx data structure !
+ !-----------------------------------------!
+ !
+ CALL change_data_structure(.TRUE.)
+ !
+ ! construct the exx map
+ !
+ lda_exx = 0
+ DO ik = 1, nks
+ n = 0
+ DO i = 1, size(igk_exx(:,ik))
+ IF(igk_exx(i,ik).gt.0)n = n + 1
+ END DO
+ lda_exx(me_egrp+1,ik) = n
+ CALL mp_sum(lda_exx(:,ik),intra_egrp_comm)
+ total_lda_exx(ik) = sum(lda_exx(:,ik))
+ prev_lda_exx(ik) = sum(lda_exx(1:me_egrp,ik))
+ END DO
+ ALLOCATE(exx_map(maxval(total_lda_exx),nks))
+ exx_map = 0
+ DO ik = 1, nks
+ exx_map(prev_lda_exx(ik)+1:prev_lda_exx(ik)+lda_exx(me_egrp+1,ik),ik) = &
+ ig_l2g(igk_exx(1:lda_exx(me_egrp+1,ik),ik))
+ END DO
+ CALL mp_sum(exx_map,intra_egrp_comm)
+ !
+ ! construct the l2e_map
+ !
+ allocate( l2e_map(maxval(total_lda_exx),nks) )
+ l2e_map = 0
+ DO ik = 1, nks
+ DO ig = 1, lda_exx(me_egrp+1,ik)
+ DO j=1, total_lda(ik)
+ IF( local_map(j,ik).EQ.exx_map(ig+prev_lda_exx(ik),ik) ) exit
+ END DO
+ l2e_map(ig+prev_lda_exx(ik),ik) = j
+ END DO
+ END DO
+ CALL mp_sum(l2e_map,intra_egrp_comm)
+ !
+ ! plan communication for the data structure change
+ !
+ lda_max_local = maxval(lda_local)
+ lda_max_exx = maxval(lda_exx)
+ allocate(psi_source(maxval(total_lda_exx)))
+ !
+ DO ik = 1, nks
+ !
+ ! determine which task each value will come from
+ !
+ psi_source = 0
+ DO ig = 1, lda_exx(me_egrp+1,ik)
+ j = 1
+ DO i = 1, nproc_pool
+ j = j + lda_local(i,ik)
+ IF( j.gt.l2e_map(ig+prev_lda_exx(ik),ik) ) exit
+ END DO
+ psi_source(ig+prev_lda_exx(ik)) = i-1
+ END DO
+ CALL mp_sum(psi_source,intra_egrp_comm)
+ !
+ ! allocate communication packets to receive psi and hpsi
+ !
+ DO iproc=0, nproc_egrp-1
+ !
+ ! determine how many values need to come from iproc
+ !
+ count = 0
+ DO ig=1, lda_exx(me_egrp+1,ik)
+ IF ( MODULO(psi_source(ig+prev_lda_exx(ik)),nproc_egrp)&
+ .eq.iproc ) THEN
+ count = count + 1
+ END IF
+ END DO
+ !
+ ! allocate the communication packet
+ !
+ comm_recv(iproc+1,ik)%size = count
+ IF (count.gt.0) THEN
+ IF (.not.ALLOCATED(comm_recv(iproc+1,ik)%msg)) THEN
+ ALLOCATE(comm_recv(iproc+1,ik)%indices(count))
+ ALLOCATE(comm_recv(iproc+1,ik)%msg(count,npol,max_ibands+2))
+ END IF
+ END IF
+ !
+ ! determine which values need to come from iproc
+ !
+ count = 0
+ DO ig=1, lda_exx(me_egrp+1,ik)
+ IF ( MODULO(psi_source(ig+prev_lda_exx(ik)),nproc_egrp)&
+ .eq.iproc ) THEN
+ count = count + 1
+ comm_recv(iproc+1,ik)%indices(count) = ig
+ END IF
+ END DO
+ !
+ END DO
+ !
+ ! allocate communication packets to send psi and hpsi
+ !
+ prev = 0
+ DO iproc=0, nproc_egrp-1
+ !
+ ! determine how many values need to be sent to iproc
+ !
+ count = 0
+ DO ig=1, lda_exx(iproc+1,ik)
+ IF ( MODULO(psi_source(ig+prev),nproc_egrp).eq.me_egrp ) THEN
+ count = count + 1
+ END IF
+ END DO
+ !
+ ! allocate the communication packet
+ !
+ comm_send(iproc+1,ik)%size = count
+ IF (count.gt.0) THEN
+ IF (.not.ALLOCATED(comm_send(iproc+1,ik)%msg)) THEN
+ ALLOCATE(comm_send(iproc+1,ik)%indices(count))
+ ALLOCATE(comm_send(iproc+1,ik)%msg(count,npol,max_ibands+2))
+ END IF
+ END IF
+ !
+ ! determine which values need to be sent to iproc
+ !
+ count = 0
+ DO ig=1, lda_exx(iproc+1,ik)
+ IF ( MODULO(psi_source(ig+prev),nproc_egrp).eq.me_egrp ) THEN
+ count = count + 1
+ comm_send(iproc+1,ik)%indices(count) = l2e_map(ig+prev,ik)
+ END IF
+ END DO
+ !
+ prev = prev + lda_exx(iproc+1,ik)
+ !
+ END DO
+ !
+ END DO
+ !
+ ! allocate psi_exx and hpsi_exx
+ !
+ IF(allocated(psi_exx))DEALLOCATE(psi_exx)
+ ALLOCATE(psi_exx(npwx*npol, max_ibands ))
+ IF(allocated(hpsi_exx))DEALLOCATE(hpsi_exx)
+ ALLOCATE(hpsi_exx(npwx*npol, max_ibands ))
+ !
+ ! allocate communication arrays for the exx to local transformation
+ !
+ IF ( .not.allocated(comm_recv_reverse) ) THEN
+ ALLOCATE(comm_recv_reverse(nproc_egrp,nks))
+ ALLOCATE(comm_send_reverse(nproc_egrp,negrp,nks))
+ END IF
+ !
+ egrp_base = my_egrp_id*nproc_egrp
+ DO ik = 1, nks
+ total_lda_egrp(ik) = &
+ sum( lda_local(egrp_base+1:(egrp_base+nproc_egrp),ik) )
+ prev_lda_egrp(ik) = &
+ sum( lda_local(egrp_base+1:(egrp_base+me_egrp),ik) )
+ END DO
+ !
+ max_lda_egrp = 0
+ DO j = 1, negrp
+ DO ik = 1, nks
+ max_lda_egrp = max(max_lda_egrp, &
+ sum( lda_local((j-1)*nproc_egrp+1:j*nproc_egrp,ik) ) )
+ END DO
+ END DO
+ !
+ ! determine the e2l_map
+ !
+ allocate( e2l_map(max_lda_egrp,nks,negrp) )
+ e2l_map = 0
+ DO ik = 1, nks
+ DO ig = 1, lda_local(me_pool+1,ik)
+ DO j=1, total_lda_exx(ik)
+ IF( local_map(ig+prev_lda(ik),ik).EQ.exx_map(j,ik) ) exit
+ END DO
+ e2l_map(ig+prev_lda_egrp(ik),ik,my_egrp_id+1) = j
+ END DO
+ END DO
+ CALL mp_sum(e2l_map(:,:,my_egrp_id+1),intra_egrp_comm)
+ CALL mp_sum(e2l_map,inter_egrp_comm)
+ !
+ ! plan communication for the local to EXX data structure transformation
+ !
+ allocate(psi_source_exx( max_lda_egrp, negrp ))
+ DO ik = 1, nks
+ !
+ ! determine where each value is coming from
+ !
+ psi_source_exx = 0
+ DO ig = 1, lda_local(me_pool+1,ik)
+ j = 1
+ DO i = 1, nproc_egrp
+ j = j + lda_exx(i,ik)
+ IF( j.gt.e2l_map(ig+prev_lda_egrp(ik),ik,my_egrp_id+1) ) exit
+ END DO
+ psi_source_exx(ig+prev_lda_egrp(ik),my_egrp_id+1) = i-1
+ END DO
+ CALL mp_sum(psi_source_exx(:,my_egrp_id+1),intra_egrp_comm)
+ CALL mp_sum(psi_source_exx,inter_egrp_comm)
+ !
+ ! allocate communication packets to receive psi and hpsi (reverse)
+ !
+ DO iegrp=my_egrp_id+1, my_egrp_id+1
+ DO iproc=0, nproc_egrp-1
+ !
+ ! determine how many values need to come from iproc
+ !
+ count = 0
+ DO ig=1, lda_local(me_pool+1,ik)
+ IF ( psi_source_exx(ig+prev_lda_egrp(ik),iegrp).eq.iproc ) THEN
+ count = count + 1
+ END IF
+ END DO
+ !
+ ! allocate the communication packet
+ !
+ comm_recv_reverse(iproc+1,ik)%size = count
+ IF (count.gt.0) THEN
+ IF (.not.ALLOCATED(comm_recv_reverse(iproc+1,ik)%msg)) THEN
+ ALLOCATE(comm_recv_reverse(iproc+1,ik)%indices(count))
+ ALLOCATE(comm_recv_reverse(iproc+1,ik)%msg(count,npol,m+2))
+ END IF
+ END IF
+ !
+ ! determine which values need to come from iproc
+ !
+ count = 0
+ DO ig=1, lda_local(me_pool+1,ik)
+ IF ( psi_source_exx(ig+prev_lda_egrp(ik),iegrp).eq.iproc ) THEN
+ count = count + 1
+ comm_recv_reverse(iproc+1,ik)%indices(count) = ig
+ END IF
+ END DO
+
+ END DO
+ END DO
+ !
+ ! allocate communication packets to send psi and hpsi
+ !
+ DO iegrp=1, negrp
+ prev = 0
+ DO iproc=0, nproc_egrp-1
+ !
+ ! determine how many values need to be sent to iproc
+ !
+ count = 0
+ DO ig=1, lda_local(iproc+(iegrp-1)*nproc_egrp+1,ik)
+ IF ( psi_source_exx(ig+prev,iegrp).eq.me_egrp ) THEN
+ count = count + 1
+ END IF
+ END DO
+ !
+ ! allocate the communication packet
+ !
+ comm_send_reverse(iproc+1,iegrp,ik)%size = count
+ IF (count.gt.0) THEN
+ IF (.not.ALLOCATED(comm_send_reverse(iproc+1,iegrp,ik)%msg))THEN
+ ALLOCATE(comm_send_reverse(iproc+1,iegrp,ik)%indices(count))
+ ALLOCATE(comm_send_reverse(iproc+1,iegrp,ik)%msg(count,npol,&
+ iexx_iend(my_egrp_id+1)-iexx_istart(my_egrp_id+1)+3))
+ END IF
+ END IF
+ !
+ ! determine which values need to be sent to iproc
+ !
+ count = 0
+ DO ig=1, lda_local(iproc+(iegrp-1)*nproc_egrp+1,ik)
+ IF ( psi_source_exx(ig+prev,iegrp).eq.me_egrp ) THEN
+ count = count + 1
+ comm_send_reverse(iproc+1,iegrp,ik)%indices(count) = &
+ e2l_map(ig+prev,ik,iegrp)
+ END IF
+ END DO
+ !
+ prev = prev + lda_local( (iegrp-1)*nproc_egrp+iproc+1, ik )
+ !
+ END DO
+ END DO
+ !
+ END DO
+ !
+ ! deallocate arrays
+ !
+ DEALLOCATE( local_map, exx_map )
+ DEALLOCATE( l2e_map, e2l_map )
+ DEALLOCATE( psi_source, psi_source_exx )
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE initialize_local_to_exact_map
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE transform_to_exx(lda, n, m, m_out, ik, psi, psi_out, type)
+ !-----------------------------------------------------------------------
+ !
+ ! transform psi into the EXX data structure, and place the result in psi_out
+ !
+ USE wvfct, ONLY : nbnd
+ USE mp, ONLY : mp_sum
+ USE mp_pools, ONLY : nproc_pool, me_pool
+ USE mp_exx, ONLY : intra_egrp_comm, inter_egrp_comm, &
+ nproc_egrp, me_egrp, negrp, my_egrp_id, nibands, ibands, &
+ max_ibands, all_start, all_end
+ USE parallel_include
+ !
+ !
+ IMPLICIT NONE
+ !
+ Integer :: lda
+ INTEGER :: n, m, m_out
+ COMPLEX(DP) :: psi(npwx_local*npol,m)
+ COMPLEX(DP) :: psi_out(npwx_exx*npol,m_out)
+ INTEGER, INTENT(in) :: type
+
+ COMPLEX(DP), ALLOCATABLE :: psi_work(:,:,:,:), psi_gather(:,:)
+ INTEGER :: i, j, im, iproc, ig, ik, iegrp
+ INTEGER :: prev, lda_max_local
+
+ INTEGER :: request_send(nproc_egrp), request_recv(nproc_egrp)
+ INTEGER :: ierr
+ INTEGER :: current_ik
+ !INTEGER, EXTERNAL :: find_current_k
+ INTEGER :: recvcount(negrp)
+ INTEGER :: displs(negrp)
+#if defined(__MPI)
+ INTEGER :: istatus(MPI_STATUS_SIZE)
+#endif
+ INTEGER :: requests(max_ibands+2,negrp)
+ !
+ INTEGER :: ipol, my_lda, lda_offset, count
+ lda_max_local = maxval(lda_local)
+ current_ik = ik
+ !
+ !-------------------------------------------------------!
+ !Communication Part 1
+ !-------------------------------------------------------!
+ !
+ allocate(psi_work(lda_max_local,npol,m,negrp))
+ allocate(psi_gather(lda_max_local*npol,m))
+ DO im=1, m
+ my_lda = lda_local(me_pool+1,current_ik)
+ DO ipol=1, npol
+ lda_offset = lda_max_local*(ipol-1)
+ DO ig=1, my_lda
+ psi_gather(lda_offset+ig,im) = psi(npwx_local*(ipol-1)+ig,im)
+ END DO
+ END DO
+ END DO
+ recvcount = lda_max_local*npol
+ count = lda_max_local*npol
+#if defined (__MPI_NONBLOCKING)
+ IF ( type.eq.0 ) THEN
+ DO iegrp=1, negrp
+ displs(iegrp) = (iegrp-1)*(count*m)
+ END DO
+ DO iegrp=1, negrp
+ DO im=1, nibands(iegrp)
+ IF ( my_egrp_id.eq.(iegrp-1) ) THEN
+ DO j=1, negrp
+ displs(j) = (j-1)*(count*m) + count*(ibands(im,iegrp)-1)
+ END DO
+ END IF
+#if defined(__MPI)
+ CALL MPI_IGATHERV( psi_gather(:, ibands(im,iegrp) ), &
+ count, MPI_DOUBLE_COMPLEX, &
+ psi_work, &
+ recvcount, displs, MPI_DOUBLE_COMPLEX, &
+ iegrp-1, &
+ inter_egrp_comm, requests(im,iegrp), ierr )
+#endif
+ END DO
+ END DO
+
+ ELSE IF(type.eq.1) THEN
+#elif defined(__MPI)
+ CALL MPI_ALLGATHER( psi_gather, &
+ count*m, MPI_DOUBLE_COMPLEX, &
+ psi_work, &
+ count*m, MPI_DOUBLE_COMPLEX, &
+ inter_egrp_comm, ierr )
+#endif
+
+#if defined (__MPI_NONBLOCKING)
+ ELSE IF(type.eq.2) THEN !evc2
+
+ DO iegrp=1, negrp
+ displs(iegrp) = (iegrp-1)*(count*m)
+ END DO
+ DO iegrp=1, negrp
+ DO im=1, all_end(iegrp) - all_start(iegrp) + 1
+ !
+ IF(all_start(iegrp).eq.0) CYCLE
+ !
+ IF ( my_egrp_id.eq.(iegrp-1) ) THEN
+ DO j=1, negrp
+ displs(j) = (j-1)*(count*m) + count*(im+all_start(iegrp)-2)
+ END DO
+ END IF
+#if defined(__MPI)
+ CALL MPI_IGATHERV( psi_gather(:, im+all_start(iegrp)-1 ), &
+ count, MPI_DOUBLE_COMPLEX, &
+ psi_work, &
+ recvcount, displs, MPI_DOUBLE_COMPLEX, &
+ iegrp-1, &
+ inter_egrp_comm, requests(im,iegrp), ierr )
+#endif
+ END DO
+ END DO
+
+ END IF
+ !
+ IF(type.eq.0)THEN
+ DO iegrp=1, negrp
+ DO im=1, nibands(iegrp)
+ CALL MPI_WAIT(requests(im,iegrp), istatus, ierr)
+ END DO
+ END DO
+ ELSEIF(type.eq.2)THEN
+ DO iegrp=1, negrp
+ DO im=1, all_end(iegrp) - all_start(iegrp) + 1
+ IF(all_start(iegrp).eq.0) CYCLE
+ CALL MPI_WAIT(requests(im,iegrp), istatus, ierr)
+ END DO
+ END DO
+ END IF
+#endif
+ !
+ !-------------------------------------------------------!
+ !Communication Part 2
+ !-------------------------------------------------------!
+ !
+ !send communication packets
+ !
+ DO iproc=0, nproc_egrp-1
+ IF ( comm_send(iproc+1,current_ik)%size.gt.0) THEN
+ DO i=1, comm_send(iproc+1,current_ik)%size
+ ig = comm_send(iproc+1,current_ik)%indices(i)
+ !
+ !determine which egrp this corresponds to
+ !
+ prev = 0
+ DO j=1, nproc_pool
+ IF ((prev+lda_local(j,current_ik)).ge.ig) THEN
+ ig = ig - prev
+ exit
+ END IF
+ prev = prev + lda_local(j,current_ik)
+ END DO
+ !
+ ! prepare the message
+ !
+ DO ipol=1, npol
+ IF ( type.eq.0 ) THEN !psi or hpsi
+ DO im=1, nibands(my_egrp_id+1)
+ comm_send(iproc+1,current_ik)%msg(i,ipol,im) = &
+ psi_work(ig,ipol,ibands(im,my_egrp_id+1),1+(j-1)/nproc_egrp)
+ END DO
+ ELSE IF (type.eq.1) THEN !evc
+ DO im=1, m
+ comm_send(iproc+1,current_ik)%msg(i,ipol,im) = &
+ psi_work(ig,ipol,im,1+(j-1)/nproc_egrp)
+ END DO
+ ELSE IF ( type.eq.2 ) THEN !evc2
+ IF(all_start(my_egrp_id+1).gt.0) THEN
+ DO im=1, all_end(my_egrp_id+1) - all_start(my_egrp_id+1) + 1
+ comm_send(iproc+1,current_ik)%msg(i,ipol,im) = &
+ psi_work(ig,ipol,im+all_start(my_egrp_id+1)-1,1+(j-1)/nproc_egrp)
+ END DO
+ END IF
+ END IF
+ END DO
+ !
+ END DO
+ !
+ ! send the message
+ !
+#if defined(__MPI)
+ IF ( type.eq.0 ) THEN !psi or hpsi
+ CALL MPI_ISEND( comm_send(iproc+1,current_ik)%msg, &
+ comm_send(iproc+1,current_ik)%size*npol*nibands(my_egrp_id+1), &
+ MPI_DOUBLE_COMPLEX, &
+ iproc, 100+iproc*nproc_egrp+me_egrp, &
+ intra_egrp_comm, request_send(iproc+1), ierr )
+ ELSE IF (type.eq.1) THEN !evc
+ CALL MPI_ISEND( comm_send(iproc+1,current_ik)%msg, &
+ comm_send(iproc+1,current_ik)%size*npol*m, MPI_DOUBLE_COMPLEX, &
+ iproc, 100+iproc*nproc_egrp+me_egrp, &
+ intra_egrp_comm, request_send(iproc+1), ierr )
+ ELSE IF (type.eq.2) THEN !evc2
+ CALL MPI_ISEND( comm_send(iproc+1,current_ik)%msg, &
+ comm_send(iproc+1,current_ik)%size*npol*(all_end(my_egrp_id+1)-all_start(my_egrp_id+1)+1), &
+ MPI_DOUBLE_COMPLEX, &
+ iproc, 100+iproc*nproc_egrp+me_egrp, &
+ intra_egrp_comm, request_send(iproc+1), ierr )
+ END IF
+#endif
+ !
+ END IF
+ END DO
+ !
+ ! begin receiving the messages
+ !
+ DO iproc=0, nproc_egrp-1
+ IF ( comm_recv(iproc+1,current_ik)%size.gt.0) THEN
+ !
+ ! receive the message
+ !
+#if defined(__MPI)
+ IF (type.eq.0) THEN !psi or hpsi
+ CALL MPI_IRECV( comm_recv(iproc+1,current_ik)%msg, &
+ comm_recv(iproc+1,current_ik)%size*npol*nibands(my_egrp_id+1), &
+ MPI_DOUBLE_COMPLEX, &
+ iproc, 100+me_egrp*nproc_egrp+iproc, &
+ intra_egrp_comm, request_recv(iproc+1), ierr )
+ ELSE IF (type.eq.1) THEN !evc
+ CALL MPI_IRECV( comm_recv(iproc+1,current_ik)%msg, &
+ comm_recv(iproc+1,current_ik)%size*npol*m, MPI_DOUBLE_COMPLEX, &
+ iproc, 100+me_egrp*nproc_egrp+iproc, &
+ intra_egrp_comm, request_recv(iproc+1), ierr )
+ ELSE IF (type.eq.2) THEN !evc2
+ CALL MPI_IRECV( comm_recv(iproc+1,current_ik)%msg, &
+ comm_recv(iproc+1,current_ik)%size*npol*(all_end(my_egrp_id+1)-all_start(my_egrp_id+1)+1), &
+ MPI_DOUBLE_COMPLEX, &
+ iproc, 100+me_egrp*nproc_egrp+iproc, &
+ intra_egrp_comm, request_recv(iproc+1), ierr )
+ END IF
+#endif
+ !
+ END IF
+ END DO
+ !
+ ! assign psi_out
+ !
+ DO iproc=0, nproc_egrp-1
+ IF ( comm_recv(iproc+1,current_ik)%size.gt.0 ) THEN
+ !
+ ! wait for the message to be received
+ !
+#if defined(__MPI)
+ CALL MPI_WAIT(request_recv(iproc+1), istatus, ierr)
+#endif
+ !
+ DO i=1, comm_recv(iproc+1,current_ik)%size
+ ig = comm_recv(iproc+1,current_ik)%indices(i)
+ !
+ ! place the message into the correct elements of psi_out
+ !
+ IF (type.eq.0) THEN !psi or hpsi
+ DO im=1, nibands(my_egrp_id+1)
+ DO ipol=1, npol
+ psi_out(ig+npwx_exx*(ipol-1),im) = &
+ comm_recv(iproc+1,current_ik)%msg(i,ipol,im)
+ END DO
+ END DO
+ ELSE IF (type.eq.1) THEN !evc
+ DO im=1, m
+ DO ipol=1, npol
+ psi_out(ig+npwx_exx*(ipol-1),im) = &
+ comm_recv(iproc+1,current_ik)%msg(i,ipol,im)
+ END DO
+ END DO
+ ELSE IF (type.eq.2) THEN !evc2
+ DO im=1, all_end(my_egrp_id+1) - all_start(my_egrp_id+1) + 1
+ DO ipol=1, npol
+ psi_out(ig+npwx_exx*(ipol-1),im) = comm_recv(iproc+1,current_ik)%msg(i,ipol,im)
+ END DO
+ END DO
+ END IF
+ !
+ END DO
+ !
+ END IF
+ END DO
+ !
+ ! wait for everything to finish sending
+ !
+#if defined(__MPI)
+ DO iproc=0, nproc_egrp-1
+ IF ( comm_send(iproc+1,current_ik)%size.gt.0 ) THEN
+ CALL MPI_WAIT(request_send(iproc+1), istatus, ierr)
+ END IF
+ END DO
+#endif
+ !
+ ! deallocate arrays
+ !
+ DEALLOCATE( psi_work, psi_gather )
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE transform_to_exx
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE change_data_structure(is_exx)
+ !-----------------------------------------------------------------------
+ !
+ ! change between the local and EXX data structures
+ ! is_exx = .TRUE. - change to the EXX data structure
+ ! is_exx = .FALSE. - change to the local data strucutre
+ !
+ USE cell_base, ONLY : at, bg, tpiba2
+ USE cellmd, ONLY : lmovecell
+ USE wvfct, ONLY : npwx
+ USE gvect, ONLY : gcutm, ig_l2g, g, gg, ngm, ngm_g, mill, &
+ gstart, gvect_init, deallocate_gvect_exx
+ USE gvecs, ONLY : gcutms, ngms, ngms_g, gvecs_init
+ USE gvecw, ONLY : gkcut, ecutwfc, gcutw
+ USE klist, ONLY : xk, nks, ngk
+ USE mp_bands, ONLY : intra_bgrp_comm, ntask_groups, nyfft
+ USE mp_exx, ONLY : intra_egrp_comm, me_egrp, exx_mode, nproc_egrp, &
+ negrp, root_egrp
+ USE io_global, ONLY : stdout
+ USE fft_base, ONLY : dfftp, dffts, smap, fft_base_info
+ USE fft_types, ONLY : fft_type_init
+ USE recvec_subs, ONLY : ggen, ggens
+ !
+ IMPLICIT NONE
+ !
+ LOGICAL, intent(in) :: is_exx
+ INTEGER, EXTERNAL :: n_plane_waves
+ COMPLEX(DP), ALLOCATABLE :: work_space(:)
+ INTEGER :: ik, i
+ INTEGER :: ngm_, ngs_, ngw_
+ LOGICAL exst
+#if defined (__MPI)
+ LOGICAL :: lpara = .true.
+#else
+ LOGICAL :: lpara = .false.
+#endif
+ !
+ IF (negrp.eq.1) RETURN
+ !
+ IF (first_data_structure_change) THEN
+ allocate( ig_l2g_loc(ngm), g_loc(3,ngm), gg_loc(ngm) )
+ allocate( mill_loc(3,ngm), nl_loc(ngm) )
+ allocate( nls_loc(size(dffts%nl)) )
+ IF ( gamma_only) THEN
+ allocate( nlm_loc(size(dfftp%nlm)) )
+ allocate( nlsm_loc(size(dffts%nlm)) )
+ END IF
+ ig_l2g_loc = ig_l2g
+ g_loc = g
+ gg_loc = gg
+ mill_loc = mill
+ nl_loc = dfftp%nl
+ nls_loc = dffts%nl
+ IF ( gamma_only) THEN
+ nlm_loc = dfftp%nlm
+ nlsm_loc = dffts%nlm
+ END IF
+ ngm_loc = ngm
+ ngm_g_loc = ngm_g
+ gstart_loc = gstart
+ ngms_loc = ngms
+ ngms_g_loc = ngms_g
+ END IF
+ !
+ ! generate the gvectors for the new data structure
+ !
+ IF (is_exx) THEN
+ exx_mode = 1
+ IF(first_data_structure_change)THEN
+ dfftp_loc = dfftp
+ dffts_loc = dffts
+
+ CALL fft_type_init( dffts_exx, smap_exx, "wave", gamma_only, &
+ lpara, intra_egrp_comm, at, bg, gkcut, gcutms/gkcut, &
+ nyfft=ntask_groups )
+ CALL fft_type_init( dfftp_exx, smap_exx, "rho", gamma_only, &
+ lpara, intra_egrp_comm, at, bg, gcutm, nyfft=nyfft )
+ CALL fft_base_info( ionode, stdout )
+ ngs_ = dffts_exx%ngl( dffts_exx%mype + 1 )
+ ngm_ = dfftp_exx%ngl( dfftp_exx%mype + 1 )
+ IF( gamma_only ) THEN
+ ngs_ = (ngs_ + 1)/2
+ ngm_ = (ngm_ + 1)/2
+ END IF
+ dfftp = dfftp_exx
+ dffts = dffts_exx
+ ngm = ngm_
+ ngms = ngs_
+ ELSE
+ dfftp = dfftp_exx
+ dffts = dffts_exx
+ ngm = ngm_exx
+ ngms = ngms_exx
+ END IF
+ call deallocate_gvect_exx()
+ call gvect_init( ngm , intra_egrp_comm )
+ call gvecs_init( ngms , intra_egrp_comm )
+ ELSE
+ exx_mode = 2
+ dfftp = dfftp_loc
+ dffts = dffts_loc
+ ngm = ngm_loc
+ ngms = ngms_loc
+ call deallocate_gvect_exx()
+ call gvect_init( ngm , intra_bgrp_comm )
+ call gvecs_init( ngms , intra_bgrp_comm )
+ exx_mode = 0
+ END IF
+ !
+ IF (first_data_structure_change) THEN
+ CALL ggen ( dfftp, gamma_only, at, bg, gcutm, ngm_g, ngm, &
+ g, gg, mill, ig_l2g, gstart )
+ CALL ggens( dffts, gamma_only, at, g, gg, mill, gcutms, ngms )
+ allocate( ig_l2g_exx(ngm), g_exx(3,ngm), gg_exx(ngm) )
+ allocate( mill_exx(3,ngm), nl_exx(ngm) )
+ allocate( nls_exx(size(dffts%nl)) )
+ allocate( nlm_exx(size(dfftp%nlm) ) )
+ allocate( nlsm_exx(size(dffts%nlm) ) )
+ ig_l2g_exx = ig_l2g
+ g_exx = g
+ gg_exx = gg
+ mill_exx = mill
+ nl_exx = dfftp%nl
+ nls_exx = dffts%nl
+ IF( gamma_only ) THEN
+ nlm_exx = dfftp%nlm
+ nlsm_exx = dffts%nlm
+ END IF
+ ngm_exx = ngm
+ ngm_g_exx = ngm_g
+ gstart_exx = gstart
+ ngms_exx = ngms
+ ngms_g_exx = ngms_g
+ ELSE IF ( is_exx ) THEN
+ ig_l2g = ig_l2g_exx
+ g = g_exx
+ gg = gg_exx
+ mill = mill_exx
+ ! workaround: here dfft?%nl* are unallocated
+ ! some compilers go on and allocate, some others crash
+ IF ( .NOT. ALLOCATED(dfftp%nl) ) ALLOCATE (dfftp%nl(size(nl_exx)))
+ IF ( .NOT. ALLOCATED(dffts%nl) ) ALLOCATE (dffts%nl(size(nls_exx)))
+ IF ( gamma_only .AND. .NOT.ALLOCATED(dfftp%nlm) ) ALLOCATE (dfftp%nlm(size(nlm_exx)))
+ IF ( gamma_only .AND. .NOT.ALLOCATED(dffts%nlm) ) ALLOCATE (dffts%nlm(size(nlsm_exx)))
+ ! end workaround. FIXME: this part of code must disappear ASAP
+ dfftp%nl = nl_exx
+ dffts%nl = nls_exx
+ IF( gamma_only ) THEN
+ dfftp%nlm = nlm_exx
+ dffts%nlm = nlsm_exx
+ ENDIF
+ ngm = ngm_exx
+ ngm_g = ngm_g_exx
+ gstart = gstart_exx
+ ngms = ngms_exx
+ ngms_g = ngms_g_exx
+ ELSE ! not is_exx
+ ig_l2g = ig_l2g_loc
+ g = g_loc
+ gg = gg_loc
+ mill = mill_loc
+ dfftp%nl = nl_loc
+ dffts%nl = nls_loc
+ IF( gamma_only ) THEN
+ dfftp%nlm = nlm_loc
+ dffts%nlm = nlsm_loc
+ END IF
+ ngm = ngm_loc
+ ngm_g = ngm_g_loc
+ gstart = gstart_loc
+ ngms = ngms_loc
+ ngms_g = ngms_g_loc
+ END IF
+ !
+ ! get npwx and ngk
+ !
+ IF ( is_exx.and.npwx_exx.gt.0 ) THEN
+ npwx = npwx_exx
+ ngk = ngk_exx
+ ELSE IF ( .not.is_exx.and.npwx_local.gt.0 ) THEN
+ npwx = npwx_local
+ ngk = ngk_local
+ ELSE
+ npwx = n_plane_waves (gcutw, nks, xk, g, ngm)
+ END IF
+ !
+ ! get igk
+ !
+ IF( first_data_structure_change ) THEN
+ allocate(igk_exx(npwx,nks),work_space(npwx))
+ first_data_structure_change = .FALSE.
+ IF ( nks.eq.1 ) THEN
+ CALL gk_sort( xk, ngm, g, ecutwfc / tpiba2, ngk, igk_exx, work_space )
+ END IF
+ IF ( nks > 1 ) THEN
+ !
+ DO ik = 1, nks
+ CALL gk_sort( xk(1,ik), ngm, g, ecutwfc / tpiba2, ngk(ik), &
+ igk_exx(1,ik), work_space )
+ END DO
+ !
+ END IF
+ DEALLOCATE( work_space )
+ END IF
+ !
+ ! generate ngl and igtongl
+ !
+ CALL gshells( lmovecell )
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE change_data_structure
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE update_igk(is_exx)
+ !-----------------------------------------------------------------------
+ USE cell_base, ONLY : tpiba2
+ USE gvect, ONLY : ngm, g
+ USE gvecw, ONLY : ecutwfc
+ USE wvfct, ONLY : npwx, current_k
+ USE klist, ONLY : xk, igk_k
+ USE mp_exx, ONLY : negrp
+ !
+ !
+ IMPLICIT NONE
+ !
+ LOGICAL, intent(in) :: is_exx
+ COMPLEX(DP), ALLOCATABLE :: work_space(:)
+ INTEGER :: comm
+ INTEGER :: npw, ik, i
+ LOGICAL exst
+ !
+ IF (negrp.eq.1) RETURN
+ !
+ ! get igk
+ !
+ allocate(work_space(npwx))
+ ik = current_k
+ IF(is_exx) THEN
+ CALL gk_sort( xk(1,ik), ngm, g, ecutwfc / tpiba2, npw, igk_exx(1,ik), &
+ work_space )
+ ELSE
+ CALL gk_sort( xk(1,ik), ngm, g, ecutwfc / tpiba2, npw, igk_k(1,ik), &
+ work_space )
+ END IF
+ !
+ DEALLOCATE( work_space )
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE update_igk
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE result_sum (n, m, data)
+ !-----------------------------------------------------------------------
+ USE parallel_include
+ USE mp_exx, ONLY : iexx_start, iexx_end, inter_egrp_comm, &
+ intra_egrp_comm, my_egrp_id, negrp, &
+ max_pairs, egrp_pairs, max_contributors, &
+ contributed_bands, all_end, &
+ iexx_istart, iexx_iend, band_roots
+ USE mp, ONLY : mp_sum, mp_bcast
+
+ INTEGER, INTENT(in) :: n, m
+ COMPLEX(DP), INTENT(inout) :: data(n,m)
+#if defined(__MPI)
+ INTEGER :: istatus(MPI_STATUS_SIZE)
+ COMPLEX(DP), ALLOCATABLE :: recvbuf(:,:)
+ COMPLEX(DP) :: data_sum(n,m), test(negrp)
+ INTEGER :: im, iegrp, ibuf, i, j, nsending(m)
+ INTEGER :: ncontributing(m)
+ INTEGER :: contrib_this(negrp,m), displs(negrp,m)
+
+ INTEGER sendcount, sendtype, ierr, root, request(m)
+ INTEGER sendc(negrp), sendd(negrp)
+#endif
+ !
+ IF (negrp.eq.1) RETURN
+ !
+#if defined(__MPI)
+ ! gather data onto the correct nodes
+ !
+ ALLOCATE( recvbuf( n*max_contributors, max(1,iexx_end-iexx_start+1) ) )
+ displs = 0
+ ibuf = 0
+ nsending = 0
+ contrib_this = 0
+ !
+ DO im=1, m
+ !
+ IF(contributed_bands(im,my_egrp_id+1)) THEN
+ sendcount = n
+ ELSE
+ sendcount = 0
+ END IF
+ !
+ root = band_roots(im)
+ !
+ IF(my_egrp_id.eq.root) THEN
+ !
+ ! determine the number of sending processors
+ !
+ ibuf = ibuf + 1
+ ncontributing(im) = 0
+ DO iegrp=1, negrp
+ IF(contributed_bands(im,iegrp)) THEN
+ ncontributing(im) = ncontributing(im) + 1
+ contrib_this(iegrp,im) = n
+ IF(iegrp.lt.negrp) displs(iegrp+1,im) = displs(iegrp,im) + n
+ nsending(im) = nsending(im) + 1
+ ELSE
+ contrib_this(iegrp,im) = 0
+ IF(iegrp.lt.negrp) displs(iegrp+1,im) = displs(iegrp,im)
+ END IF
+ END DO
+ END IF
+ !
+#if defined(__MPI_NONBLOCKING)
+ CALL MPI_IGATHERV(data(:,im), sendcount, MPI_DOUBLE_COMPLEX, &
+ recvbuf(:,max(1,ibuf)), contrib_this(:,im), &
+ displs(:,im), MPI_DOUBLE_COMPLEX, &
+ root, inter_egrp_comm, request(im), ierr)
+#else
+ CALL MPI_GATHERV(data(:,im), sendcount, MPI_DOUBLE_COMPLEX, &
+ recvbuf(:,max(1,ibuf)), contrib_this(:,im), &
+ displs(:,im), MPI_DOUBLE_COMPLEX, &
+ root, inter_egrp_comm, ierr)
+#endif
+ !
+ END DO
+ !
+#if defined(__MPI_NONBLOCKING)
+ DO im=1, m
+ CALL MPI_WAIT(request(im), istatus, ierr)
+ END DO
+#endif
+ !
+ ! perform the sum
+ !
+ DO im=iexx_istart(my_egrp_id+1), iexx_iend(my_egrp_id+1)
+ IF(im.eq.0)exit
+ data(:,im) = 0._dp
+ ibuf = im - iexx_istart(my_egrp_id+1) + 1
+ DO j=1, nsending(im)
+ DO i=1, n
+ data(i,im) = data(i,im) + recvbuf(i+n*(j-1),ibuf)
+ END DO
+ END DO
+ END DO
+#endif
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE result_sum
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE transform_to_local(m, m_exx, psi, psi_out)
+ !-----------------------------------------------------------------------
+ USE mp, ONLY : mp_sum
+ USE mp_pools, ONLY : nproc_pool, me_pool, intra_pool_comm
+ USE mp_exx, ONLY : intra_egrp_comm, inter_egrp_comm, &
+ nproc_egrp, me_egrp, negrp, my_egrp_id, iexx_istart, iexx_iend
+ USE parallel_include
+ USE wvfct, ONLY : current_k
+ !
+ !
+ IMPLICIT NONE
+ !
+ INTEGER :: m, m_exx
+ COMPLEX(DP) :: psi(npwx_exx*npol,m_exx)
+ COMPLEX(DP) :: psi_out(npwx_local*npol,m)
+ !
+ INTEGER :: i, j, im, iproc, ig, ik, current_ik, iegrp
+ INTEGER :: prev, lda_max_local, prev_lda_exx
+ INTEGER :: my_bands, recv_bands, tag
+ !
+ INTEGER :: request_send(nproc_egrp,negrp), request_recv(nproc_egrp,negrp)
+ INTEGER :: ierr
+ INTEGER :: ipol
+#if defined(__MPI)
+ INTEGER :: istatus(MPI_STATUS_SIZE)
+#endif
+ !INTEGER, EXTERNAL :: find_current_k
+ !
+ current_ik = current_k
+ prev_lda_exx = sum( lda_exx(1:me_egrp,current_ik) )
+ !
+ my_bands = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1
+ !
+ ! send communication packets
+ !
+ IF ( iexx_istart(my_egrp_id+1).gt.0 ) THEN
+ DO iegrp=1, negrp
+ DO iproc=0, nproc_egrp-1
+ IF ( comm_send_reverse(iproc+1,iegrp,current_ik)%size.gt.0) THEN
+ DO i=1, comm_send_reverse(iproc+1,iegrp,current_ik)%size
+ ig = comm_send_reverse(iproc+1,iegrp,current_ik)%indices(i)
+ ig = ig - prev_lda_exx
+ !
+ DO im=1, my_bands
+ DO ipol=1, npol
+ comm_send_reverse(iproc+1,iegrp,current_ik)%msg(i,ipol,im) = &
+ psi(ig+npwx_exx*(ipol-1),im)
+ END DO
+ END DO
+ !
+ END DO
+ !
+ ! send the message
+ !
+ tag = 0
+#if defined(__MPI)
+ CALL MPI_ISEND( comm_send_reverse(iproc+1,iegrp,current_ik)%msg, &
+ comm_send_reverse(iproc+1,iegrp,current_ik)%size*npol*my_bands, &
+ MPI_DOUBLE_COMPLEX, &
+ iproc+(iegrp-1)*nproc_egrp, &
+ tag, &
+ intra_pool_comm, request_send(iproc+1,iegrp), ierr )
+#endif
+ !
+ END IF
+ END DO
+ END DO
+ END IF
+ !
+ ! begin receiving the communication packets
+ !
+ DO iegrp=1, negrp
+ !
+ IF ( iexx_istart(iegrp).le.0 ) CYCLE
+ !
+ recv_bands = iexx_iend(iegrp) - iexx_istart(iegrp) + 1
+ !
+ DO iproc=0, nproc_egrp-1
+ IF ( comm_recv_reverse(iproc+1,current_ik)%size.gt.0) THEN
+ !
+ !receive the message
+ !
+ tag = 0
+#if defined(__MPI)
+ CALL MPI_IRECV( comm_recv_reverse(iproc+1,current_ik)%msg(:,:,iexx_istart(iegrp)), &
+ comm_recv_reverse(iproc+1,current_ik)%size*npol*recv_bands, &
+ MPI_DOUBLE_COMPLEX, &
+ iproc+(iegrp-1)*nproc_egrp, &
+ tag, &
+ intra_pool_comm, request_recv(iproc+1,iegrp), ierr )
+#endif
+ !
+ END IF
+ END DO
+ END DO
+ !
+ ! assign psi
+ !
+ DO iproc=0, nproc_egrp-1
+ IF ( comm_recv_reverse(iproc+1,current_ik)%size.gt.0 ) THEN
+ !
+#if defined(__MPI)
+ DO iegrp=1, negrp
+ IF ( iexx_istart(iegrp).le.0 ) CYCLE
+ CALL MPI_WAIT(request_recv(iproc+1,iegrp), istatus, ierr)
+ END DO
+#endif
+ !
+ DO i=1, comm_recv_reverse(iproc+1,current_ik)%size
+ ig = comm_recv_reverse(iproc+1,current_ik)%indices(i)
+ !
+ ! set psi_out
+ !
+ DO im=1, m
+ DO ipol=1, npol
+ psi_out(ig+npwx_local*(ipol-1),im) = psi_out(ig+npwx_local*(ipol-1),im) + &
+ comm_recv_reverse(iproc+1,current_ik)%msg(i,ipol,im)
+ END DO
+ END DO
+ !
+ END DO
+ !
+ END IF
+ END DO
+ !
+ ! wait for everything to finish sending
+ !
+#if defined(__MPI)
+ IF ( iexx_istart(my_egrp_id+1).gt.0 ) THEN
+ DO iproc=0, nproc_egrp-1
+ DO iegrp=1, negrp
+ IF ( comm_send_reverse(iproc+1,iegrp,current_ik)%size.gt.0 ) THEN
+ CALL MPI_WAIT(request_send(iproc+1,iegrp), istatus, ierr)
+ END IF
+ END DO
+ END DO
+ END IF
+#endif
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE transform_to_local
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE exxbuff_comm(exxbuff,exxtemp,ikq,lda,jstart,jend)
+ !-----------------------------------------------------------------------
+ USE mp_exx, ONLY : my_egrp_id, inter_egrp_comm, jblock, &
+ all_end, negrp
+ USE mp, ONLY : mp_bcast
+ USE parallel_include
+ COMPLEX(DP), intent(in) :: exxbuff(:,:,:)
+ COMPLEX(DP), intent(inout) :: exxtemp(lda,jend-jstart+1)
+ INTEGER, intent(in) :: ikq, lda, jstart, jend
+ INTEGER :: jbnd, iegrp, ierr, request_exxbuff(jend-jstart+1)
+#if defined(__MPI)
+ INTEGER :: istatus(MPI_STATUS_SIZE)
+#endif
+
+ DO jbnd=jstart, jend
+ !determine which band group has this value of exxbuff
+ DO iegrp=1, negrp
+ IF(all_end(iegrp) >= jbnd) exit
+ END DO
+
+ IF (iegrp == my_egrp_id+1) THEN
+ exxtemp(:,jbnd-jstart+1) = exxbuff(:,jbnd,ikq)
+ END IF
+
+#if defined(__MPI_NONBLOCKING)
+ CALL MPI_IBCAST(exxtemp(:,jbnd-jstart+1), &
+ lda, &
+ MPI_DOUBLE_COMPLEX, &
+ iegrp-1, &
+ inter_egrp_comm, &
+ request_exxbuff(jbnd-jstart+1), ierr)
+#elif defined(__MPI)
+ CALL mp_bcast(exxtemp(:,jbnd-jstart+1),iegrp-1,inter_egrp_comm)
+#endif
+ END DO
+
+ DO jbnd=jstart, jend
+#if defined(__MPI_NONBLOCKING)
+ CALL MPI_WAIT(request_exxbuff(jbnd-jstart+1), istatus, ierr)
+#endif
+ END DO
+
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE exxbuff_comm
+ !-----------------------------------------------------------------------
+ !
+ !-----------------------------------------------------------------------
+ SUBROUTINE exxbuff_comm_gamma(exxbuff,exxtemp,ikq,lda,jstart,jend,jlength)
+ !-----------------------------------------------------------------------
+ USE mp_exx, ONLY : my_egrp_id, inter_egrp_comm, jblock, &
+ all_end, negrp
+ USE mp, ONLY : mp_bcast
+ USE parallel_include
+ COMPLEX(DP), intent(in) :: exxbuff(:,:,:)
+ COMPLEX(DP), intent(inout) :: exxtemp(lda*npol,jlength)
+ INTEGER, intent(in) :: ikq, lda, jstart, jend, jlength
+ INTEGER :: jbnd, iegrp, ierr, request_exxbuff(jend-jstart+1), ir
+#if defined(__MPI)
+ INTEGER :: istatus(MPI_STATUS_SIZE)
+#endif
+ REAL(DP), ALLOCATABLE :: work(:,:)
+
+ CALL start_clock ('comm_buff')
+
+ ALLOCATE ( work(lda*npol,jend-jstart+1) )
+ DO jbnd=jstart, jend
+ !determine which band group has this value of exxbuff
+ DO iegrp=1, negrp
+ IF(all_end(iegrp) >= jbnd) exit
+ END DO
+
+ IF (iegrp == my_egrp_id+1) THEN
+! exxtemp(:,jbnd-jstart+1) = exxbuff(:,jbnd,ikq)
+ IF( MOD(jbnd,2) == 1 ) THEN
+ work(:,jbnd-jstart+1) = REAL(exxbuff(:,jbnd/2+1,ikq))
+ ELSE
+ work(:,jbnd-jstart+1) = AIMAG(exxbuff(:,jbnd/2,ikq))
+ END IF
+ END IF
+
+#if defined(__MPI_NONBLOCKING)
+ CALL MPI_IBCAST(work(:,jbnd-jstart+1), &
+ lda*npol, &
+ MPI_DOUBLE_PRECISION, &
+ iegrp-1, &
+ inter_egrp_comm, &
+ request_exxbuff(jbnd-jstart+1), ierr)
+#elif defined(__MPI)
+ CALL mp_bcast(work(:,jbnd-jstart+1),iegrp-1,inter_egrp_comm)
+#endif
+ END DO
+
+ DO jbnd=jstart, jend
+#if defined(__MPI_NONBLOCKING)
+ CALL MPI_WAIT(request_exxbuff(jbnd-jstart+1), istatus, ierr)
+#endif
+ END DO
+ DO jbnd=jstart, jend, 2
+ IF (jbnd < jend) THEN
+ ! two real bands can be coupled into a single complex one
+ DO ir=1, lda*npol
+ exxtemp(ir,1+(jbnd-jstart+1)/2) = CMPLX( work(ir,jbnd-jstart+1),&
+ work(ir,jbnd-jstart+2),&
+ KIND=dp )
+ END DO
+ ELSE
+ ! case of lone last band
+ DO ir=1, lda*npol
+ exxtemp(ir,1+(jbnd-jstart+1)/2) = CMPLX( work(ir,jbnd-jstart+1),&
+ 0.0_dp, KIND=dp )
+ END DO
+ ENDIF
+ END DO
+ DEALLOCATE ( work )
+
+ CALL stop_clock ('comm_buff')
+ !
+ !-----------------------------------------------------------------------
+ END SUBROUTINE exxbuff_comm_gamma
+ !-----------------------------------------------------------------------
+
+END MODULE exx_band
+!-----------------------------------------------------------------------
diff --git a/PW/src/make.depend b/PW/src/make.depend
index e1efc68bc..f8c6a50fe 100644
--- a/PW/src/make.depend
+++ b/PW/src/make.depend
@@ -493,14 +493,35 @@ exx.o : ../../Modules/splinelib.o
exx.o : ../../Modules/uspp.o
exx.o : ../../Modules/wavefunctions.o
exx.o : ../../UtilXlib/mp.o
-exx.o : ../../UtilXlib/parallel_include.o
exx.o : buffers.o
+exx.o : exx_band.o
exx.o : exx_base.o
exx.o : paw_exx.o
exx.o : pwcom.o
exx.o : realus.o
exx.o : symm_base.o
exx.o : us_exx.o
+exx_band.o : ../../FFTXlib/fft_types.o
+exx_band.o : ../../FFTXlib/stick_base.o
+exx_band.o : ../../Modules/cell_base.o
+exx_band.o : ../../Modules/control_flags.o
+exx_band.o : ../../Modules/fft_base.o
+exx_band.o : ../../Modules/gvecw.o
+exx_band.o : ../../Modules/io_files.o
+exx_band.o : ../../Modules/io_global.o
+exx_band.o : ../../Modules/kind.o
+exx_band.o : ../../Modules/mp_bands.o
+exx_band.o : ../../Modules/mp_exx.o
+exx_band.o : ../../Modules/mp_pools.o
+exx_band.o : ../../Modules/noncol.o
+exx_band.o : ../../Modules/recvec.o
+exx_band.o : ../../Modules/recvec_subs.o
+exx_band.o : ../../Modules/uspp.o
+exx_band.o : ../../Modules/wavefunctions.o
+exx_band.o : ../../UtilXlib/mp.o
+exx_band.o : ../../UtilXlib/parallel_include.o
+exx_band.o : buffers.o
+exx_band.o : pwcom.o
exx_base.o : ../../FFTXlib/fft_types.o
exx_base.o : ../../FFTXlib/stick_base.o
exx_base.o : ../../Modules/cell_base.o
@@ -964,6 +985,7 @@ input.o : dynamics_module.o
input.o : esm.o
input.o : extfield.o
input.o : exx.o
+input.o : exx_base.o
input.o : ldaU.o
input.o : loc_scdm.o
input.o : martyna_tuckerman.o
@@ -1018,6 +1040,7 @@ loc_scdm.o : ../../Modules/mp_bands.o
loc_scdm.o : ../../Modules/noncol.o
loc_scdm.o : ../../UtilXlib/mp.o
loc_scdm.o : exx.o
+loc_scdm.o : exx_base.o
loc_scdm.o : pwcom.o
loc_scdm.o : scf_mod.o
make_pointlists.o : ../../Modules/cell_base.o
@@ -1082,6 +1105,7 @@ memory_report.o : ../../Modules/uspp.o
memory_report.o : ../../Modules/wannier_new.o
memory_report.o : atomic_wfc_mod.o
memory_report.o : exx.o
+memory_report.o : exx_base.o
memory_report.o : ldaU.o
memory_report.o : pwcom.o
mix_rho.o : ../../FFTXlib/fft_interfaces.o
@@ -1576,6 +1600,7 @@ pw_restart_new.o : bp_mod.o
pw_restart_new.o : buffers.o
pw_restart_new.o : extfield.o
pw_restart_new.o : exx.o
+pw_restart_new.o : exx_base.o
pw_restart_new.o : io_rho_xml.o
pw_restart_new.o : ldaU.o
pw_restart_new.o : pwcom.o
@@ -1872,6 +1897,7 @@ setup.o : atomic_wfc_mod.o
setup.o : bp_mod.o
setup.o : extfield.o
setup.o : exx.o
+setup.o : exx_base.o
setup.o : ldaU.o
setup.o : pw_restart.o
setup.o : pw_restart_new.o
diff --git a/TDDFPT/src/make.depend b/TDDFPT/src/make.depend
index b9ee8a5b3..345c9a531 100644
--- a/TDDFPT/src/make.depend
+++ b/TDDFPT/src/make.depend
@@ -293,6 +293,7 @@ lr_exx_kernel.o : ../../Modules/mp_global.o
lr_exx_kernel.o : ../../Modules/recvec.o
lr_exx_kernel.o : ../../Modules/wavefunctions.o
lr_exx_kernel.o : ../../PW/src/exx.o
+lr_exx_kernel.o : ../../PW/src/exx_base.o
lr_exx_kernel.o : ../../PW/src/pwcom.o
lr_exx_kernel.o : ../../PW/src/realus.o
lr_exx_kernel.o : ../../PW/src/symm_base.o
@@ -401,7 +402,6 @@ lr_read_wf.o : ../../Modules/recvec.o
lr_read_wf.o : ../../Modules/uspp.o
lr_read_wf.o : ../../Modules/wavefunctions.o
lr_read_wf.o : ../../PW/src/buffers.o
-lr_read_wf.o : ../../PW/src/exx.o
lr_read_wf.o : ../../PW/src/pwcom.o
lr_read_wf.o : ../../PW/src/realus.o
lr_read_wf.o : lr_exx_kernel.o