2013-01-28 17:21:12 +08:00
|
|
|
!
|
|
|
|
! Copyright (C) 2013 Quantum ESPRESSO group
|
|
|
|
! This file is distributed under the terms of the
|
|
|
|
! GNU General Public License. See the file `License'
|
|
|
|
! in the root directory of the present distribution,
|
|
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
|
|
!
|
|
|
|
!----------------------------------------------------------------------------
|
|
|
|
MODULE mp_bands
  !----------------------------------------------------------------------------
  !
  ! ... Band groups: subdivision of a parent group of processors (typically
  ! ... a k-point pool) used for parallelization over bands.
  ! ... The module stores the layout of the band groups (sizes, ranks and
  ! ... communicators) and provides helpers returning the contiguous block
  ! ... of indices assigned to the band group of the calling processor.
  !
  USE mp, ONLY : mp_barrier, mp_bcast, mp_size, mp_rank, mp_comm_split
  USE parallel_include
  !
  IMPLICIT NONE
  SAVE
  !
  ! ... Band groups (processors within a pool of bands)
  ! ... Subdivision of pool group, used for parallelization over bands
  !
  INTEGER :: nbgrp = 1           ! number of band groups
  INTEGER :: nproc_bgrp = 1      ! number of processors within a band group
  INTEGER :: me_bgrp = 0         ! index of the processor within a band group
  INTEGER :: root_bgrp = 0       ! index of the root processor within a band group
  INTEGER :: my_bgrp_id = 0      ! index of my band group
  INTEGER :: inter_bgrp_comm = 0 ! inter band group communicator
  INTEGER :: intra_bgrp_comm = 0 ! intra band group communicator
  !
  ! Next variable is .T. if band parallelization is performed inside H\psi
  ! and S\psi, .F. otherwise (band parallelization can be performed outside
  ! H\psi and S\psi, though)
  !
  LOGICAL :: use_bgrp_in_hpsi = .FALSE.
  !
  ! ... "task" groups (for band parallelization of FFT)
  !
  INTEGER :: ntask_groups = 1    ! number of proc. in an orbital "task group"
  !
CONTAINS
  !
  !----------------------------------------------------------------------------
  SUBROUTINE mp_start_bands( nband_, ntg_, parent_comm )
    !---------------------------------------------------------------------------
    !
    ! ... Divide processors (of the "parent_comm" group) into nband_ pools
    !
    ! ... Requires: nband_, read from command line
    ! ...           parent_comm, typically processors of a k-point pool
    ! ...           (intra_pool_comm)
    !
    ! ... Arguments:
    ! ...   nband_      (in)  requested number of band groups; it must lie in
    ! ...                     1..size(parent_comm) and divide that size,
    ! ...                     otherwise errore is called
    ! ...   ntg_        (in, optional) when present it is copied into
    ! ...                     ntask_groups
    ! ...   parent_comm (in)  communicator that is split into band groups
    !
    ! ... On exit (MPI builds) the module variables nbgrp, nproc_bgrp,
    ! ... my_bgrp_id, me_bgrp, intra_bgrp_comm, inter_bgrp_comm and
    ! ... use_bgrp_in_hpsi are set. When __MPI is not defined the whole body
    ! ... is compiled out and every module variable, ntask_groups included,
    ! ... keeps its current value.
    !
    IMPLICIT NONE
    !
    INTEGER, INTENT(IN) :: nband_, parent_comm
    INTEGER, INTENT(IN), OPTIONAL :: ntg_
    !
    ! ... Not initialized in the declaration on purpose: an initializer would
    ! ... give these locals the SAVE attribute. Both are assigned before
    ! ... being read.
    !
    INTEGER :: parent_nproc, parent_mype
    !
#if defined (__MPI)
    !
    parent_nproc = mp_size( parent_comm )
    parent_mype  = mp_rank( parent_comm )
    !
    ! ... nband_ must have been previously read from command line argument
    ! ... by a call to routine get_command_line
    !
    nbgrp = nband_
    !
    IF ( nbgrp < 1 .OR. nbgrp > parent_nproc ) CALL errore( 'mp_start_bands',&
         'invalid number of band groups, out of range', 1 )
    IF ( MOD( parent_nproc, nbgrp ) /= 0 ) CALL errore( 'mp_start_bands', &
         'n. of band groups must be divisor of parent_nproc', 1 )
    !
    ! set logical flag so that band parallelization in H\psi is allowed
    ! (can be disabled before calling H\psi if not desired)
    !
    use_bgrp_in_hpsi = ( nbgrp > 1 )
    !
    ! ... Set number of processors per band group
    !
    nproc_bgrp = parent_nproc / nbgrp
    !
    ! ... set index of band group for this processor ( 0 : nbgrp - 1 )
    ! ... (consecutive ranks of parent_comm belong to the same band group)
    !
    my_bgrp_id = parent_mype / nproc_bgrp
    !
    ! ... set index of processor within the band group ( 0 : nproc_bgrp - 1 )
    !
    me_bgrp = MOD( parent_mype, nproc_bgrp )
    !
    CALL mp_barrier( parent_comm )
    !
    ! ... the intra_bgrp_comm communicator is created
    ! ... (split selector: my_bgrp_id; presumably it plays the role of the
    ! ...  "color" of MPI_Comm_split - confirm in module mp)
    !
    CALL mp_comm_split( parent_comm, my_bgrp_id, parent_mype, intra_bgrp_comm )
    !
    CALL mp_barrier( parent_comm )
    !
    ! ... the inter_bgrp_comm communicator is created
    ! ... (split selector: me_bgrp)
    !
    CALL mp_comm_split( parent_comm, me_bgrp, parent_mype, inter_bgrp_comm )
    !
    IF ( PRESENT(ntg_) ) THEN
       ntask_groups = ntg_
    END IF
    !
#endif
    !
  END SUBROUTINE mp_start_bands
  !
  !----------------------------------------------------------------------------
  SUBROUTINE set_bgrp_indices( nbnd, ib_start, ib_end )
    !---------------------------------------------------------------------------
    !
    ! ... Return the first (ib_start) and last (ib_end) index of the
    ! ... contiguous block, out of nbnd items numbered from 1, assigned to
    ! ... the band group of the calling processor (my_bgrp_id).
    ! ... The distribution rule is defined once, in bgrp_start and bgrp_end.
    ! ... When nbnd < nbgrp some groups get an empty block (ib_end < ib_start).
    !
    IMPLICIT NONE
    !
    INTEGER, INTENT(IN)  :: nbnd
    INTEGER, INTENT(OUT) :: ib_start, ib_end
    !
    ib_start = bgrp_start( nbnd )
    ib_end   = bgrp_end( nbnd )
    !
  END SUBROUTINE set_bgrp_indices
  !
  !----------------------------------------------------------------------------
  INTEGER FUNCTION bgrp_start( nbnd )
    !---------------------------------------------------------------------------
    !
    ! ... First index of the block of nbnd items (numbered from 1) assigned
    ! ... to band group my_bgrp_id. Every group receives nbnd/nbgrp items;
    ! ... the first MOD(nbnd,nbgrp) groups receive one additional item.
    !
    IMPLICIT NONE
    !
    INTEGER, INTENT(IN) :: nbnd
    !
    INTEGER :: rest, nbnd_per_bgrp
    !
    rest = MOD( nbnd, nbgrp )
    nbnd_per_bgrp = nbnd / nbgrp
    !
    IF ( rest > my_bgrp_id ) THEN
       ! ... this group and all the preceding ones hold one extra item
       bgrp_start = my_bgrp_id * (nbnd_per_bgrp+1) + 1
    ELSE
       ! ... all the "rest" extra items belong to preceding groups
       bgrp_start = my_bgrp_id * nbnd_per_bgrp + rest + 1
    END IF
    !
  END FUNCTION bgrp_start
  !
  !----------------------------------------------------------------------------
  INTEGER FUNCTION bgrp_end( nbnd )
    !---------------------------------------------------------------------------
    !
    ! ... Last index of the block of nbnd items (numbered from 1) assigned
    ! ... to band group my_bgrp_id; same distribution rule as bgrp_start.
    !
    IMPLICIT NONE
    !
    INTEGER, INTENT(IN) :: nbnd
    !
    INTEGER :: rest, nbnd_per_bgrp
    !
    rest = MOD( nbnd, nbgrp )
    nbnd_per_bgrp = nbnd / nbgrp
    !
    IF ( rest > my_bgrp_id ) THEN
       bgrp_end = (my_bgrp_id+1) * (nbnd_per_bgrp+1)
    ELSE
       bgrp_end = (my_bgrp_id+1) * nbnd_per_bgrp + rest
    END IF
    !
  END FUNCTION bgrp_end
  !
END MODULE mp_bands
|
band group parallelization slightly modified to make it more flexible, and a little
more efficient.
subroutine init_index_over_band ( comm, nbnd ) that set ibnd_start and ibnd_end
variables requiring comm=inter_bgrp_comm is removed and replaced by
subroutine set_bgrp_indices ( nbnd, ibnd_start, ibnd_end ) implementing the same
relationships between its arguments but:
- forcing the use of inter_bgrp_comm from the same mp_bands module,
- returning ibnd_start and ibnd_end as explicit outputs that are no longer kept
in the module. In this way other quantities can be distributed if needed in any
given routine without too many non-local effects.
For compatibility with TDDFPT, that uses the bgrp parallelization and loads
ibnd_start/ibnd_end through mp_global module, these two variables are moved in
a dedicated module mp_bands_TDDFPT included in Module/mp_bands.f90. This is done
to avoid too many invasive changes in a code I don't know well. In this way the
needed changes are very localized and transparent, the code compiles correctly
so I think it should work exactly as before.
In my opinion the two variables should be moved somewhere inside TDDFPT.
Band parallelization is extended to h_psi(lda,n,m,psi,hpsi) and s_psi routines
(only when .not.exx_is_active because otherwise it is already used inside vexx)
for generic values of m (of course it gives a speedup only when m is not too small
compared to nbgrp but it works also if m < nbgrp ).
Compatibility with task groups has not been explored but should not be conceptually
different from how it works in the exx case.
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@11835 c92efa57-630b-4861-b058-cf58834340f0
2015-11-07 08:06:40 +08:00
|
|
|
!
|
|
|
|
!
|
|
|
|
MODULE mp_bands_TDDFPT
  !
  ! ... Holder for the band-index range used by TDDFPT in band-group
  ! ... parallelization.
  !
  ! NB: These two variables used to be in mp_bands and are loaded from mp_global in TDDFPT
  ! I think they would better stay in a TDDFPT specific module but leave them here not to
  ! be too invasive on a code I don't know well. SdG
  !
  IMPLICIT NONE
  SAVE
  !
  INTEGER :: ibnd_start = 0 ! starting band index used in bgrp parallelization
  INTEGER :: ibnd_end = 0   ! ending band index used in bgrp parallelization
  !
END MODULE mp_bands_TDDFPT
|
|
|
|
!
|