quantum-espresso/Modules/task_groups.f90

251 lines
8.1 KiB
Fortran
Executable File

!-----------------------------------------
! VARIABLES FOR TASKGROUPS
! C. Bekas, October 2005
!-----------------------------------------
!
!Variable description
!--------------------
!MAXGRP: Maximum number of task-groups
!--------------------------------------------
!=========================================================================
! Task-group parallelization storage and setup (C. Bekas, IBM Research,
! Zurich).  The NPROC MPI processes are split into orbital groups of
! NOGRP processes each and, orthogonally, into NPGRP = NPROC/NOGRP
! plane-wave groups, so that several wave functions can be handled
! concurrently by the task-group FFT drivers.
!=========================================================================
MODULE GROUPS_MODULE
USE kinds, ONLY: DP
USE parameters, ONLY: MAXCPU, MAXGRP
IMPLICIT NONE
SAVE
! Number of z-sticks per wavefunction owned by each processor,
! gathered from all processors in GROUPS (size SZ)
INTEGER, DIMENSION(:), ALLOCATABLE :: ALL_Z_STICKS
! NOLIST: ranks of the processors in my orbital group (size MAXGRP)
! NPLIST: ranks of the processors in my plane-wave group (size MAXGRP)
! PGROUP: rank table, PGROUP(i) = i-1 (size NPROC)
INTEGER, DIMENSION(:), ALLOCATABLE :: NOLIST, NPLIST, PGROUP
! Scratch arrays for the task-group FFT drivers; tmp_nsw/tmp_npp are
! allocated here (tmp_npp(1) = -1 marks them as not yet filled)
INTEGER, DIMENSION(:), ALLOCATABLE :: tmp_nsw, tmp_npp, tmp_planes, tmp_revs, recvs, tmp_ismap
INTEGER, DIMENSION(:), ALLOCATABLE :: ngw_vec !GLOBAL VECTOR OF ALL NGW VECTORS
! Task-group work arrays (allocated elsewhere; presumably the
! group-replicated copies of beta functions, wavefunctions and
! densities used by the task-group drivers)
COMPLEX(DP), DIMENSION(:,:), ALLOCATABLE :: tg_betae
COMPLEX(DP), DIMENSION(:), ALLOCATABLE :: tg_c2, tg_c3
REAL(DP), DIMENSION(:,:), ALLOCATABLE :: tg_rhos
REAL(DP), DIMENSION(:), ALLOCATABLE :: tg_ggp
! Local FFT data size (dffts%nnr) of every processor, gathered in GROUPS
INTEGER, DIMENSION(:), ALLOCATABLE :: nnrsx_vec !INCREASE THIS TO THE MAXIMUM NUMBER OF PROCS IF NEEDED
REAL(DP), DIMENSION(:,:), ALLOCATABLE :: tmp_rhos, local_rhos
! Fixed-size send/receive bookkeeping for group communication
! (MAXGRP is the compile-time cap on processors per group)
INTEGER :: recv_cnt(MAXGRP), recv_displ(MAXGRP), send_cnt(MAXGRP), send_displ(MAXGRP)
! SZ: total number of processors; CLOCK1..4: timing counters
INTEGER :: SZ, CLOCK1, CLOCK2, CLOCK3, CLOCK4
! strd: MAXVAL of dffts%nnr over all processors — the common stride
! between per-orbital slices inside the task-group work arrays
INTEGER :: sticks_index, eig_offset, strd
REAL(DP) :: tm_tg, tm_rhoofr  ! accumulated timings (initialized in GROUPS)
CONTAINS
! Release every allocatable array owned by this module.  Safe to call
! unconditionally: each array is deallocated only if it is currently
! allocated, so repeated calls (or calls before GROUPS) are harmless.
SUBROUTINE DEALLOCATE_GROUPS
IMPLICIT NONE
!DEALLOCATE GROUPS RELATED ARRAYS
IF (ALLOCATED(ALL_Z_STICKS)) DEALLOCATE(ALL_Z_STICKS)
IF (ALLOCATED(NOLIST)) DEALLOCATE(NOLIST)
IF (ALLOCATED(NPLIST)) DEALLOCATE(NPLIST)
IF (ALLOCATED(PGROUP)) DEALLOCATE(PGROUP)
IF (ALLOCATED(tmp_nsw)) DEALLOCATE(tmp_nsw)
IF (ALLOCATED(tmp_npp)) DEALLOCATE(tmp_npp)
IF (ALLOCATED(tmp_planes)) DEALLOCATE(tmp_planes)
IF (ALLOCATED(tmp_revs)) DEALLOCATE(tmp_revs)
IF (ALLOCATED(recvs)) DEALLOCATE(recvs)
IF (ALLOCATED(tmp_ismap)) DEALLOCATE(tmp_ismap)
IF (ALLOCATED(ngw_vec)) DEALLOCATE(ngw_vec)
IF (ALLOCATED(tg_betae)) DEALLOCATE(tg_betae)
IF (ALLOCATED(tg_c2)) DEALLOCATE(tg_c2)
IF (ALLOCATED(tg_c3)) DEALLOCATE(tg_c3)
IF (ALLOCATED(tg_rhos)) DEALLOCATE(tg_rhos)
IF (ALLOCATED(tg_ggp)) DEALLOCATE(tg_ggp)
IF (ALLOCATED(nnrsx_vec)) DEALLOCATE(nnrsx_vec)
IF (ALLOCATED(tmp_rhos)) DEALLOCATE(tmp_rhos)
IF (ALLOCATED(local_rhos)) DEALLOCATE(local_rhos)
END SUBROUTINE DEALLOCATE_GROUPS
!========================================================================================
! ADDED SUBROUTINEs FOR TASK GROUP PARALLELIZATION
! C. Bekas, IBM Research, Zurich
! - GROUPS: Define and initialize Task Groups
! - tg_ivfftw: Inverse FFT driver for Task Groups
!=======================================================================================
!-----------------------------------------------------------------------
!=======================================================================
! SUBROUTINE GROUPS (added by C. Bekas)
! Define groups for task group parallelization
!
! nogrp_ : requested number of processors per orbital group; must be
!          an exact divisor of NPROC (errore is called otherwise)
! dffts  : FFT descriptor of the smooth grid; only dffts%nnr (local
!          FFT data size) and dffts%nsw (z-stick counts) are read
!
! Side effects: fills NOLIST/NPLIST/PGROUP and ALL_Z_STICKS here, sets
! NOGRP/NPGRP and the group communicators ME_OGRP/ME_PGRP in mp_global.
! NOTE(review): this is a collective call — every processor in "group"
! must enter it (MPI_Allgather / mp_bcast / MPI_COMM_CREATE inside).
!=======================================================================
!-----------------------------------------------------------------------
SUBROUTINE GROUPS( nogrp_ , dffts )
!------------
!Modules used
!------------
USE mp_global, ONLY : mpime, nproc, group, root
USE mp_global, ONLY : NOGRP, NPGRP, ME_OGRP, ME_PGRP
USE mp, ONLY : mp_bcast
USE parameters, ONLY : MAXGRP
USE io_global, only : stdout
USE fft_types, only : fft_dlay_descriptor
USE electrons_base, only: nspin
USE parallel_include
IMPLICIT NONE
INTEGER, INTENT(IN) :: nogrp_
TYPE(fft_dlay_descriptor), INTENT(IN) :: dffts
!----------------------------------
!Local Variables declaration
!----------------------------------
!NPROC: Total number of processors
!NPGRP: Number of processors per group
INTEGER :: MSGLEN, I, J, N1, LABEL, IPOS, WORLD, NEWGROUP
INTEGER :: ios, IERR
!--------------------------------------------------------------
!Allocations
!--------------------------------------------------------------
!
! NOTE(review): allocated unconditionally — calling GROUPS a second
! time without DEALLOCATE_GROUPS in between would fail here.  TODO
! confirm GROUPS is called exactly once per run.
ALLOCATE( nolist( maxgrp ) )
ALLOCATE( nplist( maxgrp ) )
tm_tg = 0D0
tm_rhoofr = 0D0
! Find the number of processors and my rank
!
SZ = NPROC
!--------------------------------------------------------------
!SUBDIVIDE THE PROCESSORS IN GROUPS
!
!THE NUMBER OF GROUPS HAS TO BE A DIVISOR OF THE NUMBER
!OF PROCESSORS
!--------------------------------------------------------------
IF( MOD( nproc, nogrp_ ) /= 0 ) &
CALL errore( " groups ", " nogrp should be a divisor of nproc ", 1 )
! PGROUP(i) = i-1 : simple table mapping 1-based index to MPI rank
ALLOCATE( PGROUP( NPROC ) )
DO I = 1, NPROC
PGROUP( I ) = I - 1
ENDDO
nogrp = nogrp_
ALLOCATE( nnrsx_vec( sz ) )
!Find maximum chunk of local data concerning coefficients of eigenfunctions in g-space
#if defined __MPI
CALL MPI_Allgather(dffts%nnr, 1, MPI_INTEGER, nnrsx_vec, 1, MPI_INTEGER, group, IERR)
! strd: common stride so every processor's slice fits in the task-group arrays
strd = MAXVAL( nnrsx_vec( 1:sz ) )
#else
strd = dffts%nnr
#endif
!---------------------------------------------------------------
!Broadcast the number of groups: NOGRP
!---------------------------------------------------------------
! Make sure every processor agrees with root on NOGRP
CALL mp_bcast( nogrp, root, group )
!-------------------------------------------------------------------------------------
!C. Bekas...TASK GROUP RELATED. FFT DATA STRUCTURES ARE ALREADY DEFINED ABOVE
!-------------------------------------------------------------------------------------
!dfft%nsw(me) holds the number of z-sticks for the current processor per wave-function
!We can either send these in the group with an mpi_allgather...or put them
!in the PSIS vector (in special positions) and send them with them.
!Otherwise we can do this once at the beginning, before the loop.
!we choose to do the latter one.
!-------------------------------------------------------------------------------------
!
ALLOCATE( ALL_Z_STICKS( SZ ) )
!
!ALL-Gather number of Z-sticks from all processors
!
#if defined __MPI
CALL MPI_Allgather(dffts%nsw(mpime+1), 1, MPI_INTEGER, ALL_Z_STICKS, 1, MPI_INTEGER, group, IERR)
#else
all_z_sticks( 1 ) = dffts%nsw( 1 )
#endif
IF (.NOT.ALLOCATED(tmp_nsw)) ALLOCATE(tmp_nsw(SZ))
IF (.NOT.ALLOCATED(tmp_npp)) THEN
ALLOCATE(tmp_npp(SZ))
! Sentinel: marks tmp_npp as not yet filled (presumably tested by the
! task-group FFT driver before first use — TODO confirm)
tmp_npp(1)=-1
ENDIF
! Task groups disabled: nothing else to build (NPGRP, NOLIST contents
! and the group communicators are deliberately left untouched)
IF( NOGRP == 1 ) RETURN
NPGRP = NPROC / NOGRP
IF( NPGRP > MAXGRP ) THEN
CALL errore( "groups", "too many npgrp", 1 )
ENDIF
!
IF( NOGRP > MAXGRP ) THEN
CALL errore( "groups", "too many nogrp", 1 )
ENDIF
!--------------------------------------
!LIST OF PROCESSORS IN MY ORBITAL GROUP
!--------------------------------------
! Ranks (mpime/NOGRP)*NOGRP .. (mpime/NOGRP)*NOGRP + NOGRP-1 form my
! orbital group; N1+I+1 indexes them through PGROUP.  IPOS becomes my
! 0-based position inside the group (always set, since mpime is in it).
N1 = ( mpime / NOGRP ) * NOGRP - 1
DO I = 1, NOGRP
NOLIST( I ) = PGROUP( N1 + I + 1 )
IF( mpime .EQ. NOLIST( I ) ) IPOS = I - 1
ENDDO
!-----------------------------------------
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
!-----------------------------------------
! The plane-wave group collects the processors that occupy the same
! position IPOS in each of the NPGRP orbital groups
DO I = 1, NPGRP
NPLIST( I ) = PGROUP( IPOS + ( I - 1 ) * NOGRP + 1 )
ENDDO
!-----------------
!SET UP THE GROUPS
!-----------------
! NOTE(review): LABEL is assigned here and below but never read in
! this routine — looks like a leftover from an earlier MPI_COMM_SPLIT
! based implementation.  TODO confirm before removing.
DO I = 1, NPGRP
IF( mpime .EQ. NPLIST( I ) ) LABEL = I
ENDDO
!---------------------------------------
!CREATE ORBITAL GROUPS
!---------------------------------------
!
#if defined __MPI
! get the handle world to the main group
CALL MPI_COMM_GROUP( group, WORLD, IERR )
! create a new group handle containing processors whose indices are
! contained in array nolist()
CALL MPI_GROUP_INCL( WORLD, NOGRP, NOLIST, NEWGROUP, IERR )
! build a communicator for the new group and store it in ME_OGRP;
! MPI_COMM_CREATE is collective over "group": processors outside
! NEWGROUP receive MPI_COMM_NULL
CALL MPI_COMM_CREATE( group, NEWGROUP, ME_OGRP, IERR )
#endif
DO I = 1, NOGRP
IF( mpime .EQ. NOLIST( I ) ) LABEL = I + MAXCPU
ENDDO
!---------------------------------------
!CREATE PLANEWAVE GROUPS
!---------------------------------------
!
! Same construction for the plane-wave group communicator ME_PGRP
#if defined __MPI
CALL MPI_COMM_GROUP( group, WORLD, IERR )
CALL MPI_GROUP_INCL( WORLD, NPGRP, NPLIST, NEWGROUP, IERR )
CALL MPI_COMM_CREATE( group, NEWGROUP, ME_PGRP, IERR )
#endif
RETURN
END SUBROUTINE GROUPS
END MODULE GROUPS_MODULE