mirror of https://gitlab.com/QEF/q-e.git
- cleanup, information on processor groups moved from task_group to
mp_global, in order to have a single place containing all info about the parallelization git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@4567 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
parent
5424c6e82f
commit
c465220073
|
@ -82,9 +82,8 @@ subroutine fft_scatter ( f_in, nrx3, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
|||
USE parallel_include
|
||||
#endif
|
||||
use mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
|
||||
my_image_id, nogrp, pgrp_comm
|
||||
my_image_id, nogrp, pgrp_comm, nplist
|
||||
USE kinds, ONLY : DP
|
||||
USE task_groups, ONLY : nplist
|
||||
|
||||
implicit none
|
||||
|
||||
|
@ -478,9 +477,8 @@ subroutine fft_scatter ( f_in, nrx3, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
|||
USE parallel_include
|
||||
#endif
|
||||
use mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
|
||||
my_image_id, nogrp, pgrp_comm
|
||||
my_image_id, nogrp, pgrp_comm, nplist
|
||||
USE kinds, ONLY : DP
|
||||
USE task_groups, ONLY : nplist
|
||||
|
||||
implicit none
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ CONTAINS
|
|||
! Task groups driver
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE tg_cft3s( f, dffts, sign )
|
||||
SUBROUTINE tg_cft3s( f, dfft, isgn )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
|
@ -27,135 +27,103 @@ SUBROUTINE tg_cft3s( f, dffts, sign )
|
|||
!Added: C. Bekas, Oct. 2005. Adopted from the CPMD code (A. Curioni)
|
||||
!Revised by Carlo Cavazzoni 2007.
|
||||
!
|
||||
! ... sign = +-1 : parallel 3d fft for rho and for the potential
|
||||
! ... isgn = +-1 : parallel 3d fft for rho and for the potential
|
||||
! NOT YET IMPLEMENTED WITH TASK GROUPS
|
||||
! ... sign = +-2 : parallel 3d fft for wavefunctions
|
||||
! ... isgn = +-2 : parallel 3d fft for wavefunctions
|
||||
!
|
||||
! ... sign = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R)
|
||||
! ... isgn = + : G-space to R-space, output = \sum_G f(G)exp(+iG*R)
|
||||
! ... fft along z using pencils (cft_1z)
|
||||
! ... transpose across nodes (fft_scatter)
|
||||
! ... and reorder
|
||||
! ... fft along y (using planes) and x (cft_2xy)
|
||||
! ... sign = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega
|
||||
! ... isgn = - : R-space to G-space, output = \int_R f(R)exp(-iG*R)/Omega
|
||||
! ... fft along x and y(using planes) (cft_2xy)
|
||||
! ... transpose across nodes (fft_scatter)
|
||||
! ... and reorder
|
||||
! ... fft along z using pencils (cft_1z)
|
||||
!
|
||||
! ... The array "planes" signals whether a fft is needed along y :
|
||||
! ... planes(i)=0 : column f(i,*,*) empty , don't do fft along y
|
||||
! ... planes(i)=1 : column f(i,*,*) filled, fft along y needed
|
||||
! ... The array "dfft%iplw" signals whether a fft is needed along y :
|
||||
! ... dfft%iplw(i)=0 : column f(i,*,*) empty , don't do fft along y
|
||||
! ... dfft%iplw(i)=1 : column f(i,*,*) filled, fft along y needed
|
||||
! ... "empty" = no active components are present in f(i,*,*)
|
||||
! ... after (sign>0) or before (sign<0) the fft on z direction
|
||||
! ... after (isgn>0) or before (isgn<0) the fft on z direction
|
||||
!
|
||||
! ... Note that if sign=+/-1 (fft on rho and pot.) all fft's are needed
|
||||
! ... Note that if isgn=+/-1 (fft on rho and pot.) all fft's are needed
|
||||
! ... and all planes(i) are set to 1
|
||||
!
|
||||
USE fft_scalar, ONLY : cft_1z, cft_2xy
|
||||
USE fft_base, ONLY : fft_scatter
|
||||
USE kinds, ONLY : DP
|
||||
USE mp_global, only : me_pool, nproc_pool, ogrp_comm, npgrp, nogrp, &
|
||||
intra_pool_comm
|
||||
intra_pool_comm, nolist, nplist
|
||||
USE fft_types, ONLY : fft_dlay_descriptor
|
||||
USE task_groups
|
||||
USE parallel_include
|
||||
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
COMPLEX(DP), INTENT(INOUT) :: f( : )
|
||||
type (fft_dlay_descriptor), intent(in) :: dffts
|
||||
INTEGER, INTENT(IN) :: sign
|
||||
type (fft_dlay_descriptor), intent(in) :: dfft
|
||||
INTEGER, INTENT(IN) :: isgn
|
||||
!
|
||||
INTEGER :: mc, i, j, ii, iproc, k
|
||||
INTEGER :: me_p
|
||||
INTEGER :: n1, n2, n3, nx1, nx2, nx3
|
||||
INTEGER :: idx, ierr
|
||||
LOGICAL :: tg
|
||||
COMPLEX(DP), ALLOCATABLE :: yf(:), aux (:)
|
||||
COMPLEX(DP) :: stmp
|
||||
INTEGER, DIMENSION(NOGRP) :: send_cnt, send_displ, recv_cnt, recv_displ
|
||||
INTEGER, SAVE :: ib = 1
|
||||
!
|
||||
!
|
||||
CALL start_clock( 'cft3s' )
|
||||
!
|
||||
n1 = dffts%nr1
|
||||
n2 = dffts%nr2
|
||||
n3 = dffts%nr3
|
||||
nx1 = dffts%nr1x
|
||||
nx2 = dffts%nr2x
|
||||
nx3 = dffts%nr3x
|
||||
n1 = dfft%nr1
|
||||
n2 = dfft%nr2
|
||||
n3 = dfft%nr3
|
||||
nx1 = dfft%nr1x
|
||||
nx2 = dfft%nr2x
|
||||
nx3 = dfft%nr3x
|
||||
!
|
||||
ALLOCATE( aux( (NOGRP+1)*strd ) )
|
||||
ALLOCATE( YF ( (NOGRP+1)*strd ) )
|
||||
ALLOCATE( aux( (NOGRP+1)*dfft%nnrx ) )
|
||||
ALLOCATE( YF ( (NOGRP+1)*dfft%nnrx ) )
|
||||
!
|
||||
me_p = me_pool + 1
|
||||
!
|
||||
IF ( sign > 0 ) THEN
|
||||
IF ( isgn > 0 ) THEN
|
||||
!
|
||||
IF ( sign /= 2 ) THEN
|
||||
IF ( isgn /= 2 ) THEN
|
||||
!
|
||||
CALL errore( ' tg_cfft ', ' task groups are implemented only for waves ', 1 )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL pack_group_sticks()
|
||||
!
|
||||
send_cnt(1) = nx3 * dffts%nsw( me_p )
|
||||
IF( nx3 * dffts%nsw( me_p ) > strd ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent strd ', 1 )
|
||||
END IF
|
||||
send_displ(1) = 0
|
||||
recv_cnt(1) = nx3 * dffts%nsw( nolist(1) + 1 )
|
||||
recv_displ(1) = 0
|
||||
DO idx = 2, nogrp
|
||||
send_cnt(idx) = nx3 * dffts%nsw( me_p )
|
||||
send_displ(idx) = send_displ(idx-1) + strd
|
||||
recv_cnt(idx) = nx3 * dffts%nsw( nolist(idx) + 1 )
|
||||
recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
|
||||
ENDDO
|
||||
|
||||
IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( yf ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 1 )
|
||||
END IF
|
||||
IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( f ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 2 )
|
||||
END IF
|
||||
|
||||
CALL start_clock( 'ALLTOALL' )
|
||||
CALL cft_1z( yf, dfft%tg_nsw( me_p ), n3, nx3, isgn, aux )
|
||||
!
|
||||
! Collect all the sticks of the different states,
|
||||
! in "yf" processors will have all the sticks of the OGRP
|
||||
|
||||
#if defined __MPI
|
||||
|
||||
CALL MPI_ALLTOALLV( f(1), send_cnt, send_displ, MPI_DOUBLE_COMPLEX, yf(1), recv_cnt, &
|
||||
& recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
|
||||
IF( ierr /= 0 ) THEN
|
||||
CALL errore( ' tg_cfft ', ' alltoall error 1 ', ABS(ierr) )
|
||||
END IF
|
||||
|
||||
#endif
|
||||
|
||||
!Transpose data for the 2-D FFT on the x-y plane
|
||||
!
|
||||
CALL cft_1z( yf, tmp_nsw( me_p ), n3, nx3, sign, aux )
|
||||
|
||||
!NOGRP*dfft%nnr: The length of aux and f
|
||||
!nr3x: The length of each Z-stick
|
||||
!aux: input - output
|
||||
!f: working space
|
||||
!isgn: type of scatter
|
||||
!dfft%nsw(me) holds the number of Z-sticks proc. me has.
|
||||
!dfft%npp: number of planes per processor
|
||||
!
|
||||
CALL fft_scatter( aux, nx3, (nogrp+1)*strd, f, tmp_nsw, tmp_npp, sign, use_tg = .TRUE. )
|
||||
CALL fft_scatter( aux, nx3, (nogrp+1)*dfft%nnrx, f, dfft%tg_nsw, dfft%tg_npp, isgn, dfft%use_task_groups )
|
||||
!
|
||||
f(:) = ( 0.D0 , 0.D0 )
|
||||
ii = 0
|
||||
!
|
||||
DO iproc = 1, nproc_pool
|
||||
!
|
||||
DO i = 1, dffts%nsw( iproc )
|
||||
DO i = 1, dfft%nsw( iproc )
|
||||
!
|
||||
mc = dffts%ismap( i + dffts%iss(iproc) )
|
||||
mc = dfft%ismap( i + dfft%iss(iproc) )
|
||||
!
|
||||
ii = ii + 1
|
||||
!
|
||||
DO j = 1, tmp_npp (me_p)
|
||||
DO j = 1, dfft%tg_npp (me_p)
|
||||
!
|
||||
f(mc+(j-1)*nx1*nx2) = aux(j+(ii-1)*tmp_npp( me_p ) )
|
||||
f(mc+(j-1)*nx1*nx2) = aux(j+(ii-1)* dfft%tg_npp( me_p ) )
|
||||
!
|
||||
END DO
|
||||
!
|
||||
|
@ -165,32 +133,31 @@ SUBROUTINE tg_cft3s( f, dffts, sign )
|
|||
!
|
||||
END IF
|
||||
!
|
||||
CALL cft_2xy( f, tmp_npp( me_p ), n1, n2, nx1, nx2, sign, dffts%iplw )
|
||||
|
||||
CALL cft_2xy( f, dfft%tg_npp( me_p ), n1, n2, nx1, nx2, isgn, dfft%iplw )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
IF ( sign /= -2 ) THEN
|
||||
IF ( isgn /= -2 ) THEN
|
||||
!
|
||||
CALL errore( ' tg_cfft ', ' task groups are implemented only for waves ', 1 )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL cft_2xy( f, tmp_npp(me_p), n1, n2, nx1, nx2, sign, dffts%iplw )
|
||||
CALL cft_2xy( f, dfft%tg_npp(me_p), n1, n2, nx1, nx2, isgn, dfft%iplw )
|
||||
!
|
||||
ii = 0
|
||||
!
|
||||
DO iproc = 1, nproc_pool
|
||||
!
|
||||
DO i = 1, dffts%nsw(iproc)
|
||||
DO i = 1, dfft%nsw(iproc)
|
||||
!
|
||||
mc = dffts%ismap( i + dffts%iss(iproc) )
|
||||
mc = dfft%ismap( i + dfft%iss(iproc) )
|
||||
!
|
||||
ii = ii + 1
|
||||
!
|
||||
DO j = 1, tmp_npp(me_p)
|
||||
DO j = 1, dfft%tg_npp(me_p)
|
||||
!
|
||||
aux(j+(ii-1)*tmp_npp(me_p)) = f( mc + (j-1)*nx1*nx2)
|
||||
aux(j+(ii-1)* dfft%tg_npp(me_p)) = f( mc + (j-1)*nx1*nx2)
|
||||
!
|
||||
END DO
|
||||
!
|
||||
|
@ -198,42 +165,11 @@ SUBROUTINE tg_cft3s( f, dffts, sign )
|
|||
!
|
||||
END DO
|
||||
!
|
||||
CALL fft_scatter( aux, nx3, (NOGRP+1)*strd, f, tmp_nsw, tmp_npp, sign, use_tg = .TRUE. )
|
||||
CALL fft_scatter( aux, nx3, (NOGRP+1)*dfft%nnrx, f, dfft%tg_nsw, dfft%tg_npp, isgn, dfft%use_task_groups )
|
||||
!
|
||||
CALL cft_1z( aux, tmp_nsw(me_p), n3, nx3, sign, yf )
|
||||
!
|
||||
! Bring pencils back to their original distribution
|
||||
!
|
||||
send_cnt (1) = nx3 * dffts%nsw( nolist(1) + 1 )
|
||||
send_displ(1) = 0
|
||||
recv_cnt (1) = nx3 * dffts%nsw( me_p )
|
||||
recv_displ(1) = 0
|
||||
DO idx = 2, NOGRP
|
||||
send_cnt (idx) = nx3 * dffts%nsw( nolist(idx) + 1 )
|
||||
send_displ(idx) = send_displ(idx-1) + send_cnt(idx-1)
|
||||
recv_cnt (idx) = nx3 * dffts%nsw( me_p )
|
||||
recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
|
||||
ENDDO
|
||||
CALL cft_1z( aux, dfft%tg_nsw(me_p), n3, nx3, isgn, yf )
|
||||
|
||||
IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( f ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 3 )
|
||||
END IF
|
||||
IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( yf ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 4 )
|
||||
END IF
|
||||
|
||||
CALL start_clock( 'ALLTOALL' )
|
||||
|
||||
#if defined __MPI
|
||||
CALL MPI_Alltoallv( yf(1), &
|
||||
send_cnt, send_displ, MPI_DOUBLE_COMPLEX, f(1), &
|
||||
recv_cnt, recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
|
||||
IF( ierr /= 0 ) THEN
|
||||
CALL errore( ' tg_cfft ', ' alltoall error 2 ', ABS(ierr) )
|
||||
END IF
|
||||
#endif
|
||||
|
||||
CALL stop_clock( 'ALLTOALL' )
|
||||
CALL unpack_group_sticks()
|
||||
|
||||
END IF
|
||||
!
|
||||
|
@ -245,6 +181,199 @@ SUBROUTINE tg_cft3s( f, dffts, sign )
|
|||
CALL stop_clock( 'cft3s' )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
CONTAINS
|
||||
!
|
||||
|
||||
SUBROUTINE pack_group_sticks()
|
||||
|
||||
INTEGER :: idx, ierr
|
||||
INTEGER, DIMENSION(nogrp+1) :: send_cnt, send_displ, recv_cnt, recv_displ
|
||||
!
|
||||
send_cnt(1) = nx3 * dfft%nsw( me_p )
|
||||
IF( nx3 * dfft%nsw( me_p ) > dfft%nnrx ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent dfft%nnrx ', 1 )
|
||||
END IF
|
||||
send_displ(1) = 0
|
||||
recv_cnt(1) = nx3 * dfft%nsw( nolist(1) + 1 )
|
||||
recv_displ(1) = 0
|
||||
DO idx = 2, nogrp
|
||||
send_cnt(idx) = nx3 * dfft%nsw( me_p )
|
||||
send_displ(idx) = send_displ(idx-1) + dfft%nnrx
|
||||
recv_cnt(idx) = nx3 * dfft%nsw( nolist(idx) + 1 )
|
||||
recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
|
||||
ENDDO
|
||||
|
||||
IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( yf ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 1 )
|
||||
END IF
|
||||
IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( f ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 2 )
|
||||
END IF
|
||||
|
||||
CALL start_clock( 'ALLTOALL' )
|
||||
!
|
||||
! Collect all the sticks of the different states,
|
||||
! in "yf" processors will have all the sticks of the OGRP
|
||||
|
||||
#if defined __MPI
|
||||
|
||||
CALL MPI_ALLTOALLV( f(1), send_cnt, send_displ, MPI_DOUBLE_COMPLEX, yf(1), recv_cnt, &
|
||||
& recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
|
||||
IF( ierr /= 0 ) THEN
|
||||
CALL errore( ' tg_cfft ', ' alltoall error 1 ', ABS(ierr) )
|
||||
END IF
|
||||
|
||||
#endif
|
||||
|
||||
CALL stop_clock( 'ALLTOALL' )
|
||||
!
|
||||
!YF Contains all ( ~ NOGRP*dfft%nsw(me) ) Z-sticks
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE pack_group_sticks
|
||||
|
||||
SUBROUTINE unpack_group_sticks()
|
||||
!
|
||||
! Bring pencils back to their original distribution
|
||||
!
|
||||
INTEGER :: idx, ierr
|
||||
INTEGER, DIMENSION(nogrp+1) :: send_cnt, send_displ, recv_cnt, recv_displ
|
||||
send_cnt (1) = nx3 * dfft%nsw( nolist(1) + 1 )
|
||||
send_displ(1) = 0
|
||||
recv_cnt (1) = nx3 * dfft%nsw( me_p )
|
||||
recv_displ(1) = 0
|
||||
DO idx = 2, NOGRP
|
||||
send_cnt (idx) = nx3 * dfft%nsw( nolist(idx) + 1 )
|
||||
send_displ(idx) = send_displ(idx-1) + send_cnt(idx-1)
|
||||
recv_cnt (idx) = nx3 * dfft%nsw( me_p )
|
||||
recv_displ(idx) = recv_displ(idx-1) + recv_cnt(idx-1)
|
||||
ENDDO
|
||||
|
||||
IF( recv_displ(nogrp) + recv_cnt(nogrp) > SIZE( f ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 3 )
|
||||
END IF
|
||||
IF( send_displ(nogrp) + send_cnt(nogrp) > SIZE( yf ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 4 )
|
||||
END IF
|
||||
|
||||
CALL start_clock( 'ALLTOALL' )
|
||||
|
||||
#if defined __MPI
|
||||
CALL MPI_Alltoallv( yf(1), &
|
||||
send_cnt, send_displ, MPI_DOUBLE_COMPLEX, f(1), &
|
||||
recv_cnt, recv_displ, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
|
||||
IF( ierr /= 0 ) THEN
|
||||
CALL errore( ' tg_cfft ', ' alltoall error 2 ', ABS(ierr) )
|
||||
END IF
|
||||
#endif
|
||||
|
||||
CALL stop_clock( 'ALLTOALL' )
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE unpack_group_sticks
|
||||
|
||||
#ifdef PIPPO
|
||||
SUBROUTINE fw_scatter( iopt )
|
||||
!
|
||||
use fft_base, only: fft_scatter
|
||||
!
|
||||
INTEGER, INTENT(IN) :: iopt
|
||||
INTEGER :: nppx
|
||||
!
|
||||
!
|
||||
IF( iopt == 2 ) THEN
|
||||
!
|
||||
if ( nproc_image == 1 ) then
|
||||
nppx = dfft%nr3x
|
||||
else
|
||||
nppx = dfft%npp( me )
|
||||
end if
|
||||
call fft_scatter( aux, nr3x, dfft%nnr, f, dfft%nsw, dfft%npp, iopt )
|
||||
f(:) = (0.d0, 0.d0)
|
||||
ii = 0
|
||||
do proc = 1, nproc_image
|
||||
do i = 1, dfft%nsw( proc )
|
||||
mc = dfft%ismap( i + dfft%iss( proc ) )
|
||||
ii = ii + 1
|
||||
do j = 1, dfft%npp( me )
|
||||
f( mc + ( j - 1 ) * dfft%nnp ) = aux( j + ( ii - 1 ) * nppx )
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
!
|
||||
ELSE IF( iopt == 1 ) THEN
|
||||
!
|
||||
if ( nproc_image == 1 ) then
|
||||
nppx = dfft%nr3x
|
||||
else
|
||||
nppx = dfft%npp( me )
|
||||
end if
|
||||
call fft_scatter( aux, nr3x, dfft%nnr, f, dfft%nsp, dfft%npp, iopt )
|
||||
f(:) = (0.d0, 0.d0)
|
||||
do i = 1, dfft%nst
|
||||
mc = dfft%ismap( i )
|
||||
do j = 1, dfft%npp( me )
|
||||
f( mc + ( j - 1 ) * dfft%nnp ) = aux( j + ( i - 1 ) * nppx )
|
||||
end do
|
||||
end do
|
||||
!
|
||||
END IF
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE fw_scatter
|
||||
|
||||
|
||||
|
||||
SUBROUTINE bw_scatter( iopt )
|
||||
!
|
||||
use fft_base, only: fft_scatter
|
||||
!
|
||||
INTEGER, INTENT(IN) :: iopt
|
||||
INTEGER :: nppx
|
||||
!
|
||||
!
|
||||
IF( iopt == -2 ) THEN
|
||||
!
|
||||
if ( nproc_image == 1 ) then
|
||||
nppx = dfft%nr3x
|
||||
else
|
||||
nppx = dfft%npp( me )
|
||||
end if
|
||||
ii = 0
|
||||
do proc = 1, nproc_image
|
||||
do i = 1, dfft%nsw( proc )
|
||||
mc = dfft%ismap( i + dfft%iss( proc ) )
|
||||
ii = ii + 1
|
||||
do j = 1, dfft%npp( me )
|
||||
aux( j + ( ii - 1 ) * nppx ) = f( mc + ( j - 1 ) * dfft%nnp )
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
call fft_scatter( aux, nr3x, dfft%nnr, f, dfft%nsw, dfft%npp, iopt )
|
||||
!
|
||||
ELSE IF( iopt == -1 ) THEN
|
||||
!
|
||||
if ( nproc_image == 1 ) then
|
||||
nppx = dfft%nr3x
|
||||
else
|
||||
nppx = dfft%npp( me )
|
||||
end if
|
||||
do i = 1, dfft%nst
|
||||
mc = dfft%ismap( i )
|
||||
do j = 1, dfft%npp( me )
|
||||
aux( j + ( i - 1 ) * nppx ) = f( mc + ( j - 1 ) * dfft%nnp )
|
||||
end do
|
||||
end do
|
||||
call fft_scatter( aux, nr3x, dfft%nnr, f, dfft%nsp, dfft%npp, iopt )
|
||||
!
|
||||
END IF
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE bw_scatter
|
||||
|
||||
#endif
|
||||
|
||||
!
|
||||
END SUBROUTINE tg_cft3s
|
||||
!
|
||||
|
|
|
@ -53,6 +53,13 @@ MODULE fft_types
|
|||
INTEGER, POINTER :: imax3(:) ! the last local plane
|
||||
INTEGER, POINTER :: np3(:) ! number of local plane for the box fft
|
||||
!
|
||||
! task groups
|
||||
!
|
||||
LOGICAL :: use_task_groups
|
||||
INTEGER :: nnrx ! maximum among nnr
|
||||
INTEGER, POINTER :: tg_nsw(:) ! number of sticks per task group ( wave func )
|
||||
INTEGER, POINTER :: tg_npp(:) ! number of "Z" planes per task group
|
||||
!
|
||||
END TYPE
|
||||
|
||||
|
||||
|
@ -79,6 +86,7 @@ CONTAINS
|
|||
desc%nsp = 0
|
||||
desc%nsw = 0
|
||||
desc%ngl = 0
|
||||
desc%nwl = 0
|
||||
desc%npp = 0
|
||||
desc%ipp = 0
|
||||
desc%iss = 0
|
||||
|
@ -86,8 +94,13 @@ CONTAINS
|
|||
desc%ismap = 0
|
||||
desc%iplp = 0
|
||||
desc%iplw = 0
|
||||
|
||||
desc%id = 0
|
||||
|
||||
desc%use_task_groups = .FALSE.
|
||||
NULLIFY( desc%tg_nsw )
|
||||
NULLIFY( desc%tg_npp )
|
||||
|
||||
END SUBROUTINE fft_dlay_allocate
|
||||
|
||||
|
||||
|
@ -105,6 +118,11 @@ CONTAINS
|
|||
IF ( ASSOCIATED( desc%iplp ) ) DEALLOCATE( desc%iplp )
|
||||
IF ( ASSOCIATED( desc%iplw ) ) DEALLOCATE( desc%iplw )
|
||||
desc%id = 0
|
||||
IF( desc%use_task_groups ) THEN
|
||||
IF ( ASSOCIATED( desc%tg_nsw ) ) DEALLOCATE( desc%tg_nsw )
|
||||
IF ( ASSOCIATED( desc%tg_npp ) ) DEALLOCATE( desc%tg_npp )
|
||||
END IF
|
||||
desc%use_task_groups = .FALSE.
|
||||
END SUBROUTINE fft_dlay_deallocate
|
||||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
@ -124,6 +142,7 @@ CONTAINS
|
|||
desc%npp = 0
|
||||
desc%ipp = 0
|
||||
desc%np3 = 0
|
||||
desc%use_task_groups = .FALSE.
|
||||
END SUBROUTINE fft_box_allocate
|
||||
|
||||
SUBROUTINE fft_box_deallocate( desc )
|
||||
|
@ -134,6 +153,7 @@ CONTAINS
|
|||
IF( ASSOCIATED( desc%npp ) ) DEALLOCATE( desc%npp )
|
||||
IF( ASSOCIATED( desc%ipp ) ) DEALLOCATE( desc%ipp )
|
||||
IF( ASSOCIATED( desc%np3 ) ) DEALLOCATE( desc%np3 )
|
||||
desc%use_task_groups = .FALSE.
|
||||
END SUBROUTINE fft_box_deallocate
|
||||
|
||||
|
||||
|
@ -181,6 +201,7 @@ CONTAINS
|
|||
IF( ( SIZE( ncp ) < nproc ) .OR. ( SIZE( ngp ) < nproc ) ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong stick dimensions ', 4 )
|
||||
|
||||
desc%use_task_groups = .FALSE.
|
||||
|
||||
! Set the number of "xy" planes for each processor
|
||||
! in other word do a slab partition along the z axis
|
||||
|
@ -240,11 +261,21 @@ CONTAINS
|
|||
! Set fft local workspace dimension
|
||||
|
||||
IF ( nproc == 1 ) THEN
|
||||
desc%nnr = nr1x * nr2x * nr3x
|
||||
desc%nnr = nr1x * nr2x * nr3x
|
||||
desc%nnrx = desc%nnr
|
||||
ELSE
|
||||
desc%nnr = MAX( nr3x * ncp(me), nr1x * nr2x * npp(me) )
|
||||
desc%nnr = MAX( 1, desc%nnr ) ! ensure that desc%nrr > 0 ( for extreme parallelism )
|
||||
desc%nnrx = desc%nnr
|
||||
DO i = 1, nproc
|
||||
desc%nnrx = MAX( desc%nnrx, nr3x * ncp( i ) )
|
||||
desc%nnrx = MAX( desc%nnrx, nr1x * nr2x * npp( i ) )
|
||||
END DO
|
||||
desc%nnrx = MAX( 1, desc%nnrx ) ! ensure that desc%nrr > 0 ( for extreme parallelism )
|
||||
END IF
|
||||
|
||||
|
||||
|
||||
desc%ngl( 1:nproc ) = ngp( 1:nproc )
|
||||
desc%nwl( 1:nproc ) = ngpw( 1:nproc )
|
||||
|
||||
|
@ -442,6 +473,7 @@ CONTAINS
|
|||
|
||||
END DO
|
||||
|
||||
desc%use_task_groups = .FALSE.
|
||||
|
||||
END SUBROUTINE fft_box_set
|
||||
|
||||
|
@ -492,6 +524,8 @@ CONTAINS
|
|||
desc%nnp = nr1x * nr2x
|
||||
desc%npp = nr3
|
||||
desc%ipp = 0
|
||||
desc%use_task_groups = .FALSE.
|
||||
desc%nnrx = desc%nnr
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE fft_dlay_scalar
|
||||
|
|
|
@ -57,14 +57,12 @@ fft_base.o : fft_types.o
|
|||
fft_base.o : kind.o
|
||||
fft_base.o : mp_global.o
|
||||
fft_base.o : parallel_include.o
|
||||
fft_base.o : task_groups.o
|
||||
fft_parallel.o : fft_base.o
|
||||
fft_parallel.o : fft_scalar.o
|
||||
fft_parallel.o : fft_types.o
|
||||
fft_parallel.o : kind.o
|
||||
fft_parallel.o : mp_global.o
|
||||
fft_parallel.o : parallel_include.o
|
||||
fft_parallel.o : task_groups.o
|
||||
|
||||
fft_scalar.o : kind.o
|
||||
fft_types.o : io_global.o
|
||||
|
|
|
@ -48,6 +48,8 @@ MODULE mp_global
|
|||
INTEGER :: np_ortho(2) = 1 ! size of the processor grid used in ortho
|
||||
INTEGER :: leg_ortho = 1 ! the distance in the father communicator
|
||||
! of two neighbour processors in ortho_comm
|
||||
INTEGER, ALLOCATABLE :: nolist(:) ! list of processors in my orbital task group
|
||||
INTEGER, ALLOCATABLE :: nplist(:) ! list of processors in my plane wave task group
|
||||
!
|
||||
! ... communicators
|
||||
!
|
||||
|
@ -87,6 +89,10 @@ MODULE mp_global
|
|||
inter_image_comm = group_i
|
||||
intra_image_comm = group_i
|
||||
ortho_comm = group_i
|
||||
ALLOCATE( nolist( nproc_i ) )
|
||||
ALLOCATE( nplist( nproc_i ) )
|
||||
nolist = 0
|
||||
nplist = 0
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -17,27 +17,9 @@ MODULE task_groups
|
|||
IMPLICIT NONE
|
||||
SAVE
|
||||
|
||||
INTEGER, ALLOCATABLE :: nolist(:), nplist(:), pgroup(:)
|
||||
INTEGER, ALLOCATABLE :: tmp_nsw(:), tmp_npp(:)
|
||||
INTEGER :: strd
|
||||
INTEGER :: nswx ! maximum number of stick per processor
|
||||
|
||||
CONTAINS
|
||||
|
||||
SUBROUTINE DEALLOCATE_GROUPS
|
||||
|
||||
IMPLICIT NONE
|
||||
|
||||
! ... Deallocate groups related arrays
|
||||
|
||||
IF (ALLOCATED(nolist)) DEALLOCATE(nolist)
|
||||
IF (ALLOCATED(nplist)) DEALLOCATE(nplist)
|
||||
IF (ALLOCATED(pgroup)) DEALLOCATE(pgroup)
|
||||
IF (ALLOCATED(tmp_nsw)) DEALLOCATE(tmp_nsw)
|
||||
IF (ALLOCATED(tmp_npp)) DEALLOCATE(tmp_npp)
|
||||
|
||||
END SUBROUTINE DEALLOCATE_GROUPS
|
||||
|
||||
|
||||
!========================================================================================
|
||||
! ADDED SUBROUTINEs FOR TASK GROUP PARALLIZATION
|
||||
|
@ -58,6 +40,7 @@ SUBROUTINE task_groups_init( dffts )
|
|||
!
|
||||
USE mp_global, ONLY : me_pool, nproc_pool, intra_pool_comm
|
||||
USE mp_global, ONLY : NOGRP, NPGRP, ogrp_comm, pgrp_comm
|
||||
USE mp_global, ONLY : nolist, nplist
|
||||
USE mp, ONLY : mp_bcast
|
||||
USE io_global, only : stdout
|
||||
USE fft_types, only : fft_dlay_descriptor
|
||||
|
@ -68,17 +51,19 @@ SUBROUTINE task_groups_init( dffts )
|
|||
|
||||
IMPLICIT NONE
|
||||
|
||||
TYPE(fft_dlay_descriptor), INTENT(IN) :: dffts
|
||||
TYPE(fft_dlay_descriptor), INTENT(INOUT) :: dffts
|
||||
|
||||
!----------------------------------
|
||||
!Local Variables declaration
|
||||
!----------------------------------
|
||||
|
||||
INTEGER :: MSGLEN, I, J, N1, IPOS, WORLD, NEWGROUP
|
||||
INTEGER :: IERR
|
||||
INTEGER :: itsk, ntsk, color, key
|
||||
INTEGER :: num_planes, num_sticks
|
||||
INTEGER, DIMENSION(:), ALLOCATABLE :: nnrsx_vec
|
||||
INTEGER :: MSGLEN, I, J, N1, IPOS, WORLD, NEWGROUP
|
||||
INTEGER :: IERR
|
||||
INTEGER :: itsk, ntsk, color, key
|
||||
INTEGER :: num_planes, num_sticks
|
||||
INTEGER :: nnrsx_vec ( nproc_pool )
|
||||
INTEGER :: pgroup( nproc_pool )
|
||||
INTEGER :: strd
|
||||
|
||||
!
|
||||
WRITE( stdout, 100 ) nogrp, npgrp
|
||||
|
@ -94,17 +79,11 @@ SUBROUTINE task_groups_init( dffts )
|
|||
|
||||
IF( MOD( nproc_pool, nogrp ) /= 0 ) &
|
||||
CALL errore( " groups ", " nogrp should be a divisor of nproc_pool ", 1 )
|
||||
|
||||
ALLOCATE( pgroup( nproc_pool ) )
|
||||
!
|
||||
DO i = 1, nproc_pool
|
||||
pgroup( i ) = i - 1
|
||||
ENDDO
|
||||
!
|
||||
ALLOCATE( nplist( npgrp ) )
|
||||
!
|
||||
ALLOCATE( nolist( nogrp ) )
|
||||
!
|
||||
!--------------------------------------
|
||||
!LIST OF PROCESSORS IN MY ORBITAL GROUP
|
||||
!--------------------------------------
|
||||
|
@ -171,20 +150,16 @@ SUBROUTINE task_groups_init( dffts )
|
|||
#endif
|
||||
|
||||
|
||||
ALLOCATE( nnrsx_vec( nproc_pool ) )
|
||||
|
||||
!Find maximum chunk of local data concerning coefficients of eigenfunctions in g-space
|
||||
|
||||
#if defined __MPI
|
||||
CALL MPI_Allgather( dffts%nnr, 1, MPI_INTEGER, nnrsx_vec, 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
strd = MAXVAL( nnrsx_vec( 1:nproc_pool ) )
|
||||
nswx = MAXVAL( dffts%nsw( 1:nproc_pool ) )
|
||||
#else
|
||||
strd = dffts%nnr
|
||||
nswx = dffts%nsw(1)
|
||||
#endif
|
||||
|
||||
DEALLOCATE( nnrsx_vec )
|
||||
IF( strd /= dffts%nnrx ) CALL errore( ' task_groups_init ', ' inconsistent nnrx ', 1 )
|
||||
|
||||
!-------------------------------------------------------------------------------------
|
||||
!C. Bekas...TASK GROUP RELATED. FFT DATA STRUCTURES ARE ALREADY DEFINED ABOVE
|
||||
|
@ -196,8 +171,8 @@ SUBROUTINE task_groups_init( dffts )
|
|||
!we choose to do the latter one.
|
||||
!-------------------------------------------------------------------------------------
|
||||
!
|
||||
ALLOCATE(tmp_nsw(nproc_pool))
|
||||
ALLOCATE(tmp_npp(nproc_pool))
|
||||
ALLOCATE( dffts%tg_nsw(nproc_pool))
|
||||
ALLOCATE( dffts%tg_npp(nproc_pool))
|
||||
|
||||
num_sticks = 0
|
||||
num_planes = 0
|
||||
|
@ -207,13 +182,15 @@ SUBROUTINE task_groups_init( dffts )
|
|||
ENDDO
|
||||
|
||||
#if defined __MPI
|
||||
CALL MPI_ALLGATHER(num_sticks, 1, MPI_INTEGER, tmp_nsw, 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
CALL MPI_ALLGATHER(num_planes, 1, MPI_INTEGER, tmp_npp, 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
CALL MPI_ALLGATHER(num_sticks, 1, MPI_INTEGER, dffts%tg_nsw(1), 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
CALL MPI_ALLGATHER(num_planes, 1, MPI_INTEGER, dffts%tg_npp(1), 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
#else
|
||||
tmp_nsw(1) = num_sticks
|
||||
tmp_npp(1) = num_planes
|
||||
dffts%tg_nsw(1) = num_sticks
|
||||
dffts%tg_npp(1) = num_planes
|
||||
#endif
|
||||
|
||||
dffts%use_task_groups = .TRUE.
|
||||
|
||||
RETURN
|
||||
|
||||
END SUBROUTINE task_groups_init
|
||||
|
|
Loading…
Reference in New Issue