mirror of https://gitlab.com/QEF/q-e.git
705 lines
23 KiB
Fortran
705 lines
23 KiB
Fortran
!
|
|
! Copyright (C) Quantum ESPRESSO group
|
|
!
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
!=----------------------------------------------------------------------=
|
|
MODULE stick_set
|
|
!=----------------------------------------------------------------------=
|
|
|
|
! ... Distribute G-vectors across processors as sticks and planes,
|
|
! ... initialize FFT descriptors for both dense and smooth grids
|
|
|
|
USE stick_base
|
|
USE fft_types, ONLY: fft_dlay_descriptor, fft_dlay_allocate, &
|
|
fft_dlay_set, fft_dlay_scalar
|
|
|
|
IMPLICIT NONE
|
|
|
|
INTEGER, PARAMETER :: DP = selected_real_kind(14,200)
|
|
|
|
PRIVATE
|
|
SAVE
|
|
|
|
PUBLIC :: pstickset, pstickset_custom
|
|
|
|
!=----------------------------------------------------------------------=
|
|
CONTAINS
|
|
!=----------------------------------------------------------------------=
|
|
|
|
SUBROUTINE pstickset( gamma_only, bg, gcut, gkcut, gcuts, &
|
|
dfftp, dffts, ngw, ngm, ngs, mype, root, nproc, comm, nogrp_ , &
|
|
ionode, stdout, dfft3d )
|
|
|
|
LOGICAL, INTENT(in) :: gamma_only
|
|
! ... bg(:,1), bg(:,2), bg(:,3) reciprocal space base vectors.
|
|
REAL(DP), INTENT(in) :: bg(3,3)
|
|
REAL(DP), INTENT(in) :: gcut, gkcut, gcuts
|
|
TYPE(fft_dlay_descriptor), INTENT(inout) :: dfftp, dffts
|
|
INTEGER, INTENT(out) :: ngw, ngm, ngs
|
|
|
|
INTEGER, INTENT(IN) :: mype, root, nproc, comm
|
|
INTEGER, INTENT(IN) :: nogrp_
|
|
LOGICAL, INTENT(IN) :: ionode
|
|
INTEGER, INTENT(IN) :: stdout
|
|
|
|
TYPE(fft_dlay_descriptor), OPTIONAL, INTENT(inout) :: dfft3d
|
|
|
|
|
|
LOGICAL :: tk
|
|
|
|
INTEGER :: ub(3), lb(3)
|
|
! ... ub(i), lb(i) upper and lower miller indexes
|
|
|
|
!
|
|
! ... Plane Waves
|
|
!
|
|
INTEGER, ALLOCATABLE :: stw(:,:)
|
|
! ... stick map (wave functions), stw(i,j) = number of G-vector in the
|
|
! ... stick whose x and y miller index are i and j
|
|
|
|
INTEGER, ALLOCATABLE :: nstpw(:)
|
|
! ... number of sticks (wave functions), nstpw(ip) = number of stick
|
|
! ... for processor ip
|
|
|
|
INTEGER, ALLOCATABLE :: sstpw(:)
|
|
! ... number of G-vectors (wave functions), sstpw(ip) = sum of the
|
|
! ... sticks length for processor ip = number of G-vectors
|
|
! ... owned by the processor ip
|
|
|
|
INTEGER :: nstw, nstpwx
|
|
! ... nstw local number of sticks (wave functions)
|
|
! ... nstpwx maximum among all processors of nstw
|
|
|
|
!
|
|
! ... Potentials
|
|
!
|
|
|
|
INTEGER, ALLOCATABLE :: st(:,:)
|
|
! ... stick map (potentials), st(i,j) = number of G-vector in the
|
|
! ... stick whose x and y miller index are i and j
|
|
|
|
INTEGER, ALLOCATABLE :: nstp(:)
|
|
! ... number of sticks (potentials), nstp(ip) = number of stick
|
|
! ... for processor ip
|
|
|
|
INTEGER, ALLOCATABLE :: sstp(:)
|
|
! ... number of G-vectors (potentials), sstp(ip) = sum of the
|
|
! ... sticks length for processor ip = number of G-vectors
|
|
! ... owned by the processor ip
|
|
|
|
INTEGER :: nst, nstpx
|
|
! ... nst local number of sticks (potentials)
|
|
! ... nstpx maximum among all processors of nst
|
|
|
|
!
|
|
! ... Smooth Mesh
|
|
!
|
|
|
|
INTEGER, ALLOCATABLE :: sts(:,:)
|
|
! ... stick map (smooth mesh), sts(i,j) = number of G-vector in the
|
|
! ... stick whose x and y miller index are i and j
|
|
|
|
INTEGER, ALLOCATABLE :: nstps(:)
|
|
! ... number of sticks (smooth mesh), nstp(ip) = number of stick
|
|
! ... for processor ip
|
|
|
|
INTEGER, ALLOCATABLE :: sstps(:)
|
|
! ... number of G-vectors (smooth mesh), sstps(ip) = sum of the
|
|
! ... sticks length for processor ip = number of G-vectors
|
|
! ... owned by the processor ip
|
|
|
|
INTEGER :: nsts
|
|
! ... nsts local number of sticks (smooth mesh)
|
|
|
|
|
|
INTEGER, ALLOCATABLE :: ist(:,:) ! sticks indices ordered
|
|
INTEGER :: ip, ngm_ , ngs_
|
|
INTEGER, ALLOCATABLE :: idx(:)
|
|
|
|
tk = .not. gamma_only
|
|
ub(1) = ( dfftp%nr1 - 1 ) / 2
|
|
ub(2) = ( dfftp%nr2 - 1 ) / 2
|
|
ub(3) = ( dfftp%nr3 - 1 ) / 2
|
|
lb = - ub
|
|
|
|
! ... Allocate maps
|
|
|
|
ALLOCATE( stw ( lb(1):ub(1), lb(2):ub(2) ) )
|
|
ALLOCATE( st ( lb(1):ub(1), lb(2):ub(2) ) )
|
|
ALLOCATE( sts ( lb(1):ub(1), lb(2):ub(2) ) )
|
|
|
|
st = 0
|
|
stw = 0
|
|
sts = 0
|
|
|
|
! ... Fill in the stick maps, for given g-space base and cut-off
|
|
|
|
CALL sticks_maps( tk, ub, lb, bg(:,1), bg(:,2), bg(:,3), &
|
|
gcut, gkcut, gcuts, st, stw, sts, mype, &
|
|
nproc, comm )
|
|
|
|
! ... Now count the number of stick nst and nstw
|
|
|
|
nst = count( st > 0 )
|
|
nstw = count( stw > 0 )
|
|
nsts = count( sts > 0 )
|
|
|
|
ALLOCATE(ist(nst,5))
|
|
|
|
ALLOCATE(nstp(nproc))
|
|
ALLOCATE(sstp(nproc))
|
|
|
|
ALLOCATE(nstpw(nproc))
|
|
ALLOCATE(sstpw(nproc))
|
|
|
|
ALLOCATE(nstps(nproc))
|
|
ALLOCATE(sstps(nproc))
|
|
|
|
! ... initialize the sticks indexes array ist
|
|
|
|
CALL sticks_countg( tk, ub, lb, st, stw, sts, &
|
|
ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5) )
|
|
|
|
! ... Sorts the sticks according to their length
|
|
|
|
ALLOCATE( idx( nst ) )
|
|
|
|
CALL sticks_sort( ist(:,4), ist(:,3), ist(:,5), nst, idx, nproc )
|
|
|
|
! ... Set as first stick the stick containing the G=0
|
|
!
|
|
! DO iss = 1, nst
|
|
! IF( ist( idx( iss ), 1 ) == 0 .AND. ist( idx( iss ), 2 ) == 0 ) EXIT
|
|
! END DO
|
|
! itmp = idx( 1 )
|
|
! idx( 1 ) = idx( iss )
|
|
! idx( iss ) = itmp
|
|
|
|
CALL sticks_dist( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
|
|
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
|
|
|
|
ngw = sstpw( mype + 1 )
|
|
ngm = sstp( mype + 1 )
|
|
ngs = sstps( mype + 1 )
|
|
|
|
CALL sticks_pairup( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
|
|
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
|
|
|
|
! ... Allocate and Set fft data layout descriptors
|
|
|
|
#if defined(__MPI)
|
|
|
|
CALL fft_dlay_allocate( dfftp, mype, root, nproc, comm, nogrp_ , dfftp%nr1x, dfftp%nr2x )
|
|
CALL fft_dlay_allocate( dffts, mype, root, nproc, comm, nogrp_ , dffts%nr1x, dffts%nr2x )
|
|
|
|
CALL fft_dlay_set( dfftp, tk, nst, dfftp%nr1, dfftp%nr2, dfftp%nr3, dfftp%nr1x, dfftp%nr2x, dfftp%nr3x, &
|
|
ub, lb, idx, ist(:,1), ist(:,2), nstp, nstpw, sstp, sstpw, st, stw )
|
|
CALL fft_dlay_set( dffts, tk, nsts, dffts%nr1, dffts%nr2, dffts%nr3, dffts%nr1x, dffts%nr2x, dffts%nr3x, &
|
|
ub, lb, idx, ist(:,1), ist(:,2), nstps, nstpw, sstps, sstpw, sts, stw )
|
|
|
|
IF( PRESENT( dfft3d ) ) THEN
|
|
DEALLOCATE( stw )
|
|
ALLOCATE( stw( lb(2) : ub(2), lb(3) : ub(3) ) )
|
|
CALL sticks_maps_scalar( (.not.tk), ub, lb, bg(:,1),bg(:,2),bg(:,3), gcut, gkcut, gcuts, stw, ngm_ , ngs_ )
|
|
CALL fft_dlay_allocate( dfft3d, mype, root, nproc, comm, 1, max(dffts%nr1x, dffts%nr3x), dffts%nr2x )
|
|
CALL fft_dlay_scalar( dfft3d, ub, lb, dffts%nr1, dffts%nr2, dffts%nr3, dffts%nr1x, dffts%nr2x, dffts%nr3x, stw )
|
|
END IF
|
|
|
|
#else
|
|
|
|
DEALLOCATE( stw )
|
|
ALLOCATE( stw( lb(2) : ub(2), lb(3) : ub(3) ) )
|
|
|
|
CALL sticks_maps_scalar( (.not.tk), ub, lb, bg(:,1),bg(:,2),bg(:,3),&
|
|
gcut, gkcut, gcuts, stw, ngm_ , ngs_ )
|
|
|
|
IF( ngm_ /= ngm ) CALL fftx_error__( ' pstickset ', ' inconsistent ngm ', abs( ngm - ngm_ ) )
|
|
IF( ngs_ /= ngs ) CALL fftx_error__( ' pstickset ', ' inconsistent ngs ', abs( ngs - ngs_ ) )
|
|
|
|
CALL fft_dlay_allocate( dfftp, mype, root, nproc, comm, 1, max(dfftp%nr1x, dfftp%nr3x), dfftp%nr2x )
|
|
CALL fft_dlay_allocate( dffts, mype, root, nproc, comm, 1, max(dffts%nr1x, dffts%nr3x), dffts%nr2x )
|
|
|
|
CALL fft_dlay_scalar( dfftp, ub, lb, dfftp%nr1, dfftp%nr2, dfftp%nr3, dfftp%nr1x, dfftp%nr2x, dfftp%nr3x, stw )
|
|
CALL fft_dlay_scalar( dffts, ub, lb, dffts%nr1, dffts%nr2, dffts%nr3, dffts%nr1x, dffts%nr2x, dffts%nr3x, stw )
|
|
|
|
#endif
|
|
|
|
! ... Maximum number of sticks (potentials)
|
|
nstpx = maxval( nstp )
|
|
! ... Maximum number of sticks (wave func.)
|
|
nstpwx = maxval( nstpw )
|
|
|
|
!
|
|
! Initialize task groups.
|
|
! Note that this call modify dffts adding task group data.
|
|
!
|
|
CALL task_groups_init( dffts )
|
|
!
|
|
|
|
IF (ionode) THEN
|
|
WRITE( stdout,*)
|
|
IF ( nproc > 1 ) THEN
|
|
WRITE( stdout, '(5X,"Parallelization info")')
|
|
ELSE
|
|
WRITE( stdout, '(5X,"G-vector sticks info")')
|
|
ENDIF
|
|
WRITE( stdout, '(5X,"--------------------")')
|
|
WRITE( stdout, '(5X,"sticks: dense smooth PW", &
|
|
& 5X,"G-vecs: dense smooth PW")')
|
|
IF ( nproc > 1 ) THEN
|
|
WRITE( stdout,'(5X,"Min",4X,2I8,I7,12X,2I9,I8)') &
|
|
minval(nstp), minval(nstps), minval(nstpw), &
|
|
minval(sstp), minval(sstps), minval(sstpw)
|
|
WRITE( stdout,'(5X,"Max",4X,2I8,I7,12X,2I9,I8)') &
|
|
maxval(nstp), maxval(nstps), maxval(nstpw), &
|
|
maxval(sstp), maxval(sstps), maxval(sstpw)
|
|
END IF
|
|
WRITE( stdout,'(5X,"Sum",4X,2I8,I7,12X,2I9,I8)') &
|
|
sum(nstp), sum(nstps), sum(nstpw), &
|
|
sum(sstp), sum(sstps), sum(sstpw)
|
|
! in the case k=0, the lines above and below differ:
|
|
! above all sticks, below only those in the half sphere
|
|
IF ( .NOT. tk ) &
|
|
WRITE( stdout,'(5X,"Tot",4X,2I8,I7)') nst, nsts, nstw
|
|
ENDIF
|
|
|
|
DEALLOCATE( ist )
|
|
DEALLOCATE( idx )
|
|
|
|
DEALLOCATE( st, stw, sts )
|
|
DEALLOCATE( sstp )
|
|
DEALLOCATE( nstp )
|
|
DEALLOCATE( sstpw )
|
|
DEALLOCATE( nstpw )
|
|
DEALLOCATE( sstps )
|
|
DEALLOCATE( nstps )
|
|
|
|
IF(ionode) WRITE( stdout,*)
|
|
|
|
RETURN
|
|
END SUBROUTINE pstickset
|
|
|
|
!----------------------------------------------------------------------
|
|
|
|
SUBROUTINE pstickset_custom( gamma_only, bg, gcut, gkcut, gcuts, &
|
|
dfftp, dffts, ngw, ngm, ngs, mype, root, nproc, comm, nogrp_ )
|
|
|
|
LOGICAL, INTENT(in) :: gamma_only
|
|
! ... bg(:,1), bg(:,2), bg(:,3) reciprocal space base vectors.
|
|
REAL(DP), INTENT(in) :: bg(3,3)
|
|
REAL(DP), INTENT(in) :: gcut, gkcut, gcuts
|
|
TYPE(fft_dlay_descriptor), INTENT(inout) :: dfftp, dffts
|
|
INTEGER, INTENT(inout) :: ngw, ngm, ngs
|
|
|
|
INTEGER, INTENT(IN) :: mype, root, nproc, comm
|
|
INTEGER, INTENT(IN) :: nogrp_
|
|
|
|
|
|
LOGICAL :: tk
|
|
|
|
INTEGER :: ub(3), lb(3)
|
|
! ... ub(i), lb(i) upper and lower miller indexes
|
|
|
|
!
|
|
! ... Plane Waves
|
|
!
|
|
INTEGER, ALLOCATABLE :: stw(:,:)
|
|
! ... stick map (wave functions), stw(i,j) = number of G-vector in the
|
|
! ... stick whose x and y miller index are i and j
|
|
|
|
INTEGER, ALLOCATABLE :: nstpw(:)
|
|
! ... number of sticks (wave functions), nstpw(ip) = number of stick
|
|
! ... for processor ip
|
|
|
|
INTEGER, ALLOCATABLE :: sstpw(:)
|
|
! ... number of G-vectors (wave functions), sstpw(ip) = sum of the
|
|
! ... sticks length for processor ip = number of G-vectors
|
|
! ... owned by the processor ip
|
|
|
|
INTEGER :: nstw, nstpwx
|
|
! ... nstw local number of sticks (wave functions)
|
|
! ... nstpwx maximum among all processors of nstw
|
|
|
|
!
|
|
! ... Potentials
|
|
!
|
|
|
|
INTEGER, ALLOCATABLE :: st(:,:)
|
|
! ... stick map (potentials), st(i,j) = number of G-vector in the
|
|
! ... stick whose x and y miller index are i and j
|
|
|
|
INTEGER, ALLOCATABLE :: nstp(:)
|
|
! ... number of sticks (potentials), nstp(ip) = number of stick
|
|
! ... for processor ip
|
|
|
|
INTEGER, ALLOCATABLE :: sstp(:)
|
|
! ... number of G-vectors (potentials), sstp(ip) = sum of the
|
|
! ... sticks length for processor ip = number of G-vectors
|
|
! ... owned by the processor ip
|
|
|
|
INTEGER :: nst, nstpx
|
|
! ... nst local number of sticks (potentials)
|
|
! ... nstpx maximum among all processors of nst
|
|
|
|
!
|
|
! ... Smooth Mesh
|
|
!
|
|
|
|
INTEGER, ALLOCATABLE :: sts(:,:)
|
|
! ... stick map (smooth mesh), sts(i,j) = number of G-vector in the
|
|
! ... stick whose x and y miller index are i and j
|
|
|
|
INTEGER, ALLOCATABLE :: nstps(:)
|
|
! ... number of sticks (smooth mesh), nstp(ip) = number of stick
|
|
! ... for processor ip
|
|
|
|
INTEGER, ALLOCATABLE :: sstps(:)
|
|
! ... number of G-vectors (smooth mesh), sstps(ip) = sum of the
|
|
! ... sticks length for processor ip = number of G-vectors
|
|
! ... owned by the processor ip
|
|
|
|
INTEGER :: nsts
|
|
! ... nsts local number of sticks (smooth mesh)
|
|
|
|
|
|
INTEGER, ALLOCATABLE :: ist(:,:) ! sticks indices ordered
|
|
INTEGER :: ip, ngm_ , ngs_
|
|
INTEGER, ALLOCATABLE :: idx(:)
|
|
|
|
tk = .not. gamma_only
|
|
ub(1) = ( dfftp%nr1 - 1 ) / 2
|
|
ub(2) = ( dfftp%nr2 - 1 ) / 2
|
|
ub(3) = ( dfftp%nr3 - 1 ) / 2
|
|
lb = - ub
|
|
|
|
! ... Allocate maps
|
|
|
|
ALLOCATE( stw ( lb(1):ub(1), lb(2):ub(2) ) )
|
|
ALLOCATE( st ( lb(1):ub(1), lb(2):ub(2) ) )
|
|
ALLOCATE( sts ( lb(1):ub(1), lb(2):ub(2) ) )
|
|
|
|
st = 0
|
|
stw = 0
|
|
sts = 0
|
|
|
|
! ... Fill in the stick maps, for given g-space base and cut-off
|
|
|
|
CALL sticks_maps( tk, ub, lb, bg(:,1), bg(:,2), bg(:,3), &
|
|
gcut, gkcut, gcuts, st, stw, sts, mype, &
|
|
nproc, comm )
|
|
|
|
! ... Now count the number of stick nst and nstw
|
|
|
|
nst = count( st > 0 )
|
|
nstw = count( stw > 0 )
|
|
nsts = count( sts > 0 )
|
|
|
|
ALLOCATE(ist(nst,5))
|
|
|
|
ALLOCATE(nstp(nproc))
|
|
ALLOCATE(sstp(nproc))
|
|
|
|
ALLOCATE(nstpw(nproc))
|
|
ALLOCATE(sstpw(nproc))
|
|
|
|
ALLOCATE(nstps(nproc))
|
|
ALLOCATE(sstps(nproc))
|
|
|
|
! ... initialize the sticks indexes array ist
|
|
|
|
CALL sticks_countg( tk, ub, lb, st, stw, sts, &
|
|
ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5) )
|
|
|
|
! ... Sorts the sticks according to their length
|
|
|
|
ALLOCATE( idx( nst ) )
|
|
|
|
CALL sticks_sort( ist(:,4), ist(:,3), ist(:,5), nst, idx, nproc )
|
|
|
|
! ... Distribute the sticks as in dfftp
|
|
|
|
CALL sticks_ordered_dist( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
|
|
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
|
|
|
|
ngw = sstpw( mype + 1 )
|
|
ngm = sstp( mype + 1 )
|
|
ngs = sstps( mype + 1 )
|
|
|
|
CALL sticks_pairup( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
|
|
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
|
|
|
|
! ... Allocate and Set fft data layout descriptors
|
|
|
|
#if defined(__MPI)
|
|
|
|
CALL fft_dlay_allocate( dffts, mype, root, nproc, comm, nogrp_ , dffts%nr1x, dffts%nr2x )
|
|
|
|
CALL fft_dlay_set( dffts, tk, nsts, dffts%nr1, dffts%nr2, dffts%nr3, dffts%nr1x, dffts%nr2x, dffts%nr3x, &
|
|
ub, lb, idx, ist(:,1), ist(:,2), nstps, nstpw, sstps, sstpw, sts, stw )
|
|
|
|
#else
|
|
|
|
DEALLOCATE( stw )
|
|
ALLOCATE( stw( lb(2) : ub(2), lb(3) : ub(3) ) )
|
|
|
|
CALL sticks_maps_scalar( (.not.tk), ub, lb, bg(:,1),bg(:,2),bg(:,3),&
|
|
gcut, gkcut, gcuts, stw, ngm_ , ngs_ )
|
|
|
|
IF( ngs_ /= ngs ) CALL fftx_error__( ' pstickset_custom ', ' inconsistent ngs ', abs( ngs - ngs_ ) )
|
|
|
|
CALL fft_dlay_allocate( dffts, mype, root, nproc, comm, 1, max(dffts%nr1x, dffts%nr3x), dffts%nr2x )
|
|
|
|
CALL fft_dlay_scalar( dffts, ub, lb, dffts%nr1, dffts%nr2, dffts%nr3, dffts%nr1x, dffts%nr2x, dffts%nr3x, stw )
|
|
|
|
#endif
|
|
|
|
! ... Maximum number of sticks (potentials)
|
|
nstpx = maxval( nstp )
|
|
! ... Maximum number of sticks (wave func.)
|
|
nstpwx = maxval( nstpw )
|
|
|
|
DEALLOCATE( ist )
|
|
DEALLOCATE( idx )
|
|
|
|
DEALLOCATE( st, stw, sts )
|
|
DEALLOCATE( sstp )
|
|
DEALLOCATE( nstp )
|
|
DEALLOCATE( sstpw )
|
|
DEALLOCATE( nstpw )
|
|
DEALLOCATE( sstps )
|
|
DEALLOCATE( nstps )
|
|
|
|
RETURN
|
|
END SUBROUTINE pstickset_custom
|
|
|
|
!-----------------------------------------
|
|
! Task groups Contributed by C. Bekas, October 2005
|
|
! Revised by C. Cavazzoni
|
|
!--------------------------------------------
|
|
|
|
SUBROUTINE task_groups_init( dffts )
|
|
|
|
!
|
|
USE fft_types, ONLY : fft_dlay_descriptor
|
|
|
|
! T.G.
|
|
! NPGRP: Number of processors per group
|
|
! NOGRP: Number of processors per orbital task group
|
|
|
|
IMPLICIT NONE
|
|
#if defined(__MPI)
|
|
INCLUDE 'mpif.h'
|
|
#endif
|
|
|
|
|
|
TYPE(fft_dlay_descriptor), INTENT(inout) :: dffts
|
|
|
|
INTEGER :: stdout = 6
|
|
|
|
!----------------------------------
|
|
!Local Variables declaration
|
|
!----------------------------------
|
|
|
|
INTEGER :: I
|
|
INTEGER :: IERR
|
|
INTEGER :: num_planes, num_sticks
|
|
INTEGER :: nnrsx_vec ( dffts%nproc )
|
|
INTEGER :: pgroup( dffts%nproc )
|
|
INTEGER :: strd
|
|
|
|
CALL task_groups_init_first( dffts )
|
|
!
|
|
#if defined(DEBUG)
|
|
IF ( dffts%nogrp > 1 ) WRITE( stdout, 100 ) dffts%nogrp, dffts%npgrp
|
|
|
|
100 FORMAT( /,3X,'Task Groups are in USE',/,3X,'groups and procs/group : ',I5,I5 )
|
|
#endif
|
|
|
|
!Find maximum chunk of local data concerning coefficients of eigenfunctions in g-space
|
|
|
|
#if defined(__MPI)
|
|
CALL MPI_Allgather( dffts%nnr, 1, MPI_INTEGER, nnrsx_vec, 1, MPI_INTEGER, dffts%comm, IERR)
|
|
strd = maxval( nnrsx_vec( 1:dffts%nproc ) )
|
|
#else
|
|
strd = dffts%nnr
|
|
#endif
|
|
|
|
IF( strd /= dffts%tg_nnr ) CALL fftx_error__( ' task_groups_init ', ' inconsistent nnr ', 1 )
|
|
|
|
!-------------------------------------------------------------------------------------
|
|
!C. Bekas...TASK GROUP RELATED. FFT DATA STRUCTURES ARE ALREADY DEFINED ABOVE
|
|
!-------------------------------------------------------------------------------------
|
|
!dfft%nsw(me) holds the number of z-sticks for the current processor per wave-function
|
|
!We can either send these in the group with an mpi_allgather...or put them
|
|
!in the PSIS vector (in special positions) and send them with them.
|
|
!Otherwise we can do this once at the beginning, before the loop.
|
|
!we choose to do the latter one.
|
|
!-------------------------------------------------------------------------------------
|
|
!
|
|
!
|
|
ALLOCATE( dffts%tg_nsw(dffts%nproc))
|
|
ALLOCATE( dffts%tg_npp(dffts%nproc))
|
|
|
|
num_sticks = 0
|
|
num_planes = 0
|
|
DO i = 1, dffts%nogrp
|
|
num_sticks = num_sticks + dffts%nsw( dffts%nolist(i) + 1 )
|
|
num_planes = num_planes + dffts%npp( dffts%nolist(i) + 1 )
|
|
ENDDO
|
|
|
|
#if defined(__MPI)
|
|
CALL MPI_ALLGATHER(num_sticks, 1, MPI_INTEGER, dffts%tg_nsw(1), 1, MPI_INTEGER, dffts%comm, IERR)
|
|
CALL MPI_ALLGATHER(num_planes, 1, MPI_INTEGER, dffts%tg_npp(1), 1, MPI_INTEGER, dffts%comm, IERR)
|
|
#else
|
|
dffts%tg_nsw(1) = num_sticks
|
|
dffts%tg_npp(1) = num_planes
|
|
#endif
|
|
|
|
ALLOCATE( dffts%tg_snd( dffts%nogrp ) )
|
|
ALLOCATE( dffts%tg_rcv( dffts%nogrp ) )
|
|
ALLOCATE( dffts%tg_psdsp( dffts%nogrp ) )
|
|
ALLOCATE( dffts%tg_usdsp( dffts%nogrp ) )
|
|
ALLOCATE( dffts%tg_rdsp( dffts%nogrp ) )
|
|
|
|
dffts%tg_snd(1) = dffts%nr3x * dffts%nsw( dffts%mype + 1 )
|
|
IF( dffts%nr3x * dffts%nsw( dffts%mype + 1 ) > dffts%tg_nnr ) THEN
|
|
CALL fftx_error__( ' task_groups_init ', ' inconsistent dffts%tg_nnr ', 1 )
|
|
ENDIF
|
|
dffts%tg_psdsp(1) = 0
|
|
dffts%tg_usdsp(1) = 0
|
|
dffts%tg_rcv(1) = dffts%nr3x * dffts%nsw( dffts%nolist(1) + 1 )
|
|
dffts%tg_rdsp(1) = 0
|
|
DO i = 2, dffts%nogrp
|
|
dffts%tg_snd(i) = dffts%nr3x * dffts%nsw( dffts%mype + 1 )
|
|
dffts%tg_psdsp(i) = dffts%tg_psdsp(i-1) + dffts%tg_nnr
|
|
dffts%tg_usdsp(i) = dffts%tg_usdsp(i-1) + dffts%tg_snd(i-1)
|
|
dffts%tg_rcv(i) = dffts%nr3x * dffts%nsw( dffts%nolist(i) + 1 )
|
|
dffts%tg_rdsp(i) = dffts%tg_rdsp(i-1) + dffts%tg_rcv(i-1)
|
|
ENDDO
|
|
|
|
RETURN
|
|
|
|
END SUBROUTINE task_groups_init
|
|
|
|
|
|
!
|
|
SUBROUTINE task_groups_init_first( dffts )
|
|
!
|
|
USE fft_types, ONLY : fft_dlay_descriptor
|
|
!
|
|
IMPLICIT NONE
|
|
#if defined(__MPI)
|
|
INCLUDE 'mpif.h'
|
|
#endif
|
|
!
|
|
TYPE(fft_dlay_descriptor), INTENT(inout) :: dffts
|
|
!
|
|
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
|
|
INTEGER :: pgroup( dffts%nproc )
|
|
!
|
|
!SUBDIVIDE THE PROCESSORS IN GROUPS
|
|
!
|
|
DO i = 1, dffts%nproc
|
|
pgroup( i ) = i - 1
|
|
ENDDO
|
|
!
|
|
#if defined(__TASK_GROUP_WAVE_ORDER)
|
|
n1 = ( dffts%mype / dffts%npgrp ) * dffts%npgrp
|
|
ipos = dffts%mype - n1
|
|
#else
|
|
n1 = ( dffts%mype / dffts%nogrp ) * dffts%nogrp
|
|
ipos = dffts%mype - n1
|
|
#endif
|
|
!
|
|
!LIST OF PROCESSORS IN MY ORBITAL GROUP
|
|
! (processors dealing with my same pw's of different orbitals)
|
|
!
|
|
! processors in these group have contiguous indexes
|
|
!
|
|
DO i = 1, dffts%nogrp
|
|
#if defined(__TASK_GROUP_WAVE_ORDER)
|
|
dffts%nolist( i ) = pgroup( ipos + ( i - 1 ) * dffts%npgrp + 1 )
|
|
#else
|
|
dffts%nolist( i ) = pgroup( n1 + i )
|
|
#endif
|
|
ENDDO
|
|
!
|
|
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
|
|
! (processors dealing with different pw's of my same orbital)
|
|
!
|
|
DO i = 1, dffts%npgrp
|
|
#if defined(__TASK_GROUP_WAVE_ORDER)
|
|
dffts%nplist( i ) = pgroup( n1 + i )
|
|
#else
|
|
dffts%nplist( i ) = pgroup( ipos + ( i - 1 ) * dffts%nogrp + 1 )
|
|
#endif
|
|
ENDDO
|
|
!
|
|
!SET UP THE GROUPS
|
|
!
|
|
!CREATE ORBITAL GROUPS
|
|
!
|
|
#if defined(__MPI)
|
|
! processes with the same color are in the same new communicator
|
|
|
|
#if defined(__TASK_GROUP_WAVE_ORDER)
|
|
color = MOD( dffts%mype , dffts%npgrp )
|
|
key = dffts%mype / dffts%npgrp
|
|
#else
|
|
color = dffts%mype / dffts%nogrp
|
|
key = MOD( dffts%mype , dffts%nogrp )
|
|
#endif
|
|
|
|
|
|
CALL MPI_COMM_SPLIT( dffts%comm, color, key, dffts%ogrp_comm, ierr )
|
|
if( ierr /= 0 ) &
|
|
CALL fftx_error__( ' task_groups_init_first ', ' creating ogrp_comm ', ABS(ierr) )
|
|
CALL MPI_COMM_RANK( dffts%ogrp_comm, itsk, IERR )
|
|
CALL MPI_COMM_SIZE( dffts%ogrp_comm, ntsk, IERR )
|
|
IF( dffts%nogrp /= ntsk ) CALL fftx_error__( ' task_groups_init_first ', ' ogrp_comm size ', ntsk )
|
|
DO i = 1, dffts%nogrp
|
|
IF( dffts%mype == dffts%nolist( i ) ) THEN
|
|
IF( (i-1) /= itsk ) CALL fftx_error__( ' task_groups_init_first ', ' ogrp_comm rank ', itsk )
|
|
END IF
|
|
END DO
|
|
#endif
|
|
!
|
|
!CREATE PLANEWAVE GROUPS
|
|
!
|
|
#if defined(__MPI)
|
|
! processes with the same color are in the same new communicator
|
|
|
|
#if defined(__TASK_GROUP_WAVE_ORDER)
|
|
color = dffts%mype / dffts%npgrp
|
|
key = MOD( dffts%mype , dffts%npgrp )
|
|
#else
|
|
color = MOD( dffts%mype , dffts%nogrp )
|
|
key = dffts%mype / dffts%nogrp
|
|
#endif
|
|
|
|
CALL MPI_COMM_SPLIT( dffts%comm, color, key, dffts%pgrp_comm, ierr )
|
|
if( ierr /= 0 ) &
|
|
CALL fftx_error__( ' task_groups_init_first ', ' creating pgrp_comm ', ABS(ierr) )
|
|
CALL MPI_COMM_RANK( dffts%pgrp_comm, itsk, IERR )
|
|
CALL MPI_COMM_SIZE( dffts%pgrp_comm, ntsk, IERR )
|
|
IF( dffts%npgrp /= ntsk ) CALL fftx_error__( ' task_groups_init_first ', ' pgrp_comm size ', ntsk )
|
|
DO i = 1, dffts%npgrp
|
|
IF( dffts%mype == dffts%nplist( i ) ) THEN
|
|
IF( (i-1) /= itsk ) CALL fftx_error__( ' task_groups_init_first ', ' pgrp_comm rank ', itsk )
|
|
END IF
|
|
END DO
|
|
dffts%me_pgrp = itsk
|
|
#endif
|
|
|
|
RETURN
|
|
END SUBROUTINE task_groups_init_first
|
|
!
|
|
|
|
!=----------------------------------------------------------------------=
|
|
END MODULE stick_set
|
|
!=----------------------------------------------------------------------=
|