mirror of https://gitlab.com/QEF/q-e.git
Mior (but potentially dangerous) changes to parallel initialization:
most of what was in PW/startup.f90 moved into Modules/mp_global.f90 in view of a merge with CP git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@6037 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
parent
f1e241876b
commit
d7357d5cd9
|
@ -1,5 +1,5 @@
|
|||
!
|
||||
! Copyright (C) 2002-2003 PWSCF-FPMD-CP90 group
|
||||
! Copyright (C) 2002-2009 Quantum ESPRESSO group
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
|
@ -10,8 +10,6 @@
|
|||
# include "/cineca/prod/hpm/include/f_hpm.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
!------------------------------------------------------------------------------!
|
||||
MODULE mp
|
||||
!------------------------------------------------------------------------------!
|
||||
|
@ -22,10 +20,10 @@
|
|||
IMPLICIT NONE
|
||||
|
||||
PUBLIC :: mp_start, mp_end, mp_env, &
|
||||
mp_bcast, mp_stop, mp_sum, mp_max, mp_min, mp_rank, mp_size, &
|
||||
mp_bcast, mp_sum, mp_max, mp_min, mp_rank, mp_size, &
|
||||
mp_gather, mp_get, mp_put, mp_barrier, mp_report, mp_group_free, &
|
||||
mp_root_sum, mp_comm_free, mp_comm_create, mp_comm_group, mp_group_create, &
|
||||
mp_comm_split, mp_set_displs
|
||||
mp_root_sum, mp_comm_free, mp_comm_create, mp_comm_group, &
|
||||
mp_group_create, mp_comm_split, mp_set_displs
|
||||
!
|
||||
INTERFACE mp_bcast
|
||||
MODULE PROCEDURE mp_bcast_i1, mp_bcast_r1, mp_bcast_c1, &
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
!
|
||||
! Copyright (C) 2002-2004 PWSCF-FPMD-CP90 group
|
||||
! Copyright (C) 2002-2009 Quantum ESPRESSO group
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
|
@ -9,6 +9,9 @@
|
|||
MODULE mp_global
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
USE mp, ONLY : mp_comm_free, mp_size, mp_rank, mp_sum, mp_barrier, &
|
||||
mp_bcast, mp_start, mp_env
|
||||
USE io_global, ONLY : stdout, io_global_start, meta_ionode, meta_ionode_id
|
||||
USE parallel_include
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -19,7 +22,7 @@ MODULE mp_global
|
|||
INTEGER :: root = 0 ! index of the absolute root processor
|
||||
INTEGER :: nproc = 1 ! absolute number of processor
|
||||
INTEGER :: nproc_file = 1 ! absolute number of processor written in the
|
||||
! xml punch file
|
||||
! xml punch file
|
||||
INTEGER :: world_comm = 0 ! communicator of all processor
|
||||
#if defined __SCALAPACK
|
||||
INTEGER :: me_blacs = 0 ! BLACS processor index starting from 0
|
||||
|
@ -47,12 +50,12 @@ MODULE mp_global
|
|||
INTEGER :: npgrp = 1 ! number of proc. in a plane-wave "task group"
|
||||
INTEGER :: nproc_pool = 1 ! number of processor within a pool
|
||||
INTEGER :: nproc_pool_file = 1 ! number of processor within a pool of
|
||||
! written in the xml punch file
|
||||
! written in the xml punch file
|
||||
INTEGER :: nproc_image = 1 ! number of processor within an image
|
||||
INTEGER :: np_ortho(2) = 1 ! size of the processor grid used in ortho
|
||||
INTEGER :: np_ortho1 = 1 ! size of the ortho group
|
||||
INTEGER :: leg_ortho = 1 ! the distance in the father communicator
|
||||
! of two neighbour processors in ortho_comm
|
||||
! of two neighbour processors in ortho_comm
|
||||
INTEGER, ALLOCATABLE :: nolist(:) ! list of processors in my orbital task group
|
||||
INTEGER, ALLOCATABLE :: nplist(:) ! list of processors in my plane wave task group
|
||||
!
|
||||
|
@ -70,425 +73,497 @@ MODULE mp_global
|
|||
INTEGER :: ortho_cntx = 0 ! BLACS context for ortho_comm
|
||||
#endif
|
||||
!
|
||||
CONTAINS
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE mp_global_start( root_i, mpime_i, group_i, nproc_i )
|
||||
!-----------------------------------------------------------------------
|
||||
CONTAINS
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE mp_startup ( use_task_groups, ortho_para )
|
||||
!-----------------------------------------------------------------------
|
||||
! ... This subroutine initializes MPI
|
||||
! ... Processes are organized in NIMAGE images each dealing with a subset of
|
||||
! ... images used to discretize the "path" (only in "path" optimizations)
|
||||
! ... Within each image processes are organized in NPOOL pools each dealing
|
||||
! ... with a subset of kpoints.
|
||||
! ... Within each pool R & G space distribution is performed.
|
||||
! ... NPROC is read from command line or can be set with the appropriate
|
||||
! ... environment variable ( for example use 'setenv MP_PROCS 8' on IBM SP
|
||||
! ... machine to run on NPROC=8 processors ); NIMAGE and NPOOL are read from
|
||||
! ... command line.
|
||||
! ... NPOOL must be a whole divisor of NPROC
|
||||
!
|
||||
IMPLICIT NONE
|
||||
LOGICAL, INTENT(OUT) :: use_task_groups
|
||||
INTEGER, INTENT(OUT) :: ortho_para
|
||||
INTEGER :: gid, ntask_groups, nproc_ortho
|
||||
!
|
||||
CALL mp_start()
|
||||
!
|
||||
CALL mp_env( nproc, mpime, gid )
|
||||
!
|
||||
! ... Set the I/O node
|
||||
!
|
||||
CALL io_global_start( mpime, 0 )
|
||||
!
|
||||
! ... Set global coordinate for this processor
|
||||
!
|
||||
CALL mp_global_start( 0, mpime, gid, nproc )
|
||||
!
|
||||
IF ( meta_ionode ) THEN
|
||||
!
|
||||
IMPLICIT NONE
|
||||
! ... How many pools ?
|
||||
!
|
||||
INTEGER, INTENT(IN) :: root_i, mpime_i, group_i, nproc_i
|
||||
CALL get_arg_npool( npool )
|
||||
!
|
||||
root = root_i
|
||||
mpime = mpime_i
|
||||
world_comm = group_i
|
||||
nproc = nproc_i
|
||||
nproc_pool = nproc_i
|
||||
nproc_image = nproc_i
|
||||
my_pool_id = 0
|
||||
my_image_id = 0
|
||||
me_pool = mpime
|
||||
me_image = mpime
|
||||
me_pgrp = me_pool
|
||||
root_pool = root
|
||||
root_image = root
|
||||
inter_pool_comm = group_i
|
||||
intra_pool_comm = group_i
|
||||
inter_image_comm = group_i
|
||||
intra_image_comm = group_i
|
||||
ortho_comm = group_i
|
||||
ALLOCATE( nolist( nproc_i ) )
|
||||
ALLOCATE( nplist( nproc_i ) )
|
||||
nolist = 0
|
||||
nplist = 0
|
||||
npool = MAX( npool, 1 )
|
||||
npool = MIN( npool, nproc )
|
||||
!
|
||||
RETURN
|
||||
! ... How many parallel images ?
|
||||
!
|
||||
END SUBROUTINE mp_global_start
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE mp_global_group_start( mep, myp, nprocp, num_of_pools )
|
||||
!-----------------------------------------------------------------------
|
||||
CALL get_arg_nimage( nimage )
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: mep, myp, nprocp, num_of_pools
|
||||
nimage = MAX( nimage, 1 )
|
||||
nimage = MIN( nimage, nproc )
|
||||
!
|
||||
me_pool = mep
|
||||
my_pool_id = myp
|
||||
nproc_pool = nprocp
|
||||
npool = num_of_pools
|
||||
! ... How many task groups ?
|
||||
!
|
||||
RETURN
|
||||
CALL get_arg_ntg( ntask_groups )
|
||||
!
|
||||
END SUBROUTINE mp_global_group_start
|
||||
!
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE init_pool( nimage_ , ntask_groups_ , nproc_ortho_ )
|
||||
! ... How many processors involved in diagonalization of Hamiltonian ?
|
||||
!
|
||||
CALL get_arg_northo( nproc_ortho )
|
||||
!
|
||||
END IF
|
||||
!
|
||||
CALL mp_barrier()
|
||||
!
|
||||
! ... transmit npool and nimage
|
||||
!
|
||||
CALL mp_bcast( npool, meta_ionode_id )
|
||||
CALL mp_bcast( nimage, meta_ionode_id )
|
||||
CALL mp_bcast( ntask_groups, meta_ionode_id )
|
||||
CALL mp_bcast( nproc_ortho, meta_ionode_id )
|
||||
!
|
||||
use_task_groups = ( ntask_groups > 0 )
|
||||
IF ( nproc_ortho > 0 ) ortho_para = nproc_ortho
|
||||
!
|
||||
! ... all pools are initialized here
|
||||
!
|
||||
CALL init_pool( nimage, ntask_groups, nproc_ortho )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE mp_startup
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE mp_global_start( root_i, mpime_i, group_i, nproc_i )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: root_i, mpime_i, group_i, nproc_i
|
||||
!
|
||||
root = root_i
|
||||
mpime = mpime_i
|
||||
world_comm = group_i
|
||||
nproc = nproc_i
|
||||
nproc_pool = nproc_i
|
||||
nproc_image = nproc_i
|
||||
my_pool_id = 0
|
||||
my_image_id = 0
|
||||
me_pool = mpime
|
||||
me_image = mpime
|
||||
me_pgrp = me_pool
|
||||
root_pool = root
|
||||
root_image = root
|
||||
inter_pool_comm = group_i
|
||||
intra_pool_comm = group_i
|
||||
inter_image_comm = group_i
|
||||
intra_image_comm = group_i
|
||||
ortho_comm = group_i
|
||||
ALLOCATE( nolist( nproc_i ) )
|
||||
ALLOCATE( nplist( nproc_i ) )
|
||||
nolist = 0
|
||||
nplist = 0
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE mp_global_start
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE mp_global_group_start( mep, myp, nprocp, num_of_pools )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: mep, myp, nprocp, num_of_pools
|
||||
!
|
||||
me_pool = mep
|
||||
my_pool_id = myp
|
||||
nproc_pool = nprocp
|
||||
npool = num_of_pools
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE mp_global_group_start
|
||||
!
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
! ... This routine initialize the pool : MPI division in pools and images
|
||||
!
|
||||
USE mp, ONLY : mp_barrier, mp_bcast
|
||||
USE parallel_include
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: nimage_
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: ntask_groups_
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: nproc_ortho_
|
||||
!
|
||||
INTEGER :: ierr = 0
|
||||
INTEGER :: nproc_ortho
|
||||
!
|
||||
SUBROUTINE init_pool( nimage_ , ntask_groups_ , nproc_ortho_ )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
! ... This routine initialize the pool : MPI division in pools and images
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: nimage_
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: ntask_groups_
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: nproc_ortho_
|
||||
!
|
||||
INTEGER :: ierr = 0
|
||||
INTEGER :: nproc_ortho
|
||||
!
|
||||
#if defined (__PARA)
|
||||
!
|
||||
!
|
||||
IF( PRESENT( nimage_ ) ) THEN
|
||||
nimage = nimage_
|
||||
END IF
|
||||
!
|
||||
! ... here we set all parallel indeces (defined in mp_global):
|
||||
!
|
||||
!
|
||||
! ... number of cpus per image
|
||||
!
|
||||
nproc_image = nproc / nimage
|
||||
!
|
||||
IF ( nproc < nimage ) &
|
||||
CALL errore( 'startup', 'nproc < nimage', 1 )
|
||||
!
|
||||
IF ( MOD( nproc, nimage ) /= 0 ) &
|
||||
CALL errore( 'startup', 'nproc /= nproc_image * nimage', 1 )
|
||||
!
|
||||
! ... my_image_id = image index for this processor ( 0 : nimage - 1 )
|
||||
! ... me_image = processor index within the image ( 0 : nproc_image - 1 )
|
||||
!
|
||||
my_image_id = mpime / nproc_image
|
||||
me_image = MOD( mpime, nproc_image )
|
||||
!
|
||||
CALL mp_barrier()
|
||||
!
|
||||
! ... the intra_image_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
|
||||
my_image_id, mpime, intra_image_comm, ierr )
|
||||
!
|
||||
CALL errore( 'init_pool', 'intra_image_comm is wrong', ierr )
|
||||
!
|
||||
CALL mp_barrier()
|
||||
!
|
||||
! ... the inter_image_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
|
||||
me_image, mpime, inter_image_comm, ierr )
|
||||
!
|
||||
CALL errore( 'init_pool', 'inter_image_comm is wrong', ierr )
|
||||
!
|
||||
! ... number of cpus per pool of k-points (they are created inside each image)
|
||||
!
|
||||
nproc_pool = nproc_image / npool
|
||||
!
|
||||
IF ( MOD( nproc, npool ) /= 0 ) &
|
||||
CALL errore( 'startup', 'nproc /= nproc_pool * npool', 1 )
|
||||
!
|
||||
! ... my_pool_id = pool index for this processor ( 0 : npool - 1 )
|
||||
! ... me_pool = processor index within the pool ( 0 : nproc_pool - 1 )
|
||||
!
|
||||
my_pool_id = me_image / nproc_pool
|
||||
me_pool = MOD( me_image, nproc_pool )
|
||||
!
|
||||
CALL mp_barrier( intra_image_comm )
|
||||
!
|
||||
! ... the intra_pool_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( intra_image_comm, &
|
||||
my_pool_id, me_image, intra_pool_comm, ierr )
|
||||
!
|
||||
CALL errore( 'init_pool', 'intra_pool_comm is wrong', ierr )
|
||||
!
|
||||
CALL mp_barrier( intra_image_comm )
|
||||
!
|
||||
! ... the inter_pool_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( intra_image_comm, &
|
||||
me_pool, me_image, inter_pool_comm, ierr )
|
||||
!
|
||||
call errore( 'init_pool', 'inter_pool_comm is wrong', ierr )
|
||||
!
|
||||
!
|
||||
!
|
||||
IF( PRESENT( nimage_ ) ) THEN
|
||||
nimage = nimage_
|
||||
END IF
|
||||
!
|
||||
! ... here we set all parallel indeces (defined in mp_global):
|
||||
!
|
||||
!
|
||||
! ... number of cpus per image
|
||||
!
|
||||
nproc_image = nproc / nimage
|
||||
!
|
||||
IF ( nproc < nimage ) &
|
||||
CALL errore( 'startup', 'nproc < nimage', 1 )
|
||||
!
|
||||
IF ( MOD( nproc, nimage ) /= 0 ) &
|
||||
CALL errore( 'startup', 'nproc /= nproc_image * nimage', 1 )
|
||||
!
|
||||
! ... my_image_id = image index for this processor ( 0 : nimage - 1 )
|
||||
! ... me_image = processor index within the image ( 0 : nproc_image - 1 )
|
||||
!
|
||||
my_image_id = mpime / nproc_image
|
||||
me_image = MOD( mpime, nproc_image )
|
||||
!
|
||||
CALL mp_barrier()
|
||||
!
|
||||
! ... the intra_image_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
|
||||
my_image_id, mpime, intra_image_comm, ierr )
|
||||
!
|
||||
CALL errore( 'init_pool', 'intra_image_comm is wrong', ierr )
|
||||
!
|
||||
CALL mp_barrier()
|
||||
!
|
||||
! ... the inter_image_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
|
||||
me_image, mpime, inter_image_comm, ierr )
|
||||
!
|
||||
CALL errore( 'init_pool', 'inter_image_comm is wrong', ierr )
|
||||
!
|
||||
! ... number of cpus per pool of k-points (they are created inside each image)
|
||||
!
|
||||
nproc_pool = nproc_image / npool
|
||||
!
|
||||
IF ( MOD( nproc, npool ) /= 0 ) &
|
||||
CALL errore( 'startup', 'nproc /= nproc_pool * npool', 1 )
|
||||
!
|
||||
! ... my_pool_id = pool index for this processor ( 0 : npool - 1 )
|
||||
! ... me_pool = processor index within the pool ( 0 : nproc_pool - 1 )
|
||||
!
|
||||
my_pool_id = me_image / nproc_pool
|
||||
me_pool = MOD( me_image, nproc_pool )
|
||||
!
|
||||
CALL mp_barrier( intra_image_comm )
|
||||
!
|
||||
! ... the intra_pool_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( intra_image_comm, &
|
||||
my_pool_id, me_image, intra_pool_comm, ierr )
|
||||
!
|
||||
CALL errore( 'init_pool', 'intra_pool_comm is wrong', ierr )
|
||||
!
|
||||
CALL mp_barrier( intra_image_comm )
|
||||
!
|
||||
! ... the inter_pool_comm communicator is created
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( intra_image_comm, &
|
||||
me_pool, me_image, inter_pool_comm, ierr )
|
||||
!
|
||||
call errore( 'init_pool', 'inter_pool_comm is wrong', ierr )
|
||||
!
|
||||
#endif
|
||||
!
|
||||
!
|
||||
!
|
||||
!
|
||||
#if defined __SCALAPACK
|
||||
|
||||
CALL BLACS_PINFO( me_blacs, np_blacs )
|
||||
!WRITE(*,*) 'BLACS me_blacs, np_blacs = ', me_blacs, np_blacs
|
||||
CALL BLACS_GET( -1, 0, world_cntx )
|
||||
!WRITE(*,*) 'BLACS world_cntx = ', world_cntx
|
||||
|
||||
CALL BLACS_PINFO( me_blacs, np_blacs )
|
||||
!WRITE(*,*) 'BLACS me_blacs, np_blacs = ', me_blacs, np_blacs
|
||||
CALL BLACS_GET( -1, 0, world_cntx )
|
||||
!WRITE(*,*) 'BLACS world_cntx = ', world_cntx
|
||||
|
||||
#endif
|
||||
!
|
||||
nproc_ortho = nproc_pool
|
||||
!
|
||||
IF( PRESENT( nproc_ortho_ ) ) THEN
|
||||
IF( nproc_ortho_ < nproc_pool ) nproc_ortho = nproc_ortho_
|
||||
END IF
|
||||
!
|
||||
CALL init_ortho_group( nproc_ortho, intra_pool_comm )
|
||||
!
|
||||
IF( PRESENT( ntask_groups_ ) ) THEN
|
||||
IF( ntask_groups_ > 0 ) THEN
|
||||
nogrp = ntask_groups_
|
||||
CALL init_task_groups( )
|
||||
END IF
|
||||
END IF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE init_pool
|
||||
!
|
||||
nproc_ortho = nproc_pool
|
||||
!
|
||||
IF( PRESENT( nproc_ortho_ ) ) THEN
|
||||
IF( nproc_ortho_ < nproc_pool ) nproc_ortho = nproc_ortho_
|
||||
END IF
|
||||
!
|
||||
CALL init_ortho_group( nproc_ortho, intra_pool_comm )
|
||||
!
|
||||
IF( PRESENT( ntask_groups_ ) ) THEN
|
||||
IF( ntask_groups_ > 0 ) THEN
|
||||
nogrp = ntask_groups_
|
||||
CALL init_task_groups( )
|
||||
END IF
|
||||
END IF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE init_pool
|
||||
!
|
||||
!
|
||||
SUBROUTINE init_task_groups( )
|
||||
!
|
||||
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
|
||||
INTEGER :: pgroup( nproc_pool )
|
||||
!
|
||||
!SUBDIVIDE THE PROCESSORS IN GROUPS
|
||||
!
|
||||
!THE NUMBER OF GROUPS HAS TO BE A DIVISOR OF THE NUMBER
|
||||
!OF PROCESSORS
|
||||
!
|
||||
IF( MOD( nproc_pool, nogrp ) /= 0 ) &
|
||||
CALL errore( " init_pool ", " nogrp should be a divisor of nproc_pool ", 1 )
|
||||
!
|
||||
npgrp = nproc_pool / nogrp
|
||||
SUBROUTINE init_task_groups( )
|
||||
!
|
||||
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
|
||||
INTEGER :: pgroup( nproc_pool )
|
||||
!
|
||||
!SUBDIVIDE THE PROCESSORS IN GROUPS
|
||||
!
|
||||
!THE NUMBER OF GROUPS HAS TO BE A DIVISOR OF THE NUMBER
|
||||
!OF PROCESSORS
|
||||
!
|
||||
IF( MOD( nproc_pool, nogrp ) /= 0 ) &
|
||||
CALL errore( " init_pool ", " nogrp should be a divisor of nproc_pool ", 1 )
|
||||
!
|
||||
npgrp = nproc_pool / nogrp
|
||||
|
||||
DO i = 1, nproc_pool
|
||||
pgroup( i ) = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY ORBITAL GROUP
|
||||
!
|
||||
! processors in these group have contiguous indexes
|
||||
!
|
||||
N1 = ( me_pool / NOGRP ) * NOGRP - 1
|
||||
DO i = 1, nogrp
|
||||
nolist( I ) = pgroup( N1 + I + 1 )
|
||||
IF( me_pool == nolist( I ) ) ipos = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
|
||||
!
|
||||
DO I = 1, npgrp
|
||||
nplist( I ) = pgroup( ipos + ( i - 1 ) * nogrp + 1 )
|
||||
ENDDO
|
||||
DO i = 1, nproc_pool
|
||||
pgroup( i ) = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY ORBITAL GROUP
|
||||
!
|
||||
! processors in these group have contiguous indexes
|
||||
!
|
||||
N1 = ( me_pool / NOGRP ) * NOGRP - 1
|
||||
DO i = 1, nogrp
|
||||
nolist( I ) = pgroup( N1 + I + 1 )
|
||||
IF( me_pool == nolist( I ) ) ipos = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
|
||||
!
|
||||
DO I = 1, npgrp
|
||||
nplist( I ) = pgroup( ipos + ( i - 1 ) * nogrp + 1 )
|
||||
ENDDO
|
||||
|
||||
!
|
||||
!SET UP THE GROUPS
|
||||
!
|
||||
!
|
||||
!CREATE ORBITAL GROUPS
|
||||
!
|
||||
!
|
||||
!SET UP THE GROUPS
|
||||
!
|
||||
!
|
||||
!CREATE ORBITAL GROUPS
|
||||
!
|
||||
#if defined __MPI
|
||||
color = me_pool / nogrp
|
||||
key = MOD( me_pool , nogrp )
|
||||
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, ogrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' task_groups_init ', ' creating ogrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( ogrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( ogrp_comm, ntsk, IERR )
|
||||
IF( nogrp /= ntsk ) CALL errore( ' task_groups_init ', ' ogrp_comm size ', ntsk )
|
||||
DO i = 1, nogrp
|
||||
IF( me_pool == nolist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' ogrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
color = me_pool / nogrp
|
||||
key = MOD( me_pool , nogrp )
|
||||
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, ogrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' task_groups_init ', ' creating ogrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( ogrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( ogrp_comm, ntsk, IERR )
|
||||
IF( nogrp /= ntsk ) CALL errore( ' task_groups_init ', ' ogrp_comm size ', ntsk )
|
||||
DO i = 1, nogrp
|
||||
IF( me_pool == nolist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' ogrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
#endif
|
||||
!
|
||||
!CREATE PLANEWAVE GROUPS
|
||||
!
|
||||
!
|
||||
!CREATE PLANEWAVE GROUPS
|
||||
!
|
||||
#if defined __MPI
|
||||
color = MOD( me_pool , nogrp )
|
||||
key = me_pool / nogrp
|
||||
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, pgrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' task_groups_init ', ' creating pgrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( pgrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( pgrp_comm, ntsk, IERR )
|
||||
IF( npgrp /= ntsk ) CALL errore( ' task_groups_init ', ' pgrp_comm size ', ntsk )
|
||||
DO i = 1, npgrp
|
||||
IF( me_pool == nplist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' pgrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
me_pgrp = itsk
|
||||
color = MOD( me_pool , nogrp )
|
||||
key = me_pool / nogrp
|
||||
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, pgrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' task_groups_init ', ' creating pgrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( pgrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( pgrp_comm, ntsk, IERR )
|
||||
IF( npgrp /= ntsk ) CALL errore( ' task_groups_init ', ' pgrp_comm size ', ntsk )
|
||||
DO i = 1, npgrp
|
||||
IF( me_pool == nplist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' pgrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
me_pgrp = itsk
|
||||
#endif
|
||||
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE init_task_groups
|
||||
!
|
||||
!
|
||||
SUBROUTINE init_ortho_group( nproc_try, comm_all )
|
||||
!
|
||||
USE mp, ONLY : mp_comm_free, mp_size, mp_rank, mp_sum
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
||||
INTEGER, INTENT(IN) :: nproc_try, comm_all
|
||||
|
||||
LOGICAL, SAVE :: first = .true.
|
||||
INTEGER :: ierr, color, key, me_all, nproc_all
|
||||
INTEGER :: np_ortho1
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE init_task_groups
|
||||
!
|
||||
!
|
||||
SUBROUTINE init_ortho_group( nproc_try, comm_all )
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
||||
INTEGER, INTENT(IN) :: nproc_try, comm_all
|
||||
|
||||
LOGICAL, SAVE :: first = .true.
|
||||
INTEGER :: ierr, color, key, me_all, nproc_all
|
||||
INTEGER :: np_ortho1
|
||||
|
||||
#if defined __SCALAPACK
|
||||
INTEGER, ALLOCATABLE :: blacsmap(:,:)
|
||||
INTEGER :: nprow, npcol, myrow, mycol
|
||||
INTEGER, ALLOCATABLE :: blacsmap(:,:)
|
||||
INTEGER :: nprow, npcol, myrow, mycol
|
||||
#endif
|
||||
|
||||
|
||||
#if defined __MPI
|
||||
|
||||
me_all = mp_rank( comm_all )
|
||||
nproc_all = mp_size( comm_all )
|
||||
me_all = mp_rank( comm_all )
|
||||
nproc_all = mp_size( comm_all )
|
||||
|
||||
IF( nproc_try > nproc_all ) THEN
|
||||
CALL errore( " init_ortho_group ", " argument 1 out of range ", nproc_try )
|
||||
END IF
|
||||
IF( nproc_try > nproc_all ) THEN
|
||||
CALL errore( " init_ortho_group ", " argument 1 out of range ", nproc_try )
|
||||
END IF
|
||||
|
||||
IF( .NOT. first ) THEN
|
||||
!
|
||||
! free resources associated to the communicator
|
||||
!
|
||||
CALL mp_comm_free( ortho_comm )
|
||||
!
|
||||
IF( .NOT. first ) THEN
|
||||
!
|
||||
! free resources associated to the communicator
|
||||
!
|
||||
CALL mp_comm_free( ortho_comm )
|
||||
!
|
||||
#if defined __SCALAPACK
|
||||
CALL BLACS_GRIDEXIT( ortho_cntx )
|
||||
CALL BLACS_GRIDEXIT( ortho_cntx )
|
||||
#endif
|
||||
!
|
||||
END IF
|
||||
|
||||
! find the square closer (but lower) to nproc_try
|
||||
!
|
||||
CALL grid2d_dims( 'S', nproc_try, np_ortho(1), np_ortho(2) )
|
||||
!
|
||||
np_ortho1 = np_ortho(1) * np_ortho(2)
|
||||
!
|
||||
IF( nproc_all >= 4*np_ortho1 ) THEN
|
||||
!
|
||||
! here we choose a processor every 4, in order not to stress memory BW
|
||||
! on multi core procs, for which further performance enhancements are
|
||||
! possible using OpenMP BLAS inside regter/cegter/rdiaghg/cdiaghg
|
||||
! (to be implemented)
|
||||
!
|
||||
color = 0
|
||||
IF( me_all < 4*np_ortho1 .AND. MOD( me_all, 4 ) == 0 ) color = 1
|
||||
!
|
||||
leg_ortho = 4
|
||||
!
|
||||
ELSE IF( nproc_all >= 2*np_ortho1 ) THEN
|
||||
!
|
||||
! here we choose a processor every 2, in order not to stress memory BW
|
||||
!
|
||||
color = 0
|
||||
IF( me_all < 2*np_ortho1 .AND. MOD( me_all, 2 ) == 0 ) color = 1
|
||||
!
|
||||
leg_ortho = 2
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
! here we choose the first processors
|
||||
!
|
||||
color = 0
|
||||
IF( me_all < np_ortho1 ) color = 1
|
||||
!
|
||||
leg_ortho = 1
|
||||
!
|
||||
END IF
|
||||
!
|
||||
key = me_all
|
||||
!
|
||||
! initialize the communicator for the new group
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( comm_all, color, key, ortho_comm, ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
CALL errore( " init_ortho_group ", " error splitting communicator ", ierr )
|
||||
!
|
||||
! Computes coordinates of the processors, in row maior order
|
||||
!
|
||||
me_ortho1 = mp_rank( ortho_comm )
|
||||
np_ortho1 = mp_size( ortho_comm )
|
||||
IF( color == 1 .AND. np_ortho1 /= np_ortho(1) * np_ortho(2) ) &
|
||||
CALL errore( " init_ortho_group ", " wrong number of proc in ortho_comm ", ierr )
|
||||
!
|
||||
IF( me_all == 0 .AND. me_ortho1 /= 0 ) &
|
||||
CALL errore( " init_ortho_group ", " wrong root in ortho_comm ", ierr )
|
||||
!
|
||||
if( color == 1 ) then
|
||||
ortho_comm_id = 1
|
||||
CALL GRID2D_COORDS( 'R', me_ortho1, np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2) )
|
||||
CALL GRID2D_RANK( 'R', np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2), ierr )
|
||||
IF( ierr /= me_ortho1 ) &
|
||||
CALL errore( " init_ortho_group ", " wrong coordinates in ortho_comm ", ierr )
|
||||
IF( me_ortho1*leg_ortho /= me_all ) &
|
||||
CALL errore( " init_ortho_group ", " wrong rank assignment in ortho_comm ", ierr )
|
||||
else
|
||||
ortho_comm_id = 0
|
||||
me_ortho(1) = me_ortho1
|
||||
me_ortho(2) = me_ortho1
|
||||
endif
|
||||
!
|
||||
END IF
|
||||
|
||||
! find the square closer (but lower) to nproc_try
|
||||
!
|
||||
CALL grid2d_dims( 'S', nproc_try, np_ortho(1), np_ortho(2) )
|
||||
!
|
||||
np_ortho1 = np_ortho(1) * np_ortho(2)
|
||||
!
|
||||
IF( nproc_all >= 4*np_ortho1 ) THEN
|
||||
!
|
||||
! here we choose a processor every 4, in order not to stress memory BW
|
||||
! on multi core procs, for which further performance enhancements are
|
||||
! possible using OpenMP BLAS inside regter/cegter/rdiaghg/cdiaghg
|
||||
! (to be implemented)
|
||||
!
|
||||
color = 0
|
||||
IF( me_all < 4*np_ortho1 .AND. MOD( me_all, 4 ) == 0 ) color = 1
|
||||
!
|
||||
leg_ortho = 4
|
||||
!
|
||||
ELSE IF( nproc_all >= 2*np_ortho1 ) THEN
|
||||
!
|
||||
! here we choose a processor every 2, in order not to stress memory BW
|
||||
!
|
||||
color = 0
|
||||
IF( me_all < 2*np_ortho1 .AND. MOD( me_all, 2 ) == 0 ) color = 1
|
||||
!
|
||||
leg_ortho = 2
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
! here we choose the first processors
|
||||
!
|
||||
color = 0
|
||||
IF( me_all < np_ortho1 ) color = 1
|
||||
!
|
||||
leg_ortho = 1
|
||||
!
|
||||
END IF
|
||||
!
|
||||
key = me_all
|
||||
!
|
||||
! initialize the communicator for the new group
|
||||
!
|
||||
CALL MPI_COMM_SPLIT( comm_all, color, key, ortho_comm, ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
CALL errore( " init_ortho_group ", " error splitting communicator ", ierr )
|
||||
!
|
||||
! Computes coordinates of the processors, in row maior order
|
||||
!
|
||||
me_ortho1 = mp_rank( ortho_comm )
|
||||
np_ortho1 = mp_size( ortho_comm )
|
||||
IF( color == 1 .AND. np_ortho1 /= np_ortho(1) * np_ortho(2) ) &
|
||||
CALL errore( " init_ortho_group ", " wrong number of proc in ortho_comm ", ierr )
|
||||
!
|
||||
IF( me_all == 0 .AND. me_ortho1 /= 0 ) &
|
||||
CALL errore( " init_ortho_group ", " wrong root in ortho_comm ", ierr )
|
||||
!
|
||||
if( color == 1 ) then
|
||||
ortho_comm_id = 1
|
||||
CALL GRID2D_COORDS( 'R', me_ortho1, np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2) )
|
||||
CALL GRID2D_RANK( 'R', np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2), ierr )
|
||||
IF( ierr /= me_ortho1 ) &
|
||||
CALL errore( " init_ortho_group ", " wrong coordinates in ortho_comm ", ierr )
|
||||
IF( me_ortho1*leg_ortho /= me_all ) &
|
||||
CALL errore( " init_ortho_group ", " wrong rank assignment in ortho_comm ", ierr )
|
||||
else
|
||||
ortho_comm_id = 0
|
||||
me_ortho(1) = me_ortho1
|
||||
me_ortho(2) = me_ortho1
|
||||
endif
|
||||
|
||||
#if defined __SCALAPACK
|
||||
|
||||
IF( ortho_comm_id > 0 ) THEN
|
||||
ALLOCATE( blacsmap( np_ortho(1), np_ortho(2) ) )
|
||||
blacsmap = 0
|
||||
blacsmap( me_ortho(1) + 1, me_ortho(2) + 1 ) = me_blacs
|
||||
nprow = np_ortho(1)
|
||||
npcol = np_ortho(2)
|
||||
ELSE
|
||||
nprow = np_ortho1
|
||||
npcol = 1
|
||||
ALLOCATE( blacsmap( np_ortho1, 1 ) )
|
||||
blacsmap = 0
|
||||
blacsmap( me_ortho1 + 1, 1 ) = me_blacs
|
||||
END IF
|
||||
IF( ortho_comm_id > 0 ) THEN
|
||||
ALLOCATE( blacsmap( np_ortho(1), np_ortho(2) ) )
|
||||
blacsmap = 0
|
||||
blacsmap( me_ortho(1) + 1, me_ortho(2) + 1 ) = me_blacs
|
||||
nprow = np_ortho(1)
|
||||
npcol = np_ortho(2)
|
||||
ELSE
|
||||
nprow = np_ortho1
|
||||
npcol = 1
|
||||
ALLOCATE( blacsmap( np_ortho1, 1 ) )
|
||||
blacsmap = 0
|
||||
blacsmap( me_ortho1 + 1, 1 ) = me_blacs
|
||||
END IF
|
||||
|
||||
CALL mp_sum( blacsmap, ortho_comm )
|
||||
|
||||
!WRITE( 1000 + me_image, * ) '-----'
|
||||
!WRITE( 1000 + me_image, * ) blacsmap
|
||||
CALL mp_sum( blacsmap, ortho_comm )
|
||||
|
||||
ortho_cntx = world_cntx
|
||||
CALL BLACS_GRIDMAP( ortho_cntx, blacsmap, nprow, nprow, npcol )
|
||||
!WRITE( 1000 + me_image, * ) '-----'
|
||||
!WRITE( 1000 + me_image, * ) blacsmap
|
||||
|
||||
CALL BLACS_GRIDINFO( ortho_cntx, nprow, npcol, myrow, mycol )
|
||||
ortho_cntx = world_cntx
|
||||
CALL BLACS_GRIDMAP( ortho_cntx, blacsmap, nprow, nprow, npcol )
|
||||
|
||||
!WRITE( 1000 + me_image, * ) nprow, npcol, myrow, mycol
|
||||
CALL BLACS_GRIDINFO( ortho_cntx, nprow, npcol, myrow, mycol )
|
||||
|
||||
IF( ortho_comm_id > 0 ) THEN
|
||||
IF( np_ortho(1) /= nprow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong nprow ', 1 )
|
||||
IF( np_ortho(2) /= npcol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong npcol ', 1 )
|
||||
IF( me_ortho(1) /= myrow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong myrow ', 1 )
|
||||
IF( me_ortho(2) /= mycol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong mycol ', 1 )
|
||||
END IF
|
||||
!WRITE( 1000 + me_image, * ) nprow, npcol, myrow, mycol
|
||||
|
||||
DEALLOCATE( blacsmap )
|
||||
IF( ortho_comm_id > 0 ) THEN
|
||||
IF( np_ortho(1) /= nprow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong nprow ', 1 )
|
||||
IF( np_ortho(2) /= npcol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong npcol ', 1 )
|
||||
IF( me_ortho(1) /= myrow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong myrow ', 1 )
|
||||
IF( me_ortho(2) /= mycol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong mycol ', 1 )
|
||||
END IF
|
||||
|
||||
DEALLOCATE( blacsmap )
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
ortho_comm_id = 1
|
||||
ortho_comm_id = 1
|
||||
|
||||
#endif
|
||||
|
||||
first = .false.
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE init_ortho_group
|
||||
!
|
||||
!
|
||||
first = .false.
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE init_ortho_group
|
||||
!
|
||||
!
|
||||
END MODULE mp_global
|
||||
|
|
|
@ -910,15 +910,15 @@ SUBROUTINE check_para_diag( nelec )
|
|||
!
|
||||
! we need at least 4 procs to use distributed algorithm
|
||||
!
|
||||
IF ( ionode ) WRITE( stdout, '(5X,"Too few procs for parallel algorithm")' )
|
||||
IF ( ionode ) WRITE( stdout, '(5X," we need at least 4 procs per pool")' )
|
||||
IF ( ionode ) WRITE( stdout, '(5X,"Too few procs for parallel ",&
|
||||
& "algorithm: we need at least 4 procs per pool")' )
|
||||
!
|
||||
ELSE IF( INT( nelec )/2 < nproc_pool ) THEN
|
||||
!
|
||||
! we need to have at least 1 electronic band per block
|
||||
!
|
||||
IF ( ionode ) WRITE( stdout, '(5X,"Too few electrons for parallel algorithm")')
|
||||
IF ( ionode ) WRITE( stdout, '(5X," we need at least as many bands as SQRT(nproc)")' )
|
||||
IF ( ionode ) WRITE(stdout,'(5X,"Too few electrons for parallel ",&
|
||||
& " algorithm: we need # of bands >= SQRT(nproc)")' )
|
||||
!
|
||||
END IF
|
||||
|
||||
|
|
121
PW/startup.f90
121
PW/startup.f90
|
@ -5,56 +5,15 @@
|
|||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
#if defined(__ABSOFT)
|
||||
# define getarg getarg_
|
||||
# define iargc iargc_
|
||||
#endif
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE startup( nd_nmbr, code, version )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
! ... This subroutine initializes MPI
|
||||
! ... This subroutine initializes MPI and various other things
|
||||
!
|
||||
! ... Processes are organized in NIMAGE images each dealing with a subset of
|
||||
! ... images used to discretize the "path" (this only in "path" optimizations)
|
||||
! ... Within each image processes are organized in NPOOL pools each dealing
|
||||
! ... with a subset of kpoints.
|
||||
! ... Within each pool R & G space distribution is performed.
|
||||
! ... NPROC is read from command line or can be set with the appropriate
|
||||
! ... environment variable ( for example use 'setenv MP_PROCS 8' on IBM SP
|
||||
! ... machine to run on NPROC=8 processors ); NIMAGE and NPOOL are read from
|
||||
! ... command line.
|
||||
! ... NPOOL must be a whole divisor of NPROC
|
||||
!
|
||||
! ... An example without any environment variable set is the following:
|
||||
!
|
||||
! ... T3E :
|
||||
! ... mpprun -n 16 pw.x -npool 8 < input
|
||||
!
|
||||
! ... IBM SP :
|
||||
! ... poe pw.x -procs 16 -npool 8 < input
|
||||
!
|
||||
! ... ORIGIN /PC clusters using "mpirun" :
|
||||
! ... mpirun -np 16 pw.x -npool 8 < input
|
||||
!
|
||||
! ... COMPAQ :
|
||||
! ... prun -n 16 sh -c 'pw.x -npool 8 < input'
|
||||
!
|
||||
! ... PC clusters using "mpiexec" :
|
||||
! ... mpiexec -n 16 pw.x -npool 8 < input
|
||||
!
|
||||
! ... In this example you will use 16 processors divided into 8 pools
|
||||
! ... of 2 processors each (in this case you must have at least 8 k-points)
|
||||
!
|
||||
! ... The following two modules hold global information about processors
|
||||
! ... number, IDs and communicators
|
||||
!
|
||||
USE io_global, ONLY : stdout, io_global_start, meta_ionode, meta_ionode_id
|
||||
USE mp_global, ONLY : nproc, nproc_image, nimage, mpime, me_image, &
|
||||
my_image_id, root_image, npool, nproc_pool
|
||||
USE mp_global, ONLY : mp_global_start, init_pool
|
||||
USE mp, ONLY : mp_start, mp_env, mp_barrier, mp_bcast
|
||||
USE io_global, ONLY : stdout, meta_ionode
|
||||
USE mp_global, ONLY : mp_startup, nproc, nogrp, nimage, npool, &
|
||||
nproc_pool, me_image, nproc_image, root_image
|
||||
USE control_flags, ONLY : use_task_groups, ortho_para
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -62,84 +21,20 @@ SUBROUTINE startup( nd_nmbr, code, version )
|
|||
CHARACTER (LEN=6) :: nd_nmbr
|
||||
CHARACTER (LEN=6) :: version
|
||||
CHARACTER (LEN=9) :: code
|
||||
CHARACTER (LEN=80) :: np
|
||||
INTEGER :: gid, node_number
|
||||
INTEGER :: nargs, iiarg
|
||||
INTEGER :: ntask_groups, nproc_ortho
|
||||
INTEGER :: iargc
|
||||
! do not define iargc as external: gfortran does not like
|
||||
#if defined __OPENMP
|
||||
INTEGER, EXTERNAL :: omp_get_max_threads
|
||||
#endif
|
||||
|
||||
!
|
||||
#if defined (__PARA)
|
||||
!
|
||||
! ... parallel case setup : MPI environment is initialized
|
||||
!
|
||||
CALL mp_start()
|
||||
!
|
||||
CALL mp_env( nproc, mpime, gid )
|
||||
!
|
||||
! ... Set the I/O node
|
||||
!
|
||||
CALL io_global_start( mpime, 0 )
|
||||
!
|
||||
! ... Set global coordinate for this processor
|
||||
!
|
||||
CALL mp_global_start( 0, mpime, gid, nproc )
|
||||
!
|
||||
IF ( meta_ionode ) THEN
|
||||
!
|
||||
! ... How many pools ?
|
||||
!
|
||||
CALL get_arg_npool( npool )
|
||||
!
|
||||
npool = MAX( npool, 1 )
|
||||
npool = MIN( npool, nproc )
|
||||
!
|
||||
! ... How many parallel images ?
|
||||
!
|
||||
CALL get_arg_nimage( nimage )
|
||||
!
|
||||
nimage = MAX( nimage, 1 )
|
||||
nimage = MIN( nimage, nproc )
|
||||
!
|
||||
! ... How many task groups ?
|
||||
!
|
||||
CALL get_arg_ntg( ntask_groups )
|
||||
!
|
||||
! ... How many processors involved in diagonalization of the Hamiltonian ?
|
||||
!
|
||||
CALL get_arg_northo( nproc_ortho )
|
||||
!
|
||||
END IF
|
||||
!
|
||||
CALL mp_barrier()
|
||||
!
|
||||
! ... transmit npool and nimage
|
||||
!
|
||||
CALL mp_bcast( npool, meta_ionode_id )
|
||||
CALL mp_bcast( nimage, meta_ionode_id )
|
||||
CALL mp_bcast( ntask_groups, meta_ionode_id )
|
||||
CALL mp_bcast( nproc_ortho, meta_ionode_id )
|
||||
!
|
||||
use_task_groups = ( ntask_groups > 0 )
|
||||
!
|
||||
IF( nproc_ortho > 0 ) THEN
|
||||
ortho_para = nproc_ortho
|
||||
END IF
|
||||
!
|
||||
! ... all pools are initialized here
|
||||
!
|
||||
CALL init_pool( nimage, ntask_groups, nproc_ortho )
|
||||
CALL mp_startup ( use_task_groups, ortho_para )
|
||||
!
|
||||
! ... set the processor label for files ( remember that
|
||||
! ... me_image = 0 : ( nproc_image - 1 ) )
|
||||
!
|
||||
node_number = ( me_image + 1 )
|
||||
!
|
||||
CALL set_nd_nmbr( nd_nmbr, node_number, nproc_image )
|
||||
CALL set_nd_nmbr( nd_nmbr, me_image+1, nproc_image )
|
||||
!
|
||||
! ... stdout is printed only by the root_image ( set in init_pool() )
|
||||
!
|
||||
|
@ -182,9 +77,9 @@ SUBROUTINE startup( nd_nmbr, code, version )
|
|||
IF ( nproc_pool > 1 ) &
|
||||
WRITE( stdout, &
|
||||
'(5X,"R & G space division: proc/pool = ",I4)' ) nproc_pool
|
||||
IF ( ntask_groups > 0 ) &
|
||||
IF ( nogrp > 1 ) &
|
||||
WRITE( stdout, &
|
||||
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) ntask_groups
|
||||
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) nogrp
|
||||
!
|
||||
END IF
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue