Minor (but potentially dangerous) changes to parallel initialization:

Most of what was in PW/startup.f90 has been moved into Modules/mp_global.f90,
in view of a merge with CP.


git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@6037 c92efa57-630b-4861-b058-cf58834340f0
giannozz 2009-10-18 17:48:34 +00:00
parent f1e241876b
commit d7357d5cd9
4 changed files with 465 additions and 497 deletions
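
Editor's sketch (not part of the commit): based on the interfaces visible in the diffs below, a code built on these modules can now initialize its whole parallel environment with a single call to mp_startup, which replaces the mp_start / mp_env / io_global_start / mp_global_start / init_pool sequence that PW/startup.f90 used to perform by hand. All module and variable names are taken from the diffs; the driver itself is hypothetical.

      PROGRAM startup_sketch
         ! hypothetical driver, for illustration only
         USE mp_global,     ONLY : mp_startup
         USE control_flags, ONLY : use_task_groups, ortho_para
         IMPLICIT NONE
         !
         ! initializes MPI and sets up images, pools, task groups and the ortho group
         CALL mp_startup( use_task_groups, ortho_para )
         !
         ! ... the actual calculation would go here ...
         !
      END PROGRAM startup_sketch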

View File

@ -1,5 +1,5 @@
!
! Copyright (C) 2002-2003 PWSCF-FPMD-CP90 group
! Copyright (C) 2002-2009 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@ -10,8 +10,6 @@
# include "/cineca/prod/hpm/include/f_hpm.h"
#endif
!------------------------------------------------------------------------------!
MODULE mp
!------------------------------------------------------------------------------!
@ -22,10 +20,10 @@
IMPLICIT NONE
PUBLIC :: mp_start, mp_end, mp_env, &
mp_bcast, mp_stop, mp_sum, mp_max, mp_min, mp_rank, mp_size, &
mp_bcast, mp_sum, mp_max, mp_min, mp_rank, mp_size, &
mp_gather, mp_get, mp_put, mp_barrier, mp_report, mp_group_free, &
mp_root_sum, mp_comm_free, mp_comm_create, mp_comm_group, mp_group_create, &
mp_comm_split, mp_set_displs
mp_root_sum, mp_comm_free, mp_comm_create, mp_comm_group, &
mp_group_create, mp_comm_split, mp_set_displs
!
INTERFACE mp_bcast
MODULE PROCEDURE mp_bcast_i1, mp_bcast_r1, mp_bcast_c1, &

View File

@ -1,5 +1,5 @@
!
! Copyright (C) 2002-2004 PWSCF-FPMD-CP90 group
! Copyright (C) 2002-2009 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@ -9,6 +9,9 @@
MODULE mp_global
!----------------------------------------------------------------------------
!
USE mp, ONLY : mp_comm_free, mp_size, mp_rank, mp_sum, mp_barrier, &
mp_bcast, mp_start, mp_env
USE io_global, ONLY : stdout, io_global_start, meta_ionode, meta_ionode_id
USE parallel_include
!
IMPLICIT NONE
@ -19,7 +22,7 @@ MODULE mp_global
INTEGER :: root = 0 ! index of the absolute root processor
INTEGER :: nproc = 1 ! absolute number of processors
INTEGER :: nproc_file = 1 ! absolute number of processors written in the
! xml punch file
! xml punch file
INTEGER :: world_comm = 0 ! communicator of all processors
#if defined __SCALAPACK
INTEGER :: me_blacs = 0 ! BLACS processor index starting from 0
@ -47,12 +50,12 @@ MODULE mp_global
INTEGER :: npgrp = 1 ! number of proc. in a plane-wave "task group"
INTEGER :: nproc_pool = 1 ! number of processors within a pool
INTEGER :: nproc_pool_file = 1 ! number of processors within a pool
! written in the xml punch file
! written in the xml punch file
INTEGER :: nproc_image = 1 ! number of processors within an image
INTEGER :: np_ortho(2) = 1 ! size of the processor grid used in ortho
INTEGER :: np_ortho1 = 1 ! size of the ortho group
INTEGER :: leg_ortho = 1 ! the distance in the parent communicator
! of two neighbouring processors in ortho_comm
! of two neighbouring processors in ortho_comm
INTEGER, ALLOCATABLE :: nolist(:) ! list of processors in my orbital task group
INTEGER, ALLOCATABLE :: nplist(:) ! list of processors in my plane wave task group
!
@ -70,425 +73,497 @@ MODULE mp_global
INTEGER :: ortho_cntx = 0 ! BLACS context for ortho_comm
#endif
!
CONTAINS
!
!-----------------------------------------------------------------------
SUBROUTINE mp_global_start( root_i, mpime_i, group_i, nproc_i )
!-----------------------------------------------------------------------
CONTAINS
!
!-----------------------------------------------------------------------
SUBROUTINE mp_startup ( use_task_groups, ortho_para )
!-----------------------------------------------------------------------
! ... This subroutine initializes MPI
! ... Processes are organized into NIMAGE images, each dealing with a subset of
! ... the images used to discretize the "path" (only in "path" optimizations).
! ... Within each image, processes are organized into NPOOL pools, each dealing
! ... with a subset of k-points.
! ... Within each pool the R & G space distribution is performed.
! ... NPROC is read from the command line or can be set with the appropriate
! ... environment variable ( for example use 'setenv MP_PROCS 8' on an IBM SP
! ... machine to run on NPROC=8 processors ); NIMAGE and NPOOL are read from
! ... the command line.
! ... NPOOL must be a whole divisor of NPROC.
! ... (A worked numeric example of this division is sketched after this subroutine.)
!
IMPLICIT NONE
LOGICAL, INTENT(OUT) :: use_task_groups
INTEGER, INTENT(OUT) :: ortho_para
INTEGER :: gid, ntask_groups, nproc_ortho
!
CALL mp_start()
!
CALL mp_env( nproc, mpime, gid )
!
! ... Set the I/O node
!
CALL io_global_start( mpime, 0 )
!
! ... Set global coordinate for this processor
!
CALL mp_global_start( 0, mpime, gid, nproc )
!
IF ( meta_ionode ) THEN
!
IMPLICIT NONE
! ... How many pools ?
!
INTEGER, INTENT(IN) :: root_i, mpime_i, group_i, nproc_i
CALL get_arg_npool( npool )
!
root = root_i
mpime = mpime_i
world_comm = group_i
nproc = nproc_i
nproc_pool = nproc_i
nproc_image = nproc_i
my_pool_id = 0
my_image_id = 0
me_pool = mpime
me_image = mpime
me_pgrp = me_pool
root_pool = root
root_image = root
inter_pool_comm = group_i
intra_pool_comm = group_i
inter_image_comm = group_i
intra_image_comm = group_i
ortho_comm = group_i
ALLOCATE( nolist( nproc_i ) )
ALLOCATE( nplist( nproc_i ) )
nolist = 0
nplist = 0
npool = MAX( npool, 1 )
npool = MIN( npool, nproc )
!
RETURN
! ... How many parallel images ?
!
END SUBROUTINE mp_global_start
!
!-----------------------------------------------------------------------
SUBROUTINE mp_global_group_start( mep, myp, nprocp, num_of_pools )
!-----------------------------------------------------------------------
CALL get_arg_nimage( nimage )
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: mep, myp, nprocp, num_of_pools
nimage = MAX( nimage, 1 )
nimage = MIN( nimage, nproc )
!
me_pool = mep
my_pool_id = myp
nproc_pool = nprocp
npool = num_of_pools
! ... How many task groups ?
!
RETURN
CALL get_arg_ntg( ntask_groups )
!
END SUBROUTINE mp_global_group_start
!
!
!----------------------------------------------------------------------------
SUBROUTINE init_pool( nimage_ , ntask_groups_ , nproc_ortho_ )
! ... How many processors are involved in the diagonalization of the Hamiltonian ?
!
CALL get_arg_northo( nproc_ortho )
!
END IF
!
CALL mp_barrier()
!
! ... broadcast npool, nimage, ntask_groups, nproc_ortho to all processors
!
CALL mp_bcast( npool, meta_ionode_id )
CALL mp_bcast( nimage, meta_ionode_id )
CALL mp_bcast( ntask_groups, meta_ionode_id )
CALL mp_bcast( nproc_ortho, meta_ionode_id )
!
use_task_groups = ( ntask_groups > 0 )
IF ( nproc_ortho > 0 ) ortho_para = nproc_ortho
!
! ... all pools are initialized here
!
CALL init_pool( nimage, ntask_groups, nproc_ortho )
!
RETURN
!
END SUBROUTINE mp_startup
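!
! [Editor's sketch, not in the original source] Worked example of the division
! performed by init_pool below, assuming a run like
!     mpirun -np 16 pw.x -nimage 2 -npool 4
! nproc = 16, nimage = 2  =>  nproc_image = 16 / 2 = 8
! npool = 4               =>  nproc_pool  =  8 / 4 = 2
! For the processor with mpime = 11:
!     my_image_id = 11 / 8 = 1      me_image = MOD( 11, 8 ) = 3
!     my_pool_id  =  3 / 2 = 1      me_pool  = MOD(  3, 2 ) = 1
! i.e. it is the second processor of the second pool of the second image.
!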
!
!-----------------------------------------------------------------------
SUBROUTINE mp_global_start( root_i, mpime_i, group_i, nproc_i )
!-----------------------------------------------------------------------
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: root_i, mpime_i, group_i, nproc_i
!
root = root_i
mpime = mpime_i
world_comm = group_i
nproc = nproc_i
nproc_pool = nproc_i
nproc_image = nproc_i
my_pool_id = 0
my_image_id = 0
me_pool = mpime
me_image = mpime
me_pgrp = me_pool
root_pool = root
root_image = root
inter_pool_comm = group_i
intra_pool_comm = group_i
inter_image_comm = group_i
intra_image_comm = group_i
ortho_comm = group_i
ALLOCATE( nolist( nproc_i ) )
ALLOCATE( nplist( nproc_i ) )
nolist = 0
nplist = 0
!
RETURN
!
END SUBROUTINE mp_global_start
!
!-----------------------------------------------------------------------
SUBROUTINE mp_global_group_start( mep, myp, nprocp, num_of_pools )
!-----------------------------------------------------------------------
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: mep, myp, nprocp, num_of_pools
!
me_pool = mep
my_pool_id = myp
nproc_pool = nprocp
npool = num_of_pools
!
RETURN
!
END SUBROUTINE mp_global_group_start
!
!
!----------------------------------------------------------------------------
!
! ... This routine initializes the pool: MPI division into pools and images
!
USE mp, ONLY : mp_barrier, mp_bcast
USE parallel_include
!
IMPLICIT NONE
!
INTEGER, OPTIONAL, INTENT(IN) :: nimage_
INTEGER, OPTIONAL, INTENT(IN) :: ntask_groups_
INTEGER, OPTIONAL, INTENT(IN) :: nproc_ortho_
!
INTEGER :: ierr = 0
INTEGER :: nproc_ortho
!
SUBROUTINE init_pool( nimage_ , ntask_groups_ , nproc_ortho_ )
!----------------------------------------------------------------------------
!
! ... This routine initializes the pool: MPI division into pools and images
!
IMPLICIT NONE
!
INTEGER, OPTIONAL, INTENT(IN) :: nimage_
INTEGER, OPTIONAL, INTENT(IN) :: ntask_groups_
INTEGER, OPTIONAL, INTENT(IN) :: nproc_ortho_
!
INTEGER :: ierr = 0
INTEGER :: nproc_ortho
!
#if defined (__PARA)
!
!
IF( PRESENT( nimage_ ) ) THEN
nimage = nimage_
END IF
!
! ... here we set all parallel indices (defined in mp_global):
!
!
! ... number of cpus per image
!
nproc_image = nproc / nimage
!
IF ( nproc < nimage ) &
CALL errore( 'startup', 'nproc < nimage', 1 )
!
IF ( MOD( nproc, nimage ) /= 0 ) &
CALL errore( 'startup', 'nproc /= nproc_image * nimage', 1 )
!
! ... my_image_id = image index for this processor ( 0 : nimage - 1 )
! ... me_image = processor index within the image ( 0 : nproc_image - 1 )
!
my_image_id = mpime / nproc_image
me_image = MOD( mpime, nproc_image )
!
CALL mp_barrier()
!
! ... the intra_image_comm communicator is created
!
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
my_image_id, mpime, intra_image_comm, ierr )
!
CALL errore( 'init_pool', 'intra_image_comm is wrong', ierr )
!
CALL mp_barrier()
!
! ... the inter_image_comm communicator is created
!
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
me_image, mpime, inter_image_comm, ierr )
!
CALL errore( 'init_pool', 'inter_image_comm is wrong', ierr )
!
! ... number of cpus per pool of k-points (they are created inside each image)
!
nproc_pool = nproc_image / npool
!
IF ( MOD( nproc, npool ) /= 0 ) &
CALL errore( 'startup', 'nproc /= nproc_pool * npool', 1 )
!
! ... my_pool_id = pool index for this processor ( 0 : npool - 1 )
! ... me_pool = processor index within the pool ( 0 : nproc_pool - 1 )
!
my_pool_id = me_image / nproc_pool
me_pool = MOD( me_image, nproc_pool )
!
CALL mp_barrier( intra_image_comm )
!
! ... the intra_pool_comm communicator is created
!
CALL MPI_COMM_SPLIT( intra_image_comm, &
my_pool_id, me_image, intra_pool_comm, ierr )
!
CALL errore( 'init_pool', 'intra_pool_comm is wrong', ierr )
!
CALL mp_barrier( intra_image_comm )
!
! ... the inter_pool_comm communicator is created
!
CALL MPI_COMM_SPLIT( intra_image_comm, &
me_pool, me_image, inter_pool_comm, ierr )
!
call errore( 'init_pool', 'inter_pool_comm is wrong', ierr )
!
!
!
IF( PRESENT( nimage_ ) ) THEN
nimage = nimage_
END IF
!
! ... here we set all parallel indices (defined in mp_global):
!
!
! ... number of cpus per image
!
nproc_image = nproc / nimage
!
IF ( nproc < nimage ) &
CALL errore( 'startup', 'nproc < nimage', 1 )
!
IF ( MOD( nproc, nimage ) /= 0 ) &
CALL errore( 'startup', 'nproc /= nproc_image * nimage', 1 )
!
! ... my_image_id = image index for this processor ( 0 : nimage - 1 )
! ... me_image = processor index within the image ( 0 : nproc_image - 1 )
!
my_image_id = mpime / nproc_image
me_image = MOD( mpime, nproc_image )
!
CALL mp_barrier()
!
! ... the intra_image_comm communicator is created
!
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
my_image_id, mpime, intra_image_comm, ierr )
!
CALL errore( 'init_pool', 'intra_image_comm is wrong', ierr )
!
CALL mp_barrier()
!
! ... the inter_image_comm communicator is created
!
CALL MPI_COMM_SPLIT( MPI_COMM_WORLD, &
me_image, mpime, inter_image_comm, ierr )
!
CALL errore( 'init_pool', 'inter_image_comm is wrong', ierr )
!
! ... number of cpus per pool of k-points (they are created inside each image)
!
nproc_pool = nproc_image / npool
!
IF ( MOD( nproc, npool ) /= 0 ) &
CALL errore( 'startup', 'nproc /= nproc_pool * npool', 1 )
!
! ... my_pool_id = pool index for this processor ( 0 : npool - 1 )
! ... me_pool = processor index within the pool ( 0 : nproc_pool - 1 )
!
my_pool_id = me_image / nproc_pool
me_pool = MOD( me_image, nproc_pool )
!
CALL mp_barrier( intra_image_comm )
!
! ... the intra_pool_comm communicator is created
!
CALL MPI_COMM_SPLIT( intra_image_comm, &
my_pool_id, me_image, intra_pool_comm, ierr )
!
CALL errore( 'init_pool', 'intra_pool_comm is wrong', ierr )
!
CALL mp_barrier( intra_image_comm )
!
! ... the inter_pool_comm communicator is created
!
CALL MPI_COMM_SPLIT( intra_image_comm, &
me_pool, me_image, inter_pool_comm, ierr )
!
call errore( 'init_pool', 'inter_pool_comm is wrong', ierr )
!
#endif
!
!
!
!
#if defined __SCALAPACK
CALL BLACS_PINFO( me_blacs, np_blacs )
!WRITE(*,*) 'BLACS me_blacs, np_blacs = ', me_blacs, np_blacs
CALL BLACS_GET( -1, 0, world_cntx )
!WRITE(*,*) 'BLACS world_cntx = ', world_cntx
CALL BLACS_PINFO( me_blacs, np_blacs )
!WRITE(*,*) 'BLACS me_blacs, np_blacs = ', me_blacs, np_blacs
CALL BLACS_GET( -1, 0, world_cntx )
!WRITE(*,*) 'BLACS world_cntx = ', world_cntx
#endif
!
nproc_ortho = nproc_pool
!
IF( PRESENT( nproc_ortho_ ) ) THEN
IF( nproc_ortho_ < nproc_pool ) nproc_ortho = nproc_ortho_
END IF
!
CALL init_ortho_group( nproc_ortho, intra_pool_comm )
!
IF( PRESENT( ntask_groups_ ) ) THEN
IF( ntask_groups_ > 0 ) THEN
nogrp = ntask_groups_
CALL init_task_groups( )
END IF
END IF
!
RETURN
!
END SUBROUTINE init_pool
!
nproc_ortho = nproc_pool
!
IF( PRESENT( nproc_ortho_ ) ) THEN
IF( nproc_ortho_ < nproc_pool ) nproc_ortho = nproc_ortho_
END IF
!
CALL init_ortho_group( nproc_ortho, intra_pool_comm )
!
IF( PRESENT( ntask_groups_ ) ) THEN
IF( ntask_groups_ > 0 ) THEN
nogrp = ntask_groups_
CALL init_task_groups( )
END IF
END IF
!
RETURN
!
END SUBROUTINE init_pool
!
!
SUBROUTINE init_task_groups( )
!
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
INTEGER :: pgroup( nproc_pool )
!
!SUBDIVIDE THE PROCESSORS IN GROUPS
!
!THE NUMBER OF GROUPS HAS TO BE A DIVISOR OF THE NUMBER
!OF PROCESSORS
!
IF( MOD( nproc_pool, nogrp ) /= 0 ) &
CALL errore( " init_pool ", " nogrp should be a divisor of nproc_pool ", 1 )
!
npgrp = nproc_pool / nogrp
SUBROUTINE init_task_groups( )
!
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
INTEGER :: pgroup( nproc_pool )
!
!SUBDIVIDE THE PROCESSORS IN GROUPS
!
!THE NUMBER OF GROUPS HAS TO BE A DIVISOR OF THE NUMBER
!OF PROCESSORS
!
IF( MOD( nproc_pool, nogrp ) /= 0 ) &
CALL errore( " init_pool ", " nogrp should be a divisor of nproc_pool ", 1 )
!
npgrp = nproc_pool / nogrp
DO i = 1, nproc_pool
pgroup( i ) = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY ORBITAL GROUP
!
! processors in this group have contiguous indexes
!
N1 = ( me_pool / NOGRP ) * NOGRP - 1
DO i = 1, nogrp
nolist( I ) = pgroup( N1 + I + 1 )
IF( me_pool == nolist( I ) ) ipos = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
!
DO I = 1, npgrp
nplist( I ) = pgroup( ipos + ( i - 1 ) * nogrp + 1 )
ENDDO
DO i = 1, nproc_pool
pgroup( i ) = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY ORBITAL GROUP
!
! processors in this group have contiguous indexes
!
N1 = ( me_pool / NOGRP ) * NOGRP - 1
DO i = 1, nogrp
nolist( I ) = pgroup( N1 + I + 1 )
IF( me_pool == nolist( I ) ) ipos = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
!
DO I = 1, npgrp
nplist( I ) = pgroup( ipos + ( i - 1 ) * nogrp + 1 )
ENDDO
!
!SET UP THE GROUPS
!
!
!CREATE ORBITAL GROUPS
!
!
!SET UP THE GROUPS
!
!
!CREATE ORBITAL GROUPS
!
#if defined __MPI
color = me_pool / nogrp
key = MOD( me_pool , nogrp )
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, ogrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' task_groups_init ', ' creating ogrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( ogrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( ogrp_comm, ntsk, IERR )
IF( nogrp /= ntsk ) CALL errore( ' task_groups_init ', ' ogrp_comm size ', ntsk )
DO i = 1, nogrp
IF( me_pool == nolist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' ogrp_comm rank ', itsk )
END IF
END DO
color = me_pool / nogrp
key = MOD( me_pool , nogrp )
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, ogrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' task_groups_init ', ' creating ogrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( ogrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( ogrp_comm, ntsk, IERR )
IF( nogrp /= ntsk ) CALL errore( ' task_groups_init ', ' ogrp_comm size ', ntsk )
DO i = 1, nogrp
IF( me_pool == nolist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' ogrp_comm rank ', itsk )
END IF
END DO
#endif
!
!CREATE PLANEWAVE GROUPS
!
!
!CREATE PLANEWAVE GROUPS
!
#if defined __MPI
color = MOD( me_pool , nogrp )
key = me_pool / nogrp
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, pgrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' task_groups_init ', ' creating pgrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( pgrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( pgrp_comm, ntsk, IERR )
IF( npgrp /= ntsk ) CALL errore( ' task_groups_init ', ' pgrp_comm size ', ntsk )
DO i = 1, npgrp
IF( me_pool == nplist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' pgrp_comm rank ', itsk )
END IF
END DO
me_pgrp = itsk
color = MOD( me_pool , nogrp )
key = me_pool / nogrp
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, pgrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' task_groups_init ', ' creating pgrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( pgrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( pgrp_comm, ntsk, IERR )
IF( npgrp /= ntsk ) CALL errore( ' task_groups_init ', ' pgrp_comm size ', ntsk )
DO i = 1, npgrp
IF( me_pool == nplist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' task_groups_init ', ' pgrp_comm rank ', itsk )
END IF
END DO
me_pgrp = itsk
#endif
RETURN
END SUBROUTINE init_task_groups
!
!
SUBROUTINE init_ortho_group( nproc_try, comm_all )
!
USE mp, ONLY : mp_comm_free, mp_size, mp_rank, mp_sum
!
IMPLICIT NONE
INTEGER, INTENT(IN) :: nproc_try, comm_all
LOGICAL, SAVE :: first = .true.
INTEGER :: ierr, color, key, me_all, nproc_all
INTEGER :: np_ortho1
RETURN
END SUBROUTINE init_task_groups
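!
! [Editor's sketch, not in the original source] Worked example of the group
! construction above, assuming nproc_pool = 8 and nogrp = 4 (so npgrp = 2).
! For me_pool = 5:
!     N1     = ( 5 / 4 ) * 4 - 1 = 3
!     nolist = (/ 4, 5, 6, 7 /)   (orbital task group, contiguous ranks)
!     ipos   = 1                  (me_pool is the second entry of nolist)
!     nplist = (/ 1, 5 /)         (plane-wave group, stride nogrp)
! The MPI_COMM_SPLIT calls then build ogrp_comm with color = me_pool / nogrp
! (the nolist set) and pgrp_comm with color = MOD( me_pool, nogrp )
! (the nplist set).
!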
!
!
SUBROUTINE init_ortho_group( nproc_try, comm_all )
!
IMPLICIT NONE
INTEGER, INTENT(IN) :: nproc_try, comm_all
LOGICAL, SAVE :: first = .true.
INTEGER :: ierr, color, key, me_all, nproc_all
INTEGER :: np_ortho1
#if defined __SCALAPACK
INTEGER, ALLOCATABLE :: blacsmap(:,:)
INTEGER :: nprow, npcol, myrow, mycol
INTEGER, ALLOCATABLE :: blacsmap(:,:)
INTEGER :: nprow, npcol, myrow, mycol
#endif
#if defined __MPI
me_all = mp_rank( comm_all )
nproc_all = mp_size( comm_all )
me_all = mp_rank( comm_all )
nproc_all = mp_size( comm_all )
IF( nproc_try > nproc_all ) THEN
CALL errore( " init_ortho_group ", " argument 1 out of range ", nproc_try )
END IF
IF( nproc_try > nproc_all ) THEN
CALL errore( " init_ortho_group ", " argument 1 out of range ", nproc_try )
END IF
IF( .NOT. first ) THEN
!
! free resources associated to the communicator
!
CALL mp_comm_free( ortho_comm )
!
IF( .NOT. first ) THEN
!
! free resources associated to the communicator
!
CALL mp_comm_free( ortho_comm )
!
#if defined __SCALAPACK
CALL BLACS_GRIDEXIT( ortho_cntx )
CALL BLACS_GRIDEXIT( ortho_cntx )
#endif
!
END IF
! find the square grid closest to (but not larger than) nproc_try
!
CALL grid2d_dims( 'S', nproc_try, np_ortho(1), np_ortho(2) )
!
np_ortho1 = np_ortho(1) * np_ortho(2)
!
IF( nproc_all >= 4*np_ortho1 ) THEN
!
! here we choose a processor every 4, in order not to stress memory BW
! on multi core procs, for which further performance enhancements are
! possible using OpenMP BLAS inside regter/cegter/rdiaghg/cdiaghg
! (to be implemented)
!
color = 0
IF( me_all < 4*np_ortho1 .AND. MOD( me_all, 4 ) == 0 ) color = 1
!
leg_ortho = 4
!
ELSE IF( nproc_all >= 2*np_ortho1 ) THEN
!
! here we choose a processor every 2, in order not to stress memory BW
!
color = 0
IF( me_all < 2*np_ortho1 .AND. MOD( me_all, 2 ) == 0 ) color = 1
!
leg_ortho = 2
!
ELSE
!
! here we choose the first processors
!
color = 0
IF( me_all < np_ortho1 ) color = 1
!
leg_ortho = 1
!
END IF
!
key = me_all
!
! initialize the communicator for the new group
!
CALL MPI_COMM_SPLIT( comm_all, color, key, ortho_comm, ierr )
IF( ierr /= 0 ) &
CALL errore( " init_ortho_group ", " error splitting communicator ", ierr )
!
! Computes the coordinates of the processors, in row-major order
!
me_ortho1 = mp_rank( ortho_comm )
np_ortho1 = mp_size( ortho_comm )
IF( color == 1 .AND. np_ortho1 /= np_ortho(1) * np_ortho(2) ) &
CALL errore( " init_ortho_group ", " wrong number of proc in ortho_comm ", ierr )
!
IF( me_all == 0 .AND. me_ortho1 /= 0 ) &
CALL errore( " init_ortho_group ", " wrong root in ortho_comm ", ierr )
!
if( color == 1 ) then
ortho_comm_id = 1
CALL GRID2D_COORDS( 'R', me_ortho1, np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2) )
CALL GRID2D_RANK( 'R', np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2), ierr )
IF( ierr /= me_ortho1 ) &
CALL errore( " init_ortho_group ", " wrong coordinates in ortho_comm ", ierr )
IF( me_ortho1*leg_ortho /= me_all ) &
CALL errore( " init_ortho_group ", " wrong rank assignment in ortho_comm ", ierr )
else
ortho_comm_id = 0
me_ortho(1) = me_ortho1
me_ortho(2) = me_ortho1
endif
!
END IF
! find the square grid closest to (but not larger than) nproc_try
!
CALL grid2d_dims( 'S', nproc_try, np_ortho(1), np_ortho(2) )
!
np_ortho1 = np_ortho(1) * np_ortho(2)
!
IF( nproc_all >= 4*np_ortho1 ) THEN
!
! here we choose a processor every 4, in order not to stress memory BW
! on multi core procs, for which further performance enhancements are
! possible using OpenMP BLAS inside regter/cegter/rdiaghg/cdiaghg
! (to be implemented)
!
color = 0
IF( me_all < 4*np_ortho1 .AND. MOD( me_all, 4 ) == 0 ) color = 1
!
leg_ortho = 4
!
ELSE IF( nproc_all >= 2*np_ortho1 ) THEN
!
! here we choose a processor every 2, in order not to stress memory BW
!
color = 0
IF( me_all < 2*np_ortho1 .AND. MOD( me_all, 2 ) == 0 ) color = 1
!
leg_ortho = 2
!
ELSE
!
! here we choose the first processors
!
color = 0
IF( me_all < np_ortho1 ) color = 1
!
leg_ortho = 1
!
END IF
!
key = me_all
!
! initialize the communicator for the new group
!
CALL MPI_COMM_SPLIT( comm_all, color, key, ortho_comm, ierr )
IF( ierr /= 0 ) &
CALL errore( " init_ortho_group ", " error splitting communicator ", ierr )
!
! Computes the coordinates of the processors, in row-major order
!
me_ortho1 = mp_rank( ortho_comm )
np_ortho1 = mp_size( ortho_comm )
IF( color == 1 .AND. np_ortho1 /= np_ortho(1) * np_ortho(2) ) &
CALL errore( " init_ortho_group ", " wrong number of proc in ortho_comm ", ierr )
!
IF( me_all == 0 .AND. me_ortho1 /= 0 ) &
CALL errore( " init_ortho_group ", " wrong root in ortho_comm ", ierr )
!
if( color == 1 ) then
ortho_comm_id = 1
CALL GRID2D_COORDS( 'R', me_ortho1, np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2) )
CALL GRID2D_RANK( 'R', np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2), ierr )
IF( ierr /= me_ortho1 ) &
CALL errore( " init_ortho_group ", " wrong coordinates in ortho_comm ", ierr )
IF( me_ortho1*leg_ortho /= me_all ) &
CALL errore( " init_ortho_group ", " wrong rank assignment in ortho_comm ", ierr )
else
ortho_comm_id = 0
me_ortho(1) = me_ortho1
me_ortho(2) = me_ortho1
endif
#if defined __SCALAPACK
IF( ortho_comm_id > 0 ) THEN
ALLOCATE( blacsmap( np_ortho(1), np_ortho(2) ) )
blacsmap = 0
blacsmap( me_ortho(1) + 1, me_ortho(2) + 1 ) = me_blacs
nprow = np_ortho(1)
npcol = np_ortho(2)
ELSE
nprow = np_ortho1
npcol = 1
ALLOCATE( blacsmap( np_ortho1, 1 ) )
blacsmap = 0
blacsmap( me_ortho1 + 1, 1 ) = me_blacs
END IF
IF( ortho_comm_id > 0 ) THEN
ALLOCATE( blacsmap( np_ortho(1), np_ortho(2) ) )
blacsmap = 0
blacsmap( me_ortho(1) + 1, me_ortho(2) + 1 ) = me_blacs
nprow = np_ortho(1)
npcol = np_ortho(2)
ELSE
nprow = np_ortho1
npcol = 1
ALLOCATE( blacsmap( np_ortho1, 1 ) )
blacsmap = 0
blacsmap( me_ortho1 + 1, 1 ) = me_blacs
END IF
CALL mp_sum( blacsmap, ortho_comm )
!WRITE( 1000 + me_image, * ) '-----'
!WRITE( 1000 + me_image, * ) blacsmap
CALL mp_sum( blacsmap, ortho_comm )
ortho_cntx = world_cntx
CALL BLACS_GRIDMAP( ortho_cntx, blacsmap, nprow, nprow, npcol )
!WRITE( 1000 + me_image, * ) '-----'
!WRITE( 1000 + me_image, * ) blacsmap
CALL BLACS_GRIDINFO( ortho_cntx, nprow, npcol, myrow, mycol )
ortho_cntx = world_cntx
CALL BLACS_GRIDMAP( ortho_cntx, blacsmap, nprow, nprow, npcol )
!WRITE( 1000 + me_image, * ) nprow, npcol, myrow, mycol
CALL BLACS_GRIDINFO( ortho_cntx, nprow, npcol, myrow, mycol )
IF( ortho_comm_id > 0 ) THEN
IF( np_ortho(1) /= nprow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong nprow ', 1 )
IF( np_ortho(2) /= npcol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong npcol ', 1 )
IF( me_ortho(1) /= myrow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong myrow ', 1 )
IF( me_ortho(2) /= mycol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong mycol ', 1 )
END IF
!WRITE( 1000 + me_image, * ) nprow, npcol, myrow, mycol
DEALLOCATE( blacsmap )
IF( ortho_comm_id > 0 ) THEN
IF( np_ortho(1) /= nprow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong nprow ', 1 )
IF( np_ortho(2) /= npcol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong npcol ', 1 )
IF( me_ortho(1) /= myrow ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong myrow ', 1 )
IF( me_ortho(2) /= mycol ) CALL errore( ' init_ortho_group ', ' problem with SCALAPACK, wrong mycol ', 1 )
END IF
DEALLOCATE( blacsmap )
#endif
#else
ortho_comm_id = 1
ortho_comm_id = 1
#endif
first = .false.
RETURN
END SUBROUTINE init_ortho_group
!
!
first = .false.
RETURN
END SUBROUTINE init_ortho_group
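!
! [Editor's sketch, not in the original source] Worked example of the choice
! made above, assuming nproc_all = 16 (one pool of 16 processors) and a
! requested nproc_try = 4:
!     grid2d_dims gives np_ortho = (/ 2, 2 /), so np_ortho1 = 4
!     nproc_all >= 4*np_ortho1, hence color = 1 only for me_all = 0, 4, 8, 12
!     and leg_ortho = 4: the 2x2 ortho grid takes one processor out of every
!     four, so as not to stress the memory bandwidth of multi-core nodes.
!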
!
!
END MODULE mp_global

View File

@ -910,15 +910,15 @@ SUBROUTINE check_para_diag( nelec )
!
! we need at least 4 procs to use distributed algorithm
!
IF ( ionode ) WRITE( stdout, '(5X,"Too few procs for parallel algorithm")' )
IF ( ionode ) WRITE( stdout, '(5X," we need at least 4 procs per pool")' )
IF ( ionode ) WRITE( stdout, '(5X,"Too few procs for parallel ",&
& "algorithm: we need at least 4 procs per pool")' )
!
ELSE IF( INT( nelec )/2 < nproc_pool ) THEN
!
! we need to have at least 1 electronic band per block
!
IF ( ionode ) WRITE( stdout, '(5X,"Too few electrons for parallel algorithm")')
IF ( ionode ) WRITE( stdout, '(5X," we need at least as many bands as SQRT(nproc)")' )
IF ( ionode ) WRITE(stdout,'(5X,"Too few electrons for parallel ",&
& " algorithm: we need # of bands >= SQRT(nproc)")' )
!
END IF
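!
! [Editor's note, not part of the commit] Numerically, the second test above
! is INT( nelec ) / 2 < nproc_pool: with nproc_pool = 8 the distributed
! algorithm therefore requires nelec >= 16, i.e. at least 8 occupied bands.
!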

View File

@ -5,56 +5,15 @@
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#if defined(__ABSOFT)
# define getarg getarg_
# define iargc iargc_
#endif
!
!----------------------------------------------------------------------------
SUBROUTINE startup( nd_nmbr, code, version )
!----------------------------------------------------------------------------
!
! ... This subroutine initializes MPI
! ... This subroutine initializes MPI and various other things
!
! ... Processes are organized in NIMAGE images each dealing with a subset of
! ... images used to discretize the "path" (this only in "path" optimizations)
! ... Within each image processes are organized in NPOOL pools each dealing
! ... with a subset of kpoints.
! ... Within each pool R & G space distribution is performed.
! ... NPROC is read from command line or can be set with the appropriate
! ... environment variable ( for example use 'setenv MP_PROCS 8' on IBM SP
! ... machine to run on NPROC=8 processors ); NIMAGE and NPOOL are read from
! ... command line.
! ... NPOOL must be a whole divisor of NPROC
!
! ... An example without any environment variable set is the following:
!
! ... T3E :
! ... mpprun -n 16 pw.x -npool 8 < input
!
! ... IBM SP :
! ... poe pw.x -procs 16 -npool 8 < input
!
! ... ORIGIN /PC clusters using "mpirun" :
! ... mpirun -np 16 pw.x -npool 8 < input
!
! ... COMPAQ :
! ... prun -n 16 sh -c 'pw.x -npool 8 < input'
!
! ... PC clusters using "mpiexec" :
! ... mpiexec -n 16 pw.x -npool 8 < input
!
! ... In this example you will use 16 processors divided into 8 pools
! ... of 2 processors each (in this case you must have at least 8 k-points)
!
! ... The following two modules hold global information about processors
! ... number, IDs and communicators
!
USE io_global, ONLY : stdout, io_global_start, meta_ionode, meta_ionode_id
USE mp_global, ONLY : nproc, nproc_image, nimage, mpime, me_image, &
my_image_id, root_image, npool, nproc_pool
USE mp_global, ONLY : mp_global_start, init_pool
USE mp, ONLY : mp_start, mp_env, mp_barrier, mp_bcast
USE io_global, ONLY : stdout, meta_ionode
USE mp_global, ONLY : mp_startup, nproc, nogrp, nimage, npool, &
nproc_pool, me_image, nproc_image, root_image
USE control_flags, ONLY : use_task_groups, ortho_para
!
IMPLICIT NONE
@ -62,84 +21,20 @@ SUBROUTINE startup( nd_nmbr, code, version )
CHARACTER (LEN=6) :: nd_nmbr
CHARACTER (LEN=6) :: version
CHARACTER (LEN=9) :: code
CHARACTER (LEN=80) :: np
INTEGER :: gid, node_number
INTEGER :: nargs, iiarg
INTEGER :: ntask_groups, nproc_ortho
INTEGER :: iargc
! do not define iargc as external: gfortran does not like it
#if defined __OPENMP
INTEGER, EXTERNAL :: omp_get_max_threads
#endif
!
#if defined (__PARA)
!
! ... parallel case setup : MPI environment is initialized
!
CALL mp_start()
!
CALL mp_env( nproc, mpime, gid )
!
! ... Set the I/O node
!
CALL io_global_start( mpime, 0 )
!
! ... Set global coordinate for this processor
!
CALL mp_global_start( 0, mpime, gid, nproc )
!
IF ( meta_ionode ) THEN
!
! ... How many pools ?
!
CALL get_arg_npool( npool )
!
npool = MAX( npool, 1 )
npool = MIN( npool, nproc )
!
! ... How many parallel images ?
!
CALL get_arg_nimage( nimage )
!
nimage = MAX( nimage, 1 )
nimage = MIN( nimage, nproc )
!
! ... How many task groups ?
!
CALL get_arg_ntg( ntask_groups )
!
! ... How many processors involved in diagonalization of the Hamiltonian ?
!
CALL get_arg_northo( nproc_ortho )
!
END IF
!
CALL mp_barrier()
!
! ... transmit npool and nimage
!
CALL mp_bcast( npool, meta_ionode_id )
CALL mp_bcast( nimage, meta_ionode_id )
CALL mp_bcast( ntask_groups, meta_ionode_id )
CALL mp_bcast( nproc_ortho, meta_ionode_id )
!
use_task_groups = ( ntask_groups > 0 )
!
IF( nproc_ortho > 0 ) THEN
ortho_para = nproc_ortho
END IF
!
! ... all pools are initialized here
!
CALL init_pool( nimage, ntask_groups, nproc_ortho )
CALL mp_startup ( use_task_groups, ortho_para )
!
! ... set the processor label for files ( remember that
! ... me_image = 0 : ( nproc_image - 1 ) )
!
node_number = ( me_image + 1 )
!
CALL set_nd_nmbr( nd_nmbr, node_number, nproc_image )
CALL set_nd_nmbr( nd_nmbr, me_image+1, nproc_image )
!
! ... stdout is printed only by the root_image ( set in init_pool() )
!
@ -182,9 +77,9 @@ SUBROUTINE startup( nd_nmbr, code, version )
IF ( nproc_pool > 1 ) &
WRITE( stdout, &
'(5X,"R & G space division: proc/pool = ",I4)' ) nproc_pool
IF ( ntask_groups > 0 ) &
IF ( nogrp > 1 ) &
WRITE( stdout, &
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) ntask_groups
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) nogrp
!
END IF
!