mirror of https://gitlab.com/QEF/q-e.git
- task groups variables moved inside fft_type
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@7439 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
parent
cf0b1a29af
commit
b21befe7aa
|
@ -115,8 +115,7 @@
|
|||
USE constants, ONLY: pi, fpi
|
||||
USE mp, ONLY: mp_sum
|
||||
USE io_global, ONLY: stdout, ionode
|
||||
USE mp_global, ONLY: intra_bgrp_comm, nogrp, me_bgrp, &
|
||||
use_task_groups, ogrp_comm, nolist, nbgrp, inter_bgrp_comm
|
||||
USE mp_global, ONLY: intra_bgrp_comm, nbgrp, inter_bgrp_comm, me_bgrp
|
||||
USE funct, ONLY: dft_is_meta
|
||||
USE cg_module, ONLY: tcg
|
||||
USE cp_interfaces, ONLY: stress_kin, enkin
|
||||
|
@ -295,7 +294,7 @@
|
|||
rhos(ir,iss1)=rhos(ir,iss1) + sa1*( DBLE(psis(ir)))**2
|
||||
END DO
|
||||
!
|
||||
ELSE IF( use_task_groups ) THEN
|
||||
ELSE IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
CALL loop_over_states_tg()
|
||||
!
|
||||
|
@ -500,13 +499,13 @@
|
|||
INTEGER :: from, ii, eig_index, eig_offset
|
||||
REAL(DP), ALLOCATABLE :: tmp_rhos(:,:)
|
||||
|
||||
ALLOCATE( psis( dffts%tg_nnr * nogrp ) )
|
||||
ALLOCATE( psis( dffts%tg_nnr * dffts%nogrp ) )
|
||||
!
|
||||
ALLOCATE( tmp_rhos ( dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 ), nspin ) )
|
||||
!
|
||||
tmp_rhos = 0_DP
|
||||
|
||||
do i = 1, nbsp_bgrp, 2*nogrp
|
||||
do i = 1, nbsp_bgrp, 2*dffts%nogrp
|
||||
!
|
||||
! Initialize wave-functions in Fourier space (to be FFTed)
|
||||
! The size of psis is nnr: which is equal to the total number
|
||||
|
@ -530,7 +529,7 @@
|
|||
!
|
||||
eig_offset = 0
|
||||
|
||||
do eig_index = 1, 2*nogrp, 2
|
||||
do eig_index = 1, 2*dffts%nogrp, 2
|
||||
!
|
||||
! here we pack 2*nogrp electronic states in the psis array
|
||||
!
|
||||
|
@ -571,8 +570,8 @@
|
|||
!
|
||||
! Compute the proper factor for each band
|
||||
!
|
||||
DO ii = 1, nogrp
|
||||
IF( nolist( ii ) == me_bgrp ) EXIT
|
||||
DO ii = 1, dffts%nogrp
|
||||
IF( dffts%nolist( ii ) == me_bgrp ) EXIT
|
||||
END DO
|
||||
!
|
||||
! Remember two bands are packed in a single array :
|
||||
|
@ -629,8 +628,8 @@
|
|||
CALL mp_sum( tmp_rhos, inter_bgrp_comm )
|
||||
END IF
|
||||
|
||||
IF ( nogrp > 1 ) THEN
|
||||
CALL mp_sum( tmp_rhos, gid = ogrp_comm )
|
||||
IF ( dffts%nogrp > 1 ) THEN
|
||||
CALL mp_sum( tmp_rhos, gid = dffts%ogrp_comm )
|
||||
ENDIF
|
||||
!
|
||||
!BRING CHARGE DENSITY BACK TO ITS ORIGINAL POSITION
|
||||
|
@ -639,9 +638,9 @@
|
|||
!orbital group then does a local copy (reshuffling) of its data
|
||||
!
|
||||
from = 1
|
||||
DO ii = 1, nogrp
|
||||
IF ( nolist( ii ) == me_bgrp ) EXIT !Exit the loop
|
||||
from = from + dffts%nr1x*dffts%nr2x*dffts%npp( nolist( ii ) + 1 )! From where to copy initially
|
||||
DO ii = 1, dffts%nogrp
|
||||
IF ( dffts%nolist( ii ) == me_bgrp ) EXIT !Exit the loop
|
||||
from = from + dffts%nr1x*dffts%nr2x*dffts%npp( dffts%nolist( ii ) + 1 )! From where to copy initially
|
||||
ENDDO
|
||||
!
|
||||
DO ir = 1, nspin
|
||||
|
|
|
@ -2188,7 +2188,7 @@ END FUNCTION
|
|||
! initialize FFT descriptor
|
||||
|
||||
CALL fft_box_set( dfftb, nr1b, nr2b, nr3b, nr1bx, nr2bx, nr3bx, &
|
||||
nat, irb, me_bgrp+1, nproc_bgrp, intra_bgrp_comm, dfftp%npp, dfftp%ipp )
|
||||
nat, irb, dfftp%npp, dfftp%ipp )
|
||||
|
||||
IF( iprsta > 2 ) THEN
|
||||
isa = 1
|
||||
|
|
|
@ -910,7 +910,6 @@ SUBROUTINE terminate_run()
|
|||
USE cp_main_variables, ONLY : acc
|
||||
USE cg_module, ONLY : tcg, print_clock_tcg
|
||||
USE mp, ONLY : mp_report
|
||||
USE mp_global, ONLY : use_task_groups
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -951,11 +950,7 @@ SUBROUTINE terminate_run()
|
|||
!
|
||||
IF (tcg) call print_clock_tcg()
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
!
|
||||
CALL print_clock( 'ALLTOALL' )
|
||||
!
|
||||
END IF
|
||||
CALL print_clock( 'ALLTOALL' )
|
||||
!
|
||||
CALL mp_report()
|
||||
!
|
||||
|
|
|
@ -39,8 +39,7 @@
|
|||
USE funct, ONLY: dft_is_meta
|
||||
USE fft_base, ONLY: dffts
|
||||
USE fft_interfaces, ONLY: fwfft, invfft
|
||||
USE mp_global, ONLY: nogrp, me_bgrp, ogrp_comm, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY: me_bgrp
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -69,9 +68,9 @@
|
|||
!
|
||||
CALL start_clock( 'dforce' )
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
nogrp_ = nogrp
|
||||
ALLOCATE( psi( dffts%tg_nnr * nogrp ) )
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
nogrp_ = dffts%nogrp
|
||||
ALLOCATE( psi( dffts%tg_nnr * dffts%nogrp ) )
|
||||
ELSE
|
||||
nogrp_ = 1
|
||||
ALLOCATE( psi( nrxxs ) )
|
||||
|
@ -120,7 +119,7 @@
|
|||
iss2 = iss1
|
||||
END IF
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
!$omp parallel do
|
||||
DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 )
|
||||
|
@ -161,7 +160,7 @@
|
|||
!$omp parallel default(none) &
|
||||
!$omp private( eig_offset, igno, fi, fip, idx, fp, fm, ig ) &
|
||||
!$omp shared( nogrp_ , f, ngw, psi, df, da, c, tpiba2, tens, dffts, me_bgrp, &
|
||||
!$omp i, n, ggp, use_task_groups, nls, nlsm )
|
||||
!$omp i, n, ggp, nls, nlsm )
|
||||
|
||||
eig_offset = 0
|
||||
igno = 1
|
||||
|
@ -176,7 +175,7 @@
|
|||
fi = -0.5d0*f(i+idx-1)
|
||||
fip = -0.5d0*f(i+idx)
|
||||
endif
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!$omp do
|
||||
DO ig=1,ngw
|
||||
fp= psi(nls(ig)+eig_offset) + psi(nlsm(ig)+eig_offset)
|
||||
|
|
|
@ -47,7 +47,8 @@
|
|||
USE electrons_module, ONLY: bmeshset
|
||||
USE electrons_base, ONLY: distribute_bands
|
||||
USE problem_size, ONLY: cpsizes
|
||||
USE mp_global, ONLY: nproc_bgrp, nbgrp, my_bgrp_id, intra_bgrp_comm
|
||||
USE mp_global, ONLY: me_bgrp, nproc_bgrp, nbgrp, my_bgrp_id, intra_bgrp_comm
|
||||
USE mp_global, ONLY: get_ntask_groups
|
||||
USE core, ONLY: nlcc_any
|
||||
USE uspp, ONLY: okvan
|
||||
|
||||
|
@ -56,7 +57,7 @@
|
|||
integer :: i
|
||||
real(dp) :: rat1, rat2, rat3
|
||||
real(dp) :: at(3,3), bg(3,3), tpiba2
|
||||
integer :: ng_, ngs_, ngm_ , ngw_
|
||||
integer :: ng_, ngs_, ngm_ , ngw_ , nogrp_
|
||||
|
||||
|
||||
tpiba2 = ( tpi / alat ) ** 2
|
||||
|
@ -117,9 +118,10 @@
|
|||
! ... set the sticks mesh and distribute g vectors among processors
|
||||
! ... pstickset lso sets the local real-space grid dimensions
|
||||
!
|
||||
nogrp_ = get_ntask_groups()
|
||||
|
||||
CALL pstickset( gamma_only, bg, gcutm, gkcut, gcutms, &
|
||||
dfftp, dffts, ngw_ , ngm_ , ngs_ )
|
||||
dfftp, dffts, ngw_ , ngm_ , ngs_ , me_bgrp, nproc_bgrp, intra_bgrp_comm, nogrp_ )
|
||||
!
|
||||
!
|
||||
! ... Initialize reciprocal space local and global dimensions
|
||||
|
|
|
@ -26,8 +26,8 @@
|
|||
!
|
||||
USE parallel_include
|
||||
USE kinds, ONLY : DP
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_bgrp, nolist,&
|
||||
use_task_groups, my_bgrp_id, nbgrp, inter_bgrp_comm
|
||||
USE mp_global, ONLY : me_bgrp, &
|
||||
my_bgrp_id, nbgrp, inter_bgrp_comm
|
||||
USE mp, ONLY : mp_sum
|
||||
USE fft_base, ONLY : dffts, tg_gather
|
||||
use wave_base, only : wave_steepest, wave_verlet
|
||||
|
@ -74,9 +74,9 @@
|
|||
IF( restart ) iflag = 2
|
||||
END IF
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
tg_rhos_siz = nogrp * dffts%tg_nnr
|
||||
c2_siz = nogrp * ngwx
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
tg_rhos_siz = dffts%nogrp * dffts%tg_nnr
|
||||
c2_siz = dffts%nogrp * ngwx
|
||||
ELSE
|
||||
tg_rhos_siz = 1
|
||||
c2_siz = ngw
|
||||
|
@ -116,7 +116,7 @@
|
|||
c2 = 0D0
|
||||
c3 = 0D0
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
! The potential in rhos is distributed accros all processors
|
||||
! We need to redistribute it so that it is completely contained in the
|
||||
|
@ -126,7 +126,7 @@
|
|||
CALL tg_gather( dffts, rhos(:,i), tg_rhos(:,i) )
|
||||
END DO
|
||||
|
||||
incr = 2 * nogrp
|
||||
incr = 2 * dffts%nogrp
|
||||
|
||||
ELSE
|
||||
|
||||
|
@ -137,7 +137,7 @@
|
|||
|
||||
DO i = 1, nbsp_bgrp, incr
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
!The input coefficients to dforce cover eigenstates i:i+2*NOGRP-1
|
||||
!Thus, in dforce the dummy arguments for c0_bgrp(1,i) and
|
||||
|
@ -241,11 +241,8 @@
|
|||
USE efield_module, ONLY : dforce_efield, tefield
|
||||
USE electrons_base, ONLY : ispin, nspin, f, n=>nbsp
|
||||
USE cp_interfaces, ONLY : dforce
|
||||
USE mp_global, ONLY : use_task_groups
|
||||
!
|
||||
USE gvecw, ONLY: ngw
|
||||
!
|
||||
!
|
||||
USE fft_base, ONLY: dffts
|
||||
USE electrons_base, ONLY: nx=>nbnd, nupdwn, iupdwn, nbspx, nbsp
|
||||
USE mp, ONLY: mp_sum
|
||||
USE mp_global, ONLY: intra_bgrp_comm
|
||||
|
@ -286,7 +283,7 @@
|
|||
'Electric field and sic are not implemented',2)
|
||||
IF( nspin == 1 ) CALL errore(' runcp_force_pairing ',' inconsistent nspin ', 1)
|
||||
|
||||
IF( use_task_groups ) CALL errore(' runcp_force_pairing ',' task_groups not implemented ', 1)
|
||||
IF( dffts%have_task_groups ) CALL errore(' runcp_force_pairing ',' task_groups not implemented ', 1)
|
||||
!
|
||||
ALLOCATE( emadt2( ngw ) )
|
||||
ALLOCATE( emaver( ngw ) )
|
||||
|
|
|
@ -13,7 +13,7 @@ MODULE environment
|
|||
USE io_files, ONLY: crash_file, crashunit, nd_nmbr
|
||||
USE io_global, ONLY: stdout, meta_ionode
|
||||
USE mp_global, ONLY: me_image, my_image_id, root_image, nimage, &
|
||||
nproc_image, nproc, nogrp, npool, nproc_pool, nbgrp
|
||||
nproc_image, nproc, npool, nproc_pool, nbgrp, get_ntask_groups
|
||||
USE global_version, ONLY: version_number
|
||||
|
||||
IMPLICIT NONE
|
||||
|
@ -194,8 +194,8 @@ CONTAINS
|
|||
'(5X,"K-points division: npool = ",I4)' ) npool
|
||||
IF ( nproc_pool > 1 ) WRITE( stdout, &
|
||||
'(5X,"R & G space division: proc/pool = ",I4)' ) nproc_pool
|
||||
IF ( nogrp > 1 ) WRITE( stdout, &
|
||||
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) nogrp
|
||||
IF ( get_ntask_groups() > 1 ) WRITE( stdout, &
|
||||
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) get_ntask_groups()
|
||||
!
|
||||
END SUBROUTINE parallel_info
|
||||
!==-----------------------------------------------------------------------==!
|
||||
|
|
|
@ -55,7 +55,8 @@
|
|||
! like infiniband, ethernet, myrinet
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
||||
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg, &
|
||||
me_pgrp, npgrp, pgrp_comm, nplist )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! transpose the fft grid across nodes
|
||||
|
@ -89,7 +90,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
|||
USE parallel_include
|
||||
#endif
|
||||
USE mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
|
||||
my_image_id, nogrp, pgrp_comm, nplist, me_pgrp, npgrp
|
||||
my_image_id
|
||||
USE kinds, ONLY : DP
|
||||
|
||||
IMPLICIT NONE
|
||||
|
@ -97,6 +98,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
|||
INTEGER, INTENT(in) :: nr3x, nxx_, sign, ncp_ (:), npp_ (:)
|
||||
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_aux (nxx_)
|
||||
LOGICAL, OPTIONAL, INTENT(in) :: use_tg
|
||||
INTEGER, OPTIONAL, INTENT(in) :: pgrp_comm, nplist(:), npgrp, me_pgrp
|
||||
|
||||
#ifdef __PARA
|
||||
|
||||
|
@ -426,7 +428,8 @@ END SUBROUTINE fft_scatter
|
|||
! with a defined topology, like on bluegene and cray machine
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
||||
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg, &
|
||||
me_pgrp, npgrp, pgrp_comm, nplist )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! transpose the fft grid across nodes
|
||||
|
@ -459,8 +462,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
|||
#ifdef __PARA
|
||||
USE parallel_include
|
||||
#endif
|
||||
USE mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
|
||||
my_image_id, nogrp, pgrp_comm, nplist
|
||||
USE mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc
|
||||
USE kinds, ONLY : DP
|
||||
|
||||
IMPLICIT NONE
|
||||
|
@ -468,6 +470,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
|||
INTEGER, INTENT(in) :: nr3x, nxx_, sign, ncp_ (:), npp_ (:)
|
||||
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_aux (nxx_)
|
||||
LOGICAL, OPTIONAL, INTENT(in) :: use_tg
|
||||
INTEGER, OPTIONAL, INTENT(in) :: pgrp_comm, nplist(:), npgrp, me_pgrp
|
||||
|
||||
#ifdef __PARA
|
||||
|
||||
|
@ -491,7 +494,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
|||
!
|
||||
IF( use_tg_ ) THEN
|
||||
! This is the number of procs. in the plane-wave group
|
||||
nprocp = nproc_pool / nogrp
|
||||
nprocp = npgrp
|
||||
ELSE
|
||||
nprocp = nproc_pool
|
||||
ENDIF
|
||||
|
@ -1191,7 +1194,7 @@ SUBROUTINE tg_gather( dffts, v, tg_v )
|
|||
IF( size( tg_v ) < nsiz_tg ) &
|
||||
CALL errore( ' tg_gather ', ' tg_v too small ', ( nsiz_tg - size( tg_v ) ) )
|
||||
|
||||
nsiz = dffts%npp( dffts%myid+1 ) * dffts%nr1x * dffts%nr2x
|
||||
nsiz = dffts%npp( dffts%mype+1 ) * dffts%nr1x * dffts%nr2x
|
||||
|
||||
IF( size( v ) < nsiz ) &
|
||||
CALL errore( ' tg_gather ', ' v too small ', ( nsiz - size( v ) ) )
|
||||
|
|
|
@ -83,7 +83,6 @@
|
|||
use fft_scalar, only: cfft3d, cfft3ds, cft_b, cft_b_omp
|
||||
use fft_parallel, only: tg_cft3s
|
||||
USE fft_types, only: fft_dlay_descriptor
|
||||
use mp_global, only: use_task_groups
|
||||
|
||||
IMPLICIT none
|
||||
|
||||
|
@ -135,7 +134,7 @@
|
|||
ELSE IF( grid_type == 'Smooth' ) THEN
|
||||
call tg_cft3s( f, dffts, 1 )
|
||||
ELSE IF( grid_type == 'Wave' ) THEN
|
||||
call tg_cft3s( f, dffts, 2, use_task_groups )
|
||||
call tg_cft3s( f, dffts, 2, dffts%have_task_groups )
|
||||
ELSE IF( grid_type == 'Box' .AND. dfftb%np3( ia ) > 0 ) THEN
|
||||
#if defined __OPENMP && defined __FFTW
|
||||
call cft_b_omp( f, dfftb%nr1, dfftb%nr2, dfftb%nr3, &
|
||||
|
@ -200,7 +199,6 @@
|
|||
use fft_scalar, only: cfft3d, cfft3ds
|
||||
use fft_parallel, only: tg_cft3s
|
||||
USE fft_types, only: fft_dlay_descriptor
|
||||
use mp_global, only: use_task_groups
|
||||
|
||||
implicit none
|
||||
|
||||
|
@ -237,7 +235,7 @@
|
|||
ELSE IF( grid_type == 'Smooth' ) THEN
|
||||
call tg_cft3s(f,dffts,-1)
|
||||
ELSE IF( grid_type == 'Wave' ) THEN
|
||||
call tg_cft3s(f,dffts,-2, use_task_groups )
|
||||
call tg_cft3s(f,dffts,-2, dffts%have_task_groups )
|
||||
END IF
|
||||
|
||||
#else
|
||||
|
|
|
@ -60,8 +60,8 @@ SUBROUTINE tg_cft3s( f, dfft, isgn, use_task_groups )
|
|||
USE fft_scalar, ONLY : cft_1z, cft_2xy
|
||||
USE fft_base, ONLY : fft_scatter
|
||||
USE kinds, ONLY : DP
|
||||
USE mp_global, ONLY : me_pool, nproc_pool, ogrp_comm, npgrp, nogrp, &
|
||||
intra_pool_comm, nolist, nplist
|
||||
USE mp_global, ONLY : me_pool, nproc_pool, &
|
||||
intra_pool_comm
|
||||
USE fft_types, ONLY : fft_dlay_descriptor
|
||||
USE parallel_include
|
||||
|
||||
|
@ -99,8 +99,8 @@ SUBROUTINE tg_cft3s( f, dfft, isgn, use_task_groups )
|
|||
nx3 = dfft%nr3x
|
||||
!
|
||||
IF( use_tg ) THEN
|
||||
ALLOCATE( aux( nogrp * dfft%tg_nnr ) )
|
||||
ALLOCATE( YF ( nogrp * dfft%tg_nnr ) )
|
||||
ALLOCATE( aux( dfft%nogrp * dfft%tg_nnr ) )
|
||||
ALLOCATE( YF ( dfft%nogrp * dfft%tg_nnr ) )
|
||||
ELSE
|
||||
ALLOCATE( aux( dfft%tg_nnr ) )
|
||||
ENDIF
|
||||
|
@ -198,10 +198,10 @@ CONTAINS
|
|||
!
|
||||
IF( .not. use_tg ) RETURN
|
||||
!
|
||||
IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > size( yf ) ) THEN
|
||||
IF( dfft%tg_rdsp(dfft%nogrp) + dfft%tg_rcv(dfft%nogrp) > size( yf ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 1 )
|
||||
ENDIF
|
||||
IF( dfft%tg_psdsp(nogrp) + dfft%tg_snd(nogrp) > size( f ) ) THEN
|
||||
IF( dfft%tg_psdsp(dfft%nogrp) + dfft%tg_snd(dfft%nogrp) > size( f ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 2 )
|
||||
ENDIF
|
||||
|
||||
|
@ -213,7 +213,7 @@ CONTAINS
|
|||
#if defined __MPI
|
||||
|
||||
CALL MPI_ALLTOALLV( f(1), dfft%tg_snd, dfft%tg_psdsp, MPI_DOUBLE_COMPLEX, yf(1), dfft%tg_rcv, &
|
||||
& dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
|
||||
& dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, dfft%ogrp_comm, IERR)
|
||||
IF( ierr /= 0 ) THEN
|
||||
CALL errore( ' tg_cfft ', ' alltoall error 1 ', abs(ierr) )
|
||||
ENDIF
|
||||
|
@ -237,10 +237,10 @@ CONTAINS
|
|||
!
|
||||
IF( .not. use_tg ) RETURN
|
||||
!
|
||||
IF( dfft%tg_usdsp(nogrp) + dfft%tg_snd(nogrp) > size( f ) ) THEN
|
||||
IF( dfft%tg_usdsp(dfft%nogrp) + dfft%tg_snd(dfft%nogrp) > size( f ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 3 )
|
||||
ENDIF
|
||||
IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > size( yf ) ) THEN
|
||||
IF( dfft%tg_rdsp(dfft%nogrp) + dfft%tg_rcv(dfft%nogrp) > size( yf ) ) THEN
|
||||
CALL errore( ' tg_cfft ', ' inconsistent size ', 4 )
|
||||
ENDIF
|
||||
|
||||
|
@ -249,7 +249,7 @@ CONTAINS
|
|||
#if defined __MPI
|
||||
CALL MPI_Alltoallv( yf(1), &
|
||||
dfft%tg_rcv, dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, f(1), &
|
||||
dfft%tg_snd, dfft%tg_usdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
|
||||
dfft%tg_snd, dfft%tg_usdsp, MPI_DOUBLE_COMPLEX, dfft%ogrp_comm, IERR)
|
||||
IF( ierr /= 0 ) THEN
|
||||
CALL errore( ' tg_cfft ', ' alltoall error 2 ', abs(ierr) )
|
||||
ENDIF
|
||||
|
@ -288,7 +288,8 @@ CONTAINS
|
|||
npp = dfft%tg_npp( me_p )
|
||||
nnp = nx1*nx2
|
||||
!
|
||||
CALL fft_scatter( aux, nx3, nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg )
|
||||
CALL fft_scatter( aux, nx3, dfft%nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg, &
|
||||
dfft%me_pgrp, dfft%npgrp, dfft%pgrp_comm, dfft%nplist )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
|
@ -407,7 +408,8 @@ CONTAINS
|
|||
!
|
||||
IF( use_tg ) THEN
|
||||
!
|
||||
CALL fft_scatter( aux, nx3, nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg )
|
||||
CALL fft_scatter( aux, nx3, dfft%nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg, &
|
||||
dfft%me_pgrp, dfft%npgrp, dfft%pgrp_comm, dfft%nplist )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
|
|
|
@ -54,19 +54,22 @@ MODULE fft_types
|
|||
!
|
||||
! fft parallelization
|
||||
!
|
||||
INTEGER :: myid ! my processor id
|
||||
INTEGER :: mype ! my processor id (starting from 0) in the fft group
|
||||
INTEGER :: comm ! communicator of the fft gruop
|
||||
INTEGER :: nproc ! number of processor in the fft group
|
||||
!
|
||||
! task groups
|
||||
!
|
||||
INTEGER :: NOGRP
|
||||
INTEGER :: NPGRP
|
||||
INTEGER :: ogrp_comm
|
||||
INTEGER :: pgrp_comm
|
||||
INTEGER, POINTER :: nolist(:) ! number of sticks per task group ( wave func )
|
||||
!
|
||||
LOGICAL :: have_task_groups
|
||||
!
|
||||
INTEGER :: me_pgrp ! task id for plane wave task group
|
||||
INTEGER :: nogrp ! number of proc. in an orbital "task group"
|
||||
INTEGER :: npgrp ! number of proc. in a plane-wave "task group"
|
||||
INTEGER :: ogrp_comm ! orbital group communicator
|
||||
INTEGER :: pgrp_comm ! plane-wave group communicator
|
||||
INTEGER, POINTER :: nolist(:) ! list of pes in orbital group
|
||||
INTEGER, POINTER :: nplist(:) ! list of pes in pw group
|
||||
!
|
||||
INTEGER :: tg_nnr ! maximum among nnr
|
||||
INTEGER, POINTER :: tg_nsw(:) ! number of sticks per task group ( wave func )
|
||||
INTEGER, POINTER :: tg_npp(:) ! number of "Z" planes per task group
|
||||
|
@ -84,9 +87,10 @@ MODULE fft_types
|
|||
|
||||
CONTAINS
|
||||
|
||||
SUBROUTINE fft_dlay_allocate( desc, myid, nproc, comm, nx, ny )
|
||||
SUBROUTINE fft_dlay_allocate( desc, mype, nproc, comm, nogrp, nx, ny )
|
||||
TYPE (fft_dlay_descriptor) :: desc
|
||||
INTEGER, INTENT(in) :: myid, nproc, comm, nx, ny ! myid starting from 0
|
||||
INTEGER, INTENT(in) :: mype, nproc, comm, nx, ny ! mype starting from 0
|
||||
INTEGER, INTENT(in) :: nogrp ! number of task groups
|
||||
ALLOCATE( desc%nsp( nproc ) )
|
||||
ALLOCATE( desc%nsw( nproc ) )
|
||||
ALLOCATE( desc%ngl( nproc ) )
|
||||
|
@ -113,15 +117,25 @@ CONTAINS
|
|||
|
||||
desc%id = 0
|
||||
|
||||
desc%myid = myid
|
||||
desc%mype = mype
|
||||
desc%comm = comm
|
||||
desc%nproc = nproc
|
||||
desc%have_task_groups = .false.
|
||||
desc%NOGRP = 0
|
||||
desc%NPGRP = 0
|
||||
IF( nogrp > 1 ) &
|
||||
desc%have_task_groups = .true.
|
||||
desc%me_pgrp = 0
|
||||
!
|
||||
IF( MOD( nproc, MAX( 1, nogrp ) ) /= 0 ) &
|
||||
CALL errore( " fft_dlay_allocate ", "the number of task groups should be a divisor of nproc ", 1 )
|
||||
|
||||
desc%nogrp = MAX( 1, nogrp )
|
||||
desc%npgrp = nproc / MAX( 1, nogrp )
|
||||
desc%ogrp_comm = 0
|
||||
desc%pgrp_comm = 0
|
||||
NULLIFY( desc%nolist )
|
||||
ALLOCATE( desc%nolist( desc%nogrp ) )
|
||||
ALLOCATE( desc%nplist( desc%npgrp ) )
|
||||
desc%nolist = 0
|
||||
desc%nplist = 0
|
||||
NULLIFY( desc%tg_nsw )
|
||||
NULLIFY( desc%tg_npp )
|
||||
NULLIFY( desc%tg_snd )
|
||||
|
@ -146,9 +160,10 @@ CONTAINS
|
|||
IF ( associated( desc%ismap ) ) DEALLOCATE( desc%ismap )
|
||||
IF ( associated( desc%iplp ) ) DEALLOCATE( desc%iplp )
|
||||
IF ( associated( desc%iplw ) ) DEALLOCATE( desc%iplw )
|
||||
IF ( associated( desc%nolist ) ) DEALLOCATE( desc%nolist )
|
||||
IF ( associated( desc%nplist ) ) DEALLOCATE( desc%nplist )
|
||||
desc%id = 0
|
||||
IF( desc%have_task_groups ) THEN
|
||||
IF ( associated( desc%nolist ) ) DEALLOCATE( desc%nolist )
|
||||
IF ( associated( desc%tg_nsw ) ) DEALLOCATE( desc%tg_nsw )
|
||||
IF ( associated( desc%tg_npp ) ) DEALLOCATE( desc%tg_npp )
|
||||
IF ( associated( desc%tg_snd ) ) DEALLOCATE( desc%tg_snd )
|
||||
|
@ -162,9 +177,9 @@ CONTAINS
|
|||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
||||
SUBROUTINE fft_box_allocate( desc, myid, nproc, comm, nat )
|
||||
SUBROUTINE fft_box_allocate( desc, mype, nproc, comm, nat )
|
||||
TYPE (fft_dlay_descriptor) :: desc
|
||||
INTEGER, INTENT(in) :: nat, nproc, myid, comm ! myid starting from 0
|
||||
INTEGER, INTENT(in) :: nat, nproc, mype, comm ! mype starting from 0
|
||||
ALLOCATE( desc%irb( 3, nat ) )
|
||||
ALLOCATE( desc%imin3( nat ) )
|
||||
ALLOCATE( desc%imax3( nat ) )
|
||||
|
@ -177,7 +192,7 @@ CONTAINS
|
|||
desc%npp = 0
|
||||
desc%ipp = 0
|
||||
desc%np3 = 0
|
||||
desc%myid = myid
|
||||
desc%mype = mype
|
||||
desc%nproc = nproc
|
||||
desc%comm = comm
|
||||
desc%have_task_groups = .false.
|
||||
|
@ -197,8 +212,8 @@ CONTAINS
|
|||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
||||
SUBROUTINE fft_dlay_set( desc, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, me, &
|
||||
nproc, comm, nogrp, ub, lb, idx, in1, in2, ncp, ncpw, ngp, ngpw, st, stw )
|
||||
SUBROUTINE fft_dlay_set( desc, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, &
|
||||
ub, lb, idx, in1, in2, ncp, ncpw, ngp, ngpw, st, stw )
|
||||
|
||||
TYPE (fft_dlay_descriptor) :: desc
|
||||
|
||||
|
@ -206,10 +221,6 @@ CONTAINS
|
|||
INTEGER, INTENT(in) :: nst
|
||||
INTEGER, INTENT(in) :: nr1, nr2, nr3 ! size of real space grid
|
||||
INTEGER, INTENT(in) :: nr1x, nr2x, nr3x ! padded size of real space grid
|
||||
INTEGER, INTENT(in) :: me ! processor index (starting from 1)
|
||||
INTEGER, INTENT(in) :: nproc ! number of processors
|
||||
INTEGER, INTENT(in) :: comm ! communicator
|
||||
INTEGER, INTENT(in) :: nogrp ! number of processors in task-group
|
||||
INTEGER, INTENT(in) :: ub(3), lb(3) ! upper and lower bound of real space indices
|
||||
INTEGER, INTENT(in) :: idx(:)
|
||||
INTEGER, INTENT(in) :: in1(:)
|
||||
|
@ -221,15 +232,15 @@ CONTAINS
|
|||
INTEGER, INTENT(in) :: st( lb(1) : ub(1), lb(2) : ub(2) )
|
||||
INTEGER, INTENT(in) :: stw( lb(1) : ub(1), lb(2) : ub(2) )
|
||||
|
||||
INTEGER :: npp( nproc ), n3( nproc ), nsp( nproc )
|
||||
INTEGER :: npp( desc%nproc ), n3( desc%nproc ), nsp( desc%nproc )
|
||||
INTEGER :: np, nq, i, is, iss, i1, i2, m1, m2, n1, n2, ip
|
||||
|
||||
! Task-grouping C. Bekas
|
||||
!
|
||||
INTEGER :: sm
|
||||
|
||||
IF( ( size( desc%ngl ) < nproc ) .or. ( size( desc%npp ) < nproc ) .or. &
|
||||
( size( desc%ipp ) < nproc ) .or. ( size( desc%iss ) < nproc ) ) &
|
||||
IF( ( size( desc%ngl ) < desc%nproc ) .or. ( size( desc%npp ) < desc%nproc ) .or. &
|
||||
( size( desc%ipp ) < desc%nproc ) .or. ( size( desc%iss ) < desc%nproc ) ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong descriptor dimensions ', 1 )
|
||||
|
||||
IF( ( nr1 > nr1x ) .or. ( nr2 > nr2x ) .or. ( nr3 > nr3x ) ) &
|
||||
|
@ -238,37 +249,26 @@ CONTAINS
|
|||
IF( ( size( idx ) < nst ) .or. ( size( in1 ) < nst ) .or. ( size( in2 ) < nst ) ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong number of stick dimensions ', 3 )
|
||||
|
||||
IF( ( size( ncp ) < nproc ) .or. ( size( ngp ) < nproc ) ) &
|
||||
IF( ( size( ncp ) < desc%nproc ) .or. ( size( ngp ) < desc%nproc ) ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong stick dimensions ', 4 )
|
||||
|
||||
IF( desc%nproc /= nproc ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong number of processor ', 4 )
|
||||
|
||||
IF( desc%myid /= (me - 1) ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong processor index ', 4 )
|
||||
|
||||
IF( desc%comm /= comm ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong communicator ', 4 )
|
||||
|
||||
desc%have_task_groups = .false.
|
||||
|
||||
! Set the number of "xy" planes for each processor
|
||||
! in other word do a slab partition along the z axis
|
||||
|
||||
sm = 0
|
||||
npp = 0
|
||||
IF ( nproc == 1 ) THEN
|
||||
IF ( desc%nproc == 1 ) THEN
|
||||
npp(1) = nr3
|
||||
ELSEIF( nproc <= nr3 ) THEN
|
||||
np = nr3 / nproc
|
||||
nq = nr3 - np * nproc
|
||||
DO i = 1, nproc
|
||||
ELSEIF( desc%nproc <= nr3 ) THEN
|
||||
np = nr3 / desc%nproc
|
||||
nq = nr3 - np * desc%nproc
|
||||
DO i = 1, desc%nproc
|
||||
npp(i) = np
|
||||
IF ( i <= nq ) npp(i) = np + 1
|
||||
ENDDO
|
||||
ELSE
|
||||
DO ip = 1, nr3 ! some compiler complains for empty DO loops
|
||||
DO i = 1, nproc, nogrp
|
||||
DO i = 1, desc%nproc, desc%nogrp
|
||||
npp(i) = npp(i) + 1
|
||||
sm = sm + 1
|
||||
IF ( sm == nr3 ) exit
|
||||
|
@ -277,17 +277,17 @@ CONTAINS
|
|||
ENDDO
|
||||
ENDIF
|
||||
|
||||
desc%npp( 1:nproc ) = npp
|
||||
desc%npl = npp( me )
|
||||
desc%npp( 1:desc%nproc ) = npp
|
||||
desc%npl = npp( desc%mype + 1 )
|
||||
|
||||
! Find out the index of the starting plane on each proc
|
||||
|
||||
n3 = 0
|
||||
DO i = 2, nproc
|
||||
DO i = 2, desc%nproc
|
||||
n3(i) = n3(i-1) + npp(i-1)
|
||||
ENDDO
|
||||
|
||||
desc%ipp( 1:nproc ) = n3
|
||||
desc%ipp( 1:desc%nproc ) = n3
|
||||
|
||||
! Set the proper number of sticks
|
||||
|
||||
|
@ -309,14 +309,14 @@ CONTAINS
|
|||
|
||||
! Set fft local workspace dimension
|
||||
|
||||
IF ( nproc == 1 ) THEN
|
||||
IF ( desc%nproc == 1 ) THEN
|
||||
desc%nnr = nr1x * nr2x * nr3x
|
||||
desc%tg_nnr = desc%nnr
|
||||
ELSE
|
||||
desc%nnr = max( nr3x * ncp(me), nr1x * nr2x * npp(me) )
|
||||
desc%nnr = max( nr3x * ncp( desc%mype + 1 ), nr1x * nr2x * npp( desc%mype + 1 ) )
|
||||
desc%nnr = max( 1, desc%nnr ) ! ensure that desc%nrr > 0 ( for extreme parallelism )
|
||||
desc%tg_nnr = desc%nnr
|
||||
DO i = 1, nproc
|
||||
DO i = 1, desc%nproc
|
||||
desc%tg_nnr = max( desc%tg_nnr, nr3x * ncp( i ) )
|
||||
desc%tg_nnr = max( desc%tg_nnr, nr1x * nr2x * npp( i ) )
|
||||
ENDDO
|
||||
|
@ -325,8 +325,8 @@ CONTAINS
|
|||
|
||||
|
||||
|
||||
desc%ngl( 1:nproc ) = ngp( 1:nproc )
|
||||
desc%nwl( 1:nproc ) = ngpw( 1:nproc )
|
||||
desc%ngl( 1:desc%nproc ) = ngp( 1:desc%nproc )
|
||||
desc%nwl( 1:desc%nproc ) = ngpw( 1:desc%nproc )
|
||||
|
||||
IF( size( desc%isind ) < ( nr1x * nr2x ) ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong descriptor dimensions, isind ', 5 )
|
||||
|
@ -379,7 +379,7 @@ CONTAINS
|
|||
! local stick ( desc%iss )
|
||||
!
|
||||
|
||||
DO i = 1, nproc
|
||||
DO i = 1, desc%nproc
|
||||
IF( i == 1 ) THEN
|
||||
desc%iss( i ) = 0
|
||||
ELSE
|
||||
|
@ -406,7 +406,7 @@ CONTAINS
|
|||
IF( ip > 0 ) THEN
|
||||
nsp( ip ) = nsp( ip ) + 1
|
||||
desc%ismap( nsp( ip ) + desc%iss( ip ) ) = iss
|
||||
IF( ip == me ) THEN
|
||||
IF( ip == ( desc%mype + 1 ) ) THEN
|
||||
desc%isind( iss ) = nsp( ip )
|
||||
ELSE
|
||||
desc%isind( iss ) = 0
|
||||
|
@ -416,14 +416,14 @@ CONTAINS
|
|||
|
||||
! chack number of stick against the input value
|
||||
|
||||
IF( any( nsp( 1:nproc ) /= ncpw( 1:nproc ) ) ) THEN
|
||||
DO ip = 1, nproc
|
||||
IF( any( nsp( 1:desc%nproc ) /= ncpw( 1:desc%nproc ) ) ) THEN
|
||||
DO ip = 1, desc%nproc
|
||||
WRITE( stdout,*) ' * ', ip, ' * ', nsp( ip ), ' /= ', ncpw( ip )
|
||||
ENDDO
|
||||
CALL errore( ' fft_dlay_set ', ' inconsistent number of sticks ', 7 )
|
||||
ENDIF
|
||||
|
||||
desc%nsw( 1:nproc ) = nsp( 1:nproc )
|
||||
desc%nsw( 1:desc%nproc ) = nsp( 1:desc%nproc )
|
||||
|
||||
! then add pseudopotential stick
|
||||
|
||||
|
@ -432,7 +432,7 @@ CONTAINS
|
|||
IF( ip < 0 ) THEN
|
||||
nsp( -ip ) = nsp( -ip ) + 1
|
||||
desc%ismap( nsp( -ip ) + desc%iss( -ip ) ) = iss
|
||||
IF( -ip == me ) THEN
|
||||
IF( -ip == ( desc%mype + 1 ) ) THEN
|
||||
desc%isind( iss ) = nsp( -ip )
|
||||
ELSE
|
||||
desc%isind( iss ) = 0
|
||||
|
@ -442,14 +442,14 @@ CONTAINS
|
|||
|
||||
! chack number of stick against the input value
|
||||
|
||||
IF( any( nsp( 1:nproc ) /= ncp( 1:nproc ) ) ) THEN
|
||||
DO ip = 1, nproc
|
||||
IF( any( nsp( 1:desc%nproc ) /= ncp( 1:desc%nproc ) ) ) THEN
|
||||
DO ip = 1, desc%nproc
|
||||
WRITE( stdout,*) ' * ', ip, ' * ', nsp( ip ), ' /= ', ncp( ip )
|
||||
ENDDO
|
||||
CALL errore( ' fft_dlay_set ', ' inconsistent number of sticks ', 8 )
|
||||
ENDIF
|
||||
|
||||
desc%nsp( 1:nproc ) = nsp( 1:nproc )
|
||||
desc%nsp( 1:desc%nproc ) = nsp( 1:desc%nproc )
|
||||
|
||||
icount = icount + 1
|
||||
desc%id = icount
|
||||
|
@ -464,13 +464,13 @@ CONTAINS
|
|||
!=----------------------------------------------------------------------------=!
|
||||
|
||||
SUBROUTINE fft_box_set( desc, nr1b, nr2b, nr3b, nr1bx, nr2bx, nr3bx, nat, &
|
||||
irb, me, nproc, comm, npp, ipp )
|
||||
irb, npp, ipp )
|
||||
|
||||
IMPLICIT NONE
|
||||
|
||||
TYPE (fft_dlay_descriptor) :: desc
|
||||
|
||||
INTEGER, INTENT(in) :: nat, me, nproc, comm
|
||||
INTEGER, INTENT(in) :: nat
|
||||
INTEGER, INTENT(in) :: irb( :, : )
|
||||
INTEGER, INTENT(in) :: npp( : )
|
||||
INTEGER, INTENT(in) :: ipp( : )
|
||||
|
@ -483,19 +483,9 @@ CONTAINS
|
|||
CALL errore(" fft_box_set ", " inconsistent dimensions ", 1 )
|
||||
ENDIF
|
||||
|
||||
IF( nproc > size( desc%npp ) ) &
|
||||
IF( desc%nproc > size( desc%npp ) ) &
|
||||
CALL errore(" fft_box_set ", " inconsistent dimensions ", 2 )
|
||||
|
||||
IF( desc%nproc /= nproc ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong number of processor ', 4 )
|
||||
|
||||
IF( desc%myid /= (me - 1) ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong processor index ', 4 )
|
||||
|
||||
IF( desc%comm /= comm ) &
|
||||
CALL errore( ' fft_dlay_set ', ' wrong communicator ', 4 )
|
||||
|
||||
|
||||
desc%nr1 = nr1b
|
||||
desc%nr2 = nr2b
|
||||
desc%nr3 = nr3b
|
||||
|
@ -504,10 +494,10 @@ CONTAINS
|
|||
desc%nr3x = nr3bx
|
||||
|
||||
desc%irb( 1:3, 1:nat ) = irb( 1:3, 1:nat )
|
||||
desc%npp( 1:nproc ) = npp( 1:nproc )
|
||||
desc%ipp( 1:nproc ) = ipp( 1:nproc )
|
||||
desc%npp( 1:desc%nproc ) = npp( 1:desc%nproc )
|
||||
desc%ipp( 1:desc%nproc ) = ipp( 1:desc%nproc )
|
||||
|
||||
nr3 = sum( npp( 1:nproc ) )
|
||||
nr3 = sum( npp( 1:desc%nproc ) )
|
||||
|
||||
DO isa = 1, nat
|
||||
|
||||
|
@ -519,8 +509,8 @@ CONTAINS
|
|||
ibig3 = 1 + mod( irb3 + ir3 - 2, nr3 )
|
||||
IF( ibig3 < 1 .or. ibig3 > nr3 ) &
|
||||
& CALL errore(' fft_box_set ',' ibig3 wrong ', ibig3 )
|
||||
ibig3 = ibig3 - ipp( me )
|
||||
IF ( ibig3 > 0 .and. ibig3 <= npp(me) ) THEN
|
||||
ibig3 = ibig3 - ipp( desc%mype + 1 )
|
||||
IF ( ibig3 > 0 .and. ibig3 <= npp(desc%mype + 1) ) THEN
|
||||
imin3 = min( imin3, ir3 )
|
||||
imax3 = max( imax3, ir3 )
|
||||
ENDIF
|
||||
|
@ -583,8 +573,9 @@ CONTAINS
|
|||
desc%nnp = nr1x * nr2x
|
||||
desc%npp = nr3
|
||||
desc%ipp = 0
|
||||
desc%have_task_groups = .false.
|
||||
desc%tg_nnr = desc%nnr
|
||||
!
|
||||
desc%have_task_groups = .false.
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE fft_dlay_scalar
|
||||
|
|
|
@ -74,15 +74,7 @@ MODULE mp_global
|
|||
!
|
||||
! ... "task" groups (for band parallelization of FFT)
|
||||
!
|
||||
INTEGER :: nogrp = 1 ! number of proc. in an orbital "task group"
|
||||
INTEGER :: npgrp = 1 ! number of proc. in a plane-wave "task group"
|
||||
INTEGER :: me_pgrp = 0 ! task id for plane wave task group
|
||||
INTEGER, ALLOCATABLE :: nolist(:) ! list of procs in my orbital task group
|
||||
INTEGER, ALLOCATABLE :: nplist(:) ! list of procs in my plane wave task group
|
||||
INTEGER :: pgrp_comm = 0 ! plane-wave group communicator
|
||||
INTEGER :: ogrp_comm = 0 ! orbital group communicator
|
||||
LOGICAL :: &
|
||||
use_task_groups = .FALSE. ! if TRUE task groups parallelization is used
|
||||
INTEGER :: ntask_groups = 1 ! number of proc. in an orbital "task group"
|
||||
!
|
||||
! ... Misc parallelization info
|
||||
!
|
||||
|
@ -93,6 +85,7 @@ MODULE mp_global
|
|||
INTEGER :: nproc_pool_file = 1 ! in a pool
|
||||
!
|
||||
PRIVATE :: init_images, init_pools, init_bands, init_ortho
|
||||
PRIVATE :: ntask_groups
|
||||
!
|
||||
CONTAINS
|
||||
!
|
||||
|
@ -112,7 +105,7 @@ CONTAINS
|
|||
! ... NPOOL must be a whole divisor of NPROC
|
||||
!
|
||||
IMPLICIT NONE
|
||||
INTEGER :: world, ntask_groups, nproc_ortho_in, meta_ionode_id
|
||||
INTEGER :: world, nproc_ortho_in, meta_ionode_id
|
||||
INTEGER :: root = 0
|
||||
LOGICAL :: meta_ionode
|
||||
!
|
||||
|
@ -193,12 +186,6 @@ CONTAINS
|
|||
!
|
||||
CALL init_ortho( nproc_ortho_in )
|
||||
!
|
||||
use_task_groups = ( ntask_groups > 1 )
|
||||
IF( use_task_groups ) THEN
|
||||
nogrp = ntask_groups
|
||||
CALL init_task_groups( )
|
||||
END IF
|
||||
!
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
@ -225,7 +212,6 @@ CONTAINS
|
|||
me_pool = mpime
|
||||
me_image = mpime
|
||||
me_bgrp = mpime
|
||||
me_pgrp = me_pool
|
||||
root_pool = root
|
||||
root_image = root
|
||||
root_bgrp = root
|
||||
|
@ -236,10 +222,6 @@ CONTAINS
|
|||
inter_bgrp_comm = group_i
|
||||
intra_bgrp_comm = group_i
|
||||
ortho_comm = group_i
|
||||
ALLOCATE( nolist( nproc_i ) )
|
||||
ALLOCATE( nplist( nproc_i ) )
|
||||
nolist = 0
|
||||
nplist = 0
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
@ -251,8 +233,6 @@ CONTAINS
|
|||
!
|
||||
CALL mp_barrier()
|
||||
CALL mp_end ()
|
||||
IF (ALLOCATED (nolist) ) DEALLOCATE ( nolist )
|
||||
IF (ALLOCATED (nplist) ) DEALLOCATE ( nplist )
|
||||
!
|
||||
END SUBROUTINE mp_global_end
|
||||
!
|
||||
|
@ -468,87 +448,6 @@ CONTAINS
|
|||
END SUBROUTINE init_ortho
|
||||
!
|
||||
!
|
||||
SUBROUTINE init_task_groups( )
|
||||
!
|
||||
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
|
||||
INTEGER :: pgroup( nproc_pool )
|
||||
!
|
||||
!SUBDIVIDE THE PROCESSORS IN GROUPS
|
||||
!
|
||||
!THE NUMBER OF GROUPS HAS TO BE A DIVISOR OF THE NUMBER
|
||||
!OF PROCESSORS
|
||||
!
|
||||
IF( MOD( nproc_pool, nogrp ) /= 0 ) &
|
||||
CALL errore( " init_task_groups ", "the number of task groups should be a divisor of nproc_pool ", 1 )
|
||||
!
|
||||
npgrp = nproc_pool / nogrp
|
||||
|
||||
DO i = 1, nproc_pool
|
||||
pgroup( i ) = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY ORBITAL GROUP
|
||||
!
|
||||
! processors in these group have contiguous indexes
|
||||
!
|
||||
N1 = ( me_pool / NOGRP ) * NOGRP - 1
|
||||
DO i = 1, nogrp
|
||||
nolist( I ) = pgroup( N1 + I + 1 )
|
||||
IF( me_pool == nolist( I ) ) ipos = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
|
||||
!
|
||||
DO I = 1, npgrp
|
||||
nplist( I ) = pgroup( ipos + ( i - 1 ) * nogrp + 1 )
|
||||
ENDDO
|
||||
|
||||
!
|
||||
!SET UP THE GROUPS
|
||||
!
|
||||
!
|
||||
!CREATE ORBITAL GROUPS
|
||||
!
|
||||
#if defined __MPI
|
||||
color = me_pool / nogrp
|
||||
key = MOD( me_pool , nogrp )
|
||||
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, ogrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' init_task_groups ', ' creating ogrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( ogrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( ogrp_comm, ntsk, IERR )
|
||||
IF( nogrp /= ntsk ) CALL errore( ' init_task_groups ', ' ogrp_comm size ', ntsk )
|
||||
DO i = 1, nogrp
|
||||
IF( me_pool == nolist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' ogrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
#endif
|
||||
!
|
||||
!CREATE PLANEWAVE GROUPS
|
||||
!
|
||||
#if defined __MPI
|
||||
color = MOD( me_pool , nogrp )
|
||||
key = me_pool / nogrp
|
||||
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, pgrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' init_task_groups ', ' creating pgrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( pgrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( pgrp_comm, ntsk, IERR )
|
||||
IF( npgrp /= ntsk ) CALL errore( ' init_task_groups ', ' pgrp_comm size ', ntsk )
|
||||
DO i = 1, npgrp
|
||||
IF( me_pool == nplist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' pgrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
me_pgrp = itsk
|
||||
#endif
|
||||
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE init_task_groups
|
||||
!
|
||||
!
|
||||
SUBROUTINE init_ortho_group( nproc_try_in, comm_all )
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -748,4 +647,11 @@ CONTAINS
|
|||
END SUBROUTINE distribute_over_bgrp
|
||||
!
|
||||
!
|
||||
FUNCTION get_ntask_groups()
|
||||
IMPLICIT NONE
|
||||
INTEGER :: get_ntask_groups
|
||||
get_ntask_groups = ntask_groups
|
||||
RETURN
|
||||
END FUNCTION get_ntask_groups
|
||||
!
|
||||
END MODULE mp_global
|
||||
|
|
|
@ -22,10 +22,6 @@
|
|||
USE io_global, ONLY: ionode, stdout
|
||||
USE fft_types, ONLY: fft_dlay_descriptor, fft_dlay_allocate, &
|
||||
fft_dlay_set, fft_dlay_scalar
|
||||
USE mp_global, ONLY: me_pool, nproc_pool, intra_pool_comm, nogrp, use_task_groups
|
||||
USE mp_global, ONLY : me_pool, nproc_pool, intra_pool_comm
|
||||
USE mp_global, ONLY : NOGRP, NPGRP, ogrp_comm, pgrp_comm
|
||||
USE mp_global, ONLY : nolist
|
||||
|
||||
PRIVATE
|
||||
SAVE
|
||||
|
@ -37,7 +33,7 @@
|
|||
!=----------------------------------------------------------------------=
|
||||
|
||||
SUBROUTINE pstickset( gamma_only, bg, gcut, gkcut, gcuts, &
|
||||
dfftp, dffts, ngw, ngm, ngs )
|
||||
dfftp, dffts, ngw, ngm, ngs, mype, nproc, comm, nogrp_ )
|
||||
|
||||
LOGICAL, INTENT(in) :: gamma_only
|
||||
! ... bg(:,1), bg(:,2), bg(:,3) reciprocal space base vectors.
|
||||
|
@ -46,6 +42,10 @@
|
|||
TYPE(fft_dlay_descriptor), INTENT(inout) :: dfftp, dffts
|
||||
INTEGER, INTENT(out) :: ngw, ngm, ngs
|
||||
|
||||
INTEGER, INTENT(IN) :: mype, nproc, comm
|
||||
INTEGER, INTENT(IN) :: nogrp_
|
||||
|
||||
|
||||
LOGICAL :: tk
|
||||
|
||||
INTEGER :: ub(3), lb(3)
|
||||
|
@ -136,8 +136,8 @@
|
|||
! ... Fill in the stick maps, for given g-space base and cut-off
|
||||
|
||||
CALL sticks_maps( tk, ub, lb, bg(:,1), bg(:,2), bg(:,3), &
|
||||
gcut, gkcut, gcuts, st, stw, sts, me_pool, &
|
||||
nproc_pool, intra_pool_comm )
|
||||
gcut, gkcut, gcuts, st, stw, sts, mype, &
|
||||
nproc, comm )
|
||||
|
||||
! ... Now count the number of stick nst and nstw
|
||||
|
||||
|
@ -156,14 +156,14 @@
|
|||
|
||||
ALLOCATE(ist(nst,5))
|
||||
|
||||
ALLOCATE(nstp(nproc_pool))
|
||||
ALLOCATE(sstp(nproc_pool))
|
||||
ALLOCATE(nstp(nproc))
|
||||
ALLOCATE(sstp(nproc))
|
||||
|
||||
ALLOCATE(nstpw(nproc_pool))
|
||||
ALLOCATE(sstpw(nproc_pool))
|
||||
ALLOCATE(nstpw(nproc))
|
||||
ALLOCATE(sstpw(nproc))
|
||||
|
||||
ALLOCATE(nstps(nproc_pool))
|
||||
ALLOCATE(sstps(nproc_pool))
|
||||
ALLOCATE(nstps(nproc))
|
||||
ALLOCATE(sstps(nproc))
|
||||
|
||||
! ... initialize the sticks indexes array ist
|
||||
|
||||
|
@ -174,7 +174,7 @@
|
|||
|
||||
ALLOCATE( idx( nst ) )
|
||||
|
||||
CALL sticks_sort( ist(:,4), ist(:,3), ist(:,5), nst, idx, nproc_pool )
|
||||
CALL sticks_sort( ist(:,4), ist(:,3), ist(:,5), nst, idx, nproc )
|
||||
|
||||
! ... Set as first stick the stick containing the G=0
|
||||
!
|
||||
|
@ -186,26 +186,26 @@
|
|||
! idx( iss ) = itmp
|
||||
|
||||
CALL sticks_dist( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
|
||||
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc_pool )
|
||||
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
|
||||
|
||||
ngw = sstpw( me_pool + 1 )
|
||||
ngm = sstp( me_pool + 1 )
|
||||
ngs = sstps( me_pool + 1 )
|
||||
ngw = sstpw( mype + 1 )
|
||||
ngm = sstp( mype + 1 )
|
||||
ngs = sstps( mype + 1 )
|
||||
|
||||
CALL sticks_pairup( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
|
||||
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc_pool )
|
||||
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
|
||||
|
||||
! ... Allocate and Set fft data layout descriptors
|
||||
|
||||
#if defined __PARA
|
||||
|
||||
CALL fft_dlay_allocate( dfftp, me_pool, nproc_pool, intra_pool_comm, nr1x, nr2x )
|
||||
CALL fft_dlay_allocate( dffts, me_pool, nproc_pool, intra_pool_comm, nr1sx, nr2sx )
|
||||
CALL fft_dlay_allocate( dfftp, mype, nproc, comm, nogrp_ , nr1x, nr2x )
|
||||
CALL fft_dlay_allocate( dffts, mype, nproc, comm, nogrp_ , nr1sx, nr2sx )
|
||||
|
||||
CALL fft_dlay_set( dfftp, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, (me_pool+1), &
|
||||
nproc_pool, intra_pool_comm, nogrp, ub, lb, idx, ist(:,1), ist(:,2), nstp, nstpw, sstp, sstpw, st, stw )
|
||||
CALL fft_dlay_set( dffts, tk, nsts, nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx, (me_pool+1), &
|
||||
nproc_pool, intra_pool_comm, nogrp, ub, lb, idx, ist(:,1), ist(:,2), nstps, nstpw, sstps, sstpw, sts, stw )
|
||||
CALL fft_dlay_set( dfftp, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, &
|
||||
ub, lb, idx, ist(:,1), ist(:,2), nstp, nstpw, sstp, sstpw, st, stw )
|
||||
CALL fft_dlay_set( dffts, tk, nsts, nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx, &
|
||||
ub, lb, idx, ist(:,1), ist(:,2), nstps, nstpw, sstps, sstpw, sts, stw )
|
||||
|
||||
#else
|
||||
|
||||
|
@ -218,8 +218,8 @@
|
|||
IF( ngm_ /= ngm ) CALL errore( ' pstickset ', ' inconsistent ngm ', abs( ngm - ngm_ ) )
|
||||
IF( ngs_ /= ngs ) CALL errore( ' pstickset ', ' inconsistent ngs ', abs( ngs - ngs_ ) )
|
||||
|
||||
CALL fft_dlay_allocate( dfftp, me_pool, nproc_pool, intra_pool_comm, max(nr1x, nr3x), nr2x )
|
||||
CALL fft_dlay_allocate( dffts, me_pool, nproc_pool, intra_pool_comm, max(nr1sx, nr3sx), nr2sx )
|
||||
CALL fft_dlay_allocate( dfftp, mype, nproc, comm, max(nr1x, nr3x), nr2x )
|
||||
CALL fft_dlay_allocate( dffts, mype, nproc, comm, max(nr1sx, nr3sx), nr2sx )
|
||||
|
||||
CALL fft_dlay_scalar( dfftp, ub, lb, nr1, nr2, nr3, nr1x, nr2x, nr3x, stw )
|
||||
CALL fft_dlay_scalar( dffts, ub, lb, nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx, stw )
|
||||
|
@ -234,7 +234,7 @@
|
|||
! ... Maximum number of sticks (wave func.)
|
||||
nstpwx = maxval( nstpw )
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
! Initialize task groups.
|
||||
! Note that this call modify dffts adding task group data.
|
||||
|
@ -247,7 +247,7 @@
|
|||
118 FORMAT(3X,' n.st n.stw n.sts n.g n.gw n.gs')
|
||||
WRITE( stdout,121) minval(nstp), minval(nstpw), minval(nstps), minval(sstp), minval(sstpw), minval(sstps)
|
||||
WRITE( stdout,122) maxval(nstp), maxval(nstpw), maxval(nstps), maxval(sstp), maxval(sstpw), maxval(sstps)
|
||||
! DO ip = 1, nproc_pool
|
||||
! DO ip = 1, nproc
|
||||
! IF (ionode) THEN
|
||||
! WRITE( stdout,120) ip, nstp(ip), nstpw(ip), nstps(ip), sstp(ip), sstpw(ip), sstps(ip)
|
||||
! END IF
|
||||
|
@ -304,20 +304,21 @@ SUBROUTINE task_groups_init( dffts )
|
|||
INTEGER :: I
|
||||
INTEGER :: IERR
|
||||
INTEGER :: num_planes, num_sticks
|
||||
INTEGER :: nnrsx_vec ( nproc_pool )
|
||||
INTEGER :: pgroup( nproc_pool )
|
||||
INTEGER :: nnrsx_vec ( dffts%nproc )
|
||||
INTEGER :: pgroup( dffts%nproc )
|
||||
INTEGER :: strd
|
||||
|
||||
CALL task_groups_init_first( dffts )
|
||||
!
|
||||
IF ( nogrp > 1 ) WRITE( stdout, 100 ) nogrp, npgrp
|
||||
IF ( dffts%nogrp > 1 ) WRITE( stdout, 100 ) dffts%nogrp, dffts%npgrp
|
||||
|
||||
100 FORMAT( /,3X,'Task Groups are in USE',/,3X,'groups and procs/group : ',I5,I5 )
|
||||
|
||||
!Find maximum chunk of local data concerning coefficients of eigenfunctions in g-space
|
||||
|
||||
#if defined __MPI
|
||||
CALL MPI_Allgather( dffts%nnr, 1, MPI_INTEGER, nnrsx_vec, 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
strd = maxval( nnrsx_vec( 1:nproc_pool ) )
|
||||
CALL MPI_Allgather( dffts%nnr, 1, MPI_INTEGER, nnrsx_vec, 1, MPI_INTEGER, dffts%comm, IERR)
|
||||
strd = maxval( nnrsx_vec( 1:dffts%nproc ) )
|
||||
#else
|
||||
strd = dffts%nnr
|
||||
#endif
|
||||
|
@ -334,60 +335,131 @@ SUBROUTINE task_groups_init( dffts )
|
|||
!we choose to do the latter one.
|
||||
!-------------------------------------------------------------------------------------
|
||||
!
|
||||
dffts%nogrp = nogrp
|
||||
dffts%npgrp = npgrp
|
||||
dffts%ogrp_comm = ogrp_comm
|
||||
dffts%pgrp_comm = pgrp_comm
|
||||
!
|
||||
ALLOCATE( dffts%tg_nsw(nproc_pool))
|
||||
ALLOCATE( dffts%tg_npp(nproc_pool))
|
||||
ALLOCATE( dffts%nolist(nogrp))
|
||||
ALLOCATE( dffts%tg_nsw(dffts%nproc))
|
||||
ALLOCATE( dffts%tg_npp(dffts%nproc))
|
||||
|
||||
num_sticks = 0
|
||||
num_planes = 0
|
||||
DO i = 1, nogrp
|
||||
dffts%nolist( i ) = nolist( i )
|
||||
num_sticks = num_sticks + dffts%nsw( nolist(i) + 1 )
|
||||
num_planes = num_planes + dffts%npp( nolist(i) + 1 )
|
||||
DO i = 1, dffts%nogrp
|
||||
num_sticks = num_sticks + dffts%nsw( dffts%nolist(i) + 1 )
|
||||
num_planes = num_planes + dffts%npp( dffts%nolist(i) + 1 )
|
||||
ENDDO
|
||||
|
||||
#if defined __MPI
|
||||
CALL MPI_ALLGATHER(num_sticks, 1, MPI_INTEGER, dffts%tg_nsw(1), 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
CALL MPI_ALLGATHER(num_planes, 1, MPI_INTEGER, dffts%tg_npp(1), 1, MPI_INTEGER, intra_pool_comm, IERR)
|
||||
CALL MPI_ALLGATHER(num_sticks, 1, MPI_INTEGER, dffts%tg_nsw(1), 1, MPI_INTEGER, dffts%comm, IERR)
|
||||
CALL MPI_ALLGATHER(num_planes, 1, MPI_INTEGER, dffts%tg_npp(1), 1, MPI_INTEGER, dffts%comm, IERR)
|
||||
#else
|
||||
dffts%tg_nsw(1) = num_sticks
|
||||
dffts%tg_npp(1) = num_planes
|
||||
#endif
|
||||
|
||||
ALLOCATE( dffts%tg_snd( nogrp ) )
|
||||
ALLOCATE( dffts%tg_rcv( nogrp ) )
|
||||
ALLOCATE( dffts%tg_psdsp( nogrp ) )
|
||||
ALLOCATE( dffts%tg_usdsp( nogrp ) )
|
||||
ALLOCATE( dffts%tg_rdsp( nogrp ) )
|
||||
ALLOCATE( dffts%tg_snd( dffts%nogrp ) )
|
||||
ALLOCATE( dffts%tg_rcv( dffts%nogrp ) )
|
||||
ALLOCATE( dffts%tg_psdsp( dffts%nogrp ) )
|
||||
ALLOCATE( dffts%tg_usdsp( dffts%nogrp ) )
|
||||
ALLOCATE( dffts%tg_rdsp( dffts%nogrp ) )
|
||||
|
||||
dffts%tg_snd(1) = dffts%nr3x * dffts%nsw( me_pool + 1 )
|
||||
IF( dffts%nr3x * dffts%nsw( me_pool + 1 ) > dffts%tg_nnr ) THEN
|
||||
dffts%tg_snd(1) = dffts%nr3x * dffts%nsw( dffts%mype + 1 )
|
||||
IF( dffts%nr3x * dffts%nsw( dffts%mype + 1 ) > dffts%tg_nnr ) THEN
|
||||
CALL errore( ' task_groups_init ', ' inconsistent dffts%tg_nnr ', 1 )
|
||||
ENDIF
|
||||
dffts%tg_psdsp(1) = 0
|
||||
dffts%tg_usdsp(1) = 0
|
||||
dffts%tg_rcv(1) = dffts%nr3x * dffts%nsw( nolist(1) + 1 )
|
||||
dffts%tg_rcv(1) = dffts%nr3x * dffts%nsw( dffts%nolist(1) + 1 )
|
||||
dffts%tg_rdsp(1) = 0
|
||||
DO i = 2, nogrp
|
||||
dffts%tg_snd(i) = dffts%nr3x * dffts%nsw( me_pool + 1 )
|
||||
DO i = 2, dffts%nogrp
|
||||
dffts%tg_snd(i) = dffts%nr3x * dffts%nsw( dffts%mype + 1 )
|
||||
dffts%tg_psdsp(i) = dffts%tg_psdsp(i-1) + dffts%tg_nnr
|
||||
dffts%tg_usdsp(i) = dffts%tg_usdsp(i-1) + dffts%tg_snd(i-1)
|
||||
dffts%tg_rcv(i) = dffts%nr3x * dffts%nsw( nolist(i) + 1 )
|
||||
dffts%tg_rcv(i) = dffts%nr3x * dffts%nsw( dffts%nolist(i) + 1 )
|
||||
dffts%tg_rdsp(i) = dffts%tg_rdsp(i-1) + dffts%tg_rcv(i-1)
|
||||
ENDDO
|
||||
|
||||
dffts%have_task_groups = .true.
|
||||
|
||||
RETURN
|
||||
|
||||
END SUBROUTINE task_groups_init
|
||||
|
||||
|
||||
!
|
||||
SUBROUTINE task_groups_init_first( dffts )
|
||||
|
||||
USE parallel_include
|
||||
!
|
||||
USE fft_types, ONLY : fft_dlay_descriptor
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
TYPE(fft_dlay_descriptor), INTENT(inout) :: dffts
|
||||
!
|
||||
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
|
||||
INTEGER :: pgroup( dffts%nproc )
|
||||
!
|
||||
!SUBDIVIDE THE PROCESSORS IN GROUPS
|
||||
!
|
||||
DO i = 1, dffts%nproc
|
||||
pgroup( i ) = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY ORBITAL GROUP
|
||||
!
|
||||
! processors in these group have contiguous indexes
|
||||
!
|
||||
n1 = ( dffts%mype / dffts%nogrp ) * dffts%nogrp - 1
|
||||
DO i = 1, dffts%nogrp
|
||||
dffts%nolist( i ) = pgroup( n1 + i + 1 )
|
||||
IF( dffts%mype == dffts%nolist( i ) ) ipos = i - 1
|
||||
ENDDO
|
||||
!
|
||||
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
|
||||
!
|
||||
DO I = 1, dffts%npgrp
|
||||
dffts%nplist( i ) = pgroup( ipos + ( i - 1 ) * dffts%nogrp + 1 )
|
||||
ENDDO
|
||||
!
|
||||
!SET UP THE GROUPS
|
||||
!
|
||||
!
|
||||
!CREATE ORBITAL GROUPS
|
||||
!
|
||||
#if defined __MPI
|
||||
color = dffts%mype / dffts%nogrp
|
||||
key = MOD( dffts%mype , dffts%nogrp )
|
||||
CALL MPI_COMM_SPLIT( dffts%comm, color, key, dffts%ogrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' init_task_groups ', ' creating ogrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( dffts%ogrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( dffts%ogrp_comm, ntsk, IERR )
|
||||
IF( dffts%nogrp /= ntsk ) CALL errore( ' init_task_groups ', ' ogrp_comm size ', ntsk )
|
||||
DO i = 1, dffts%nogrp
|
||||
IF( dffts%mype == dffts%nolist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' ogrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
#endif
|
||||
!
|
||||
!CREATE PLANEWAVE GROUPS
|
||||
!
|
||||
#if defined __MPI
|
||||
color = MOD( dffts%mype , dffts%nogrp )
|
||||
key = dffts%mype / dffts%nogrp
|
||||
CALL MPI_COMM_SPLIT( dffts%comm, color, key, dffts%pgrp_comm, ierr )
|
||||
if( ierr /= 0 ) &
|
||||
CALL errore( ' init_task_groups ', ' creating pgrp_comm ', ABS(ierr) )
|
||||
CALL MPI_COMM_RANK( dffts%pgrp_comm, itsk, IERR )
|
||||
CALL MPI_COMM_SIZE( dffts%pgrp_comm, ntsk, IERR )
|
||||
IF( dffts%npgrp /= ntsk ) CALL errore( ' init_task_groups ', ' pgrp_comm size ', ntsk )
|
||||
DO i = 1, dffts%npgrp
|
||||
IF( dffts%mype == dffts%nplist( i ) ) THEN
|
||||
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' pgrp_comm rank ', itsk )
|
||||
END IF
|
||||
END DO
|
||||
dffts%me_pgrp = itsk
|
||||
#endif
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE task_groups_init_first
|
||||
!
|
||||
!=----------------------------------------------------------------------=
|
||||
END MODULE stick_set
|
||||
!=----------------------------------------------------------------------=
|
||||
|
|
|
@ -16,7 +16,8 @@ SUBROUTINE data_structure( gamma_only )
|
|||
USE kinds, ONLY : DP
|
||||
USE io_global, ONLY : stdout
|
||||
USE mp, ONLY : mp_max
|
||||
USE mp_global, ONLY : inter_pool_comm, intra_pool_comm
|
||||
USE mp_global, ONLY : me_pool, nproc_pool, inter_pool_comm, intra_pool_comm
|
||||
USE mp_global, ONLY : get_ntask_groups
|
||||
USE fft_base, ONLY : dfftp, dffts
|
||||
USE cell_base, ONLY : bg, tpiba
|
||||
USE klist, ONLY : xk, nks
|
||||
|
@ -29,7 +30,7 @@ SUBROUTINE data_structure( gamma_only )
|
|||
IMPLICIT NONE
|
||||
LOGICAL, INTENT(in) :: gamma_only
|
||||
REAL (DP) :: gkcut
|
||||
INTEGER :: ik, ngm_, ngs_, ngw_
|
||||
INTEGER :: ik, ngm_, ngs_, ngw_ , nogrp
|
||||
!
|
||||
! ... calculate gkcut = max |k+G|^2, in (2pi/a)^2 units
|
||||
!
|
||||
|
@ -56,8 +57,11 @@ SUBROUTINE data_structure( gamma_only )
|
|||
!
|
||||
! ... set up fft descriptors, including parallel stuff: sticks, planes, etc.
|
||||
!
|
||||
nogrp = get_ntask_groups()
|
||||
!
|
||||
CALL pstickset( gamma_only, bg, gcutm, gkcut, gcutms, &
|
||||
dfftp, dffts, ngw_ , ngm_ , ngs_ )
|
||||
dfftp, dffts, ngw_ , ngm_ , ngs_ , me_pool, nproc_pool, intra_pool_comm, &
|
||||
nogrp )
|
||||
!
|
||||
! on output, ngm_ and ngs_ contain the local number of G-vectors
|
||||
! for the two grids. Initialize local and global number of G-vectors
|
||||
|
|
|
@ -36,7 +36,7 @@ SUBROUTINE h_psi( lda, n, m, psi, hpsi )
|
|||
USE realus, ONLY : real_space, fft_orbital_gamma, initialisation_level, &
|
||||
bfft_orbital_gamma, calbec_rs_gamma, &
|
||||
add_vuspsir_gamma, v_loc_psir
|
||||
USE mp_global,ONLY : nogrp, use_task_groups
|
||||
USE fft_base, ONLY : dffts
|
||||
#ifdef EXX
|
||||
USE exx, ONLY : vexx
|
||||
USE funct, ONLY : exx_is_active
|
||||
|
@ -74,8 +74,8 @@ SUBROUTINE h_psi( lda, n, m, psi, hpsi )
|
|||
CALL start_clock( 'h_psi:vloc' )
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
IF (( use_task_groups ) .AND. ( m >= nogrp )) then
|
||||
incr = 2 * nogrp
|
||||
IF ( dffts%have_task_groups .AND. ( m >= dffts%nogrp )) then
|
||||
incr = 2 * dffts%nogrp
|
||||
else
|
||||
incr = 2
|
||||
endif
|
||||
|
|
|
@ -614,9 +614,9 @@ h_epsi_her_set.o : buffers.o
|
|||
h_epsi_her_set.o : pwcom.o
|
||||
h_epsi_her_set.o : scf_mod.o
|
||||
h_psi.o : ../Modules/control_flags.o
|
||||
h_psi.o : ../Modules/fft_base.o
|
||||
h_psi.o : ../Modules/funct.o
|
||||
h_psi.o : ../Modules/kind.o
|
||||
h_psi.o : ../Modules/mp_global.o
|
||||
h_psi.o : ../Modules/recvec.o
|
||||
h_psi.o : ../Modules/uspp.o
|
||||
h_psi.o : becmod.o
|
||||
|
|
|
@ -14,7 +14,7 @@ SUBROUTINE print_clock_pw()
|
|||
!
|
||||
USE io_global, ONLY : stdout
|
||||
USE control_flags, ONLY : isolve, iverbosity, gamma_only
|
||||
USE mp_global, ONLY : mpime, root, use_task_groups
|
||||
USE mp_global, ONLY : mpime, root
|
||||
USE paw_variables, ONLY : okpaw
|
||||
USE realus, ONLY : real_space
|
||||
!
|
||||
|
@ -161,7 +161,7 @@ SUBROUTINE print_clock_pw()
|
|||
!
|
||||
CALL print_clock( 'reduce' )
|
||||
CALL print_clock( 'fft_scatter' )
|
||||
IF( use_task_groups ) CALL print_clock( 'ALLTOALL' )
|
||||
CALL print_clock( 'ALLTOALL' )
|
||||
#endif
|
||||
!
|
||||
#ifdef EXX
|
||||
|
|
118
PW/realus.f90
118
PW/realus.f90
|
@ -115,7 +115,6 @@ MODULE realus
|
|||
USE cell_base, ONLY : tpiba2
|
||||
USE control_flags, ONLY : tqr
|
||||
USE fft_base, ONLY : dffts
|
||||
USE mp_global, ONLY : nogrp, use_task_groups
|
||||
USE wavefunctions_module, ONLY : psic
|
||||
USE io_global, ONLY : stdout
|
||||
|
||||
|
@ -133,13 +132,13 @@ MODULE realus
|
|||
ALLOCATE(npw_k(nks))
|
||||
!allocate (psic_temp(size(psic)))
|
||||
!real space, allocation for task group fft work arrays
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
IF (allocated( tg_psic ) ) DEALLOCATE( tg_psic )
|
||||
!
|
||||
ALLOCATE( tg_psic( dffts%tg_nnr * nogrp ) )
|
||||
!ALLOCATE( tg_psic_temp( dffts%tg_nnr * nogrp ) )
|
||||
ALLOCATE( tg_vrs( dffts%tg_nnr * nogrp ) )
|
||||
ALLOCATE( tg_psic( dffts%tg_nnr * dffts%nogrp ) )
|
||||
!ALLOCATE( tg_psic_temp( dffts%tg_nnr * dffts%nogrp ) )
|
||||
ALLOCATE( tg_vrs( dffts%tg_nnr * dffts%nogrp ) )
|
||||
|
||||
!
|
||||
ENDIF
|
||||
|
@ -1556,9 +1555,8 @@ MODULE realus
|
|||
USE ions_base, ONLY : nat, ntyp => nsp, ityp
|
||||
USE smooth_grid_dimensions,ONLY : nr1s, nr2s, nr3s
|
||||
USE uspp_param, ONLY : nh, nhm
|
||||
USE fft_base, ONLY : tg_gather
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups,intra_pool_comm
|
||||
USE fft_base, ONLY : tg_gather, dffts
|
||||
USE mp_global, ONLY : me_pool, intra_pool_comm
|
||||
USE mp, ONLY : mp_sum
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -1577,7 +1575,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'calbec_rs' )
|
||||
!
|
||||
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
|
||||
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
|
||||
|
||||
CALL errore( 'calbec_rs_gamma', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -1666,9 +1664,8 @@ MODULE realus
|
|||
USE smooth_grid_dimensions,ONLY : nr1s, nr2s, nr3s
|
||||
USE uspp_param, ONLY : nh, nhm
|
||||
USE becmod, ONLY : bec_type, becp
|
||||
USE fft_base, ONLY : tg_gather
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE fft_base, ONLY : tg_gather, dffts
|
||||
USE mp_global, ONLY : me_pool
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1685,7 +1682,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'calbec_rs' )
|
||||
!
|
||||
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
|
||||
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
|
||||
|
||||
CALL errore( 'calbec_rs_k', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -1752,9 +1749,8 @@ MODULE realus
|
|||
USE lsda_mod, ONLY : current_spin
|
||||
USE uspp, ONLY : qq
|
||||
USE becmod, ONLY : bec_type, becp
|
||||
USE fft_base, ONLY : tg_gather
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE fft_base, ONLY : tg_gather, dffts
|
||||
USE mp_global, ONLY : me_pool
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1770,7 +1766,7 @@ MODULE realus
|
|||
|
||||
|
||||
CALL start_clock( 's_psir' )
|
||||
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
|
||||
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
|
||||
|
||||
CALL errore( 's_psir_gamma', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -1853,9 +1849,8 @@ MODULE realus
|
|||
USE lsda_mod, ONLY : current_spin
|
||||
USE uspp, ONLY : qq
|
||||
USE becmod, ONLY : bec_type, becp
|
||||
USE fft_base, ONLY : tg_gather
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE fft_base, ONLY : tg_gather, dffts
|
||||
USE mp_global, ONLY : me_pool
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1872,7 +1867,7 @@ MODULE realus
|
|||
|
||||
|
||||
CALL start_clock( 's_psir' )
|
||||
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
|
||||
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
|
||||
|
||||
CALL errore( 's_psir_k', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -1956,9 +1951,8 @@ MODULE realus
|
|||
USE lsda_mod, ONLY : current_spin
|
||||
USE uspp, ONLY : deeq
|
||||
USE becmod, ONLY : bec_type, becp
|
||||
USE fft_base, ONLY : tg_gather
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE fft_base, ONLY : tg_gather, dffts
|
||||
USE mp_global, ONLY : me_pool
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1972,7 +1966,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'add_vuspsir' )
|
||||
|
||||
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
|
||||
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
|
||||
|
||||
CALL errore( 'add_vuspsir_gamma', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -2060,9 +2054,8 @@ MODULE realus
|
|||
USE lsda_mod, ONLY : current_spin
|
||||
USE uspp, ONLY : deeq
|
||||
USE becmod, ONLY : bec_type, becp
|
||||
USE fft_base, ONLY : tg_gather
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE fft_base, ONLY : tg_gather, dffts
|
||||
USE mp_global, ONLY : me_pool
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -2078,7 +2071,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'add_vuspsir' )
|
||||
|
||||
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
|
||||
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
|
||||
|
||||
CALL errore( 'add_vuspsir_k', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -2157,8 +2150,7 @@ MODULE realus
|
|||
USE kinds, ONLY : DP
|
||||
USE fft_base, ONLY : dffts, tg_gather
|
||||
USE fft_interfaces,ONLY : invfft
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
|
||||
IMPLICIT NONE
|
||||
|
||||
|
@ -2180,7 +2172,7 @@ MODULE realus
|
|||
|
||||
!Task groups
|
||||
!COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
|
||||
INTEGER :: recv_cnt( nogrp ), recv_displ( nogrp )
|
||||
INTEGER :: recv_cnt( dffts%nogrp ), recv_displ( dffts%nogrp )
|
||||
INTEGER :: v_siz
|
||||
|
||||
!The new task group version based on vloc_psi
|
||||
|
@ -2190,16 +2182,16 @@ MODULE realus
|
|||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
|
||||
tg_psic = (0.d0, 0.d0)
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, 2*nogrp, 2
|
||||
DO idx = 1, 2*dffts%nogrp, 2
|
||||
|
||||
IF( idx + ibnd - 1 < nbnd ) THEN
|
||||
DO j = 1, npw_k(1)
|
||||
|
@ -2225,7 +2217,7 @@ MODULE realus
|
|||
!
|
||||
IF (present(conserved)) THEN
|
||||
IF (conserved .eqv. .true.) THEN
|
||||
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * nogrp ) )
|
||||
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * dffts%nogrp ) )
|
||||
tg_psic_temp=tg_psic
|
||||
ENDIF
|
||||
ENDIF
|
||||
|
@ -2275,7 +2267,7 @@ MODULE realus
|
|||
|
||||
ENDIF
|
||||
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
|
||||
!if (.not. allocated(psic)) CALL errore( 'fft_orbital_gamma', 'psic not allocated', 2 )
|
||||
! OLD VERSION ! Based on an algorithm found somewhere in the TDDFT codes, generalised to k points
|
||||
|
@ -2322,8 +2314,7 @@ MODULE realus
|
|||
USE kinds, ONLY : DP
|
||||
USE fft_base, ONLY : dffts, tg_gather
|
||||
USE fft_interfaces,ONLY : fwfft
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
|
||||
IMPLICIT NONE
|
||||
|
||||
|
@ -2342,20 +2333,20 @@ MODULE realus
|
|||
LOGICAL :: use_tg
|
||||
|
||||
!Task groups
|
||||
INTEGER :: recv_cnt( nogrp ), recv_displ( nogrp )
|
||||
INTEGER :: recv_cnt( dffts%nogrp ), recv_displ( dffts%nogrp )
|
||||
INTEGER :: v_siz
|
||||
!print *, "->fourier space"
|
||||
CALL start_clock( 'bfft_orbital' )
|
||||
!New task_groups versions
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
|
||||
IF( use_task_groups ) THEN
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
CALL fwfft ('Wave', tg_psic, dffts )
|
||||
!
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, 2*nogrp, 2
|
||||
DO idx = 1, 2*dffts%nogrp, 2
|
||||
!
|
||||
IF( idx + ibnd - 1 < nbnd ) THEN
|
||||
DO j = 1, npw_k(1)
|
||||
|
@ -2407,7 +2398,7 @@ MODULE realus
|
|||
ENDIF
|
||||
ENDIF
|
||||
ENDIF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
!! OLD VERSION Based on the algorithm found in lr_apply_liovillian
|
||||
!!print * ,"a"
|
||||
!CALL fwfft ('Wave', psic, dffts)
|
||||
|
@ -2460,8 +2451,7 @@ MODULE realus
|
|||
USE kinds, ONLY : DP
|
||||
USE fft_base, ONLY : dffts
|
||||
USE fft_interfaces,ONLY : invfft
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE wvfct, ONLY : igk
|
||||
|
||||
IMPLICIT NONE
|
||||
|
@ -2477,15 +2467,15 @@ MODULE realus
|
|||
LOGICAL :: use_tg
|
||||
|
||||
CALL start_clock( 'fft_orbital' )
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
tg_psic = ( 0.D0, 0.D0 )
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
DO idx = 1, dffts%nogrp
|
||||
!
|
||||
IF( idx + ibnd - 1 <= nbnd ) THEN
|
||||
!DO j = 1, size(orbital,1)
|
||||
|
@ -2500,7 +2490,7 @@ MODULE realus
|
|||
CALL invfft ('Wave', tg_psic, dffts)
|
||||
IF (present(conserved)) THEN
|
||||
IF (conserved .eqv. .true.) THEN
|
||||
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * nogrp ) )
|
||||
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * dffts%nogrp ) )
|
||||
tg_psic_temp=tg_psic
|
||||
ENDIF
|
||||
ENDIF
|
||||
|
@ -2520,7 +2510,7 @@ MODULE realus
|
|||
ENDIF
|
||||
!
|
||||
ENDIF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
CALL stop_clock( 'fft_orbital' )
|
||||
END SUBROUTINE fft_orbital_k
|
||||
!--------------------------------------------------------------------------
|
||||
|
@ -2538,8 +2528,7 @@ MODULE realus
|
|||
USE kinds, ONLY : DP
|
||||
USE fft_base, ONLY : dffts
|
||||
USE fft_interfaces,ONLY : fwfft
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE wvfct, ONLY : igk
|
||||
|
||||
IMPLICIT NONE
|
||||
|
@ -2555,16 +2544,16 @@ MODULE realus
|
|||
LOGICAL :: use_tg
|
||||
|
||||
CALL start_clock( 'bfft_orbital' )
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
CALL fwfft ('Wave', tg_psic, dffts)
|
||||
!
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
DO idx = 1, dffts%nogrp
|
||||
!
|
||||
IF( idx + ibnd - 1 <= nbnd ) THEN
|
||||
orbital (:, ibnd+idx-1) = tg_psic( nls(igk(:)) + ioff )
|
||||
|
@ -2591,7 +2580,7 @@ MODULE realus
|
|||
ENDIF
|
||||
ENDIF
|
||||
ENDIF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
CALL stop_clock( 'bfft_orbital' )
|
||||
END SUBROUTINE bfft_orbital_k
|
||||
!--------------------------------------------------------------------------
|
||||
|
@ -2605,8 +2594,7 @@ MODULE realus
|
|||
USE gvecs, ONLY : nls,nlsm,doublegrid
|
||||
USE kinds, ONLY : DP
|
||||
USE fft_base, ONLY : dffts, tg_gather
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE scf, ONLY : vrs
|
||||
USE lsda_mod, ONLY : current_spin
|
||||
|
||||
|
@ -2623,11 +2611,11 @@ MODULE realus
|
|||
|
||||
!Task groups
|
||||
REAL(DP), ALLOCATABLE :: tg_v(:)
|
||||
INTEGER :: recv_cnt( nogrp ), recv_displ( nogrp )
|
||||
INTEGER :: recv_cnt( dffts%nogrp ), recv_displ( dffts%nogrp )
|
||||
INTEGER :: v_siz
|
||||
CALL start_clock( 'v_loc_psir' )
|
||||
|
||||
IF( use_task_groups .and. nbnd >= nogrp ) THEN
|
||||
IF( dffts%have_task_groups .and. nbnd >= dffts%nogrp ) THEN
|
||||
IF (ibnd == 1 ) THEN
|
||||
CALL tg_gather( dffts, vrs(:,current_spin), tg_v ) !if ibnd==1 this is a new calculation, and tg_v should be distributed.
|
||||
ENDIF
|
||||
|
|
|
@ -228,8 +228,7 @@ SUBROUTINE sum_band()
|
|||
! ... gamma version
|
||||
!
|
||||
USE becmod, ONLY : bec_type, becp, calbec
|
||||
USE mp_global, ONLY : nogrp, nolist, ogrp_comm, me_pool, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE mp, ONLY : mp_sum
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -249,28 +248,28 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
IF ( nks > 1 ) REWIND( iunigk )
|
||||
!
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = ( use_task_groups ) .AND. ( nbnd >= nogrp )
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = ( dffts%have_task_groups ) .AND. ( nbnd >= dffts%nogrp )
|
||||
!
|
||||
incr = 2
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
IF( dft_is_meta() ) &
|
||||
CALL errore( ' sum_band ', ' task groups with meta dft, not yet implemented ', 1 )
|
||||
!
|
||||
v_siz = dffts%tg_nnr * nogrp
|
||||
v_siz = dffts%tg_nnr * dffts%nogrp
|
||||
!
|
||||
ALLOCATE( tg_psi( v_siz ) )
|
||||
ALLOCATE( tg_rho( v_siz ) )
|
||||
!
|
||||
incr = 2 * nogrp
|
||||
incr = 2 * dffts%nogrp
|
||||
!
|
||||
END IF
|
||||
!
|
||||
k_loop: DO ik = 1, nks
|
||||
!
|
||||
IF( use_task_groups ) tg_rho = 0.0_DP
|
||||
IF( dffts%have_task_groups ) tg_rho = 0.0_DP
|
||||
IF ( lsda ) current_spin = isk(ik)
|
||||
!
|
||||
npw = ngk(ik)
|
||||
|
@ -299,14 +298,14 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
DO ibnd = 1, nbnd, incr
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
tg_psi(:) = ( 0.D0, 0.D0 )
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, 2*nogrp, 2
|
||||
DO idx = 1, 2*dffts%nogrp, 2
|
||||
!
|
||||
! ... 2*nogrp ffts at the same time
|
||||
! ... 2*dffts%nogrp ffts at the same time
|
||||
!
|
||||
IF( idx + ibnd - 1 < nbnd ) THEN
|
||||
DO j = 1, npw
|
||||
|
@ -327,14 +326,14 @@ SUBROUTINE sum_band()
|
|||
CALL invfft ('Wave', tg_psi, dffts)
|
||||
!
|
||||
! Now the first proc of the group holds the first two bands
|
||||
! of the 2*nogrp bands that we are processing at the same time,
|
||||
! of the 2*dffts%nogrp bands that we are processing at the same time,
|
||||
! the second proc. holds the third and fourth band
|
||||
! and so on
|
||||
!
|
||||
! Compute the proper factor for each band
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
IF( nolist( idx ) == me_pool ) EXIT
|
||||
DO idx = 1, dffts%nogrp
|
||||
IF( dffts%nolist( idx ) == me_pool ) EXIT
|
||||
END DO
|
||||
!
|
||||
! Remember two bands are packed in a single array :
|
||||
|
@ -437,16 +436,16 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
END DO
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
! reduce the group charge
|
||||
!
|
||||
CALL mp_sum( tg_rho, gid = ogrp_comm )
|
||||
CALL mp_sum( tg_rho, gid = dffts%ogrp_comm )
|
||||
!
|
||||
ioff = 0
|
||||
DO idx = 1, nogrp
|
||||
IF( me_pool == nolist( idx ) ) EXIT
|
||||
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( nolist( idx ) + 1 )
|
||||
DO idx = 1, dffts%nogrp
|
||||
IF( me_pool == dffts%nolist( idx ) ) EXIT
|
||||
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( dffts%nolist( idx ) + 1 )
|
||||
END DO
|
||||
!
|
||||
! copy the charge back to the processor location
|
||||
|
@ -538,11 +537,11 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
END DO k_loop
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
DEALLOCATE( tg_psi )
|
||||
DEALLOCATE( tg_rho )
|
||||
END IF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
@ -556,8 +555,7 @@ SUBROUTINE sum_band()
|
|||
! ... k-points version
|
||||
!
|
||||
USE becmod, ONLY : bec_type, becp, calbec
|
||||
USE mp_global, ONLY : nogrp, nolist, ogrp_comm, me_pool, &
|
||||
use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE mp, ONLY : mp_sum
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -588,26 +586,26 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
IF ( nks > 1 ) REWIND( iunigk )
|
||||
!
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = ( use_task_groups ) .AND. ( nbnd >= nogrp ) &
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = ( dffts%have_task_groups ) .AND. ( nbnd >= dffts%nogrp ) &
|
||||
.AND. ( .NOT. noncolin ) .AND. ( .NOT. dft_is_meta() )
|
||||
!
|
||||
incr = 1
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
v_siz = dffts%tg_nnr * nogrp
|
||||
v_siz = dffts%tg_nnr * dffts%nogrp
|
||||
!
|
||||
ALLOCATE( tg_psi( v_siz ) )
|
||||
ALLOCATE( tg_rho( v_siz ) )
|
||||
!
|
||||
incr = nogrp
|
||||
incr = dffts%nogrp
|
||||
!
|
||||
END IF
|
||||
!
|
||||
k_loop: DO ik = 1, nks
|
||||
!
|
||||
IF( use_task_groups ) tg_rho = 0.0_DP
|
||||
IF( dffts%have_task_groups ) tg_rho = 0.0_DP
|
||||
|
||||
IF ( lsda ) current_spin = isk(ik)
|
||||
npw = ngk (ik)
|
||||
|
@ -626,8 +624,8 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
DO ibnd = 1, nbnd, incr
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
DO idx = 1, nogrp
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
DO idx = 1, dffts%nogrp
|
||||
IF( idx + ibnd - 1 <= nbnd ) eband = eband + et( idx + ibnd - 1, ik ) * wg( idx + ibnd - 1, ik )
|
||||
END DO
|
||||
ELSE
|
||||
|
@ -665,7 +663,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
ELSE
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
!$omp parallel default(shared), private(j,ioff,idx)
|
||||
!$omp do
|
||||
|
@ -676,9 +674,9 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
DO idx = 1, dffts%nogrp
|
||||
!
|
||||
! ... nogrp ffts at the same time
|
||||
! ... dffts%nogrp ffts at the same time
|
||||
!
|
||||
IF( idx + ibnd - 1 <= nbnd ) THEN
|
||||
!$omp do
|
||||
|
@ -696,13 +694,13 @@ SUBROUTINE sum_band()
|
|||
CALL invfft ('Wave', tg_psi, dffts)
|
||||
!
|
||||
! Now the first proc of the group holds the first band
|
||||
! of the nogrp bands that we are processing at the same time,
|
||||
! of the dffts%nogrp bands that we are processing at the same time,
|
||||
! the second proc. holds the second and so on
|
||||
!
|
||||
! Compute the proper factor for each band
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
IF( nolist( idx ) == me_pool ) EXIT
|
||||
DO idx = 1, dffts%nogrp
|
||||
IF( dffts%nolist( idx ) == me_pool ) EXIT
|
||||
END DO
|
||||
!
|
||||
! Remember
|
||||
|
@ -752,16 +750,16 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
END DO
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
! reduce the group charge
|
||||
!
|
||||
CALL mp_sum( tg_rho, gid = ogrp_comm )
|
||||
CALL mp_sum( tg_rho, gid = dffts%ogrp_comm )
|
||||
!
|
||||
ioff = 0
|
||||
DO idx = 1, nogrp
|
||||
IF( me_pool == nolist( idx ) ) EXIT
|
||||
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( nolist( idx ) + 1 )
|
||||
DO idx = 1, dffts%nogrp
|
||||
IF( me_pool == dffts%nolist( idx ) ) EXIT
|
||||
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( dffts%nolist( idx ) + 1 )
|
||||
END DO
|
||||
!
|
||||
! copy the charge back to the proper processor location
|
||||
|
@ -902,11 +900,11 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
END DO k_loop
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
DEALLOCATE( tg_psi )
|
||||
DEALLOCATE( tg_rho )
|
||||
END IF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
|
||||
IF (noncolin.and.okvan) THEN
|
||||
DO np = 1, ntyp
|
||||
|
|
|
@ -16,7 +16,7 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
|
|||
USE kinds, ONLY : DP
|
||||
USE gvecs, ONLY : nls, nlsm
|
||||
USE wvfct, ONLY : igk
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE fft_base, ONLY : dffts, tg_gather
|
||||
USE fft_interfaces,ONLY : fwfft, invfft
|
||||
USE wavefunctions_module, ONLY: psic
|
||||
|
@ -43,19 +43,19 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
|
|||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = use_task_groups .and. ( m >= nogrp )
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = dffts%have_task_groups .and. ( m >= dffts%nogrp )
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
v_siz = dffts%tg_nnr * nogrp
|
||||
v_siz = dffts%tg_nnr * dffts%nogrp
|
||||
!
|
||||
ALLOCATE( tg_v ( v_siz ) )
|
||||
ALLOCATE( tg_psic( v_siz ) )
|
||||
!
|
||||
CALL tg_gather( dffts, v, tg_v )
|
||||
!
|
||||
incr = 2 * nogrp
|
||||
incr = 2 * dffts%nogrp
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
|
@ -63,12 +63,12 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
DO ibnd = 1, m, incr
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
tg_psic = (0.d0, 0.d0)
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, 2*nogrp, 2
|
||||
DO idx = 1, 2*dffts%nogrp, 2
|
||||
IF( idx + ibnd - 1 < m ) THEN
|
||||
DO j = 1, n
|
||||
tg_psic(nls (igk(j))+ioff) = psi(j,idx+ibnd-1) + &
|
||||
|
@ -109,7 +109,7 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
|
|||
! product with the potential v on the smooth grid
|
||||
! back to reciprocal space
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
CALL invfft ('Wave', tg_psic, dffts)
|
||||
!
|
||||
|
@ -133,11 +133,11 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
! addition to the total product
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, 2*nogrp, 2
|
||||
DO idx = 1, 2*dffts%nogrp, 2
|
||||
!
|
||||
IF( idx + ibnd - 1 < m ) THEN
|
||||
DO j = 1, n
|
||||
|
@ -181,13 +181,13 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
ENDDO
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
DEALLOCATE( tg_psic )
|
||||
DEALLOCATE( tg_v )
|
||||
!
|
||||
ENDIF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE vloc_psi_gamma
|
||||
|
@ -202,7 +202,7 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
|
|||
USE kinds, ONLY : DP
|
||||
USE gvecs, ONLY : nls, nlsm
|
||||
USE wvfct, ONLY : igk
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE fft_base, ONLY : dffts, tg_gather
|
||||
USE fft_interfaces,ONLY : fwfft, invfft
|
||||
USE wavefunctions_module, ONLY: psic
|
||||
|
@ -226,20 +226,20 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
|
|||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = use_task_groups .and. ( m >= nogrp )
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = dffts%have_task_groups .and. ( m >= dffts%nogrp )
|
||||
!
|
||||
incr = 1
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
v_siz = dffts%tg_nnr * nogrp
|
||||
v_siz = dffts%tg_nnr * dffts%nogrp
|
||||
!
|
||||
ALLOCATE( tg_v ( v_siz ) )
|
||||
ALLOCATE( tg_psic( v_siz ) )
|
||||
!
|
||||
CALL tg_gather( dffts, v, tg_v )
|
||||
incr = nogrp
|
||||
incr = dffts%nogrp
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
|
@ -247,12 +247,12 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
DO ibnd = 1, m, incr
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
tg_psic = (0.d0, 0.d0)
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
DO idx = 1, dffts%nogrp
|
||||
|
||||
IF( idx + ibnd - 1 <= m ) THEN
|
||||
!$omp parallel do
|
||||
|
@ -281,7 +281,7 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
|
|||
! product with the potential v on the smooth grid
|
||||
! back to reciprocal space
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
!$omp parallel do
|
||||
DO j = 1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_pool + 1 )
|
||||
|
@ -305,11 +305,11 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
! addition to the total product
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
DO idx = 1, dffts%nogrp
|
||||
!
|
||||
IF( idx + ibnd - 1 <= m ) THEN
|
||||
!$omp parallel do
|
||||
|
@ -333,13 +333,13 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
ENDDO
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
DEALLOCATE( tg_psic )
|
||||
DEALLOCATE( tg_v )
|
||||
!
|
||||
ENDIF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE vloc_psi_k
|
||||
|
@ -354,7 +354,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
|
|||
USE kinds, ONLY : DP
|
||||
USE gvecs, ONLY : nls, nlsm
|
||||
USE wvfct, ONLY : igk
|
||||
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, use_task_groups
|
||||
USE mp_global, ONLY : me_pool
|
||||
USE fft_base, ONLY : dffts, dfftp, tg_gather
|
||||
USE fft_interfaces,ONLY : fwfft, invfft
|
||||
USE noncollin_module, ONLY: npol
|
||||
|
@ -382,32 +382,32 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
|
|||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
use_tg = use_task_groups
|
||||
use_task_groups = use_task_groups .and. ( m >= nogrp )
|
||||
use_tg = dffts%have_task_groups
|
||||
dffts%have_task_groups = dffts%have_task_groups .and. ( m >= dffts%nogrp )
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
v_siz = dffts%tg_nnr * nogrp
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
v_siz = dffts%tg_nnr * dffts%nogrp
|
||||
ALLOCATE( tg_v( v_siz, 4 ) )
|
||||
CALL tg_gather( dffts, v(:,1), tg_v(:,1) )
|
||||
CALL tg_gather( dffts, v(:,2), tg_v(:,2) )
|
||||
CALL tg_gather( dffts, v(:,3), tg_v(:,3) )
|
||||
CALL tg_gather( dffts, v(:,4), tg_v(:,4) )
|
||||
ALLOCATE( tg_psic( v_siz, npol ) )
|
||||
incr = nogrp
|
||||
incr = dffts%nogrp
|
||||
ENDIF
|
||||
!
|
||||
! the local potential V_Loc psi. First the psi in real space
|
||||
!
|
||||
DO ibnd = 1, m, incr
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
DO ipol = 1, npol
|
||||
!
|
||||
tg_psic(:,ipol) = ( 0.D0, 0.D0 )
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
DO idx = 1, dffts%nogrp
|
||||
!
|
||||
IF( idx + ibnd - 1 <= m ) THEN
|
||||
DO j = 1, n
|
||||
|
@ -436,7 +436,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
! product with the potential v = (vltot+vr) on the smooth grid
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
DO j=1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_pool + 1 )
|
||||
sup = tg_psic(j,1) * (tg_v(j,1)+tg_v(j,4)) + &
|
||||
tg_psic(j,2) * (tg_v(j,2)-(0.d0,1.d0)*tg_v(j,3))
|
||||
|
@ -458,7 +458,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
! back to reciprocal space
|
||||
!
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
DO ipol = 1, npol
|
||||
|
||||
|
@ -466,7 +466,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
ioff = 0
|
||||
!
|
||||
DO idx = 1, nogrp
|
||||
DO idx = 1, dffts%nogrp
|
||||
!
|
||||
IF( idx + ibnd - 1 <= m ) THEN
|
||||
DO j = 1, n
|
||||
|
@ -499,13 +499,13 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
|
|||
|
||||
ENDDO
|
||||
|
||||
IF( use_task_groups ) THEN
|
||||
IF( dffts%have_task_groups ) THEN
|
||||
!
|
||||
DEALLOCATE( tg_v )
|
||||
DEALLOCATE( tg_psic )
|
||||
!
|
||||
ENDIF
|
||||
use_task_groups = use_tg
|
||||
dffts%have_task_groups = use_tg
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE vloc_psi_nc
|
||||
|
|
Loading…
Reference in New Issue