- task-group variables moved inside fft_type
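
In short: the per-pool task-group state that used to live in mp_global (use_task_groups, nogrp, npgrp, ogrp_comm, pgrp_comm, nolist, nplist, me_pgrp) is now carried by the FFT descriptor itself, so each grid owns its task-group setup. A minimal before/after sketch of the access pattern, using names taken from the hunks below:

   ! before: task-group state pulled in as module globals
   USE mp_global, ONLY: use_task_groups, nogrp
   IF( use_task_groups ) ALLOCATE( psi( dffts%tg_nnr * nogrp ) )

   ! after: everything hangs off the descriptor
   USE fft_base, ONLY: dffts
   IF( dffts%have_task_groups ) ALLOCATE( psi( dffts%tg_nnr * dffts%nogrp ) )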

git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@7439 c92efa57-630b-4861-b058-cf58834340f0
ccavazzoni 2011-01-23 18:12:50 +00:00
parent cf0b1a29af
commit b21befe7aa
20 changed files with 426 additions and 472 deletions

View File

@ -115,8 +115,7 @@
USE constants, ONLY: pi, fpi
USE mp, ONLY: mp_sum
USE io_global, ONLY: stdout, ionode
USE mp_global, ONLY: intra_bgrp_comm, nogrp, me_bgrp, &
use_task_groups, ogrp_comm, nolist, nbgrp, inter_bgrp_comm
USE mp_global, ONLY: intra_bgrp_comm, nbgrp, inter_bgrp_comm, me_bgrp
USE funct, ONLY: dft_is_meta
USE cg_module, ONLY: tcg
USE cp_interfaces, ONLY: stress_kin, enkin
@ -295,7 +294,7 @@
rhos(ir,iss1)=rhos(ir,iss1) + sa1*( DBLE(psis(ir)))**2
END DO
!
ELSE IF( use_task_groups ) THEN
ELSE IF( dffts%have_task_groups ) THEN
!
CALL loop_over_states_tg()
!
@ -500,13 +499,13 @@
INTEGER :: from, ii, eig_index, eig_offset
REAL(DP), ALLOCATABLE :: tmp_rhos(:,:)
ALLOCATE( psis( dffts%tg_nnr * nogrp ) )
ALLOCATE( psis( dffts%tg_nnr * dffts%nogrp ) )
!
ALLOCATE( tmp_rhos ( dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 ), nspin ) )
!
tmp_rhos = 0_DP
do i = 1, nbsp_bgrp, 2*nogrp
do i = 1, nbsp_bgrp, 2*dffts%nogrp
!
! Initialize wave-functions in Fourier space (to be FFTed)
! The size of psis is nnr, which is equal to the total number
@ -530,7 +529,7 @@
!
eig_offset = 0
do eig_index = 1, 2*nogrp, 2
do eig_index = 1, 2*dffts%nogrp, 2
!
! here we pack 2*nogrp electronic states in the psis array
!
@ -571,8 +570,8 @@
!
! Compute the proper factor for each band
!
DO ii = 1, nogrp
IF( nolist( ii ) == me_bgrp ) EXIT
DO ii = 1, dffts%nogrp
IF( dffts%nolist( ii ) == me_bgrp ) EXIT
END DO
!
! Remember two bands are packed in a single array :
@ -629,8 +628,8 @@
CALL mp_sum( tmp_rhos, inter_bgrp_comm )
END IF
IF ( nogrp > 1 ) THEN
CALL mp_sum( tmp_rhos, gid = ogrp_comm )
IF ( dffts%nogrp > 1 ) THEN
CALL mp_sum( tmp_rhos, gid = dffts%ogrp_comm )
ENDIF
!
!BRING CHARGE DENSITY BACK TO ITS ORIGINAL POSITION
@ -639,9 +638,9 @@
!orbital group then does a local copy (reshuffling) of its data
!
from = 1
DO ii = 1, nogrp
IF ( nolist( ii ) == me_bgrp ) EXIT !Exit the loop
from = from + dffts%nr1x*dffts%nr2x*dffts%npp( nolist( ii ) + 1 )! From where to copy initially
DO ii = 1, dffts%nogrp
IF ( dffts%nolist( ii ) == me_bgrp ) EXIT !Exit the loop
from = from + dffts%nr1x*dffts%nr2x*dffts%npp( dffts%nolist( ii ) + 1 )! From where to copy initially
ENDDO
!
DO ir = 1, nspin
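
A worked illustration of the offset arithmetic above (all sizes hypothetical): with an orbital group dffts%nolist = (/ 0, 1 /), me_bgrp = 1, a plane of dffts%nr1x*dffts%nr2x = 100 points, and npp(1) = 3 Z-planes on the first group member, the loop exits at ii = 2 with from = 1 + 100*3 = 301, i.e. this task copies its slice of the group-wide charge density starting at element 301.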

View File

@ -2188,7 +2188,7 @@ END FUNCTION
! initialize FFT descriptor
CALL fft_box_set( dfftb, nr1b, nr2b, nr3b, nr1bx, nr2bx, nr3bx, &
nat, irb, me_bgrp+1, nproc_bgrp, intra_bgrp_comm, dfftp%npp, dfftp%ipp )
nat, irb, dfftp%npp, dfftp%ipp )
IF( iprsta > 2 ) THEN
isa = 1

View File

@ -910,7 +910,6 @@ SUBROUTINE terminate_run()
USE cp_main_variables, ONLY : acc
USE cg_module, ONLY : tcg, print_clock_tcg
USE mp, ONLY : mp_report
USE mp_global, ONLY : use_task_groups
!
IMPLICIT NONE
!
@ -951,11 +950,7 @@ SUBROUTINE terminate_run()
!
IF (tcg) call print_clock_tcg()
!
IF( use_task_groups ) THEN
!
CALL print_clock( 'ALLTOALL' )
!
END IF
CALL print_clock( 'ALLTOALL' )
!
CALL mp_report()
!

View File

@ -39,8 +39,7 @@
USE funct, ONLY: dft_is_meta
USE fft_base, ONLY: dffts
USE fft_interfaces, ONLY: fwfft, invfft
USE mp_global, ONLY: nogrp, me_bgrp, ogrp_comm, &
use_task_groups
USE mp_global, ONLY: me_bgrp
!
IMPLICIT NONE
!
@ -69,9 +68,9 @@
!
CALL start_clock( 'dforce' )
!
IF( use_task_groups ) THEN
nogrp_ = nogrp
ALLOCATE( psi( dffts%tg_nnr * nogrp ) )
IF( dffts%have_task_groups ) THEN
nogrp_ = dffts%nogrp
ALLOCATE( psi( dffts%tg_nnr * dffts%nogrp ) )
ELSE
nogrp_ = 1
ALLOCATE( psi( nrxxs ) )
@ -120,7 +119,7 @@
iss2 = iss1
END IF
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
!$omp parallel do
DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 )
@ -161,7 +160,7 @@
!$omp parallel default(none) &
!$omp private( eig_offset, igno, fi, fip, idx, fp, fm, ig ) &
!$omp shared( nogrp_ , f, ngw, psi, df, da, c, tpiba2, tens, dffts, me_bgrp, &
!$omp i, n, ggp, use_task_groups, nls, nlsm )
!$omp i, n, ggp, nls, nlsm )
eig_offset = 0
igno = 1
@ -176,7 +175,7 @@
fi = -0.5d0*f(i+idx-1)
fip = -0.5d0*f(i+idx)
endif
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!$omp do
DO ig=1,ngw
fp= psi(nls(ig)+eig_offset) + psi(nlsm(ig)+eig_offset)

View File

@ -47,7 +47,8 @@
USE electrons_module, ONLY: bmeshset
USE electrons_base, ONLY: distribute_bands
USE problem_size, ONLY: cpsizes
USE mp_global, ONLY: nproc_bgrp, nbgrp, my_bgrp_id, intra_bgrp_comm
USE mp_global, ONLY: me_bgrp, nproc_bgrp, nbgrp, my_bgrp_id, intra_bgrp_comm
USE mp_global, ONLY: get_ntask_groups
USE core, ONLY: nlcc_any
USE uspp, ONLY: okvan
@ -56,7 +57,7 @@
integer :: i
real(dp) :: rat1, rat2, rat3
real(dp) :: at(3,3), bg(3,3), tpiba2
integer :: ng_, ngs_, ngm_ , ngw_
integer :: ng_, ngs_, ngm_ , ngw_ , nogrp_
tpiba2 = ( tpi / alat ) ** 2
@ -117,9 +118,10 @@
! ... set the sticks mesh and distribute g vectors among processors
! ... pstickset also sets the local real-space grid dimensions
!
nogrp_ = get_ntask_groups()
CALL pstickset( gamma_only, bg, gcutm, gkcut, gcutms, &
dfftp, dffts, ngw_ , ngm_ , ngs_ )
dfftp, dffts, ngw_ , ngm_ , ngs_ , me_bgrp, nproc_bgrp, intra_bgrp_comm, nogrp_ )
!
!
! ... Initialize reciprocal space local and global dimensions

View File

@ -26,8 +26,8 @@
!
USE parallel_include
USE kinds, ONLY : DP
USE mp_global, ONLY : nogrp, ogrp_comm, me_bgrp, nolist,&
use_task_groups, my_bgrp_id, nbgrp, inter_bgrp_comm
USE mp_global, ONLY : me_bgrp, &
my_bgrp_id, nbgrp, inter_bgrp_comm
USE mp, ONLY : mp_sum
USE fft_base, ONLY : dffts, tg_gather
use wave_base, only : wave_steepest, wave_verlet
@ -74,9 +74,9 @@
IF( restart ) iflag = 2
END IF
IF( use_task_groups ) THEN
tg_rhos_siz = nogrp * dffts%tg_nnr
c2_siz = nogrp * ngwx
IF( dffts%have_task_groups ) THEN
tg_rhos_siz = dffts%nogrp * dffts%tg_nnr
c2_siz = dffts%nogrp * ngwx
ELSE
tg_rhos_siz = 1
c2_siz = ngw
@ -116,7 +116,7 @@
c2 = 0D0
c3 = 0D0
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
! The potential in rhos is distributed across all processors
! We need to redistribute it so that it is completely contained in the
@ -126,7 +126,7 @@
CALL tg_gather( dffts, rhos(:,i), tg_rhos(:,i) )
END DO
incr = 2 * nogrp
incr = 2 * dffts%nogrp
ELSE
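
For context, tg_gather (from fft_base, reworked later in this diff) is what performs that redistribution: it collects the slab-distributed potential into a buffer spanning the whole orbital group. A minimal usage sketch, with the buffer shape taken from tg_rhos_siz in the hunk above and the spin loop assumed:

   USE fft_base, ONLY : dffts, tg_gather
   REAL(DP), ALLOCATABLE :: tg_rhos(:,:)
   ALLOCATE( tg_rhos( dffts%nogrp * dffts%tg_nnr, nspin ) )
   DO i = 1, nspin
      ! gather the potential, distributed over all tasks, into the
      ! buffer owned by this orbital group
      CALL tg_gather( dffts, rhos(:,i), tg_rhos(:,i) )
   END DO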
@ -137,7 +137,7 @@
DO i = 1, nbsp_bgrp, incr
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
!The input coefficients to dforce cover eigenstates i:i+2*NOGRP-1
!Thus, in dforce the dummy arguments for c0_bgrp(1,i) and
@ -241,11 +241,8 @@
USE efield_module, ONLY : dforce_efield, tefield
USE electrons_base, ONLY : ispin, nspin, f, n=>nbsp
USE cp_interfaces, ONLY : dforce
USE mp_global, ONLY : use_task_groups
!
USE gvecw, ONLY: ngw
!
!
USE fft_base, ONLY: dffts
USE electrons_base, ONLY: nx=>nbnd, nupdwn, iupdwn, nbspx, nbsp
USE mp, ONLY: mp_sum
USE mp_global, ONLY: intra_bgrp_comm
@ -286,7 +283,7 @@
'Electric field and sic are not implemented',2)
IF( nspin == 1 ) CALL errore(' runcp_force_pairing ',' inconsistent nspin ', 1)
IF( use_task_groups ) CALL errore(' runcp_force_pairing ',' task_groups not implemented ', 1)
IF( dffts%have_task_groups ) CALL errore(' runcp_force_pairing ',' task_groups not implemented ', 1)
!
ALLOCATE( emadt2( ngw ) )
ALLOCATE( emaver( ngw ) )

View File

@ -13,7 +13,7 @@ MODULE environment
USE io_files, ONLY: crash_file, crashunit, nd_nmbr
USE io_global, ONLY: stdout, meta_ionode
USE mp_global, ONLY: me_image, my_image_id, root_image, nimage, &
nproc_image, nproc, nogrp, npool, nproc_pool, nbgrp
nproc_image, nproc, npool, nproc_pool, nbgrp, get_ntask_groups
USE global_version, ONLY: version_number
IMPLICIT NONE
@ -194,8 +194,8 @@ CONTAINS
'(5X,"K-points division: npool = ",I4)' ) npool
IF ( nproc_pool > 1 ) WRITE( stdout, &
'(5X,"R & G space division: proc/pool = ",I4)' ) nproc_pool
IF ( nogrp > 1 ) WRITE( stdout, &
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) nogrp
IF ( get_ntask_groups() > 1 ) WRITE( stdout, &
'(5X,"wavefunctions fft division: fft/group = ",I4)' ) get_ntask_groups()
!
END SUBROUTINE parallel_info
!==-----------------------------------------------------------------------==!

View File

@ -55,7 +55,8 @@
! like infiniband, ethernet, myrinet
!
!-----------------------------------------------------------------------
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg, &
me_pgrp, npgrp, pgrp_comm, nplist )
!-----------------------------------------------------------------------
!
! transpose the fft grid across nodes
@ -89,7 +90,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
USE parallel_include
#endif
USE mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
my_image_id, nogrp, pgrp_comm, nplist, me_pgrp, npgrp
my_image_id
USE kinds, ONLY : DP
IMPLICIT NONE
@ -97,6 +98,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
INTEGER, INTENT(in) :: nr3x, nxx_, sign, ncp_ (:), npp_ (:)
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_aux (nxx_)
LOGICAL, OPTIONAL, INTENT(in) :: use_tg
INTEGER, OPTIONAL, INTENT(in) :: pgrp_comm, nplist(:), npgrp, me_pgrp
#ifdef __PARA
@ -426,7 +428,8 @@ END SUBROUTINE fft_scatter
! with a defined topology, like on bluegene and cray machines
!
!-----------------------------------------------------------------------
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg, &
me_pgrp, npgrp, pgrp_comm, nplist )
!-----------------------------------------------------------------------
!
! transpose the fft grid across nodes
@ -459,8 +462,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
#ifdef __PARA
USE parallel_include
#endif
USE mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
my_image_id, nogrp, pgrp_comm, nplist
USE mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc
USE kinds, ONLY : DP
IMPLICIT NONE
@ -468,6 +470,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
INTEGER, INTENT(in) :: nr3x, nxx_, sign, ncp_ (:), npp_ (:)
COMPLEX (DP), INTENT(inout) :: f_in (nxx_), f_aux (nxx_)
LOGICAL, OPTIONAL, INTENT(in) :: use_tg
INTEGER, OPTIONAL, INTENT(in) :: pgrp_comm, nplist(:), npgrp, me_pgrp
#ifdef __PARA
@ -491,7 +494,7 @@ SUBROUTINE fft_scatter ( f_in, nr3x, nxx_, f_aux, ncp_, npp_, sign, use_tg )
!
IF( use_tg_ ) THEN
! This is the number of procs. in the plane-wave group
nprocp = nproc_pool / nogrp
nprocp = npgrp
ELSE
nprocp = nproc_pool
ENDIF
@ -1191,7 +1194,7 @@ SUBROUTINE tg_gather( dffts, v, tg_v )
IF( size( tg_v ) < nsiz_tg ) &
CALL errore( ' tg_gather ', ' tg_v too small ', ( nsiz_tg - size( tg_v ) ) )
nsiz = dffts%npp( dffts%myid+1 ) * dffts%nr1x * dffts%nr2x
nsiz = dffts%npp( dffts%mype+1 ) * dffts%nr1x * dffts%nr2x
IF( size( v ) < nsiz ) &
CALL errore( ' tg_gather ', ' v too small ', ( nsiz - size( v ) ) )

View File

@ -83,7 +83,6 @@
use fft_scalar, only: cfft3d, cfft3ds, cft_b, cft_b_omp
use fft_parallel, only: tg_cft3s
USE fft_types, only: fft_dlay_descriptor
use mp_global, only: use_task_groups
IMPLICIT none
@ -135,7 +134,7 @@
ELSE IF( grid_type == 'Smooth' ) THEN
call tg_cft3s( f, dffts, 1 )
ELSE IF( grid_type == 'Wave' ) THEN
call tg_cft3s( f, dffts, 2, use_task_groups )
call tg_cft3s( f, dffts, 2, dffts%have_task_groups )
ELSE IF( grid_type == 'Box' .AND. dfftb%np3( ia ) > 0 ) THEN
#if defined __OPENMP && defined __FFTW
call cft_b_omp( f, dfftb%nr1, dfftb%nr2, dfftb%nr3, &
@ -200,7 +199,6 @@
use fft_scalar, only: cfft3d, cfft3ds
use fft_parallel, only: tg_cft3s
USE fft_types, only: fft_dlay_descriptor
use mp_global, only: use_task_groups
implicit none
@ -237,7 +235,7 @@
ELSE IF( grid_type == 'Smooth' ) THEN
call tg_cft3s(f,dffts,-1)
ELSE IF( grid_type == 'Wave' ) THEN
call tg_cft3s(f,dffts,-2, use_task_groups )
call tg_cft3s(f,dffts,-2, dffts%have_task_groups )
END IF
#else

View File

@ -60,8 +60,8 @@ SUBROUTINE tg_cft3s( f, dfft, isgn, use_task_groups )
USE fft_scalar, ONLY : cft_1z, cft_2xy
USE fft_base, ONLY : fft_scatter
USE kinds, ONLY : DP
USE mp_global, ONLY : me_pool, nproc_pool, ogrp_comm, npgrp, nogrp, &
intra_pool_comm, nolist, nplist
USE mp_global, ONLY : me_pool, nproc_pool, &
intra_pool_comm
USE fft_types, ONLY : fft_dlay_descriptor
USE parallel_include
@ -99,8 +99,8 @@ SUBROUTINE tg_cft3s( f, dfft, isgn, use_task_groups )
nx3 = dfft%nr3x
!
IF( use_tg ) THEN
ALLOCATE( aux( nogrp * dfft%tg_nnr ) )
ALLOCATE( YF ( nogrp * dfft%tg_nnr ) )
ALLOCATE( aux( dfft%nogrp * dfft%tg_nnr ) )
ALLOCATE( YF ( dfft%nogrp * dfft%tg_nnr ) )
ELSE
ALLOCATE( aux( dfft%tg_nnr ) )
ENDIF
@ -198,10 +198,10 @@ CONTAINS
!
IF( .not. use_tg ) RETURN
!
IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > size( yf ) ) THEN
IF( dfft%tg_rdsp(dfft%nogrp) + dfft%tg_rcv(dfft%nogrp) > size( yf ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 1 )
ENDIF
IF( dfft%tg_psdsp(nogrp) + dfft%tg_snd(nogrp) > size( f ) ) THEN
IF( dfft%tg_psdsp(dfft%nogrp) + dfft%tg_snd(dfft%nogrp) > size( f ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 2 )
ENDIF
@ -213,7 +213,7 @@ CONTAINS
#if defined __MPI
CALL MPI_ALLTOALLV( f(1), dfft%tg_snd, dfft%tg_psdsp, MPI_DOUBLE_COMPLEX, yf(1), dfft%tg_rcv, &
& dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
& dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, dfft%ogrp_comm, IERR)
IF( ierr /= 0 ) THEN
CALL errore( ' tg_cfft ', ' alltoall error 1 ', abs(ierr) )
ENDIF
@ -237,10 +237,10 @@ CONTAINS
!
IF( .not. use_tg ) RETURN
!
IF( dfft%tg_usdsp(nogrp) + dfft%tg_snd(nogrp) > size( f ) ) THEN
IF( dfft%tg_usdsp(dfft%nogrp) + dfft%tg_snd(dfft%nogrp) > size( f ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 3 )
ENDIF
IF( dfft%tg_rdsp(nogrp) + dfft%tg_rcv(nogrp) > size( yf ) ) THEN
IF( dfft%tg_rdsp(dfft%nogrp) + dfft%tg_rcv(dfft%nogrp) > size( yf ) ) THEN
CALL errore( ' tg_cfft ', ' inconsistent size ', 4 )
ENDIF
@ -249,7 +249,7 @@ CONTAINS
#if defined __MPI
CALL MPI_Alltoallv( yf(1), &
dfft%tg_rcv, dfft%tg_rdsp, MPI_DOUBLE_COMPLEX, f(1), &
dfft%tg_snd, dfft%tg_usdsp, MPI_DOUBLE_COMPLEX, ogrp_comm, IERR)
dfft%tg_snd, dfft%tg_usdsp, MPI_DOUBLE_COMPLEX, dfft%ogrp_comm, IERR)
IF( ierr /= 0 ) THEN
CALL errore( ' tg_cfft ', ' alltoall error 2 ', abs(ierr) )
ENDIF
@ -288,7 +288,8 @@ CONTAINS
npp = dfft%tg_npp( me_p )
nnp = nx1*nx2
!
CALL fft_scatter( aux, nx3, nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg )
CALL fft_scatter( aux, nx3, dfft%nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg, &
dfft%me_pgrp, dfft%npgrp, dfft%pgrp_comm, dfft%nplist )
!
ELSE
!
@ -407,7 +408,8 @@ CONTAINS
!
IF( use_tg ) THEN
!
CALL fft_scatter( aux, nx3, nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg )
CALL fft_scatter( aux, nx3, dfft%nogrp*dfft%tg_nnr, f, dfft%tg_nsw, dfft%tg_npp, iopt, use_tg, &
dfft%me_pgrp, dfft%npgrp, dfft%pgrp_comm, dfft%nplist )
!
ELSE
!

View File

@ -54,19 +54,22 @@ MODULE fft_types
!
! fft parallelization
!
INTEGER :: myid ! my processor id
INTEGER :: mype ! my processor id (starting from 0) in the fft group
INTEGER :: comm ! communicator of the fft group
INTEGER :: nproc ! number of processors in the fft group
!
! task groups
!
INTEGER :: NOGRP
INTEGER :: NPGRP
INTEGER :: ogrp_comm
INTEGER :: pgrp_comm
INTEGER, POINTER :: nolist(:) ! number of sticks per task group ( wave func )
!
LOGICAL :: have_task_groups
!
INTEGER :: me_pgrp ! task id for plane wave task group
INTEGER :: nogrp ! number of proc. in an orbital "task group"
INTEGER :: npgrp ! number of proc. in a plane-wave "task group"
INTEGER :: ogrp_comm ! orbital group communicator
INTEGER :: pgrp_comm ! plane-wave group communicator
INTEGER, POINTER :: nolist(:) ! list of pes in orbital group
INTEGER, POINTER :: nplist(:) ! list of pes in pw group
!
INTEGER :: tg_nnr ! maximum among nnr
INTEGER, POINTER :: tg_nsw(:) ! number of sticks per task group ( wave func )
INTEGER, POINTER :: tg_npp(:) ! number of "Z" planes per task group
@ -84,9 +87,10 @@ MODULE fft_types
CONTAINS
SUBROUTINE fft_dlay_allocate( desc, myid, nproc, comm, nx, ny )
SUBROUTINE fft_dlay_allocate( desc, mype, nproc, comm, nogrp, nx, ny )
TYPE (fft_dlay_descriptor) :: desc
INTEGER, INTENT(in) :: myid, nproc, comm, nx, ny ! myid starting from 0
INTEGER, INTENT(in) :: mype, nproc, comm, nx, ny ! mype starting from 0
INTEGER, INTENT(in) :: nogrp ! number of task groups
ALLOCATE( desc%nsp( nproc ) )
ALLOCATE( desc%nsw( nproc ) )
ALLOCATE( desc%ngl( nproc ) )
@ -113,15 +117,25 @@ CONTAINS
desc%id = 0
desc%myid = myid
desc%mype = mype
desc%comm = comm
desc%nproc = nproc
desc%have_task_groups = .false.
desc%NOGRP = 0
desc%NPGRP = 0
IF( nogrp > 1 ) &
desc%have_task_groups = .true.
desc%me_pgrp = 0
!
IF( MOD( nproc, MAX( 1, nogrp ) ) /= 0 ) &
CALL errore( " fft_dlay_allocate ", "the number of task groups should be a divisor of nproc ", 1 )
desc%nogrp = MAX( 1, nogrp )
desc%npgrp = nproc / MAX( 1, nogrp )
desc%ogrp_comm = 0
desc%pgrp_comm = 0
NULLIFY( desc%nolist )
ALLOCATE( desc%nolist( desc%nogrp ) )
ALLOCATE( desc%nplist( desc%npgrp ) )
desc%nolist = 0
desc%nplist = 0
NULLIFY( desc%tg_nsw )
NULLIFY( desc%tg_npp )
NULLIFY( desc%tg_snd )
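
The group sizes follow directly from the two arguments: nogrp must divide nproc, and npgrp is the quotient. A hedged sketch with hypothetical sizes, mirroring the call made from pstickset later in this diff:

   ! 8 tasks, 2 per orbital group => dffts%nogrp = 2, dffts%npgrp = 4,
   ! nolist(2) and nplist(4) allocated; nogrp = 3 would fail the MOD check
   CALL fft_dlay_allocate( dffts, mype, 8, comm, 2, nr1sx, nr2sx )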
@ -146,9 +160,10 @@ CONTAINS
IF ( associated( desc%ismap ) ) DEALLOCATE( desc%ismap )
IF ( associated( desc%iplp ) ) DEALLOCATE( desc%iplp )
IF ( associated( desc%iplw ) ) DEALLOCATE( desc%iplw )
IF ( associated( desc%nolist ) ) DEALLOCATE( desc%nolist )
IF ( associated( desc%nplist ) ) DEALLOCATE( desc%nplist )
desc%id = 0
IF( desc%have_task_groups ) THEN
IF ( associated( desc%nolist ) ) DEALLOCATE( desc%nolist )
IF ( associated( desc%tg_nsw ) ) DEALLOCATE( desc%tg_nsw )
IF ( associated( desc%tg_npp ) ) DEALLOCATE( desc%tg_npp )
IF ( associated( desc%tg_snd ) ) DEALLOCATE( desc%tg_snd )
@ -162,9 +177,9 @@ CONTAINS
!=----------------------------------------------------------------------------=!
SUBROUTINE fft_box_allocate( desc, myid, nproc, comm, nat )
SUBROUTINE fft_box_allocate( desc, mype, nproc, comm, nat )
TYPE (fft_dlay_descriptor) :: desc
INTEGER, INTENT(in) :: nat, nproc, myid, comm ! myid starting from 0
INTEGER, INTENT(in) :: nat, nproc, mype, comm ! mype starting from 0
ALLOCATE( desc%irb( 3, nat ) )
ALLOCATE( desc%imin3( nat ) )
ALLOCATE( desc%imax3( nat ) )
@ -177,7 +192,7 @@ CONTAINS
desc%npp = 0
desc%ipp = 0
desc%np3 = 0
desc%myid = myid
desc%mype = mype
desc%nproc = nproc
desc%comm = comm
desc%have_task_groups = .false.
@ -197,8 +212,8 @@ CONTAINS
!=----------------------------------------------------------------------------=!
SUBROUTINE fft_dlay_set( desc, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, me, &
nproc, comm, nogrp, ub, lb, idx, in1, in2, ncp, ncpw, ngp, ngpw, st, stw )
SUBROUTINE fft_dlay_set( desc, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, &
ub, lb, idx, in1, in2, ncp, ncpw, ngp, ngpw, st, stw )
TYPE (fft_dlay_descriptor) :: desc
@ -206,10 +221,6 @@ CONTAINS
INTEGER, INTENT(in) :: nst
INTEGER, INTENT(in) :: nr1, nr2, nr3 ! size of real space grid
INTEGER, INTENT(in) :: nr1x, nr2x, nr3x ! padded size of real space grid
INTEGER, INTENT(in) :: me ! processor index (starting from 1)
INTEGER, INTENT(in) :: nproc ! number of processors
INTEGER, INTENT(in) :: comm ! communicator
INTEGER, INTENT(in) :: nogrp ! number of processors in task-group
INTEGER, INTENT(in) :: ub(3), lb(3) ! upper and lower bound of real space indices
INTEGER, INTENT(in) :: idx(:)
INTEGER, INTENT(in) :: in1(:)
@ -221,15 +232,15 @@ CONTAINS
INTEGER, INTENT(in) :: st( lb(1) : ub(1), lb(2) : ub(2) )
INTEGER, INTENT(in) :: stw( lb(1) : ub(1), lb(2) : ub(2) )
INTEGER :: npp( nproc ), n3( nproc ), nsp( nproc )
INTEGER :: npp( desc%nproc ), n3( desc%nproc ), nsp( desc%nproc )
INTEGER :: np, nq, i, is, iss, i1, i2, m1, m2, n1, n2, ip
! Task-grouping C. Bekas
!
INTEGER :: sm
IF( ( size( desc%ngl ) < nproc ) .or. ( size( desc%npp ) < nproc ) .or. &
( size( desc%ipp ) < nproc ) .or. ( size( desc%iss ) < nproc ) ) &
IF( ( size( desc%ngl ) < desc%nproc ) .or. ( size( desc%npp ) < desc%nproc ) .or. &
( size( desc%ipp ) < desc%nproc ) .or. ( size( desc%iss ) < desc%nproc ) ) &
CALL errore( ' fft_dlay_set ', ' wrong descriptor dimensions ', 1 )
IF( ( nr1 > nr1x ) .or. ( nr2 > nr2x ) .or. ( nr3 > nr3x ) ) &
@ -238,37 +249,26 @@ CONTAINS
IF( ( size( idx ) < nst ) .or. ( size( in1 ) < nst ) .or. ( size( in2 ) < nst ) ) &
CALL errore( ' fft_dlay_set ', ' wrong number of stick dimensions ', 3 )
IF( ( size( ncp ) < nproc ) .or. ( size( ngp ) < nproc ) ) &
IF( ( size( ncp ) < desc%nproc ) .or. ( size( ngp ) < desc%nproc ) ) &
CALL errore( ' fft_dlay_set ', ' wrong stick dimensions ', 4 )
IF( desc%nproc /= nproc ) &
CALL errore( ' fft_dlay_set ', ' wrong number of processor ', 4 )
IF( desc%myid /= (me - 1) ) &
CALL errore( ' fft_dlay_set ', ' wrong processor index ', 4 )
IF( desc%comm /= comm ) &
CALL errore( ' fft_dlay_set ', ' wrong communicator ', 4 )
desc%have_task_groups = .false.
! Set the number of "xy" planes for each processor
! in other words, do a slab partition along the z axis
sm = 0
npp = 0
IF ( nproc == 1 ) THEN
IF ( desc%nproc == 1 ) THEN
npp(1) = nr3
ELSEIF( nproc <= nr3 ) THEN
np = nr3 / nproc
nq = nr3 - np * nproc
DO i = 1, nproc
ELSEIF( desc%nproc <= nr3 ) THEN
np = nr3 / desc%nproc
nq = nr3 - np * desc%nproc
DO i = 1, desc%nproc
npp(i) = np
IF ( i <= nq ) npp(i) = np + 1
ENDDO
ELSE
DO ip = 1, nr3 ! some compiler complains for empty DO loops
DO i = 1, nproc, nogrp
DO i = 1, desc%nproc, desc%nogrp
npp(i) = npp(i) + 1
sm = sm + 1
IF ( sm == nr3 ) exit
@ -277,17 +277,17 @@ CONTAINS
ENDDO
ENDIF
desc%npp( 1:nproc ) = npp
desc%npl = npp( me )
desc%npp( 1:desc%nproc ) = npp
desc%npl = npp( desc%mype + 1 )
! Find out the index of the starting plane on each proc
n3 = 0
DO i = 2, nproc
DO i = 2, desc%nproc
n3(i) = n3(i-1) + npp(i-1)
ENDDO
desc%ipp( 1:nproc ) = n3
desc%ipp( 1:desc%nproc ) = n3
! Set the proper number of sticks
@ -309,14 +309,14 @@ CONTAINS
! Set fft local workspace dimension
IF ( nproc == 1 ) THEN
IF ( desc%nproc == 1 ) THEN
desc%nnr = nr1x * nr2x * nr3x
desc%tg_nnr = desc%nnr
ELSE
desc%nnr = max( nr3x * ncp(me), nr1x * nr2x * npp(me) )
desc%nnr = max( nr3x * ncp( desc%mype + 1 ), nr1x * nr2x * npp( desc%mype + 1 ) )
desc%nnr = max( 1, desc%nnr ) ! ensure that desc%nnr > 0 ( for extreme parallelism )
desc%tg_nnr = desc%nnr
DO i = 1, nproc
DO i = 1, desc%nproc
desc%tg_nnr = max( desc%tg_nnr, nr3x * ncp( i ) )
desc%tg_nnr = max( desc%tg_nnr, nr1x * nr2x * npp( i ) )
ENDDO
@ -325,8 +325,8 @@ CONTAINS
desc%ngl( 1:nproc ) = ngp( 1:nproc )
desc%nwl( 1:nproc ) = ngpw( 1:nproc )
desc%ngl( 1:desc%nproc ) = ngp( 1:desc%nproc )
desc%nwl( 1:desc%nproc ) = ngpw( 1:desc%nproc )
IF( size( desc%isind ) < ( nr1x * nr2x ) ) &
CALL errore( ' fft_dlay_set ', ' wrong descriptor dimensions, isind ', 5 )
@ -379,7 +379,7 @@ CONTAINS
! local stick ( desc%iss )
!
DO i = 1, nproc
DO i = 1, desc%nproc
IF( i == 1 ) THEN
desc%iss( i ) = 0
ELSE
@ -406,7 +406,7 @@ CONTAINS
IF( ip > 0 ) THEN
nsp( ip ) = nsp( ip ) + 1
desc%ismap( nsp( ip ) + desc%iss( ip ) ) = iss
IF( ip == me ) THEN
IF( ip == ( desc%mype + 1 ) ) THEN
desc%isind( iss ) = nsp( ip )
ELSE
desc%isind( iss ) = 0
@ -416,14 +416,14 @@ CONTAINS
! check number of sticks against the input value
IF( any( nsp( 1:nproc ) /= ncpw( 1:nproc ) ) ) THEN
DO ip = 1, nproc
IF( any( nsp( 1:desc%nproc ) /= ncpw( 1:desc%nproc ) ) ) THEN
DO ip = 1, desc%nproc
WRITE( stdout,*) ' * ', ip, ' * ', nsp( ip ), ' /= ', ncpw( ip )
ENDDO
CALL errore( ' fft_dlay_set ', ' inconsistent number of sticks ', 7 )
ENDIF
desc%nsw( 1:nproc ) = nsp( 1:nproc )
desc%nsw( 1:desc%nproc ) = nsp( 1:desc%nproc )
! then add pseudopotential stick
@ -432,7 +432,7 @@ CONTAINS
IF( ip < 0 ) THEN
nsp( -ip ) = nsp( -ip ) + 1
desc%ismap( nsp( -ip ) + desc%iss( -ip ) ) = iss
IF( -ip == me ) THEN
IF( -ip == ( desc%mype + 1 ) ) THEN
desc%isind( iss ) = nsp( -ip )
ELSE
desc%isind( iss ) = 0
@ -442,14 +442,14 @@ CONTAINS
! check number of sticks against the input value
IF( any( nsp( 1:nproc ) /= ncp( 1:nproc ) ) ) THEN
DO ip = 1, nproc
IF( any( nsp( 1:desc%nproc ) /= ncp( 1:desc%nproc ) ) ) THEN
DO ip = 1, desc%nproc
WRITE( stdout,*) ' * ', ip, ' * ', nsp( ip ), ' /= ', ncp( ip )
ENDDO
CALL errore( ' fft_dlay_set ', ' inconsistent number of sticks ', 8 )
ENDIF
desc%nsp( 1:nproc ) = nsp( 1:nproc )
desc%nsp( 1:desc%nproc ) = nsp( 1:desc%nproc )
icount = icount + 1
desc%id = icount
@ -464,13 +464,13 @@ CONTAINS
!=----------------------------------------------------------------------------=!
SUBROUTINE fft_box_set( desc, nr1b, nr2b, nr3b, nr1bx, nr2bx, nr3bx, nat, &
irb, me, nproc, comm, npp, ipp )
irb, npp, ipp )
IMPLICIT NONE
TYPE (fft_dlay_descriptor) :: desc
INTEGER, INTENT(in) :: nat, me, nproc, comm
INTEGER, INTENT(in) :: nat
INTEGER, INTENT(in) :: irb( :, : )
INTEGER, INTENT(in) :: npp( : )
INTEGER, INTENT(in) :: ipp( : )
@ -483,19 +483,9 @@ CONTAINS
CALL errore(" fft_box_set ", " inconsistent dimensions ", 1 )
ENDIF
IF( nproc > size( desc%npp ) ) &
IF( desc%nproc > size( desc%npp ) ) &
CALL errore(" fft_box_set ", " inconsistent dimensions ", 2 )
IF( desc%nproc /= nproc ) &
CALL errore( ' fft_dlay_set ', ' wrong number of processor ', 4 )
IF( desc%myid /= (me - 1) ) &
CALL errore( ' fft_dlay_set ', ' wrong processor index ', 4 )
IF( desc%comm /= comm ) &
CALL errore( ' fft_dlay_set ', ' wrong communicator ', 4 )
desc%nr1 = nr1b
desc%nr2 = nr2b
desc%nr3 = nr3b
@ -504,10 +494,10 @@ CONTAINS
desc%nr3x = nr3bx
desc%irb( 1:3, 1:nat ) = irb( 1:3, 1:nat )
desc%npp( 1:nproc ) = npp( 1:nproc )
desc%ipp( 1:nproc ) = ipp( 1:nproc )
desc%npp( 1:desc%nproc ) = npp( 1:desc%nproc )
desc%ipp( 1:desc%nproc ) = ipp( 1:desc%nproc )
nr3 = sum( npp( 1:nproc ) )
nr3 = sum( npp( 1:desc%nproc ) )
DO isa = 1, nat
@ -519,8 +509,8 @@ CONTAINS
ibig3 = 1 + mod( irb3 + ir3 - 2, nr3 )
IF( ibig3 < 1 .or. ibig3 > nr3 ) &
& CALL errore(' fft_box_set ',' ibig3 wrong ', ibig3 )
ibig3 = ibig3 - ipp( me )
IF ( ibig3 > 0 .and. ibig3 <= npp(me) ) THEN
ibig3 = ibig3 - ipp( desc%mype + 1 )
IF ( ibig3 > 0 .and. ibig3 <= npp(desc%mype + 1) ) THEN
imin3 = min( imin3, ir3 )
imax3 = max( imax3, ir3 )
ENDIF
@ -583,8 +573,9 @@ CONTAINS
desc%nnp = nr1x * nr2x
desc%npp = nr3
desc%ipp = 0
desc%have_task_groups = .false.
desc%tg_nnr = desc%nnr
!
desc%have_task_groups = .false.
RETURN
END SUBROUTINE fft_dlay_scalar

View File

@ -74,15 +74,7 @@ MODULE mp_global
!
! ... "task" groups (for band parallelization of FFT)
!
INTEGER :: nogrp = 1 ! number of proc. in an orbital "task group"
INTEGER :: npgrp = 1 ! number of proc. in a plane-wave "task group"
INTEGER :: me_pgrp = 0 ! task id for plane wave task group
INTEGER, ALLOCATABLE :: nolist(:) ! list of procs in my orbital task group
INTEGER, ALLOCATABLE :: nplist(:) ! list of procs in my plane wave task group
INTEGER :: pgrp_comm = 0 ! plane-wave group communicator
INTEGER :: ogrp_comm = 0 ! orbital group communicator
LOGICAL :: &
use_task_groups = .FALSE. ! if TRUE task groups parallelization is used
INTEGER :: ntask_groups = 1 ! number of proc. in an orbital "task group"
!
! ... Misc parallelization info
!
@ -93,6 +85,7 @@ MODULE mp_global
INTEGER :: nproc_pool_file = 1 ! in a pool
!
PRIVATE :: init_images, init_pools, init_bands, init_ortho
PRIVATE :: ntask_groups
!
CONTAINS
!
@ -112,7 +105,7 @@ CONTAINS
! ... NPOOL must be a whole divisor of NPROC
!
IMPLICIT NONE
INTEGER :: world, ntask_groups, nproc_ortho_in, meta_ionode_id
INTEGER :: world, nproc_ortho_in, meta_ionode_id
INTEGER :: root = 0
LOGICAL :: meta_ionode
!
@ -193,12 +186,6 @@ CONTAINS
!
CALL init_ortho( nproc_ortho_in )
!
use_task_groups = ( ntask_groups > 1 )
IF( use_task_groups ) THEN
nogrp = ntask_groups
CALL init_task_groups( )
END IF
!
!
RETURN
!
@ -225,7 +212,6 @@ CONTAINS
me_pool = mpime
me_image = mpime
me_bgrp = mpime
me_pgrp = me_pool
root_pool = root
root_image = root
root_bgrp = root
@ -236,10 +222,6 @@ CONTAINS
inter_bgrp_comm = group_i
intra_bgrp_comm = group_i
ortho_comm = group_i
ALLOCATE( nolist( nproc_i ) )
ALLOCATE( nplist( nproc_i ) )
nolist = 0
nplist = 0
!
RETURN
!
@ -251,8 +233,6 @@ CONTAINS
!
CALL mp_barrier()
CALL mp_end ()
IF (ALLOCATED (nolist) ) DEALLOCATE ( nolist )
IF (ALLOCATED (nplist) ) DEALLOCATE ( nplist )
!
END SUBROUTINE mp_global_end
!
@ -468,87 +448,6 @@ CONTAINS
END SUBROUTINE init_ortho
!
!
SUBROUTINE init_task_groups( )
!
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
INTEGER :: pgroup( nproc_pool )
!
!SUBDIVIDE THE PROCESSORS IN GROUPS
!
!THE NUMBER OF GROUPS HAS TO BE A DIVISOR OF THE NUMBER
!OF PROCESSORS
!
IF( MOD( nproc_pool, nogrp ) /= 0 ) &
CALL errore( " init_task_groups ", "the number of task groups should be a divisor of nproc_pool ", 1 )
!
npgrp = nproc_pool / nogrp
DO i = 1, nproc_pool
pgroup( i ) = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY ORBITAL GROUP
!
! processors in this group have contiguous indexes
!
N1 = ( me_pool / NOGRP ) * NOGRP - 1
DO i = 1, nogrp
nolist( I ) = pgroup( N1 + I + 1 )
IF( me_pool == nolist( I ) ) ipos = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
!
DO I = 1, npgrp
nplist( I ) = pgroup( ipos + ( i - 1 ) * nogrp + 1 )
ENDDO
!
!SET UP THE GROUPS
!
!
!CREATE ORBITAL GROUPS
!
#if defined __MPI
color = me_pool / nogrp
key = MOD( me_pool , nogrp )
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, ogrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' init_task_groups ', ' creating ogrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( ogrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( ogrp_comm, ntsk, IERR )
IF( nogrp /= ntsk ) CALL errore( ' init_task_groups ', ' ogrp_comm size ', ntsk )
DO i = 1, nogrp
IF( me_pool == nolist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' ogrp_comm rank ', itsk )
END IF
END DO
#endif
!
!CREATE PLANEWAVE GROUPS
!
#if defined __MPI
color = MOD( me_pool , nogrp )
key = me_pool / nogrp
CALL MPI_COMM_SPLIT( intra_pool_comm, color, key, pgrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' init_task_groups ', ' creating pgrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( pgrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( pgrp_comm, ntsk, IERR )
IF( npgrp /= ntsk ) CALL errore( ' init_task_groups ', ' pgrp_comm size ', ntsk )
DO i = 1, npgrp
IF( me_pool == nplist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' pgrp_comm rank ', itsk )
END IF
END DO
me_pgrp = itsk
#endif
RETURN
END SUBROUTINE init_task_groups
!
!
SUBROUTINE init_ortho_group( nproc_try_in, comm_all )
!
IMPLICIT NONE
@ -748,4 +647,11 @@ CONTAINS
END SUBROUTINE distribute_over_bgrp
!
!
FUNCTION get_ntask_groups()
IMPLICIT NONE
INTEGER :: get_ntask_groups
get_ntask_groups = ntask_groups
RETURN
END FUNCTION get_ntask_groups
!
END MODULE mp_global
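
With the globals gone, callers query mp_global only for the requested group count and pass it down to the FFT layer. A sketch mirroring the data_structure and init_dimensions changes elsewhere in this commit:

   USE mp_global, ONLY : me_pool, nproc_pool, intra_pool_comm, get_ntask_groups
   INTEGER :: nogrp_
   nogrp_ = get_ntask_groups()
   CALL pstickset( gamma_only, bg, gcutm, gkcut, gcutms, &
                   dfftp, dffts, ngw_ , ngm_ , ngs_ , me_pool, nproc_pool, &
                   intra_pool_comm, nogrp_ )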

View File

@ -22,10 +22,6 @@
USE io_global, ONLY: ionode, stdout
USE fft_types, ONLY: fft_dlay_descriptor, fft_dlay_allocate, &
fft_dlay_set, fft_dlay_scalar
USE mp_global, ONLY: me_pool, nproc_pool, intra_pool_comm, nogrp, use_task_groups
USE mp_global, ONLY : me_pool, nproc_pool, intra_pool_comm
USE mp_global, ONLY : NOGRP, NPGRP, ogrp_comm, pgrp_comm
USE mp_global, ONLY : nolist
PRIVATE
SAVE
@ -37,7 +33,7 @@
!=----------------------------------------------------------------------=
SUBROUTINE pstickset( gamma_only, bg, gcut, gkcut, gcuts, &
dfftp, dffts, ngw, ngm, ngs )
dfftp, dffts, ngw, ngm, ngs, mype, nproc, comm, nogrp_ )
LOGICAL, INTENT(in) :: gamma_only
! ... bg(:,1), bg(:,2), bg(:,3) reciprocal space base vectors.
@ -46,6 +42,10 @@
TYPE(fft_dlay_descriptor), INTENT(inout) :: dfftp, dffts
INTEGER, INTENT(out) :: ngw, ngm, ngs
INTEGER, INTENT(IN) :: mype, nproc, comm
INTEGER, INTENT(IN) :: nogrp_
LOGICAL :: tk
INTEGER :: ub(3), lb(3)
@ -136,8 +136,8 @@
! ... Fill in the stick maps, for given g-space base and cut-off
CALL sticks_maps( tk, ub, lb, bg(:,1), bg(:,2), bg(:,3), &
gcut, gkcut, gcuts, st, stw, sts, me_pool, &
nproc_pool, intra_pool_comm )
gcut, gkcut, gcuts, st, stw, sts, mype, &
nproc, comm )
! ... Now count the number of stick nst and nstw
@ -156,14 +156,14 @@
ALLOCATE(ist(nst,5))
ALLOCATE(nstp(nproc_pool))
ALLOCATE(sstp(nproc_pool))
ALLOCATE(nstp(nproc))
ALLOCATE(sstp(nproc))
ALLOCATE(nstpw(nproc_pool))
ALLOCATE(sstpw(nproc_pool))
ALLOCATE(nstpw(nproc))
ALLOCATE(sstpw(nproc))
ALLOCATE(nstps(nproc_pool))
ALLOCATE(sstps(nproc_pool))
ALLOCATE(nstps(nproc))
ALLOCATE(sstps(nproc))
! ... initialize the sticks indexes array ist
@ -174,7 +174,7 @@
ALLOCATE( idx( nst ) )
CALL sticks_sort( ist(:,4), ist(:,3), ist(:,5), nst, idx, nproc_pool )
CALL sticks_sort( ist(:,4), ist(:,3), ist(:,5), nst, idx, nproc )
! ... Set as first stick the stick containing the G=0
!
@ -186,26 +186,26 @@
! idx( iss ) = itmp
CALL sticks_dist( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc_pool )
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
ngw = sstpw( me_pool + 1 )
ngm = sstp( me_pool + 1 )
ngs = sstps( me_pool + 1 )
ngw = sstpw( mype + 1 )
ngm = sstp( mype + 1 )
ngs = sstps( mype + 1 )
CALL sticks_pairup( tk, ub, lb, idx, ist(:,1), ist(:,2), ist(:,4), ist(:,3), ist(:,5), &
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc_pool )
nst, nstp, nstpw, nstps, sstp, sstpw, sstps, st, stw, sts, nproc )
! ... Allocate and Set fft data layout descriptors
#if defined __PARA
CALL fft_dlay_allocate( dfftp, me_pool, nproc_pool, intra_pool_comm, nr1x, nr2x )
CALL fft_dlay_allocate( dffts, me_pool, nproc_pool, intra_pool_comm, nr1sx, nr2sx )
CALL fft_dlay_allocate( dfftp, mype, nproc, comm, nogrp_ , nr1x, nr2x )
CALL fft_dlay_allocate( dffts, mype, nproc, comm, nogrp_ , nr1sx, nr2sx )
CALL fft_dlay_set( dfftp, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, (me_pool+1), &
nproc_pool, intra_pool_comm, nogrp, ub, lb, idx, ist(:,1), ist(:,2), nstp, nstpw, sstp, sstpw, st, stw )
CALL fft_dlay_set( dffts, tk, nsts, nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx, (me_pool+1), &
nproc_pool, intra_pool_comm, nogrp, ub, lb, idx, ist(:,1), ist(:,2), nstps, nstpw, sstps, sstpw, sts, stw )
CALL fft_dlay_set( dfftp, tk, nst, nr1, nr2, nr3, nr1x, nr2x, nr3x, &
ub, lb, idx, ist(:,1), ist(:,2), nstp, nstpw, sstp, sstpw, st, stw )
CALL fft_dlay_set( dffts, tk, nsts, nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx, &
ub, lb, idx, ist(:,1), ist(:,2), nstps, nstpw, sstps, sstpw, sts, stw )
#else
@ -218,8 +218,8 @@
IF( ngm_ /= ngm ) CALL errore( ' pstickset ', ' inconsistent ngm ', abs( ngm - ngm_ ) )
IF( ngs_ /= ngs ) CALL errore( ' pstickset ', ' inconsistent ngs ', abs( ngs - ngs_ ) )
CALL fft_dlay_allocate( dfftp, me_pool, nproc_pool, intra_pool_comm, max(nr1x, nr3x), nr2x )
CALL fft_dlay_allocate( dffts, me_pool, nproc_pool, intra_pool_comm, max(nr1sx, nr3sx), nr2sx )
CALL fft_dlay_allocate( dfftp, mype, nproc, comm, max(nr1x, nr3x), nr2x )
CALL fft_dlay_allocate( dffts, mype, nproc, comm, max(nr1sx, nr3sx), nr2sx )
CALL fft_dlay_scalar( dfftp, ub, lb, nr1, nr2, nr3, nr1x, nr2x, nr3x, stw )
CALL fft_dlay_scalar( dffts, ub, lb, nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx, stw )
@ -234,7 +234,7 @@
! ... Maximum number of sticks (wave func.)
nstpwx = maxval( nstpw )
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
! Initialize task groups.
! Note that this call modify dffts adding task group data.
@ -247,7 +247,7 @@
118 FORMAT(3X,' n.st n.stw n.sts n.g n.gw n.gs')
WRITE( stdout,121) minval(nstp), minval(nstpw), minval(nstps), minval(sstp), minval(sstpw), minval(sstps)
WRITE( stdout,122) maxval(nstp), maxval(nstpw), maxval(nstps), maxval(sstp), maxval(sstpw), maxval(sstps)
! DO ip = 1, nproc_pool
! DO ip = 1, nproc
! IF (ionode) THEN
! WRITE( stdout,120) ip, nstp(ip), nstpw(ip), nstps(ip), sstp(ip), sstpw(ip), sstps(ip)
! END IF
@ -304,20 +304,21 @@ SUBROUTINE task_groups_init( dffts )
INTEGER :: I
INTEGER :: IERR
INTEGER :: num_planes, num_sticks
INTEGER :: nnrsx_vec ( nproc_pool )
INTEGER :: pgroup( nproc_pool )
INTEGER :: nnrsx_vec ( dffts%nproc )
INTEGER :: pgroup( dffts%nproc )
INTEGER :: strd
CALL task_groups_init_first( dffts )
!
IF ( nogrp > 1 ) WRITE( stdout, 100 ) nogrp, npgrp
IF ( dffts%nogrp > 1 ) WRITE( stdout, 100 ) dffts%nogrp, dffts%npgrp
100 FORMAT( /,3X,'Task Groups are in USE',/,3X,'groups and procs/group : ',I5,I5 )
!Find maximum chunk of local data concerning coefficients of eigenfunctions in g-space
#if defined __MPI
CALL MPI_Allgather( dffts%nnr, 1, MPI_INTEGER, nnrsx_vec, 1, MPI_INTEGER, intra_pool_comm, IERR)
strd = maxval( nnrsx_vec( 1:nproc_pool ) )
CALL MPI_Allgather( dffts%nnr, 1, MPI_INTEGER, nnrsx_vec, 1, MPI_INTEGER, dffts%comm, IERR)
strd = maxval( nnrsx_vec( 1:dffts%nproc ) )
#else
strd = dffts%nnr
#endif
@ -334,60 +335,131 @@ SUBROUTINE task_groups_init( dffts )
!we choose to do the latter one.
!-------------------------------------------------------------------------------------
!
dffts%nogrp = nogrp
dffts%npgrp = npgrp
dffts%ogrp_comm = ogrp_comm
dffts%pgrp_comm = pgrp_comm
!
ALLOCATE( dffts%tg_nsw(nproc_pool))
ALLOCATE( dffts%tg_npp(nproc_pool))
ALLOCATE( dffts%nolist(nogrp))
ALLOCATE( dffts%tg_nsw(dffts%nproc))
ALLOCATE( dffts%tg_npp(dffts%nproc))
num_sticks = 0
num_planes = 0
DO i = 1, nogrp
dffts%nolist( i ) = nolist( i )
num_sticks = num_sticks + dffts%nsw( nolist(i) + 1 )
num_planes = num_planes + dffts%npp( nolist(i) + 1 )
DO i = 1, dffts%nogrp
num_sticks = num_sticks + dffts%nsw( dffts%nolist(i) + 1 )
num_planes = num_planes + dffts%npp( dffts%nolist(i) + 1 )
ENDDO
#if defined __MPI
CALL MPI_ALLGATHER(num_sticks, 1, MPI_INTEGER, dffts%tg_nsw(1), 1, MPI_INTEGER, intra_pool_comm, IERR)
CALL MPI_ALLGATHER(num_planes, 1, MPI_INTEGER, dffts%tg_npp(1), 1, MPI_INTEGER, intra_pool_comm, IERR)
CALL MPI_ALLGATHER(num_sticks, 1, MPI_INTEGER, dffts%tg_nsw(1), 1, MPI_INTEGER, dffts%comm, IERR)
CALL MPI_ALLGATHER(num_planes, 1, MPI_INTEGER, dffts%tg_npp(1), 1, MPI_INTEGER, dffts%comm, IERR)
#else
dffts%tg_nsw(1) = num_sticks
dffts%tg_npp(1) = num_planes
#endif
ALLOCATE( dffts%tg_snd( nogrp ) )
ALLOCATE( dffts%tg_rcv( nogrp ) )
ALLOCATE( dffts%tg_psdsp( nogrp ) )
ALLOCATE( dffts%tg_usdsp( nogrp ) )
ALLOCATE( dffts%tg_rdsp( nogrp ) )
ALLOCATE( dffts%tg_snd( dffts%nogrp ) )
ALLOCATE( dffts%tg_rcv( dffts%nogrp ) )
ALLOCATE( dffts%tg_psdsp( dffts%nogrp ) )
ALLOCATE( dffts%tg_usdsp( dffts%nogrp ) )
ALLOCATE( dffts%tg_rdsp( dffts%nogrp ) )
dffts%tg_snd(1) = dffts%nr3x * dffts%nsw( me_pool + 1 )
IF( dffts%nr3x * dffts%nsw( me_pool + 1 ) > dffts%tg_nnr ) THEN
dffts%tg_snd(1) = dffts%nr3x * dffts%nsw( dffts%mype + 1 )
IF( dffts%nr3x * dffts%nsw( dffts%mype + 1 ) > dffts%tg_nnr ) THEN
CALL errore( ' task_groups_init ', ' inconsistent dffts%tg_nnr ', 1 )
ENDIF
dffts%tg_psdsp(1) = 0
dffts%tg_usdsp(1) = 0
dffts%tg_rcv(1) = dffts%nr3x * dffts%nsw( nolist(1) + 1 )
dffts%tg_rcv(1) = dffts%nr3x * dffts%nsw( dffts%nolist(1) + 1 )
dffts%tg_rdsp(1) = 0
DO i = 2, nogrp
dffts%tg_snd(i) = dffts%nr3x * dffts%nsw( me_pool + 1 )
DO i = 2, dffts%nogrp
dffts%tg_snd(i) = dffts%nr3x * dffts%nsw( dffts%mype + 1 )
dffts%tg_psdsp(i) = dffts%tg_psdsp(i-1) + dffts%tg_nnr
dffts%tg_usdsp(i) = dffts%tg_usdsp(i-1) + dffts%tg_snd(i-1)
dffts%tg_rcv(i) = dffts%nr3x * dffts%nsw( nolist(i) + 1 )
dffts%tg_rcv(i) = dffts%nr3x * dffts%nsw( dffts%nolist(i) + 1 )
dffts%tg_rdsp(i) = dffts%tg_rdsp(i-1) + dffts%tg_rcv(i-1)
ENDDO
dffts%have_task_groups = .true.
RETURN
END SUBROUTINE task_groups_init
!
SUBROUTINE task_groups_init_first( dffts )
USE parallel_include
!
USE fft_types, ONLY : fft_dlay_descriptor
!
IMPLICIT NONE
!
TYPE(fft_dlay_descriptor), INTENT(inout) :: dffts
!
INTEGER :: i, n1, ipos, color, key, ierr, itsk, ntsk
INTEGER :: pgroup( dffts%nproc )
!
!SUBDIVIDE THE PROCESSORS IN GROUPS
!
DO i = 1, dffts%nproc
pgroup( i ) = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY ORBITAL GROUP
!
! processors in this group have contiguous indexes
!
n1 = ( dffts%mype / dffts%nogrp ) * dffts%nogrp - 1
DO i = 1, dffts%nogrp
dffts%nolist( i ) = pgroup( n1 + i + 1 )
IF( dffts%mype == dffts%nolist( i ) ) ipos = i - 1
ENDDO
!
!LIST OF PROCESSORS IN MY PLANE WAVE GROUP
!
DO I = 1, dffts%npgrp
dffts%nplist( i ) = pgroup( ipos + ( i - 1 ) * dffts%nogrp + 1 )
ENDDO
!
!SET UP THE GROUPS
!
!
!CREATE ORBITAL GROUPS
!
#if defined __MPI
color = dffts%mype / dffts%nogrp
key = MOD( dffts%mype , dffts%nogrp )
CALL MPI_COMM_SPLIT( dffts%comm, color, key, dffts%ogrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' init_task_groups ', ' creating ogrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( dffts%ogrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( dffts%ogrp_comm, ntsk, IERR )
IF( dffts%nogrp /= ntsk ) CALL errore( ' init_task_groups ', ' ogrp_comm size ', ntsk )
DO i = 1, dffts%nogrp
IF( dffts%mype == dffts%nolist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' ogrp_comm rank ', itsk )
END IF
END DO
#endif
!
!CREATE PLANEWAVE GROUPS
!
#if defined __MPI
color = MOD( dffts%mype , dffts%nogrp )
key = dffts%mype / dffts%nogrp
CALL MPI_COMM_SPLIT( dffts%comm, color, key, dffts%pgrp_comm, ierr )
if( ierr /= 0 ) &
CALL errore( ' init_task_groups ', ' creating pgrp_comm ', ABS(ierr) )
CALL MPI_COMM_RANK( dffts%pgrp_comm, itsk, IERR )
CALL MPI_COMM_SIZE( dffts%pgrp_comm, ntsk, IERR )
IF( dffts%npgrp /= ntsk ) CALL errore( ' init_task_groups ', ' pgrp_comm size ', ntsk )
DO i = 1, dffts%npgrp
IF( dffts%mype == dffts%nplist( i ) ) THEN
IF( (i-1) /= itsk ) CALL errore( ' init_task_groups ', ' pgrp_comm rank ', itsk )
END IF
END DO
dffts%me_pgrp = itsk
#endif
RETURN
END SUBROUTINE task_groups_init_first
!
!=----------------------------------------------------------------------=
END MODULE stick_set
!=----------------------------------------------------------------------=
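
To see how the two MPI_COMM_SPLIT calls in task_groups_init_first partition a pool, take a hypothetical FFT group of 4 tasks with dffts%nogrp = 2. The orbital split uses color = mype/2, giving contiguous groups {0,1} and {2,3} (stored in nolist); the plane-wave split uses color = MOD(mype,2), giving strided groups {0,2} and {1,3} (stored in nplist). Each rank thus belongs to exactly one orbital group and one plane-wave group, and the two communicators together cover the whole pool.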

View File

@ -16,7 +16,8 @@ SUBROUTINE data_structure( gamma_only )
USE kinds, ONLY : DP
USE io_global, ONLY : stdout
USE mp, ONLY : mp_max
USE mp_global, ONLY : inter_pool_comm, intra_pool_comm
USE mp_global, ONLY : me_pool, nproc_pool, inter_pool_comm, intra_pool_comm
USE mp_global, ONLY : get_ntask_groups
USE fft_base, ONLY : dfftp, dffts
USE cell_base, ONLY : bg, tpiba
USE klist, ONLY : xk, nks
@ -29,7 +30,7 @@ SUBROUTINE data_structure( gamma_only )
IMPLICIT NONE
LOGICAL, INTENT(in) :: gamma_only
REAL (DP) :: gkcut
INTEGER :: ik, ngm_, ngs_, ngw_
INTEGER :: ik, ngm_, ngs_, ngw_ , nogrp
!
! ... calculate gkcut = max |k+G|^2, in (2pi/a)^2 units
!
@ -56,8 +57,11 @@ SUBROUTINE data_structure( gamma_only )
!
! ... set up fft descriptors, including parallel stuff: sticks, planes, etc.
!
nogrp = get_ntask_groups()
!
CALL pstickset( gamma_only, bg, gcutm, gkcut, gcutms, &
dfftp, dffts, ngw_ , ngm_ , ngs_ )
dfftp, dffts, ngw_ , ngm_ , ngs_ , me_pool, nproc_pool, intra_pool_comm, &
nogrp )
!
! on output, ngm_ and ngs_ contain the local number of G-vectors
! for the two grids. Initialize local and global number of G-vectors

View File

@ -36,7 +36,7 @@ SUBROUTINE h_psi( lda, n, m, psi, hpsi )
USE realus, ONLY : real_space, fft_orbital_gamma, initialisation_level, &
bfft_orbital_gamma, calbec_rs_gamma, &
add_vuspsir_gamma, v_loc_psir
USE mp_global,ONLY : nogrp, use_task_groups
USE fft_base, ONLY : dffts
#ifdef EXX
USE exx, ONLY : vexx
USE funct, ONLY : exx_is_active
@ -74,8 +74,8 @@ SUBROUTINE h_psi( lda, n, m, psi, hpsi )
CALL start_clock( 'h_psi:vloc' )
IF ( gamma_only ) THEN
!
IF (( use_task_groups ) .AND. ( m >= nogrp )) then
incr = 2 * nogrp
IF ( dffts%have_task_groups .AND. ( m >= dffts%nogrp )) then
incr = 2 * dffts%nogrp
else
incr = 2
endif

View File

@ -614,9 +614,9 @@ h_epsi_her_set.o : buffers.o
h_epsi_her_set.o : pwcom.o
h_epsi_her_set.o : scf_mod.o
h_psi.o : ../Modules/control_flags.o
h_psi.o : ../Modules/fft_base.o
h_psi.o : ../Modules/funct.o
h_psi.o : ../Modules/kind.o
h_psi.o : ../Modules/mp_global.o
h_psi.o : ../Modules/recvec.o
h_psi.o : ../Modules/uspp.o
h_psi.o : becmod.o

View File

@ -14,7 +14,7 @@ SUBROUTINE print_clock_pw()
!
USE io_global, ONLY : stdout
USE control_flags, ONLY : isolve, iverbosity, gamma_only
USE mp_global, ONLY : mpime, root, use_task_groups
USE mp_global, ONLY : mpime, root
USE paw_variables, ONLY : okpaw
USE realus, ONLY : real_space
!
@ -161,7 +161,7 @@ SUBROUTINE print_clock_pw()
!
CALL print_clock( 'reduce' )
CALL print_clock( 'fft_scatter' )
IF( use_task_groups ) CALL print_clock( 'ALLTOALL' )
CALL print_clock( 'ALLTOALL' )
#endif
!
#ifdef EXX

View File

@ -115,7 +115,6 @@ MODULE realus
USE cell_base, ONLY : tpiba2
USE control_flags, ONLY : tqr
USE fft_base, ONLY : dffts
USE mp_global, ONLY : nogrp, use_task_groups
USE wavefunctions_module, ONLY : psic
USE io_global, ONLY : stdout
@ -133,13 +132,13 @@ MODULE realus
ALLOCATE(npw_k(nks))
!allocate (psic_temp(size(psic)))
!real space, allocation for task group fft work arrays
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
IF (allocated( tg_psic ) ) DEALLOCATE( tg_psic )
!
ALLOCATE( tg_psic( dffts%tg_nnr * nogrp ) )
!ALLOCATE( tg_psic_temp( dffts%tg_nnr * nogrp ) )
ALLOCATE( tg_vrs( dffts%tg_nnr * nogrp ) )
ALLOCATE( tg_psic( dffts%tg_nnr * dffts%nogrp ) )
!ALLOCATE( tg_psic_temp( dffts%tg_nnr * dffts%nogrp ) )
ALLOCATE( tg_vrs( dffts%tg_nnr * dffts%nogrp ) )
!
ENDIF
@ -1556,9 +1555,8 @@ MODULE realus
USE ions_base, ONLY : nat, ntyp => nsp, ityp
USE smooth_grid_dimensions,ONLY : nr1s, nr2s, nr3s
USE uspp_param, ONLY : nh, nhm
USE fft_base, ONLY : tg_gather
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups,intra_pool_comm
USE fft_base, ONLY : tg_gather, dffts
USE mp_global, ONLY : me_pool, intra_pool_comm
USE mp, ONLY : mp_sum
!
IMPLICIT NONE
@ -1577,7 +1575,7 @@ MODULE realus
!
CALL start_clock( 'calbec_rs' )
!
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
CALL errore( 'calbec_rs_gamma', 'task_groups not implemented', 1 )
@ -1666,9 +1664,8 @@ MODULE realus
USE smooth_grid_dimensions,ONLY : nr1s, nr2s, nr3s
USE uspp_param, ONLY : nh, nhm
USE becmod, ONLY : bec_type, becp
USE fft_base, ONLY : tg_gather
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE fft_base, ONLY : tg_gather, dffts
USE mp_global, ONLY : me_pool
!
IMPLICIT NONE
!
@ -1685,7 +1682,7 @@ MODULE realus
!
CALL start_clock( 'calbec_rs' )
!
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
CALL errore( 'calbec_rs_k', 'task_groups not implemented', 1 )
@ -1752,9 +1749,8 @@ MODULE realus
USE lsda_mod, ONLY : current_spin
USE uspp, ONLY : qq
USE becmod, ONLY : bec_type, becp
USE fft_base, ONLY : tg_gather
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE fft_base, ONLY : tg_gather, dffts
USE mp_global, ONLY : me_pool
!
IMPLICIT NONE
!
@ -1770,7 +1766,7 @@ MODULE realus
CALL start_clock( 's_psir' )
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
CALL errore( 's_psir_gamma', 'task_groups not implemented', 1 )
@ -1853,9 +1849,8 @@ MODULE realus
USE lsda_mod, ONLY : current_spin
USE uspp, ONLY : qq
USE becmod, ONLY : bec_type, becp
USE fft_base, ONLY : tg_gather
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE fft_base, ONLY : tg_gather, dffts
USE mp_global, ONLY : me_pool
!
IMPLICIT NONE
!
@ -1872,7 +1867,7 @@ MODULE realus
CALL start_clock( 's_psir' )
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
CALL errore( 's_psir_k', 'task_groups not implemented', 1 )
@ -1956,9 +1951,8 @@ MODULE realus
USE lsda_mod, ONLY : current_spin
USE uspp, ONLY : deeq
USE becmod, ONLY : bec_type, becp
USE fft_base, ONLY : tg_gather
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE fft_base, ONLY : tg_gather, dffts
USE mp_global, ONLY : me_pool
!
IMPLICIT NONE
!
@ -1972,7 +1966,7 @@ MODULE realus
!
CALL start_clock( 'add_vuspsir' )
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
CALL errore( 'add_vuspsir_gamma', 'task_groups not implemented', 1 )
@ -2060,9 +2054,8 @@ MODULE realus
USE lsda_mod, ONLY : current_spin
USE uspp, ONLY : deeq
USE becmod, ONLY : bec_type, becp
USE fft_base, ONLY : tg_gather
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE fft_base, ONLY : tg_gather, dffts
USE mp_global, ONLY : me_pool
!
IMPLICIT NONE
!
@ -2078,7 +2071,7 @@ MODULE realus
!
CALL start_clock( 'add_vuspsir' )
IF( ( use_task_groups ) .and. ( m >= nogrp ) ) THEN
IF( ( dffts%have_task_groups ) .and. ( m >= dffts%nogrp ) ) THEN
CALL errore( 'add_vuspsir_k', 'task_groups not implemented', 1 )
@ -2157,8 +2150,7 @@ MODULE realus
USE kinds, ONLY : DP
USE fft_base, ONLY : dffts, tg_gather
USE fft_interfaces,ONLY : invfft
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE mp_global, ONLY : me_pool
IMPLICIT NONE
@ -2180,7 +2172,7 @@ MODULE realus
!Task groups
!COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
INTEGER :: recv_cnt( nogrp ), recv_displ( nogrp )
INTEGER :: recv_cnt( dffts%nogrp ), recv_displ( dffts%nogrp )
INTEGER :: v_siz
!The new task group version based on vloc_psi
@ -2190,16 +2182,16 @@ MODULE realus
! The following is a dirty trick to prevent usage of task groups if
! the number of bands is smaller than the number of task groups
!
use_tg = use_task_groups
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
use_tg = dffts%have_task_groups
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
tg_psic = (0.d0, 0.d0)
ioff = 0
!
DO idx = 1, 2*nogrp, 2
DO idx = 1, 2*dffts%nogrp, 2
IF( idx + ibnd - 1 < nbnd ) THEN
DO j = 1, npw_k(1)
@ -2225,7 +2217,7 @@ MODULE realus
!
IF (present(conserved)) THEN
IF (conserved .eqv. .true.) THEN
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * nogrp ) )
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * dffts%nogrp ) )
tg_psic_temp=tg_psic
ENDIF
ENDIF
@ -2275,7 +2267,7 @@ MODULE realus
ENDIF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
!if (.not. allocated(psic)) CALL errore( 'fft_orbital_gamma', 'psic not allocated', 2 )
! OLD VERSION ! Based on an algorithm found somewhere in the TDDFT codes, generalised to k points
@ -2322,8 +2314,7 @@ MODULE realus
USE kinds, ONLY : DP
USE fft_base, ONLY : dffts, tg_gather
USE fft_interfaces,ONLY : fwfft
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE mp_global, ONLY : me_pool
IMPLICIT NONE
@ -2342,20 +2333,20 @@ MODULE realus
LOGICAL :: use_tg
!Task groups
INTEGER :: recv_cnt( nogrp ), recv_displ( nogrp )
INTEGER :: recv_cnt( dffts%nogrp ), recv_displ( dffts%nogrp )
INTEGER :: v_siz
!print *, "->fourier space"
CALL start_clock( 'bfft_orbital' )
!New task_groups versions
use_tg = use_task_groups
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
IF( use_task_groups ) THEN
use_tg = dffts%have_task_groups
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
IF( dffts%have_task_groups ) THEN
!
CALL fwfft ('Wave', tg_psic, dffts )
!
ioff = 0
!
DO idx = 1, 2*nogrp, 2
DO idx = 1, 2*dffts%nogrp, 2
!
IF( idx + ibnd - 1 < nbnd ) THEN
DO j = 1, npw_k(1)
@ -2407,7 +2398,7 @@ MODULE realus
ENDIF
ENDIF
ENDIF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
!! OLD VERSION Based on the algorithm found in lr_apply_liovillian
!!print * ,"a"
!CALL fwfft ('Wave', psic, dffts)
@ -2460,8 +2451,7 @@ MODULE realus
USE kinds, ONLY : DP
USE fft_base, ONLY : dffts
USE fft_interfaces,ONLY : invfft
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE mp_global, ONLY : me_pool
USE wvfct, ONLY : igk
IMPLICIT NONE
@ -2477,15 +2467,15 @@ MODULE realus
LOGICAL :: use_tg
CALL start_clock( 'fft_orbital' )
use_tg = use_task_groups
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
use_tg = dffts%have_task_groups
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
tg_psic = ( 0.D0, 0.D0 )
ioff = 0
!
DO idx = 1, nogrp
DO idx = 1, dffts%nogrp
!
IF( idx + ibnd - 1 <= nbnd ) THEN
!DO j = 1, size(orbital,1)
@ -2500,7 +2490,7 @@ MODULE realus
CALL invfft ('Wave', tg_psic, dffts)
IF (present(conserved)) THEN
IF (conserved .eqv. .true.) THEN
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * nogrp ) )
IF (.not. allocated(tg_psic_temp)) ALLOCATE( tg_psic_temp( dffts%tg_nnr * dffts%nogrp ) )
tg_psic_temp=tg_psic
ENDIF
ENDIF
@ -2520,7 +2510,7 @@ MODULE realus
ENDIF
!
ENDIF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
CALL stop_clock( 'fft_orbital' )
END SUBROUTINE fft_orbital_k
!--------------------------------------------------------------------------
@ -2538,8 +2528,7 @@ MODULE realus
USE kinds, ONLY : DP
USE fft_base, ONLY : dffts
USE fft_interfaces,ONLY : fwfft
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE mp_global, ONLY : me_pool
USE wvfct, ONLY : igk
IMPLICIT NONE
@ -2555,16 +2544,16 @@ MODULE realus
LOGICAL :: use_tg
CALL start_clock( 'bfft_orbital' )
use_tg = use_task_groups
use_task_groups = ( use_task_groups ) .and. ( nbnd >= nogrp )
use_tg = dffts%have_task_groups
dffts%have_task_groups = ( dffts%have_task_groups ) .and. ( nbnd >= dffts%nogrp )
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
CALL fwfft ('Wave', tg_psic, dffts)
!
ioff = 0
!
DO idx = 1, nogrp
DO idx = 1, dffts%nogrp
!
IF( idx + ibnd - 1 <= nbnd ) THEN
orbital (:, ibnd+idx-1) = tg_psic( nls(igk(:)) + ioff )
@ -2591,7 +2580,7 @@ MODULE realus
ENDIF
ENDIF
ENDIF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
CALL stop_clock( 'bfft_orbital' )
END SUBROUTINE bfft_orbital_k
!--------------------------------------------------------------------------
@ -2605,8 +2594,7 @@ MODULE realus
USE gvecs, ONLY : nls,nlsm,doublegrid
USE kinds, ONLY : DP
USE fft_base, ONLY : dffts, tg_gather
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, &
use_task_groups
USE mp_global, ONLY : me_pool
USE scf, ONLY : vrs
USE lsda_mod, ONLY : current_spin
@ -2623,11 +2611,11 @@ MODULE realus
!Task groups
REAL(DP), ALLOCATABLE :: tg_v(:)
INTEGER :: recv_cnt( nogrp ), recv_displ( nogrp )
INTEGER :: recv_cnt( dffts%nogrp ), recv_displ( dffts%nogrp )
INTEGER :: v_siz
CALL start_clock( 'v_loc_psir' )
IF( use_task_groups .and. nbnd >= nogrp ) THEN
IF( dffts%have_task_groups .and. nbnd >= dffts%nogrp ) THEN
IF (ibnd == 1 ) THEN
CALL tg_gather( dffts, vrs(:,current_spin), tg_v ) !if ibnd==1 this is a new calculation, and tg_v should be distributed.
ENDIF

View File

@ -228,8 +228,7 @@ SUBROUTINE sum_band()
! ... gamma version
!
USE becmod, ONLY : bec_type, becp, calbec
USE mp_global, ONLY : nogrp, nolist, ogrp_comm, me_pool, &
use_task_groups
USE mp_global, ONLY : me_pool
USE mp, ONLY : mp_sum
!
IMPLICIT NONE
@ -249,28 +248,28 @@ SUBROUTINE sum_band()
!
IF ( nks > 1 ) REWIND( iunigk )
!
use_tg = use_task_groups
use_task_groups = ( use_task_groups ) .AND. ( nbnd >= nogrp )
use_tg = dffts%have_task_groups
dffts%have_task_groups = ( dffts%have_task_groups ) .AND. ( nbnd >= dffts%nogrp )
!
incr = 2
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
IF( dft_is_meta() ) &
CALL errore( ' sum_band ', ' task groups with meta dft, not yet implemented ', 1 )
!
v_siz = dffts%tg_nnr * nogrp
v_siz = dffts%tg_nnr * dffts%nogrp
!
ALLOCATE( tg_psi( v_siz ) )
ALLOCATE( tg_rho( v_siz ) )
!
incr = 2 * nogrp
incr = 2 * dffts%nogrp
!
END IF
!
k_loop: DO ik = 1, nks
!
IF( use_task_groups ) tg_rho = 0.0_DP
IF( dffts%have_task_groups ) tg_rho = 0.0_DP
IF ( lsda ) current_spin = isk(ik)
!
npw = ngk(ik)
@ -299,14 +298,14 @@ SUBROUTINE sum_band()
!
DO ibnd = 1, nbnd, incr
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
tg_psi(:) = ( 0.D0, 0.D0 )
ioff = 0
!
DO idx = 1, 2*nogrp, 2
DO idx = 1, 2*dffts%nogrp, 2
!
! ... 2*nogrp ffts at the same time
! ... 2*dffts%nogrp ffts at the same time
!
IF( idx + ibnd - 1 < nbnd ) THEN
DO j = 1, npw
@ -327,14 +326,14 @@ SUBROUTINE sum_band()
CALL invfft ('Wave', tg_psi, dffts)
!
! Now the first proc of the group holds the first two bands
! of the 2*nogrp bands that we are processing at the same time,
! of the 2*dffts%nogrp bands that we are processing at the same time,
! the second proc. holds the third and fourth band
! and so on
!
! Compute the proper factor for each band
!
DO idx = 1, nogrp
IF( nolist( idx ) == me_pool ) EXIT
DO idx = 1, dffts%nogrp
IF( dffts%nolist( idx ) == me_pool ) EXIT
END DO
!
! Remember two bands are packed in a single array :
@ -437,16 +436,16 @@ SUBROUTINE sum_band()
!
END DO
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
! reduce the group charge
!
CALL mp_sum( tg_rho, gid = ogrp_comm )
CALL mp_sum( tg_rho, gid = dffts%ogrp_comm )
!
ioff = 0
DO idx = 1, nogrp
IF( me_pool == nolist( idx ) ) EXIT
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( nolist( idx ) + 1 )
DO idx = 1, dffts%nogrp
IF( me_pool == dffts%nolist( idx ) ) EXIT
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( dffts%nolist( idx ) + 1 )
END DO
!
! copy the charge back to the processor location
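! [Editor's sketch] ioff now points at this proc's own slab inside the
! group-wide tg_rho, so the copy-back is a plain accumulation into the
! standard Z-distributed density (rho_loc is a hypothetical stand-in for
! the local density array; fragment only):
      DO ir = 1, dffts%nr1x * dffts%nr2x * dffts%npp( me_pool + 1 )
         rho_loc(ir) = rho_loc(ir) + tg_rho( ir + ioff )
      END DO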
@ -538,11 +537,11 @@ SUBROUTINE sum_band()
!
END DO k_loop
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
DEALLOCATE( tg_psi )
DEALLOCATE( tg_rho )
END IF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
!
RETURN
!
@ -556,8 +555,7 @@ SUBROUTINE sum_band()
! ... k-points version
!
USE becmod, ONLY : bec_type, becp, calbec
USE mp_global, ONLY : nogrp, nolist, ogrp_comm, me_pool, &
use_task_groups
USE mp_global, ONLY : me_pool
USE mp, ONLY : mp_sum
!
IMPLICIT NONE
@ -588,26 +586,26 @@ SUBROUTINE sum_band()
!
IF ( nks > 1 ) REWIND( iunigk )
!
use_tg = use_task_groups
use_task_groups = ( use_task_groups ) .AND. ( nbnd >= nogrp ) &
use_tg = dffts%have_task_groups
dffts%have_task_groups = ( dffts%have_task_groups ) .AND. ( nbnd >= dffts%nogrp ) &
.AND. ( .NOT. noncolin ) .AND. ( .NOT. dft_is_meta() )
!
incr = 1
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
v_siz = dffts%tg_nnr * nogrp
v_siz = dffts%tg_nnr * dffts%nogrp
!
ALLOCATE( tg_psi( v_siz ) )
ALLOCATE( tg_rho( v_siz ) )
!
incr = nogrp
incr = dffts%nogrp
!
END IF
!
k_loop: DO ik = 1, nks
!
IF( use_task_groups ) tg_rho = 0.0_DP
IF( dffts%have_task_groups ) tg_rho = 0.0_DP
IF ( lsda ) current_spin = isk(ik)
npw = ngk (ik)
@ -626,8 +624,8 @@ SUBROUTINE sum_band()
!
DO ibnd = 1, nbnd, incr
!
IF( use_task_groups ) THEN
DO idx = 1, nogrp
IF( dffts%have_task_groups ) THEN
DO idx = 1, dffts%nogrp
IF( idx + ibnd - 1 <= nbnd ) eband = eband + et( idx + ibnd - 1, ik ) * wg( idx + ibnd - 1, ik )
END DO
ELSE
@ -665,7 +663,7 @@ SUBROUTINE sum_band()
!
ELSE
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
!$omp parallel default(shared), private(j,ioff,idx)
!$omp do
@ -676,9 +674,9 @@ SUBROUTINE sum_band()
!
ioff = 0
!
DO idx = 1, nogrp
DO idx = 1, dffts%nogrp
!
! ... nogrp ffts at the same time
! ... dffts%nogrp ffts at the same time
!
IF( idx + ibnd - 1 <= nbnd ) THEN
!$omp do
@ -696,13 +694,13 @@ SUBROUTINE sum_band()
CALL invfft ('Wave', tg_psi, dffts)
!
! Now the first proc of the group holds the first band
! of the nogrp bands that we are processing at the same time,
! of the dffts%nogrp bands that we are processing at the same time,
! the second proc. holds the second band, and so on
!
! Compute the proper factor for each band
!
DO idx = 1, nogrp
IF( nolist( idx ) == me_pool ) EXIT
DO idx = 1, dffts%nogrp
IF( dffts%nolist( idx ) == me_pool ) EXIT
END DO
!
! Remember
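! [Editor's note] In the k-point case each complex FFT carries a single
! band, so after the search above this proc owns band ibnd+idx-1 and its
! weight enters the charge as (schematic fragment, not the literal source):
      w1 = wg( ibnd + idx - 1, ik ) / omega
      DO ir = 1, dffts%nr1x * dffts%nr2x * dffts%tg_npp( me_pool + 1 )
         tg_rho(ir) = tg_rho(ir) + w1 * ( DBLE( tg_psi(ir) )**2 + AIMAG( tg_psi(ir) )**2 )
      END DO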
@ -752,16 +750,16 @@ SUBROUTINE sum_band()
!
END DO
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
! reduce the group charge
!
CALL mp_sum( tg_rho, gid = ogrp_comm )
CALL mp_sum( tg_rho, gid = dffts%ogrp_comm )
!
ioff = 0
DO idx = 1, nogrp
IF( me_pool == nolist( idx ) ) EXIT
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( nolist( idx ) + 1 )
DO idx = 1, dffts%nogrp
IF( me_pool == dffts%nolist( idx ) ) EXIT
ioff = ioff + dffts%nr1x * dffts%nr2x * dffts%npp( dffts%nolist( idx ) + 1 )
END DO
!
! copy the charge back to the proper processor location
@ -902,11 +900,11 @@ SUBROUTINE sum_band()
!
END DO k_loop
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
DEALLOCATE( tg_psi )
DEALLOCATE( tg_rho )
END IF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
IF (noncolin.and.okvan) THEN
DO np = 1, ntyp

@ -16,7 +16,7 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
USE kinds, ONLY : DP
USE gvecs, ONLY : nls, nlsm
USE wvfct, ONLY : igk
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, use_task_groups
USE mp_global, ONLY : me_pool
USE fft_base, ONLY : dffts, tg_gather
USE fft_interfaces,ONLY : fwfft, invfft
USE wavefunctions_module, ONLY: psic
@ -43,19 +43,19 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
! The following is a dirty trick to prevent the use of task groups when
! the number of bands is smaller than the number of task groups
!
use_tg = use_task_groups
use_task_groups = use_task_groups .and. ( m >= nogrp )
use_tg = dffts%have_task_groups
dffts%have_task_groups = dffts%have_task_groups .and. ( m >= dffts%nogrp )
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
v_siz = dffts%tg_nnr * nogrp
v_siz = dffts%tg_nnr * dffts%nogrp
!
ALLOCATE( tg_v ( v_siz ) )
ALLOCATE( tg_psic( v_siz ) )
!
CALL tg_gather( dffts, v, tg_v )
!
incr = 2 * nogrp
incr = 2 * dffts%nogrp
!
ENDIF
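! [Editor's sketch] The code above is the save/override/restore idiom used
! in all three vloc_psi_* routines: since the flag now lives in the shared
! descriptor, it must be put back before returning, or the caller's setting
! would be silently lost. In outline:
!
!    use_tg = dffts%have_task_groups                     ! remember caller's value
!    dffts%have_task_groups = use_tg .AND. ( m >= dffts%nogrp )
!    ...                                                 ! TG or serial branch
!    dffts%have_task_groups = use_tg                     ! restore on exit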
!
@ -63,12 +63,12 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
!
DO ibnd = 1, m, incr
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
tg_psic = (0.d0, 0.d0)
ioff = 0
!
DO idx = 1, 2*nogrp, 2
DO idx = 1, 2*dffts%nogrp, 2
IF( idx + ibnd - 1 < m ) THEN
DO j = 1, n
tg_psic(nls (igk(j))+ioff) = psi(j,idx+ibnd-1) + &
@ -109,7 +109,7 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
! product with the potential v on the smooth grid
! back to reciprocal space
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
CALL invfft ('Wave', tg_psic, dffts)
!
@ -133,11 +133,11 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
!
! addition to the total product
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
ioff = 0
!
DO idx = 1, 2*nogrp, 2
DO idx = 1, 2*dffts%nogrp, 2
!
IF( idx + ibnd - 1 < m ) THEN
DO j = 1, n
@ -181,13 +181,13 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
!
ENDDO
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
DEALLOCATE( tg_psic )
DEALLOCATE( tg_v )
!
ENDIF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
!
RETURN
END SUBROUTINE vloc_psi_gamma
@ -202,7 +202,7 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
USE kinds, ONLY : DP
USE gvecs, ONLY : nls, nlsm
USE wvfct, ONLY : igk
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, use_task_groups
USE mp_global, ONLY : me_pool
USE fft_base, ONLY : dffts, tg_gather
USE fft_interfaces,ONLY : fwfft, invfft
USE wavefunctions_module, ONLY: psic
@ -226,20 +226,20 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
! The following is a dirty trick to prevent the use of task groups when
! the number of bands is smaller than the number of task groups
!
use_tg = use_task_groups
use_task_groups = use_task_groups .and. ( m >= nogrp )
use_tg = dffts%have_task_groups
dffts%have_task_groups = dffts%have_task_groups .and. ( m >= dffts%nogrp )
!
incr = 1
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
v_siz = dffts%tg_nnr * nogrp
v_siz = dffts%tg_nnr * dffts%nogrp
!
ALLOCATE( tg_v ( v_siz ) )
ALLOCATE( tg_psic( v_siz ) )
!
CALL tg_gather( dffts, v, tg_v )
incr = nogrp
incr = dffts%nogrp
!
ENDIF
!
@ -247,12 +247,12 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
!
DO ibnd = 1, m, incr
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
tg_psic = (0.d0, 0.d0)
ioff = 0
!
DO idx = 1, nogrp
DO idx = 1, dffts%nogrp
IF( idx + ibnd - 1 <= m ) THEN
!$omp parallel do
@ -281,7 +281,7 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
! product with the potential v on the smooth grid
! back to reciprocal space
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
!$omp parallel do
DO j = 1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_pool + 1 )
@ -305,11 +305,11 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
!
! addition to the total product
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
ioff = 0
!
DO idx = 1, nogrp
DO idx = 1, dffts%nogrp
!
IF( idx + ibnd - 1 <= m ) THEN
!$omp parallel do
@ -333,13 +333,13 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
!
ENDDO
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
DEALLOCATE( tg_psic )
DEALLOCATE( tg_v )
!
ENDIF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
!
RETURN
END SUBROUTINE vloc_psi_k
@ -354,7 +354,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
USE kinds, ONLY : DP
USE gvecs, ONLY : nls, nlsm
USE wvfct, ONLY : igk
USE mp_global, ONLY : nogrp, ogrp_comm, me_pool, nolist, use_task_groups
USE mp_global, ONLY : me_pool
USE fft_base, ONLY : dffts, dfftp, tg_gather
USE fft_interfaces,ONLY : fwfft, invfft
USE noncollin_module, ONLY: npol
@ -382,32 +382,32 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
! The following is a dirty trick to prevent the use of task groups when
! the number of bands is smaller than the number of task groups
!
use_tg = use_task_groups
use_task_groups = use_task_groups .and. ( m >= nogrp )
use_tg = dffts%have_task_groups
dffts%have_task_groups = dffts%have_task_groups .and. ( m >= dffts%nogrp )
!
IF( use_task_groups ) THEN
v_siz = dffts%tg_nnr * nogrp
IF( dffts%have_task_groups ) THEN
v_siz = dffts%tg_nnr * dffts%nogrp
ALLOCATE( tg_v( v_siz, 4 ) )
CALL tg_gather( dffts, v(:,1), tg_v(:,1) )
CALL tg_gather( dffts, v(:,2), tg_v(:,2) )
CALL tg_gather( dffts, v(:,3), tg_v(:,3) )
CALL tg_gather( dffts, v(:,4), tg_v(:,4) )
ALLOCATE( tg_psic( v_siz, npol ) )
incr = nogrp
incr = dffts%nogrp
ENDIF
!
! the local potential V_Loc psi. First the psi in real space
!
DO ibnd = 1, m, incr
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
DO ipol = 1, npol
!
tg_psic(:,ipol) = ( 0.D0, 0.D0 )
ioff = 0
!
DO idx = 1, nogrp
DO idx = 1, dffts%nogrp
!
IF( idx + ibnd - 1 <= m ) THEN
DO j = 1, n
@ -436,7 +436,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
!
! product with the potential v = (vltot+vr) on the smooth grid
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
DO j=1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_pool + 1 )
sup = tg_psic(j,1) * (tg_v(j,1)+tg_v(j,4)) + &
tg_psic(j,2) * (tg_v(j,2)-(0.d0,1.d0)*tg_v(j,3))
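! [Editor's note] sup above is the first row of the 2x2 potential matrix
! applied to the spinor; with v1..v4 = tg_v(j,1..4) the full product reads
! (the down component follows analogously in the source):
!
!    ( v1 + v4     v2 - i*v3 )   ( tg_psic(j,1) )
!    ( v2 + i*v3   v1 - v4   ) * ( tg_psic(j,2) )
!
! i.e. v1 is the scalar potential and (v2,v3,v4) the xc magnetic field.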
@ -458,7 +458,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
!
! back to reciprocal space
!
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
DO ipol = 1, npol
@ -466,7 +466,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
!
ioff = 0
!
DO idx = 1, nogrp
DO idx = 1, dffts%nogrp
!
IF( idx + ibnd - 1 <= m ) THEN
DO j = 1, n
@ -499,13 +499,13 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
ENDDO
IF( use_task_groups ) THEN
IF( dffts%have_task_groups ) THEN
!
DEALLOCATE( tg_v )
DEALLOCATE( tg_psic )
!
ENDIF
use_task_groups = use_tg
dffts%have_task_groups = use_tg
!
RETURN
END SUBROUTINE vloc_psi_nc