mirror of https://gitlab.com/QEF/q-e.git
- task group number added in ftt_type_set (called by data_structure) so that when task_groups are active the plane distribution is more even and leads to at most to a single plane different between different task_groups. minor clean_up of indices in scatter_mod
- no point in not perfroming task_group parallelization when m < ntask_group. - some more timing report if verbosity >0 git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@12963 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
parent
441f121b0c
commit
f7cd893d9f
|
@ -190,12 +190,13 @@ CONTAINS
|
|||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
||||
SUBROUTINE fft_type_set( desc, tk, lpara, nst, ub, lb, idx, in1, in2, ncp, ncpw, ngp, ngpw, st, stw )
|
||||
SUBROUTINE fft_type_set( desc, tk, lpara, ntg, nst, ub, lb, idx, in1, in2, ncp, ncpw, ngp, ngpw, st, stw )
|
||||
|
||||
TYPE (fft_type_descriptor) :: desc
|
||||
|
||||
LOGICAL, INTENT(in) :: tk ! gamma/not-gamma logical
|
||||
LOGICAL, INTENT(in) :: lpara ! set array for parallel or serial FFT drivers
|
||||
INTEGER, INTENT(in) :: ntg ! number of task groups (optimal spacing for residual-plane distribution)
|
||||
INTEGER, INTENT(in) :: nst ! total number of stiks
|
||||
INTEGER, INTENT(in) :: ub(3), lb(3) ! upper and lower bound of real space indices
|
||||
INTEGER, INTENT(in) :: idx(:) ! sorting index of the sticks
|
||||
|
@ -209,7 +210,7 @@ CONTAINS
|
|||
INTEGER, INTENT(in) :: stw( lb(1) : ub(1), lb(2) : ub(2) ) ! stick owner of a given wave stick
|
||||
|
||||
INTEGER :: npp( desc%nproc ), n3( desc%nproc ), nsp( desc%nproc )
|
||||
INTEGER :: np, nq, i, is, iss, i1, i2, m1, m2, n1, n2, ip
|
||||
INTEGER :: np, nq, i, is, iss, itg, i1, i2, m1, m2, n1, n2, ip
|
||||
INTEGER :: ncpx, nppx
|
||||
INTEGER :: nr1, nr2, nr3 ! size of real space grid
|
||||
INTEGER :: nr1x, nr2x, nr3x ! padded size of real space grid
|
||||
|
@ -246,15 +247,20 @@ CONTAINS
|
|||
! in other word do a slab partition along the z axis
|
||||
|
||||
npp = 0
|
||||
IF ( desc%nproc == 1 ) THEN ! sigle processor: npp(1)=nr3
|
||||
IF ( desc%nproc == 1 ) THEN ! sigle processor: npp(1)=nr3
|
||||
npp(1) = nr3
|
||||
ELSE
|
||||
np = nr3 / desc%nproc
|
||||
nq = nr3 - np * desc%nproc
|
||||
DO i = 1, desc%nproc
|
||||
npp(i) = np
|
||||
IF ( i <= nq ) npp(i) = np + 1
|
||||
ENDDO
|
||||
npp(1:desc%nproc) = np ! assign a base value to all processors
|
||||
reminder_loop : & ! assign an extra plane to processors so that they are spaced by ntg
|
||||
DO itg = 1, ntg
|
||||
DO i = itg, desc%nproc, ntg
|
||||
IF (nq==0) EXIT reminder_loop
|
||||
nq = nq - 1
|
||||
npp(i) = np + 1
|
||||
ENDDO
|
||||
ENDDO reminder_loop
|
||||
END IF
|
||||
|
||||
!-- npp(1:nproc) is the number of planes per processor
|
||||
|
@ -474,7 +480,7 @@ CONTAINS
|
|||
END SUBROUTINE fft_type_set
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
||||
SUBROUTINE fft_type_init( dfft, smap, pers, lgamma, lpara, comm, at, bg, gcut_in, dual_in )
|
||||
SUBROUTINE fft_type_init( dfft, smap, pers, lgamma, lpara, comm, at, bg, gcut_in, dual_in, ntask_groups )
|
||||
|
||||
USE stick_base
|
||||
|
||||
|
@ -488,6 +494,7 @@ CONTAINS
|
|||
REAL(DP), INTENT(IN) :: bg(3,3)
|
||||
REAL(DP), INTENT(IN) :: at(3,3)
|
||||
REAL(DP), OPTIONAL, INTENT(IN) :: dual_in
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: ntask_groups
|
||||
!
|
||||
! Potential or dual
|
||||
!
|
||||
|
@ -514,11 +521,15 @@ CONTAINS
|
|||
! ... sticks length for processor ip = number of G-vectors owned by the processor ip
|
||||
INTEGER :: nstw
|
||||
! ... nstw local number of sticks (wave functions)
|
||||
INTEGER :: ntg
|
||||
! ... ntg number of task groups (assigned from input if any)
|
||||
|
||||
|
||||
REAL(DP) :: gcut, gkcut, dual
|
||||
INTEGER :: ngm, ngw
|
||||
|
||||
ntg = 1
|
||||
IF( PRESENT( ntask_groups ) ) ntg = ntask_groups
|
||||
dual = fft_dual
|
||||
IF( PRESENT( dual_in ) ) dual = dual_in
|
||||
|
||||
|
@ -555,7 +566,7 @@ CONTAINS
|
|||
CALL get_sticks( smap, gkcut, nstpw, sstpw, stw, nstw, ngw )
|
||||
CALL get_sticks( smap, gcut, nstp, sstp, st, nst, ngm )
|
||||
|
||||
CALL fft_type_set( dfft, .not.smap%lgamma, lpara, nst, smap%ub, smap%lb, smap%idx, &
|
||||
CALL fft_type_set( dfft, .not.smap%lgamma, lpara, ntg, nst, smap%ub, smap%lb, smap%idx, &
|
||||
smap%ist(:,1), smap%ist(:,2), nstp, nstpw, sstp, sstpw, st, stw )
|
||||
|
||||
DEALLOCATE( st )
|
||||
|
|
|
@ -94,7 +94,7 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
|
||||
#if defined(__MPI)
|
||||
|
||||
INTEGER :: dest, from, k, offset, proc, ierr, me, nprocp, gproc, gcomm, i, kdest, kfrom
|
||||
INTEGER :: k, offset, proc, ierr, me, nprocp, gproc, gcomm, i, kdest, kfrom
|
||||
INTEGER :: me_p, nppx, mc, j, npp, nnp, ii, it, ip, ioff, sendsiz, ncpx, ipp, nblk, nsiz
|
||||
!
|
||||
LOGICAL :: use_tg_
|
||||
|
@ -144,7 +144,7 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
!
|
||||
! step one: store contiguously the slices
|
||||
!
|
||||
offset = 1
|
||||
offset = 0
|
||||
|
||||
DO proc = 1, nprocp
|
||||
IF( use_tg_ ) THEN
|
||||
|
@ -153,11 +153,8 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
gproc = proc
|
||||
ENDIF
|
||||
!
|
||||
from = offset
|
||||
dest = 1 + ( proc - 1 ) * sendsiz
|
||||
!
|
||||
kdest = dest - 1
|
||||
kfrom = from - 1
|
||||
kdest = ( proc - 1 ) * sendsiz
|
||||
kfrom = offset
|
||||
!
|
||||
DO k = 1, ncp_ (me)
|
||||
DO i = 1, npp_ ( gproc )
|
||||
|
@ -340,19 +337,17 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
!
|
||||
!! f_in = 0.0_DP
|
||||
!
|
||||
offset = 1
|
||||
offset = 0
|
||||
|
||||
DO proc = 1, nprocp
|
||||
from = offset
|
||||
IF( use_tg_ ) THEN
|
||||
gproc = dtgs%nplist(proc)+1
|
||||
ELSE
|
||||
gproc = proc
|
||||
ENDIF
|
||||
dest = 1 + ( proc - 1 ) * sendsiz
|
||||
!
|
||||
kdest = dest - 1
|
||||
kfrom = from - 1
|
||||
kdest = ( proc - 1 ) * sendsiz
|
||||
kfrom = offset
|
||||
!
|
||||
DO k = 1, ncp_ (me)
|
||||
DO i = 1, npp_ ( gproc )
|
||||
|
@ -424,7 +419,7 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
|
||||
#if defined(__MPI)
|
||||
|
||||
INTEGER :: dest, from, k, offset, proc, ierr, me, nprocp, gproc, gcomm, i, kdest, kfrom
|
||||
INTEGER :: k, offset, proc, ierr, me, nprocp, gproc, gcomm, i, kdest, kfrom
|
||||
INTEGER :: me_p, nppx, mc, j, npp, nnp, ii, it, ip, ioff, sendsiz, ncpx, ipp, nblk, nsiz, ijp
|
||||
INTEGER :: sh(dfft%nproc), rh(dfft%nproc)
|
||||
INTEGER :: istat( MPI_STATUS_SIZE )
|
||||
|
@ -473,13 +468,13 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
!
|
||||
! step one: store contiguously the slices
|
||||
!
|
||||
offset = 1
|
||||
offset = 0
|
||||
|
||||
IF( use_tg_ ) THEN
|
||||
DO proc = 1, nprocp
|
||||
gproc = dtgs%nplist(proc)+1
|
||||
kdest = ( proc - 1 ) * sendsiz
|
||||
kfrom = offset - 1
|
||||
kfrom = offset
|
||||
DO k = 1, ncp_ (me)
|
||||
DO i = 1, npp_ ( gproc )
|
||||
f_aux ( kdest + i ) = f_in ( kfrom + i )
|
||||
|
@ -495,7 +490,7 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
ELSE
|
||||
DO proc = 1, nprocp
|
||||
kdest = ( proc - 1 ) * sendsiz
|
||||
kfrom = offset - 1
|
||||
kfrom = offset
|
||||
DO k = 1, ncp_ (me)
|
||||
DO i = 1, npp_ ( proc )
|
||||
f_aux ( kdest + i ) = f_in ( kfrom + i )
|
||||
|
@ -703,13 +698,13 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
call mpi_waitall( nblk, rh, MPI_STATUSES_IGNORE, ierr )
|
||||
call mpi_waitall( nblk, sh, MPI_STATUSES_IGNORE, ierr )
|
||||
!
|
||||
offset = 1
|
||||
offset = 0
|
||||
|
||||
IF( use_tg_ ) THEN
|
||||
DO proc = 1, nprocp
|
||||
gproc = dtgs%nplist(proc) + 1
|
||||
kdest = ( proc - 1 ) * sendsiz
|
||||
kfrom = offset - 1
|
||||
kfrom = offset
|
||||
DO k = 1, ncp_ (me)
|
||||
DO i = 1, npp_ ( gproc )
|
||||
f_in ( kfrom + i ) = f_aux ( kdest + i )
|
||||
|
@ -722,7 +717,7 @@ SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn, dtgs )
|
|||
ELSE
|
||||
DO proc = 1, nprocp
|
||||
kdest = ( proc - 1 ) * sendsiz
|
||||
kfrom = offset - 1
|
||||
kfrom = offset
|
||||
DO k = 1, ncp_ (me)
|
||||
DO i = 1, npp_ ( proc )
|
||||
f_in ( kfrom + i ) = f_aux ( kdest + i )
|
||||
|
|
|
@ -146,6 +146,13 @@ SUBROUTINE task_groups_init( dffts, dtgs, nogrp )
|
|||
dtgs%tg_npp(1) = num_planes
|
||||
#endif
|
||||
|
||||
#ifdef __TASK_PRINTOUT
|
||||
write (6,*) 'TASK GROUP stick AND plane DISTRIBUTION '
|
||||
do i=1,dtgs%nproc
|
||||
write (6,*) i, dtgs%tg_nsw(i), dtgs%tg_npp(i)
|
||||
end do
|
||||
#endif
|
||||
|
||||
ALLOCATE( dtgs%tg_snd( dtgs%nogrp ) )
|
||||
ALLOCATE( dtgs%tg_rcv( dtgs%nogrp ) )
|
||||
ALLOCATE( dtgs%tg_psdsp( dtgs%nogrp ) )
|
||||
|
|
|
@ -63,7 +63,7 @@ SUBROUTINE data_structure( gamma_only )
|
|||
!
|
||||
! ... set up fft descriptors, including parallel stuff: sticks, planes, etc.
|
||||
!
|
||||
CALL fft_type_init( dffts, smap, "wave", gamma_only, lpara, intra_bgrp_comm, at, bg, gkcut, gcutms/gkcut )
|
||||
CALL fft_type_init( dffts, smap, "wave", gamma_only, lpara, intra_bgrp_comm, at, bg, gkcut, gcutms/gkcut, ntask_groups )
|
||||
CALL fft_type_init( dfftp, smap, "rho", gamma_only, lpara, intra_bgrp_comm, at, bg, gcutm )
|
||||
CALL task_groups_init( dffts, dtgs, ntask_groups )
|
||||
CALL fft_base_info( ionode, stdout )
|
||||
|
|
|
@ -126,7 +126,7 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
! ... real-space algorithm
|
||||
! ... fixme: real_space without beta functions does not make sense
|
||||
!
|
||||
IF ( dtgs%have_task_groups .AND. ( m >= dtgs%nogrp )) then
|
||||
IF ( dtgs%have_task_groups ) then
|
||||
incr = 2 * dtgs%nogrp
|
||||
ELSE
|
||||
incr = 2
|
||||
|
@ -163,7 +163,7 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
! ... real-space algorithm
|
||||
! ... fixme: real_space without beta functions does not make sense
|
||||
!
|
||||
IF ( dtgs%have_task_groups .AND. ( m >= dtgs%nogrp )) then
|
||||
IF ( dtgs%have_task_groups ) then
|
||||
incr = dtgs%nogrp
|
||||
ELSE
|
||||
incr = 1
|
||||
|
|
|
@ -34,6 +34,10 @@ SUBROUTINE print_clock_pw()
|
|||
!
|
||||
WRITE( stdout, '(/5x,"Called by init_run:")' )
|
||||
CALL print_clock( 'wfcinit' )
|
||||
IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'wfcinit:atomic' )
|
||||
CALL print_clock( 'wfcinit:wfcrot' )
|
||||
END IF
|
||||
CALL print_clock( 'potinit' )
|
||||
CALL print_clock( 'realus' )
|
||||
IF ( iverbosity > 0 ) THEN
|
||||
|
|
|
@ -1369,7 +1369,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'calbec_rs' )
|
||||
!
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
|
||||
CALL errore( 'calbec_rs_gamma', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -1478,7 +1478,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'calbec_rs' )
|
||||
!
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) CALL errore( 'calbec_rs_k', 'task_groups not implemented', 1 )
|
||||
IF( dtgs%have_task_groups ) CALL errore( 'calbec_rs_k', 'task_groups not implemented', 1 )
|
||||
|
||||
call set_xkphase(current_k)
|
||||
|
||||
|
@ -1556,7 +1556,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 's_psir' )
|
||||
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) CALL errore( 's_psir_gamma', 'task_groups not implemented', 1 )
|
||||
IF( dtgs%have_task_groups ) CALL errore( 's_psir_gamma', 'task_groups not implemented', 1 )
|
||||
|
||||
!
|
||||
fac = sqrt(omega)
|
||||
|
@ -1641,7 +1641,7 @@ MODULE realus
|
|||
|
||||
CALL start_clock( 's_psir' )
|
||||
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) CALL errore( 's_psir_k', 'task_groups not implemented', 1 )
|
||||
IF( dtgs%have_task_groups ) CALL errore( 's_psir_k', 'task_groups not implemented', 1 )
|
||||
|
||||
call set_xkphase(current_k)
|
||||
|
||||
|
@ -1729,7 +1729,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'add_vuspsir' )
|
||||
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
|
||||
CALL errore( 'add_vuspsir_gamma', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -1833,7 +1833,7 @@ MODULE realus
|
|||
!
|
||||
CALL start_clock( 'add_vuspsir' )
|
||||
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) CALL errore( 'add_vuspsir_k', 'task_groups not implemented', 1 )
|
||||
IF( dtgs%have_task_groups ) CALL errore( 'add_vuspsir_k', 'task_groups not implemented', 1 )
|
||||
|
||||
call set_xkphase(current_k)
|
||||
!
|
||||
|
@ -1930,10 +1930,7 @@ MODULE realus
|
|||
!print *, "->Real space"
|
||||
CALL start_clock( 'invfft_orbital' )
|
||||
!
|
||||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
IF( dtgs%have_task_groups .and. ( last >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
!
|
||||
|
||||
tg_psic = (0.d0, 0.d0)
|
||||
|
@ -2047,7 +2044,7 @@ MODULE realus
|
|||
!print *, "->fourier space"
|
||||
CALL start_clock( 'fwfft_orbital' )
|
||||
!New task_groups versions
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
!
|
||||
CALL fwfft ('Wave', tg_psic, dffts, dtgs )
|
||||
!
|
||||
|
@ -2151,7 +2148,7 @@ MODULE realus
|
|||
|
||||
ik_ = current_k ; if (present(ik)) ik_ = ik
|
||||
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
!
|
||||
tg_psic = ( 0.D0, 0.D0 )
|
||||
ioff = 0
|
||||
|
@ -2236,7 +2233,7 @@ MODULE realus
|
|||
|
||||
ik_ = current_k ; if (present(ik)) ik_ = ik
|
||||
|
||||
IF( ( dtgs%have_task_groups ) .and. ( last >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
!
|
||||
CALL fwfft ('Wave', tg_psic, dffts, dtgs)
|
||||
!
|
||||
|
@ -2301,7 +2298,7 @@ MODULE realus
|
|||
REAL(DP), ALLOCATABLE :: tg_v(:)
|
||||
CALL start_clock( 'v_loc_psir' )
|
||||
|
||||
IF( dtgs%have_task_groups .and. last >= dtgs%nogrp ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
IF (ibnd == 1 ) THEN
|
||||
CALL tg_gather( dffts, dtgs, vrs(:,current_spin), tg_v )
|
||||
!if ibnd==1 this is a new calculation, and tg_v should be distributed.
|
||||
|
@ -2350,7 +2347,7 @@ MODULE realus
|
|||
REAL(DP), ALLOCATABLE :: tg_v(:)
|
||||
CALL start_clock( 'v_loc_psir' )
|
||||
|
||||
IF( dtgs%have_task_groups .and. last >= dtgs%nogrp ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
IF (ibnd == 1 ) THEN
|
||||
CALL tg_gather( dffts, dtgs, vrs(:,current_spin), tg_v )
|
||||
!if ibnd==1 this is a new calculation, and tg_v should be distributed.
|
||||
|
|
|
@ -254,7 +254,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
incr = 2
|
||||
!
|
||||
IF( dtgs%have_task_groups .AND. ( this_bgrp_nbnd >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
!
|
||||
IF( dft_is_meta() .OR. lxdm) &
|
||||
CALL errore( ' sum_band ', ' task groups with meta dft, not yet implemented ', 1 )
|
||||
|
@ -270,7 +270,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
k_loop: DO ik = 1, nks
|
||||
!
|
||||
IF( dtgs%have_task_groups .AND. ( this_bgrp_nbnd >= dtgs%nogrp ) ) tg_rho = 0.0_DP
|
||||
IF( dtgs%have_task_groups ) tg_rho = 0.0_DP
|
||||
IF ( lsda ) current_spin = isk(ik)
|
||||
!
|
||||
npw = ngk(ik)
|
||||
|
@ -295,7 +295,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
DO ibnd = ibnd_start, ibnd_end, incr
|
||||
!
|
||||
IF( dtgs%have_task_groups .AND. ( this_bgrp_nbnd >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
!
|
||||
tg_psi(:) = ( 0.D0, 0.D0 )
|
||||
ioff = 0
|
||||
|
@ -436,7 +436,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
END DO
|
||||
!
|
||||
IF( dtgs%have_task_groups .AND. ( this_bgrp_nbnd >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
!
|
||||
! reduce the group charge
|
||||
!
|
||||
|
@ -466,7 +466,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
IF( okvan .AND. becp%comm /= mp_get_comm_null() ) CALL mp_sum( becsum, becp%comm )
|
||||
!
|
||||
IF( dtgs%have_task_groups .AND. ( this_bgrp_nbnd >= dtgs%nogrp ) ) THEN
|
||||
IF( dtgs%have_task_groups ) THEN
|
||||
DEALLOCATE( tg_psi )
|
||||
DEALLOCATE( tg_rho )
|
||||
END IF
|
||||
|
@ -502,7 +502,6 @@ SUBROUTINE sum_band()
|
|||
! ... of the wavefunctions to the charge
|
||||
!
|
||||
use_tg = ( dtgs%have_task_groups ) .AND. &
|
||||
( this_bgrp_nbnd >= dtgs%nogrp ) .AND. &
|
||||
( .NOT. (dft_is_meta() .OR. lxdm) )
|
||||
!
|
||||
incr = 1
|
||||
|
|
|
@ -39,10 +39,7 @@ SUBROUTINE vloc_psi_gamma(lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
incr = 2
|
||||
!
|
||||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
use_tg = dtgs%have_task_groups .and. ( m >= dtgs%nogrp )
|
||||
use_tg = dtgs%have_task_groups
|
||||
!
|
||||
IF( use_tg ) THEN
|
||||
!
|
||||
|
@ -221,10 +218,7 @@ SUBROUTINE vloc_psi_k(lda, n, m, psi, v, hpsi)
|
|||
COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
|
||||
INTEGER :: v_siz, idx, ioff
|
||||
!
|
||||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
use_tg = dtgs%have_task_groups .and. ( m >= dtgs%nogrp )
|
||||
use_tg = dtgs%have_task_groups
|
||||
!
|
||||
incr = 1
|
||||
!
|
||||
|
@ -380,10 +374,7 @@ SUBROUTINE vloc_psi_nc (lda, n, m, psi, v, hpsi)
|
|||
!
|
||||
incr = 1
|
||||
!
|
||||
! The following is dirty trick to prevent usage of task groups if
|
||||
! the number of bands is smaller than the number of task groups
|
||||
!
|
||||
use_tg = dtgs%have_task_groups .and. ( m >= dtgs%nogrp )
|
||||
use_tg = dtgs%have_task_groups
|
||||
!
|
||||
IF( use_tg ) THEN
|
||||
v_siz = dtgs%tg_nnr * dtgs%nogrp
|
||||
|
|
|
@ -246,7 +246,9 @@ SUBROUTINE init_wfc ( ik )
|
|||
!
|
||||
IF ( starting_wfc(1:6) == 'atomic' ) THEN
|
||||
!
|
||||
CALL start_clock( 'wfcinit:atomic' )
|
||||
CALL atomic_wfc( ik, wfcatom )
|
||||
CALL stop_clock( 'wfcinit:atomic' )
|
||||
!
|
||||
IF ( starting_wfc == 'atomic+random' .AND. &
|
||||
n_starting_wfc == n_starting_atomic_wfc ) THEN
|
||||
|
@ -325,8 +327,10 @@ SUBROUTINE init_wfc ( ik )
|
|||
!
|
||||
! ... subspace diagonalization (calls Hpsi)
|
||||
!
|
||||
CALL start_clock( 'wfcinit:wfcrot' )
|
||||
CALL rotate_wfc ( npwx, ngk(ik), n_starting_wfc, gstart, &
|
||||
nbnd, wfcatom, npol, okvan, evc, etatom )
|
||||
CALL stop_clock( 'wfcinit:wfcrot' )
|
||||
!
|
||||
lelfield = lelfield_save
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue