Fixed a small mistype in test.f90 and introduced non-blocking alltoall in unpack_group_sticks (used only in test.f90 and harmless elsewhere)

git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13164 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
faffinito 2016-11-18 15:35:23 +00:00
parent ae92da8180
commit 8f53ee6dd1
2 changed files with 54 additions and 9 deletions

View File

@ -532,6 +532,46 @@ END SUBROUTINE bw_tg_cft3_xy
END SUBROUTINE unpack_group_sticks
SUBROUTINE unpack_group_sticks_i( yf, f, dtgs, req )
USE task_groups, ONLY : task_groups_descriptor
IMPLICIT NONE
#if defined(__MPI)
INCLUDE 'mpif.h'
#endif
COMPLEX(DP), INTENT(out) :: f( : ) ! array containing all bands, and gvecs distributed across processors
COMPLEX(DP), INTENT(in) :: yf( : ) ! array containing bands collected into task groups
TYPE (task_groups_descriptor), INTENT(in) :: dtgs
!
! Bring pencils back to their original distribution
!
INTEGER :: ierr, req
!
IF( dtgs%tg_usdsp(dtgs%nogrp) + dtgs%tg_snd(dtgs%nogrp) > size( f ) ) THEN
CALL fftx_error__( 'unpack_group_sticks', ' inconsistent size ', 3 )
ENDIF
IF( dtgs%tg_rdsp(dtgs%nogrp) + dtgs%tg_rcv(dtgs%nogrp) > size( yf ) ) THEN
CALL fftx_error__( 'unpack_group_sticks', ' inconsistent size ', 4 )
ENDIF
CALL start_clock( 'ALLTOALL' )
#if defined(__MPI)
CALL MPI_IAlltoallv( yf(1), &
dtgs%tg_rcv, dtgs%tg_rdsp, MPI_DOUBLE_COMPLEX, f(1), &
dtgs%tg_snd, dtgs%tg_usdsp, MPI_DOUBLE_COMPLEX, dtgs%ogrp_comm, req, IERR)
IF( ierr /= 0 ) THEN
CALL fftx_error__( 'unpack_group_sticks', ' alltoall error 2 ', abs(ierr) )
ENDIF
#endif
CALL stop_clock( 'ALLTOALL' )
RETURN
END SUBROUTINE unpack_group_sticks_i
SUBROUTINE tg_gather( dffts, dtgs, v, tg_v )
!
USE fft_types, ONLY : fft_type_descriptor

View File

@ -400,12 +400,18 @@ program test
CALL bw_tg_cft3_z( psis( :, ipsi ), dffts, aux, dtgs )
time(9) = MPI_WTIME()
!
CALL unpack_group_sticks( psis( :, ipsi ), aux, dtgs )
!
IF(ireq == 2)THEN
CALL unpack_group_sticks_i( psis( :, ipsi ), aux, dtgs , req_u(ireq) )
ELSE
CALL MPI_WAIT(req_u(ireq-1), MPI_STATUS_IGNORE, ierr )
ipsi = MOD( ireq + 1, 2 ) + 1 ! ireq = 2, ipsi = 2; ireq = 3, ipsi = 1
CALL unpack_group_sticks_i( psis( :, ipsi ), aux, dtgs, req_u(ireq) )
ENDIF
call accumulate_hpsi( ib, nbnd, ngms, hpsi, aux, nls, nlsm, dtgs, dffts)
time(10) = MPI_WTIME()
!
call accumulate_hpsi( ib, nbnd, ngms, hpsi, aux, nls, nlsm, dgts, dffts)
!
do i = 2, 10
my_time(i) = my_time(i) + (time(i) - time(i-1))
end do
@ -413,6 +419,7 @@ program test
ncount = ncount + 1
!
enddo
CALL MPI_WAIT(req_u(ireq),MPI_STATUS_IGNORE,ierr)
#else
@ -453,15 +460,13 @@ program test
CALL unpack_group_sticks( psis( :, ipsi ), aux, dtgs )
time(10) = MPI_WTIME()
call accumulate_hpsi( ib, nbnd, ngms, hpsi, aux, nls, nlsm, dtgs, dffts)
!
do i = 2, 10
my_time(i) = my_time(i) + (time(i) - time(i-1))
end do
!
ncount = ncount + 1
!
enddo
#endif