mirror of https://gitlab.com/QEF/q-e.git
Additions to FFTXlib (Pietro Bonfa): new (working) test subroutine,
scatter with non-blocking communications. Minor documentation update, a piece of information on ESM bug that had disappear is re-instated git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13839 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
parent
df0a2b252e
commit
a0dd31f45d
|
@ -28,6 +28,9 @@ Fixed in 6.2 version
|
|||
* EXX with k-points and pool parallelization was occasionally crashing
|
||||
due to questionable custom FFT grid initialization (r13728+r13835)
|
||||
|
||||
* ESM energy and forces for 'bc2' case and nonzero esm_efield were not
|
||||
correct (r13727). Also: problem with restart in NEB with ESM fixed
|
||||
|
||||
* __USE_3D_FFT was broken since v.6.0 (r13700, r13706)
|
||||
|
||||
* Some constants in the definition of PBE functionals were truncated to
|
||||
|
|
|
@ -188,7 +188,7 @@ contributed in some way or another at some stage, follows:
|
|||
Uli Aschauer, Francesca Baletto, Gerardo Ballabio, Mauro Boero,
|
||||
Scott Brozell, Claudia Bungaro, Paolo Cazzato, Gabriele Cipriani,
|
||||
Jiayu Dai, Cesar Da Silva, Alberto Debernardi, Gernot Deinzer,
|
||||
Alin Marin Elena, Francesco Filipponi,
|
||||
Alin Marin Elena, Francesco Filipponi, Prasenjit Ghosh,
|
||||
Marco Govoni, Thomas Gruber, Martin Hilgeman, Yosuke Kanai, Konstantin Kudin,
|
||||
Nicolas Lacorne, Hyungjun Lee, Stephane Lefranc, Sergey Lisenkov, Kurt Maeder,
|
||||
Andrea Marini, Giuseppe Mattioli, Nicolas Mounet, William Parker,
|
||||
|
|
|
@ -42,6 +42,8 @@ test.o : fft_param.o
|
|||
test.o : fft_support.o
|
||||
test.o : fft_types.o
|
||||
test.o : stick_base.o
|
||||
test.o : fft_helper_subroutines.o
|
||||
test.o : fft_interfaces.o
|
||||
test0.o : fft_interfaces.o
|
||||
test0.o : fft_parallel.o
|
||||
test0.o : fft_param.o
|
||||
|
|
|
@ -42,10 +42,6 @@
|
|||
CONTAINS
|
||||
!=----------------------------------------------------------------------=!
|
||||
!
|
||||
#if ! defined __NON_BLOCKING_SCATTER
|
||||
!
|
||||
! ALLTOALL based SCATTER, should be better on network
|
||||
! with a defined topology, like on bluegene and cray machine
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE fft_scatter_xy ( desc, f_in, f_aux, nxx_, isgn )
|
||||
|
@ -95,7 +91,9 @@ SUBROUTINE fft_scatter_xy ( desc, f_in, f_aux, nxx_, isgn )
|
|||
INTEGER :: i, it, j, k, kfrom, kdest, offset, ioff, mc, m1, m3, i1, icompact, sendsize
|
||||
INTEGER, ALLOCATABLE :: ncp_(:), nr1p_(:), indx(:,:)
|
||||
!
|
||||
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
INTEGER :: sh(desc%nproc2), rh(desc%nproc2)
|
||||
#endif
|
||||
me2 = desc%mype2 + 1
|
||||
nproc2 = desc%nproc2 ; if ( abs(isgn) == 3 ) nproc2 = 1
|
||||
|
||||
|
@ -153,18 +151,38 @@ SUBROUTINE fft_scatter_xy ( desc, f_in, f_aux, nxx_, isgn )
|
|||
kfrom = kfrom + desc%nr2x
|
||||
ENDDO
|
||||
offset = offset + desc%nr2p( iproc2 )
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
CALL mpi_isend( f_aux( (iproc2-1)*sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc2-1, me2, desc%comm2, sh( iproc2 ), ierr )
|
||||
#endif
|
||||
ENDDO
|
||||
!
|
||||
! step two: communication across the nproc3 group
|
||||
!
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
DO iproc2 = 1, nproc2
|
||||
!
|
||||
! now post the receive
|
||||
!
|
||||
CALL mpi_irecv( f_in( (iproc2-1)*sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc2-1, MPI_ANY_TAG, desc%comm2, rh( iproc2 ), ierr )
|
||||
!IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' forward receive info<>0', abs(ierr) )
|
||||
ENDDO
|
||||
|
||||
call mpi_waitall( nproc2, sh, MPI_STATUSES_IGNORE, ierr )
|
||||
!
|
||||
#else
|
||||
CALL mpi_alltoall (f_aux(1), sendsize, MPI_DOUBLE_COMPLEX, f_in(1), sendsize, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
|
||||
|
||||
!
|
||||
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
|
||||
#endif
|
||||
!
|
||||
10 CONTINUE
|
||||
!
|
||||
f_aux = (0.0_DP, 0.0_DP)
|
||||
!
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
if (nproc2 > 1) call mpi_waitall( nproc2, rh, MPI_STATUSES_IGNORE, ierr )
|
||||
#endif
|
||||
!
|
||||
DO iproc2 = 1, nproc2
|
||||
it = ( iproc2 - 1 ) * sendsize
|
||||
DO i = 1, ncp_( iproc2 )
|
||||
|
@ -195,15 +213,28 @@ SUBROUTINE fft_scatter_xy ( desc, f_in, f_aux, nxx_, isgn )
|
|||
ENDDO
|
||||
it = it + nr2px
|
||||
ENDDO
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
IF( nproc2 > 1 ) CALL mpi_isend( f_in( ( iproc2 - 1 ) * sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc2-1, me2, desc%comm2, sh( iproc2 ), ierr )
|
||||
#endif
|
||||
ENDDO
|
||||
IF (nproc2==1) GO TO 20
|
||||
|
||||
!
|
||||
! step two: communication
|
||||
!
|
||||
#if ! defined(__NON_BLOCKING_SCATTER)
|
||||
CALL mpi_alltoall (f_in(1), sendsize, MPI_DOUBLE_COMPLEX, f_aux(1), sendsize, MPI_DOUBLE_COMPLEX, desc%comm2, ierr)
|
||||
|
||||
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
|
||||
#else
|
||||
DO iproc2 = 1, nproc2
|
||||
CALL mpi_irecv( f_aux( (iproc2-1)*sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc2-1, MPI_ANY_TAG, desc%comm2, rh(iproc2), ierr )
|
||||
! IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' backward receive info<>0', abs(ierr) )
|
||||
ENDDO
|
||||
|
||||
call mpi_waitall( nproc2, sh, MPI_STATUSES_IGNORE, ierr )
|
||||
call mpi_waitall( nproc2, rh, MPI_STATUSES_IGNORE, ierr )
|
||||
#endif
|
||||
!
|
||||
! step one: store contiguously the columns
|
||||
!
|
||||
|
@ -289,7 +320,10 @@ SUBROUTINE fft_scatter_yz ( desc, f_in, f_aux, nxx_, isgn )
|
|||
INTEGER, ALLOCATABLE :: ncp_(:), ir1p_(:)
|
||||
INTEGER :: my_nr1p_
|
||||
!
|
||||
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
INTEGER :: sh(desc%nproc3), rh(desc%nproc3)
|
||||
#endif
|
||||
!
|
||||
me = desc%mype + 1
|
||||
me2 = desc%mype2 + 1
|
||||
me3 = desc%mype3 + 1
|
||||
|
@ -348,6 +382,9 @@ SUBROUTINE fft_scatter_yz ( desc, f_in, f_aux, nxx_, isgn )
|
|||
ENDDO
|
||||
ENDDO
|
||||
offset = offset + desc%nr3p( iproc3 )
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
CALL mpi_isend( f_aux( ( iproc3 - 1 ) * sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc3-1, me3, desc%comm3, sh( iproc3 ), ierr )
|
||||
#endif
|
||||
ENDDO
|
||||
!
|
||||
! ensures that no garbage is present in the output
|
||||
|
@ -355,13 +392,31 @@ SUBROUTINE fft_scatter_yz ( desc, f_in, f_aux, nxx_, isgn )
|
|||
!
|
||||
! step two: communication across the nproc3 group
|
||||
!
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
DO iproc3 = 1, nproc3
|
||||
!
|
||||
! now post the receive
|
||||
!
|
||||
CALL mpi_irecv( f_in( (iproc3-1)*sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc3-1, MPI_ANY_TAG, desc%comm3, rh( iproc3 ), ierr )
|
||||
!IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' forward receive info<>0', abs(ierr) )
|
||||
!
|
||||
!
|
||||
ENDDO
|
||||
|
||||
call mpi_waitall( nproc3, sh, MPI_STATUSES_IGNORE, ierr )
|
||||
!
|
||||
#else
|
||||
CALL mpi_alltoall (f_aux(1), sendsize, MPI_DOUBLE_COMPLEX, f_in(1), sendsize, MPI_DOUBLE_COMPLEX, desc%comm3, ierr)
|
||||
|
||||
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
|
||||
#endif
|
||||
!
|
||||
10 CONTINUE
|
||||
!
|
||||
f_aux = (0.0_DP, 0.0_DP) !
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
if (nproc3 > 1) call mpi_waitall( nproc3, rh, MPI_STATUSES_IGNORE, ierr )
|
||||
#endif
|
||||
!
|
||||
DO iproc3 = 1, desc%nproc3
|
||||
it = ( iproc3 - 1 ) * sendsize
|
||||
|
@ -401,16 +456,28 @@ SUBROUTINE fft_scatter_yz ( desc, f_in, f_aux, nxx_, isgn )
|
|||
it = it + nr3px
|
||||
ENDDO
|
||||
ENDDO
|
||||
#if defined(__NON_BLOCKING_SCATTER)
|
||||
IF( nproc3 > 1 ) CALL mpi_isend( f_in( ( iproc3 - 1 ) * sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc3-1, me3, desc%comm3, sh( iproc3 ), ierr )
|
||||
#endif
|
||||
ENDDO
|
||||
|
||||
IF( nproc3 == 1 ) GO TO 20
|
||||
!
|
||||
! step two: communication
|
||||
!
|
||||
|
||||
#if ! defined(__NON_BLOCKING_SCATTER)
|
||||
CALL mpi_alltoall (f_in(1), sendsize, MPI_DOUBLE_COMPLEX, f_aux(1), sendsize, MPI_DOUBLE_COMPLEX, desc%comm3, ierr)
|
||||
|
||||
IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', 'info<>0', abs(ierr) )
|
||||
#else
|
||||
DO iproc3 = 1, desc%nproc3
|
||||
CALL mpi_irecv( f_aux( (iproc3-1)*sendsize + 1 ), sendsize, MPI_DOUBLE_COMPLEX, iproc3-1, MPI_ANY_TAG, desc%comm3, rh(iproc3), ierr )
|
||||
! IF( abs(ierr) /= 0 ) CALL fftx_error__ ('fft_scatter', ' backward receive info<>0', abs(ierr) )
|
||||
ENDDO
|
||||
|
||||
call mpi_waitall( desc%nproc3, sh, MPI_STATUSES_IGNORE, ierr )
|
||||
call mpi_waitall( desc%nproc3, rh, MPI_STATUSES_IGNORE, ierr )
|
||||
#endif
|
||||
!
|
||||
! step one: store contiguously the columns
|
||||
!
|
||||
|
@ -555,23 +622,6 @@ SUBROUTINE fft_scatter_tg_opt ( desc, f_in, f_out, nxx_, isgn )
|
|||
99 format ( 20 ('(',2f12.9,')') )
|
||||
|
||||
END SUBROUTINE fft_scatter_tg_opt
|
||||
|
||||
#else
|
||||
!
|
||||
! NON BLOCKING SCATTER, should be better on switched network
|
||||
! like infiniband, ethernet, myrinet
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE fft_scatter ( dfft, f_in, nr3x, nxx_, f_aux, ncp_, npp_, isgn )
|
||||
!-----------------------------------------------------------------------
|
||||
|
||||
to be rewritten with the new data distribution in mind
|
||||
|
||||
RETURN
|
||||
|
||||
END SUBROUTINE fft_scatter
|
||||
!
|
||||
#endif
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE gather_real_grid ( dfft, f_in, f_out )
|
||||
|
|
1140
FFTXlib/test.f90
1140
FFTXlib/test.f90
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue