2003-04-09 06:36:18 +08:00
|
|
|
!
|
2006-02-17 06:39:29 +08:00
|
|
|
! Copyright (C) 2006 Quantum-ESPRESSO group
|
2003-04-09 06:36:18 +08:00
|
|
|
! This file is distributed under the terms of the
|
|
|
|
! GNU General Public License. See the file `License'
|
|
|
|
! in the root directory of the present distribution,
|
|
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
|
|
!
|
2006-02-17 06:39:29 +08:00
|
|
|
!
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
#include "f_defs.h"
|
2006-02-17 06:39:29 +08:00
|
|
|
!
|
2003-04-09 06:36:18 +08:00
|
|
|
!----------------------------------------------------------------------
|
|
|
|
! FFT base Module.
|
|
|
|
! Written by Carlo Cavazzoni
|
|
|
|
!----------------------------------------------------------------------
|
|
|
|
!
|
|
|
|
!=----------------------------------------------------------------------=!
|
2007-09-17 17:35:37 +08:00
|
|
|
MODULE fft_base
|
2003-04-09 06:36:18 +08:00
|
|
|
!=----------------------------------------------------------------------=!
|
|
|
|
|
2005-08-28 22:09:42 +08:00
|
|
|
USE kinds, ONLY: DP
|
2004-06-17 17:29:03 +08:00
|
|
|
USE parallel_include
|
2003-04-09 06:36:18 +08:00
|
|
|
|
|
|
|
USE fft_types, ONLY: fft_dlay_descriptor
|
|
|
|
|
2004-03-01 23:38:01 +08:00
|
|
|
IMPLICIT NONE
|
|
|
|
|
2006-02-20 07:29:28 +08:00
|
|
|
TYPE ( fft_dlay_descriptor ) :: dfftp ! descriptor for dense grid
|
|
|
|
TYPE ( fft_dlay_descriptor ) :: dffts ! descriptor for smooth grid
|
|
|
|
TYPE ( fft_dlay_descriptor ) :: dfftb ! descriptor for box grids
|
2004-03-01 07:30:07 +08:00
|
|
|
|
2003-04-09 06:36:18 +08:00
|
|
|
SAVE
|
|
|
|
|
|
|
|
PRIVATE
|
|
|
|
|
2007-09-17 17:35:37 +08:00
|
|
|
PUBLIC :: fft_scatter
|
2006-02-20 07:29:28 +08:00
|
|
|
PUBLIC :: dfftp, dffts, dfftb, fft_dlay_descriptor
|
2003-04-09 06:36:18 +08:00
|
|
|
|
|
|
|
|
|
|
|
!=----------------------------------------------------------------------=!
|
|
|
|
CONTAINS
|
|
|
|
!=----------------------------------------------------------------------=!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!-----------------------------------------------------------------------
|
2006-12-17 01:27:33 +08:00
|
|
|
subroutine fft_scatter ( f_in, nrx3, nxx_, f_aux, ncp_, npp_, sign, use_tg )
|
2003-04-09 06:36:18 +08:00
|
|
|
!-----------------------------------------------------------------------
|
|
|
|
!
|
|
|
|
! transpose the fft grid across nodes
|
|
|
|
! a) From columns to planes (sign > 0)
|
|
|
|
!
|
|
|
|
! "columns" (or "pencil") representation:
|
|
|
|
! processor "me" has ncp_(me) contiguous columns along z
|
|
|
|
! Each column has nrx3 elements for a fft of order nr3
|
|
|
|
! nrx3 can be =nr3+1 in order to reduce memory conflicts.
|
|
|
|
!
|
|
|
|
! The transpose take places in two steps:
|
|
|
|
! 1) on each processor the columns are divided into slices along z
|
|
|
|
! that are stored contiguously. On processor "me", slices for
|
|
|
|
! processor "proc" are npp_(proc)*ncp_(me) big
|
|
|
|
! 2) all processors communicate to exchange slices
|
|
|
|
! (all columns with z in the slice belonging to "me"
|
|
|
|
! must be received, all the others must be sent to "proc")
|
|
|
|
! Finally one gets the "planes" representation:
|
|
|
|
! processor "me" has npp_(me) complete xy planes
|
|
|
|
!
|
|
|
|
! b) From planes to columns (sign < 0)
|
|
|
|
!
|
|
|
|
! Quite the same in the opposite direction
|
|
|
|
!
|
|
|
|
! The output is overwritten on f_in ; f_aux is used as work space
|
|
|
|
!
|
2006-12-17 01:27:33 +08:00
|
|
|
! If optional argument "use_tg" is true the subroutines performs
|
|
|
|
! the trasposition using the Task Groups distribution
|
|
|
|
!
|
2005-01-07 20:57:41 +08:00
|
|
|
#ifdef __PARA
|
|
|
|
USE parallel_include
|
|
|
|
#endif
|
2006-12-17 01:27:33 +08:00
|
|
|
use mp_global, ONLY : nproc_pool, me_pool, intra_pool_comm, nproc, &
|
2006-12-18 07:11:16 +08:00
|
|
|
my_image_id, nogrp, pgrp_comm
|
2006-12-17 01:27:33 +08:00
|
|
|
USE kinds, ONLY : DP
|
|
|
|
USE task_groups, ONLY : nplist
|
|
|
|
|
2003-04-09 06:36:18 +08:00
|
|
|
implicit none
|
|
|
|
|
2006-12-17 01:27:33 +08:00
|
|
|
integer, intent(in) :: nrx3, nxx_, sign, ncp_ (:), npp_ (:)
|
|
|
|
complex (DP) :: f_in (nxx_), f_aux (nxx_)
|
|
|
|
logical, optional, intent(in) :: use_tg
|
2003-04-09 06:36:18 +08:00
|
|
|
|
|
|
|
#ifdef __PARA
|
|
|
|
|
|
|
|
integer :: dest, from, k, offset1 (nproc), sendcount (nproc), &
|
|
|
|
sdispls (nproc), recvcount (nproc), rdispls (nproc), &
|
2007-11-19 21:19:42 +08:00
|
|
|
proc, ierr, me, nprocp, gproc, gcomm, i, kdest, kfrom
|
2003-04-09 06:36:18 +08:00
|
|
|
!
|
2006-12-17 01:27:33 +08:00
|
|
|
LOGICAL :: use_tg_
|
2003-04-25 06:10:04 +08:00
|
|
|
|
|
|
|
#if defined __HPM
|
2003-04-28 15:23:04 +08:00
|
|
|
! CALL f_hpmstart( 10, 'scatter' )
|
2003-04-25 06:10:04 +08:00
|
|
|
#endif
|
|
|
|
|
2003-04-09 06:36:18 +08:00
|
|
|
!
|
2006-12-17 01:27:33 +08:00
|
|
|
! Task Groups
|
2003-04-25 06:10:04 +08:00
|
|
|
|
2006-12-17 01:27:33 +08:00
|
|
|
use_tg_ = .FALSE.
|
2003-04-09 06:36:18 +08:00
|
|
|
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( PRESENT( use_tg ) ) use_tg_ = use_tg
|
|
|
|
|
|
|
|
me = me_pool + 1
|
2006-03-06 07:02:36 +08:00
|
|
|
!
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( use_tg_ ) THEN
|
|
|
|
! This is the number of procs. in the plane-wave group
|
|
|
|
nprocp = nproc_pool / nogrp
|
|
|
|
ELSE
|
|
|
|
nprocp = nproc_pool
|
|
|
|
END IF
|
2006-03-06 07:02:36 +08:00
|
|
|
!
|
|
|
|
if (nprocp.eq.1) return
|
|
|
|
!
|
|
|
|
call start_clock ('fft_scatter')
|
|
|
|
!
|
|
|
|
! sendcount(proc): amount of data processor "me" must send to processor
|
|
|
|
! recvcount(proc): amount of data processor "me" must receive from
|
|
|
|
!
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( use_tg_ ) THEN
|
|
|
|
do proc = 1, nprocp
|
|
|
|
gproc = nplist( proc ) + 1
|
|
|
|
sendcount (proc) = npp_ ( gproc ) * ncp_ (me)
|
|
|
|
recvcount (proc) = npp_ (me) * ncp_ ( gproc )
|
|
|
|
end do
|
|
|
|
ELSE
|
|
|
|
do proc = 1, nprocp
|
|
|
|
sendcount (proc) = npp_ (proc) * ncp_ (me)
|
|
|
|
recvcount (proc) = npp_ (me) * ncp_ (proc)
|
|
|
|
end do
|
|
|
|
END IF
|
2006-03-06 07:02:36 +08:00
|
|
|
!
|
|
|
|
! offset1(proc) is used to locate the slices to be sent to proc
|
|
|
|
! sdispls(proc)+1 is the beginning of data that must be sent to proc
|
|
|
|
! rdispls(proc)+1 is the beginning of data that must be received from pr
|
|
|
|
!
|
|
|
|
offset1 (1) = 1
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( use_tg_ ) THEN
|
|
|
|
do proc = 2, nprocp
|
|
|
|
gproc = nplist( proc - 1 ) + 1
|
|
|
|
offset1 (proc) = offset1 (proc - 1) + npp_ ( gproc )
|
|
|
|
enddo
|
|
|
|
ELSE
|
|
|
|
do proc = 2, nprocp
|
|
|
|
offset1 (proc) = offset1 (proc - 1) + npp_ (proc - 1)
|
|
|
|
enddo
|
|
|
|
END IF
|
|
|
|
|
2006-03-06 07:02:36 +08:00
|
|
|
sdispls (1) = 0
|
|
|
|
rdispls (1) = 0
|
|
|
|
do proc = 2, nprocp
|
|
|
|
sdispls (proc) = sdispls (proc - 1) + sendcount (proc - 1)
|
|
|
|
rdispls (proc) = rdispls (proc - 1) + recvcount (proc - 1)
|
|
|
|
enddo
|
|
|
|
!
|
|
|
|
|
|
|
|
ierr = 0
|
|
|
|
if (sign.gt.0) then
|
|
|
|
!
|
|
|
|
! "forward" scatter from columns to planes
|
|
|
|
!
|
|
|
|
! step one: store contiguously the slices
|
|
|
|
!
|
|
|
|
do proc = 1, nprocp
|
|
|
|
from = offset1 (proc)
|
|
|
|
dest = 1 + sdispls (proc)
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( use_tg_ ) THEN
|
|
|
|
gproc = nplist(proc)+1
|
|
|
|
ELSE
|
|
|
|
gproc = proc
|
|
|
|
END IF
|
2007-11-19 21:19:42 +08:00
|
|
|
!
|
|
|
|
! optimize for large parallel execution, where npp_ ( gproc ) ~ 1
|
|
|
|
!
|
|
|
|
IF( npp_ ( gproc ) > 128 ) THEN
|
|
|
|
do k = 1, ncp_ (me)
|
|
|
|
call DCOPY (2 * npp_ ( gproc ), f_in (from + (k - 1) * nrx3), &
|
|
|
|
1, f_aux (dest + (k - 1) * npp_ ( gproc ) ), 1)
|
|
|
|
enddo
|
|
|
|
ELSE IF( npp_ ( gproc ) == 1 ) THEN
|
|
|
|
do k = 1, ncp_ (me)
|
|
|
|
f_aux (dest + (k - 1) ) = f_in (from + (k - 1) * nrx3 )
|
|
|
|
enddo
|
|
|
|
ELSE
|
|
|
|
do k = 1, ncp_ (me)
|
|
|
|
kdest = dest + (k - 1) * npp_ ( gproc ) - 1
|
|
|
|
kfrom = from + (k - 1) * nrx3 - 1
|
|
|
|
do i = 1, npp_ ( gproc )
|
|
|
|
f_aux ( kdest + i ) = f_in ( kfrom + i )
|
|
|
|
enddo
|
|
|
|
enddo
|
|
|
|
END IF
|
2006-03-06 07:02:36 +08:00
|
|
|
enddo
|
|
|
|
!
|
|
|
|
! maybe useless; ensures that no garbage is present in the output
|
|
|
|
!
|
2007-06-12 01:13:15 +08:00
|
|
|
f_in = 0.0_DP
|
2006-03-06 07:02:36 +08:00
|
|
|
!
|
|
|
|
! step two: communication
|
|
|
|
!
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( use_tg_ ) THEN
|
2006-12-18 07:11:16 +08:00
|
|
|
gcomm = pgrp_comm
|
2006-12-17 01:27:33 +08:00
|
|
|
ELSE
|
|
|
|
gcomm = intra_pool_comm
|
|
|
|
END IF
|
2006-03-06 07:02:36 +08:00
|
|
|
|
2007-09-17 17:35:37 +08:00
|
|
|
call mpi_barrier (gcomm, ierr) ! why barrier? for buggy openmpi over ib
|
2006-03-06 07:02:36 +08:00
|
|
|
|
|
|
|
call mpi_alltoallv (f_aux(1), sendcount, sdispls, MPI_DOUBLE_COMPLEX, f_in(1), &
|
2006-12-17 01:27:33 +08:00
|
|
|
recvcount, rdispls, MPI_DOUBLE_COMPLEX, gcomm, ierr)
|
|
|
|
|
2006-03-06 07:02:36 +08:00
|
|
|
if( ABS(ierr) /= 0 ) call errore ('fft_scatter', 'info<>0', ABS(ierr) )
|
|
|
|
!
|
|
|
|
else
|
|
|
|
!
|
|
|
|
! "backward" scatter from planes to columns
|
|
|
|
!
|
|
|
|
! step two: communication
|
|
|
|
!
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( use_tg_ ) THEN
|
2006-12-18 07:11:16 +08:00
|
|
|
gcomm = pgrp_comm
|
2006-12-17 01:27:33 +08:00
|
|
|
ELSE
|
|
|
|
gcomm = intra_pool_comm
|
|
|
|
END IF
|
2006-03-06 07:02:36 +08:00
|
|
|
|
2007-09-17 17:35:37 +08:00
|
|
|
call mpi_barrier (gcomm, ierr) ! why barrier? for buggy openmpi over ib
|
2006-03-06 07:02:36 +08:00
|
|
|
|
|
|
|
call mpi_alltoallv (f_in(1), recvcount, rdispls, MPI_DOUBLE_COMPLEX, f_aux(1), &
|
2006-12-17 01:27:33 +08:00
|
|
|
sendcount, sdispls, MPI_DOUBLE_COMPLEX, gcomm, ierr)
|
2006-03-06 07:02:36 +08:00
|
|
|
|
|
|
|
if( ABS(ierr) /= 0 ) call errore ('fft_scatter', 'info<>0', ABS(ierr) )
|
|
|
|
!
|
|
|
|
! step one: store contiguously the columns
|
|
|
|
!
|
2007-06-12 01:13:15 +08:00
|
|
|
f_in = 0.0_DP
|
2006-03-06 07:02:36 +08:00
|
|
|
!
|
|
|
|
do proc = 1, nprocp
|
|
|
|
from = 1 + sdispls (proc)
|
|
|
|
dest = offset1 (proc)
|
2006-12-17 01:27:33 +08:00
|
|
|
IF( use_tg_ ) THEN
|
|
|
|
gproc = nplist(proc)+1
|
|
|
|
ELSE
|
|
|
|
gproc = proc
|
|
|
|
END IF
|
2007-11-19 21:19:42 +08:00
|
|
|
!
|
|
|
|
! optimize for large parallel execution, where npp_ ( gproc ) ~ 1
|
|
|
|
!
|
|
|
|
IF( npp_ ( gproc ) > 128 ) THEN
|
|
|
|
do k = 1, ncp_ (me)
|
|
|
|
call DCOPY ( 2 * npp_ ( gproc ), f_aux (from + (k - 1) * npp_ ( gproc ) ), 1, &
|
2006-12-17 01:27:33 +08:00
|
|
|
f_in (dest + (k - 1) * nrx3 ), 1 )
|
2007-11-19 21:19:42 +08:00
|
|
|
enddo
|
|
|
|
ELSE IF ( npp_ ( gproc ) == 1 ) THEN
|
|
|
|
do k = 1, ncp_ (me)
|
|
|
|
f_in ( dest + (k - 1) * nrx3 ) = f_aux ( from + (k - 1) )
|
|
|
|
end do
|
|
|
|
ELSE
|
|
|
|
do k = 1, ncp_ (me)
|
|
|
|
kdest = dest + (k - 1) * nrx3 - 1
|
|
|
|
kfrom = from + (k - 1) * npp_ ( gproc ) - 1
|
|
|
|
do i = 1, npp_ ( gproc )
|
|
|
|
f_in ( kdest + i ) = f_aux( kfrom + i )
|
|
|
|
enddo
|
|
|
|
enddo
|
|
|
|
END IF
|
2006-03-06 07:02:36 +08:00
|
|
|
|
|
|
|
enddo
|
|
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
call stop_clock ('fft_scatter')
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined __HPM
|
|
|
|
! CALL f_hpmstop( 10 )
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return
|
|
|
|
|
2006-12-17 01:27:33 +08:00
|
|
|
end subroutine fft_scatter
|
2006-03-06 07:02:36 +08:00
|
|
|
|
2003-04-09 06:36:18 +08:00
|
|
|
!=----------------------------------------------------------------------=!
|
2007-09-17 17:35:37 +08:00
|
|
|
END MODULE fft_base
|
2003-04-09 06:36:18 +08:00
|
|
|
!=----------------------------------------------------------------------=!
|