quantum-espresso/FFTXlib/fft_types.f90

998 lines
42 KiB
Fortran

!
! Copyright (C) Quantum ESPRESSO group
!
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!=----------------------------------------------------------------------------=!
MODULE fft_types
!=----------------------------------------------------------------------------=!
USE fft_support, ONLY : good_fft_order, good_fft_dimension
USE fft_param
IMPLICIT NONE
PRIVATE
SAVE
!
! Data type for FFT descriptor.
!
TYPE fft_type_descriptor
!
! FFT dimensions
!
INTEGER :: nr1 = 0 !
INTEGER :: nr2 = 0 ! effective FFT dimensions of the 3D grid (global)
INTEGER :: nr3 = 0 !
INTEGER :: nr1x = 0 ! FFT grids leading dimensions
INTEGER :: nr2x = 0 ! dimensions of the arrays for the 3D grid (global)
INTEGER :: nr3x = 0 ! may differ from nr1 ,nr2 ,nr3 in order to boost performances
!
! Parallel layout: in reciprocal space data are organized in columns (sticks) along
! the third direction and distributed across nproc processors.
! In real space data are distributed in blocks comprising sections
! of the Y and Z directions and complete rows in the X direction in
! a matrix of nproc2 x nproc3 processors.
! nproc = nproc2 x nproc3 and additional communicators are introduced
! for data redistribution across matrix columns and rows.
!
! communicators and processor coordinates
!
LOGICAL :: lpara = .FALSE. ! .TRUE. if parallel FFT is active
LOGICAL :: lgamma = .FALSE. ! .TRUE. if the grid has Gamma symmetry
INTEGER :: root = 0 ! root processor
INTEGER :: comm = MPI_COMM_NULL ! communicator for the main fft group
INTEGER :: comm2 = MPI_COMM_NULL ! communicator for the fft group along the second direction
INTEGER :: comm3 = MPI_COMM_NULL ! communicator for the fft group along the third direction
INTEGER :: nproc = 1 ! number of processor in the main fft group
INTEGER :: nproc2 = 1 ! number of processor in the fft group along the second direction
INTEGER :: nproc3 = 1 ! number of processor in the fft group along the third direction
INTEGER :: mype = 0 ! my processor id (starting from 0) in the fft main communicator
INTEGER :: mype2 = 0 ! my processor id (starting from 0) in the fft communicator along the second direction (nproc2)
INTEGER :: mype3 = 0 ! my processor id (starting from 0) in the fft communicator along the third direction (nproc3)
INTEGER, ALLOCATABLE :: iproc(:,:) , iproc2(:), iproc3(:) ! subcommunicators proc mapping (starting from 1)
!
! FFT distributed data dimensions and indices
!
INTEGER :: my_nr3p = 0 ! size of the "Z" section for this processor = nr3p( mype3 + 1 ) ~ nr3/nproc3
INTEGER :: my_nr2p = 0 ! size of the "Y" section for this processor = nr2p( mype2 + 1 ) ~ nr2/nproc2
INTEGER :: my_i0r3p = 0 ! offset of the first "Z" element of this proc in the nproc3 group = i0r3p( mype3 + 1 )
INTEGER :: my_i0r2p = 0 ! offset of the first "Y" element of this proc in the nproc2 group = i0r2p( mype2 + 1 )
INTEGER, ALLOCATABLE :: nr3p(:) ! size of the "Z" section of each processor in the nproc3 group along Z
INTEGER, ALLOCATABLE :: nr3p_offset(:) ! offset of the "Z" section of each processor in the nproc3 group along Z
INTEGER, ALLOCATABLE :: nr2p(:) ! size of the "Y" section of each processor in the nproc2 group along Y
INTEGER, ALLOCATABLE :: nr2p_offset(:) ! offset of the "Y" section of each processor in the nproc2 group along Y
INTEGER, ALLOCATABLE :: nr1p(:) ! number of active "X" values ( potential ) for a given proc in the nproc2 group
INTEGER, ALLOCATABLE :: nr1w(:) ! number of active "X" values ( wave func ) for a given proc in the nproc2 group
INTEGER :: nr1w_tg ! total number of active "X" values ( wave func ). used in task group ffts
INTEGER, ALLOCATABLE :: i0r3p(:) ! offset of the first "Z" element of each proc in the nproc3 group (starting from 0)
INTEGER, ALLOCATABLE :: i0r2p(:) ! offset of the first "Y" element of each proc in the nproc2 group (starting from 0)
INTEGER, ALLOCATABLE :: ir1p(:) ! if >0 ir1p(m1) is the incremental index of the active ( potential ) X value of this proc
INTEGER, ALLOCATABLE :: indp(:,:)! is the inverse of ir1p
INTEGER, ALLOCATABLE :: ir1w(:) ! if >0 ir1w(m1) is the incremental index of the active ( wave func ) X value of this proc
INTEGER, ALLOCATABLE :: indw(:,:)! is the inverse of ir1w
INTEGER, ALLOCATABLE :: ir1w_tg(:)! if >0 ir1w_tg(m1) is the incremental index of the active ( wfc ) X value in task group
INTEGER, ALLOCATABLE :: indw_tg(:)! is the inverse of ir1w_tg
INTEGER :: nst ! total number of sticks ( potential )
INTEGER, ALLOCATABLE :: nsp(:) ! number of sticks per processor ( potential ) using proc index starting from 1
! ... that is on proc mype -> nsp( mype + 1 )
INTEGER, ALLOCATABLE :: nsp_offset(:,:) ! offset of sticks per processor ( potential )
INTEGER, ALLOCATABLE :: nsw(:) ! number of sticks per processor ( wave func ) using proc index as above
INTEGER, ALLOCATABLE :: nsw_offset(:,:) ! offset of sticks per processor ( wave func )
INTEGER, ALLOCATABLE :: nsw_tg(:)! number of sticks per processor ( wave func ) using proc index as above. task group version
INTEGER, ALLOCATABLE :: ngl(:) ! per proc. no. of non zero charge density/potential components
INTEGER, ALLOCATABLE :: nwl(:) ! per proc. no. of non zero wave function plane components
INTEGER :: ngm ! my no. of non zero charge density/potential components
! ngm = dfftp%ngl( dfftp%mype + 1 )
! with gamma sym.
! ngm = ( dfftp%ngl( dfftp%mype + 1 ) + 1 ) / 2
INTEGER :: ngw ! my no. of non zero wave function plane components
! ngw = dffts%nwl( dffts%mype + 1 )
! with gamma sym.
! ngw = ( dffts%nwl( dffts%mype + 1 ) + 1 ) / 2
INTEGER, ALLOCATABLE :: iplp(:) ! if > 0 is the iproc2 processor owning the active "X" value ( potential )
INTEGER, ALLOCATABLE :: iplw(:) ! if > 0 is the iproc2 processor owning the active "X" value ( wave func )
INTEGER :: nnp = 0 ! number of 0 and non 0 sticks in a plane ( ~nr1*nr2/nproc )
INTEGER :: nnr = 0 ! local number of FFT grid elements ( ~nr1*nr2*nr3/nproc )
! size of the arrays allocated for the FFT, local to each processor:
! in parallel execution may differ from nr1x*nr2x*nr3x
! Not to be confused either with nr1*nr2*nr3
INTEGER :: nnr_tg = 0 ! local number of grid elements for task group FFT ( ~nr1*nr2*nr3/proc3 )
INTEGER, ALLOCATABLE :: iss(:) ! index of the first rho stick on each proc
INTEGER, ALLOCATABLE :: isind(:) ! for each position in the plane indicate the stick index
INTEGER, ALLOCATABLE :: ismap(:) ! for each stick in the plane indicate the position
INTEGER, ALLOCATABLE :: nl(:) ! position of the G vec in the FFT grid
INTEGER, ALLOCATABLE :: nlm(:) ! with gamma sym. position of -G vec in the FFT grid
!
! task group ALLTOALL communication layout
INTEGER, ALLOCATABLE :: tg_snd(:) ! number of elements to be sent in task group redistribution
INTEGER, ALLOCATABLE :: tg_rcv(:) ! number of elements to be received in task group redistribution
INTEGER, ALLOCATABLE :: tg_sdsp(:)! send displacement for task group A2A communication
INTEGER, ALLOCATABLE :: tg_rdsp(:)! receive displacement for task group A2A communicattion
!
LOGICAL :: has_task_groups = .FALSE.
!
CHARACTER(len=12):: rho_clock_label = ' '
CHARACTER(len=12):: wave_clock_label = ' '
INTEGER :: grid_id
END TYPE
REAL(DP) :: fft_dual = 4.0d0
INTEGER :: incremental_grid_identifier = 0
PUBLIC :: fft_type_descriptor, fft_type_init
PUBLIC :: fft_type_allocate, fft_type_deallocate
PUBLIC :: fft_stick_index
CONTAINS
!=----------------------------------------------------------------------------=!
SUBROUTINE fft_type_allocate( desc, at, bg, gcutm, comm, fft_fact, nyfft )
!
! routine allocating arrays of fft_type_descriptor, called by fft_type_init
!
TYPE (fft_type_descriptor) :: desc
REAL(DP), INTENT(IN) :: at(3,3), bg(3,3)
REAL(DP), INTENT(IN) :: gcutm
INTEGER, INTENT(IN), OPTIONAL :: fft_fact(3)
INTEGER, INTENT(IN), OPTIONAL :: nyfft
INTEGER, INTENT(in) :: comm ! mype starting from 0
INTEGER :: nx, ny, ierr, nzfft
INTEGER :: mype, root, nproc, nproc2, nproc3, iproc, iproc2, iproc3 ! mype starting from 0
INTEGER :: color, key, comm2, comm3
!write (6,*) ' inside fft_type_allocate' ; FLUSH(6)
IF ( ALLOCATED( desc%nsp ) ) &
CALL fftx_error__(' fft_type_allocate ', ' fft arrays already allocated ', 1 )
desc%comm = comm
#if defined(__MPI)
IF( desc%comm == MPI_COMM_NULL ) THEN
CALL fftx_error__( ' fft_type_allocate ', ' fft communicator is null ', 1 )
END IF
#endif
!
root = 0 ; mype = 0 ; nproc = 1
#if defined(__MPI)
CALL MPI_COMM_RANK( comm, mype, ierr )
CALL MPI_COMM_SIZE( comm, nproc, ierr )
#endif
desc%root = root ; desc%mype = mype ; desc%nproc = nproc
IF ( present(nyfft) ) THEN
! check on yfft group dimension
CALL fftx_error__( ' fft_type_allocate ', ' MOD(nproc,nyfft) .ne. 0 ', MOD(nproc,nyfft) )
!#define ZCOMPACT
#if defined(__MPI)
#if defined(ZCOMPACT)
!write (6,*) ' FFT IS ZCOMPACT '
nzfft = nproc / nyfft
color = mype / nzfft ; key = MOD( mype, nzfft )
#else
!write (6,*) ' FFT IS YCOMPACT '
color = MOD( mype, nyfft ) ; key = mype / nyfft
#endif
! processes with the same key are in the same group along Y
CALL MPI_COMM_SPLIT( comm, key, color, desc%comm2, ierr )
CALL MPI_COMM_RANK( desc%comm2, desc%mype2, ierr )
CALL MPI_COMM_SIZE( desc%comm2, desc%nproc2, ierr )
! processes with the same color are in the same group along Z
CALL MPI_COMM_SPLIT( comm, color, key, desc%comm3, ierr )
CALL MPI_COMM_RANK( desc%comm3, desc%mype3, ierr )
CALL MPI_COMM_SIZE( desc%comm3, desc%nproc3, ierr )
#else
desc%comm2 = desc%comm ; desc%mype2 = desc%mype ; desc%nproc2 = desc%nproc
desc%comm3 = desc%comm ; desc%mype3 = desc%mype ; desc%nproc3 = desc%nproc
#endif
ENDIF
!write (6,*) ' nproc and mype '
!write (6,*) desc%nproc, desc%nproc2, desc%nproc3
!write (6,*) desc%mype, desc%mype2, desc%mype3
ALLOCATE ( desc%iproc(desc%nproc2,desc%nproc3), desc%iproc2(desc%nproc), desc%iproc3(desc%nproc) )
do iproc = 1, desc%nproc
#if defined(ZCOMPACT)
iproc3 = MOD(iproc-1, desc%nproc3) + 1 ; iproc2 = (iproc-1)/desc%nproc3 + 1
#else
iproc2 = MOD(iproc-1, desc%nproc2) + 1 ; iproc3 = (iproc-1)/desc%nproc2 + 1
#endif
desc%iproc2(iproc) = iproc2 ; desc%iproc3(iproc) = iproc3
desc%iproc(iproc2,iproc3) = iproc
end do
CALL realspace_grid_init( desc, at, bg, gcutm, fft_fact )
ALLOCATE( desc%nr2p ( desc%nproc2 ), desc%i0r2p( desc%nproc2 ) ) ; desc%nr2p = 0 ; desc%i0r2p = 0
ALLOCATE( desc%nr2p_offset ( desc%nproc2 ) ) ; desc%nr2p_offset = 0
ALLOCATE( desc%nr3p ( desc%nproc3 ), desc%i0r3p( desc%nproc3 ) ) ; desc%nr3p = 0 ; desc%i0r3p = 0
ALLOCATE( desc%nr3p_offset ( desc%nproc3 ) ) ; desc%nr3p_offset = 0
nx = desc%nr1x
ny = desc%nr2x
ALLOCATE( desc%nsp( desc%nproc ) ) ; desc%nsp = 0
ALLOCATE( desc%nsp_offset( desc%nproc2, desc%nproc3 ) ) ; desc%nsp_offset = 0
ALLOCATE( desc%nsw( desc%nproc ) ) ; desc%nsw = 0
ALLOCATE( desc%nsw_offset( desc%nproc2, desc%nproc3 ) ) ; desc%nsw_offset = 0
ALLOCATE( desc%nsw_tg( desc%nproc ) ) ; desc%nsw_tg = 0
ALLOCATE( desc%ngl( desc%nproc ) ) ; desc%ngl = 0
ALLOCATE( desc%nwl( desc%nproc ) ) ; desc%nwl = 0
ALLOCATE( desc%iss( desc%nproc ) ) ; desc%iss = 0
ALLOCATE( desc%isind( nx * ny ) ) ; desc%isind = 0
ALLOCATE( desc%ismap( nx * ny ) ) ; desc%ismap = 0
ALLOCATE( desc%nr1p( desc%nproc2 ) ) ; desc%nr1p = 0
ALLOCATE( desc%nr1w( desc%nproc2 ) ) ; desc%nr1w = 0
ALLOCATE( desc%ir1p( desc%nr1x ) ) ; desc%ir1p = 0
ALLOCATE( desc%indp( desc%nr1x,desc%nproc2 ) ) ; desc%indp = 0
ALLOCATE( desc%ir1w( desc%nr1x ) ) ; desc%ir1w = 0
ALLOCATE( desc%ir1w_tg( desc%nr1x ) ) ; desc%ir1w_tg = 0
ALLOCATE( desc%indw( desc%nr1x, desc%nproc2 ) ) ; desc%indw = 0
ALLOCATE( desc%indw_tg( desc%nr1x ) ) ; desc%indw_tg = 0
ALLOCATE( desc%iplp( nx ) ) ; desc%iplp = 0
ALLOCATE( desc%iplw( nx ) ) ; desc%iplw = 0
ALLOCATE( desc%tg_snd( desc%nproc2) ) ; desc%tg_snd = 0
ALLOCATE( desc%tg_rcv( desc%nproc2) ) ; desc%tg_rcv = 0
ALLOCATE( desc%tg_sdsp( desc%nproc2) ) ; desc%tg_sdsp = 0
ALLOCATE( desc%tg_rdsp( desc%nproc2) ) ; desc%tg_rdsp = 0
incremental_grid_identifier = incremental_grid_identifier + 1
desc%grid_id = incremental_grid_identifier
END SUBROUTINE fft_type_allocate
SUBROUTINE fft_type_deallocate( desc )
TYPE (fft_type_descriptor) :: desc
INTEGER :: ierr
!write (6,*) ' inside fft_type_deallocate' ; FLUSH(6)
IF ( ALLOCATED( desc%nr2p ) ) DEALLOCATE( desc%nr2p )
IF ( ALLOCATED( desc%nr2p_offset ) ) DEALLOCATE( desc%nr2p_offset )
IF ( ALLOCATED( desc%nr3p_offset ) ) DEALLOCATE( desc%nr3p_offset )
IF ( ALLOCATED( desc%i0r2p ) ) DEALLOCATE( desc%i0r2p )
IF ( ALLOCATED( desc%nr3p ) ) DEALLOCATE( desc%nr3p )
IF ( ALLOCATED( desc%i0r3p ) ) DEALLOCATE( desc%i0r3p )
IF ( ALLOCATED( desc%nsp ) ) DEALLOCATE( desc%nsp )
IF ( ALLOCATED( desc%nsp_offset ) ) DEALLOCATE( desc%nsp_offset )
IF ( ALLOCATED( desc%nsw ) ) DEALLOCATE( desc%nsw )
IF ( ALLOCATED( desc%nsw_offset ) ) DEALLOCATE( desc%nsw_offset )
IF ( ALLOCATED( desc%nsw_tg ) ) DEALLOCATE( desc%nsw_tg )
IF ( ALLOCATED( desc%ngl ) ) DEALLOCATE( desc%ngl )
IF ( ALLOCATED( desc%nwl ) ) DEALLOCATE( desc%nwl )
IF ( ALLOCATED( desc%iss ) ) DEALLOCATE( desc%iss )
IF ( ALLOCATED( desc%isind ) ) DEALLOCATE( desc%isind )
IF ( ALLOCATED( desc%ismap ) ) DEALLOCATE( desc%ismap )
IF ( ALLOCATED( desc%nr1p ) ) DEALLOCATE( desc%nr1p )
IF ( ALLOCATED( desc%nr1w ) ) DEALLOCATE( desc%nr1w )
IF ( ALLOCATED( desc%ir1p ) ) DEALLOCATE( desc%ir1p )
IF ( ALLOCATED( desc%indp ) ) DEALLOCATE( desc%indp )
IF ( ALLOCATED( desc%ir1w ) ) DEALLOCATE( desc%ir1w )
IF ( ALLOCATED( desc%ir1w_tg ) )DEALLOCATE( desc%ir1w_tg )
IF ( ALLOCATED( desc%indw ) ) DEALLOCATE( desc%indw )
IF ( ALLOCATED( desc%indw_tg ) )DEALLOCATE( desc%indw_tg )
IF ( ALLOCATED( desc%iplp ) ) DEALLOCATE( desc%iplp )
IF ( ALLOCATED( desc%iplw ) ) DEALLOCATE( desc%iplw )
IF ( ALLOCATED( desc%iproc ) ) DEALLOCATE( desc%iproc )
IF ( ALLOCATED( desc%iproc2 ) ) DEALLOCATE( desc%iproc2 )
IF ( ALLOCATED( desc%iproc3 ) ) DEALLOCATE( desc%iproc3 )
IF ( ALLOCATED( desc%tg_snd ) ) DEALLOCATE( desc%tg_snd )
IF ( ALLOCATED( desc%tg_rcv ) ) DEALLOCATE( desc%tg_rcv )
IF ( ALLOCATED( desc%tg_sdsp ) )DEALLOCATE( desc%tg_sdsp )
IF ( ALLOCATED( desc%tg_rdsp ) )DEALLOCATE( desc%tg_rdsp )
IF ( ALLOCATED( desc%nl ) ) DEALLOCATE( desc%nl )
IF ( ALLOCATED( desc%nlm ) ) DEALLOCATE( desc%nlm )
desc%comm = MPI_COMM_NULL
#if defined(__MPI)
IF (desc%comm2 /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm2, ierr )
IF (desc%comm3 /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm3, ierr )
#else
desc%comm2 = MPI_COMM_NULL
desc%comm3 = MPI_COMM_NULL
#endif
desc%nr1 = 0 ; desc%nr2 = 0 ; desc%nr3 = 0
desc%nr1x = 0 ; desc%nr2x = 0 ; desc%nr3x = 0
desc%grid_id = 0
END SUBROUTINE fft_type_deallocate
!=----------------------------------------------------------------------------=!
SUBROUTINE fft_type_set( desc, nst, ub, lb, idx, in1, in2, ncp, ncpw, ngp, ngpw, st, stw )
TYPE (fft_type_descriptor) :: desc
INTEGER, INTENT(in) :: nst ! total number of stiks
INTEGER, INTENT(in) :: ub(3), lb(3) ! upper and lower bound of real space indices
INTEGER, INTENT(in) :: idx(:) ! sorting index of the sticks
INTEGER, INTENT(in) :: in1(:) ! x-index of a stick
INTEGER, INTENT(in) :: in2(:) ! y-index of a stick
INTEGER, INTENT(in) :: ncp(:) ! number of rho columns per processor
INTEGER, INTENT(in) :: ncpw(:) ! number of wave columns per processor
INTEGER, INTENT(in) :: ngp(:) ! number of rho G-vectors per processor
INTEGER, INTENT(in) :: ngpw(:) ! number of wave G-vectors per processor
INTEGER, INTENT(in) :: st( lb(1) : ub(1), lb(2) : ub(2) ) ! stick owner of a given rho stick
INTEGER, INTENT(in) :: stw( lb(1) : ub(1), lb(2) : ub(2) ) ! stick owner of a given wave stick
INTEGER :: nsp( desc%nproc ), nsw_tg, nr1w_tg
INTEGER :: np, nq, i, is, iss, itg, i1, i2, m1, m2, ip
INTEGER :: ncpx, nr1px, nr2px, nr3px
INTEGER :: nr1, nr2, nr3 ! size of real space grid
INTEGER :: nr1x, nr2x, nr3x ! padded size of real space grid
!write (6,*) ' inside fft_type_set' ; FLUSH(6)
!
IF (.NOT. ALLOCATED( desc%nsp ) ) &
CALL fftx_error__(' fft_type_set ', ' fft arrays not yet allocated ', 1 )
IF ( desc%nr1 == 0 .OR. desc%nr2 == 0 .OR. desc%nr3 == 0 ) &
CALL fftx_error__(' fft_type_set ', ' fft dimensions not yet set ', 1 )
! Set fft actual and leading dimensions to be used internally
nr1 = desc%nr1 ; nr2 = desc%nr2 ; nr3 = desc%nr3
nr1x = desc%nr1x ; nr2x = desc%nr2x ; nr3x = desc%nr3x
IF( ( nr1 > nr1x ) .or. ( nr2 > nr2x ) .or. ( nr3 > nr3x ) ) &
CALL fftx_error__( ' fft_type_set ', ' wrong fft dimensions ', 1 )
IF( ( size( desc%ngl ) < desc%nproc ) .or. ( size( desc%iss ) < desc%nproc ) .or. &
( size( desc%nr2p ) < desc%nproc2 ) .or. ( size( desc%i0r2p ) < desc%nproc2 ) .or. &
( size( desc%nr3p ) < desc%nproc3 ) .or. ( size( desc%i0r3p ) < desc%nproc3 ) ) &
CALL fftx_error__( ' fft_type_set ', ' wrong descriptor dimensions ', 2 )
IF( ( size( idx ) < nst ) .or. ( size( in1 ) < nst ) .or. ( size( in2 ) < nst ) ) &
CALL fftx_error__( ' fft_type_set ', ' wrong number of stick dimensions ', 3 )
IF( ( size( ncp ) < desc%nproc ) .or. ( size( ngp ) < desc%nproc ) ) &
CALL fftx_error__( ' fft_type_set ', ' wrong stick dimensions ', 4 )
! Set the number of "Y" values for each processor in the nproc2 group
np = nr2 / desc%nproc2
nq = nr2 - np * desc%nproc2
desc%nr2p(1:desc%nproc2) = np ! assign a base value to all processors of the nproc2 group
DO i =1, nq ! assign an extra unit to the first nq processors of the nproc2 group
desc%nr2p(i) = np + 1
ENDDO
! set the offset
desc%nr2p_offset(1) = 0
DO i =1, desc%nproc2-1
desc%nr2p_offset(i+1) = desc%nr2p_offset(i) + desc%nr2p(i)
ENDDO
!-- my_nr2p is the number of planes per processor of this processor in the Y group
desc%my_nr2p = desc%nr2p( desc%mype2 + 1 )
! Find out the index of the starting plane on each proc
desc%i0r2p = 0
DO i = 2, desc%nproc2
desc%i0r2p(i) = desc%i0r2p(i-1) + desc%nr2p(i-1)
ENDDO
!-- my_i0r2p is the index-offset of the starting plane of this processor in the Y group
desc%my_i0r2p = desc%i0r2p( desc%mype2 + 1 )
! Set the number of "Z" values for each processor in the nproc3 group
np = nr3 / desc%nproc3
nq = nr3 - np * desc%nproc3
desc%nr3p(1:desc%nproc3) = np ! assign a base value to all processors
DO i =1, nq ! assign an extra unit to the first nq processors of the nproc3 group
desc%nr3p(i) = np + 1
END DO
! set the offset
desc%nr3p_offset(1) = 0
DO i =1, desc%nproc3-1
desc%nr3p_offset(i+1) = desc%nr3p_offset(i) + desc%nr3p(i)
ENDDO
!-- my_nr3p is the number of planes per processor of this processor in the Z group
desc%my_nr3p = desc%nr3p( desc%mype3 + 1 )
! Find out the index of the starting plane on each proc
desc%i0r3p = 0
DO i = 2, desc%nproc3
desc%i0r3p( i ) = desc%i0r3p( i-1 ) + desc%nr3p ( i-1 )
ENDDO
!-- my_i0r3p is the index-offset of the starting plane of this processor in the Z group
desc%my_i0r3p = desc%i0r3p( desc%mype3 + 1 )
! dimension of the xy plane. see ncplane
desc%nnp = nr1x * nr2x
!!!!!!!
desc%ngl( 1:desc%nproc ) = ngp( 1:desc%nproc ) ! local number of g vectors (rho) per processor
desc%nwl( 1:desc%nproc ) = ngpw( 1:desc%nproc ) ! local number of g vectors (wave) per processor
IF( size( desc%isind ) < ( nr1x * nr2x ) ) &
CALL fftx_error__( ' fft_type_set ', ' wrong descriptor dimensions, isind ', 5 )
IF( size( desc%iplp ) < ( nr1x ) .or. size( desc%iplw ) < ( nr1x ) ) &
CALL fftx_error__( ' fft_type_set ', ' wrong descriptor dimensions, ipl ', 5 )
!
! 1. Temporarily store in the array "desc%isind" the index of the processor
! that own the corresponding stick (index of proc starting from 1)
! 2. Set the array elements of "desc%iplw" and "desc%iplp" to one
! for that index corresponding to YZ planes containing at least one stick
! this are used in the FFT transform along Y
!
desc%isind = 0 ! will contain the +ve or -ve of the processor number, if any, that owns the stick
desc%iplp = 0 ! if > 0 is the nporc2 processor owning this ( potential ) X active plane
desc%iplw = 0 ! if > 0 is the nproc2 processor owning this ( wave func ) X active value
! Set nst to the proper number of sticks (the total number of 1d fft along z to be done)
desc%nst = 0
DO iss = 1, SIZE( idx )
is = idx( iss )
IF( is < 1 ) CYCLE
i1 = in1( is )
i2 = in2( is )
IF( st( i1, i2 ) > 0 ) THEN
desc%nst = desc%nst + 1
m1 = i1 + 1; IF ( m1 < 1 ) m1 = m1 + nr1
m2 = i2 + 1; IF ( m2 < 1 ) m2 = m2 + nr2
IF( stw( i1, i2 ) > 0 ) THEN
desc%isind( m1 + ( m2 - 1 ) * nr1x ) = st( i1, i2 )
desc%iplw( m1 ) = desc%iproc2(st(i1,i2))
ELSE
desc%isind( m1 + ( m2 - 1 ) * nr1x ) = -st( i1, i2 )
ENDIF
desc%iplp( m1 ) = desc%iproc2(st(i1,i2))
IF( desc%lgamma ) THEN
IF( i1 /= 0 .OR. i2 /= 0 ) desc%nst = desc%nst + 1
m1 = -i1 + 1; IF ( m1 < 1 ) m1 = m1 + nr1
m2 = -i2 + 1; IF ( m2 < 1 ) m2 = m2 + nr2
IF( stw( -i1, -i2 ) > 0 ) THEN
desc%isind( m1 + ( m2 - 1 ) * nr1x ) = st( -i1, -i2 )
desc%iplw( m1 ) = desc%iproc2(st(-i1,-i2))
ELSE
desc%isind( m1 + ( m2 - 1 ) * nr1x ) = -st( -i1, -i2 )
ENDIF
desc%iplp( m1 ) = desc%iproc2(st(-i1,-i2))
ENDIF
ENDIF
ENDDO
do m1=1,desc%nr1x
if (desc%iplw(m1)>0) then
if (desc%iplp(m1) /= desc%iplw(m1) ) then
write (6,*) 'WRONG iplp/iplw arrays'
write (6,*) desc%iplp
write (6,*) desc%iplw
CALL fftx_error__( ' fft_type_set ', ' iplp is wrong ', m1 )
end if
end if
end do
! count how many active X values per each nproc2 processor and set the incremental index of this one
! wave func X values first
desc%nr1w = 0 ; desc%ir1w = 0 ; desc%indw = 0
nr1w_tg = 0 ; desc%ir1w_tg = 0 ; desc%indw_tg = 0
do i1 = 1, nr1
if (desc%iplw(i1) > 0 ) then
desc%nr1w(desc%iplw(i1)) = desc%nr1w(desc%iplw(i1)) + 1
desc%indw(desc%nr1w(desc%iplw(i1)),desc%iplw(i1)) = i1
nr1w_tg = nr1w_tg + 1 ; desc%ir1w_tg(i1) = nr1w_tg ; desc%indw_tg(nr1w_tg) = i1
end if
if (desc%iplw(i1) == desc%mype2 +1) desc%ir1w(i1) = desc%nr1w(desc%iplw(i1))
end do
desc%nr1w_tg = nr1w_tg ! this is useful in task group ffts
! then potential X values
desc%nr1p = desc%nr1w ; desc%ir1p=desc%ir1w ; desc%indp = desc%indw
do i1 = 1, nr1
if ( (desc%iplw(i1) > 0) .and. (desc%iplp(i1) == 0) ) &
CALL fftx_error__( ' fft_type_set ', ' bad distribution of X values ', i1 )
if ( (desc%iplw(i1) > 0) ) cycle ! this X value has already been taken care of
if (desc%iplp(i1) > 0 ) then
desc%nr1p(desc%iplp(i1)) = desc%nr1p(desc%iplp(i1)) + 1
desc%indp(desc%nr1p(desc%iplp(i1)),desc%iplp(i1)) = i1
end if
if (desc%iplp(i1) == desc%mype2+1) desc%ir1p(i1) = desc%nr1p(desc%iplp(i1))
end do
!
! Compute for each proc the global index ( starting from 0 ) of the first
! local stick ( desc%iss )
!
DO i = 1, desc%nproc
IF( i == 1 ) THEN
desc%iss( i ) = 0
ELSE
desc%iss( i ) = desc%iss( i - 1 ) + ncp( i - 1 )
ENDIF
ENDDO
! iss(1:nproc) is the index offset of the first column of a given processor
IF( size( desc%ismap ) < ( nst ) ) &
CALL fftx_error__( ' fft_type_set ', ' wrong descriptor dimensions ', 6 )
!
! 1. Set the array desc%ismap which maps stick indexes to
! position in the plane ( iss )
! 2. Re-set the array "desc%isind", that maps position
! in the plane with stick indexes (it is the inverse of desc%ismap )
!
! wave function sticks first
desc%ismap = 0 ! will be the global xy stick index in the global list of processor-ordered sticks
nsp = 0 ! will be the number of sticks of a given processor
DO iss = 1, size( desc%isind )
ip = desc%isind( iss ) ! processor that owns iss wave stick. if it's a rho stick it's negative !
IF( ip > 0 ) THEN ! only operates on wave sticks
nsp( ip ) = nsp( ip ) + 1
desc%ismap( nsp( ip ) + desc%iss( ip ) ) = iss
IF( ip == ( desc%mype + 1 ) ) THEN
desc%isind( iss ) = nsp( ip ) ! isind is OVERWRITTEN as the ordered index in this processor stick list
ELSE
desc%isind( iss ) = 0 ! zero otherwise...
ENDIF
ENDIF
ENDDO
! check number of sticks against the input value
IF( any( nsp( 1:desc%nproc ) /= ncpw( 1:desc%nproc ) ) ) THEN
DO ip = 1, desc%nproc
WRITE( stdout,*) ' * ', ip, ' * ', nsp( ip ), ' /= ', ncpw( ip )
ENDDO
CALL fftx_error__( ' fft_type_set ', ' inconsistent number of sticks ', 7 )
ENDIF
desc%nsw( 1:desc%nproc ) = nsp( 1:desc%nproc ) ! -- number of wave sticks per processor
DO ip=1, desc%nproc3
desc%nsw_offset(1,ip) = 0
DO i=1, desc%nproc2-1
desc%nsw_offset(i+1,ip) = desc%nsw_offset(i,ip) + desc%nsw(desc%iproc(i,ip))
ENDDO
ENDDO
! -- number of wave sticks per processor for task group ffts
desc%nsw_tg( 1:desc%nproc ) = 0
do ip =1, desc%nproc3
nsw_tg = sum(desc%nsw(desc%iproc(1:desc%nproc2,ip)))
desc%nsw_tg(desc%iproc(1:desc%nproc2,ip)) = nsw_tg
end do
! then add pseudopotential stick
DO iss = 1, size( desc%isind )
ip = desc%isind( iss ) ! -ve of processor that owns iss rho stick. if it was a wave stick it's something non negative !
IF( ip < 0 ) THEN
nsp( -ip ) = nsp( -ip ) + 1
desc%ismap( nsp( -ip ) + desc%iss( -ip ) ) = iss
IF( -ip == ( desc%mype + 1 ) ) THEN
desc%isind( iss ) = nsp( -ip ) ! isind is OVERWRITTEN as the ordered index in this processor stick list
ELSE
desc%isind( iss ) = 0 ! zero otherwise...
ENDIF
ENDIF
ENDDO
! check number of sticks against the input value
IF( any( nsp( 1:desc%nproc ) /= ncp( 1:desc%nproc ) ) ) THEN
DO ip = 1, desc%nproc
WRITE( stdout,*) ' * ', ip, ' * ', nsp( ip ), ' /= ', ncp( ip )
ENDDO
CALL fftx_error__( ' fft_type_set ', ' inconsistent number of sticks ', 8 )
ENDIF
desc%nsp( 1:desc%nproc ) = nsp( 1:desc%nproc ) ! -- number of rho sticks per processor
DO ip=1, desc%nproc3
desc%nsp_offset(1,ip) = 0
DO i=1, desc%nproc2-1
desc%nsp_offset(i+1,ip) = desc%nsp_offset(i,ip) + desc%nsp(desc%iproc(i,ip))
ENDDO
ENDDO
IF( .NOT. desc%lpara ) THEN
desc%isind = 0
desc%iplw = 0
desc%iplp = 1
! here we are setting parameter as if we were in a serial code,
! sticks are along X dimension and not along Z
desc%nsp(1) = 0
desc%nsw(1) = 0
DO i1 = lb( 1 ), ub( 1 )
DO i2 = lb( 2 ), ub( 2 )
m1 = i1 + 1; IF ( m1 < 1 ) m1 = m1 + nr1
m2 = i2 + 1; IF ( m2 < 1 ) m2 = m2 + nr2
IF( st( i1, i2 ) > 0 ) THEN
desc%nsp(1) = desc%nsp(1) + 1
END IF
IF( stw( i1, i2 ) > 0 ) THEN
desc%nsw(1) = desc%nsw(1) + 1
desc%isind( m1 + ( m2 - 1 ) * nr1x ) = 1 ! st( i1, i2 )
desc%iplw( m1 ) = 1
ENDIF
ENDDO
ENDDO
!
! if we are in a parallel run, but would like to use serial FFT, all
! tasks must have the same parameters as if serial run.
!
desc%nnr = nr1x * nr2x * nr3x
desc%nnp = nr1x * nr2x
desc%my_nr2p = nr2 ; desc%nr2p = nr2 ; desc%i0r2p = 0
desc%my_nr3p = nr3 ; desc%nr3p = nr3 ; desc%i0r3p = 0
desc%nsw = desc%nsw(1)
desc%nsp = desc%nsp(1)
desc%ngl = SUM(ngp)
desc%nwl = SUM(ngpw)
!
END IF
!write (6,*) 'fft_type_set SUMMARY'
!write (6,*) 'desc%mype ', desc%mype
!write (6,*) 'desc%mype2', desc%mype2
!write (6,*) 'desc%mype3', desc%mype3
!write (6,*) 'nr1 nr2 nr3 dimensions : ', desc%nr1, desc%nr2, desc%nr3
!write (6,*) 'nr1x nr2x nr3x dimensions : ', desc%nr1x, desc%nr2x, desc%nr3x
!write (6,*) 'nr3p arrays'
!write (6,*) desc%nr3p
!write (6,*) 'i0r3p arrays'
!write (6,*) desc%i0r3p
!write (6,*) 'nr2p arrays'
!write (6,*) desc%nr2p
!write (6,*) 'i0r2p arrays'
!write (6,*) desc%i0r2p
!write (6,*) 'nsp/nsw arrays'
!write (6,*) desc%nsp
!write (6,*) desc%nsw
!write (6,*) 'nr1p/nr1w arrays'
!write (6,*) desc%nr1p
!write (6,*) desc%nr1w
!write (6,*) 'ir1p/ir1w arrays'
!write (6,*) desc%ir1p
!write (6,*) desc%ir1w
!write (6,*) 'indp/indw arrays'
!write (6,*) desc%indp
!write (6,*) desc%indw
!write (6,*) 'iplp/iplw arrays'
!write (6,*) desc%iplp
!write (6,*) desc%iplw
! Finally set fft local workspace dimension
nr1px = MAXVAL( desc%nr1p( 1:desc%nproc2 ) ) ! maximum number of X values per processor in the nproc2 group
nr2px = MAXVAL( desc%nr2p( 1:desc%nproc2 ) ) ! maximum number of planes per processor in the nproc2 group
nr3px = MAXVAL( desc%nr3p( 1:desc%nproc3 ) ) ! maximum number of planes per processor in the nproc3 group
ncpx = MAXVAL( ncp( 1:desc%nproc ) ) ! maximum number of columns per processor (use potential sticks to be safe)
IF ( desc%nproc == 1 ) THEN
desc%nnr = nr1x * nr2x * nr3x
desc%nnr_tg = desc%nnr * desc%nproc2
ELSE
desc%nnr = max( ncpx * nr3x, nr1x * nr2px * nr3px ) ! this is required to contain the local data in R and G space
desc%nnr = max( desc%nnr, ncpx*nr3px*desc%nproc3, nr1px*nr2px*nr3px*desc%nproc2) ! this is required to use ALLTOALL instead of ALLTOALLV
desc%nnr = max( 1, desc%nnr ) ! ensure that desc%nrr > 0 ( for extreme parallelism )
desc%nnr_tg = desc%nnr * desc%nproc2
ENDIF
!write (6,*) ' nnr bounds'
!write (6,*) ' nr1x ',nr1x,' nr2x ', nr2x, ' nr3x ', nr3x
!write (6,*) ' nr1x * nr2x * nr3x',nr1x * nr2x * nr3x
!write (6,*) ' ncpx ',ncpx,' nr3px ', nr3px, ' desc%nproc3 ', desc%nproc3
!write (6,*) ' ncpx * nr3x ',ncpx * nr3x
!write (6,*) ' ncpx * nr3px * desc%nproc3 ',ncpx*nr3px*desc%nproc3
!write (6,*) ' nr1px ', nr1px,' nr2px ',nr2px,' desc%nproc2 ', desc%nproc2
!write (6,*) ' nr1x * nr2px * nr3px ',nr1x * nr2px * nr3px
!write (6,*) ' nr1px * nr2px *nr3px * desc%nproc2 ',nr1px*nr2px*nr3px*desc%nproc2
!write (6,*) ' desc%nnr ', desc%nnr
IF( desc%nr3x * desc%nsw( desc%mype + 1 ) > desc%nnr ) &
CALL fftx_error__( ' task_groups_init ', ' inconsistent desc%nnr ', 1 )
desc%tg_snd(1) = desc%nr3x * desc%nsw( desc%mype + 1 )
desc%tg_rcv(1) = desc%nr3x * desc%nsw( desc%iproc(1,desc%mype3+1) )
desc%tg_sdsp(1) = 0
desc%tg_rdsp(1) = 0
DO i = 2, desc%nproc2
desc%tg_snd(i) = desc%nr3x * desc%nsw( desc%mype + 1 )
desc%tg_rcv(i) = desc%nr3x * desc%nsw( desc%iproc(i,desc%mype3+1) )
desc%tg_sdsp(i) = desc%tg_sdsp(i-1) + desc%nnr
desc%tg_rdsp(i) = desc%tg_rdsp(i-1) + desc%tg_rcv(i-1)
ENDDO
RETURN
END SUBROUTINE fft_type_set
!=----------------------------------------------------------------------------=!
SUBROUTINE fft_type_init( dfft, smap, pers, lgamma, lpara, comm, at, bg, gcut_in, dual_in, fft_fact, nyfft )
USE stick_base
TYPE (fft_type_descriptor), INTENT(INOUT) :: dfft
TYPE (sticks_map), INTENT(INOUT) :: smap
CHARACTER(LEN=*), INTENT(IN) :: pers ! fft personality
LOGICAL, INTENT(IN) :: lpara
LOGICAL, INTENT(IN) :: lgamma
INTEGER, INTENT(IN) :: comm
REAL(DP), INTENT(IN) :: gcut_in
REAL(DP), INTENT(IN) :: bg(3,3)
REAL(DP), INTENT(IN) :: at(3,3)
REAL(DP), OPTIONAL, INTENT(IN) :: dual_in
INTEGER, INTENT(IN), OPTIONAL :: fft_fact(3)
INTEGER, INTENT(IN) :: nyfft
!
! Potential or dual
!
INTEGER, ALLOCATABLE :: st(:,:)
! ... stick map, st(i,j) = number of G-vector in the
! ... stick whose x and y miller index are i and j
INTEGER, ALLOCATABLE :: nstp(:)
! ... number of sticks, nstp(ip) = number of stick for processor ip
INTEGER, ALLOCATABLE :: sstp(:)
! ... number of G-vectors, sstp(ip) = sum of the
! ... sticks length for processor ip = number of G-vectors owned by the processor ip
INTEGER :: nst
! ... nst local number of sticks
!
! ... Plane wave
!
INTEGER, ALLOCATABLE :: stw(:,:)
! ... stick map (wave functions), stw(i,j) = number of G-vector in the
! ... stick whose x and y miller index are i and j
INTEGER, ALLOCATABLE :: nstpw(:)
! ... number of sticks (wave functions), nstpw(ip) = number of stick for processor ip
INTEGER, ALLOCATABLE :: sstpw(:)
! ... number of G-vectors (wave functions), sstpw(ip) = sum of the
! ... sticks length for processor ip = number of G-vectors owned by the processor ip
INTEGER :: nstw
! ... nstw local number of sticks (wave functions)
REAL(DP) :: gcut, gkcut, dual
INTEGER :: ngm, ngw
!write (6,*) ' inside fft_type_init' ; FLUSH(6)
dual = fft_dual
IF( PRESENT( dual_in ) ) dual = dual_in
IF( pers == 'rho' ) THEN
gcut = gcut_in
gkcut = gcut / dual
ELSE IF ( pers == 'wave' ) THEN
gkcut = gcut_in
gcut = gkcut * dual
ELSE
CALL fftx_error__(' fft_type_init ', ' unknown FFT personality ', 1 )
END IF
!write (*,*) 'FFT_TYPE_INIT pers, gkcut,gcut', pers, gkcut, gcut
IF( .NOT. ALLOCATED( dfft%nsp ) ) THEN
CALL fft_type_allocate( dfft, at, bg, gcut, comm, fft_fact=fft_fact, nyfft=nyfft )
ELSE
IF( dfft%comm /= comm ) THEN
CALL fftx_error__(' fft_type_init ', ' FFT already allocated with a different communicator ', 1 )
END IF
END IF
dfft%lpara = lpara ! this descriptor can be either a descriptor for a
! parallel FFT or a serial FFT even in parallel build
CALL sticks_map_allocate( smap, lgamma, dfft%lpara, dfft%nproc2, &
dfft%iproc, dfft%iproc2, dfft%nr1, dfft%nr2, dfft%nr3, bg, dfft%comm )
dfft%lgamma = smap%lgamma ! .TRUE. if the grid has Gamma symmetry
ALLOCATE( stw ( smap%lb(1):smap%ub(1), smap%lb(2):smap%ub(2) ) )
ALLOCATE( st ( smap%lb(1):smap%ub(1), smap%lb(2):smap%ub(2) ) )
ALLOCATE( nstp(smap%nproc) )
ALLOCATE( sstp(smap%nproc) )
ALLOCATE( nstpw(smap%nproc) )
ALLOCATE( sstpw(smap%nproc) )
!write(*,*) 'calling get_sticks with gkcut =',gkcut
CALL get_sticks( smap, gkcut, nstpw, sstpw, stw, nstw, ngw )
!write(*,*) 'calling get_sticks with gcut =',gcut
CALL get_sticks( smap, gcut, nstp, sstp, st, nst, ngm )
CALL fft_type_set( dfft, nst, smap%ub, smap%lb, smap%idx, &
smap%ist(:,1), smap%ist(:,2), nstp, nstpw, sstp, sstpw, st, stw )
dfft%ngw = dfft%nwl( dfft%mype + 1 )
dfft%ngm = dfft%ngl( dfft%mype + 1 )
IF( dfft%lgamma ) THEN
dfft%ngw = (dfft%ngw + 1)/2
dfft%ngm = (dfft%ngm + 1)/2
END IF
IF( dfft%ngw /= ngw ) THEN
CALL fftx_error__(' fft_type_init ', ' wrong ngw ', 1 )
END IF
IF( dfft%ngm /= ngm ) THEN
CALL fftx_error__(' fft_type_init ', ' wrong ngm ', 1 )
END IF
DEALLOCATE( st )
DEALLOCATE( stw )
DEALLOCATE( nstp )
DEALLOCATE( sstp )
DEALLOCATE( nstpw )
DEALLOCATE( sstpw )
END SUBROUTINE fft_type_init
!=----------------------------------------------------------------------------=!
SUBROUTINE realspace_grid_init( dfft, at, bg, gcutm, fft_fact )
!
! ... Sets optimal values for dfft%nr[123] and dfft%nr[123]x
! ... If fft_fact is present, force nr[123] to be multiple of fft_fac([123])
!
USE fft_support, only: good_fft_dimension, good_fft_order
!
IMPLICIT NONE
!
REAL(DP), INTENT(IN) :: at(3,3), bg(3,3)
REAL(DP), INTENT(IN) :: gcutm
INTEGER, INTENT(IN), OPTIONAL :: fft_fact(3)
TYPE(fft_type_descriptor), INTENT(INOUT) :: dfft
!write (6,*) ' inside realspace_grid_init' ; FLUSH(6)
!
IF( dfft%nr1 == 0 .OR. dfft%nr2 == 0 .OR. dfft%nr3 == 0 ) THEN
!
! ... calculate the size of the real-space dense grid for FFT
! ... first, an estimate of nr1,nr2,nr3, based on the max values
! ... of n_i indices in: G = i*b_1 + j*b_2 + k*b_3
! ... We use G*a_i = n_i => n_i .le. |Gmax||a_i|
!
dfft%nr1 = int ( sqrt (gcutm) * sqrt (at(1, 1)**2 + at(2, 1)**2 + at(3, 1)**2) ) + 1
dfft%nr2 = int ( sqrt (gcutm) * sqrt (at(1, 2)**2 + at(2, 2)**2 + at(3, 2)**2) ) + 1
dfft%nr3 = int ( sqrt (gcutm) * sqrt (at(1, 3)**2 + at(2, 3)**2 + at(3, 3)**2) ) + 1
!write (6,*) sqrt(gcutm)*sqrt(at(1,1)**2 + at(2,1)**2 + at(3,1)**2) , dfft%nr1
!write (6,*) sqrt(gcutm)*sqrt(at(1,2)**2 + at(2,2)**2 + at(3,2)**2) , dfft%nr2
!write (6,*) sqrt(gcutm)*sqrt(at(1,3)**2 + at(2,3)**2 + at(3,3)**2) , dfft%nr3
!
CALL grid_set( dfft, bg, gcutm, dfft%nr1, dfft%nr2, dfft%nr3 )
!
#if defined (__DEBUG)
ELSE
WRITE( stdout, '( /, 3X,"Info: using nr1, nr2, nr3 values from input" )' )
#endif
END IF
IF (PRESENT(fft_fact)) THEN
dfft%nr1 = good_fft_order( dfft%nr1, fft_fact(1) )
dfft%nr2 = good_fft_order( dfft%nr2, fft_fact(2) )
dfft%nr3 = good_fft_order( dfft%nr3, fft_fact(3) )
ELSE
dfft%nr1 = good_fft_order( dfft%nr1 )
dfft%nr2 = good_fft_order( dfft%nr2 )
dfft%nr3 = good_fft_order( dfft%nr3 )
END IF
dfft%nr1x = good_fft_dimension( dfft%nr1 )
dfft%nr2x = dfft%nr2
dfft%nr3x = good_fft_dimension( dfft%nr3 )
END SUBROUTINE realspace_grid_init
!=----------------------------------------------------------------------------=!
SUBROUTINE grid_set( dfft, bg, gcut, nr1, nr2, nr3 )
! this routine returns in nr1, nr2, nr3 the minimal 3D real-space FFT
! grid required to fit the G-vector sphere with G^2 <= gcut
! On input, nr1,nr2,nr3 must be set to values that match or exceed
! the largest i,j,k (Miller) indices in G(i,j,k) = i*b1 + j*b2 + k*b3
! ----------------------------------------------
IMPLICIT NONE
! ... declare arguments
TYPE(fft_type_descriptor), INTENT(IN) :: dfft
INTEGER, INTENT(INOUT) :: nr1, nr2, nr3
REAL(DP), INTENT(IN) :: bg(3,3), gcut
! ... declare other variables
INTEGER :: i, j, k, nr, nb(3)
REAL(DP) :: gsq, g(3)
!write (6,*) ' inside grid_set' ; FLUSH(6)
! ----------------------------------------------
nb = 0
! ... calculate moduli of G vectors and the range of indices where
! ... |G|^2 < gcut (in parallel whenever possible)
DO k = -nr3, nr3
!
! ... me_image = processor number, starting from 0
!
IF( MOD( k + nr3, dfft%nproc ) == dfft%mype ) THEN
DO j = -nr2, nr2
DO i = -nr1, nr1
g( 1 ) = DBLE(i)*bg(1,1) + DBLE(j)*bg(1,2) + DBLE(k)*bg(1,3)
g( 2 ) = DBLE(i)*bg(2,1) + DBLE(j)*bg(2,2) + DBLE(k)*bg(2,3)
g( 3 ) = DBLE(i)*bg(3,1) + DBLE(j)*bg(3,2) + DBLE(k)*bg(3,3)
! ... calculate modulus
gsq = g( 1 )**2 + g( 2 )**2 + g( 3 )**2
IF( gsq < gcut ) THEN
! ... calculate maximum index
nb(1) = MAX( nb(1), ABS( i ) )
nb(2) = MAX( nb(2), ABS( j ) )
nb(3) = MAX( nb(3), ABS( k ) )
END IF
END DO
END DO
END IF
END DO
#if defined(__MPI)
CALL MPI_ALLREDUCE( MPI_IN_PLACE, nb, 3, MPI_INTEGER, MPI_MAX, dfft%comm, i )
#endif
! ... the size of the 3d FFT matrix depends upon the maximum indices. With
! ... the following choice, the sphere in G-space "touches" its periodic image
nr1 = 2 * nb(1) + 1
nr2 = 2 * nb(2) + 1
nr3 = 2 * nb(3) + 1
RETURN
END SUBROUTINE grid_set
PURE FUNCTION fft_stick_index( desc, i, j )
IMPLICIT NONE
TYPE(fft_type_descriptor), INTENT(IN) :: desc
INTEGER :: fft_stick_index
INTEGER, INTENT(IN) :: i, j
INTEGER :: mc, m1, m2
m1 = mod (i, desc%nr1) + 1
IF (m1 < 1) m1 = m1 + desc%nr1
m2 = mod (j, desc%nr2) + 1
IF (m2 < 1) m2 = m2 + desc%nr2
mc = m1 + (m2 - 1) * desc%nr1x
fft_stick_index = desc%isind ( mc )
END FUNCTION
!=----------------------------------------------------------------------------=!
END MODULE fft_types
!=----------------------------------------------------------------------------=!