mirror of https://gitlab.com/QEF/q-e.git
410 lines
11 KiB
Fortran
410 lines
11 KiB
Fortran
!
|
|
! Copyright (C) 2001-2003 PWSCF group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
!-----------------------------------------------------------------------
|
|
subroutine data_structure( lgamma )
|
|
!-----------------------------------------------------------------------
|
|
! this routine sets the data structure for the fft arrays.
|
|
! In the parallel case distributes columns to processes, too
|
|
! This version computes also the smooth and hard mesh
|
|
!
|
|
#include "f_defs.h"
|
|
USE io_global, ONLY : stdout
|
|
USE sticks, ONLY: dfftp, dffts
|
|
USE kinds, ONLY: DP
|
|
USE cell_base, ONLY: bg, tpiba
|
|
USE klist, ONLY: xk, nks
|
|
USE gvect, ONLY: nr1, nr2, nr3, nrx1, nrx2, nrx3, nrxx, &
|
|
ngm, ngm_l, ngm_g, gcutm, ecutwfc
|
|
USE gsmooth, ONLY: nr1s, nr2s, nr3s, nrx1s, nrx2s, nrx3s, nrxxs, &
|
|
ngms, ngms_l, ngms_g, gcutms
|
|
|
|
#ifdef __PARA
|
|
use para, only: maxproc, ncplane, ncplanes, nkcp, npp, npps, &
|
|
ncp0, ncp0s, nxx, nxxs, nct, ncts, ncp, ncps
|
|
#endif
|
|
use mp, only: mp_sum
|
|
use mp_global, only: intra_pool_comm, nproc_pool, me_pool, my_image_id
|
|
use stick_base
|
|
use fft_scalar, only: good_fft_dimension
|
|
use fft_types, only: fft_dlay_allocate, fft_dlay_set, fft_dlay_scalar
|
|
!
|
|
implicit none
|
|
logical, intent(in) :: lgamma
|
|
integer :: n1, n2, n3, i1, i2, i3
|
|
! counters on G space
|
|
!
|
|
|
|
real(kind=DP) :: amod
|
|
! modulus of G vectors
|
|
|
|
integer, allocatable :: st(:,:), stw(:,:), sts(:,:)
|
|
! sticks maps
|
|
|
|
integer :: ub(3), lb(3)
|
|
! upper and lower bounds for maps
|
|
|
|
real(kind=DP) :: gkcut
|
|
! cut-off for the wavefunctions
|
|
|
|
integer :: np, nps1, nq, nqs, max1, min1, max2, min2, kpoint, m1, &
|
|
m2, i, mc, nct_, ic, ics
|
|
|
|
#ifdef __PARA
|
|
! counters on planes
|
|
|
|
integer, allocatable :: ngc (:), ngcs (:), ngkc (:)
|
|
integer :: ngp (maxproc), ngps(maxproc), ngkp (maxproc), ncp_(maxproc),&
|
|
j, jj, idum
|
|
! counters on planes
|
|
! indices for including meshes
|
|
! counter on k points
|
|
! generic counters
|
|
! check variables
|
|
! number of columns per plane
|
|
! number of columns per plane (smooth part)
|
|
! number of columns per plane (hard part)
|
|
! from thick plane to column list
|
|
! from smooth plane to column list
|
|
! number of g per processor
|
|
! number of column per processor
|
|
! counter on processors
|
|
! counter on processors
|
|
! used for swap
|
|
|
|
logical :: tk = .TRUE.
|
|
! map type: true for full space sticks map, false for half space sticks map
|
|
integer, allocatable :: in1(:), in2(:), index(:)
|
|
! sticks coordinates
|
|
|
|
!
|
|
! Subroutine body
|
|
!
|
|
|
|
tk = .NOT. lgamma
|
|
|
|
!
|
|
! set the values of fft arrays
|
|
!
|
|
|
|
nrx1 = good_fft_dimension (nr1)
|
|
nrx1s = good_fft_dimension (nr1s)
|
|
!
|
|
! nrx2 is there just for compatibility
|
|
!
|
|
nrx2 = nr2
|
|
nrx2s = nr2s
|
|
nrx3 = good_fft_dimension (nr3)
|
|
nrx3s = good_fft_dimension (nr3s)
|
|
!
|
|
! compute number of columns per plane for each processor
|
|
!
|
|
ncplane = nrx1 * nrx2
|
|
ncplanes = nrx1s * nrx2s
|
|
!
|
|
!
|
|
! check the number of plane per process
|
|
!
|
|
if ( nr3 < nproc_pool ) &
|
|
call errore ('data_structure', 'some processors have no planes ', - 1)
|
|
|
|
if ( nr3s < nproc_pool ) &
|
|
call errore ('data_structure', 'some processors have no smooth planes ', - 1)
|
|
|
|
!
|
|
! compute gkcut calling an internal procedure
|
|
!
|
|
|
|
call calculate_gkcut()
|
|
|
|
!
|
|
! find maximum among the nodes
|
|
!
|
|
|
|
call poolextreme (gkcut, + 1)
|
|
|
|
!
|
|
#ifdef DEBUG
|
|
WRITE( stdout, '(5x,"ecutrho & ecutwfc",2f12.2)') tpiba2 * gcutm, &
|
|
tpiba2 * gkcut
|
|
#endif
|
|
!
|
|
!
|
|
! Now compute for each point of the big plane how many column have
|
|
! non zero vectors on the smooth and thick mesh
|
|
!
|
|
n1 = nr1 + 1
|
|
n2 = nr2 + 1
|
|
n3 = nr3 + 1
|
|
!
|
|
ub = (/ n1, n2, n3 /)
|
|
lb = (/ -n1, -n2, -n3 /)
|
|
!
|
|
ALLOCATE( stw ( lb(1) : ub(1), lb(2) : ub(2) ) )
|
|
ALLOCATE( st ( lb(1) : ub(1), lb(2) : ub(2) ) )
|
|
ALLOCATE( sts ( lb(1) : ub(1), lb(2) : ub(2) ) )
|
|
|
|
!
|
|
! ... Fill in the stick maps, for given g-space base (b1,b2,b3)
|
|
! ... and cut-offs
|
|
! ... The value of the element (i,j) of the map ( st ) is equal to the
|
|
! ... number of G-vector belonging to the (i,j) stick.
|
|
!
|
|
|
|
CALL sticks_maps( tk, ub, lb, bg(:,1), bg(:,2), bg(:,3), gcutm, gkcut, gcutms, st, stw, sts )
|
|
|
|
nct = COUNT( st > 0 )
|
|
ncts = COUNT( sts > 0 )
|
|
|
|
if ( nct > ncplane ) &
|
|
& call errore('data_structure','too many sticks',1)
|
|
|
|
if ( ncts > ncplanes ) &
|
|
& call errore('data_structure','too many sticks',2)
|
|
|
|
if ( nct == 0 ) &
|
|
& call errore('data_structure','number of sticks 0', 1)
|
|
|
|
if ( ncts == 0 ) &
|
|
& call errore('data_structure','number smooth sticks 0', 1)
|
|
|
|
!
|
|
! local pointers deallocated at the end
|
|
!
|
|
ALLOCATE( in1( nct ), in2( nct ) )
|
|
ALLOCATE( ngc( nct ), ngcs( nct ), ngkc( nct ) )
|
|
ALLOCATE( index( nct ) )
|
|
|
|
!
|
|
! ... initialize the sticks indexes array ist
|
|
! ... nct counts columns containing G-vectors for the dense grid
|
|
! ... ncts counts columns contaning G-vectors for the smooth grid
|
|
!
|
|
|
|
CALL sticks_countg( tk, ub, lb, st, stw, sts, in1, in2, ngc, ngkc, ngcs )
|
|
|
|
CALL sticks_sort( ngc, ngkc, ngcs, nct, index )
|
|
|
|
CALL sticks_dist( tk, ub, lb, index, in1, in2, ngc, ngkc, ngcs, nct, &
|
|
ncp, nkcp, ncps, ngp, ngkp, ngps, st, stw, sts )
|
|
|
|
CALL sticks_pairup( tk, ub, lb, index, in1, in2, ngc, ngkc, ngcs, nct, &
|
|
ncp, nkcp, ncps, ngp, ngkp, ngps, st, stw, sts )
|
|
|
|
! set the total number of G vectors
|
|
|
|
IF( tk ) THEN
|
|
ngm = ngp ( me_pool + 1 )
|
|
ngms = ngps( me_pool + 1 )
|
|
ELSE
|
|
IF( st( 0, 0 ) == ( me_pool + 1 ) ) THEN
|
|
ngm = ngp ( me_pool + 1 ) / 2 + 1
|
|
ngms = ngps( me_pool + 1 ) / 2 + 1
|
|
ELSE
|
|
ngm = ngp ( me_pool + 1 ) / 2
|
|
ngms = ngps( me_pool + 1 ) / 2
|
|
END IF
|
|
END IF
|
|
|
|
CALL fft_dlay_allocate( dfftp, nproc_pool, nrx1, nrx2 )
|
|
CALL fft_dlay_allocate( dffts, nproc_pool, nrx1s, nrx2s )
|
|
|
|
! here set the fft data layout structures for dense and smooth mesh,
|
|
! according to stick distribution
|
|
|
|
CALL fft_dlay_set( dfftp, &
|
|
tk, nct, nr1, nr2, nr3, nrx1, nrx2, nrx3, (me_pool+1), &
|
|
nproc_pool, ub, lb, index, in1(:), in2(:), ncp, nkcp, ngp, ngkp, st, stw)
|
|
CALL fft_dlay_set( dffts, &
|
|
tk, ncts, nr1s, nr2s, nr3s, nrx1s, nrx2s, nrx3s, (me_pool+1), &
|
|
nproc_pool, ub, lb, index, in1(:), in2(:), ncps, nkcp, ngps, ngkp, sts, stw)
|
|
|
|
! if tk = .FALSE. only half reciprocal space is considered, then we
|
|
! need to correct the number of sticks
|
|
|
|
IF( .NOT. tk ) THEN
|
|
nct = nct*2 - 1
|
|
ncts = ncts*2 - 1
|
|
END IF
|
|
|
|
!
|
|
! set the number of plane per process
|
|
!
|
|
|
|
npp ( 1 : nproc_pool ) = dfftp%npp ( 1 : nproc_pool )
|
|
npps( 1 : nproc_pool ) = dffts%npp ( 1 : nproc_pool )
|
|
|
|
WRITE( stdout, '(/5x,"Planes per process (thick) : nr3 =", &
|
|
& i3," npp = ",i3," ncplane =",i5)') nr3, npp (me_pool + 1) , ncplane
|
|
|
|
if ( nr3s /= nr3 ) WRITE( stdout, '(/5x,"Planes per process (smooth): nr3s=",&
|
|
&i3," npps= ",i3," ncplanes=",i5)') nr3s, npps (me_pool + 1) , ncplanes
|
|
|
|
WRITE( stdout,*)
|
|
WRITE( stdout,'( &
|
|
& '' Proc/ planes cols G planes cols G columns G''/ &
|
|
& '' Pool (dense grid) (smooth grid) (wavefct grid)'')')
|
|
do i=1,nproc_pool
|
|
WRITE( stdout,'(i3,2x,3(i5,2i7))') i, npp(i), ncp(i), ngp(i), &
|
|
& npps(i), ncps(i), ngps(i), nkcp(i), ngkp(i)
|
|
end do
|
|
WRITE( stdout,'(i3,2x,3(i5,2i7))') 0, SUM(npp(1:nproc_pool)), SUM(ncp(1:nproc_pool)), &
|
|
& SUM(ngp(1:nproc_pool)), SUM(npps(1:nproc_pool)), SUM(ncps(1:nproc_pool)), &
|
|
& SUM(ngps(1:nproc_pool)), SUM(nkcp(1:nproc_pool)), SUM(ngkp(1:nproc_pool))
|
|
WRITE( stdout,*)
|
|
|
|
|
|
DEALLOCATE( stw, st, sts, in1, in2, index, ngc, ngcs, ngkc )
|
|
|
|
!
|
|
! ncp0 = starting column for each processor
|
|
!
|
|
|
|
ncp0( 1:nproc_pool ) = dfftp%iss( 1:nproc_pool )
|
|
ncp0s( 1:nproc_pool ) = dffts%iss( 1:nproc_pool )
|
|
|
|
!
|
|
! array ipc and ipcl ( ipc contain the number of the
|
|
! column for that processor or zero if the
|
|
! column do not belong to the processor,
|
|
! ipcl contains the point in the plane for
|
|
! each column)
|
|
!
|
|
! ipc ( 1:ncplane ) = > dfftp%isind( 1:ncplane )
|
|
! icpl( 1:nct ) = > dfftp%ismap( 1:nct )
|
|
|
|
! ipcs ( 1:ncplanes ) = > dffts%isind( 1:ncplanes )
|
|
! icpls( 1:ncts ) = > dffts%ismap( 1:ncts )
|
|
|
|
nrxx = dfftp%nnr
|
|
nrxxs = dffts%nnr
|
|
|
|
!
|
|
! nxx is just a copy in the parallel commons of nrxx
|
|
!
|
|
|
|
nxx = nrxx
|
|
nxxs = nrxxs
|
|
|
|
|
|
#else
|
|
|
|
nrx1 = good_fft_dimension (nr1)
|
|
nrx1s = good_fft_dimension (nr1s)
|
|
!
|
|
! nrx2 and nrx3 are there just for compatibility
|
|
!
|
|
nrx2 = nr2
|
|
nrx3 = nr3
|
|
|
|
nrxx = nrx1 * nrx2 * nrx3
|
|
nrx2s = nr2s
|
|
nrx3s = nr3s
|
|
nrxxs = nrx1s * nrx2s * nrx3s
|
|
|
|
CALL fft_dlay_allocate( dfftp, nproc_pool, MAX(nrx1, nrx3), nrx2 )
|
|
CALL fft_dlay_allocate( dffts, nproc_pool, MAX(nrx1s, nrx3s), nrx2s )
|
|
|
|
CALL calculate_gkcut()
|
|
|
|
!
|
|
! compute the number of g necessary to the calculation
|
|
!
|
|
n1 = nr1 + 1
|
|
n2 = nr2 + 1
|
|
n3 = nr3 + 1
|
|
|
|
ngm = 0
|
|
ngms = 0
|
|
|
|
ub = (/ n1, n2, n3 /)
|
|
lb = (/ -n1, -n2, -n3 /)
|
|
!
|
|
ALLOCATE( stw ( lb(2):ub(2), lb(3):ub(3) ) )
|
|
stw = 0
|
|
|
|
do i1 = - n1, n1
|
|
!
|
|
! Gamma-only: exclude space with x<0
|
|
!
|
|
if (lgamma .and. i1 < 0) go to 10
|
|
!
|
|
do i2 = - n2, n2
|
|
!
|
|
! Gamma-only: exclude plane with x=0, y<0
|
|
!
|
|
if(lgamma .and. i1 == 0.and. i2 < 0) go to 20
|
|
!
|
|
do i3 = - n3, n3
|
|
!
|
|
! Gamma-only: exclude line with x=0, y=0, z<0
|
|
!
|
|
if(lgamma .and. i1 == 0 .and. i2 == 0 .and. i3 < 0) go to 30
|
|
!
|
|
amod = (i1 * bg (1, 1) + i2 * bg (1, 2) + i3 * bg (1, 3) ) **2 + &
|
|
(i1 * bg (2, 1) + i2 * bg (2, 2) + i3 * bg (2, 3) ) **2 + &
|
|
(i1 * bg (3, 1) + i2 * bg (3, 2) + i3 * bg (3, 3) ) **2
|
|
if (amod <= gcutm) ngm = ngm + 1
|
|
if (amod <= gcutms) ngms = ngms + 1
|
|
if (amod <= gkcut ) then
|
|
stw( i2, i3 ) = 1
|
|
if (lgamma) stw( -i2, -i3 ) = 1
|
|
end if
|
|
30 continue
|
|
enddo
|
|
20 continue
|
|
enddo
|
|
10 continue
|
|
enddo
|
|
|
|
call fft_dlay_scalar( dfftp, ub, lb, nr1, nr2, nr3, nrx1, nrx2, nrx3, stw )
|
|
call fft_dlay_scalar( dffts, ub, lb, nr1s, nr2s, nr3s, nrx1s, nrx2s, nrx3s, stw )
|
|
|
|
deallocate( stw )
|
|
|
|
#endif
|
|
|
|
!
|
|
! compute the global number of g, i.e. the sum over all processors
|
|
! within a pool
|
|
!
|
|
ngm_l = ngm
|
|
ngms_l = ngms
|
|
ngm_g = ngm
|
|
ngms_g = ngms
|
|
call mp_sum( ngm_g , intra_pool_comm )
|
|
call mp_sum( ngms_g, intra_pool_comm )
|
|
|
|
return
|
|
|
|
contains
|
|
|
|
subroutine calculate_gkcut()
|
|
if (nks.eq.0) then
|
|
!
|
|
! if k-points are automatically generated (which happens later)
|
|
! use max(bg)/2 as an estimate of the largest k-point
|
|
!
|
|
gkcut = sqrt (ecutwfc) / tpiba + 0.5d0 * max (sqrt (bg ( &
|
|
1, 1) **2 + bg (2, 1) **2 + bg (3, 1) **2), sqrt (bg (1, 2) ** &
|
|
2 + bg (2, 2) **2 + bg (3, 2) **2), sqrt (bg (1, 3) **2 + bg ( &
|
|
2, 3) **2 + bg (3, 3) **2) )
|
|
else
|
|
gkcut = 0.0d0
|
|
do kpoint = 1, nks
|
|
gkcut = max (gkcut, sqrt (ecutwfc) / tpiba + sqrt (xk ( &
|
|
1, kpoint) **2 + xk (2, kpoint) **2 + xk (3, kpoint) **2) )
|
|
enddo
|
|
endif
|
|
gkcut = gkcut * gkcut
|
|
return
|
|
end subroutine calculate_gkcut
|
|
|
|
|
|
end subroutine data_structure
|
|
|