quantum-espresso/PW/data_structure.f90

514 lines
13 KiB
Fortran

!
! Copyright (C) 2001 PWSCF group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!-----------------------------------------------------------------------
subroutine data_structure
!-----------------------------------------------------------------------
! this routine sets the data structure for the fft arrays.
! In the parallel case distributes columns to processes, too
! This version computes also the smooth and hard mesh
!
#include "machine.h"
use pwcom
#ifdef PARA
use para
#endif
use mp, only: mp_sum
use mp_global, only: intra_pool_comm
!
implicit none
integer :: n1, n2, n3, i1, i2, i3
! counters on G space
!
integer :: good_fft_dimension
! a function with obvious meaning
real(kind=DP) :: amod
! modulus of G vectors
#ifdef PARA
! counters on planes
integer :: np, nps1, nq, nqs, max1, min1, max2, min2, kpoint, m1, &
m2, i, mc, nct_
integer, allocatable :: ngc (:), ngcs (:), ngkc (:), &
ic (:), ics (:)
integer :: ngp (maxproc), ngps(maxproc), ngkp (maxproc), ncp_(maxproc),&
j, jj, idum
! counters on planes
! indices for including meshes
! counter on k points
! generic counters
! check variables
! number of columns per plane
! number of columns per plane (smooth part)
! number of columns per plane (hard part)
! from thick plane to column list
! from smooth plane to column list
! number of g per processor
! number of column per processor
! counter on processors
! counter on processors
! used for swap
real(kind=DP) :: gkcut
! cut-off for the wavefunctions
!
! set the values of fft arrays
!
nrx1 = good_fft_dimension (nr1)
nrx1s = good_fft_dimension (nr1s)
!
! nrx2 is there just for compatibility
!
nrx2 = nr2
nrx2s = nr2s
nrx3 = good_fft_dimension (nr3)
nrx3s = good_fft_dimension (nr3s)
!
! compute number of columns per plane for each processor
!
ncplane = nrx1 * nrx2
ncplanes = nrx1s * nrx2s
!
! global pointers passed with commons are allocate here
!
allocate (ipc( ncplane))
allocate (ipcs( ncplanes))
!
! local pointers deallocated at the end
!
allocate (ngc( ncplane))
allocate (ngcs( ncplane))
allocate (ngkc( ncplane))
allocate (ic( ncplane))
allocate (ics( ncplane))
!
! set the number of plane per process
!
if (nr3.lt.nprocp) call error ('data_structure', &
'some processors have no planes ', - 1)
if (nr3s.lt.nprocp) call error ('data_structure', &
'some processors have no smooth planes ', - 1)
if (nprocp.eq.1) then
npp (1) = nr3
npps (1) = nr3s
else
np = nr3 / nprocp
nq = nr3 - np * nprocp
nps1 = nr3s / nprocp
nqs = nr3s - nps1 * nprocp
do i = 1, nprocp
npp (i) = np
if (i.le.nq) npp (i) = np + 1
npps (i) = nps1
if (i.le.nqs) npps (i) = nps1 + 1
enddo
endif
write (6, '(/5x,"Planes per process (thick) : nr3 =", &
& i3," npp = ",i3," ncplane =",i5)') nr3, npp (me) , ncplane
if (nr3s.ne.nr3) write (6, '(/5x,"Planes per process (smooth): nr3s=",&
&i3," npps= ",i3," ncplanes=",i5)') &
nr3s, npps (me) , ncplanes
if (nks.eq.0) then
!
! if k-points are automatically generated (which happens later)
! use max(bg)/2 as an estimate of the largest k-point
!
gkcut = sqrt (ecutwfc) / tpiba + 0.5d0 * max (sqrt (bg ( &
1, 1) **2 + bg (2, 1) **2 + bg (3, 1) **2), sqrt (bg (1, 2) ** &
2 + bg (2, 2) **2 + bg (3, 2) **2), sqrt (bg (1, 3) **2 + bg ( &
2, 3) **2 + bg (3, 3) **2) )
else
gkcut = 0.0d0
do kpoint = 1, nks
gkcut = max (gkcut, sqrt (ecutwfc) / tpiba + sqrt (xk ( &
1, kpoint) **2 + xk (2, kpoint) **2 + xk (3, kpoint) **2) )
enddo
endif
gkcut = gkcut * gkcut
! find maximum among the nodes
call poolextreme (gkcut, + 1)
!
#ifdef DEBUG
write (6, '(5x,"ecutrho & ecutwfc",2f12.2)') tpiba2 * gcutm, &
tpiba2 * gkcut
#endif
!
!
! Now compute for each point of the big plane how many column have
! non zero vectors on the smooth and thick mesh
!
n1 = nr1 + 1
n2 = nr2 + 1
n3 = nr3 + 1
!
do mc = 1, ncplane
ngc (mc) = 0
ngcs (mc) = 0
ngkc (mc) = 0
ipc (mc) = 0
ic (mc) = 0
enddo
do mc = 1, ncplanes
ipcs (mc) = 0
enddo
do i1 = - n1, n1
m1 = mod (i1, nr1) + 1
if (m1.lt.1) m1 = m1 + nr1
do i2 = - n2, n2
m2 = mod (i2, nr2) + 1
if (m2.lt.1) m2 = m2 + nr2
mc = m1 + (m2 - 1) * nrx1
if (mc.lt.1.or.mc.gt.ncplane) call error ('data_structure', &
'mc is wrong', 1)
do i3 = - n3, n3
amod = (bg (1, 1) * i1 + bg (1, 2) * i2 + bg (1, 3) * i3) **2 + &
(bg (2, 1) * i1 + bg (2, 2) * i2 + bg (2, 3) * i3) **2 + (bg (3, &
1) * i1 + bg (3, 2) * i2 + bg (3, 3) * i3) **2
if (amod.le.gcutm) ngc (mc) = ngc (mc) + 1
if (amod.le.gcutms) ngcs (mc) = ngcs (mc) + 1
if (amod.le.gkcut) ngkc (mc) = ngkc (mc) + 1
enddo
enddo
enddo
!
! Now compute the total number of nonzero columns
!
nct = 0
ncts = 0
do mc = 1, ncplane
if (ngcs (mc) .gt.0) ncts = ncts + 1
if (ngc (mc) .gt.0) then
nct = nct + 1
ngc (nct) = ngc (mc)
ngcs (nct) = ngcs (mc)
ngkc (nct) = ngkc (mc)
ic (nct) = mc
! write (*,*) nct, ngc(nct), mc
endif
enddo
do mc = nct + 1, ncplane
ngc (mc) = 0
ngcs (mc) = 0
ngkc (mc) = 0
enddo
if (nct.eq.0) call error ('data_structure', 'number of column 0', 1)
if (ncts.eq.0) call error ('data_structure', 'number smooth column 0', 1)
!
! Now sort the columns. First the column with the largest number of G
! vectors on the wavefunction sphere, then on the smooth sphere,
! then on the big sphere
!
do i = 1, nct
do j = i + 1, nct
if ( (ngkc (i) .lt.ngkc (j) ) .or. (ngkc (i) .eq.ngkc (j) &
.and.ngcs (i) .lt.ngcs (j) ) .or. (ngkc (i) .eq.ngkc (j) &
.and.ngcs (i) .eq.ngcs (j) .and.ngc (i) .lt.ngc (j) ) ) then
idum = ngkc (i)
ngkc (i) = ngkc (j)
ngkc (j) = idum
idum = ngc (i)
ngc (i) = ngc (j)
ngc (j) = idum
idum = ngcs (i)
ngcs (i) = ngcs (j)
ngcs (j) = idum
idum = ic (i)
ic (i) = ic (j)
ic (j) = idum
endif
enddo
enddo
!
! Set the ics index with the correspondence between planes in the smooth
! thick mesh
!
if (mod (nr1s, 2) .eq.0) then
max1 = nr1s / 2
min1 = nr1 - nr1s / 2 + 1
else
max1 = (nr1s - 1) / 2 + 1
min1 = nr1 - (nr1s - 1) / 2 + 1
endif
if (mod (nr2s, 2) .eq.0) then
max2 = nr2s / 2
min2 = nr2 - nr2s / 2 + 1
else
max2 = (nr2s - 1) / 2 + 1
min2 = nr2 - (nr2s - 1) / 2 + 1
endif
do i = 1, nct
if (ngcs (i) .gt.0) then
!
! this column contain smooth vectors, find the indices on the thick
! mesh i1,i2, and the corresponding indices on the smooth mesh n1,n2
!
i1 = mod (ic (i) - 1, nrx1) + 1
i2 = (ic (i) - i1) / nrx1 + 1
if (i1.le.max1) then
n1 = i1
elseif (i1.ge.min1) then
n1 = nr1s - (nr1 - i1)
else
call error ('data_structure', 'something wrong with n1', 1)
endif
if (i2.le.max2) then
n2 = i2
elseif (i2.ge.min2) then
n2 = nr2s - (nr2 - i2)
else
call error ('data_structure', 'something wrong with n2', 1)
endif
!
! check that the indices are within bounds
!
if (n1.lt.1.or.n1.gt.nr1s.or.n2.lt.1.or.n2.gt.nr2s) &
call error ('data_structure', 'something wrong with n1,n2', 1)
ics (i) = n1 + (n2 - 1) * nrx1s
else
ics (i) = 0
endif
enddo
!
! assign columns to processes, set the ipc index
!
do j = 1, nprocp
ncp (j) = 0
ncps (j) = 0
nkcp (j) = 0
ngp (j) = 0
ngps (j) = 0
ngkp (j) = 0
enddo
do i = 1, nct
jj = 1
if (ngkc (i) .gt.0) then
do j = 1, nprocp
if (ngkp (j) .lt.ngkp (jj) ) jj = j
enddo
else
do j = 1, nprocp
if (ngp (j) .lt.ngp (jj) ) jj = j
enddo
endif
ipc (ic (i) ) = - jj
ncp (jj) = ncp (jj) + 1
ngp (jj) = ngp (jj) + ngc (i)
if (ngcs (i) .gt.0) then
ncps (jj) = ncps (jj) + 1
ngps (jj) = ngps (jj) + ngcs (i)
ipcs (ics (i) ) = - jj
endif
if (ngkc (i) .gt.0) then
ipcs (ics (i) ) = jj
ipc (ic (i) ) = jj
ngkp (jj) = ngkp (jj) + ngkc (i)
nkcp (jj) = nkcp (jj) + 1
endif
enddo
!
deallocate (ics)
deallocate (ic)
deallocate (ngkc)
deallocate (ngcs)
deallocate (ngc)
!
! computing the starting column for each processor
! and the total number of G vectors
!
ngm = ngp (me)
ngms = ngps (me)
do i = 1, nprocp
! write(6,*) i, ncp(i), ngp(i), nkcp(i), ngkp(i)
if (ngkp (i) .eq.0) call error ('data_structure', &
'some processors have no pencils, not yet implemented', 1)
if (i.eq.1) then
ncp0 (i) = 0
ncp0s (i) = 0
else
ncp0 (i) = ncp0 (i - 1) + ncp (i - 1)
ncp0s (i) = ncp0s (i - 1) + ncps (i - 1)
endif
enddo
allocate (icpl( nct))
allocate (icpls( ncts))
do j = 1, nprocp
ncp_ (j) = 0
enddo
!
! Now compute the array ipc and ipcl ( ipc contain the number of the
! column for that processor or zero if the
! column do not belong to the processor,
! ipcl contains the point in the plane for
! each column)
!
!- columns with non zero ngkc first........
!
do mc = 1, ncplane
if (ipc (mc) .gt.0) then
j = ipc (mc)
ncp_ (j) = ncp_ (j) + 1
icpl (ncp_ (j) + ncp0 (j) ) = mc
if (j.eq.me) then
ipc (mc) = ncp_ (j)
else
ipc (mc) = 0
endif
endif
enddo
!
!-..... ( intermediate check ) ....
!
do j = 1, nprocp
if (ncp_ (j) .ne.nkcp (j) ) then
write (6, * ) 'ncp_(j).ne.nkcp(j)', j, ncp_ (j) , nkcp (j)
endif
enddo
!
!- ........then the remaining columns
!
do mc = 1, ncplane
if (ipc (mc) .lt.0) then
j = - ipc (mc)
ncp_ (j) = ncp_ (j) + 1
icpl (ncp_ (j) + ncp0 (j) ) = mc
if (j.eq.me) then
ipc (mc) = ncp_ (j)
else
ipc (mc) = 0
endif
endif
enddo
!
!-... ( final check )
!
nct_ = 0
do j = 1, nprocp
if (ncp_ (j) .ne.ncp (j) ) then
write (6, * ) 'ncp_(j).ne.ncp(j)', j, ncp_ (j) , ncp (j)
endif
nct_ = nct_ + ncp_ (j)
enddo
if (nct_.ne.nct) then
write (6, * ) 'nct_.ne.nct', nct_, nct
endif
!
! here compute the two arrays ipcs and ipcls for the smooth mesh
!
do j = 1, nprocp
ncp_ (j) = 0
enddo
!
! the columns of the wavefunction sphere first
!
do mc = 1, ncplanes
if (ipcs (mc) .gt.0) then
j = ipcs (mc)
ncp_ (j) = ncp_ (j) + 1
icpls (ncp_ (j) + ncp0s (j) ) = mc
if (j.eq.me) then
ipcs (mc) = ncp_ (j)
else
ipcs (mc) = 0
endif
endif
enddo
!
! and then all the others
!
do mc = 1, ncplanes
if (ipcs (mc) .lt.0) then
j = - ipcs (mc)
ncp_ (j) = ncp_ (j) + 1
icpls (ncp_ (j) + ncp0s (j) ) = mc
if (j.eq.me) then
ipcs (mc) = ncp_ (j)
else
ipcs (mc) = 0
endif
endif
enddo
if (nprocp.eq.1) then
nrxx = nrx1 * nrx2 * nrx3
nrxxs = nrx1s * nrx2s * nrx3s
else
nrxx = max (nrx3 * ncp (me), nrx1 * nrx2 * npp (me) )
nrxxs = max (nrx3s * ncps (me), nrx1s * nrx2s * npps (me) )
endif
!
! nxx is just a copy in the parallel commons of nrxx
!
nxx = nrxx
nxxs = nrxxs
#else
nrx1 = good_fft_dimension (nr1)
nrx1s = good_fft_dimension (nr1s)
!
! nrx2 and nrx3 are there just for compatibility
!
nrx2 = nr2
nrx3 = nr3
nrxx = nrx1 * nrx2 * nrx3
nrx2s = nr2s
nrx3s = nr3s
nrxxs = nrx1s * nrx2s * nrx3s
!
! compute the number of g necessary to the calculation
!
n1 = nr1 + 1
n2 = nr2 + 1
n3 = nr3 + 1
ngm = 0
ngms = 0
do i1 = - n1, n1
do i2 = - n2, n2
do i3 = - n3, n3
amod = (i1 * bg (1, 1) + i2 * bg (1, 2) + i3 * bg (1, 3) ) **2 + &
(i1 * bg (2, 1) + i2 * bg (2, 2) + i3 * bg (2, 3) ) **2 + (i1 * &
bg (3, 1) + i2 * bg (3, 2) + i3 * bg (3, 3) ) **2
if (amod.le.gcutm) ngm = ngm + 1
if (amod.le.gcutms) ngms = ngms + 1
enddo
enddo
enddo
#endif
!
! compute the global number of g, i.e. the sum over all processors
! within a pool
!
ngm_l = ngm
ngms_l = ngms
ngm_g = ngm
ngms_g = ngms
call mp_sum( ngm_g , intra_pool_comm )
call mp_sum( ngms_g, intra_pool_comm )
return
end subroutine data_structure