quantum-espresso/PWCOND/rotproc.f90

303 lines
9.3 KiB
Fortran

!
! Copyright (C) 2003 A. Smogunov
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
!
#include "f_defs.h"
!
SUBROUTINE rotproc (fun0, fund0, fun1, fund1, funl0, fundl0, funl1, &
fundl1, intw1, intw2, n2d, norbf, norbnow)
!
! This subroutine implements a matching procedure to construct
! local and nonlocal functions on the whole region from those computed
! by different CPU.
! It works well with 1, 2, 4, 8, 16... CPU.
! The matching scheme with 8 CPU looks like:
! | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
! + + + +
! | | | | |
! + +
! | | |
! +
! | |
!
! So in this case there are 3 matching steps.
!
USE kinds, ONLY : DP
USE mp_global, ONLY: nproc, me_pool
USE noncollin_module, ONLY : npol
USE parallel_include
IMPLICIT NONE
INTEGER :: k, ig, n, lam, lam1, iorb, iorb1, norbf, norbnow, n2d, &
ibound, numb, ninsl, ib, icolor, ikey, new_comm, info
INTEGER, ALLOCATABLE :: ipiv(:)
COMPLEX(kind=DP) :: fun0(n2d, 2*n2d), & ! phi_n(0)
fund0(n2d, 2*n2d), & ! phi'_n(0)
fun1(n2d, 2*n2d), & ! phi_n(d)
fund1(n2d, 2*n2d), & ! phi'_n(d)
funl0(n2d, npol*norbf), & ! phi_alpha(0)
fundl0(n2d, npol*norbf),& ! phi'_alpha(0)
funl1(n2d, npol*norbf), & ! phi_alpha(d)
fundl1(n2d, npol*norbf), & ! phi'_alpha(d)
intw1(norbf*npol, 2*n2d), & ! integrals on phi_n
intw2(norbf*npol, norbf*npol) ! integrals on phi_alpha
COMPLEX(kind=DP), ALLOCATABLE :: x(:), y(:), amat(:,:), vec(:,:), &
amat_aux(:,:), vec_aux(:,:)
#ifdef __PARA
IF(nproc.EQ.1) RETURN
ALLOCATE( x( n2d ) )
ALLOCATE( y( n2d ) )
ALLOCATE( amat( 2*n2d, 2*n2d ) )
ALLOCATE( amat_aux( 2*n2d, 2*n2d ) )
ALLOCATE( vec( 2*n2d, 2*n2d+npol*norbnow ) )
ALLOCATE( vec_aux( 2*n2d, 2*n2d+npol*norbnow ) )
ALLOCATE( ipiv( 2*n2d ) )
numb=0
ibound=nproc/2
ninsl=1
ib=2*(me_pool+1)-(me_pool+1)/2*2
DO WHILE(ibound.GT.0)
!
! To find the matching coefficients for a group of CPU
!
icolor=(ib+ninsl-1)/(2*ninsl)
ikey=((me_pool+1)+ninsl)-ib
CALL mpi_barrier (MPI_COMM_WORLD, info)
CALL mpi_comm_split(MPI_COMM_WORLD, icolor, ikey, new_comm, info)
amat=(0.d0,0.d0)
vec=(0.d0,0.d0)
IF((me_pool+1).EQ.ib) THEN
DO lam=1, n2d
DO lam1=1, n2d
amat(lam, n2d+lam1)=-fun0(lam,n2d+lam1)
amat(n2d+lam, n2d+lam1)=-fund0(lam,n2d+lam1)
vec(lam, lam1)=fun0(lam, lam1)
vec(n2d+lam, lam1)=fund0(lam, lam1)
ENDDO
DO iorb=1, npol*norbnow
vec(lam, 2*n2d+iorb)=funl0(lam, iorb)
vec(n2d+lam, 2*n2d+iorb)=fundl0(lam, iorb)
ENDDO
ENDDO
numb=numb+1
ENDIF
IF((me_pool+1).EQ.ib-1) THEN
DO lam=1, n2d
DO lam1=1, n2d
amat(lam, lam1)=fun1(lam,lam1)
amat(n2d+lam, lam1)=fund1(lam,lam1)
vec(lam, n2d+lam1)=-fun1(lam, n2d+lam1)
vec(n2d+lam, n2d+lam1)=-fund1(lam, n2d+lam1)
ENDDO
DO iorb=1, npol*norbnow
vec(lam, 2*n2d+iorb)=-funl1(lam, iorb)
vec(n2d+lam, 2*n2d+iorb)=-fundl1(lam, iorb)
ENDDO
ENDDO
numb=numb+1
ENDIF
CALL mpi_allreduce(amat, amat_aux, 2*2*n2d*2*n2d, MPI_REAL8, &
MPI_SUM, new_comm, info)
CALL mpi_allreduce(vec, vec_aux, 2*2*n2d*(2*n2d+npol*norbnow), &
MPI_REAL8, MPI_SUM, new_comm, info)
CALL DCOPY(2*2*n2d*2*n2d, amat_aux, 1, amat, 1)
CALL DCOPY(2*2*n2d*(2*n2d+npol*norbnow), vec_aux, 1, vec, 1)
CALL ZGESV(2*n2d, 2*n2d+npol*norbnow, amat, 2*n2d, ipiv, &
vec, 2*n2d, info)
!
! recalculate the functions for CPU which is left to matching
! boundary
!
IF(numb.LE.1.AND.(me_pool+1)/2*2.EQ.(me_pool+1)) THEN
DO ig=1, n2d
DO n=1, n2d
DO lam=1, n2d
fun1(ig, n)=fun1(ig, n)+ &
vec(n2d+lam, n)*fun1(ig, n2d+lam)
fund1(ig, n)=fund1(ig, n)+ &
vec(n2d+lam, n)*fund1(ig, n2d+lam)
ENDDO
ENDDO
DO iorb=1, npol*norbnow
DO lam=1, n2d
funl1(ig, iorb)=funl1(ig, iorb)+ &
vec(n2d+lam, 2*n2d+iorb)*fun1(ig, n2d+lam)
fundl1(ig, iorb)=fundl1(ig, iorb)+ &
vec(n2d+lam, 2*n2d+iorb)*fund1(ig, n2d+lam)
ENDDO
ENDDO
ENDDO
DO ig=1, n2d
x=(0.d0,0.d0)
y=(0.d0,0.d0)
DO n=1, n2d
DO lam=1, n2d
x(n)=x(n)+vec(n2d+lam, n2d+n)*fun1(ig, n2d+lam)
y(n)=y(n)+vec(n2d+lam, n2d+n)*fund1(ig, n2d+lam)
ENDDO
ENDDO
DO n=1, n2d
fun1(ig, n2d+n)=x(n)
fund1(ig, n2d+n)=y(n)
ENDDO
ENDDO
ENDIF
!
! recalculate the functions for CPU which is right to matching
! boundary
!
IF(numb.LE.1.AND.(me_pool+1)/2*2.NE.(me_pool+1)) THEN
DO ig=1, n2d
DO n=1, n2d
DO lam=1, n2d
fun0(ig, n2d+n)=fun0(ig, n2d+n)+ &
vec(lam, n2d+n)*fun0(ig, lam)
fund0(ig, n2d+n)=fund0(ig, n2d+n)+ &
vec(lam, n2d+n)*fund0(ig, lam)
ENDDO
ENDDO
DO iorb=1, npol*norbnow
DO lam=1, n2d
funl0(ig, iorb)=funl0(ig, iorb)+ &
vec(lam, 2*n2d+iorb)*fun0(ig, lam)
fundl0(ig, iorb)=fundl0(ig, iorb)+ &
vec(lam, 2*n2d+iorb)*fund0(ig, lam)
ENDDO
ENDDO
ENDDO
DO ig=1, n2d
x=(0.d0,0.d0)
y=(0.d0,0.d0)
DO n=1, n2d
DO lam=1, n2d
x(n)=x(n)+vec(lam, n)*fun0(ig, lam)
y(n)=y(n)+vec(lam, n)*fund0(ig, lam)
ENDDO
ENDDO
DO n=1, n2d
fun0(ig, n)=x(n)
fund0(ig, n)=y(n)
ENDDO
ENDDO
ENDIF
!
! to recalculate the integrals for a given group of CPU
!
IF((me_pool+1).GE.ib) THEN
DO iorb=1, npol*norbnow
DO iorb1=1, npol*norbnow
DO lam=1, n2d
intw2(iorb,iorb1)=intw2(iorb,iorb1)+ &
vec(n2d+lam, 2*n2d+iorb1)*intw1(iorb, n2d+lam)
ENDDO
ENDDO
ENDDO
DO iorb=1, npol*norbnow
x=(0.d0,0.d0)
DO n=1, n2d
DO lam=1, n2d
x(n)=x(n)+vec(n2d+lam, n2d+n)*intw1(iorb, n2d+lam)
intw1(iorb, n)=intw1(iorb, n)+ &
vec(n2d+lam, n)*intw1(iorb, n2d+lam)
ENDDO
ENDDO
DO n=1, n2d
intw1(iorb, n2d+n)=x(n)
ENDDO
ENDDO
ELSE
DO iorb=1, npol*norbnow
DO iorb1=1, npol*norbnow
DO lam=1, n2d
intw2(iorb, iorb1)=intw2(iorb, iorb1)+ &
vec(lam, 2*n2d+iorb1)*intw1(iorb, lam)
ENDDO
ENDDO
ENDDO
DO iorb=1, npol*norbnow
x=(0.d0,0.d0)
DO n=1, n2d
DO lam=1, n2d
x(n)=x(n)+vec(lam, n)*intw1(iorb, lam)
intw1(iorb, n2d+n)=intw1(iorb, n2d+n)+ &
vec(lam, n2d+n)*intw1(iorb, lam)
ENDDO
ENDDO
DO n=1, n2d
intw1(iorb, n)=x(n)
ENDDO
ENDDO
ENDIF
!
! to next matching step
!
n=(ib+ninsl-1)/(2*ninsl)
IF(n/2*2.EQ.n) THEN
ib=ib-ninsl
ELSE
ib=ib+ninsl
ENDIF
ninsl=ninsl*2
ibound=ibound/2
CALL mpi_comm_free(new_comm, info)
ENDDO
!
! Broadcast of the functions and the integrals to all CPU
!
CALL mpi_bcast(fun0, 2*n2d*2*n2d, MPI_REAL8, 0, &
MPI_COMM_WORLD, info)
CALL mpi_bcast(fund0, 2*n2d*2*n2d, MPI_REAL8, 0, &
MPI_COMM_WORLD, info)
CALL mpi_bcast(funl0, 2*n2d*npol*norbf, MPI_REAL8, 0, &
MPI_COMM_WORLD, info)
CALL mpi_bcast(fundl0, 2*n2d*npol*norbf, MPI_REAL8, 0, &
MPI_COMM_WORLD, info)
CALL mpi_bcast(fun1, 2*n2d*2*n2d, MPI_REAL8, nproc-1, &
MPI_COMM_WORLD, info)
CALL mpi_bcast(fund1, 2*n2d*2*n2d, MPI_REAL8, nproc-1, &
MPI_COMM_WORLD, info)
CALL mpi_bcast(funl1, 2*n2d*npol*norbf, MPI_REAL8, nproc-1, &
MPI_COMM_WORLD, info)
CALL mpi_bcast(fundl1, 2*n2d*npol*norbf, MPI_REAL8, nproc-1, &
MPI_COMM_WORLD, info)
!
! Gathering of the integrals
!
CALL reduce(2*norbf*npol*2*n2d, intw1)
CALL reduce(2*norbf*npol*norbf*npol, intw2)
DEALLOCATE(x)
DEALLOCATE(y)
DEALLOCATE(amat)
DEALLOCATE(amat_aux)
DEALLOCATE(vec)
DEALLOCATE(vec_aux)
DEALLOCATE(ipiv)
#endif
RETURN
END SUBROUTINE rotproc