mirror of https://gitlab.com/QEF/q-e.git
1094 lines
29 KiB
Fortran
1094 lines
29 KiB
Fortran
!
|
|
! Copyright (C) 2002-2005 FPMD-CPV groups
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
#include "f_defs.h"
|
|
|
|
MODULE orthogonalize_base
|
|
|
|
USE kinds
|
|
USE parallel_toolkit, ONLY: matmulp, cmatmulp, &
|
|
pdspev_drv, dspev_drv, pzhpev_drv, zhpev_drv
|
|
|
|
IMPLICIT NONE
|
|
|
|
SAVE
|
|
|
|
PRIVATE
|
|
|
|
LOGICAL :: timing = .false.
|
|
|
|
REAL(DP) :: one, zero, two, minus_one, minus_two
|
|
PARAMETER ( one = 1.0d0, zero = 0.0d0, two = 2.0d0, minus_one = -1.0d0 )
|
|
PARAMETER ( minus_two = -2.0d0 )
|
|
COMPLEX(DP) :: cone, czero, mcone
|
|
PARAMETER ( cone = (1.0d0, 0.0d0), czero = (0.0d0, 0.0d0) )
|
|
PARAMETER ( mcone = (-1.0d0, 0.0d0) )
|
|
REAL(DP) :: small = 1.0d-14
|
|
|
|
#if defined __AIX
|
|
INTEGER, PARAMETER :: nrlx_tune = 128
|
|
#else
|
|
INTEGER, PARAMETER :: nrlx_tune = 4
|
|
#endif
|
|
|
|
INTERFACE sqr_matmul
|
|
MODULE PROCEDURE sqr_dmatmul, sqr_cmatmul
|
|
END INTERFACE
|
|
|
|
INTERFACE sigset
|
|
MODULE PROCEDURE rsigset, csigset
|
|
END INTERFACE
|
|
INTERFACE rhoset
|
|
MODULE PROCEDURE rrhoset, crhoset
|
|
END INTERFACE
|
|
|
|
INTERFACE diagonalize_rho
|
|
MODULE PROCEDURE diagonalize_rrho, diagonalize_crho
|
|
END INTERFACE
|
|
|
|
PUBLIC :: sqr_matmul, sigset, rhoset, diagonalize_rho
|
|
PUBLIC :: backrhoset2, sigrhoset2, backrhoset, sigrhoset
|
|
|
|
CONTAINS
|
|
|
|
|
|
SUBROUTINE sqr_dmatmul(transa,transb,a,b,c)
|
|
! ... Multiply square matrices A, B and return the result in C
|
|
USE mp_global, ONLY: nproc
|
|
REAL(DP) :: c(:,:), a(:,:), b(:,:)
|
|
CHARACTER*1 :: transa, transb
|
|
INTEGER :: n
|
|
n = SIZE(c,1)
|
|
IF ( ( nproc > 1 ) .AND. ( n >= nproc ) ) THEN
|
|
CALL matmulp( transa, transb, a, b, c, n )
|
|
ELSE
|
|
CALL DGEMM( transa, transb, n, n, n, one, a(1,1), n, b(1,1), n, zero, c(1,1), n)
|
|
END IF
|
|
RETURN
|
|
END SUBROUTINE sqr_dmatmul
|
|
|
|
SUBROUTINE sqr_cmatmul(transa,transb,a,b,c)
|
|
! ... Multiply square matrices A, B and return the result in C
|
|
USE mp_global, ONLY: nproc
|
|
COMPLEX(DP) :: c(:,:), a(:,:), b(:,:)
|
|
CHARACTER*1 transa, transb
|
|
INTEGER :: n
|
|
n = SIZE(c,1)
|
|
IF ((nproc > 1 ).AND. (n >= nproc)) THEN
|
|
CALL cmatmulp(transa,transb,A,B,C,n)
|
|
ELSE
|
|
CALL ZGEMM(transa,transb,n,n,n,cone,a(1,1),n,b(1,1),n,czero,c(1,1),n)
|
|
END IF
|
|
RETURN
|
|
END SUBROUTINE sqr_cmatmul
|
|
|
|
|
|
!.....DIAGONALIZATION OF RHOS
|
|
|
|
SUBROUTINE diagonalize_rrho( temp, rhod, s, pwrk)
|
|
|
|
#if defined __SHMEM
|
|
USE shmem_include
|
|
#endif
|
|
|
|
USE mp_global, ONLY: nproc, mpime
|
|
USE mp, ONLY: mp_sum
|
|
REAL(DP) :: rhod(:)
|
|
REAL(DP) :: temp(:,:), s(:,:), pwrk(:)
|
|
|
|
REAL(DP), ALLOCATABLE :: aux(:)
|
|
REAL(DP), ALLOCATABLE :: diag(:,:)
|
|
REAL(DP), ALLOCATABLE :: vv(:,:)
|
|
INTEGER :: n, nrl
|
|
|
|
n = SIZE(temp,1)
|
|
|
|
IF ( ( nproc > 1 ) .AND. ( n / nproc ) >= nrlx_tune ) THEN
|
|
|
|
nrl = n/nproc
|
|
IF(mpime < MOD(n,nproc)) THEN
|
|
nrl = nrl + 1
|
|
end if
|
|
ALLOCATE( diag(nrl,n), vv(nrl,n) )
|
|
|
|
CALL prpack(diag, temp)
|
|
CALL pdspev_drv( 'V', diag, nrl, rhod, vv, nrl, nrl, n, nproc, mpime)
|
|
CALL prunpack(temp, vv)
|
|
|
|
DEALLOCATE( diag, vv )
|
|
|
|
#if defined __SHMEM
|
|
call shmem_barrier_all
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(S,TEMP,N*N,0,0,nproc, pWrk, pSync_sta)
|
|
call shmem_barrier_all
|
|
#else
|
|
CALL mp_sum(temp, s)
|
|
#endif
|
|
|
|
ELSE
|
|
|
|
ALLOCATE( aux( n * ( n + 1 ) / 2 ) )
|
|
CALL rpack( aux, temp )
|
|
CALL dspev_drv('V', 'L', n, aux, rhod, s, n)
|
|
DEALLOCATE( aux )
|
|
|
|
END IF
|
|
|
|
RETURN
|
|
END SUBROUTINE diagonalize_rrho
|
|
|
|
! BEGIN manual
|
|
|
|
SUBROUTINE diagonalize_crho(a,d,ev)
|
|
|
|
! this routine calls the appropriate Lapack routine for diagonalizing a
|
|
! complex Hermitian matrix
|
|
! ----------------------------------------------
|
|
! END manual
|
|
|
|
USE mp_global, ONLY: nproc, mpime
|
|
USE mp, ONLY: mp_sum
|
|
IMPLICIT NONE
|
|
|
|
REAL(DP) :: d(:)
|
|
COMPLEX(DP) :: a(:,:), ev(:,:)
|
|
|
|
INTEGER :: n, nrl
|
|
|
|
COMPLEX(DP), ALLOCATABLE :: aloc(:)
|
|
COMPLEX(DP), ALLOCATABLE :: ap(:,:)
|
|
COMPLEX(DP), ALLOCATABLE :: vp(:,:)
|
|
|
|
! ... end of declarations
|
|
! ----------------------------------------------
|
|
|
|
n = SIZE(a, 1)
|
|
|
|
IF((nproc.EQ.2) .OR. (n.LT.nproc) .OR. (n.LT.256)) THEN
|
|
|
|
ALLOCATE(aloc(n*(n+1)/2))
|
|
|
|
! ... copy the lower-diagonal part of the matrix according to the
|
|
! ... Lapack packed storage scheme for Hermitian matrices
|
|
CALL zpack(aloc, a)
|
|
! ... call the Lapack routine
|
|
CALL zhpev_drv('V', 'L', n, aloc, d, ev, n)
|
|
|
|
DEALLOCATE(aloc)
|
|
|
|
ELSE
|
|
|
|
nrl = n/nproc
|
|
IF(mpime.LT.MOD(n,nproc)) THEN
|
|
nrl = nrl + 1
|
|
END IF
|
|
|
|
ALLOCATE(ap(nrl,n), vp(nrl,n))
|
|
|
|
CALL pzpack(ap, a)
|
|
CALL pzhpev_drv( 'V', ap, nrl, d, vp, nrl, nrl, n, nproc, mpime)
|
|
CALL pzunpack(a, vp)
|
|
CALL mp_sum(a, ev)
|
|
|
|
DEALLOCATE(ap, vp)
|
|
|
|
END IF
|
|
|
|
RETURN
|
|
END SUBROUTINE diagonalize_crho
|
|
|
|
|
|
! ----------------------------------------------
|
|
! BEGIN manual
|
|
|
|
SUBROUTINE rsigset ( ngw, nb, cp, sig, pwrk)
|
|
|
|
! SIG = REAL PART OF ONE-2.0*ADJ(CP)*CP+CP(*,1)*ADJ(CP(*,1))
|
|
! WHERE CP(*,1) IS REAL, AND THEREFORE TRANS() IS USED IN PLACE OF ADJ()
|
|
! ----------------------------------------------
|
|
! END manual
|
|
|
|
USE mp_global, ONLY: nproc, group
|
|
USE mp, ONLY: mp_sum
|
|
#if defined __SHMEM
|
|
USE shmem_include
|
|
#endif
|
|
|
|
IMPLICIT NONE
|
|
|
|
COMPLEX(DP) :: CP(:,:)
|
|
REAL(DP) :: SIG(:,:), PWRK(1)
|
|
INTEGER, INTENT(IN) :: nb, ngw
|
|
INTEGER :: i, ldc, twongw, j, k, lds, n
|
|
|
|
ldc = 2 * SIZE( cp, 1 )
|
|
lds = SIZE( sig, 1 )
|
|
twongw = 2*ngw
|
|
n = nb
|
|
|
|
! WRITE( stdout,*) ' SIGSET 1 ', SUM(sig), SUM(cp) ! DEBUG
|
|
! DO i = 1, nb
|
|
! DO j = 1, nb
|
|
! DO k = 1, ngw
|
|
! sig(i,j) = - 2.0d0 * ( DBLE(cp(k,i))*DBLE(cp(k,j))+AIMAG(cp(k,i))*AIMAG(cp(k,j)) )
|
|
! END DO
|
|
! END DO
|
|
! END DO
|
|
|
|
CALL DGEMM('T','N', n, n, twongw, -2.0d0, cp(1,1), ldc, cp(1,1), ldc, zero, sig(1,1), lds)
|
|
DO i = 1, n
|
|
sig(i,i) = sig(i,i) + one / DBLE(nproc)
|
|
END DO
|
|
|
|
! WRITE( stdout,*) ' SIGSET 2 ', SUM(sig) ! DEBUG
|
|
|
|
|
|
#if defined __SHMEM
|
|
call shmem_barrier_all
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(SIG, SIG, SIZE(sig),0,0, nproc, pWrk, pSync_sta)
|
|
call shmem_barrier_all
|
|
#else
|
|
CALL mp_sum( sig, group )
|
|
#endif
|
|
|
|
|
|
RETURN
|
|
END SUBROUTINE rsigset
|
|
|
|
! ----------------------------------------------
|
|
! BEGIN manual
|
|
|
|
SUBROUTINE csigset( ngw, nx, cp, sig, pwrk )
|
|
|
|
! SIG = REAL PART OF ONE-2.0*ADJ(CP)*CP+CP(*,1)*ADJ(CP(*,1))
|
|
! (WHERE CP(*,1) IS REAL, AND THEREFORE TRANS() IS USED IN
|
|
! PLACE OF ADJ()
|
|
! ----------------------------------------------
|
|
! END manual
|
|
|
|
USE mp_global, ONLY: nproc, mpime
|
|
USE mp, ONLY: mp_sum
|
|
#if defined __SHMEM
|
|
USE shmem_include
|
|
#endif
|
|
|
|
IMPLICIT NONE
|
|
|
|
INTEGER, INTENT( IN ) :: nx, ngw
|
|
COMPLEX(DP) :: cp(:,:), sig(:,:), pwrk(1)
|
|
INTEGER :: i, j, ldc, lds
|
|
|
|
ldc = SIZE( cp, 1 )
|
|
lds = SIZE( sig, 1 )
|
|
|
|
CALL ZGEMM('C','N',NX,NX,NGW,mcone,CP(1,1),ldc,CP(1,1),ldc,czero,sig(1,1),lds)
|
|
DO i = 1, nx
|
|
sig(i,i) = sig(i,i) + cone / DBLE(nproc)
|
|
END DO
|
|
|
|
#if defined __SHMEM
|
|
call shmem_barrier_all
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(SIG,SIG,2*NX*NX,0,0,nproc,pWrk,pSync_sta)
|
|
call shmem_barrier_all
|
|
#else
|
|
CALL mp_sum( sig )
|
|
#endif
|
|
|
|
DO i=1,nx
|
|
DO j=i,nx
|
|
sig(j,i) = 0.5d0 * ( sig(j,i) + CONJG(sig(i,j)) )
|
|
ENDDO
|
|
ENDDO
|
|
DO i=1,nx
|
|
DO j=1,i-1
|
|
sig(j,i) = CONJG(sig(i,j))
|
|
ENDDO
|
|
ENDDO
|
|
|
|
RETURN
|
|
END SUBROUTINE csigset
|
|
|
|
! ----------------------------------------------
|
|
! BEGIN manual
|
|
|
|
SUBROUTINE rrhoset ( ngw, nb, c0, cp, rho, tmass, pwrk)
|
|
|
|
! RHO = REAL PART OF 2*ADJ(C0/PMSS)*CP +
|
|
! C0(*,1)/PMSS*TRANS(CP(*,1))
|
|
! (CP(*,1) AND C0(*,1) REAL!)
|
|
!
|
|
! TMASS = REAL PART OF 2*ADJ(C0/PMSS)*C0/PMSS + ...
|
|
!
|
|
! RHO AND TMASS ARE PLACED IN COMMON /HOPE/
|
|
! ----------------------------------------------
|
|
! END manual
|
|
|
|
USE mp_global, ONLY: nproc
|
|
USE mp, ONLY: mp_sum
|
|
#if defined __SHMEM
|
|
USE shmem_include
|
|
#endif
|
|
|
|
IMPLICIT NONE
|
|
|
|
COMPLEX(DP) :: CP(:,:), C0(:,:)
|
|
REAL(DP) :: RHO(:,:), TMASS(:,:)
|
|
REAL(DP) :: pWrk(1)
|
|
INTEGER, INTENT(IN) :: ngw, nb
|
|
|
|
INTEGER :: i, j, ldc, ldr, tngw, n
|
|
|
|
ldc = 2*SIZE( cp, 1 )
|
|
ldr = SIZE( rho, 1 )
|
|
tngw = 2*ngw
|
|
n = nb
|
|
|
|
CALL DGEMM('T','N',n,n,tngw,two,c0(1,1),ldc,cp(1,1),ldc,zero,rho(1,1),ldr)
|
|
CALL DGEMM('T','N',n,n,tngw,two,c0(1,1),ldc,c0(1,1),ldc,zero,tmass(1,1),ldr)
|
|
|
|
#if defined __SHMEM
|
|
call shmem_barrier_all
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(RHO, RHO, size(rho),0,0, nproc, &
|
|
pWrk, pSync_sta)
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(TMASS,TMASS,size(tmass),0,0,nproc, &
|
|
pWrk,pSync_sta)
|
|
call shmem_barrier_all
|
|
#else
|
|
CALL mp_sum( rho )
|
|
CALL mp_sum( tmass )
|
|
#endif
|
|
|
|
RETURN
|
|
END SUBROUTINE rrhoset
|
|
|
|
! ----------------------------------------------
|
|
! BEGIN manual
|
|
|
|
SUBROUTINE crhoset( ngw, nx, c0, cp, rho, tmass, pwrk )
|
|
!
|
|
! RHO = REAL PART OF 2*ADJ(C0/PMSS)*CP +
|
|
! C0(*,1)/PMSS*TRANS(CP(*,1))
|
|
! (CP(*,1) AND C0(*,1) REAL!)
|
|
!
|
|
! TMASS = REAL PART OF 2*ADJ(C0/PMSS)*C0/PMSS + ...
|
|
!
|
|
! ----------------------------------------------
|
|
! END manual
|
|
|
|
USE mp_global, ONLY: nproc
|
|
USE mp, ONLY: mp_sum
|
|
#if defined __SHMEM
|
|
USE shmem_include
|
|
#endif
|
|
|
|
IMPLICIT NONE
|
|
|
|
INTEGER :: nx, ngw
|
|
COMPLEX(DP) :: cp(:,:), c0(:,:)
|
|
COMPLEX(DP) :: rho(:,:), tmass(:,:)
|
|
COMPLEX(DP) :: pwrk(1)
|
|
INTEGER :: i, j, ldc, ldr
|
|
|
|
ldc = SIZE( c0, 1 )
|
|
ldr = SIZE( rho, 1 )
|
|
CALL ZGEMM('C','N',nx,nx,ngw,cone,c0(1,1),ldc,cp(1,1),ldc,czero,rho(1,1),ldr)
|
|
CALL ZGEMM('C','N',nx,nx,ngw,cone,c0(1,1),ldc,c0(1,1),ldc,czero,tmass(1,1),ldr)
|
|
|
|
#if defined __SHMEM
|
|
call shmem_barrier_all
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(TMASS,TMASS,2*NX*NX,0,0,nproc,pWrk,pSync_sta)
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(RHO,RHO,2*NX*NX,0,0,nproc,pWrk,pSync_sta)
|
|
call shmem_barrier_all
|
|
#else
|
|
CALL mp_sum( RHO )
|
|
CALL mp_sum( TMASS )
|
|
#endif
|
|
|
|
DO I=1,NX
|
|
DO J=I,NX
|
|
TMASS(J,I) = 0.5d0 * ( TMASS(J,I) + CONJG(TMASS(I,J)) )
|
|
ENDDO
|
|
ENDDO
|
|
DO I=1,NX
|
|
DO J=1,I-1
|
|
TMASS(J,I) = CONJG(TMASS(I,J))
|
|
ENDDO
|
|
ENDDO
|
|
|
|
RETURN
|
|
END SUBROUTINE crhoset
|
|
|
|
!----------------------------------------------------------------------!
|
|
|
|
|
|
SUBROUTINE sigrhoset( ngw, nx, cp, c0, sig, rho, tmass, pmss, emass, gzero )
|
|
|
|
!***************************************************************
|
|
! SIG = REAL PART OF ONE-2.0*ADJ(CP)*CP+CP(*,1)*ADJ(CP(*,1))
|
|
! (WHERE CP(*,1) IS REAL, AND THEREFORE TRANS() IS USED IN
|
|
! PLACE OF ADJ()
|
|
!***************************************************************
|
|
USE parallel_types
|
|
USE descriptors_module
|
|
USE mp, ONLY: mp_sum
|
|
USE processors_grid_module, ONLY: get_grid_coor, get_grid_dims
|
|
|
|
IMPLICIT NONE
|
|
|
|
REAL (DP) :: PMSS(:), EMASS
|
|
TYPE (real_parallel_matrix) :: sig
|
|
TYPE (real_parallel_matrix) :: rho
|
|
TYPE (real_parallel_matrix) :: tmass
|
|
COMPLEX (DP) :: CP(:,:)
|
|
COMPLEX (DP) :: C0(:,:)
|
|
LOGICAL, INTENT(IN) :: gzero
|
|
INTEGER, INTENT(IN) :: ngw, nx
|
|
|
|
INTEGER I,J,NPROW, NPCOL, MYROW, MYCOL, CURROW, CURCOL
|
|
INTEGER ME, NRL, NCL, N, NB, CURNR, CURNC, II, JJ,IB,JB
|
|
INTEGER IP,JP,NIB, NJB, JP1, JP2, IOFF1, IOFF2, IOFF3
|
|
INTEGER RSRC, CSRC, npz, mez
|
|
INTEGER INDXL2G, INDXG2L, INDXG2P
|
|
REAL(DP) DDOT
|
|
REAL(DP), allocatable :: SIGTMP(:)
|
|
real(DP), allocatable :: CTMP(:,:)
|
|
real(DP), allocatable :: ebpmss(:)
|
|
real(DP) sqrtfact
|
|
COMPLEX (DP) :: C0ji
|
|
INTEGER :: ldc
|
|
|
|
!
|
|
! SUBROUTINE BODY
|
|
!
|
|
CALL get_grid_dims(sig%desc%grid, nprow, npcol, npz)
|
|
CALL get_grid_coor(sig%desc%grid, myrow, mycol, mez)
|
|
ldc = 2 * SIZE(C0,1)
|
|
N = sig%desc%nx
|
|
NB = sig%desc%nxblk
|
|
RSRC = sig%desc%ipexs
|
|
CSRC = sig%desc%ipeys
|
|
|
|
ALLOCATE( ctmp( nx, 2*ngw ) )
|
|
ALLOCATE( ebpmss( ngw ) )
|
|
CALL avrec( ngw, emass, pmss, ebpmss )
|
|
DO I = 1, N
|
|
DO J = 1, NGW
|
|
jp1 = j+j-1
|
|
jp2 = j+j
|
|
c0ji = c0(j,i)
|
|
c0ji = c0ji * ebpmss(j)
|
|
c0(j,i) = c0ji
|
|
ctmp(i,jp1) = DBLE(c0ji)
|
|
ctmp(i,jp2) = AIMAG(c0ji)
|
|
ENDDO
|
|
ENDDO
|
|
DEALLOCATE( ebpmss )
|
|
|
|
|
|
sqrtfact = sqrt(0.5_DP)
|
|
IF(gzero) THEN
|
|
DO I = 1, N
|
|
CTMP(I,1) = sqrtfact * CTMP(I,1)
|
|
C0(1,I) = sqrtfact * C0(1,I)
|
|
CP(1,I) = sqrtfact * CP(1,I)
|
|
ENDDO
|
|
ENDIF
|
|
|
|
|
|
! LOOP OVER THE 2D PROCESSORS GRID
|
|
!
|
|
DO CURCOL = 0, NPCOL-1
|
|
DO CURROW = 0, NPROW-1
|
|
|
|
CURNR = NUMROC(N, NB, CURROW, RSRC, NPROW)
|
|
CURNC = NUMROC(N, NB, CURCOL, CSRC, NPCOL)
|
|
|
|
allocate(sigtmp(CURNR*CURNC*3))
|
|
|
|
! LOOP OVER THE BLOCKS OWNED BY PE (CURROW,CURCOL)
|
|
!
|
|
do jb = 1,N,NB
|
|
jp = INDXG2P( jb, NB, MYCOL, CSRC, NPCOL )
|
|
njb = min(nb,n-jb+1)
|
|
if(jp.eq.curcol) then
|
|
do ib = 1,N,NB
|
|
nib = min(nb,n-ib+1)
|
|
ip = INDXG2P( IB, NB, MYROW, RSRC, NPROW )
|
|
if(ip.eq.currow) then
|
|
I = INDXG2L( IB, NB, CURROW, RSRC, NPROW )
|
|
J = INDXG2L( JB, NB, CURCOL, CSRC, NPCOL )
|
|
IOFF1 = i+(j-1)*CURNR
|
|
IOFF2 = i+(j-1)*CURNR + CURNR*CURNC
|
|
IOFF3 = i+(j-1)*CURNR + 2*CURNR*CURNC
|
|
! SIG
|
|
call DGEMM('T','N',nib,njb,2*ngw,-2.0d0, &
|
|
& CP(1,ib),ldc,CP(1,jb),ldc,0.0d0, &
|
|
& sigtmp(IOFF1),CURNR)
|
|
! RHO
|
|
call DGEMM('N','N',nib,njb,2*ngw,2.0d0, &
|
|
& CTMP(ib,1),n,CP(1,jb),ldc,0.0d0, &
|
|
& sigtmp(IOFF2),CURNR)
|
|
! TMASS
|
|
call DGEMM('N','N',nib,njb,2*ngw,2.0d0, &
|
|
& CTMP(ib,1),n,C0(1,jb),ldc,0.0d0, &
|
|
& sigtmp(IOFF3),CURNR)
|
|
|
|
endif
|
|
enddo
|
|
endif
|
|
enddo
|
|
|
|
call mp_sum( sigtmp )
|
|
|
|
IF( (CURROW.eq.MYROW) .and. (CURCOL.eq.MYCOL) ) THEN
|
|
DO J=1,CURNC
|
|
DO I=1,CURNR
|
|
SIG%m(I,J) = sigtmp(i+(j-1)*CURNR)
|
|
RHO%m(I,J) = sigtmp(i+(j-1)*CURNR+ CURNR*CURNC)
|
|
TMASS%m(I,J) = sigtmp(i+(j-1)*CURNR+ 2*CURNR*CURNC)
|
|
ENDDO
|
|
ENDDO
|
|
ENDIF
|
|
|
|
DEALLOCATE(SIGTMP)
|
|
|
|
ENDDO
|
|
ENDDO
|
|
|
|
deallocate(ctmp)
|
|
|
|
DO I=1,N
|
|
II = INDXG2L( I, NB, MYROW, RSRC, NPROW )
|
|
JJ = INDXG2L( I, NB, MYCOL, CSRC, NPCOL )
|
|
CURROW = INDXG2P( I, NB, MYROW, RSRC, NPROW )
|
|
CURCOL = INDXG2P( I, NB, MYCOL, CSRC, NPCOL )
|
|
IF( ( CURROW .eq. MYROW ) .and. ( CURCOL .eq. MYCOL ) ) THEN
|
|
SIG%m(II,JJ) = SIG%m(II,JJ) + 1.0
|
|
END IF
|
|
ENDDO
|
|
|
|
IF(gzero) THEN
|
|
DO I=1,N
|
|
C0(1,I) = C0(1,I)/sqrtfact
|
|
CP(1,I) = CP(1,I)/sqrtfact
|
|
ENDDO
|
|
ENDIF
|
|
|
|
|
|
RETURN
|
|
END SUBROUTINE sigrhoset
|
|
|
|
|
|
SUBROUTINE sigrhoset2( ngw, nx, CP,C0,SIG,RHO,TMASS,PMSS,EMASS,gzero)
|
|
|
|
!***************************************************************
|
|
! SIG = REAL PART OF ONE-2.0*ADJ(CP)*CP+CP(*,1)*ADJ(CP(*,1))
|
|
! (WHERE CP(*,1) IS REAL, AND THEREFORE TRANS() IS USED IN
|
|
! PLACE OF ADJ()
|
|
!***************************************************************
|
|
USE parallel_types
|
|
USE descriptors_module
|
|
USE mp, ONLY: mp_sum
|
|
USE processors_grid_module, ONLY: get_grid_coor, get_grid_dims
|
|
|
|
IMPLICIT NONE
|
|
|
|
REAL (DP) :: PMSS(:), EMASS
|
|
TYPE (real_parallel_matrix) :: sig
|
|
TYPE (real_parallel_matrix) :: rho
|
|
TYPE (real_parallel_matrix) :: tmass
|
|
COMPLEX (DP) :: CP(:,:)
|
|
COMPLEX (DP) :: C0(:,:)
|
|
LOGICAL, INTENT(IN) :: gzero
|
|
INTEGER, INTENT(IN) :: ngw, nx
|
|
|
|
INTEGER :: I,J, NPROW, NPCOL, MYROW, MYCOL
|
|
INTEGER :: NRL, N, II, JJ
|
|
INTEGER :: ip, ldc
|
|
INTEGER :: nngw, npz, mez, nproc, mpime
|
|
INTEGER :: nrl_ip, nrlx
|
|
REAL(DP), ALLOCATABLE :: RTMP(:,:,:)
|
|
REAL(DP), ALLOCATABLE :: ebpmss(:)
|
|
REAL(DP) :: sqrtfact
|
|
|
|
!
|
|
! SUBROUTINE BODY
|
|
!
|
|
CALL get_grid_dims( sig%desc%grid, nprow, npcol, npz )
|
|
CALL get_grid_coor( sig%desc%grid, myrow, mycol, mez )
|
|
|
|
ldc = 2 * SIZE(C0, 1)
|
|
nngw = 2 * ngw
|
|
n = nx
|
|
nproc = nprow
|
|
mpime = myrow
|
|
nrl = n/nproc
|
|
nrlx = n/nproc + 1
|
|
IF( mpime < MOD(n,nproc) ) THEN
|
|
nrl = nrl + 1
|
|
END IF
|
|
|
|
IF( npcol > 1 .OR. npz > 1 ) THEN
|
|
CALL errore(' sigrhoset2 ',' wrong grid dimension ', npcol)
|
|
END IF
|
|
IF( mycol > 0 .OR. mez > 0 ) THEN
|
|
CALL errore(' sigrhoset2 ',' wrong grid coordinates ', mycol)
|
|
END IF
|
|
IF( nrl /= SIZE(sig%m,1) .OR. n /= SIZE(sig%m,2)) THEN
|
|
CALL errore(' sigrhoset2 ',' wrong sizes for matrix SIG ', nrl)
|
|
END IF
|
|
IF( nrl /= SIZE(rho%m,1) .OR. n /= SIZE(rho%m,2)) THEN
|
|
CALL errore(' sigrhoset2 ',' wrong sizes for matrix RHO ', nrl)
|
|
END IF
|
|
IF( nrl /= SIZE(tmass%m,1) .OR. n /= SIZE(tmass%m,2)) THEN
|
|
CALL errore(' sigrhoset2 ',' wrong sizes for matrix TMASS ', nrl)
|
|
END IF
|
|
IF( n > SIZE(C0,2) ) THEN
|
|
CALL errore(' sigrhoset2 ',' wrong number of states ', n)
|
|
END IF
|
|
|
|
ALLOCATE( ebpmss( ngw ) )
|
|
CALL avrec( ngw, emass, pmss, ebpmss )
|
|
|
|
sqrtfact = sqrt(0.5d0)
|
|
IF( gzero ) THEN
|
|
DO i = 1, n
|
|
C0(1,i) = sqrtfact * C0(1,i)
|
|
CP(1,i) = sqrtfact * CP(1,i)
|
|
ENDDO
|
|
ENDIF
|
|
DO j = 1, n
|
|
DO i = 1, ngw
|
|
C0(i,j) = C0(i,j) * ebpmss(i)
|
|
END DO
|
|
END DO
|
|
|
|
DEALLOCATE(ebpmss)
|
|
|
|
!
|
|
! LOOP OVER THE 1D PROCESSORS GRID
|
|
!
|
|
|
|
IF( nrlx > nrlx_tune ) THEN
|
|
|
|
DO ip = 0, nproc-1
|
|
|
|
nrl_ip = n/nproc
|
|
IF( ip < MOD(n,nproc) ) THEN
|
|
nrl_ip = nrl_ip + 1
|
|
end if
|
|
|
|
allocate( rtmp( nrl_ip, n, 1 ) )
|
|
|
|
! SIG
|
|
CALL DGEMM('T', 'N', nrl_ip, n, nngw, -2.0d0, &
|
|
& cp(1,ip+1), ldc * nproc, CP(1,1), ldc, 0.0d0, rtmp(1,1,1), nrl_ip)
|
|
|
|
call mp_sum( rtmp(:,:,1), sig%m(:,:), root=ip )
|
|
|
|
! RHO
|
|
CALL DGEMM('T', 'N', nrl_ip, n, nngw, 2.0d0, &
|
|
& c0(1,ip+1), ldc * nproc, CP(1,1), ldc, 0.0d0, rtmp(1,1,1), nrl_ip)
|
|
|
|
call mp_sum( rtmp(:,:,1), rho%m(:,:), root=ip )
|
|
|
|
! TMASS
|
|
CALL DGEMM('T', 'N', nrl_ip, n, nngw, 2.0d0, &
|
|
& c0(1,ip+1), ldc * nproc, C0(1,1), ldc, 0.0d0, rtmp(1,1,1), nrl_ip)
|
|
|
|
call mp_sum( rtmp(:,:,1), tmass%m(:,:), root=ip )
|
|
|
|
DEALLOCATE(rtmp)
|
|
|
|
ENDDO
|
|
|
|
ELSE
|
|
|
|
ALLOCATE( rtmp( n, n, 1 ) )
|
|
|
|
! SIG
|
|
CALL DGEMM('T', 'N', n, n, nngw, -2.0d0, &
|
|
& cp(1,1), ldc, CP(1,1), ldc, 0.0d0, rtmp(1,1,1), n)
|
|
call mp_sum( rtmp(:,:,1) )
|
|
DO j = 1, n
|
|
ii = mpime + 1
|
|
DO i = 1, nrl
|
|
sig%m(i,j) = rtmp(ii,j,1)
|
|
ii = ii + nproc
|
|
END DO
|
|
END DO
|
|
|
|
! RHO
|
|
CALL DGEMM('T', 'N', n, n, nngw, 2.0d0, &
|
|
& c0(1,1), ldc, CP(1,1), ldc, 0.0d0, rtmp(1,1,1), n)
|
|
call mp_sum( rtmp(:,:,1) )
|
|
DO j = 1, n
|
|
ii = mpime + 1
|
|
DO i = 1, nrl
|
|
rho%m(i,j) = rtmp(ii,j,1)
|
|
ii = ii + nproc
|
|
END DO
|
|
END DO
|
|
|
|
! TMASS
|
|
CALL DGEMM('T', 'N', n, n, nngw, 2.0d0, &
|
|
& c0(1,1), ldc, C0(1,1), ldc, 0.0d0, rtmp(1,1,1), n)
|
|
call mp_sum( rtmp(:,:,1) )
|
|
DO j = 1, n
|
|
ii = mpime + 1
|
|
DO i = 1, nrl
|
|
tmass%m(i,j) = rtmp(ii,j,1)
|
|
ii = ii + nproc
|
|
END DO
|
|
END DO
|
|
|
|
DEALLOCATE(rtmp)
|
|
|
|
END IF
|
|
|
|
! ... Add "1" to the diagonal elements
|
|
ii = mpime + 1
|
|
DO i = 1, nrl
|
|
SIG%m(i,ii) = SIG%m(i,ii) + 1.0d0
|
|
ii = ii + nproc
|
|
ENDDO
|
|
|
|
! ... Restore the coefficients of the G=0 reciprocal vector
|
|
IF(gzero) THEN
|
|
DO I=1,N
|
|
C0(1,I) = C0(1,I)/sqrtfact
|
|
CP(1,I) = CP(1,I)/sqrtfact
|
|
ENDDO
|
|
ENDIF
|
|
|
|
RETURN
|
|
END SUBROUTINE sigrhoset2
|
|
|
|
!
|
|
!----------------------------------------------------------------------
|
|
!
|
|
|
|
SUBROUTINE backrhoset( ngw, nx, cp, c0, rho, pmss, emass )
|
|
|
|
USE parallel_types
|
|
USE descriptors_module
|
|
USE mp, ONLY: mp_sum
|
|
USE processors_grid_module, ONLY: get_grid_coor, get_grid_dims
|
|
|
|
IMPLICIT NONE
|
|
TYPE (real_parallel_matrix) :: RHO
|
|
COMPLEX (DP) :: CP(:,:)
|
|
COMPLEX (DP) :: C0(:,:)
|
|
REAL (DP) :: PMSS(:), EMASS
|
|
INTEGER, INTENT(IN) :: ngw, nx
|
|
|
|
INTEGER I,J,NPROW, NPCOL, MYROW, MYCOL, CURROW, CURCOL
|
|
INTEGER ME, NRL, NCL, N, NB, CURNR, CURNC, II, JJ,IB,JB
|
|
INTEGER IP,JP,NIB, NJB, JP1, JP2, IOFF1, IOFF2, IOFF3
|
|
INTEGER RSRC, CSRC, npz, mez, ldc
|
|
INTEGER INDXL2G, INDXG2L, INDXG2P
|
|
REAL (DP) :: DDOT
|
|
REAL (DP) :: FACT,ONE_BY_EMASS
|
|
REAL (DP), allocatable :: SIGTMP(:)
|
|
|
|
!
|
|
! SUBROUTINE BODY
|
|
!
|
|
|
|
CALL get_grid_dims(rho%desc%grid, nprow, npcol, npz)
|
|
CALL get_grid_coor(rho%desc%grid, myrow, mycol, mez)
|
|
ldc = 2 * SIZE( C0, 1 )
|
|
N = nx
|
|
NB = rho%desc%nxblk
|
|
RSRC = rho%desc%ipexs
|
|
CSRC = rho%desc%ipeys
|
|
|
|
|
|
! LOOP OVER THE 2D PROCESSORS GRID
|
|
!
|
|
DO CURCOL = 0, NPCOL-1
|
|
DO CURROW = 0, NPROW-1
|
|
|
|
CURNR = NUMROC(N, NB, CURROW, RSRC, NPROW)
|
|
CURNC = NUMROC(N, NB, CURCOL, CSRC, NPCOL)
|
|
|
|
allocate(sigtmp(CURNR*CURNC))
|
|
sigtmp = 0.0d0
|
|
IF( (CURROW.eq.MYROW) .and. (CURCOL.eq.MYCOL) ) THEN
|
|
DO J=1,CURNC
|
|
DO I=1,CURNR
|
|
sigtmp(i+(j-1)*CURNR) = RHO%m(I,J)
|
|
ENDDO
|
|
ENDDO
|
|
ENDIF
|
|
call mp_sum( sigtmp )
|
|
|
|
! LOOP OVER THE BLOCKS OWNED BY PE (CURROW,CURCOL)
|
|
!
|
|
do jb = 1,N,NB
|
|
jp = INDXG2P( jb, NB, MYCOL, CSRC, NPCOL )
|
|
njb = min(nb,n-jb+1)
|
|
if(jp.eq.curcol) then
|
|
do ib = 1,N,NB
|
|
nib = min(nb,n-ib+1)
|
|
ip = INDXG2P( IB, NB, MYROW, RSRC, NPROW )
|
|
if(ip.eq.currow) then
|
|
I = INDXG2L( IB, NB, CURROW, RSRC, NPROW )
|
|
J = INDXG2L( JB, NB, CURCOL, CSRC, NPCOL )
|
|
IOFF1 = i+(j-1)*CURNR
|
|
CALL DGEMM('N','N',2*NGW,nib,njb,1.0D0, &
|
|
& C0(1,ib),ldc,sigtmp(IOFF1),CURNR,1.0D0, &
|
|
& CP(1,jb),ldc )
|
|
|
|
endif
|
|
enddo
|
|
endif
|
|
enddo
|
|
|
|
DEALLOCATE(SIGTMP)
|
|
|
|
ENDDO
|
|
ENDDO
|
|
|
|
!
|
|
!.....RESTORE C0
|
|
!
|
|
ALLOCATE(SIGTMP(NGW))
|
|
ONE_BY_EMASS = 1.0d0 / EMASS
|
|
DO J=1,NGW
|
|
SIGTMP(J) = PMSS(J) * ONE_BY_EMASS
|
|
END DO
|
|
DO I=1,N
|
|
DO J=1,NGW
|
|
C0(J,I) = C0(J,I) * SIGTMP(J)
|
|
END DO
|
|
END DO
|
|
DEALLOCATE(SIGTMP)
|
|
|
|
|
|
RETURN
|
|
END SUBROUTINE backrhoset
|
|
|
|
|
|
SUBROUTINE backrhoset2( ngw, nx, cp, c0, rho, pmss, emass )
|
|
|
|
USE parallel_types
|
|
USE descriptors_module
|
|
USE mp, ONLY: mp_sum, mp_bcast
|
|
USE processors_grid_module, ONLY: get_grid_coor, get_grid_dims
|
|
|
|
IMPLICIT NONE
|
|
TYPE (real_parallel_matrix) :: RHO
|
|
COMPLEX (DP) :: CP(:,:)
|
|
COMPLEX (DP) :: C0(:,:)
|
|
REAL (DP) :: PMSS(:), EMASS
|
|
INTEGER, INTENT(IN) :: ngw, nx
|
|
|
|
INTEGER I,J,NPROW, NPCOL, MYROW, MYCOL
|
|
INTEGER NRL, nrl_ip, n, ii, jj
|
|
INTEGER ip, nngw, nrlx
|
|
INTEGER npz, mez, mpime, nproc, ldc
|
|
|
|
REAL (DP) :: DDOT
|
|
REAL (DP) :: FACT,ONE_BY_EMASS
|
|
REAL (DP), allocatable :: SIGTMP(:)
|
|
REAL (DP), allocatable :: rtmp(:,:)
|
|
COMPLEX (DP), allocatable :: cc(:,:)
|
|
|
|
!
|
|
! SUBROUTINE BODY
|
|
!
|
|
|
|
CALL get_grid_dims(rho%desc%grid, nprow, npcol, npz)
|
|
CALL get_grid_coor(rho%desc%grid, myrow, mycol, mez)
|
|
|
|
ldc = 2 * SIZE( C0, 1 )
|
|
nngw = 2 * ngw
|
|
n = nx
|
|
nproc = nprow
|
|
mpime = myrow
|
|
nrlx = n/nproc+1
|
|
nrl = n/nproc
|
|
IF( mpime < MOD(n,nproc) ) THEN
|
|
nrl = nrl + 1
|
|
END IF
|
|
|
|
IF( npcol > 1 .OR. npz > 1 ) THEN
|
|
CALL errore(' backrhoset2 ',' wrong grid dimension ', npcol)
|
|
END IF
|
|
IF( mycol > 0 .OR. mez > 0 ) THEN
|
|
CALL errore(' backrhoset2 ',' wrong grid coordinates ', mycol)
|
|
END IF
|
|
IF( nrl /= SIZE(rho%m,1) .OR. n /= SIZE(rho%m,2)) THEN
|
|
CALL errore(' backrhoset2 ',' wrong sizes for matrix RHO ', nrl)
|
|
END IF
|
|
IF( n > SIZE(C0,2) ) THEN
|
|
CALL errore(' backrhoset2 ',' wrong number of states ', n)
|
|
END IF
|
|
|
|
IF( nrlx > nrlx_tune ) THEN
|
|
|
|
DO ip = 0, nproc-1
|
|
|
|
nrl_ip = n/nproc
|
|
IF( ip < MOD(n,nproc) ) THEN
|
|
nrl_ip = nrl_ip + 1
|
|
end if
|
|
|
|
allocate( rtmp( nrl_ip, n ) )
|
|
|
|
IF ( mpime == ip ) THEN
|
|
rtmp = rho%m( 1:nrl_ip, 1:n )
|
|
END IF
|
|
call mp_bcast( rtmp, ip )
|
|
|
|
CALL DGEMM('N', 'N', nngw, n, nrl_ip, 1.0D0, &
|
|
& c0(1,ip+1), ldc * nproc, rtmp(1,1), nrl_ip, 1.0D0, cp(1,1), ldc )
|
|
|
|
DEALLOCATE(rtmp)
|
|
|
|
ENDDO
|
|
|
|
ELSE
|
|
|
|
allocate( rtmp( n, n ) )
|
|
rtmp = 0.0d0
|
|
DO j = 1, n
|
|
ii = mpime + 1
|
|
DO i = 1, nrl
|
|
rtmp(ii,j) = rho%m(i,j)
|
|
ii = ii + nproc
|
|
END DO
|
|
END DO
|
|
call mp_sum( rtmp(:,:) )
|
|
|
|
CALL DGEMM('N', 'N', nngw, n, n, 1.0D0, &
|
|
& c0(1,1), ldc, rtmp(1,1), n, 1.0D0, cp(1,1), ldc )
|
|
|
|
deallocate( rtmp )
|
|
|
|
END IF
|
|
|
|
!
|
|
!.....RESTORE C0
|
|
!
|
|
ALLOCATE(SIGTMP(NGW))
|
|
ONE_BY_EMASS = 1.0d0 / EMASS
|
|
DO J=1,NGW
|
|
SIGTMP(J) = PMSS(J) * ONE_BY_EMASS
|
|
END DO
|
|
DO I=1,N
|
|
DO J=1,NGW
|
|
C0(J,I) = C0(J,I) * SIGTMP(J)
|
|
END DO
|
|
END DO
|
|
DEALLOCATE(SIGTMP)
|
|
|
|
RETURN
|
|
END SUBROUTINE backrhoset2
|
|
|
|
|
|
!=----------------------------------------------------------------------------=!
|
|
|
|
|
|
SUBROUTINE prpack( ap, a)
|
|
USE mp_global, ONLY: mpime, nproc
|
|
REAL(DP), INTENT(IN) :: a(:,:)
|
|
REAL(DP), INTENT(OUT) :: ap(:,:)
|
|
INTEGER :: i, j, jl
|
|
DO i = 1, SIZE( ap, 2)
|
|
j = mpime + 1
|
|
DO jl = 1, SIZE( ap, 1)
|
|
ap(jl,i) = a(j,i)
|
|
j = j + nproc
|
|
END DO
|
|
END DO
|
|
RETURN
|
|
END SUBROUTINE prpack
|
|
|
|
SUBROUTINE pzpack( ap, a)
|
|
USE mp_global, ONLY: mpime, nproc
|
|
COMPLEX(DP), INTENT(IN) :: a(:,:)
|
|
COMPLEX(DP), INTENT(OUT) :: ap(:,:)
|
|
INTEGER :: i, j, jl
|
|
DO i = 1, SIZE( ap, 2)
|
|
j = mpime + 1
|
|
DO jl = 1, SIZE( ap, 1)
|
|
ap(jl,i) = a(j,i)
|
|
j = j + nproc
|
|
END DO
|
|
END DO
|
|
RETURN
|
|
END SUBROUTINE pzpack
|
|
|
|
SUBROUTINE prunpack( a, ap)
|
|
USE mp_global, ONLY: mpime, nproc
|
|
REAL(DP), INTENT(IN) :: ap(:,:)
|
|
REAL(DP), INTENT(OUT) :: a(:,:)
|
|
INTEGER :: i, j, jl
|
|
DO i = 1, SIZE(a, 2)
|
|
DO j = 1, SIZE(a, 1)
|
|
a(j,i) = zero
|
|
END DO
|
|
j = mpime + 1
|
|
DO jl = 1, SIZE(ap, 1)
|
|
a(j,i) = ap(jl,i)
|
|
j = j + nproc
|
|
END DO
|
|
END DO
|
|
RETURN
|
|
END SUBROUTINE prunpack
|
|
|
|
SUBROUTINE pzunpack( a, ap)
|
|
USE mp_global, ONLY: mpime, nproc
|
|
COMPLEX(DP), INTENT(IN) :: ap(:,:)
|
|
COMPLEX(DP), INTENT(OUT) :: a(:,:)
|
|
INTEGER :: i, j, jl
|
|
DO i = 1, SIZE(a, 2)
|
|
DO j = 1, SIZE(a, 1)
|
|
a(j,i) = zero
|
|
END DO
|
|
j = mpime + 1
|
|
DO jl = 1, SIZE(ap, 1)
|
|
a(j,i) = ap(jl,i)
|
|
j = j + nproc
|
|
END DO
|
|
END DO
|
|
RETURN
|
|
END SUBROUTINE pzunpack
|
|
|
|
SUBROUTINE rpack( ap, a)
|
|
REAL(DP), INTENT(IN) :: a(:,:)
|
|
REAL(DP), INTENT(OUT) :: ap(:)
|
|
INTEGER :: i, j, k
|
|
K = 0
|
|
DO J = 1, SIZE(a, 2)
|
|
DO I = J, SIZE(a, 1)
|
|
K = K + 1
|
|
ap( k ) = a( i, j )
|
|
END DO
|
|
END DO
|
|
RETURN
|
|
END SUBROUTINE rpack
|
|
|
|
SUBROUTINE zpack( ap, a)
|
|
COMPLEX(DP), INTENT(IN) :: a(:,:)
|
|
COMPLEX(DP), INTENT(OUT) :: ap(:)
|
|
INTEGER :: i, j, k
|
|
K=0
|
|
DO J = 1, SIZE(a, 2)
|
|
DO I = J, SIZE(a, 1)
|
|
K = K + 1
|
|
ap(k) = a(i,j)
|
|
END DO
|
|
END DO
|
|
RETURN
|
|
END SUBROUTINE zpack
|
|
|
|
|
|
END MODULE orthogonalize_base
|