mirror of https://gitlab.com/QEF/q-e.git
becp and related quantities in add_vuspsi and s_psi
have been completely distributed across processors, for gamma point calculations with scalapack. This implementation save memory and add some communications. The distribution of becp is performed whenever allocate_bec_type is called with a communicator as an additional argument. add_vuspsi, calbec and s_psi check for the presence of the communicator within bec type, and if it is present use a distributed algorithm to compute related quantities. Exactly the same strategy could be applied to the k-point and non-collinear case git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@8712 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
parent
4a60554d57
commit
d0eddbe99c
|
@ -69,13 +69,33 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
|
|||
SUBROUTINE add_vuspsi_gamma()
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE mp, ONLY: mp_size, mp_rank, mp_get_comm_null, mp_circular_shift_left
|
||||
!
|
||||
IMPLICIT NONE
|
||||
INTEGER, EXTERNAL :: ldim_block, lind_block, gind_block
|
||||
REAL(DP), ALLOCATABLE :: ps (:,:)
|
||||
INTEGER :: ierr
|
||||
INTEGER :: nproc, mype, m_loc, m_begin, ibnd_loc, icyc, icur_blk, m_max
|
||||
!
|
||||
IF ( nkb == 0 ) RETURN
|
||||
!
|
||||
ALLOCATE (ps (nkb,m), STAT=ierr )
|
||||
m_loc = m
|
||||
m_begin = 1
|
||||
m_max = m
|
||||
nproc = 1
|
||||
mype = 0
|
||||
!
|
||||
IF( becp%comm /= mp_get_comm_null() ) THEN
|
||||
nproc = mp_size( becp%comm )
|
||||
mype = mp_rank( becp%comm )
|
||||
m_loc = ldim_block( becp%nbnd , nproc, mype )
|
||||
m_begin = gind_block( 1, becp%nbnd, nproc, mype )
|
||||
m_max = becp%nbnd / nproc
|
||||
IF( MOD( becp%nbnd, nproc ) /= 0 ) m_max = m_max + 1
|
||||
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
|
||||
END IF
|
||||
!
|
||||
ALLOCATE (ps (nkb,m_max), STAT=ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
CALL errore( ' add_vuspsi_gamma ', ' cannot allocate ps ', ABS( ierr ) )
|
||||
!
|
||||
|
@ -89,7 +109,7 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
|
|||
!
|
||||
IF ( ityp(na) == nt ) THEN
|
||||
!
|
||||
DO ibnd = 1, m
|
||||
DO ibnd = 1, m_loc
|
||||
!
|
||||
DO jh = 1, nh(nt)
|
||||
!
|
||||
|
@ -116,8 +136,36 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
|
|||
!
|
||||
END DO
|
||||
!
|
||||
CALL DGEMM( 'N', 'N', ( 2 * n ), m, nkb, 1.D0, vkb, &
|
||||
IF( becp%comm /= mp_get_comm_null() ) THEN
|
||||
!
|
||||
! parallel block multiplication of vkb and ps
|
||||
!
|
||||
icur_blk = mype
|
||||
!
|
||||
DO icyc = 0, nproc - 1
|
||||
|
||||
m_loc = ldim_block( becp%nbnd , nproc, icur_blk )
|
||||
m_begin = gind_block( 1, becp%nbnd, nproc, icur_blk )
|
||||
|
||||
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
|
||||
|
||||
IF( m_loc > 0 ) THEN
|
||||
CALL DGEMM( 'N', 'N', ( 2 * n ), m_loc, nkb, 1.D0, vkb, &
|
||||
( 2 * lda ), ps, nkb, 1.D0, hpsi( 1, m_begin ), ( 2 * lda ) )
|
||||
END IF
|
||||
|
||||
! block rotation
|
||||
!
|
||||
CALL mp_circular_shift_left( ps, icyc, becp%comm )
|
||||
|
||||
icur_blk = icur_blk + 1
|
||||
IF( icur_blk == nproc ) icur_blk = 0
|
||||
|
||||
END DO
|
||||
ELSE
|
||||
CALL DGEMM( 'N', 'N', ( 2 * n ), m, nkb, 1.D0, vkb, &
|
||||
( 2 * lda ), ps, nkb, 1.D0, hpsi, ( 2 * lda ) )
|
||||
END IF
|
||||
!
|
||||
DEALLOCATE (ps)
|
||||
!
|
||||
|
|
|
@ -26,12 +26,16 @@ MODULE becmod
|
|||
REAL(DP), POINTER :: r(:,:) ! appropriate for gammaonly
|
||||
COMPLEX(DP),POINTER :: k(:,:) ! appropriate for generic k
|
||||
COMPLEX(DP),POINTER :: nc(:,:,:) ! appropriate for noncolin
|
||||
INTEGER :: comm
|
||||
INTEGER :: nbnd
|
||||
END TYPE bec_type
|
||||
#else
|
||||
TYPE bec_type
|
||||
REAL(DP), ALLOCATABLE :: r(:,:) ! appropriate for gammaonly
|
||||
COMPLEX(DP),ALLOCATABLE :: k(:,:) ! appropriate for generic k
|
||||
COMPLEX(DP),ALLOCATABLE :: nc(:,:,:) ! appropriate for noncolin
|
||||
INTEGER :: comm
|
||||
INTEGER :: nbnd
|
||||
END TYPE bec_type
|
||||
#endif
|
||||
!
|
||||
|
@ -65,6 +69,8 @@ CONTAINS
|
|||
SUBROUTINE calbec_bec_type ( npw, beta, psi, betapsi, nbnd )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE mp, ONLY: mp_size, mp_rank, mp_get_comm_null
|
||||
!
|
||||
IMPLICIT NONE
|
||||
COMPLEX (DP), INTENT (in) :: beta(:,:), psi(:,:)
|
||||
TYPE (bec_type), INTENT (inout) :: betapsi ! NB: must be INOUT otherwise
|
||||
|
@ -73,6 +79,10 @@ CONTAINS
|
|||
INTEGER, OPTIONAL :: nbnd
|
||||
!
|
||||
INTEGER :: local_nbnd
|
||||
INTEGER, EXTERNAL :: ldim_block, lind_block, gind_block
|
||||
INTEGER :: nproc, mype, m_loc, m_begin, m_max, ip
|
||||
INTEGER :: ibnd, ibnd_loc
|
||||
REAL(DP), ALLOCATABLE :: dtmp(:,:)
|
||||
!
|
||||
IF ( present (nbnd) ) THEN
|
||||
local_nbnd = nbnd
|
||||
|
@ -81,7 +91,38 @@ CONTAINS
|
|||
ENDIF
|
||||
|
||||
IF ( gamma_only ) THEN
|
||||
CALL calbec_gamma ( npw, beta, psi, betapsi%r, local_nbnd )
|
||||
!
|
||||
IF( betapsi%comm == mp_get_comm_null() ) THEN
|
||||
!
|
||||
CALL calbec_gamma ( npw, beta, psi, betapsi%r, local_nbnd )
|
||||
!
|
||||
ELSE
|
||||
|
||||
nproc = mp_size( betapsi%comm )
|
||||
mype = mp_rank( betapsi%comm )
|
||||
m_max = betapsi%nbnd / nproc
|
||||
IF( MOD( betapsi%nbnd, nproc ) /= 0 ) m_max = m_max + 1
|
||||
m_loc = ldim_block( betapsi%nbnd , nproc, mype )
|
||||
m_begin = gind_block( 1, betapsi%nbnd, nproc, mype )
|
||||
IF( ( m_begin + m_loc - 1 ) > local_nbnd ) m_loc = local_nbnd - m_begin + 1
|
||||
!
|
||||
ALLOCATE( dtmp( SIZE( betapsi%r, 1 ), m_max ) )
|
||||
!
|
||||
DO ip = 0, nproc - 1
|
||||
m_loc = ldim_block( betapsi%nbnd , nproc, ip )
|
||||
m_begin = gind_block( 1, betapsi%nbnd, nproc, ip )
|
||||
IF( ( m_begin + m_loc - 1 ) > local_nbnd ) m_loc = local_nbnd - m_begin + 1
|
||||
IF( m_loc > 0 ) THEN
|
||||
CALL calbec_gamma ( npw, beta, psi(:,m_begin:m_begin+m_loc-1), dtmp, m_loc )
|
||||
IF( ip == mype ) THEN
|
||||
betapsi%r(:,1:m_loc) = dtmp(:,1:m_loc)
|
||||
END IF
|
||||
END IF
|
||||
END DO
|
||||
|
||||
DEALLOCATE( dtmp )
|
||||
!
|
||||
END IF
|
||||
!
|
||||
ELSEIF ( noncolin) THEN
|
||||
!
|
||||
|
@ -266,21 +307,37 @@ CONTAINS
|
|||
END SUBROUTINE calbec_nc
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE allocate_bec_type ( nkb, nbnd, bec )
|
||||
SUBROUTINE allocate_bec_type ( nkb, nbnd, bec, comm )
|
||||
!-----------------------------------------------------------------------
|
||||
USE :: mp, ONLY: mp_size, mp_get_comm_null
|
||||
IMPLICIT NONE
|
||||
TYPE (bec_type) :: bec
|
||||
INTEGER, INTENT (in) :: nkb, nbnd
|
||||
INTEGER :: ierr
|
||||
INTEGER, INTENT (in), OPTIONAL :: comm
|
||||
INTEGER :: ierr, nbnd_siz, nproc
|
||||
!
|
||||
#ifdef __STD_F95
|
||||
NULLIFY(bec%r)
|
||||
NULLIFY(bec%nc)
|
||||
NULLIFY(bec%k)
|
||||
#endif
|
||||
!
|
||||
nbnd_siz = nbnd
|
||||
bec%comm = mp_get_comm_null()
|
||||
bec%nbnd = nbnd
|
||||
!
|
||||
IF( PRESENT( comm ) ) THEN
|
||||
bec%comm = comm
|
||||
nproc = mp_size( comm )
|
||||
IF( nproc > 1 ) THEN
|
||||
nbnd_siz = nbnd / nproc
|
||||
IF( MOD( nbnd, nproc ) /= 0 ) nbnd_siz = nbnd_siz + 1
|
||||
END IF
|
||||
END IF
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
ALLOCATE( bec%r( nkb, nbnd ), STAT=ierr )
|
||||
ALLOCATE( bec%r( nkb, nbnd_siz ), STAT=ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
CALL errore( ' allocate_bec_type ', ' cannot allocate bec%r ', ABS(ierr) )
|
||||
!
|
||||
|
@ -288,7 +345,7 @@ CONTAINS
|
|||
!
|
||||
ELSEIF ( noncolin) THEN
|
||||
!
|
||||
ALLOCATE( bec%nc( nkb, npol, nbnd ), STAT=ierr )
|
||||
ALLOCATE( bec%nc( nkb, npol, nbnd_siz ), STAT=ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
CALL errore( ' allocate_bec_type ', ' cannot allocate bec%nc ', ABS(ierr) )
|
||||
!
|
||||
|
@ -296,7 +353,7 @@ CONTAINS
|
|||
!
|
||||
ELSE
|
||||
!
|
||||
ALLOCATE( bec%k( nkb, nbnd ), STAT=ierr )
|
||||
ALLOCATE( bec%k( nkb, nbnd_siz ), STAT=ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
CALL errore( ' allocate_bec_type ', ' cannot allocate bec%k ', ABS(ierr) )
|
||||
!
|
||||
|
@ -312,9 +369,13 @@ CONTAINS
|
|||
SUBROUTINE deallocate_bec_type (bec)
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE :: mp, ONLY: mp_get_comm_null
|
||||
IMPLICIT NONE
|
||||
TYPE (bec_type) :: bec
|
||||
!
|
||||
bec%comm = mp_get_comm_null()
|
||||
bec%nbnd = 0
|
||||
!
|
||||
#ifdef __STD_F95
|
||||
IF (associated(bec%r)) DEALLOCATE(bec%r)
|
||||
IF (associated(bec%nc)) DEALLOCATE(bec%nc)
|
||||
|
|
|
@ -186,7 +186,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
USE becmod, ONLY : bec_type, becp, calbec, &
|
||||
allocate_bec_type, deallocate_bec_type
|
||||
USE klist, ONLY : nks
|
||||
USE mp_global, ONLY : nproc_bgrp
|
||||
USE mp_global, ONLY : nproc_bgrp, intra_bgrp_comm
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -218,7 +218,11 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
IF ( nbndx > npwx*nproc_bgrp ) &
|
||||
CALL errore ( 'diag_bands', 'too many bands, or too few plane waves',1)
|
||||
!
|
||||
#ifdef __SCALAPACK
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp, intra_bgrp_comm )
|
||||
#else
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp )
|
||||
#endif
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
|
|
|
@ -570,7 +570,11 @@ SUBROUTINE pregterg( npw, npwx, nvec, nvecx, evc, ethr, &
|
|||
IF( ierr /= 0 ) &
|
||||
CALL errore( 'pregterg ',' cannot allocate hpsi ', ABS(ierr) )
|
||||
!
|
||||
IF ( uspp ) ALLOCATE( spsi( npwx, nvecx ) )
|
||||
IF ( uspp ) THEN
|
||||
ALLOCATE( spsi( npwx, nvecx ), STAT=ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
CALL errore( 'pregterg ',' cannot allocate spsi ', ABS(ierr) )
|
||||
END IF
|
||||
!
|
||||
! ... Initialize the matrix descriptor
|
||||
!
|
||||
|
|
|
@ -94,10 +94,7 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
|
|||
! ... gamma version
|
||||
!
|
||||
USE becmod, ONLY : bec_type, becp
|
||||
#if defined __SCALAPACK
|
||||
USE mp_global, ONLY: nproc_bgrp, intra_bgrp_comm, me_bgrp
|
||||
USE mp, ONLY: mp_circular_shift_left
|
||||
#endif
|
||||
USE mp, ONLY: mp_size, mp_rank, mp_get_comm_null, mp_circular_shift_left
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -105,31 +102,28 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
|
|||
!
|
||||
INTEGER :: ikb, jkb, ih, jh, na, nt, ijkb0, ibnd, ierr
|
||||
! counters
|
||||
INTEGER :: m_loc, m_begin, ibnd_loc, icyc, icur_blk, m_max
|
||||
INTEGER :: nproc, mype, m_loc, m_begin, ibnd_loc, icyc, icur_blk, m_max
|
||||
! data distribution indexes
|
||||
#if defined __SCALAPACK
|
||||
INTEGER, EXTERNAL :: ldim_block, lind_block, gind_block
|
||||
! data distribution functions
|
||||
#endif
|
||||
REAL(DP), ALLOCATABLE :: ps(:,:)
|
||||
! the product vkb and psi
|
||||
!
|
||||
#if defined __SCALAPACK
|
||||
IF( m == 1 ) THEN
|
||||
m_loc = 1
|
||||
m_begin = 1
|
||||
m_max = 1
|
||||
ELSE
|
||||
m_loc = ldim_block( m , nproc_bgrp, me_bgrp )
|
||||
m_begin = gind_block( 1, m, nproc_bgrp, me_bgrp )
|
||||
m_max = m / nproc_bgrp
|
||||
IF( MOD( m, nproc_bgrp ) /= 0 ) m_max = m_max + 1
|
||||
END IF
|
||||
#else
|
||||
m_loc = m
|
||||
m_begin = 1
|
||||
m_max = m
|
||||
#endif
|
||||
nproc = 1
|
||||
mype = 0
|
||||
!
|
||||
IF( becp%comm /= mp_get_comm_null() ) THEN
|
||||
nproc = mp_size( becp%comm )
|
||||
mype = mp_rank( becp%comm )
|
||||
m_loc = ldim_block( becp%nbnd , nproc, mype )
|
||||
m_begin = gind_block( 1, becp%nbnd, nproc, mype )
|
||||
m_max = becp%nbnd / nproc
|
||||
IF( MOD( becp%nbnd, nproc ) /= 0 ) m_max = m_max + 1
|
||||
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
|
||||
END IF
|
||||
!
|
||||
ALLOCATE( ps( nkb, m_max ), STAT=ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
|
@ -149,7 +143,7 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
|
|||
DO ih = 1, nh(nt)
|
||||
ikb = ijkb0 + ih
|
||||
ps(ikb,ibnd_loc) = ps(ikb,ibnd_loc) + &
|
||||
qq(ih,jh,nt) * becp%r(jkb,ibnd)
|
||||
qq(ih,jh,nt) * becp%r(jkb,ibnd_loc)
|
||||
END DO
|
||||
END DO
|
||||
END DO
|
||||
|
@ -163,39 +157,42 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
|
|||
END IF
|
||||
END DO
|
||||
!
|
||||
IF ( m == 1 ) THEN
|
||||
IF( becp%comm /= mp_get_comm_null() ) THEN
|
||||
!
|
||||
! parallel block multiplication of vkb and ps
|
||||
!
|
||||
icur_blk = mype
|
||||
!
|
||||
DO icyc = 0, nproc - 1
|
||||
|
||||
m_loc = ldim_block( becp%nbnd , nproc, icur_blk )
|
||||
m_begin = gind_block( 1, becp%nbnd, nproc, icur_blk )
|
||||
|
||||
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
|
||||
|
||||
IF( m_loc > 0 ) THEN
|
||||
CALL DGEMM( 'N', 'N', 2 * n, m_loc, nkb, 1.D0, vkb, &
|
||||
2 * lda, ps, nkb, 1.D0, spsi( 1, m_begin ), 2 * lda )
|
||||
END IF
|
||||
|
||||
! block rotation
|
||||
!
|
||||
CALL mp_circular_shift_left( ps, icyc, becp%comm )
|
||||
|
||||
icur_blk = icur_blk + 1
|
||||
IF( icur_blk == nproc ) icur_blk = 0
|
||||
|
||||
END DO
|
||||
!
|
||||
ELSE IF ( m == 1 ) THEN
|
||||
!
|
||||
CALL DGEMV( 'N', 2 * n, nkb, 1.D0, vkb, &
|
||||
2 * lda, ps, 1, 1.D0, spsi, 1 )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
#if defined __SCALAPACK
|
||||
!
|
||||
! parallel block multiplication of vkb and ps
|
||||
!
|
||||
icur_blk = me_bgrp
|
||||
!
|
||||
DO icyc = 0, nproc_bgrp - 1
|
||||
|
||||
m_loc = ldim_block( m , nproc_bgrp, icur_blk )
|
||||
m_begin = gind_block( 1, m, nproc_bgrp, icur_blk )
|
||||
|
||||
CALL DGEMM( 'N', 'N', 2 * n, m_loc, nkb, 1.D0, vkb, &
|
||||
2 * lda, ps, nkb, 1.D0, spsi( 1, m_begin ), 2 * lda )
|
||||
|
||||
! block rotation
|
||||
!
|
||||
CALL mp_circular_shift_left( ps, icyc, intra_bgrp_comm )
|
||||
|
||||
icur_blk = icur_blk + 1
|
||||
IF( icur_blk == nproc_bgrp ) icur_blk = 0
|
||||
|
||||
END DO
|
||||
#else
|
||||
CALL DGEMM( 'N', 'N', 2 * n, m, nkb, 1.D0, vkb, &
|
||||
2 * lda, ps, nkb, 1.D0, spsi, 2 * lda )
|
||||
#endif
|
||||
!
|
||||
END IF
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue