becp and related quantities in add_vuspsi and s_psi

have been completely distributed across processors,
for gamma point calculations with scalapack.
This implementation save memory and add some communications.
The distribution of becp is performed whenever 
allocate_bec_type is called with a communicator as an additional argument.
add_vuspsi, calbec and s_psi check for the presence of the
communicator within bec type, and if it is present use
a distributed algorithm to compute related quantities.
Exactly the same strategy could be applied to the k-point
and non-collinear case




git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@8712 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
ccavazzoni 2012-02-22 22:34:05 +00:00
parent 4a60554d57
commit d0eddbe99c
5 changed files with 171 additions and 57 deletions

View File

@ -69,13 +69,33 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
SUBROUTINE add_vuspsi_gamma()
!-----------------------------------------------------------------------
!
USE mp, ONLY: mp_size, mp_rank, mp_get_comm_null, mp_circular_shift_left
!
IMPLICIT NONE
INTEGER, EXTERNAL :: ldim_block, lind_block, gind_block
REAL(DP), ALLOCATABLE :: ps (:,:)
INTEGER :: ierr
INTEGER :: nproc, mype, m_loc, m_begin, ibnd_loc, icyc, icur_blk, m_max
!
IF ( nkb == 0 ) RETURN
!
ALLOCATE (ps (nkb,m), STAT=ierr )
m_loc = m
m_begin = 1
m_max = m
nproc = 1
mype = 0
!
IF( becp%comm /= mp_get_comm_null() ) THEN
nproc = mp_size( becp%comm )
mype = mp_rank( becp%comm )
m_loc = ldim_block( becp%nbnd , nproc, mype )
m_begin = gind_block( 1, becp%nbnd, nproc, mype )
m_max = becp%nbnd / nproc
IF( MOD( becp%nbnd, nproc ) /= 0 ) m_max = m_max + 1
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
END IF
!
ALLOCATE (ps (nkb,m_max), STAT=ierr )
IF( ierr /= 0 ) &
CALL errore( ' add_vuspsi_gamma ', ' cannot allocate ps ', ABS( ierr ) )
!
@ -89,7 +109,7 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
!
IF ( ityp(na) == nt ) THEN
!
DO ibnd = 1, m
DO ibnd = 1, m_loc
!
DO jh = 1, nh(nt)
!
@ -116,8 +136,36 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
!
END DO
!
CALL DGEMM( 'N', 'N', ( 2 * n ), m, nkb, 1.D0, vkb, &
IF( becp%comm /= mp_get_comm_null() ) THEN
!
! parallel block multiplication of vkb and ps
!
icur_blk = mype
!
DO icyc = 0, nproc - 1
m_loc = ldim_block( becp%nbnd , nproc, icur_blk )
m_begin = gind_block( 1, becp%nbnd, nproc, icur_blk )
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
IF( m_loc > 0 ) THEN
CALL DGEMM( 'N', 'N', ( 2 * n ), m_loc, nkb, 1.D0, vkb, &
( 2 * lda ), ps, nkb, 1.D0, hpsi( 1, m_begin ), ( 2 * lda ) )
END IF
! block rotation
!
CALL mp_circular_shift_left( ps, icyc, becp%comm )
icur_blk = icur_blk + 1
IF( icur_blk == nproc ) icur_blk = 0
END DO
ELSE
CALL DGEMM( 'N', 'N', ( 2 * n ), m, nkb, 1.D0, vkb, &
( 2 * lda ), ps, nkb, 1.D0, hpsi, ( 2 * lda ) )
END IF
!
DEALLOCATE (ps)
!

View File

@ -26,12 +26,16 @@ MODULE becmod
REAL(DP), POINTER :: r(:,:) ! appropriate for gammaonly
COMPLEX(DP),POINTER :: k(:,:) ! appropriate for generic k
COMPLEX(DP),POINTER :: nc(:,:,:) ! appropriate for noncolin
INTEGER :: comm
INTEGER :: nbnd
END TYPE bec_type
#else
TYPE bec_type
REAL(DP), ALLOCATABLE :: r(:,:) ! appropriate for gammaonly
COMPLEX(DP),ALLOCATABLE :: k(:,:) ! appropriate for generic k
COMPLEX(DP),ALLOCATABLE :: nc(:,:,:) ! appropriate for noncolin
INTEGER :: comm
INTEGER :: nbnd
END TYPE bec_type
#endif
!
@ -65,6 +69,8 @@ CONTAINS
SUBROUTINE calbec_bec_type ( npw, beta, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
!
USE mp, ONLY: mp_size, mp_rank, mp_get_comm_null
!
IMPLICIT NONE
COMPLEX (DP), INTENT (in) :: beta(:,:), psi(:,:)
TYPE (bec_type), INTENT (inout) :: betapsi ! NB: must be INOUT otherwise
@ -73,6 +79,10 @@ CONTAINS
INTEGER, OPTIONAL :: nbnd
!
INTEGER :: local_nbnd
INTEGER, EXTERNAL :: ldim_block, lind_block, gind_block
INTEGER :: nproc, mype, m_loc, m_begin, m_max, ip
INTEGER :: ibnd, ibnd_loc
REAL(DP), ALLOCATABLE :: dtmp(:,:)
!
IF ( present (nbnd) ) THEN
local_nbnd = nbnd
@ -81,7 +91,38 @@ CONTAINS
ENDIF
IF ( gamma_only ) THEN
CALL calbec_gamma ( npw, beta, psi, betapsi%r, local_nbnd )
!
IF( betapsi%comm == mp_get_comm_null() ) THEN
!
CALL calbec_gamma ( npw, beta, psi, betapsi%r, local_nbnd )
!
ELSE
nproc = mp_size( betapsi%comm )
mype = mp_rank( betapsi%comm )
m_max = betapsi%nbnd / nproc
IF( MOD( betapsi%nbnd, nproc ) /= 0 ) m_max = m_max + 1
m_loc = ldim_block( betapsi%nbnd , nproc, mype )
m_begin = gind_block( 1, betapsi%nbnd, nproc, mype )
IF( ( m_begin + m_loc - 1 ) > local_nbnd ) m_loc = local_nbnd - m_begin + 1
!
ALLOCATE( dtmp( SIZE( betapsi%r, 1 ), m_max ) )
!
DO ip = 0, nproc - 1
m_loc = ldim_block( betapsi%nbnd , nproc, ip )
m_begin = gind_block( 1, betapsi%nbnd, nproc, ip )
IF( ( m_begin + m_loc - 1 ) > local_nbnd ) m_loc = local_nbnd - m_begin + 1
IF( m_loc > 0 ) THEN
CALL calbec_gamma ( npw, beta, psi(:,m_begin:m_begin+m_loc-1), dtmp, m_loc )
IF( ip == mype ) THEN
betapsi%r(:,1:m_loc) = dtmp(:,1:m_loc)
END IF
END IF
END DO
DEALLOCATE( dtmp )
!
END IF
!
ELSEIF ( noncolin) THEN
!
@ -266,21 +307,37 @@ CONTAINS
END SUBROUTINE calbec_nc
!
!-----------------------------------------------------------------------
SUBROUTINE allocate_bec_type ( nkb, nbnd, bec )
SUBROUTINE allocate_bec_type ( nkb, nbnd, bec, comm )
!-----------------------------------------------------------------------
USE :: mp, ONLY: mp_size, mp_get_comm_null
IMPLICIT NONE
TYPE (bec_type) :: bec
INTEGER, INTENT (in) :: nkb, nbnd
INTEGER :: ierr
INTEGER, INTENT (in), OPTIONAL :: comm
INTEGER :: ierr, nbnd_siz, nproc
!
#ifdef __STD_F95
NULLIFY(bec%r)
NULLIFY(bec%nc)
NULLIFY(bec%k)
#endif
!
nbnd_siz = nbnd
bec%comm = mp_get_comm_null()
bec%nbnd = nbnd
!
IF( PRESENT( comm ) ) THEN
bec%comm = comm
nproc = mp_size( comm )
IF( nproc > 1 ) THEN
nbnd_siz = nbnd / nproc
IF( MOD( nbnd, nproc ) /= 0 ) nbnd_siz = nbnd_siz + 1
END IF
END IF
!
IF ( gamma_only ) THEN
!
ALLOCATE( bec%r( nkb, nbnd ), STAT=ierr )
ALLOCATE( bec%r( nkb, nbnd_siz ), STAT=ierr )
IF( ierr /= 0 ) &
CALL errore( ' allocate_bec_type ', ' cannot allocate bec%r ', ABS(ierr) )
!
@ -288,7 +345,7 @@ CONTAINS
!
ELSEIF ( noncolin) THEN
!
ALLOCATE( bec%nc( nkb, npol, nbnd ), STAT=ierr )
ALLOCATE( bec%nc( nkb, npol, nbnd_siz ), STAT=ierr )
IF( ierr /= 0 ) &
CALL errore( ' allocate_bec_type ', ' cannot allocate bec%nc ', ABS(ierr) )
!
@ -296,7 +353,7 @@ CONTAINS
!
ELSE
!
ALLOCATE( bec%k( nkb, nbnd ), STAT=ierr )
ALLOCATE( bec%k( nkb, nbnd_siz ), STAT=ierr )
IF( ierr /= 0 ) &
CALL errore( ' allocate_bec_type ', ' cannot allocate bec%k ', ABS(ierr) )
!
@ -312,9 +369,13 @@ CONTAINS
SUBROUTINE deallocate_bec_type (bec)
!-----------------------------------------------------------------------
!
USE :: mp, ONLY: mp_get_comm_null
IMPLICIT NONE
TYPE (bec_type) :: bec
!
bec%comm = mp_get_comm_null()
bec%nbnd = 0
!
#ifdef __STD_F95
IF (associated(bec%r)) DEALLOCATE(bec%r)
IF (associated(bec%nc)) DEALLOCATE(bec%nc)

View File

@ -186,7 +186,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
USE becmod, ONLY : bec_type, becp, calbec, &
allocate_bec_type, deallocate_bec_type
USE klist, ONLY : nks
USE mp_global, ONLY : nproc_bgrp
USE mp_global, ONLY : nproc_bgrp, intra_bgrp_comm
!
IMPLICIT NONE
!
@ -218,7 +218,11 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
IF ( nbndx > npwx*nproc_bgrp ) &
CALL errore ( 'diag_bands', 'too many bands, or too few plane waves',1)
!
#ifdef __SCALAPACK
CALL allocate_bec_type ( nkb, nbnd, becp, intra_bgrp_comm )
#else
CALL allocate_bec_type ( nkb, nbnd, becp )
#endif
!
IF ( gamma_only ) THEN
!

View File

@ -570,7 +570,11 @@ SUBROUTINE pregterg( npw, npwx, nvec, nvecx, evc, ethr, &
IF( ierr /= 0 ) &
CALL errore( 'pregterg ',' cannot allocate hpsi ', ABS(ierr) )
!
IF ( uspp ) ALLOCATE( spsi( npwx, nvecx ) )
IF ( uspp ) THEN
ALLOCATE( spsi( npwx, nvecx ), STAT=ierr )
IF( ierr /= 0 ) &
CALL errore( 'pregterg ',' cannot allocate spsi ', ABS(ierr) )
END IF
!
! ... Initialize the matrix descriptor
!

View File

@ -94,10 +94,7 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
! ... gamma version
!
USE becmod, ONLY : bec_type, becp
#if defined __SCALAPACK
USE mp_global, ONLY: nproc_bgrp, intra_bgrp_comm, me_bgrp
USE mp, ONLY: mp_circular_shift_left
#endif
USE mp, ONLY: mp_size, mp_rank, mp_get_comm_null, mp_circular_shift_left
!
IMPLICIT NONE
!
@ -105,31 +102,28 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
!
INTEGER :: ikb, jkb, ih, jh, na, nt, ijkb0, ibnd, ierr
! counters
INTEGER :: m_loc, m_begin, ibnd_loc, icyc, icur_blk, m_max
INTEGER :: nproc, mype, m_loc, m_begin, ibnd_loc, icyc, icur_blk, m_max
! data distribution indexes
#if defined __SCALAPACK
INTEGER, EXTERNAL :: ldim_block, lind_block, gind_block
! data distribution functions
#endif
REAL(DP), ALLOCATABLE :: ps(:,:)
! the product vkb and psi
!
#if defined __SCALAPACK
IF( m == 1 ) THEN
m_loc = 1
m_begin = 1
m_max = 1
ELSE
m_loc = ldim_block( m , nproc_bgrp, me_bgrp )
m_begin = gind_block( 1, m, nproc_bgrp, me_bgrp )
m_max = m / nproc_bgrp
IF( MOD( m, nproc_bgrp ) /= 0 ) m_max = m_max + 1
END IF
#else
m_loc = m
m_begin = 1
m_max = m
#endif
nproc = 1
mype = 0
!
IF( becp%comm /= mp_get_comm_null() ) THEN
nproc = mp_size( becp%comm )
mype = mp_rank( becp%comm )
m_loc = ldim_block( becp%nbnd , nproc, mype )
m_begin = gind_block( 1, becp%nbnd, nproc, mype )
m_max = becp%nbnd / nproc
IF( MOD( becp%nbnd, nproc ) /= 0 ) m_max = m_max + 1
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
END IF
!
ALLOCATE( ps( nkb, m_max ), STAT=ierr )
IF( ierr /= 0 ) &
@ -149,7 +143,7 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
DO ih = 1, nh(nt)
ikb = ijkb0 + ih
ps(ikb,ibnd_loc) = ps(ikb,ibnd_loc) + &
qq(ih,jh,nt) * becp%r(jkb,ibnd)
qq(ih,jh,nt) * becp%r(jkb,ibnd_loc)
END DO
END DO
END DO
@ -163,39 +157,42 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
END IF
END DO
!
IF ( m == 1 ) THEN
IF( becp%comm /= mp_get_comm_null() ) THEN
!
! parallel block multiplication of vkb and ps
!
icur_blk = mype
!
DO icyc = 0, nproc - 1
m_loc = ldim_block( becp%nbnd , nproc, icur_blk )
m_begin = gind_block( 1, becp%nbnd, nproc, icur_blk )
IF( ( m_begin + m_loc - 1 ) > m ) m_loc = m - m_begin + 1
IF( m_loc > 0 ) THEN
CALL DGEMM( 'N', 'N', 2 * n, m_loc, nkb, 1.D0, vkb, &
2 * lda, ps, nkb, 1.D0, spsi( 1, m_begin ), 2 * lda )
END IF
! block rotation
!
CALL mp_circular_shift_left( ps, icyc, becp%comm )
icur_blk = icur_blk + 1
IF( icur_blk == nproc ) icur_blk = 0
END DO
!
ELSE IF ( m == 1 ) THEN
!
CALL DGEMV( 'N', 2 * n, nkb, 1.D0, vkb, &
2 * lda, ps, 1, 1.D0, spsi, 1 )
!
ELSE
!
#if defined __SCALAPACK
!
! parallel block multiplication of vkb and ps
!
icur_blk = me_bgrp
!
DO icyc = 0, nproc_bgrp - 1
m_loc = ldim_block( m , nproc_bgrp, icur_blk )
m_begin = gind_block( 1, m, nproc_bgrp, icur_blk )
CALL DGEMM( 'N', 'N', 2 * n, m_loc, nkb, 1.D0, vkb, &
2 * lda, ps, nkb, 1.D0, spsi( 1, m_begin ), 2 * lda )
! block rotation
!
CALL mp_circular_shift_left( ps, icyc, intra_bgrp_comm )
icur_blk = icur_blk + 1
IF( icur_blk == nproc_bgrp ) icur_blk = 0
END DO
#else
CALL DGEMM( 'N', 'N', 2 * n, m, nkb, 1.D0, vkb, &
2 * lda, ps, nkb, 1.D0, spsi, 2 * lda )
#endif
!
END IF
!