mirror of https://gitlab.com/QEF/q-e.git
530 lines
17 KiB
Fortran
530 lines
17 KiB
Fortran
MODULE fft_helper_subroutines
|
|
|
|
IMPLICIT NONE
|
|
SAVE
|
|
|
|
INTERFACE tg_reduce_rho
|
|
MODULE PROCEDURE tg_reduce_rho_1,tg_reduce_rho_2,tg_reduce_rho_3,tg_reduce_rho_4, &
|
|
& tg_reduce_rho_5
|
|
END INTERFACE
|
|
|
|
CONTAINS
|
|
|
|
SUBROUTINE tg_reduce_rho_1( rhos, tg_rho_nc, tg_rho, ispin, noncolin, domag, desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
INTEGER, INTENT(IN) :: ispin
|
|
LOGICAL, INTENT(IN) :: noncolin, domag
|
|
REAL(DP), INTENT(INOUT) :: tg_rho(:)
|
|
REAL(DP), INTENT(INOUT) :: tg_rho_nc(:,:)
|
|
REAL(DP), INTENT(OUT) :: rhos(:,:)
|
|
|
|
INTEGER :: ierr, ioff, idx, ir3, ir, ipol, ioff_tg, nxyp, npol_
|
|
! write (*,*) ' enter tg_reduce_rho_1'
|
|
|
|
#if defined(__MPI)
|
|
IF( noncolin) THEN
|
|
CALL MPI_ALLREDUCE( MPI_IN_PLACE, tg_rho_nc, SIZE(tg_rho_nc), MPI_DOUBLE_PRECISION, MPI_SUM, desc%comm2, ierr )
|
|
ELSE
|
|
CALL MPI_ALLREDUCE( MPI_IN_PLACE, tg_rho, SIZE(tg_rho), MPI_DOUBLE_PRECISION, MPI_SUM, desc%comm2, ierr )
|
|
END IF
|
|
#endif
|
|
!
|
|
! copy the charge back to the proper processor location
|
|
!
|
|
nxyp = desc%nr1x * desc%my_nr2p
|
|
IF (noncolin) THEN
|
|
npol_ = 1 ; if (domag) npol_ = 4
|
|
endif
|
|
DO ir3=1,desc%my_nr3p
|
|
ioff = desc%nr1x * desc%my_nr2p * (ir3-1)
|
|
ioff_tg = desc%nr1x * desc%nr2x * (ir3-1) + desc%nr1x * desc%my_i0r2p
|
|
IF (noncolin) THEN
|
|
!$omp parallel do
|
|
DO ipol=1, npol_
|
|
DO ir = 1, nxyp
|
|
rhos(ir+ioff,ipol) = rhos(ir+ioff,ipol) + tg_rho_nc(ir+ioff_tg,ipol)
|
|
END DO
|
|
END DO
|
|
!$omp end parallel do
|
|
ELSE
|
|
!$omp parallel do
|
|
DO ir = 1, nxyp
|
|
rhos(ir+ioff,ispin) = rhos(ir+ioff,ispin) + tg_rho(ir+ioff_tg)
|
|
END DO
|
|
!$omp end parallel do
|
|
END IF
|
|
END DO
|
|
|
|
END SUBROUTINE
|
|
|
|
|
|
|
|
SUBROUTINE tg_reduce_rho_2( rhos, tmp_rhos, ispin, desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
INTEGER, INTENT(IN) :: ispin
|
|
REAL(DP), INTENT(INOUT) :: tmp_rhos(:)
|
|
REAL(DP), INTENT(OUT) :: rhos(:,:)
|
|
|
|
INTEGER :: ierr, ioff, idx, ir3, nxyp, ioff_tg
|
|
! write (*,*) ' enter tg_reduce_rho_2'
|
|
|
|
IF ( desc%nproc2 > 1 ) THEN
|
|
#if defined(__MPI)
|
|
CALL MPI_ALLREDUCE( MPI_IN_PLACE, tmp_rhos, SIZE(tmp_rhos), MPI_DOUBLE_PRECISION, MPI_SUM, desc%comm2, ierr )
|
|
#endif
|
|
ENDIF
|
|
!
|
|
!BRING CHARGE DENSITY BACK TO ITS ORIGINAL POSITION
|
|
!
|
|
nxyp = desc%nr1x * desc%my_nr2p
|
|
DO ir3 = 1, desc%my_nr3p
|
|
ioff = desc%nr1x * desc%my_nr2p * (ir3-1)
|
|
ioff_tg = desc%nr1x * desc%nr2x * (ir3-1) + desc%nr1x * desc%my_i0r2p
|
|
rhos(ioff+1:ioff+nxyp,ispin) = rhos(ioff+1:ioff+nxyp,ispin) + tmp_rhos(ioff_tg+1:ioff_tg+nxyp)
|
|
END DO
|
|
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE tg_reduce_rho_3( rhos, tmp_rhos, desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
REAL(DP), INTENT(INOUT) :: tmp_rhos(:,:)
|
|
REAL(DP), INTENT(OUT) :: rhos(:,:)
|
|
|
|
INTEGER :: ierr, from, ir3, ioff, nxyp, ioff_tg
|
|
! write (*,*) ' enter tg_reduce_rho_3'
|
|
|
|
IF ( desc%nproc2 > 1 ) THEN
|
|
#if defined(__MPI)
|
|
CALL MPI_ALLREDUCE( MPI_IN_PLACE, tmp_rhos, SIZE(tmp_rhos), MPI_DOUBLE_PRECISION, MPI_SUM, desc%comm2, ierr )
|
|
#endif
|
|
ENDIF
|
|
!
|
|
!BRING CHARGE DENSITY BACK TO ITS ORIGINAL POSITION
|
|
!
|
|
!If the current processor is not the "first" processor in its
|
|
!orbital group then does a local copy (reshuffling) of its data
|
|
!
|
|
nxyp = desc%nr1x * desc%my_nr2p
|
|
DO ir3 = 1, desc%my_nr3p
|
|
ioff = desc%nr1x * desc%my_nr2p * (ir3-1)
|
|
ioff_tg = desc%nr1x * desc%nr2x * (ir3-1) + desc%nr1x * desc%my_i0r2p
|
|
rhos(ioff+1:ioff+nxyp,:) = rhos(ioff+1:ioff+nxyp,:) + tmp_rhos(ioff_tg+1:ioff_tg+nxyp,:)
|
|
END DO
|
|
END SUBROUTINE
|
|
|
|
|
|
SUBROUTINE tg_reduce_rho_4( rhos, tmp_rhos, desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
COMPLEX(DP), INTENT(INOUT) :: tmp_rhos(:)
|
|
COMPLEX(DP), INTENT(OUT) :: rhos(:)
|
|
|
|
INTEGER :: ierr, from, ir3, ioff, nxyp, ioff_tg
|
|
! write (*,*) ' enter tg_reduce_rho_4'
|
|
|
|
IF ( desc%nproc2 > 1 ) THEN
|
|
#if defined(__MPI)
|
|
CALL MPI_ALLREDUCE( MPI_IN_PLACE, tmp_rhos, 2*SIZE(tmp_rhos), MPI_DOUBLE_PRECISION, MPI_SUM, desc%comm2, ierr )
|
|
#endif
|
|
ENDIF
|
|
!
|
|
!BRING CHARGE DENSITY BACK TO ITS ORIGINAL POSITION
|
|
!
|
|
!If the current processor is not the "first" processor in its
|
|
!orbital group then does a local copy (reshuffling) of its data
|
|
!
|
|
nxyp = desc%nr1x * desc%my_nr2p
|
|
DO ir3 = 1, desc%my_nr3p
|
|
ioff = desc%nr1x * desc%my_nr2p * (ir3-1)
|
|
ioff_tg = desc%nr1x * desc%nr2x * (ir3-1) + desc%nr1x * desc%my_i0r2p
|
|
rhos(ioff+1:ioff+nxyp) = rhos(ioff+1:ioff+nxyp) + tmp_rhos(ioff_tg+1:ioff_tg+nxyp)
|
|
END DO
|
|
END SUBROUTINE
|
|
|
|
|
|
SUBROUTINE tg_reduce_rho_5( rhos, tmp_rhos, desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
COMPLEX(DP), INTENT(INOUT) :: tmp_rhos(:,:)
|
|
COMPLEX(DP), INTENT(OUT) :: rhos(:,:)
|
|
|
|
INTEGER :: ierr, from, ir3, ioff, nxyp, ioff_tg
|
|
! write (*,*) ' enter tg_reduce_rho_5'
|
|
|
|
IF ( desc%nproc2 > 1 ) THEN
|
|
#if defined(__MPI)
|
|
CALL MPI_ALLREDUCE( MPI_IN_PLACE, tmp_rhos, 2*SIZE(tmp_rhos), MPI_DOUBLE_PRECISION, MPI_SUM, desc%comm2, ierr )
|
|
#endif
|
|
ENDIF
|
|
!
|
|
!BRING CHARGE DENSITY BACK TO ITS ORIGINAL POSITION
|
|
!
|
|
!If the current processor is not the "first" processor in its
|
|
!orbital group then does a local copy (reshuffling) of its data
|
|
!
|
|
nxyp = desc%nr1x * desc%my_nr2p
|
|
DO ir3 = 1, desc%my_nr3p
|
|
ioff = desc%nr1x * desc%my_nr2p * (ir3-1)
|
|
ioff_tg = desc%nr1x * desc%nr2x * (ir3-1) + desc%nr1x * desc%my_i0r2p
|
|
rhos(ioff+1:ioff+nxyp,:) = rhos(ioff+1:ioff+nxyp,:) + tmp_rhos(ioff_tg+1:ioff_tg+nxyp,:)
|
|
END DO
|
|
END SUBROUTINE
|
|
|
|
|
|
|
|
SUBROUTINE tg_get_nnr( desc, right_nnr )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
INTEGER, INTENT(OUT) :: right_nnr
|
|
right_nnr = desc%nnr
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE tg_get_local_nr3( desc, val )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
INTEGER, INTENT(OUT) :: val
|
|
val = desc%my_nr3p
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE tg_get_group_nr3( desc, val )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
INTEGER, INTENT(OUT) :: val
|
|
val = desc%my_nr3p
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE tg_get_recip_inc( desc, val )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
INTEGER, INTENT(OUT) :: val
|
|
val = desc%nnr
|
|
END SUBROUTINE
|
|
|
|
PURE FUNCTION fftx_ntgrp( desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
INTEGER :: fftx_ntgrp
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
fftx_ntgrp = desc%nproc2
|
|
END FUNCTION
|
|
|
|
PURE FUNCTION fftx_tgpe( desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
INTEGER :: fftx_tgpe
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
fftx_tgpe = desc%mype2
|
|
END FUNCTION
|
|
|
|
PURE FUNCTION fftx_tgcomm( desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
INTEGER :: fftx_tgcomm
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
fftx_tgcomm = desc%comm2
|
|
END FUNCTION
|
|
|
|
SUBROUTINE fftx_add_field( r, f, desc )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
REAL(DP), INTENT(INOUT) :: r(:,:)
|
|
REAL(DP), INTENT(IN) :: f(:)
|
|
INTEGER :: nspin, ir
|
|
nspin = SIZE( r, 2 )
|
|
IF (nspin.EQ.1) THEN
|
|
!$omp parallel do
|
|
DO ir=1,desc%nr1*desc%nr2*desc%my_nr3p
|
|
r(ir,1)=r(ir,1)+f(ir)
|
|
END DO
|
|
!$omp end parallel do
|
|
ELSE IF (nspin.EQ.2) THEN
|
|
!$omp parallel do
|
|
DO ir=1,desc%nr1*desc%nr2*desc%my_nr3p
|
|
r(ir,1)=r(ir,1)+f(ir)
|
|
END DO
|
|
!$omp end parallel do
|
|
!$omp parallel do
|
|
DO ir=1,desc%nr1*desc%nr2*desc%my_nr3p
|
|
r(ir,2)=r(ir,2)+f(ir)
|
|
END DO
|
|
!$omp end parallel do
|
|
END IF
|
|
END SUBROUTINE
|
|
|
|
|
|
SUBROUTINE c2psi_gamma( desc, psi, c, ca )
|
|
!
|
|
! Copy wave-functions from 1D array (c_bgrp) to 3D array (psi) in Fourier space
|
|
!
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
complex(DP), INTENT(OUT) :: psi(:)
|
|
complex(DP), INTENT(IN) :: c(:)
|
|
complex(DP), OPTIONAL, INTENT(IN) :: ca(:)
|
|
complex(DP), parameter :: ci=(0.0d0,1.0d0)
|
|
integer :: ig
|
|
!
|
|
psi = 0.0d0
|
|
!
|
|
! nlm and nl array: hold conversion indices form 3D to
|
|
! 1-D vectors. Columns along the z-direction are stored
|
|
! contigiously
|
|
! c array: stores the Fourier expansion coefficients
|
|
! Loop for all local g-vectors (ngw)
|
|
IF( PRESENT(ca) ) THEN
|
|
do ig = 1, desc%ngw
|
|
psi( desc%nlm( ig ) ) = CONJG( c( ig ) ) + ci * conjg( ca( ig ))
|
|
psi( desc%nl( ig ) ) = c( ig ) + ci * ca( ig )
|
|
end do
|
|
ELSE
|
|
do ig = 1, desc%ngw
|
|
psi( desc%nlm( ig ) ) = CONJG( c( ig ) )
|
|
psi( desc%nl( ig ) ) = c( ig )
|
|
end do
|
|
END IF
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE c2psi_k( desc, psi, c, igk, ngk)
|
|
!
|
|
! Copy wave-functions from 1D array (c/evc) ordered according (k+G) index igk
|
|
! to 3D array (psi) in Fourier space
|
|
!
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
complex(DP), INTENT(OUT) :: psi(:)
|
|
complex(DP), INTENT(IN) :: c(:)
|
|
INTEGER, INTENT(IN) :: igk(:), ngk
|
|
! local variables
|
|
integer :: ig
|
|
!
|
|
! nl array: hold conversion indices form 3D to 1-D vectors.
|
|
! Columns along the z-direction are stored contigiously
|
|
! c array: stores the Fourier expansion coefficients of the wave function
|
|
! Loop for all local g-vectors (npw
|
|
psi = 0.0d0
|
|
do ig = 1, ngk
|
|
psi( desc%nl( igk( ig ) ) ) = c( ig )
|
|
end do
|
|
!
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE fftx_oned2threed( desc, psi, c, ca )
|
|
!
|
|
! Copy charge density from 1D array (c) to 3D array (psi) in Fourier
|
|
! space
|
|
!
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
complex(DP), INTENT(OUT) :: psi(:)
|
|
complex(DP), INTENT(IN) :: c(:)
|
|
complex(DP), OPTIONAL, INTENT(IN) :: ca(:)
|
|
complex(DP), parameter :: ci=(0.0d0,1.0d0)
|
|
integer :: ig
|
|
!
|
|
psi = 0.0d0
|
|
!
|
|
! nlm and nl array: hold conversion indices form 3D to
|
|
! 1-D vectors. Columns along the z-direction are stored
|
|
! contigiously
|
|
! c array: stores the Fourier expansion coefficients
|
|
! Loop for all local g-vectors (ngw)
|
|
IF( PRESENT(ca) ) THEN
|
|
IF( desc%lgamma ) THEN
|
|
do ig = 1, desc%ngm
|
|
psi( desc%nlm( ig ) ) = CONJG( c( ig ) ) + ci * conjg( ca( ig ))
|
|
psi( desc%nl( ig ) ) = c( ig ) + ci * ca( ig )
|
|
end do
|
|
ELSE
|
|
do ig = 1, desc%ngm
|
|
psi( desc%nl( ig ) ) = c( ig ) + ci * ca( ig )
|
|
end do
|
|
END IF
|
|
ELSE
|
|
IF( desc%lgamma ) THEN
|
|
do ig = 1, desc%ngm
|
|
psi( desc%nlm( ig ) ) = CONJG( c( ig ) )
|
|
psi( desc%nl( ig ) ) = c( ig )
|
|
end do
|
|
ELSE
|
|
DO ig = 1, desc%ngm
|
|
psi( desc%nl( ig ) ) = c( ig )
|
|
END DO
|
|
END IF
|
|
END IF
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE fftx_add_threed2oned_gamma( desc, vin, vout1, vout2 )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
complex(DP), INTENT(INOUT) :: vout1(:)
|
|
complex(DP), OPTIONAL, INTENT(INOUT) :: vout2(:)
|
|
complex(DP), INTENT(IN) :: vin(:)
|
|
COMPLEX(DP) :: fp, fm
|
|
INTEGER :: ig
|
|
IF( PRESENT( vout2 ) ) THEN
|
|
DO ig=1,desc%ngm
|
|
fp=vin(desc%nl(ig))+vin(desc%nlm(ig))
|
|
fm=vin(desc%nl(ig))-vin(desc%nlm(ig))
|
|
vout1(ig) = vout1(ig) + 0.5d0*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
|
|
vout2(ig) = vout2(ig) + 0.5d0*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
|
|
END DO
|
|
ELSE
|
|
DO ig=1,desc%ngm
|
|
vout1(ig) = vout1(ig) + vin(desc%nl(ig))
|
|
END DO
|
|
END IF
|
|
END SUBROUTINE
|
|
|
|
SUBROUTINE fftx_threed2oned( desc, vin, vout1, vout2 )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
complex(DP), INTENT(OUT) :: vout1(:)
|
|
complex(DP), OPTIONAL, INTENT(OUT) :: vout2(:)
|
|
complex(DP), INTENT(IN) :: vin(:)
|
|
COMPLEX(DP) :: fp, fm
|
|
INTEGER :: ig
|
|
IF( PRESENT( vout2 ) ) THEN
|
|
DO ig=1,desc%ngm
|
|
fp=vin(desc%nl(ig))+vin(desc%nlm(ig))
|
|
fm=vin(desc%nl(ig))-vin(desc%nlm(ig))
|
|
vout1(ig) = 0.5d0*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
|
|
vout2(ig) = 0.5d0*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
|
|
END DO
|
|
ELSE
|
|
DO ig=1,desc%ngm
|
|
vout1(ig) = vin(desc%nl(ig))
|
|
END DO
|
|
END IF
|
|
END SUBROUTINE
|
|
|
|
|
|
SUBROUTINE fftx_psi2c_gamma( desc, vin, vout1, vout2 )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
complex(DP), INTENT(OUT) :: vout1(:)
|
|
complex(DP), OPTIONAL, INTENT(OUT) :: vout2(:)
|
|
complex(DP), INTENT(IN) :: vin(:)
|
|
COMPLEX(DP) :: fp, fm
|
|
INTEGER :: ig
|
|
IF( PRESENT( vout2 ) ) THEN
|
|
DO ig=1,desc%ngw
|
|
fp=vin(desc%nl(ig))+vin(desc%nlm(ig))
|
|
fm=vin(desc%nl(ig))-vin(desc%nlm(ig))
|
|
vout1(ig) = CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
|
|
vout2(ig) = CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
|
|
END DO
|
|
ELSE
|
|
DO ig=1,desc%ngw
|
|
vout1(ig) = vin(desc%nl(ig))
|
|
END DO
|
|
END IF
|
|
END SUBROUTINE
|
|
|
|
|
|
SUBROUTINE c2psi_gamma_tg(desc, psis, c_bgrp, i, nbsp_bgrp )
|
|
!
|
|
! Copy all wave-functions of an orbital group
|
|
! from 1D array (c_bgrp) to 3D array (psi) in Fourier space
|
|
!
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
complex(DP), INTENT(OUT) :: psis(:)
|
|
complex(DP), INTENT(INOUT) :: c_bgrp(:,:)
|
|
INTEGER, INTENT(IN) :: i, nbsp_bgrp
|
|
INTEGER :: eig_offset, eig_index, right_nnr
|
|
!
|
|
! the i-th column of c_bgrp corresponds to the i-th state (in this band group)
|
|
!
|
|
! The outer loop goes through i : i + 2*NOGRP to cover
|
|
! 2*NOGRP eigenstates at each iteration
|
|
!
|
|
CALL tg_get_nnr( desc, right_nnr )
|
|
|
|
!$omp parallel
|
|
!$omp single
|
|
|
|
do eig_index = 1, 2 * fftx_ntgrp(desc), 2
|
|
|
|
!$omp task default(none) &
|
|
!$omp firstprivate( eig_index, i, nbsp_bgrp, right_nnr ) &
|
|
!$omp private( eig_offset ) &
|
|
!$omp shared( c_bgrp, desc, psis )
|
|
!
|
|
! here we pack 2*nogrp electronic states in the psis array
|
|
! note that if nogrp == nproc_bgrp each proc perform a full 3D
|
|
! fft and the scatter phase is local (without communication)
|
|
!
|
|
! important: if n is odd => c(*,n+1)=0.
|
|
!
|
|
IF ( ( eig_index + i - 1 ) == nbsp_bgrp ) c_bgrp( : , eig_index + i ) = 0.0d0
|
|
!
|
|
eig_offset = ( eig_index - 1 )/2
|
|
!
|
|
IF ( ( i + eig_index - 1 ) <= nbsp_bgrp ) THEN
|
|
!
|
|
! The eig_index loop is executed only ONCE when NOGRP=1.
|
|
!
|
|
CALL c2psi_gamma( desc, psis( eig_offset * right_nnr + 1 : eig_offset * right_nnr + right_nnr ), &
|
|
c_bgrp( :, i+eig_index-1 ), c_bgrp( :, i+eig_index ) )
|
|
!
|
|
ENDIF
|
|
!$omp end task
|
|
!
|
|
end do
|
|
!$omp end single
|
|
!$omp end parallel
|
|
RETURN
|
|
END SUBROUTINE
|
|
|
|
|
|
SUBROUTINE fft_dist_info( desc, unit )
|
|
USE fft_param
|
|
USE fft_types, ONLY : fft_type_descriptor
|
|
INTEGER, INTENT(IN) :: unit
|
|
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
|
INTEGER :: i, j, nr3l
|
|
CALL tg_get_local_nr3( desc, nr3l )
|
|
WRITE( stdout,1000) desc%nr1, desc%nr2, desc%nr3, &
|
|
desc%nr1, desc%my_nr2p, desc%my_nr3p, &
|
|
1, desc%nproc2, desc%nproc3
|
|
WRITE( stdout,1010) desc%nr1x, desc%nr2x, desc%nr3x
|
|
WRITE( stdout,1020) desc%nnr
|
|
WRITE( stdout,*) ' Number of x-y planes for each processors: '
|
|
WRITE( stdout, fmt = '( 5(" |",I4,",",I4) )' ) ( ( desc%nr2p(j), &
|
|
desc%nr3p(i), i = 1, desc%nproc3 ), j=1,desc%nproc2 )
|
|
1000 FORMAT(3X, &
|
|
'Global Dimensions Local Dimensions Processor Grid',/,3X, &
|
|
'.X. .Y. .Z. .X. .Y. .Z. .X. .Y. .Z.',/, &
|
|
3(1X,I5),2X,3(1X,I5),2X,3(1X,I5) )
|
|
1010 FORMAT(3X, 'Array leading dimensions ( nr1x, nr2x, nr3x ) = ', 3(1X,I5))
|
|
1020 FORMAT(3X, 'Local number of cell to store the grid ( nrxx ) = ', 1X, I9 )
|
|
RETURN
|
|
END SUBROUTINE
|
|
|
|
END MODULE fft_helper_subroutines
|