!
! Copyright (C) 2011 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!--------------------------------------------------------------------
! Module containing routines for fft with a custom energy cutoff
!--------------------------------------------------------------------
!
MODULE fft_custom

  USE kinds, ONLY: DP
  USE parallel_include
  USE fft_types, ONLY: fft_dlay_descriptor

  IMPLICIT NONE

  TYPE fft_cus
     !
     ! ... data structure containing all information
     ! ... about fft data distribution for a given
     ! ... potential grid, and its wave functions sub-grid.
     !
     TYPE ( fft_dlay_descriptor ) :: dfftt   ! descriptor for the custom grid
     REAL(kind=DP) :: ecutt                  ! Custom cutoff (rydberg)
     REAL(kind=DP) :: dual_t                 ! Dual factor
     REAL(kind=DP) :: gcutmt
     INTEGER :: nr1t, nr2t, nr3t
     INTEGER :: nrx1t, nrx2t, nrx3t
     INTEGER :: nrxxt
     INTEGER :: ngmt, ngmt_l, ngmt_g
     INTEGER, DIMENSION(:), POINTER :: nlt, nltm
     REAL(kind=DP), DIMENSION(:), POINTER :: ggt
     REAL(kind=DP), DIMENSION(:,:), POINTER :: gt
     INTEGER, DIMENSION(:), POINTER :: ig_l2gt
     INTEGER :: gstart_t
     INTEGER, DIMENSION(:), POINTER :: ig1t, ig2t, ig3t
     INTEGER :: nlgt
     INTEGER :: npwt, npwxt
     LOGICAL :: initialized = .FALSE.

  END TYPE fft_cus

  !--------------------------------------------------------------------
CONTAINS
!=----------------------------------------------------------------------------=!

  SUBROUTINE gvec_init( fc, ngm_, comm )
    !
    ! Set local and global dimensions, allocate arrays
    !
    USE mp, ONLY: mp_max, mp_sum
    IMPLICIT NONE
    INTEGER, INTENT(IN) :: ngm_
    INTEGER, INTENT(IN) :: comm  ! communicator of the group on which g-vecs are distributed
    TYPE(fft_cus), INTENT(INOUT) :: fc
    !
    fc%ngmt = ngm_
    !
    !  calculate maximum over all processors
    !
    fc%ngmt_l = ngm_
    CALL mp_max( fc%ngmt_l, comm )
    !
    !  calculate sum over all processors
    !
    fc%ngmt_g = ngm_
    CALL mp_sum( fc%ngmt_g, comm )
    !
    !  allocate arrays - only those that are always kept until the end
    !
    ALLOCATE( fc%ggt(fc%ngmt) )
    ALLOCATE( fc%gt (3, fc%ngmt) )
    ! ALLOCATE( mill(3, fc%ngmt) )
    ALLOCATE( fc%nlt (fc%ngmt) )
    ALLOCATE( fc%nltm(fc%ngmt) )
    ALLOCATE( fc%ig_l2gt(fc%ngmt) )
    ! ALLOCATE( igtongl(fc%ngmt) )
    !
    RETURN
    !
  END SUBROUTINE gvec_init
  !
  !--------------------------------------------------------------------
  SUBROUTINE set_custom_grid(fc)
    !-----------------------------------------------------------------------
    !     This routine computes the dimensions of the minimum FFT grid
    !     compatible with the input cut-off
    !
    !     NB: The values of nr1, nr2, nr3 are computed only if they are not
    !     given as input parameters. Input values are kept otherwise.
    !
    USE cell_base,  ONLY : at, tpiba2
    USE fft_scalar, ONLY : allowed

    IMPLICIT NONE

    TYPE(fft_cus) :: fc

    INTEGER, PARAMETER :: nmax = 5000   ! an unreasonably big number for a FFT grid
    !
    ! the values of nr1, nr2, nr3 are computed only if they are not given
    ! as input parameters
    !
    fc%nr1t=0
    fc%nr2t=0
    fc%nr3t=0

    IF (fc%nr1t == 0) THEN
       !
       ! estimate nr1 and check if it is an allowed value for FFT
       !
       fc%nr1t = INT(2 * SQRT(fc%gcutmt) * SQRT(at(1, 1)**2 + &
            &at(2, 1)**2 + at(3, 1)**2) ) + 1
10     CONTINUE
       IF (fc%nr1t > nmax) &
            CALL errore ('set_custom_grid', 'nr1 is unreasonably large', fc%nr1t)
       IF (allowed (fc%nr1t) ) GOTO 15
       fc%nr1t = fc%nr1t + 1
       GOTO 10
    ELSE
       IF (.NOT.allowed (fc%nr1t) ) CALL errore ('set_custom_grid', &
            'input nr1t value not allowed', 1)
    ENDIF
15  CONTINUE
    !
    IF (fc%nr2t == 0) THEN
       !
       ! estimate nr2 and check if it is an allowed value for FFT
       !
       fc%nr2t = INT(2 * SQRT(fc%gcutmt) * SQRT(at(1, 2)**2 + &
            &at(2, 2)**2 + at(3, 2)**2) ) + 1
20     CONTINUE
       IF (fc%nr2t > nmax) &
            CALL errore ('set_custom_grid', 'nr2t is unreasonably large', fc%nr2t)
       IF (allowed (fc%nr2t) ) GOTO 25
       fc%nr2t = fc%nr2t + 1
       GOTO 20
    ELSE
       IF (.NOT.allowed (fc%nr2t) ) CALL errore ('set_custom_grid', &
            'input nr2t value not allowed', 2)
    ENDIF
25  CONTINUE
    !
    IF (fc%nr3t == 0) THEN
       !
       ! estimate nr3 and check if it is an allowed value for FFT
       !
       fc%nr3t = INT(2 * SQRT(fc%gcutmt) * SQRT(at(1, 3)**2 + &
            &at(2, 3)**2 + at(3, 3)**2) ) + 1
30     CONTINUE
       IF (fc%nr3t > nmax) &
            CALL errore ('set_custom_grid', 'nr3 is unreasonably large', fc%nr3t)
       IF (allowed (fc%nr3t) ) GOTO 35
       fc%nr3t = fc%nr3t + 1
       GOTO 30
    ELSE
       IF (.NOT.allowed (fc%nr3t) ) CALL errore ('set_custom_grid', &
            'input nr3t value not allowed', 3)
    ENDIF
35  CONTINUE
    !
    RETURN
  END SUBROUTINE set_custom_grid
  !
  !--------------------------------------------------------------------
  SUBROUTINE ggent(fc)
    !--------------------------------------------------------------------
    !
    USE kinds,         ONLY : DP
    USE cell_base,     ONLY : at, bg, tpiba2
    USE control_flags, ONLY : gamma_only
    USE constants,     ONLY : eps8

    IMPLICIT NONE

    TYPE(fft_cus) :: fc
    !
    REAL(DP) :: t (3), tt, swap
    !
    INTEGER :: ngmx, n1, n2, n3, n1s, n2s, n3s
    !
    REAL(DP), ALLOCATABLE :: g2sort_g(:)
    ! array containing all g vectors, on all processors: replicated data
    INTEGER, ALLOCATABLE :: mill_g(:,:), mill_unsorted(:,:)
    ! array containing all g vectors generators, on all processors:
    !     replicated data
    INTEGER, ALLOCATABLE :: igsrt(:)
    !
#ifdef __MPI
    INTEGER :: m1, m2, mc
#endif
    !
    INTEGER :: i, j, k, ipol, ng, igl, iswap, indsw, ni, nj, nk

    ALLOCATE( fc%gt(3,fc%ngmt), fc%ggt(fc%ngmt) )
    !
    ALLOCATE( fc%ig_l2gt( fc%ngmt_l ) )
    ALLOCATE( mill_g( 3, fc%ngmt_g ), mill_unsorted( 3, fc%ngmt_g ) )
    ALLOCATE( igsrt( fc%ngmt_g ) )
    ALLOCATE( g2sort_g( fc%ngmt_g ) )
    ALLOCATE( fc%ig1t(fc%ngmt), fc%ig2t(fc%ngmt), fc%ig3t(fc%ngmt) )

    g2sort_g(:) = 1.0d20
    !
    ! save present value of ngm in ngmx variable
    !
    ngmx = fc%ngmt
    !
    fc%ngmt = 0
    !
    ! max miller indices (same convention as in module stick_set)
    !
    ni = (fc%dfftt%nr1-1)/2
    nj = (fc%dfftt%nr2-1)/2
    nk = (fc%dfftt%nr3-1)/2
    !
    iloop: DO i = -ni, ni
       !
       ! gamma-only: exclude space with x < 0
       !
       IF ( gamma_only .AND. i < 0) CYCLE iloop
       jloop: DO j = -nj, nj
          !
          ! gamma-only: exclude plane with x = 0, y < 0
          !
          IF ( gamma_only .AND. i == 0 .AND. j < 0) CYCLE jloop
          kloop: DO k = -nk, nk
             !
             ! gamma-only: exclude line with x = 0, y = 0, z < 0
             !
             IF ( gamma_only .AND. i == 0 .AND. j == 0 .AND. k < 0) CYCLE kloop
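             !
             ! build the candidate G vector from the Miller indices and
             ! keep it only if it lies inside the custom cutoff sphere gcutmt
             !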
             t(:) = i * bg (:,1) + j * bg (:,2) + k * bg (:,3)
             tt = SUM(t(:)**2)
             IF (tt <= fc%gcutmt) THEN
                fc%ngmt = fc%ngmt + 1
                IF (fc%ngmt > fc%ngmt_g) CALL errore ('ggent', 'too many g-vectors', fc%ngmt)
                mill_unsorted( :, fc%ngmt ) = (/ i,j,k /)
                IF ( tt > eps8 ) THEN
                   g2sort_g(fc%ngmt) = tt
                ELSE
                   g2sort_g(fc%ngmt) = 0.d0
                ENDIF
             ENDIF
          ENDDO kloop
       ENDDO jloop
    ENDDO iloop

    IF (fc%ngmt /= fc%ngmt_g ) &
         CALL errore ('ggent', 'g-vectors missing !', ABS(fc%ngmt - fc%ngmt_g))

    igsrt(1) = 0
    CALL hpsort_eps( fc%ngmt_g, g2sort_g, igsrt, eps8 )
    mill_g(1,:) = mill_unsorted(1,igsrt(:))
    mill_g(2,:) = mill_unsorted(2,igsrt(:))
    mill_g(3,:) = mill_unsorted(3,igsrt(:))
    DEALLOCATE( g2sort_g, igsrt, mill_unsorted )

    fc%ngmt = 0
    !
    ngloop: DO ng = 1, fc%ngmt_g
       i = mill_g(1, ng)
       j = mill_g(2, ng)
       k = mill_g(3, ng)
#ifdef __MPI
       m1 = MOD (i, fc%dfftt%nr1) + 1
       IF (m1 < 1) m1 = m1 + fc%dfftt%nr1
       m2 = MOD (j, fc%dfftt%nr2) + 1
       IF (m2 < 1) m2 = m2 + fc%dfftt%nr2
       mc = m1 + (m2 - 1) * fc%dfftt%nr1x
       IF ( fc%dfftt%isind ( mc ) == 0) CYCLE ngloop
#endif
       fc%ngmt = fc%ngmt + 1
       !
       ! Here map local and global g index !!!
       ! N.B. the global G vectors arrangement depends on the number of processors
       !
       fc%ig_l2gt( fc%ngmt ) = ng
       !
       fc%gt (1:3, fc%ngmt) = i * bg (:, 1) + j * bg (:, 2) + k * bg (:, 3)
       fc%ggt (fc%ngmt) = SUM(fc%gt (1:3, fc%ngmt)**2)

       IF (fc%ngmt > ngmx) CALL errore ('ggent', 'too many g-vectors', fc%ngmt)
    ENDDO ngloop

    IF (fc%ngmt /= ngmx) &
         CALL errore ('ggent', 'g-vectors missing !', ABS(fc%ngmt - ngmx))
    !
    ! determine first nonzero g vector
    !
    IF (fc%ggt(1).LE.eps8) THEN
       fc%gstart_t=2
    ELSE
       fc%gstart_t=1
    ENDIF
    !
    ! Now set nlt with the correct fft correspondence
    !
    DO ng = 1, fc%ngmt
       n1 = NINT (SUM(fc%gt (:, ng) * at (:, 1))) + 1
       fc%ig1t (ng) = n1 - 1
       IF (n1<1) n1 = n1 + fc%dfftt%nr1
       n2 = NINT (SUM(fc%gt (:, ng) * at (:, 2))) + 1
       fc%ig2t (ng) = n2 - 1
       IF (n2<1) n2 = n2 + fc%dfftt%nr2
       n3 = NINT (SUM(fc%gt (:, ng) * at (:, 3))) + 1
       fc%ig3t (ng) = n3 - 1
       IF (n3<1) n3 = n3 + fc%dfftt%nr3

       IF (n1>fc%dfftt%nr1 .OR. n2>fc%dfftt%nr2 .OR. n3>fc%dfftt%nr3) &
            CALL errore('ggent','Mesh too small?',ng)

#if defined (__MPI) && !defined (__USE_3D_FFT)
       fc%nlt (ng) = n3 + ( fc%dfftt%isind (n1 + (n2 - 1) * fc%dfftt%nr1x) - 1 ) * fc%dfftt%nr3x
#else
       fc%nlt (ng) = n1 + (n2 - 1) * fc%dfftt%nr1x + (n3 - 1) * fc%dfftt%nr1x * fc%dfftt%nr2x
#endif
    ENDDO
    !
    DEALLOCATE( mill_g )
    !
    IF ( gamma_only ) CALL index_minusg_custom(fc)
    !
    ! set npwt and npwxt
    ! This should eventually be calculated somewhere else with
    ! n_plane_waves() but it is good enough for gamma_only
    !
    IF (gamma_only) THEN
       fc%npwt=0
       fc%npwxt=0
       DO ng = 1, fc%ngmt
          tt = fc%gt(1, ng)**2 + fc%gt(2, ng)**2 + fc%gt(3, ng)**2
          IF (tt <= fc%ecutt / tpiba2) THEN
             !
             ! here if |G|^2 <= Ecut increase the number of G inside the sphere
             !
             fc%npwt = fc%npwt + 1
          ENDIF
       ENDDO
       fc%npwxt=fc%npwt
    ENDIF
    !
    RETURN
    !
  END SUBROUTINE ggent
  !
  !-----------------------------------------------------------------------
  SUBROUTINE index_minusg_custom(fc)
    !----------------------------------------------------------------------
    !
    ! compute the index array nltm giving the correspondence
    ! between the fft mesh points and -G (for gamma-only calculations)
    !
    IMPLICIT NONE
    !
    TYPE(fft_cus), INTENT(INOUT) :: fc
    !
    INTEGER :: n1, n2, n3, n1s, n2s, n3s, ng
    !
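    ! For each local G vector, negate the Miller indices stored by ggent
    ! in ig1t, ig2t, ig3t, fold them back into the FFT box, and store the
    ! resulting mesh index in nltm, mirroring the construction of nlt.
    !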
    DO ng = 1, fc%ngmt
       n1 = -fc%ig1t (ng) + 1
       IF (n1 < 1) n1 = n1 + fc%dfftt%nr1
       n2 = -fc%ig2t (ng) + 1
       IF (n2 < 1) n2 = n2 + fc%dfftt%nr2
       n3 = -fc%ig3t (ng) + 1
       IF (n3 < 1) n3 = n3 + fc%dfftt%nr3

       IF (n1>fc%dfftt%nr1 .OR. n2>fc%dfftt%nr2 .OR. n3>fc%dfftt%nr3) THEN
          CALL errore('index_minusg_custom','Mesh too small?',ng)
       ENDIF

#if defined (__MPI) && !defined (__USE_3D_FFT)
       fc%nltm(ng) = n3 + ( fc%dfftt%isind (n1 + (n2 - 1) * fc%dfftt%nr1x) - 1 ) * fc%dfftt%nr3x
#else
       fc%nltm(ng) = n1 + (n2 - 1) * fc%dfftt%nr1x + (n3 - 1) * fc%dfftt%nr1x * fc%dfftt%nr2x
#endif
    ENDDO

  END SUBROUTINE index_minusg_custom
  !
  SUBROUTINE deallocate_fft_custom(fc)
    ! this subroutine deallocates all the arrays of the custom fft type
    USE fft_types, ONLY : fft_dlay_deallocate
    IMPLICIT NONE
    TYPE(fft_cus) :: fc

    IF (.NOT. fc%initialized) RETURN

    DEALLOCATE(fc%nlt,fc%nltm)
    CALL fft_dlay_deallocate(fc%dfftt)
    DEALLOCATE(fc%ig_l2gt,fc%ggt,fc%gt)
    DEALLOCATE(fc%ig1t,fc%ig2t,fc%ig3t)
    fc%initialized=.FALSE.

    RETURN
  END SUBROUTINE deallocate_fft_custom
  !
  !----------------------------------------------------------------------------
  SUBROUTINE reorderwfp_col ( nbands, npw1, npw2, pw1, pw2, ngwl1, ngwl2, &
       & ig_l2g1, ig_l2g2, n_g, mpime, nproc, comm )
    !--------------------------------------------------------------------------
    !
    ! A routine using collective MPI calls that reorders the
    ! wavefunction in pw1, distributed according to ig_l2g1, and puts
    ! it in pw2 in the order required by ig_l2g2.
    !
    ! Can transform multiple bands at once, as specified by the nbands
    ! argument.
    !
    ! This operation could previously be performed by calls to
    ! mergewf and splitwf; however, that scales very badly with the
    ! number of processors.
    !
    ! Written by P. Umari, documentation added by S. Binnie
    !
    USE kinds
    USE parallel_include
    USE io_global, ONLY : stdout

    IMPLICIT NONE

    INTEGER, INTENT(IN) :: npw1, npw2
    INTEGER, INTENT(IN) :: nbands        ! Number of bands to be transformed
    INTEGER, INTENT(IN) :: ngwl1, ngwl2  ! Number of local G-vector indices in ig_l2g1, ig_l2g2
    COMPLEX(DP), INTENT(IN)    :: pw1(npw1,nbands)  ! Input wavefunction
    COMPLEX(DP), INTENT(INOUT) :: pw2(npw2,nbands)  ! Output
    INTEGER, INTENT(IN) :: mpime         ! Index of calling proc (starts at 0)
    INTEGER, INTENT(IN) :: nproc         ! Number of procs in the communicator
    INTEGER, INTENT(IN) :: comm          ! Communicator
    INTEGER, INTENT(IN) :: ig_l2g1(ngwl1), ig_l2g2(ngwl2)  ! Local-to-global G-vector maps
    INTEGER, INTENT(IN) :: n_g           ! Global maximum number of G vectors for both grids

    ! Local variables
    INTEGER :: ngwl1_max, ngwl2_max, npw1_max, npw2_max, ngwl_min
    INTEGER :: gid, ierr
    INTEGER, ALLOCATABLE :: npw1_loc(:), npw2_loc(:)
    INTEGER, ALLOCATABLE :: ig_l2g1_tot(:,:), ig_l2g2_tot(:,:), itmp(:)
    INTEGER :: ii, ip, ilast, iband
    COMPLEX(kind=DP), ALLOCATABLE :: pw1_tot(:,:), pw2_tot(:,:)
    COMPLEX(kind=DP), ALLOCATABLE :: pw1_tmp(:), pw2_tmp(:), pw_global(:)

#ifdef __MPI
    gid=comm

    ALLOCATE(npw1_loc(nproc),npw2_loc(nproc))
    !
    ! Calculate the size of the global correspondence arrays
    !
    CALL MPI_ALLREDUCE( ngwl1, ngwl1_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
    CALL MPI_ALLREDUCE( ngwl2, ngwl2_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
    CALL MPI_ALLREDUCE( npw1, npw1_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
    CALL MPI_ALLREDUCE( npw2, npw2_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
    CALL MPI_ALLGATHER( npw1, 1, MPI_INTEGER, npw1_loc, 1, MPI_INTEGER, gid, IERR )
    CALL MPI_ALLGATHER( npw2, 1, MPI_INTEGER, npw2_loc, 1, MPI_INTEGER, gid, IERR )
    !
    ALLOCATE(ig_l2g1_tot(ngwl1_max,nproc),ig_l2g2_tot(ngwl2_max,nproc))
    !
    ! All procs gather the correspondence arrays
    !
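    ! (each local index map is copied into a scratch buffer of length
    !  ngwl*_max before the MPI_ALLGATHER, which requires equal-sized
    !  contributions from every processor; only the first ngwl1/ngwl2
    !  entries of each gathered column are meaningful)
    !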
    ALLOCATE(itmp(ngwl1_max))
    itmp(1:ngwl1)=ig_l2g1(1:ngwl1)
    CALL MPI_ALLGATHER( itmp, ngwl1_max, MPI_INTEGER, ig_l2g1_tot, &
         & ngwl1_max, MPI_INTEGER, gid, IERR )
    DEALLOCATE(itmp)
    !
    ALLOCATE(itmp(ngwl2_max))
    itmp(1:ngwl2)=ig_l2g2(1:ngwl2)
    CALL MPI_ALLGATHER( itmp, ngwl2_max, MPI_INTEGER, ig_l2g2_tot, &
         & ngwl2_max, MPI_INTEGER, gid, IERR )
    DEALLOCATE(itmp)
    !
    ALLOCATE( pw1_tot(npw1_max,nproc), pw2_tot(npw2_max,nproc) )
    ALLOCATE( pw1_tmp(npw1_max), pw2_tmp(npw2_max) )
    ALLOCATE( pw_global(n_g) )
    !
    ! Loop over the bands in blocks of nproc: each band of the block is
    ! gathered on, reordered by, and scattered from a different root.
    !
    DO ii=1, nbands, nproc
       !
       ilast=MIN(nbands,ii+nproc-1)
       !
       ! Gather the input wavefunction.
       !
       DO iband=ii, ilast
          ip = MOD(iband,nproc)   ! root for this band: ip ranges from 0 to nproc-1
          pw1_tmp(1:npw1)=pw1(1:npw1,iband)
          CALL MPI_GATHER( pw1_tmp, npw1_max, MPI_DOUBLE_COMPLEX, &
               & pw1_tot, npw1_max, MPI_DOUBLE_COMPLEX, ip, gid, ierr )
       ENDDO
       !
       pw_global = ( 0.d0, 0.d0 )
       !
       ! Put the gathered wavefunction into the standard (global) order.
       !
       DO ip=1,nproc
          pw_global( ig_l2g1_tot(1:npw1_loc(ip), ip) ) = pw1_tot( 1:npw1_loc(ip), ip )
       ENDDO
       !
       ! Now put this into the correct order for output.
       !
       DO ip=1,nproc
          pw2_tot( 1:npw2_loc(ip), ip ) = pw_global( ig_l2g2_tot(1:npw2_loc(ip),ip) )
       ENDDO
       !
       ! Scatter the output wavefunction across the processors.
       !
       DO iband=ii,ilast
          ip=MOD(iband,nproc)
          CALL MPI_SCATTER( pw2_tot, npw2_max, MPI_DOUBLE_COMPLEX, &
               & pw2_tmp, npw2_max, MPI_DOUBLE_COMPLEX, ip, gid, ierr )
          pw2(1:npw2,iband)=pw2_tmp(1:npw2)
       ENDDO
       !
    ENDDO
    !
    DEALLOCATE(npw1_loc,npw2_loc)
    DEALLOCATE(ig_l2g1_tot,ig_l2g2_tot)
    DEALLOCATE(pw1_tot,pw2_tot)
    DEALLOCATE(pw1_tmp,pw2_tmp)
    DEALLOCATE(pw_global)
    !
#else
    !
    ngwl_min = MIN( ngwl1, ngwl2 )
    !
    pw2(:, 1:nbands) = ( 0.0d0, 0.0d0 )
    pw2( ig_l2g2(1:ngwl_min), 1:nbands ) = pw1( ig_l2g1(1:ngwl_min), 1:nbands )
    !
#endif
    !
    RETURN
    !
  END SUBROUTINE reorderwfp_col
  !----------------------------------------------------------------------------
END MODULE fft_custom
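!
! Minimal usage sketch (not part of the original module): one possible
! life cycle of a fft_cus object, assuming the caller has already built
! the descriptor fc%dfftt and counted the locally distributed G vectors
! for the chosen cutoff. ngm_custom, my_comm and the 40 Ry cutoff are
! placeholders, not module entities, and the exact initialization
! sequence used elsewhere in Quantum ESPRESSO may differ.
!
!    TYPE(fft_cus) :: fc
!    fc%ecutt  = 40.0_DP                       ! custom cutoff (Ry)
!    fc%dual_t = 4.0_DP                        ! dual factor
!    fc%gcutmt = fc%dual_t * fc%ecutt / tpiba2
!    CALL set_custom_grid(fc)                  ! minimal nr1t, nr2t, nr3t
!    ! ... set up fc%dfftt and obtain ngm_custom here ...
!    CALL gvec_init(fc, ngm_custom, my_comm)
!    CALL ggent(fc)                            ! G vectors and fft index maps
!    fc%initialized = .TRUE.
!    ! ... FFTs on the custom grid via fc%nlt / fc%nltm ...
!    CALL deallocate_fft_custom(fc)
!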