From 1a6b00bbb69372f2f4b35967e65144b14f1faf8f Mon Sep 17 00:00:00 2001 From: cavazzon Date: Tue, 10 Jan 2006 14:04:13 +0000 Subject: [PATCH] - FPMD/CP ortho subroutines merged as much as possible. - Many ortho auxiliary functions (tauset, rhoset, sigset, calphi, updatc) are now in common between FPMD/CP, and moved to module ortho_base.f90 - In FPMD, three index vectors, related to real space like charge and potential have been substituted with single index vector like in CP, for compatibility and efficiency. - Bug fix in pwtools/matdyn.f90 a logical variable was used in place of a character variable git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2694 c92efa57-630b-4861-b058-cf58834340f0 --- CPV/cg_sub.f90 | 11 +- CPV/cglib.f90 | 119 +--- CPV/chargedensity.f90 | 184 ++---- CPV/chargemix.f90 | 6 +- CPV/cp_restart.f90 | 3 +- CPV/cplib.f90 | 589 ++++------------- CPV/cplib_meta.f90 | 2 +- CPV/cpr.f90 | 12 +- CPV/electrons.f90 | 2 +- CPV/emptystates.f90 | 8 +- CPV/exch_corr.f90 | 160 ++--- CPV/fft.f90 | 68 +- CPV/fields_type.f90 | 1 - CPV/forces.f90 | 12 +- CPV/fromscra.f90 | 24 +- CPV/grid.f90 | 30 +- CPV/guess.f90 | 65 +- CPV/init_run.f90 | 10 +- CPV/inner_loop.f90 | 2 +- CPV/input.f90 | 10 +- CPV/ksstates.f90 | 65 +- CPV/main.f90 | 26 +- CPV/mainvar.f90 | 19 +- CPV/move_electrons.f90 | 4 +- CPV/nl_base.f90 | 8 +- CPV/nlcc.f90 | 2 +- CPV/optical.f90 | 6 +- CPV/ortho.f90 | 1164 ++++++++------------------------ CPV/ortho_base.f90 | 853 ++++++++++++++++-------- CPV/para.f90 | 2 +- CPV/potentials.f90 | 313 ++++----- CPV/print_out.f90 | 19 +- CPV/qmatrixd.f90 | 6 +- CPV/restart.f90 | 39 +- CPV/restart_sub.f90 | 23 +- CPV/rsmesh.f90 | 3 +- CPV/runcg.f90 | 27 +- CPV/runcg_ion.f90 | 24 +- CPV/runcp.f90 | 22 +- CPV/rundiis.f90 | 43 +- CPV/runsd.f90 | 12 +- CPV/smcp.f90 | 23 +- CPV/stress.f90 | 2 +- CPV/turbo.f90 | 10 +- CPV/wave.f90 | 207 +----- CPV/wf.f90 | 14 +- Modules/descriptors.f90 | 577 +--------------- Modules/electrons_base.f90 | 10 +- Modules/parallel_types.f90 | 31 +- Modules/ptoolkit.f90 | 1280 +++++++++++++++++++++--------------- Modules/read_namelists.f90 | 10 +- Modules/xml_io_base.f90 | 4 +- flib/Makefile | 1 - flib/gridsetup.f90 | 61 +- flib/localindex.f90 | 80 +++ pwtools/matdyn.f90 | 5 +- 56 files changed, 2415 insertions(+), 3898 deletions(-) diff --git a/CPV/cg_sub.f90 b/CPV/cg_sub.f90 index 231988393..5c35d2811 100644 --- a/CPV/cg_sub.f90 +++ b/CPV/cg_sub.f90 @@ -25,7 +25,7 @@ use energies, only: eht, epseu, exc, etot, eself, enl, ekin, & & atot, entropy, egrand use electrons_base, only: f, nspin, nel, iupdwn, nupdwn, nudx, nelt, & - nx => nbspx, n => nbsp, ispin => fspin + nx => nbspx, n => nbsp, ispin use ensemble_dft, only: tens, tgrand, ninner, ismear, etemp, ef, & & tdynz, tdynf, zmass, fmass, fricz, fricf, z0, c0diag, & @@ -73,7 +73,8 @@ use efield_module, only: tefield, evalue, ctable, qmat, detq, ipolp, & berry_energy, ctabin, gqq, gqqm, df, pberryel use mp, only: mp_sum,mp_bcast - use cp_electronic_mass, ONLY : emass_cutoff + use cp_electronic_mass, ONLY : emass_cutoff + use orthogonalize_base, ONLY : calphi ! implicit none @@ -143,7 +144,8 @@ !calculates phi for pcdaga - call calphiid(c0,bec,betae,phi) + ! call calphiid(c0,bec,betae,phi) + CALL calphi( c0, SIZE(c0,1), bec, nhsa, betae, phi, n ) !calculates the factors for S and K inversion in US case if(nvb.gt.0) then @@ -694,7 +696,8 @@ call calbec (1,nsp,eigr,c0,bec) !calculates phi for pc_daga - call calphiid(c0,bec,betae,phi) + !call calphiid(c0,bec,betae,phi) + CALL calphi( c0, SIZE(c0,1), bec, nhsa, betae, phi, n ) !======================================================================= ! diff --git a/CPV/cglib.f90 b/CPV/cglib.f90 index d692e865f..b71630288 100644 --- a/CPV/cglib.f90 +++ b/CPV/cglib.f90 @@ -6,93 +6,6 @@ ! or http://www.gnu.org/copyleft/gpl.txt . ! #include "f_defs.h" -!------------------------------------------------------------------------- - subroutine calphiid(c0,bec,betae,phi) -!----------------------------------------------------------------------- -! input: c0 (orthonormal with s(r(t)), bec=, betae=|beta> -! computes the matrix phi (with the old positions) -! where |phi> = s'|c0> = |c0> + sum q_ij |i> -! where s'=s(r(t)) -! - use ions_base, only: na, nsp - use io_global, only: stdout - use cvan - use uspp_param, only: nh - use uspp, only :nhsa=>nkb, nhsavb=>nkbus, qq - use electrons_base, only: n => nbsp - use gvecw, only: ngw - use constants, only: pi, fpi - use control_flags, only: iprint, iprsta - use mp, only: mp_sum -! - implicit none - complex(8) c0(ngw,n), phi(ngw,n), betae(ngw,nhsa) - real(8) bec(nhsa,n) ,emtot -! local variables - integer is, iv, jv, ia, inl, jnl, i, j - real(8) qtemp(nhsavb,n) ! automatic array -! - phi(1:ngw,1:n) = 0.0d0 -! - if (nvb.gt.0) then - qtemp = 0.0d0 - do is=1,nvb - do iv=1,nh(is) - do jv=1,nh(is) - if(abs(qq(iv,jv,is)).gt.1.e-5) then - do ia=1,na(is) - inl=ish(is)+(iv-1)*na(is)+ia - jnl=ish(is)+(jv-1)*na(is)+ia - do i=1,n - qtemp(inl,i) = qtemp(inl,i) + & - & qq(iv,jv,is)*bec(jnl,i) - end do - end do - endif - end do - end do - end do - ! - call MXMA & - & (betae,1,2*ngw,qtemp,1,nhsavb,phi,1,2*ngw,2*ngw,nhsavb,n) - end if -! - do j=1,n - do i=1,ngw - phi(i,j)=(phi(i,j)+c0(i,j)) - end do - end do -! ================================================================= - if(iprsta.gt.2) then - emtot=0. - do j=1,n - do i=1,ngw - emtot=emtot & - & +2.*DBLE(phi(i,j)*CONJG(c0(i,j))) - end do - end do - emtot=emtot/n - call mp_sum(emtot) - WRITE( stdout,*) 'in calphi sqrt(emtot)=',sqrt(emtot) - WRITE( stdout,*) - do is=1,nsp - if(nsp.gt.1) then - WRITE( stdout,'(33x,a,i4)') ' calphi: bec (is)',is - WRITE( stdout,'(8f9.4)') & - & ((bec(ish(is)+(iv-1)*na(is)+1,i),iv=1,nh(is)),i=1,n) - else - do ia=1,na(is) - WRITE( stdout,'(33x,a,i4)') ' calphi: bec (ia)',ia - WRITE( stdout,'(8f9.4)') & - & ((bec(ish(is)+(iv-1)*na(is)+ia,i),iv=1,nh(is)),i=1,n) - end do - end if - end do - endif -! - return - end subroutine calphiid - !----------------------------------------------------------------------- @@ -176,44 +89,26 @@ ! use parallel_toolkit, only: dspev_drv implicit none - integer nx,n,naux,ndim,iopt,iflag,k,i,j,info + integer nx,n,ndim,iflag,k,i,j real(8) dval(n) real(8) amat(nx,n), dvec(nx,n) real(8), allocatable:: ap(:) - real(8), allocatable:: aux(:) - ndim=(n*(n+1))/2 - naux=3*n allocate(ap(ndim)) - allocate(aux(naux)) ap(:)=0.d0 - aux(:)=0.d0 - - if (iflag.eq.1) then - iopt=1 - else if (iflag.eq.0) then - iopt=0 - else - write(*,*) 'ERROR: diag, iflag not allowed' - stop - end if k=0 do j=1,n do i=1,j k=k+1 -! ap(i + (j-1)*j/2)=amat(i,j) ap(k)=amat(i,j) end do end do CALL dspev_drv( 'V', 'U', n, ap, dval, dvec, nx ) - if(info.ne.0) write(6,*) 'Problems with ddiag' - deallocate(ap) - deallocate(aux) return end subroutine ddiag @@ -293,7 +188,7 @@ subroutine pc2(a,beca,b,becb) use control_flags, only: iprint, iprsta use reciprocal_vectors, only: ng0 => gstart use mp, only: mp_sum - use electrons_base, only: n => nbsp, fspin + use electrons_base, only: n => nbsp, ispin use uspp_param, only: nh use uspp, only :nhsa=>nkb use uspp, only :qq @@ -312,7 +207,7 @@ subroutine pc2(a,beca,b,becb) if (ng0.eq.2) then b(1,i)=0.5d0*(b(1,i)+CONJG(b(1,i))) endif - if(fspin(j) == fspin(i)) then + if(ispin(j) == ispin(i)) then do ig=1,ngw !loop on g vectors sca=sca+2.d0*DBLE(CONJG(a(ig,j))*b(ig,i)) !2. for real wavefunctions enddo @@ -369,7 +264,7 @@ subroutine pc2(a,beca,b,becb) use control_flags, only: iprint, iprsta use reciprocal_vectors, only: ng0 => gstart use mp, only: mp_sum - use electrons_base, only: n => nbsp, fspin + use electrons_base, only: n => nbsp, ispin use uspp_param, only: nh use uspp, only :nhsa=>nkb @@ -387,7 +282,7 @@ subroutine pc2(a,beca,b,becb) if (ng0.eq.2) then b(1,i)=0.5d0*(b(1,i)+CONJG(b(1,i))) endif - if(fspin(i) == fspin(j)) then + if(ispin(i) == ispin(j)) then do ig=1,ngw !loop on g vectors sca=sca+2.*DBLE(CONJG(a(ig,j))*b(ig,i)) !2. for real weavefunctions enddo @@ -422,7 +317,7 @@ subroutine pc2(a,beca,b,becb) use control_flags, only: iprint, iprsta use reciprocal_vectors, only: ng0 => gstart use mp, only: mp_sum, mp_bcast - use electrons_base, only: n => nbsp, fspin + use electrons_base, only: n => nbsp, ispin use uspp_param, only: nh use uspp, only :nhsa=>nkb,qq, nhsavb=>nkbus use io_global, ONLY: ionode, ionode_id @@ -535,7 +430,7 @@ subroutine pc2(a,beca,b,becb) use control_flags, only: iprint, iprsta use reciprocal_vectors, only: ng0 => gstart use mp, only: mp_sum, mp_bcast - use electrons_base, only: n => nbsp, fspin + use electrons_base, only: n => nbsp, ispin use uspp_param, only: nh use uspp, only :nhsa=>nkb,qq,nhsavb=>nkbus use io_global, ONLY: ionode, ionode_id diff --git a/CPV/chargedensity.f90 b/CPV/chargedensity.f90 index e21a991e8..5689715b4 100644 --- a/CPV/chargedensity.f90 +++ b/CPV/chargedensity.f90 @@ -32,7 +32,7 @@ ! end of module-scope declarations ! ---------------------------------------------- - PUBLIC :: checkrho, rhoofr, gradrho + PUBLIC :: rhoofr, gradrho !=----------------------------------------------------------------------=! CONTAINS @@ -44,73 +44,6 @@ RETURN END SUBROUTINE charge_density_closeup ! -!=----------------------------------------------------------------------=! - SUBROUTINE checkrho(rhoe, desc, rsum, omega) -!=----------------------------------------------------------------------=! - -! This Subroutine checks the value of the charge density integral -! that should be equal to the total charge - - USE constants, ONLY: rhothr - USE mp_global, ONLY: group, root, mpime - USE io_global, ONLY: ionode, stdout - USE mp, ONLY: mp_sum - USE charge_types, ONLY: charge_descriptor - - - IMPLICIT NONE - - REAL(DP), INTENT(IN) :: omega - REAL(DP) :: rsum(:) - REAL(DP), INTENT(IN) :: rhoe(:,:,:,:) - TYPE (charge_descriptor), INTENT(IN) :: desc - REAL(DP) :: rsum1 - INTEGER :: i, j, k, ispin, nspin, nr1, nr2, nr3, ierr - INTEGER :: nxl, nyl, nzl - - nr1 = desc%nx - nr2 = desc%ny - nr3 = desc%nz - nxl = desc%nxl - nyl = desc%nyl - nzl = desc%nzl - nspin = desc%nspin - -! ... recompute the integral of the charge density (for checking purpose) - - DO ispin = 1, nspin - - rsum1 = SUM( rhoe( 1:nxl, 1:nyl, 1:nzl, ispin ) ) - rsum1 = rsum1 * omega / DBLE( nr1 * nr2 * nr3 ) - -! ... sum over all processors - - CALL mp_sum( rsum1, group ) - CALL mp_sum( rsum(ispin), group ) - -! ... write result (only processor 0) - - IF( ionode ) THEN - - WRITE( stdout,1) rsum(ispin), rsum1 - -! ... issue a warning if the result has changed - - IF( ABS( rsum(ispin) - rsum1 ) > rhothr ) WRITE( stdout,100) - - END IF - - END DO - - 1 FORMAT(//,3X,'Total integrated electronic density',/ & - & ,3X,'in G-space =',F11.6,4X,'in R-space =',F11.6) - 100 FORMAT('** WARNING: CHARGE DENSITY **') - - RETURN -!=----------------------------------------------------------------------=! - END SUBROUTINE checkrho -!=----------------------------------------------------------------------=! - REAL(DP) FUNCTION dft_total_charge( ispin, c, cdesc, fi ) @@ -166,7 +99,7 @@ !=----------------------------------------------------------------------=! ! BEGIN manual - SUBROUTINE rhoofr (nfi, c0, cdesc, fi, rhoe, desc, box) + SUBROUTINE rhoofr (nfi, c0, cdesc, fi, rhoe, box) ! this routine computes: ! rhoe = normalized electron density in real space @@ -191,18 +124,19 @@ ! ... declare modules - USE fft, ONLY: pw_invfft - USE fft_base, ONLY: dfftp - USE mp_global, ONLY: mpime - USE mp, ONLY: mp_sum - USE turbo, ONLY: tturbo, nturbo, turbo_states, allocate_turbo - USE cell_module, ONLY: boxdimensions - USE wave_types, ONLY: wave_descriptor - USE charge_types, ONLY: charge_descriptor - USE io_global, ONLY: stdout, ionode - USE control_flags, ONLY: force_pairing, iprint - USE parameters, ONLY: nspinx - USE brillouin, ONLY: kpoints, kp + USE fft, ONLY: pw_invfft + USE fft_base, ONLY: dfftp + USE mp_global, ONLY: mpime + USE mp, ONLY: mp_sum + USE turbo, ONLY: tturbo, nturbo, turbo_states, allocate_turbo + USE cell_module, ONLY: boxdimensions + USE wave_types, ONLY: wave_descriptor + USE io_global, ONLY: stdout, ionode + USE control_flags, ONLY: force_pairing, iprint + USE parameters, ONLY: nspinx + USE brillouin, ONLY: kpoints, kp + USE grid_dimensions, ONLY: nr1, nr2, nr3, nr1x, nr2x, nnrx + IMPLICIT NONE @@ -213,32 +147,23 @@ COMPLEX(DP) :: c0(:,:,:,:) TYPE (boxdimensions), INTENT(IN) :: box REAL(DP), INTENT(IN) :: fi(:,:,:) - REAL(DP), INTENT(OUT) :: rhoe(:,:,:,:) - TYPE (charge_descriptor), INTENT(IN) :: desc + REAL(DP), INTENT(OUT) :: rhoe(:,:) TYPE (wave_descriptor), INTENT(IN) :: cdesc ! ... declare other variables - INTEGER :: i, is1, is2, j, k, ib, ik, nb, nxl, nyl, nzl, ispin - INTEGER :: nr1x, nr2x, nr3x, nspin, nbnd, nnr + INTEGER :: i, is1, is2, j, k, ib, ik, nb, ispin + INTEGER :: nspin, nbnd, nnr REAL(DP) :: r2, r1, coef3, coef4, omega, rsumg( nspinx ), rsumgs REAL(DP) :: fact, rsumr( nspinx ) - REAL(DP), ALLOCATABLE :: rho(:,:,:) - COMPLEX(DP), ALLOCATABLE :: psi2(:,:,:) + COMPLEX(DP), ALLOCATABLE :: psi2(:) INTEGER :: ierr, ispin_wfc LOGICAL :: ttprint ! ... end of declarations ! ---------------------------------------------- - nxl = dfftp%nr1 - nyl = dfftp%nr2 - nzl = dfftp%npl - nnr = dfftp%nr1 * dfftp%nr2 * dfftp%nr3 - - nr1x = dfftp%nr1x - nr2x = dfftp%nr2x - nr3x = dfftp%npl + nnr = dfftp%nr1x * dfftp%nr2x * dfftp%npl omega = box%deth @@ -250,29 +175,20 @@ ttprint = .FALSE. IF( nfi == 0 .or. mod( nfi, iprint ) == 0 ) ttprint = .TRUE. - ! ... Check consistensy of the charge density grid and fft grid - - IF( SIZE( rhoe, 1 ) < nxl ) & - CALL errore(' rhoofr ', ' wrong X dimension for rhoe ',1) - IF( SIZE( rhoe, 2 ) < nyl ) & - CALL errore(' rhoofr ', ' wrong Y dimension for rhoe ',1) - IF( SIZE( rhoe, 3 ) < nzl ) & - CALL errore(' rhoofr ', ' wrong Z dimension for rhoe ',1) - - ALLOCATE( psi2( nr1x, nr2x, nr3x ), STAT=ierr ) + ALLOCATE( psi2( nnrx ), STAT=ierr ) IF( ierr /= 0 ) CALL errore(' rhoofr ', ' allocating psi2 ', ABS(ierr) ) - ALLOCATE( rho( nr1x, nr2x, nr3x ), STAT=ierr ) - IF( ierr /= 0 ) CALL errore(' rhoofr ', ' allocating rho ', ABS(ierr) ) IF( tturbo ) THEN ! ! ... if tturbo=.TRUE. some data is stored in memory instead of being ! ... recalculated (see card 'TURBO') ! - CALL allocate_turbo( dfftp%nr1x, dfftp%nr2x, dfftp%npl ) + CALL allocate_turbo( nnrx ) END IF + rhoe = zero + DO ispin = 1, nspin ! ... arrange for FFT of wave functions @@ -285,7 +201,7 @@ ! ... Gamma-point calculation: wave functions are real and can be ! ... Fourier-transformed two at a time as a complex vector - rho = zero + psi2 = zero nbnd = cdesc%nbl( ispin ) nb = ( nbnd - MOD( nbnd, 2 ) ) @@ -302,7 +218,7 @@ IF( tturbo .AND. ( ib <= nturbo ) ) THEN ! ... store real-space wave functions to be used in force - turbo_states( :, :, :, ib ) = psi2( :, :, : ) + turbo_states( :, ib ) = psi2( : ) END IF ! ... occupation numbers divided by cell volume @@ -313,21 +229,17 @@ ! ... compute charge density from wave functions - DO k = 1, nzl - DO j = 1, nyl - DO i = 1, nxl + DO i = 1, nnr ! ... extract wave functions from psi2 - r1 = DBLE( psi2(i,j,k) ) - r2 = AIMAG( psi2(i,j,k) ) + r1 = DBLE( psi2(i) ) + r2 = AIMAG( psi2(i) ) ! ... add squared moduli to charge density - rho(i,j,k) = rho(i,j,k) + coef3 * r1 * r1 + coef4 * r2 * r2 + rhoe(i,ispin) = rhoe(i,ispin) + coef3 * r1 * r1 + coef4 * r2 * r2 - END DO - END DO END DO END DO @@ -346,20 +258,16 @@ ! ... compute charge density from wave functions - DO k = 1, nzl - DO j = 1, nyl - DO i = 1, nxl + DO i = 1, nnr - ! ... extract wave functions from psi2 + ! ... extract wave functions from psi2 - r1 = DBLE( psi2(i,j,k) ) + r1 = DBLE( psi2(i) ) - ! ... add squared moduli to charge density + ! ... add squared moduli to charge density - rho(i,j,k) = rho(i,j,k) + coef3 * r1 * r1 + rhoe(i,ispin) = rhoe(i,ispin) + coef3 * r1 * r1 - END DO - END DO END DO END IF @@ -368,7 +276,7 @@ ! ... calculation with generic k points: wave functions are complex - rho = zero + psi2 = zero DO ik = 1, cdesc%nkl @@ -385,25 +293,19 @@ ! ... compute charge density - DO k = 1, nzl - DO j = 1, nyl - DO i = 1, nxl + DO i = 1, nnr - ! ... add squared modulus to charge density + ! ... add squared modulus to charge density - rho(i,j,k) = rho(i,j,k) + coef3 * DBLE( psi2(i,j,k) * CONJG(psi2(i,j,k)) ) + rhoe(i,ispin) = rhoe(i,ispin) + coef3 * DBLE( psi2(i) * CONJG(psi2(i)) ) - END DO - END DO END DO END DO END DO END IF - IF( ttprint ) rsumr( ispin ) = SUM( rho ) * omega / nnr - - rhoe( 1:nxl, 1:nyl, 1:nzl, ispin ) = rho( 1:nxl, 1:nyl, 1:nzl ) + IF( ttprint ) rsumr( ispin ) = SUM( rhoe( :, ispin ) ) * omega / ( nr1 * nr2 * nr3 ) END DO @@ -443,8 +345,6 @@ DEALLOCATE(psi2, STAT=ierr) IF( ierr /= 0 ) CALL errore(' rhoofr ', ' deallocating psi2 ', ABS(ierr) ) - DEALLOCATE(rho, STAT=ierr) - IF( ierr /= 0 ) CALL errore(' rhoofr ', ' deallocating rho ', ABS(ierr) ) RETURN @@ -468,7 +368,7 @@ COMPLEX(DP), INTENT(IN) :: rhoeg(:) ! charge density (Reciprocal Space) REAL(DP), INTENT(IN) :: gx(:,:) ! cartesian components of G-vectors - REAL(DP), INTENT(OUT) :: grho(:,:,:,:) ! charge density gradient + REAL(DP), INTENT(OUT) :: grho(:,:) ! charge density gradient INTEGER :: ig, ipol, ierr COMPLEX(DP), ALLOCATABLE :: tgrho(:) @@ -484,7 +384,7 @@ rg = rhoeg(ig) * gx( ipol, ig ) tgrho(ig) = tpiba * CMPLX( - AIMAG(rg), DBLE(rg) ) END DO - CALL pinvfft( grho(:,:,:,ipol), tgrho ) + CALL pinvfft( grho(:,ipol), tgrho ) END DO DEALLOCATE(tgrho, STAT=ierr) diff --git a/CPV/chargemix.f90 b/CPV/chargemix.f90 index ab36a0b49..02c71ba3d 100644 --- a/CPV/chargemix.f90 +++ b/CPV/chargemix.f90 @@ -160,7 +160,7 @@ IMPLICIT NONE ! ... declare subroutine arguments - REAL(DP), INTENT(INOUT) :: rhoe(:,:,:) + REAL(DP), INTENT(INOUT) :: rhoe(:) REAL(DP), INTENT(OUT) :: drho INTEGER, INTENT(IN) :: nfi @@ -170,7 +170,7 @@ REAL(DP) :: g02, g12, ar, den, num, rsc REAL(DP) :: alpha(daamax) REAL(DP), ALLOCATABLE :: aa(:,:) - REAL(DP), ALLOCATABLE :: rho_old(:,:,:) + REAL(DP), ALLOCATABLE :: rho_old(:) INTEGER :: ns, sp, is, ism, i, ig LOGICAL, SAVE :: tfirst = .TRUE. INTEGER, SAVE :: dimaa, dimaaold, nrho_t, ierr @@ -305,7 +305,7 @@ END IF - ALLOCATE( rho_old( SIZE(rhoe, 1), SIZE(rhoe, 2), SIZE(rhoe, 3) ), STAT=ierr ) + ALLOCATE( rho_old( SIZE(rhoe) ), STAT=ierr ) IF( ierr /= 0 ) CALL errore(' newrho ', ' allocating rho_old ', ierr) rho_old = rhoe diff --git a/CPV/cp_restart.f90 b/CPV/cp_restart.f90 index f7bf23358..04284b30d 100644 --- a/CPV/cp_restart.f90 +++ b/CPV/cp_restart.f90 @@ -50,7 +50,7 @@ MODULE cp_restart USE gvecw, ONLY : ngw, ngwt, ecutw, gcutw USE reciprocal_vectors, ONLY : ig_l2g, mill_l USE electrons_base, ONLY : nspin, nbnd, nbsp, nelt, nel, & - nupdwn, iupdwn, f, fspin, nudx + nupdwn, iupdwn, f, nudx USE cell_base, ONLY : ibrav, alat, celldm, & symm_type, s_to_r USE ions_base, ONLY : nsp, nat, na, atm, zv, & @@ -1129,6 +1129,7 @@ MODULE cp_restart IF ( ( nspin_ /= nspin ) .OR. & ( nbnd_ /= nbnd ) .OR. ( NINT( nelec_ ) /= nelt ) ) THEN ! + attr = "electron, bands or spin do not match" ierr = 30 ! GOTO 100 diff --git a/CPV/cplib.f90 b/CPV/cplib.f90 index beff39a49..805e82f83 100644 --- a/CPV/cplib.f90 +++ b/CPV/cplib.f90 @@ -238,108 +238,6 @@ ! -!------------------------------------------------------------------------- - SUBROUTINE calphi( c0, ngwx, ema0bg, bec, nkbx, betae, phi, n ) -!----------------------------------------------------------------------- -! input: c0 (orthonormal with s(r(t)), bec=, betae=|beta> -! computes the matrix phi (with the old positions) -! where |phi> = s'|c0> = |c0> + sum q_ij |i> -! where s'=s(r(t)) -! - USE kinds, ONLY: DP - USE ions_base, ONLY: na, nsp - USE io_global, ONLY: stdout - USE cvan, ONLY: ish, nvb - USE uspp_param, ONLY: nh - USE uspp, ONLY: nhsavb=>nkbus, qq - USE gvecw, ONLY: ngw - USE constants, ONLY: pi, fpi - USE control_flags, ONLY: iprint, iprsta - USE mp, ONLY: mp_sum -! - IMPLICIT NONE - - INTEGER, INTENT(IN) :: ngwx, nkbx, n - COMPLEX(DP) :: c0( ngwx, n ), phi( ngwx, n ), betae( ngwx, nkbx ) - REAL(DP) :: ema0bg( ngwx ), bec( nkbx, n ), emtot - - ! local variables - ! - INTEGER :: is, iv, jv, ia, inl, jnl, i, j - REAL(DP), ALLOCATABLE :: qtemp( : , : ) -! - CALL start_clock( 'calphi' ) - - ALLOCATE( qtemp( nhsavb, n ) ) - - phi(:,:) = (0.d0, 0.d0) -! - IF ( nvb > 0 ) THEN - qtemp (:,:) = 0.d0 - DO is=1,nvb - DO iv=1,nh(is) - DO jv=1,nh(is) - IF(ABS(qq(iv,jv,is)) > 1.e-5) THEN - DO ia=1,na(is) - inl=ish(is)+(iv-1)*na(is)+ia - jnl=ish(is)+(jv-1)*na(is)+ia - DO i=1,n - qtemp(inl,i) = qtemp(inl,i) + & - & qq(iv,jv,is)*bec(jnl,i) - END DO - END DO - ENDIF - END DO - END DO - END DO -! - CALL MXMA & - & ( betae, 1, 2*ngwx, qtemp, 1, nhsavb, phi, 1, 2*ngwx, 2*ngw, nhsavb, n ) - END IF -! - DO j=1,n - DO i=1,ngw - phi(i,j)=(phi(i,j)+c0(i,j))*ema0bg(i) - END DO - END DO -! ================================================================= - IF(iprsta > 2) THEN - emtot=0.0d0 - DO j=1,n - DO i=1,ngw - emtot=emtot & - & +2.0d0*DBLE(phi(i,j)*CONJG(c0(i,j)))*ema0bg(i)**(-2.0d0) - END DO - END DO - emtot=emtot/n - - CALL mp_sum( emtot ) - - WRITE( stdout,*) 'in calphi sqrt(emtot)=',SQRT(emtot) - WRITE( stdout,*) - DO is=1,nsp - IF(nsp > 1) THEN - WRITE( stdout,'(33x,a,i4)') ' calphi: bec (is)',is - WRITE( stdout,'(8f9.4)') & - & ((bec(ish(is)+(iv-1)*na(is)+1,i),iv=1,nh(is)),i=1,n) - ELSE - DO ia=1,na(is) - WRITE( stdout,'(33x,a,i4)') ' calphi: bec (ia)',ia - WRITE( stdout,'(8f9.4)') & - & ((bec(ish(is)+(iv-1)*na(is)+ia,i),iv=1,nh(is)),i=1,n) - END DO - END IF - END DO - ENDIF - - DEALLOCATE( qtemp ) - - CALL stop_clock( 'calphi' ) -! - RETURN - END SUBROUTINE calphi - - !----------------------------------------------------------------------- REAL(8) FUNCTION cscnorm( bec, nkbx, cp, ngwx, i, n ) @@ -672,7 +570,7 @@ USE uspp_param, ONLY: nhm, nh USE smooth_grid_dimensions, ONLY: nr1s, nr2s, nr3s, & nr1sx, nr2sx, nr3sx, nnrsx - USE electrons_base, ONLY: n => nbsp, ispin => fspin, f, nspin + USE electrons_base, ONLY: n => nbsp, ispin, f, nspin USE constants, ONLY: pi, fpi USE ions_base, ONLY: nsp, na, nat USE gvecw, ONLY: ggp @@ -1151,27 +1049,35 @@ ! !----------------------------------------------------------------------- - REAL(8) FUNCTION enkin(c) + FUNCTION enkin( c, ngwx, f, n ) !----------------------------------------------------------------------- -! -! calculation of kinetic energy term -! - USE constants, ONLY: pi, fpi - USE electrons_base, ONLY: nx => nbspx, n => nbsp, f - USE gvecw, ONLY: ngw + ! + ! calculation of kinetic energy term + ! + USE kinds, ONLY: DP + USE constants, ONLY: pi, fpi + USE gvecw, ONLY: ngw USE reciprocal_vectors, ONLY: gstart - USE gvecw, ONLY: ggp - USE mp, ONLY: mp_sum - USE cell_base, ONLY: tpiba2 + USE gvecw, ONLY: ggp + USE mp, ONLY: mp_sum + USE mp_global, ONLY: group + USE cell_base, ONLY: tpiba2 IMPLICIT NONE -! input - COMPLEX(8) c(ngw,nx) -! local - INTEGER ig, i - REAL(8) sk(n) ! automatic array -! -! + + REAL(DP) :: enkin + + ! input + + INTEGER, INTENT(IN) :: ngwx, n + COMPLEX(DP), INTENT(IN) :: c( ngwx, n ) + REAL(DP), INTENT(IN) :: f( n ) + ! + ! local + + INTEGER :: ig, i + REAL(DP) :: sk(n) ! automatic array + ! DO i=1,n sk(i)=0.0 DO ig=gstart,ngw @@ -1179,16 +1085,20 @@ END DO END DO - CALL mp_sum( sk(1:n) ) + CALL mp_sum( sk(1:n), group ) enkin=0.0 DO i=1,n enkin=enkin+f(i)*sk(i) END DO - enkin=enkin*tpiba2 + + ! ... reciprocal-space vectors are in units of alat/(2 pi) so a + ! ... multiplicative factor (2 pi/alat)**2 is required + + enkin = enkin * tpiba2 ! RETURN - END FUNCTION enkin + END FUNCTION enkin ! ! !----------------------------------------------------------------------- @@ -1464,7 +1374,7 @@ USE cvan, ONLY :nvb, ish USE uspp, ONLY : nkb, nhsavb=>nkbus, qq USE uspp_param, ONLY: nh - USE electrons_base, ONLY: ispin => fspin + USE electrons_base, ONLY: ispin USE gvecw, ONLY: ngw USE mp, ONLY: mp_sum USE kinds, ONLY: DP @@ -2106,185 +2016,6 @@ RETURN END SUBROUTINE nlfl - -!----------------------------------------------------------------------- - SUBROUTINE ortho & - & (eigr,cp,phi,x0,diff,iter,ccc,eps,max,delt,bephi,becp) -!----------------------------------------------------------------------- -! input = cp (non-orthonormal), beta -! input = phi |phi>=s'|c0> -! output= cp (orthonormal with s( r(t+dt) ) ) -! output= bephi, becp -! the method used is similar to the version in les houches 1988 -! 'simple molecular systems at..' p. 462-463 (18-22) -! xcx + b x + b^t x^t + a = 1 -! where c = b = a = -! where s=s(r(t+dt)) and s'=s(r(t)) -! for vanderbilt pseudo pot - kl & ap -! - USE ions_base, ONLY: na, nat - USE cvan, ONLY: ish, nvb - USE uspp, ONLY : nkb, qq - USE uspp_param, ONLY: nh - USE electrons_base, ONLY: n => nbsp, nbspx, nudx, nspin, nupdwn, iupdwn, f - USE gvecw, ONLY: ngw - USE control_flags, ONLY: iprint, iprsta - USE io_global, ONLY: stdout - USE orthogonalize_base, ONLY: ortho_iterate, diagonalize_rho, sigset, rhoset, & - tauset -! - IMPLICIT NONE -! - COMPLEX(8) cp(ngw,n), phi(ngw,n), eigr(ngw,nat) - REAL(8) x0( nbspx, nbspx ), diff, ccc, eps, delt - INTEGER iter, max - REAL(8) bephi(nkb,n), becp(nkb,n) -! - REAL(8), ALLOCATABLE :: diag(:), work1(:), work2(:), xloc(:,:), & - rhos(:,:), rhor(:,:), u(:,:), & - sig(:,:), rho(:,:), tau(:,:) - - INTEGER :: ngwx, nkbx - - INTEGER istart, nss, ifail, i, j, iss, iv, jv, ia, is, inl, jnl - REAL(8), ALLOCATABLE:: qbephi(:,:), qbecp(:,:) - - ALLOCATE( diag( nudx ), work1( nudx ), work2( nudx ), xloc( nudx, nudx ), & - rhos( nudx, nudx ), rhor( nudx, nudx ), u( nudx, nudx ), & - sig( nudx, nudx ), rho( nudx, nudx ), tau( nudx, nudx ) ) - - ngwx = ngw - nkbx = nkb - ! - ! calculation of becp and bephi - ! - CALL start_clock( 'ortho' ) - - CALL nlsm1( n, 1, nvb, eigr, cp, becp ) - CALL nlsm1( n, 1, nvb, eigr, phi, bephi ) - ! - ! calculation of qbephi and qbecp - ! - ALLOCATE( qbephi( nkbx, n ) ) - ALLOCATE( qbecp ( nkbx, n ) ) - ! - qbephi = 0.d0 - qbecp = 0.d0 -! - DO is=1,nvb - DO iv=1,nh(is) - DO jv=1,nh(is) - IF(ABS(qq(iv,jv,is)).GT.1.e-5) THEN - DO ia=1,na(is) - inl=ish(is)+(iv-1)*na(is)+ia - jnl=ish(is)+(jv-1)*na(is)+ia - DO i=1,n - qbephi(inl,i)= qbephi(inl,i) & - & +qq(iv,jv,is)*bephi(jnl,i) - qbecp (inl,i)=qbecp (inl,i) & - & +qq(iv,jv,is)*becp (jnl,i) - END DO - END DO - ENDIF - END DO - END DO - END DO -! - DO iss = 1, nspin - - nss = nupdwn(iss) - istart = iupdwn(iss) - - ! - ! rho = - ! sig = 1- - ! tau = - ! - CALL rhoset( cp, ngwx, phi, bephi, nkbx, qbecp, n, nss, istart, rho, nudx ) - ! - CALL sigset( cp, ngwx, becp, nkbx, qbecp, n, nss, istart, sig, nudx ) - ! - CALL tauset( phi, ngwx, bephi, nkbx, qbephi, n, nss, istart, tau, nudx ) - ! -! - IF(iprsta.GT.4) THEN - WRITE( stdout,*) - WRITE( stdout,'(26x,a)') ' rho ' - DO i=1,nss - WRITE( stdout,'(7f11.6)') (rho(i,j),j=1,nss) - END DO - WRITE( stdout,*) - WRITE( stdout,'(26x,a)') ' sig ' - DO i=1,nss - WRITE( stdout,'(7f11.6)') (sig(i,j),j=1,nss) - END DO - WRITE( stdout,*) - WRITE( stdout,'(26x,a)') ' tau ' - DO i=1,nss - WRITE( stdout,'(7f11.6)') (tau(i,j),j=1,nss) - END DO - ENDIF -! -! -!----------------------------------------------------------------by ap-- -! - DO j=1,nss - DO i=1,nss - xloc(i,j) = x0(istart-1+i,istart-1+j)*ccc - rhos(i,j)=0.5d0*( rho(i,j)+rho(j,i) ) -! -! on some machines (IBM RS/6000 for instance) the following test allows -! to distinguish between Numbers and Sodium Nitride (NaN, Not a Number). -! If a matrix of Not-Numbers is passed to rs, the most likely outcome is -! that the program goes on forever doing nothing and writing nothing. -! - IF (rhos(i,j).NE.rhos(i,j)) & - & CALL errore('ortho','ortho went bananas',1) - rhor(i,j)=rho(i,j)-rhos(i,j) - END DO - END DO -! - ifail=0 - CALL start_clock( 'rsg' ) - CALL diagonalize_rho( nss, rhos, diag, u ) - ! CALL rs(nudx,nss,rhos,diag,1,u,work1,work2,ifail) - CALL stop_clock( 'rsg' ) -! -! calculation of lagranges multipliers -! - CALL ortho_iterate( u, diag, xloc, sig, rhor, rhos, tau, nudx, nss, max, eps ) - - IF(iprsta.GT.4) THEN - WRITE( stdout,*) - WRITE( stdout,'(26x,a)') ' lambda ' - DO i=1,nss - WRITE( stdout,'(7f11.6)') (xloc(i,j)/f(i+istart-1),j=1,nss) - END DO - ENDIF -! - IF(iprsta.GT.2) THEN - WRITE( stdout,*) ' diff= ',diff,' iter= ',iter - ENDIF -! -! lagrange multipliers -! - DO i=1,nss - DO j=1,nss - x0(istart-1+i,istart-1+j)=xloc(i,j)/ccc - IF (xloc(i,j).NE.xloc(i,j)) & - & CALL errore('ortho','ortho went bananas',2) - END DO - END DO -! - END DO -! - DEALLOCATE(qbecp ) - DEALLOCATE(qbephi) - DEALLOCATE( diag, work1, work2, xloc, rhos, rhor, u, sig, rho, tau ) -! - CALL stop_clock( 'ortho' ) - RETURN - END SUBROUTINE ortho ! !----------------------------------------------------------------------- SUBROUTINE pbc(rin,a1,a2,a3,ainv,rout) @@ -2624,6 +2355,7 @@ ! RETURN END SUBROUTINE rdiag + !----------------------------------------------------------------------- SUBROUTINE rhoofr (nfi,c,irb,eigrb,bec,rhovan,rhor,rhog,rhos,enl,ekin) !----------------------------------------------------------------------- @@ -2638,101 +2370,108 @@ ! ! e_v = sum_i,ij rho_i,ij d^ion_is,ji ! - USE kinds, ONLY: dp - USE control_flags, ONLY: iprint, tbuff, iprsta, thdyn, tpre, trhor - USE ions_base, ONLY: nat, nas => nax, nsp - USE parameters, ONLY: natx, nsx - USE gvecp, ONLY: ng => ngm - USE gvecs - USE gvecb, ONLY: ngb - USE gvecw, ONLY: ngw + USE kinds, ONLY: DP + USE control_flags, ONLY: iprint, tbuff, iprsta, thdyn, tpre, trhor + USE ions_base, ONLY: nat + USE gvecp, ONLY: ngm + USE gvecs, ONLY: ngs, nps, nms + USE gvecb, ONLY: ngb + USE gvecw, ONLY: ngw + USE recvecs_indexes, ONLY: np, nm USE reciprocal_vectors, ONLY: gstart - USE recvecs_indexes, ONLY: np, nm - USE uspp, ONLY: nhsa => nkb - USE uspp_param, ONLY: nh, nhm - USE grid_dimensions, ONLY: nr1, nr2, nr3, & - nr1x, nr2x, nr3x, nnr => nnrx - USE cell_base, ONLY: omega + USE uspp, ONLY: nkb + USE uspp_param, ONLY: nh, nhm + USE grid_dimensions, ONLY: nr1, nr2, nr3, & + nr1x, nr2x, nr3x, nnrx + USE cell_base, ONLY: omega USE smooth_grid_dimensions, ONLY: nr1s, nr2s, nr3s, & - nr1sx, nr2sx, nr3sx, nnrsx - USE electrons_base, ONLY: nx => nbspx, n => nbsp, f, ispin => fspin, nspin - USE constants, ONLY: pi, fpi - USE mp, ONLY: mp_sum - ! use local_pseudo -! - USE cdvan - USE dener - USE io_global, ONLY: stdout - USE funct, ONLY: dft_is_meta - USE cg_module, ONLY : tcg + nr1sx, nr2sx, nr3sx, nnrsx + USE electrons_base, ONLY: nx => nbspx, n => nbsp, f, ispin, nspin + USE constants, ONLY: pi, fpi + USE mp, ONLY: mp_sum + USE dener, ONLY: denl, dekin + USE io_global, ONLY: stdout + USE funct, ONLY: dft_is_meta + USE cg_module, ONLY: tcg + USE cp_main_variables, ONLY: rhopr ! IMPLICIT NONE - REAL(8) bec(nhsa,n), rhovan(nhm*(nhm+1)/2,nat,nspin) - REAL(8) rhor(nnr,nspin), rhos(nnrsx,nspin) - REAL(8) enl, ekin - COMPLEX(8) eigrb(ngb,nat), c(ngw,nx), rhog(ng,nspin) - INTEGER irb(3,nat), nfi + REAL(DP) bec(nkb,n), rhovan( nhm * ( nhm + 1 ) / 2, nat, nspin ) + REAL(DP) rhor(nnrx,nspin), rhos(nnrsx,nspin) + REAL(DP) enl, ekin + COMPLEX(DP) eigrb( ngb, nat ), c( ngw, nx ), rhog( ngm, nspin ) + INTEGER irb( 3, nat ), nfi + + ! local variables + INTEGER iss, isup, isdw, iss1, iss2, ios, i, ir, ig - REAL(8) rsumr(2), rsumg(2), sa1, sa2 - REAL(8) rnegsum, rmin, rmax, rsum - REAL(8), EXTERNAL :: enkin, ennl - COMPLEX(8) ci,fp,fm - COMPLEX(8), ALLOCATABLE :: psi(:), psis(:) -! -! + REAL(DP) rsumr(2), rsumg(2), sa1, sa2 + REAL(DP) rnegsum, rmin, rmax, rsum + REAL(DP), EXTERNAL :: enkin, ennl + COMPLEX(DP) ci,fp,fm + COMPLEX(DP), ALLOCATABLE :: psi(:), psis(:) + LOGICAL, SAVE :: first = .TRUE. + + ! + CALL start_clock( 'rhoofr' ) - ALLOCATE( psi( nnr ) ) + + ALLOCATE( psi( nnrx ) ) ALLOCATE( psis( nnrsx ) ) + ci=(0.0,1.0) + DO iss=1,nspin rhor(:,iss) = 0.d0 rhos(:,iss) = 0.d0 rhog(:,iss) = (0.d0, 0.d0) END DO -! -! ================================================================== -! calculation of kinetic energy ekin -! ================================================================== - ekin=enkin(c) - IF(tpre) CALL denkin(c,dekin) -! -! ================================================================== -! calculation of non-local energy -! ================================================================== - enl=ennl(rhovan, bec) - IF(tpre) CALL dennl(bec,denl) -! -! warning! trhor and thdyn are not compatible yet! -! - IF(trhor.AND.(.NOT.thdyn))THEN -! ================================================================== -! charge density is read from unit 47 -! ================================================================== -#ifdef __PARA - CALL read_rho(47,nspin,rhor) -#else - READ(47) ((rhor(ir,iss),ir=1,nnr),iss=1,nspin) -#endif - REWIND 47 + ! + ! calculation of kinetic energy ekin + ! + ekin = enkin( c, ngw, f, n ) + ! + IF( tpre ) CALL denkin( c, dekin ) + ! + ! calculation of non-local energy + ! + enl = ennl( rhovan, bec ) + ! + IF( tpre ) CALL dennl( bec, denl ) + ! + ! warning! trhor and thdyn are not compatible yet! + ! + IF( trhor .AND. ( .NOT. thdyn ) ) THEN + ! + ! non self-consistent calculation + ! charge density is read from unit 47 + ! + IF( first ) THEN + CALL read_rho( 47, nspin, rhor ) + rhopr = rhor + first = .FALSE. + ELSE + rhor = rhopr + END IF ! IF(nspin.EQ.1)THEN iss=1 - DO ir=1,nnr + DO ir=1,nnrx psi(ir)=CMPLX(rhor(ir,iss),0.d0) END DO CALL fwfft(psi,nr1,nr2,nr3,nr1x,nr2x,nr3x) - DO ig=1,ng + DO ig=1,ngm rhog(ig,iss)=psi(np(ig)) END DO ELSE isup=1 isdw=2 - DO ir=1,nnr + DO ir=1,nnrx psi(ir)=CMPLX(rhor(ir,isup),rhor(ir,isdw)) END DO CALL fwfft(psi,nr1,nr2,nr3,nr1x,nr2x,nr3x) - DO ig=1,ng + DO ig=1,ngm fp=psi(np(ig))+psi(nm(ig)) fm=psi(np(ig))-psi(nm(ig)) rhog(ig,isup)=0.5*CMPLX( DBLE(fp),AIMAG(fm)) @@ -2836,7 +2575,7 @@ psi(np(ig))= rhog(ig,iss) END DO CALL invfft(psi,nr1,nr2,nr3,nr1x,nr2x,nr3x) - DO ir=1,nnr + DO ir=1,nnrx rhor(ir,iss)=DBLE(psi(ir)) END DO ELSE @@ -2851,7 +2590,7 @@ psi(np(ig))=rhog(ig,isup)+ci*rhog(ig,isdw) END DO CALL invfft(psi,nr1,nr2,nr3,nr1x,nr2x,nr3x) - DO ir=1,nnr + DO ir=1,nnrx rhor(ir,isup)= DBLE(psi(ir)) rhor(ir,isdw)=AIMAG(psi(ir)) END DO @@ -2898,7 +2637,7 @@ ! ! IF(iprsta.GE.2) THEN - CALL checkrho(nnr,nspin,rhor,rmin,rmax,rsum,rnegsum) + CALL checkrho(nnrx,nspin,rhor,rmin,rmax,rsum,rnegsum) rnegsum=rnegsum*omega/DBLE(nr1*nr2*nr3) rsum=rsum*omega/DBLE(nr1*nr2*nr3) WRITE( stdout,'(a,4(1x,f12.6))') & @@ -3441,100 +3180,6 @@ END SUBROUTINE spinsq ! -!------------------------------------------------------------------------- - SUBROUTINE updatc(ccc,x0,phi,bephi,becp,bec,cp) -!----------------------------------------------------------------------- -! input ccc : dt**2/emass (unchanged in output) -! input x0 : converged lambdas from ortho-loop (unchanged in output) -! input cp : non-orthonormal cp=c0+dh/dc*ccc -! input bec : -! input phi -! output cp : orthonormal cp=cp+lambda*phi -! output bec: bec=becp+lambda*bephi -! - USE ions_base, ONLY: nsp, na - USE io_global, ONLY: stdout - USE cvan, ONLY: nvb, ish - USE uspp, ONLY: nhsa => nkb, nhsavb=>nkbus - USE uspp_param, ONLY: nh - USE gvecw, ONLY: ngw - USE electrons_base, ONLY: nx => nbspx, n => nbsp - USE control_flags, ONLY: iprint, iprsta -! - IMPLICIT NONE -! - COMPLEX(8) cp(ngw,n), phi(ngw,n) - REAL(8) bec(nhsa,n), x0(nx,nx), ccc - REAL(8) bephi(nhsa,n), becp(nhsa,n) -! local variables - INTEGER i, j, ig, is, iv, ia, inl - REAL(8) wtemp(n,nhsa) ! automatic array - COMPLEX(8), ALLOCATABLE :: wrk2(:,:) -! -! lagrange multipliers -! - CALL start_clock( 'updatc' ) - - ALLOCATE( wrk2( ngw, n ) ) - - wrk2 = (0.d0, 0.d0) - DO j=1,n - CALL DSCAL(n,ccc,x0(1,j),1) - END DO -! -! wrk2 = sum_m lambda_nm s(r(t+dt))|m> -! - CALL MXMA(phi,1,2*ngw,x0,nx,1,wrk2,1,2*ngw,2*ngw,n,n) -! - DO i=1,n - DO ig=1,ngw - cp(ig,i)=cp(ig,i)+wrk2(ig,i) - END DO - END DO -! -! updating of the -! -! bec of vanderbilt species are updated -! - IF(nvb.GT.0)THEN - CALL MXMA(x0,1,nx,bephi,nhsa,1,wtemp,1,n,n,n,nhsavb) -! - DO i=1,n - DO inl=1,nhsavb - bec(inl,i)=wtemp(i,inl)+becp(inl,i) - END DO - END DO - ENDIF -! - IF (iprsta.GT.2) THEN - WRITE( stdout,*) - DO is=1,nsp - IF(nsp.GT.1) THEN - WRITE( stdout,'(33x,a,i4)') ' updatc: bec (is)',is - WRITE( stdout,'(8f9.4)') & - & ((bec(ish(is)+(iv-1)*na(is)+1,i),iv=1,nh(is)),i=1,n) - ELSE - DO ia=1,na(is) - WRITE( stdout,'(33x,a,i4)') ' updatc: bec (ia)',ia - WRITE( stdout,'(8f9.4)') & - & ((bec(ish(is)+(iv-1)*na(is)+ia,i),iv=1,nh(is)),i=1,n) - END DO - END IF - WRITE( stdout,*) - END DO - ENDIF -! - DO j=1,n - CALL DSCAL(n,1.0/ccc,x0(1,j),1) - END DO - - DEALLOCATE( wrk2 ) -! - CALL stop_clock( 'updatc' ) -! - RETURN - END SUBROUTINE updatc -! !----------------------------------------------------------------------- SUBROUTINE vofrho(nfi,rhor,rhog,rhos,rhoc,tfirst,tlast, & & ei1,ei2,ei3,irb,eigrb,sfac,tau0,fion) @@ -3551,7 +3196,7 @@ ! rhos output: total potential on smooth real space grid ! USE kinds, ONLY: dp - USE control_flags, ONLY: iprint, tvlocw, iprsta, thdyn, tpre, tfor, tprnfor + USE control_flags, ONLY: iprint, iprsta, thdyn, tpre, tfor, tprnfor USE io_global, ONLY: stdout USE parameters, ONLY: natx, nsx USE ions_base, ONLY: nas => nax, nsp, na, nat @@ -3841,14 +3486,6 @@ ! etot=ekin+eht+epseu+enl+exc+ebac IF(tpre) detot=dekin+dh+dps+denl+dxc+dsr -! - IF(tvlocw.AND.tlast)THEN -#ifdef __PARA - CALL write_rho(46,nspin,rhor) -#else - WRITE(46) ((rhor(ir,iss),ir=1,nnr),iss=1,nspin) -#endif - ENDIF ! DEALLOCATE(rhotmp) DEALLOCATE(vtemp) diff --git a/CPV/cplib_meta.f90 b/CPV/cplib_meta.f90 index 098898641..470ba39f6 100644 --- a/CPV/cplib_meta.f90 +++ b/CPV/cplib_meta.f90 @@ -75,7 +75,7 @@ use cell_base use smooth_grid_dimensions, only: nr1s, nr2s, nr3s, & nr1sx, nr2sx, nr3sx, nnrsx - use electrons_base, only: nx => nbspx, n => nbsp, f, ispin => fspin, nspin + use electrons_base, only: nx => nbspx, n => nbsp, f, ispin, nspin use constants, only: pi, fpi ! use cdvan diff --git a/CPV/cpr.f90 b/CPV/cpr.f90 index 5c88bc1b8..024eea913 100644 --- a/CPV/cpr.f90 +++ b/CPV/cpr.f90 @@ -28,7 +28,7 @@ SUBROUTINE cprmain( tau, fion_out, etot_out ) USE energies, ONLY : eht, epseu, exc, etot, eself, enl, & ekin, atot, entropy, egrand, enthal, & ekincm, print_energies - USE electrons_base, ONLY : nbspx, nbsp, ispin => fspin, f, nspin + USE electrons_base, ONLY : nbspx, nbsp, ispin, f, nspin USE electrons_base, ONLY : nel, iupdwn, nupdwn, nudx, nelt USE efield_module, ONLY : efield, epol, tefield, allocate_efield, & efield_update, ipolp, qmat, gqq, & @@ -130,6 +130,8 @@ SUBROUTINE cprmain( tau, fion_out, etot_out ) USE metadyn_base, ONLY : set_target USE autopilot, ONLY : pilot USE ions_nose, ONLY : ions_nose_allocate, ions_nose_shiftvar + USE orthogonalize, ONLY : ortho + USE orthogonalize_base, ONLY : updatc ! IMPLICIT NONE ! @@ -396,8 +398,8 @@ SUBROUTINE cprmain( tau, fion_out, etot_out ) ! IF ( tortho ) THEN ! - CALL ortho( eigr, cm, phi, lambda, bigr, iter, ccc, & - ortho_eps, ortho_max, delt, bephi, becp ) + CALL ortho( eigr, cm(:,:,1,1), phi(:,:,1,1), lambda, bigr, iter, ccc, & + bephi, becp ) ! ELSE ! @@ -417,7 +419,9 @@ SUBROUTINE cprmain( tau, fion_out, etot_out ) ! IF ( iprsta >= 3 ) CALL print_lambda( lambda, nbsp, 9, 1.D0 ) ! - IF ( tortho ) CALL updatc( ccc, lambda, phi, bephi, becp, bec, cm ) + IF ( tortho ) & + CALL updatc( ccc, nbsp, lambda, SIZE(lambda,1), phi, SIZE(phi,1), & + bephi, SIZE(bephi,1), becp, bec, cm ) ! CALL calbec( nvb+1, nsp, eigr, cm, bec ) ! diff --git a/CPV/electrons.f90 b/CPV/electrons.f90 index 05f02940d..f20b7f2a6 100644 --- a/CPV/electrons.f90 +++ b/CPV/electrons.f90 @@ -687,7 +687,7 @@ SUBROUTINE cp_eigs( nfi, bec, c0, irb, eigrb, rhor, rhog, rhos, lambdap, lambda, tau0, h ) use ensemble_dft, only: tens, ismear, z0, c0diag, becdiag - use electrons_base, only: nx => nbspx, n => nbsp, ispin => fspin, f, nspin + use electrons_base, only: nx => nbspx, n => nbsp, ispin, f, nspin use electrons_base, only: nel, iupdwn, nupdwn, nudx, nelt use energies, only: enl, ekin use uspp, only: rhovan => becsum diff --git a/CPV/emptystates.f90 b/CPV/emptystates.f90 index 76ee020ea..29de87e4e 100644 --- a/CPV/emptystates.f90 +++ b/CPV/emptystates.f90 @@ -455,7 +455,7 @@ COMPLEX(DP), INTENT(INOUT) :: c_occ(:,:,:,:), c_emp(:,:,:,:) TYPE (wave_descriptor), INTENT(IN) :: wfill, wempt TYPE (atoms_type), INTENT(INOUT) :: atoms ! ions structure - REAL (DP), INTENT(IN) :: vpot(:,:,:,:) + REAL (DP), INTENT(IN) :: vpot(:,:) LOGICAL, INTENT(IN) :: tortho COMPLEX(DP) :: eigr(:,:) ! @@ -525,7 +525,7 @@ CALL nlsm1 ( n_emp, 1, nspnl, eigr, c_emp( 1, 1, ik, ispin ), bece( 1, (ispin-1)*n_emp + 1 ) ) CALL dforce_all( ispin, c_emp(:,:,1,ispin), wempt, fi(:,1,ispin), eforce(:,:,1,ispin), & - vpot(:,:,:,ispin), eigr, bece ) + vpot(:,ispin), eigr, bece ) ! ... Steepest descent DO i = 1, n_emp @@ -613,7 +613,7 @@ COMPLEX(DP), INTENT(inout) :: c_emp(:,:,:,:) TYPE (wave_descriptor), INTENT(IN) :: wempt - REAL (DP), INTENT(in) :: vpot(:,:,:,:), fi(:,:,:) + REAL (DP), INTENT(in) :: vpot(:,:), fi(:,:,:) COMPLEX (DP) :: eforce(:,:,:,:) LOGICAL, INTENT(IN) :: TORTHO COMPLEX(DP) :: eigr(:,:) @@ -649,7 +649,7 @@ ! ... Calculate | dH / dpsi(j) > ! CALL dforce_all( ispin, c_emp(:,:,1,ispin), wempt, fi(:,1,ispin), eforce(:,:,1,ispin), & - vpot(:,:,:,ispin), eigr, bece ) + vpot(:,ispin), eigr, bece ) ! ... Calculate Eij = < psi(i) | H | psi(j) > = < psi(i) | dH / dpsi(j) > DO i = 1, n_emp diff --git a/CPV/exch_corr.f90 b/CPV/exch_corr.f90 index 3e1b02c48..f54bedc54 100644 --- a/CPV/exch_corr.f90 +++ b/CPV/exch_corr.f90 @@ -27,40 +27,31 @@ CONTAINS !=----------------------------------------------------------------------------=! - SUBROUTINE v2gc(v2xc, grho, rhoer, vpot) + SUBROUTINE v2gc( v2xc, grho, rhoer, vpot ) - USE kinds, ONLY: DP - USE fft - USE fft_base, ONLY: dfftp - USE cell_base, ONLY: tpiba - USE mp_global + USE kinds, ONLY: DP + USE fft, ONLY: pfwfft, pinvfft + USE cell_base, ONLY: tpiba USE reciprocal_vectors, ONLY: gstart, gx - USE gvecp, ONLY: ngm + use grid_dimensions, only: nnrx + USE gvecp, ONLY: ngm ! implicit none ! - REAL(DP) :: vpot(:,:,:,:) - REAL(DP), intent(in) :: v2xc(:,:,:,:,:) - REAL(DP), intent(in) :: grho(:,:,:,:,:) - REAL(DP), intent(in) :: rhoer(:,:,:,:) + REAL(DP) :: vpot(:,:) + REAL(DP), intent(in) :: v2xc(:,:,:) + REAL(DP), intent(in) :: grho(:,:,:) + REAL(DP), intent(in) :: rhoer(:,:) ! - integer :: ig, ipol, nxl, nyl, nzl, i, j, k, is, js, nspin - integer :: ldx, ldy, ldz - COMPLEX(DP), allocatable :: psi(:,:,:) + integer :: ig, ipol, is, js, nspin + COMPLEX(DP), allocatable :: psi(:) COMPLEX(DP), allocatable :: vtemp(:) COMPLEX(DP), allocatable :: vtemp_pol(:) - REAL(DP), ALLOCATABLE :: v(:,:,:) + REAL(DP), ALLOCATABLE :: v(:) REAL(DP) :: fac ! ... - ldx = dfftp%nr1x - ldy = dfftp%nr2x - ldz = dfftp%npl - nxl = MIN( dfftp%nr1, SIZE( grho, 1 ) ) - nyl = MIN( dfftp%nr2, SIZE( grho, 2 ) ) - nzl = MIN( dfftp%npl, SIZE( grho, 3 ) ) - nspin = SIZE(rhoer,4) + nspin = SIZE(rhoer,2) - !fac = REAL(nspin) fac = 1.0d0 ALLOCATE( vtemp( ngm ) ) @@ -68,20 +59,14 @@ DO js = 1, nspin ! - ALLOCATE( psi( ldx, ldy, ldz ) ) + ALLOCATE( psi( nnrx ) ) ! vtemp = 0.0d0 DO ipol = 1, 3 DO is = 1, nspin ! - DO k = 1, nzl - DO j = 1, nyl - DO i = 1, nxl - psi(i,j,k) = fac * v2xc(i,j,k,js,is) * grho(i,j,k,ipol,is) - END DO - END DO - END DO + psi( 1:nnrx ) = fac * v2xc( 1:nnrx, js, is ) * grho( 1:nnrx, ipol, is ) ! CALL pfwfft( vtemp_pol, psi ) ! @@ -94,17 +79,12 @@ ! DEALLOCATE( psi ) - ALLOCATE( v( ldx, ldy, ldz ) ) + ALLOCATE( v( nnrx ) ) + v( 1:nnrx ) = 0.0d0 ! CALL pinvfft( v, vtemp ) - DO k = 1, nzl - DO j = 1, nyl - DO i = 1, nxl - vpot(i,j,k,js) = vpot(i,j,k,js) - v(i,j,k) - END DO - END DO - END DO + vpot( 1:nnrx, js ) = vpot( 1:nnrx, js) - v( 1:nnrx ) DEALLOCATE( v ) @@ -120,25 +100,21 @@ SUBROUTINE stress_gc(grho, v2xc, gcpail, omega) ! - use grid_dimensions, only: nr1, nr2, nr3 - USE fft_base, ONLY: dfftp + use grid_dimensions, only: nr1, nr2, nr3, nnrx IMPLICIT NONE ! - REAL(DP) :: v2xc(:,:,:,:,:) - REAL(DP) :: grho(:,:,:,:,:) + REAL(DP) :: v2xc(:,:,:) + REAL(DP) :: grho(:,:,:) REAL(DP) :: gcpail(6) REAL(DP) :: omega ! REAL(DP) :: stre, grhoi, grhoj - INTEGER :: i, j, k, ipol, jpol, ic, nxl, nyl, nzl, is, js, nspin + INTEGER :: i, ipol, jpol, ic, is, js, nspin INTEGER, DIMENSION(6), PARAMETER :: alpha = (/ 1,2,3,2,3,3 /) INTEGER, DIMENSION(6), PARAMETER :: beta = (/ 1,1,1,2,2,3 /) ! ... - nxl = MIN( dfftp%nr1, SIZE( grho, 1 ) ) - nyl = MIN( dfftp%nr2, SIZE( grho, 2 ) ) - nzl = MIN( dfftp%npl, SIZE( grho, 3 ) ) - nspin = SIZE(grho,5) + nspin = SIZE(grho,3) DO ic = 1, 6 ipol = alpha(ic) @@ -146,12 +122,8 @@ stre = 0.0d0 DO is = 1, nspin DO js = 1, nspin - DO k = 1, nzl - DO j = 1, nyl - DO i = 1, nxl - stre = stre + v2xc(i,j,k,is,js) * grho(i,j,k,ipol,js) * grho(i,j,k,jpol,is) - END DO - END DO + DO i = 1, nnrx + stre = stre + v2xc(i,is,js) * grho(i,ipol,js) * grho(i,jpol,is) END DO END DO END DO @@ -184,8 +156,8 @@ COMPLEX(DP) :: vxc(:,:) COMPLEX(DP), INTENT(IN) :: sfac(:,:) REAL(DP) :: dexc(:), strvxc - REAL(DP) :: grho(:,:,:,:,:) - REAL(DP) :: v2xc(:,:,:,:,:) + REAL(DP) :: grho(:,:,:) + REAL(DP) :: v2xc(:,:,:) REAL(DP) :: GAgx_L(:,:) REAL(DP) :: rhocp(:,:) @@ -275,70 +247,44 @@ SUBROUTINE exch_corr_energy(rhoetr, rhoetg, grho, vpot, sxc, vxc, v2xc) - USE kinds, ONLY: DP - USE grid_dimensions, ONLY: nr1l, nr2l, nr3l + USE kinds, ONLY: DP + use grid_dimensions, only: nnrx USE funct, ONLY: dft_is_gradient - REAL (DP) :: rhoetr(:,:,:,:) + REAL (DP) :: rhoetr(:,:) COMPLEX(DP) :: rhoetg(:,:) - REAL (DP) :: grho(:,:,:,:,:) - REAL (DP) :: vpot(:,:,:,:) + REAL (DP) :: grho(:,:,:) + REAL (DP) :: vpot(:,:) REAL (DP) :: sxc ! E_xc energy REAL (DP) :: vxc ! SUM ( v(r) * rho(r) ) - REAL (DP) :: v2xc(:,:,:,:,:) - REAL (DP) :: ddot - - INTEGER :: nspin, nnr, ispin, j, k, i + REAL (DP) :: v2xc(:,:,:) + ! + REAL (DP), EXTERNAL :: ddot + INTEGER :: nspin, ispin logical :: is_gradient is_gradient = dft_is_gradient() - ! vpot = vxc(rhoetr); vpot(r) <-- u(r) - nnr = SIZE( rhoetr, 1 ) * SIZE( rhoetr, 2 ) * SIZE( rhoetr, 3 ) - nspin = SIZE( rhoetr, 4 ) + ! vpot = vxc(rhoetr); vpot(r) <-- u(r) - ! - IF( nnr /= nr3l * nr2l * nr1l ) THEN - DO ispin = 1, nspin - DO k = 1, SIZE( rhoetr, 3 ) - DO j = 1, SIZE( rhoetr, 2 ) - DO i = 1, SIZE( rhoetr, 1 ) - IF( i > nr1l .OR. j > nr2l .OR. k > nr3l ) THEN - rhoetr( i, j, k, ispin ) = 0.0d0 - IF( is_gradient ) THEN - grho ( i, j, k, :, ispin ) = 0.0d0 - END IF - END IF - END DO - END DO - END DO - END DO - END IF + nspin = SIZE( rhoetr, 2 ) + ! + CALL exch_corr_wrapper( nnrx, nspin, grho(1,1,1), rhoetr(1,1), sxc, vpot(1,1), v2xc(1,1,1) ) + ! + IF( dft_is_gradient() ) THEN + ! ... vpot additional term for gradient correction + CALL v2gc( v2xc, grho, rhoetr, vpot ) + END If - ! - CALL exch_corr_wrapper( nnr, nspin, grho(1,1,1,1,1), rhoetr(1,1,1,1), & - sxc, vpot(1,1,1,1), v2xc(1,1,1,1,1) ) - - ! - IF( dft_is_gradient() ) THEN - ! ... vpot additional term for gradient correction - CALL v2gc( v2xc, grho, rhoetr, vpot ) - END If - - ! - ! vxc = SUM( vpot * rhoetr ) - ! - vxc = 0.0d0 - DO ispin = 1, nspin - DO k = 1, nr3l - DO j = 1, nr2l - vxc = vxc + & - DDOT ( nr1l, vpot(1,j,k,ispin), 1, rhoetr(1,j,k,ispin), 1 ) - END DO - END DO - END DO + ! + ! vxc = SUM( vpot * rhoetr ) + ! + vxc = 0.0d0 + DO ispin = 1, nspin + vxc = vxc + DDOT ( nnrx, vpot(1,ispin), 1, rhoetr(1,ispin), 1 ) + END DO RETURN diff --git a/CPV/fft.f90 b/CPV/fft.f90 index 47b440888..51c688c54 100644 --- a/CPV/fft.f90 +++ b/CPV/fft.f90 @@ -154,9 +154,9 @@ IMPLICIT NONE - COMPLEX(DP), INTENT(INOUT) :: cpsi(:,:,:) + COMPLEX(DP), INTENT(INOUT) :: cpsi(:) COMPLEX(DP), INTENT(OUT) :: C(:) - COMPLEX(DP), ALLOCATABLE :: psi(:,:,:) + COMPLEX(DP), ALLOCATABLE :: psi(:) COMPLEX(DP), ALLOCATABLE :: zwrk(:) REAL(DP) :: t1 INTEGER :: ierr @@ -166,20 +166,16 @@ IF ( first ) & CALL errore( ' pfwfft 2 ', ' fft not initialized ', 1 ) - IF ( SIZE( cpsi, 1 ) /= dfftp%nr1x ) THEN - WRITE( stdout, * ) 'Values = ', SIZE( cpsi, 1 ), dfftp%nr1x + IF ( SIZE( cpsi ) /= dfftp%nnr ) THEN + WRITE( stdout, * ) 'Values = ', SIZE( cpsi ), dfftp%nnr CALL errore( ' pfwfft 2 ', ' inconsistent array dimensions ', 1 ) END IF - IF ( SIZE( cpsi, 2 ) /= dfftp%nr2x ) & - CALL errore( ' pfwfft 2 ', ' inconsistent array dimensions ', 2 ) - IF ( SIZE( cpsi, 3 ) /= dfftp%npl ) & - CALL errore( ' pfwfft 2 ', ' inconsistent array dimensions ', 3 ) #if defined __PARA ALLOCATE( zwrk( dfftp%nsp( mpime + 1 ) * dfftp%nr3x ) ) - CALL pc3fft_drv(cpsi(1,1,1), zwrk, -1, dfftp, FFT_MODE_POTE) + CALL pc3fft_drv(cpsi(1), zwrk, -1, dfftp, FFT_MODE_POTE) CALL psi2c( zwrk, SIZE( zwrk ), c(1), c(1), ng, 10 ) @@ -187,7 +183,7 @@ #else - ALLOCATE( psi( SIZE( cpsi, 1 ), SIZE( cpsi, 2 ), SIZE( cpsi, 3 ) ) ) + ALLOCATE( psi( SIZE( cpsi ) ) ) psi = cpsi @@ -221,10 +217,10 @@ IMPLICIT NONE - REAL(DP), INTENT(IN) :: A(:,:,:) + REAL(DP), INTENT(IN) :: A(:) COMPLEX(DP) :: C(:) - COMPLEX(DP), allocatable :: psi(:,:,:) + COMPLEX(DP), allocatable :: psi(:) COMPLEX(DP), ALLOCATABLE :: zwrk(:) REAL(DP) :: t1 INTEGER :: ierr, ig, k, is @@ -234,14 +230,10 @@ IF ( first ) & CALL errore( ' pfwfft 1 ', ' fft not initialized ', 1 ) - IF ( SIZE( A, 1 ) /= dfftp%nr1x ) & + IF ( SIZE( A ) /= dfftp%nnr ) & CALL errore( ' pfwfft 1 ', ' inconsistent array dimensions ', 1 ) - IF ( SIZE( A, 2 ) /= dfftp%nr2x ) & - CALL errore( ' pfwfft 1 ', ' inconsistent array dimensions ', 2 ) - IF ( SIZE( A, 3 ) /= dfftp%npl ) & - CALL errore( ' pfwfft 1 ', ' inconsistent array dimensions ', 3 ) - ALLOCATE( psi( SIZE(A,1), SIZE(A,2), SIZE(A,3) ), STAT=ierr) + ALLOCATE( psi( SIZE(A) ), STAT=ierr) IF( ierr /= 0 ) call errore(' PFWFFT ', ' allocation of psi failed ' ,0) psi = CMPLX( A, 0.d0 ) @@ -250,7 +242,7 @@ ALLOCATE( zwrk( dfftp%nsp( mpime + 1 ) * dfftp%nr3x ) ) - CALL pc3fft_drv(psi(1,1,1), zwrk, -1, dfftp, FFT_MODE_POTE) + CALL pc3fft_drv(psi(1), zwrk, -1, dfftp, FFT_MODE_POTE) CALL psi2c( zwrk(1), SIZE( zwrk ), c(1), c(1), ng, 10 ) @@ -287,12 +279,12 @@ IMPLICIT NONE - REAL(DP), INTENT(INOUT) :: C(:,:,:) + REAL(DP), INTENT(INOUT) :: C(:) REAL(DP), INTENT(IN), OPTIONAL :: ALPHA COMPLEX(DP), INTENT(IN) :: A(:) INTEGER :: ierr - COMPLEX(DP), ALLOCATABLE :: psi(:,:,:) + COMPLEX(DP), ALLOCATABLE :: psi(:) COMPLEX(DP), ALLOCATABLE :: zwrk(:) REAL(DP) t1 ! @@ -301,14 +293,10 @@ IF ( first ) & CALL errore(' pinvfft ',' fft not initialized ', 0 ) - IF ( SIZE( c, 1 ) /= dfftp%nr1x ) & + IF ( SIZE( c ) /= dfftp%nnr ) & CALL errore( ' pinvfft 2 ', ' inconsistent array dimensions ', 1 ) - IF ( SIZE( c, 2 ) /= dfftp%nr2x ) & - CALL errore( ' pinvfft 2 ', ' inconsistent array dimensions ', 2 ) - IF ( SIZE( c, 3 ) /= dfftp%npl ) & - CALL errore( ' pinvfft 2 ', ' inconsistent array dimensions ', 3 ) - ALLOCATE( psi( SIZE( c, 1 ), SIZE( c, 2 ), SIZE( c, 3 ) ), STAT=ierr) + ALLOCATE( psi( SIZE( c ) ), STAT=ierr) IF( ierr /= 0 ) call errore(' PFWFFT ', ' allocation of psi failed ' ,0) #if defined __PARA @@ -321,7 +309,7 @@ CALL c2psi( zwrk, SIZE( zwrk ), a(1), a(1), ng, 11 ) END IF - CALL pc3fft_drv(psi(1,1,1), zwrk, +1, dfftp, FFT_MODE_POTE) + CALL pc3fft_drv(psi(1), zwrk, +1, dfftp, FFT_MODE_POTE) DEALLOCATE( zwrk ) @@ -370,10 +358,10 @@ COMPLEX(DP) :: C(:) COMPLEX(DP), OPTIONAL :: CA(:) - COMPLEX(DP) :: psi(:,:,:) + COMPLEX(DP) :: psi(:) REAL(DP) :: T1 INTEGER :: ierr - COMPLEX(DP), ALLOCATABLE :: psitmp(:,:,:) + COMPLEX(DP), ALLOCATABLE :: psitmp(:) COMPLEX(DP), ALLOCATABLE :: zwrk(:) t1 = cclock() @@ -381,19 +369,15 @@ IF ( first ) & CALL errore(' pw_fwfft 1 ',' fft not initialized ', 1 ) - IF ( SIZE( psi, 1 ) /= dffts%nr1x ) & + IF ( SIZE( psi ) /= dffts%nnr ) & CALL errore( ' pw_fwfft 1 ', ' inconsistent array dimensions ', 1 ) - IF ( SIZE( psi, 2 ) /= dffts%nr2x ) & - CALL errore( ' pw_fwfft 1 ', ' inconsistent array dimensions ', 2 ) - IF ( SIZE( psi, 3 ) /= dffts%npl ) & - CALL errore( ' pw_fwfft 1 ', ' inconsistent array dimensions ', 3 ) #if defined __PARA ALLOCATE( zwrk( dffts%nsp( mpime + 1 ) * dffts%nr3x ) ) - CALL pc3fft_drv(psi(1,1,1), zwrk, -1, dffts, FFT_MODE_WAVE) + CALL pc3fft_drv(psi(1), zwrk, -1, dffts, FFT_MODE_WAVE) IF( PRESENT( ca ) ) THEN CALL psi2c( zwrk, SIZE( zwrk ), c(1), ca(1), ngw, 2 ) @@ -405,7 +389,7 @@ #else - ALLOCATE( psitmp( SIZE( psi, 1 ), SIZE( psi, 2 ), SIZE( psi, 3 ) ) ) + ALLOCATE( psitmp( SIZE( psi ) ) ) psitmp = psi @@ -441,7 +425,7 @@ COMPLEX(DP), INTENT(IN) :: C(:) COMPLEX(DP), INTENT(IN), OPTIONAL :: CA(:) - COMPLEX(DP) :: psi(:,:,:) + COMPLEX(DP) :: psi(:) COMPLEX(DP), ALLOCATABLE :: zwrk(:) REAL(DP) :: T1 @@ -451,12 +435,8 @@ T1 = cclock() - IF ( SIZE( psi, 1 ) /= dffts%nr1x ) & + IF ( SIZE( psi ) /= dffts%nnr ) & CALL errore( ' pw_invfft 1 ', ' inconsistent array dimensions ', 1 ) - IF ( SIZE( psi, 2 ) /= dffts%nr2x ) & - CALL errore( ' pw_invfft 1 ', ' inconsistent array dimensions ', 2 ) - IF ( SIZE( psi, 3 ) /= dffts%npl ) & - CALL errore( ' pw_invfft 1 ', ' inconsistent array dimensions ', 3 ) #if defined __PARA @@ -472,7 +452,7 @@ END IF END IF - CALL pc3fft_drv(psi(1,1,1), zwrk, +1, dffts, FFT_MODE_WAVE) + CALL pc3fft_drv(psi(1), zwrk, +1, dffts, FFT_MODE_WAVE) DEALLOCATE( zwrk ) diff --git a/CPV/fields_type.f90 b/CPV/fields_type.f90 index e1a35e96d..4afa0321b 100644 --- a/CPV/fields_type.f90 +++ b/CPV/fields_type.f90 @@ -9,7 +9,6 @@ USE kinds, ONLY: DP USE parallel_types, ONLY: descriptor, processors_grid - USE descriptors_module, ONLY: desc_init IMPLICIT NONE PRIVATE SAVE diff --git a/CPV/forces.f90 b/CPV/forces.f90 index 0090c9542..14fed20dd 100644 --- a/CPV/forces.f90 +++ b/CPV/forces.f90 @@ -43,13 +43,13 @@ COMPLEX(DP), INTENT(OUT) :: dco(:), dce(:) COMPLEX(DP), INTENT(IN) :: co(:), ce(:) REAL(DP), INTENT(IN) :: fio, fie - REAL(DP), INTENT(IN) :: v(:,:,:) + REAL(DP), INTENT(IN) :: v(:) REAL(DP), INTENT(IN) :: hg(:) - COMPLEX(DP), OPTIONAL :: psi_stored(:,:,:) + COMPLEX(DP), OPTIONAL :: psi_stored(:) ! ... declare other variables ! - COMPLEX(DP), ALLOCATABLE :: psi(:,:,:) + COMPLEX(DP), ALLOCATABLE :: psi(:) COMPLEX(DP) :: fp, fm, aro, are REAL(DP) :: fioby2, fieby2, arg INTEGER :: ig @@ -60,7 +60,7 @@ psi_stored = psi_stored * CMPLX(v, 0.0d0) CALL pw_fwfft(psi_stored, dco, dce) ELSE - ALLOCATE( psi(SIZE(v,1), SIZE(v,2), SIZE(v,3)) ) + ALLOCATE( psi( SIZE(v) ) ) CALL pw_invfft(psi, co, ce) psi = psi * CMPLX(v, 0.0d0) CALL pw_fwfft(psi, dco, dce) @@ -268,7 +268,7 @@ INTEGER, INTENT(IN) :: ib, iss ! band and spin index COMPLEX(DP), INTENT(IN) :: c(:,:) COMPLEX(DP), INTENT(OUT) :: df(:), da(:) - REAL (DP), INTENT(IN) :: v(:,:,:), bec(:,:), f(:) + REAL (DP), INTENT(IN) :: v(:), bec(:,:), f(:) COMPLEX(DP), INTENT(IN) :: eigr(:,:) type (wave_descriptor), INTENT(IN) :: cdesc ! @@ -327,7 +327,7 @@ INTEGER, INTENT(IN) :: ispin COMPLEX(DP), INTENT(INOUT) :: c(:,:) type (wave_descriptor), INTENT(IN) :: cdesc - REAL(DP), INTENT(IN) :: vpot(:,:,:), f(:) + REAL(DP), INTENT(IN) :: vpot(:), f(:) COMPLEX(DP), INTENT(OUT) :: cgrad(:,:) COMPLEX(DP), INTENT(IN) :: eigr(:,:) REAL(DP), INTENT(IN) :: bec(:,:) diff --git a/CPV/fromscra.f90 b/CPV/fromscra.f90 index 7f61cea73..4b7be3bb8 100644 --- a/CPV/fromscra.f90 +++ b/CPV/fromscra.f90 @@ -25,7 +25,7 @@ MODULE from_scratch_module CONTAINS ! !-------------------------------------------------------------------------- - SUBROUTINE from_scratch_fpmd( rhoe, desc, cm, c0, cp, ce, cdesc, edesc, & + SUBROUTINE from_scratch_fpmd( rhoe, cm, c0, cp, ce, cdesc, edesc, & eigr, ei1, ei2, ei3, sfac, fi, ht, atoms, & bec, becdr, vpot, edft ) !------------------------------------------------------------------------ @@ -47,7 +47,6 @@ MODULE from_scratch_module USE orthogonalize, ONLY : ortho USE control_flags, ONLY : tcarpar, tfor, thdyn, tortho, tpre, tranp, & force_pairing, iprsta, tprnfor, amprp, tsde - USE charge_types, ONLY : charge_descriptor USE time_step, ONLY : delt USE runcp_module, ONLY : runcp_ncpp use grid_dimensions, only : nr1, nr2, nr3 @@ -69,15 +68,14 @@ MODULE from_scratch_module COMPLEX(DP), INTENT(OUT) :: ei2(:,:) COMPLEX(DP), INTENT(OUT) :: ei3(:,:) COMPLEX(DP), INTENT(OUT) :: sfac(:,:) - REAL(DP), INTENT(OUT) :: rhoe(:,:,:,:) + REAL(DP), INTENT(OUT) :: rhoe(:,:) REAL(DP), INTENT(OUT) :: bec(:,:) REAL(DP), INTENT(OUT) :: becdr(:,:,:) REAL(DP), INTENT(OUT) :: fi(:,:,:) - REAL(DP), INTENT(OUT) :: vpot(:,:,:,:) + REAL(DP), INTENT(OUT) :: vpot(:,:) TYPE(atoms_type) , INTENT(OUT) :: atoms TYPE(dft_energy_type) , INTENT(OUT) :: edft TYPE(boxdimensions) , INTENT(INOUT) :: ht - TYPE(charge_descriptor), INTENT(IN) :: desc TYPE(wave_descriptor), INTENT(IN) :: cdesc, edesc COMPLEX(DP), INTENT(INOUT) :: cm(:,:,:,:), c0(:,:,:,:) COMPLEX(DP), INTENT(INOUT) :: cp(:,:,:,:), ce(:,:,:,:) @@ -164,9 +162,9 @@ MODULE from_scratch_module ! edft%enl = nlrh_m( cm, cdesc, ttforce, atoms, fi, bec, becdr, eigr ) ! - CALL rhoofr( 0, cm, cdesc, fi, rhoe, desc, ht ) + CALL rhoofr( 0, cm, cdesc, fi, rhoe, ht ) ! - CALL vofrhos( ttprint, ttforce, tstress, rhoe, desc, atoms, & + CALL vofrhos( ttprint, ttforce, tstress, rhoe, atoms, & vpot, bec, cm, cdesc, fi, eigr, ei1, ei2, ei3, & sfac, timepre, ht, edft ) ! @@ -255,6 +253,8 @@ MODULE from_scratch_module USE runcp_module, ONLY : runcp_uspp USE electrons_base, ONLY : f, nspin USE phase_factors_module, ONLY : strucf + USE orthogonalize, ONLY : ortho + USE orthogonalize_base, ONLY : updatc, calphi ! IMPLICIT NONE ! @@ -394,11 +394,10 @@ MODULE from_scratch_module ! calphi calculates phi ! the electron mass rises with g**2 ! - CALL calphi( cm, ngw, ema0bg, bec, nkb, vkb, phi, nbsp ) - + CALL calphi( cm, ngw, bec, nkb, vkb, phi, nbsp, ema0bg ) if( tortho ) then - CALL ortho( eigr, c0, phi, lambda, bigr, iter, ccc, ortho_eps, ortho_max, delt, bephi, becp ) + CALL ortho( eigr, c0(:,:,1,1), phi(:,:,1,1), lambda, bigr, iter, ccc, bephi, becp ) else CALL gram( vkb, bec, nkb, c0, ngw, nbsp ) endif @@ -410,8 +409,11 @@ MODULE from_scratch_module if ( tpre ) CALL nlfh( bec, dbec, lambda ) ! - if ( tortho ) CALL updatc( ccc, lambda, phi, bephi, becp, bec, c0 ) + if ( tortho ) CALL updatc( ccc, nbsp, lambda, SIZE(lambda,1), phi, SIZE(phi,1), & + bephi, SIZE(bephi,1), becp, bec, c0 ) + ! CALL calbec ( nvb+1, nsp, eigr, c0, bec ) + if ( tpre ) CALL caldbec( ngw, nkb, nbsp, 1, nsp, eigr, cm, dbec, .true. ) if(iprsta.ge.3) CALL dotcsc(eigr,c0) diff --git a/CPV/grid.f90 b/CPV/grid.f90 index 5090653fd..388633ebb 100644 --- a/CPV/grid.f90 +++ b/CPV/grid.f90 @@ -82,34 +82,26 @@ RETURN END SUBROUTINE free_blacs_grid - SUBROUTINE get_blacs_grid(grid, rows, columns, debug) + + + SUBROUTINE get_blacs_grid(grid, debug) + TYPE (processors_grid), INTENT(OUT) :: grid - INTEGER, INTENT(IN), OPTIONAL :: rows - INTEGER, INTENT(IN), OPTIONAL :: columns INTEGER, INTENT(IN), OPTIONAL :: debug INTEGER :: iam, nproc , nprow, npcol, context, myrow, mycol + INTEGER :: ndims, dims(2), coor(2) + LOGICAL :: periods(2), reorder + INTEGER :: comm_cart + INTEGER :: ierr + #if defined __SCALAPACK CALL BLACS_PINFO( iam, nproc ) #else - nproc = -1 + ndims = 2 + #endif - IF(.NOT.PRESENT(rows) .AND. .NOT.PRESENT(columns) ) THEN - CALL calculate_grid_dims(nproc , nprow, npcol) - ELSE IF (PRESENT(rows) .AND. .NOT.PRESENT(columns) ) THEN - !IF( rows .GT. nproc ) THEN - !END IF - nprow = rows; npcol = nproc / rows - ELSE IF (.NOT.PRESENT(rows) .AND. PRESENT(columns) ) THEN - !IF( columns .GT. nproc ) THEN - !END IF - npcol = columns; nprow = nproc / columns - ELSE - !IF( rows * columns .GT. nproc ) THEN - !END IF - nprow = rows; npcol = columns - END IF #if defined __SCALAPACK CALL BLACS_GET( -1, 0, context ) diff --git a/CPV/guess.f90 b/CPV/guess.f90 index 8f9db2e07..989eae885 100644 --- a/CPV/guess.f90 +++ b/CPV/guess.f90 @@ -6,36 +6,12 @@ ! or http://www.gnu.org/copyleft/gpl.txt . ! #include "f_defs.h" -#if defined __T3E -# define daxpy saxpy -# define zaxpy caxpy -#endif -! AB INITIO COSTANT PRESSURE MOLECULAR DYNAMICS -! ---------------------------------------------- -! Car-Parrinello Parallel Program -! Carlo Cavazzoni - Gerardo Ballabio -! SISSA, Trieste, Italy - 1997-99 -! Last modified: Sun Nov 21 11:19:43 MET 1999 -! ---------------------------------------------- -! BEGIN manual - - MODULE guess - -! (describe briefly what this module does...) -! ---------------------------------------------- -! routines in this module: -! SUBROUTINE guess_setup(tguess_inp) -! SUBROUTINE guessc0(tk,c0,cm) -! SUBROUTINE guessrho(rho,cm,c0,occ,ht) -! SUBROUTINE ucalc_kp(a,b,ngw,gzero,n,lambda) -! SUBROUTINE ucalc(a,b,ngw,gzero,n,lambda) -! ---------------------------------------------- -! END manual +MODULE guess ! ... declare modules USE kinds - USE parallel_toolkit, ONLY: matmulp, cmatmulp, & + USE parallel_toolkit, ONLY: rep_matmul_drv, & diagonalize, cdiagonalize IMPLICIT NONE @@ -43,7 +19,7 @@ PRIVATE - REAL(DP), ALLOCATABLE :: rho_save( :, :, :, : ) + REAL(DP), ALLOCATABLE :: rho_save( :, : ) ! ... declare module-scope variables LOGICAL :: tguess @@ -140,7 +116,8 @@ DO ik = 1, nk CALL ucalc_kp(cm(:,:,ik,1),c0(:,:,ik,1),ngw,cdesc%gzero,n,cuu) - CALL cmatmulp('N','C',cuu,cuu,ca,n) + ! CALL cmatmulp('N','C',cuu,cuu,ca,n) + CALL errore(' guess ', ' complex matrix mult to be implemented ', 1 ) CALL cdiagonalize(1,ca,costemp,cap,n,nproc,mpime) DO j=1,n DO i=1,n @@ -150,7 +127,8 @@ DO i=1,n costh2(i)=1.0d0/sqrt(costemp(n-i+1)) END DO - CALL cmatmulp('N','N',cuu,ca,cap,n) + !CALL cmatmulp('N','N',cuu,ca,cap,n) + CALL errore(' guess ', ' complex matrix mult to be implemented ', 1 ) DO j=1,n DO i=1,n cap(i,j)=cap(i,j) * costh2(i) @@ -186,7 +164,7 @@ ALLOCATE(crot(ngw,n)) CALL ucalc(cm(:,:,1,1),c0(:,:,1,1),ngw,cdesc%gzero,n,uu) - CALL matmulp('T','N',uu,uu,a,n) + CALL rep_matmul_drv('T','N',n,n,n,1.0d0,uu,n,uu,n,0.0d0,a,n,group) CALL diagonalize(1,a,costemp,ap,n,nproc,mpime) DO j=1,n DO i=1,n @@ -196,7 +174,7 @@ DO i=1,n costh2(i)=1.0d0/sqrt(costemp(n-i+1)) END DO - CALL matmulp('N','N',uu,a,ap,n) + CALL rep_matmul_drv('N','N',n,n,n,1.0d0,uu,n,a,n,0.0d0,ap,n,group) DO j=1,n DO i=1,n ap(i,j)=ap(i,j) * costh2(i) @@ -245,7 +223,7 @@ ! ---------------------------------------------- ! ---------------------------------------------- - SUBROUTINE guessrho(rho, desc, cm, c0, cdesc, occ, ht ) + SUBROUTINE guessrho(rho, cm, c0, cdesc, occ, ht ) ! (describe briefly what this routine does...) ! ---------------------------------------------- @@ -255,38 +233,33 @@ use brillouin, only: kpoints, kp USE wave_types USE parameters, ONLY: nspinx - USE charge_types, ONLY: charge_descriptor ! ... declare subroutine argument - REAL(DP), INTENT(OUT) :: rho(:,:,:,:) - TYPE (charge_descriptor), INTENT(IN) :: desc + REAL(DP), INTENT(OUT) :: rho(:,:) COMPLEX(DP), INTENT(IN) :: c0(:,:,:,:), cm(:,:,:,:) TYPE (wave_descriptor), INTENT(IN) :: cdesc TYPE (boxdimensions), INTENT(IN) :: ht REAL(DP), INTENT(IN) :: occ(:,:,:) ! ... declare other variables - REAL(DP), ALLOCATABLE :: rho0( :, :, :, : ) + REAL(DP), ALLOCATABLE :: rho0( :, : ) LOGICAL, SAVE :: tfirst = .TRUE. - INTEGER :: ispin, nspin, nx, ny, nz + INTEGER :: ispin, nspin ! ... end of declarations ! ---------------------------------------------- - nx = SIZE( rho, 1 ) - ny = SIZE( rho, 2 ) - nz = SIZE( rho, 3 ) - nspin = SIZE( rho, 4 ) + nspin = SIZE( rho, 2 ) IF( tfirst ) THEN - ALLOCATE( rho_save( nx, ny, nz, nspin ) ) - CALL rhoofr( 1, cm, cdesc, occ, rho_save, desc, ht) + ALLOCATE( rho_save( SIZE( rho, 1 ), nspin ) ) + CALL rhoofr( 1, cm, cdesc, occ, rho_save, ht) tfirst = .FALSE. END IF - ALLOCATE( rho0( nx, ny, nz, nspin ) ) - CALL rhoofr( 1, c0, cdesc, occ, rho0, desc, ht) + ALLOCATE( rho0( SIZE( rho, 1 ), nspin ) ) + CALL rhoofr( 1, c0, cdesc, occ, rho0, ht) rho = 2.0d0 * rho0 - rho_save @@ -389,5 +362,5 @@ ! ---------------------------------------------- ! ---------------------------------------------- - END MODULE guess +END MODULE guess diff --git a/CPV/init_run.f90 b/CPV/init_run.f90 index 91be28c8a..9c0e79322 100644 --- a/CPV/init_run.f90 +++ b/CPV/init_run.f90 @@ -51,8 +51,8 @@ SUBROUTINE init_run() becdr, sfac, eigr, ei1, ei2, ei3, taub, & irb, eigrb, rhog, rhos, rhor, bephi, & becp, acc, acc_this_run, wfill, wempt, & - edft, nfi, atoms0, vpot, occn, desc, & - rhoe, atomsm, ht0, htm + edft, nfi, atoms0, vpot, occn, & + atomsm, ht0, htm USE cp_main_variables, ONLY : allocate_mainvar USE energies, ONLY : eself, enl, ekin, etot, enthal, ekincm USE stre, ONLY : stress @@ -257,7 +257,7 @@ SUBROUTINE init_run() ! ELSE IF ( program_name == 'FPMD' ) THEN ! - CALL from_scratch( rhoe, desc, cm, c0, cp, ce, wfill, wempt, eigr, & + CALL from_scratch( rhor, cm, c0, cp, ce, wfill, wempt, eigr, & ei1, ei2, ei3, sfac, occn, ht0, atoms0, bec, & becdr, vpot, edft ) ! @@ -280,7 +280,7 @@ SUBROUTINE init_run() ELSE IF( program_name == 'FPMD' ) THEN ! CALL readfile( nfi, tps, c0, cm, wfill, occn, atoms0, atomsm, acc, & - taui, cdmi, htm, ht0, rhoe, desc, vpot) + taui, cdmi, htm, ht0, rhor, vpot) ! END IF ! @@ -294,7 +294,7 @@ SUBROUTINE init_run() ! ELSE IF( program_name == 'FPMD' ) THEN ! - CALL from_restart( nfi, acc, rhoe, desc, cm, c0, wfill, eigr, ei1, ei2, & + CALL from_restart( nfi, acc, rhor, cm, c0, wfill, eigr, ei1, ei2, & ei3, sfac, occn, htm, ht0, atomsm, atoms0, bec, & becdr, vpot, edft) ! diff --git a/CPV/inner_loop.f90 b/CPV/inner_loop.f90 index 822a46c62..be02e4ba6 100644 --- a/CPV/inner_loop.f90 +++ b/CPV/inner_loop.f90 @@ -24,7 +24,7 @@ use energies, only: eht, epseu, exc, etot, eself, enl, ekin, & & atot, entropy, egrand use electrons_base, only: f, nspin, nel, iupdwn, nupdwn, nudx, nelt, & - nx => nbspx, n => nbsp, ispin => fspin + nx => nbspx, n => nbsp, ispin use ensemble_dft, only: tens, tgrand, ninner, ismear, etemp, ef, & & tdynz, tdynf, zmass, fmass, fricz, fricf, z0, c0diag, & diff --git a/CPV/input.f90 b/CPV/input.f90 index 2d06ca37b..c7d03d67c 100644 --- a/CPV/input.f90 +++ b/CPV/input.f90 @@ -320,8 +320,8 @@ MODULE input ! gvectors and charge density, in reciprocal space. ! trhor_ = ( TRIM( calculation ) == 'nscf' ) - trhow_ = ( TRIM( disk_io ) == 'high' ) - tvlocw_ = .FALSE. + trhow_ = ( TRIM( disk_io ) == 'high' ) ! charge density now written to XML file + tvlocw_ = ( TRIM( disk_io ) == 'high' ) ! warning this is not working ! SELECT CASE( TRIM( verbosity ) ) CASE( 'minimal' ) @@ -1354,9 +1354,9 @@ MODULE input 590 FORMAT( 3X,'Electron temperature control via nose thermostat') ! 700 FORMAT( /,3X, 'Verbosity: iprsta = ',i2,/) -720 FORMAT( 3X, 'charge density is read from unit 47') -721 FORMAT( 3X, 'charge density is written in unit 47') -722 FORMAT( 3X, 'local potential is written in unit 46') +720 FORMAT( 3X, 'charge density is read from file') +721 FORMAT( 3X, 'warning trhow has no effect rho is now written to XML save file') +722 FORMAT( 3X, 'warning tvlocw has no effect vloc is not written to file') ! END SUBROUTINE modules_info ! diff --git a/CPV/ksstates.f90 b/CPV/ksstates.f90 index 136ee6346..fd8420573 100644 --- a/CPV/ksstates.f90 +++ b/CPV/ksstates.f90 @@ -238,7 +238,7 @@ TYPE (wave_descriptor), INTENT(IN) :: wfill, wempt COMPLEX(DP) :: eigr(:,:) REAL(DP), INTENT(IN) :: occ(:,:,:), bec(:,:) - REAL (DP) :: vpot(:,:,:,:) + REAL (DP) :: vpot(:,:) ! ... declare other variables INTEGER :: i, ik, ib, nk, ig, ngw, nb_g, nb_l, ispin, nspin, iks @@ -279,7 +279,7 @@ ALLOCATE( eforce( ngw, nb_l, nk )) CALL dforce_all( ispin, cf(:,:,1,ispin_wfc), wfill, occ(:,1,ispin), eforce(:,:,1), & - vpot(:,:,:,ispin), eigr, bec ) + vpot(:,ispin), eigr, bec ) CALL kohn_sham( ispin, cf(:,:,:,ispin_wfc), wfill, eforce ) @@ -302,7 +302,7 @@ ALLOCATE( eforce( ngw, nb_l, nk )) CALL dforce_all( ispin, ce(:,:,1,ispin), wempt, fi(:,1), eforce(:,:,1), & - vpot(:,:,:,ispin), eigr, bec ) + vpot(:,ispin), eigr, bec ) CALL kohn_sham( ispin, ce(:,:,:,ispin), wempt, eforce ) @@ -400,7 +400,7 @@ TYPE (wave_descriptor), INTENT(IN) :: wfill, wempt COMPLEX(DP) :: eigr(:,:) REAL(DP), INTENT(IN) :: occ(:,:,:), bec(:,:) - REAL (DP) :: vpot(:,:,:,:) + REAL (DP) :: vpot(:,:) ! ... declare other variables INTEGER :: i, ik, ib, nk, ig, ngw, nb_g, nb_l, iks, nb, ispin @@ -440,9 +440,9 @@ ALLOCATE( eforce( ngw, nb, 1, 2 ) ) CALL dforce_all( 1, cf(:,:,1,1), wfill, occ(:,1,1), eforce(:,:,1,1), & - vpot(:,:,:,1), eigr, bec ) + vpot(:,1), eigr, bec ) CALL dforce_all( 2, cf(:,:,1,1), wfill, occ(:,1,2), eforce(:,:,1,2), & - vpot(:,:,:,2), eigr, bec ) + vpot(:,2), eigr, bec ) DO i = 1, nupdwn(2) eforce(:,i,1,1) = occ(i,1,1) * eforce(:,i,1,1) + occ(i,1,2) * eforce(:,i,1,2) @@ -471,12 +471,12 @@ ALLOCATE( eforce( ngw, nb_l, 1, 1 )) - CALL dforce_all( 1, ce(:,:,1,1), wempt, fi(:,1), eforce(:,:,1,1), vpot(:,:,:,1), & + CALL dforce_all( 1, ce(:,:,1,1), wempt, fi(:,1), eforce(:,:,1,1), vpot(:,1), & eigr, bec ) CALL kohn_sham( 1, ce(:,:,:,1), wempt, eforce(:,:,:,1) ) - CALL dforce_all( 2, ce(:,:,1,2), wempt, fi(:,1), eforce(:,:,1,1), vpot(:,:,:,2), & + CALL dforce_all( 2, ce(:,:,1,2), wempt, fi(:,1), eforce(:,:,1,1), vpot(:,2), & eigr, bec ) CALL kohn_sham( 2, ce(:,:,:,2), wempt, eforce(:,:,:,1) ) @@ -555,6 +555,7 @@ USE io_global, ONLY: ionode, ionode_id USE io_global, ONLY: stdout USE fft_base, ONLY: dfftp + USE grid_dimensions, ONLY: nr1, nr2, nr3, nr1x, nr2x, nr3x, nnrx IMPLICIT NONE @@ -562,27 +563,13 @@ CHARACTER(LEN=*) :: file_name COMPLEX(DP), ALLOCATABLE :: zcomp(:) REAL(DP), ALLOCATABLE :: rcomp2(:) - COMPLEX(DP), ALLOCATABLE :: psi2(:,:,:) - INTEGER :: nr1_l, nr2_l, nr3_l, nr1_g, nr2_g, nr3_g + COMPLEX(DP), ALLOCATABLE :: psi2(:) INTEGER :: i, j, k, istr, izl REAL(DP) :: charge LOGICAL :: top - nr1_g = dfftp%nr1 - nr2_g = dfftp%nr2 - nr3_g = dfftp%nr3 - - nr1_l = dfftp%nr1x - nr2_l = dfftp%nr2x - nr3_l = dfftp%npl - - izl = 1 - DO i = 1, mpime - izl = izl + dfftp%npp( i ) - END DO - - ALLOCATE( zcomp( nr3_g ), rcomp2( nr3_g ) ) - ALLOCATE( psi2( nr1_l, nr2_l, nr3_l ) ) + ALLOCATE( zcomp( nr3 ), rcomp2( nr3 ) ) + ALLOCATE( psi2( nnrx ) ) CALL pw_invfft( psi2, psi, psi ) @@ -597,25 +584,39 @@ END IF charge = 0.0d0 - DO i = 1, nr1_g - DO j = 1, nr2_g + + izl = dfftp%ipp( mpime + 1 ) + + DO i = 1, nr1 + + DO j = 1, nr2 + zcomp = 0.0d0 - - zcomp( izl : ( izl + nr3_l - 1 ) ) = psi2( i, j, 1 : nr3_l ) - CALL mp_sum( zcomp(1:nr3_g) ) + + istr = i + nr1 * ( j - 1 ) + + DO k = 1, dfftp%npl + zcomp( izl + k ) = psi2( istr + nr1 * nr2 * ( k - 1 ) ) + END DO + + CALL mp_sum( zcomp( 1 : nr3 ) ) + IF ( ionode ) THEN rcomp2 = DBLE(zcomp)**2 - WRITE(ksunit, fmt='(F10.5)') ( rcomp2(k), k=1, nr3_g ) + WRITE(ksunit, fmt='(F10.5)') ( rcomp2(k), k=1, nr3 ) charge = charge + SUM(rcomp2) END IF + CALL mp_barrier() + END DO + END DO IF ( ionode ) THEN CLOSE(ksunit) WRITE( stdout,'(3X,A15," integrated charge : ",F14.5)') & - & file_name(1:istr), charge / DBLE(nr1_g*nr2_g*nr3_g) + & file_name(1:istr), charge / DBLE(nr1*nr2*nr3) END IF DEALLOCATE(zcomp, rcomp2, psi2) ! ... diff --git a/CPV/main.f90 b/CPV/main.f90 index 1c6fc617b..9b3d2dd06 100644 --- a/CPV/main.f90 +++ b/CPV/main.f90 @@ -187,7 +187,7 @@ USE io_files , ONLY: outdir, prefix USE printout_base , ONLY: printout_base_init USE cp_main_variables, ONLY : atoms0, atomsp, atomsm, ei1, ei2, ei3, eigr, sfac, & - ht0, htm, htp, rhoe, vpot, desc, wfill, wempt, & + ht0, htm, htp, rhor, vpot, wfill, wempt, & acc, acc_this_run, occn, edft, nfi, bec, becdr USE cg_module, ONLY : tcg IMPLICIT NONE @@ -344,7 +344,7 @@ ! ! ... perform DIIS minimization on electronic states ! - CALL runsdiis(ttprint, rhoe, desc, atoms0, bec, becdr, & + CALL runsdiis(ttprint, rhor, atoms0, bec, becdr, & eigr, ei1, ei2, ei3, sfac, c0, cm, cp, wfill, thdyn, ht0, occn, ei, & vpot, doions, edft ) ! @@ -354,7 +354,7 @@ ! IF( nspin > 1 ) CALL errore(' cpmain ',' lsd+diis not allowed ',0) ! - CALL rundiis(ttprint, rhoe, desc, atoms0, bec, becdr, & + CALL rundiis(ttprint, rhor, atoms0, bec, becdr, & eigr, ei1, ei2, ei3, sfac, c0, cm, cp, wfill, thdyn, ht0, occn, ei, & vpot, doions, edft ) ! @@ -362,7 +362,7 @@ ! ! ... on entry c0 should contain the wavefunctions to be optimized ! - CALL runcg(tortho, ttprint, rhoe, desc, atoms0, bec, becdr, & + CALL runcg(tortho, ttprint, rhor, atoms0, bec, becdr, & eigr, ei1, ei2, ei3, sfac, c0, cm, cp, wfill, thdyn, ht0, occn, ei, & vpot, doions, edft, ekin_maxiter, etot_conv_thr, tconv_cg ) ! @@ -371,13 +371,13 @@ ! ELSE IF ( tsteepdesc ) THEN ! - CALL runsd(tortho, ttprint, ttforce, rhoe, desc, atoms0, bec, becdr, & + CALL runsd(tortho, ttprint, ttforce, rhor, atoms0, bec, becdr, & eigr, ei1, ei2, ei3, sfac, c0, cm, cp, wfill, thdyn, ht0, occn, ei, & vpot, doions, edft, ekin_maxiter, ekin_conv_thr ) ! ELSE IF ( tconjgrad_ion%active ) THEN ! - CALL runcg_ion(nfi, tortho, ttprint, rhoe, desc, atomsp, atoms0, & + CALL runcg_ion(nfi, tortho, ttprint, rhor, atomsp, atoms0, & atomsm, bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cp, wfill, thdyn, ht0, occn, ei, & vpot, doions, edft, tconvthrs%derho, tconvthrs%force, tconjgrad_ion%nstepix, & tconvthrs%ekin, tconjgrad_ion%nstepex ) @@ -406,9 +406,9 @@ s5 = cclock() timernl = s5 - s4 - ! ... compute the new charge density "rhoe" + ! ... compute the new charge density "rhor" ! - CALL rhoofr( nfi, c0, wfill, occn, rhoe, desc, ht0) + CALL rhoofr( nfi, c0, wfill, occn, rhor, ht0) IF(memchk) CALL memstat(6) @@ -418,7 +418,7 @@ ! ... vofrhos compute the new DFT potential "vpot", and energies "edft", ! ... ionc forces "fion" and stress "pail". ! - CALL vofrhos(ttprint, ttforce, tstress, rhoe, desc, atoms0, & + CALL vofrhos(ttprint, ttforce, tstress, rhor, atoms0, & vpot, bec, c0, wfill, occn, eigr, ei1, ei2, ei3, sfac, timepre, ht0, edft) ! CALL debug_energies( edft ) ! DEBUG @@ -578,7 +578,7 @@ IF( self_interaction /= 0 ) THEN IF ( nat_localisation > 0 .AND. ttprint ) THEN - CALL localisation( cp( : , nupdwn(1), 1, 1 ), atoms0, ht0, desc) + CALL localisation( cp( : , nupdwn(1), 1, 1 ), atoms0, ht0) END IF END IF @@ -700,7 +700,7 @@ ! IF( ttsave .OR. ttexit ) THEN CALL writefile( nfi, tps, c0, cm, wfill, occn, atoms0, atomsm, acc, & - taui, cdmi, htm, ht0, rhoe, desc, vpot ) + taui, cdmi, htm, ht0, rhor, vpot ) END IF IF( ttexit .AND. .NOT. ttprint ) THEN @@ -746,12 +746,12 @@ END IF IF(tprnsfac) THEN - CALL print_sfac(rhoe, desc, sfac) + CALL print_sfac(rhor, sfac) END IF ! ... report statistics - CALL printacc(nfi, rhoe, desc, atomsm, htm, nstep_this_run, acc, acc_this_run) + CALL printacc(nfi, nstep_this_run, acc, acc_this_run) CALL mp_report_buffers() CALL mp_report() diff --git a/CPV/mainvar.f90 b/CPV/mainvar.f90 index 04c62c1b9..f2bd6c6b2 100644 --- a/CPV/mainvar.f90 +++ b/CPV/mainvar.f90 @@ -18,7 +18,6 @@ MODULE cp_main_variables USE metagga, ONLY : kedtaur, kedtaus, kedtaug USE atoms_type_module, ONLY : atoms_type USE cell_base, ONLY : boxdimensions - USE charge_types, ONLY : charge_descriptor, charge_descriptor_init USE wave_types, ONLY : wave_descriptor, wave_descriptor_init USE energies, ONLY : dft_energy_type ! @@ -80,19 +79,17 @@ MODULE cp_main_variables ! ! charge densities and potentials ! - REAL(DP), ALLOCATABLE :: rhoe(:,:,:,:) ! charge density in real space - REAL(DP), ALLOCATABLE :: vpot(:,:,:,:) - TYPE (charge_descriptor) :: desc ! charge density descriptor - ! ! rhog = charge density in g space ! rhor = charge density in r space (dense grid) ! rhos = charge density in r space (smooth grid) ! rhopr since rhor is overwritten in vofrho, ! this array is used to save rhor for restart file + ! vpot = potential in r space (dense grid) ! COMPLEX(DP), ALLOCATABLE :: rhog(:,:) REAL(DP), ALLOCATABLE :: rhor(:,:), rhos(:,:) REAL(DP), ALLOCATABLE :: rhopr(:,:) + REAL(DP), ALLOCATABLE :: vpot(:,:) ! TYPE (wave_descriptor) :: wfill, wempt ! wave function descriptor ! for filled and empty states @@ -162,10 +159,11 @@ MODULE cp_main_variables ! ALLOCATE( ema0bg( ngw ) ) ! + ALLOCATE( rhor( nnr, nspin ) ) + ! IF( program_name == 'CP90' ) THEN ! ALLOCATE( rhopr( nnr, nspin ) ) - ALLOCATE( rhor( nnr, nspin ) ) ALLOCATE( rhos( nnrsx, nspin ) ) ALLOCATE( rhog( ng, nspin ) ) ! @@ -179,12 +177,7 @@ MODULE cp_main_variables ! ELSE IF( program_name == 'FPMD' ) THEN ! - ALLOCATE( rhoe( nr1x, nr2x, npl, nspin ) ) - ! - CALL charge_descriptor_init( desc, nr1, nr2, nr3, & - nr1, nr2, npl, nr1x, nr2x, npl, nspin ) - ! - ALLOCATE( vpot( nr1x, nr2x, npl, nspin ) ) + ALLOCATE( vpot( nnr, nspin ) ) ! END IF ! @@ -241,7 +234,7 @@ MODULE cp_main_variables IF( ALLOCATED( kedtaur ) ) DEALLOCATE( kedtaur ) IF( ALLOCATED( kedtaus ) ) DEALLOCATE( kedtaus ) IF( ALLOCATED( kedtaug ) ) DEALLOCATE( kedtaug ) - IF( ALLOCATED( rhoe ) ) DEALLOCATE( rhoe ) + ! IF( ALLOCATED( rhoe ) ) DEALLOCATE( rhoe ) IF( ALLOCATED( vpot ) ) DEALLOCATE( vpot ) IF( ALLOCATED( occn ) ) DEALLOCATE( occn ) ! diff --git a/CPV/move_electrons.f90 b/CPV/move_electrons.f90 index 5f7977e00..79183cb3f 100644 --- a/CPV/move_electrons.f90 +++ b/CPV/move_electrons.f90 @@ -41,7 +41,7 @@ SUBROUTINE move_electrons( nfi, tfirst, tlast, b1, b2, b3, fion, & USE runcp_module, ONLY : runcp_uspp USE wave_constrains, ONLY : interpolate_lambda USE gvecw, ONLY : ngw - ! USE para_mod, ONLY : + USE orthogonalize_base, ONLY : calphi ! IMPLICIT NONE ! @@ -143,7 +143,7 @@ SUBROUTINE move_electrons( nfi, tfirst, tlast, b1, b2, b3, fion, & ! ... calphi calculates phi ! ... the electron mass rises with g**2 ! - CALL calphi( c0, ngw, ema0bg, bec, nkb, vkb, phi, nbsp ) + CALL calphi( c0, ngw, bec, nkb, vkb, phi, nbsp, ema0bg ) ! ! ... begin try and error loop (only one step!) ! diff --git a/CPV/nl_base.f90 b/CPV/nl_base.f90 index 956c93593..fec724c5b 100644 --- a/CPV/nl_base.f90 +++ b/CPV/nl_base.f90 @@ -337,7 +337,7 @@ use cvan, only : ish use uspp_param, only : nhm, nh use uspp, only : nkb, dvan - use electrons_base, only : n => nbsp, nspin, ispin => fspin, f + use electrons_base, only : n => nbsp, nspin, ispin, f use ions_base, only : nsp, nat, na ! implicit none @@ -400,7 +400,7 @@ use uspp_param, only : nhm, nh use cvan, only : ish, nvb use ions_base, only : nax, nat, nsp, na - use electrons_base, only : n => nbsp, ispin => fspin, f + use electrons_base, only : n => nbsp, ispin, f use gvecw, only : ngw ! implicit none @@ -641,7 +641,7 @@ subroutine dennl( bec, denl ) use cdvan, ONLY : drhovan, dbec use ions_base, only : nsp, na ! - use electrons_base, only : n => nbsp, ispin => fspin, f, nspin + use electrons_base, only : n => nbsp, ispin, f, nspin use reciprocal_vectors, only : gstart implicit none @@ -710,7 +710,7 @@ subroutine nlfq( c, eigr, bec, becdr, fion ) use uspp_param, only : nhm, nh use cvan, only : ish, nvb use ions_base, only : nax, nat, nsp, na - use electrons_base, only : n => nbsp, ispin => fspin, f + use electrons_base, only : n => nbsp, ispin, f use gvecw, only : ngw use constants, only : pi, fpi ! diff --git a/CPV/nlcc.f90 b/CPV/nlcc.f90 index 228cf99b4..19e7c5bbf 100644 --- a/CPV/nlcc.f90 +++ b/CPV/nlcc.f90 @@ -150,7 +150,7 @@ integer :: nsp COMPLEX(DP) :: rhoetg(:) - REAL(DP) :: rhoetr(:,:,:) + REAL(DP) :: rhoetr(:) REAL(DP) :: rhoc(:,:) COMPLEX(DP), INTENT(IN) :: sfac(:,:) diff --git a/CPV/optical.f90 b/CPV/optical.f90 index 0c34cc060..e9f202aa3 100644 --- a/CPV/optical.f90 +++ b/CPV/optical.f90 @@ -90,7 +90,7 @@ COMPLEX(DP), INTENT(INOUT) :: ce(:,:,:,:) TYPE(wave_descriptor), INTENT(IN) :: wempt, wfill REAL(DP), INTENT(IN) :: occ(:,:,:) - REAL (DP), INTENT(in) :: vpot(:,:,:,:) + REAL (DP), INTENT(in) :: vpot(:,:) REAL (DP) :: bec(:,:) COMPLEX(DP) :: eigr(:,:) @@ -139,7 +139,7 @@ CALL nlsm1 ( nb_l, 1, nspnl, eigr, cf(1,1,1,ispin), bec ) CALL dforce_all( ispin, cf(:,:,1,ispin), wfill, occ(:,1,ispin), eforce(:,:,1), & - vpot(:,:,:,ispin), eigr, bec ) + vpot(:,ispin), eigr, bec ) CALL kohn_sham( ispin, cf(:,:,:,ispin), wfill, eforce ) @@ -156,7 +156,7 @@ CALL nlsm1 ( nb_l, 1, nspnl, eigr, ce(1,1,1,ispin), bece ) ! CALL dforce_all( ispin, ce(:,:,1,ispin), wempt, ff( :, 1), eforce(:,:,1), & - vpot(:,:,:,ispin), eigr, bece ) + vpot(:,ispin), eigr, bece ) ! CALL kohn_sham( ispin, ce(:,:,:,ispin), wempt, eforce ) diff --git a/CPV/ortho.f90 b/CPV/ortho.f90 index 9eb967e96..a4f02447f 100644 --- a/CPV/ortho.f90 +++ b/CPV/ortho.f90 @@ -8,15 +8,10 @@ #include "f_defs.h" !=----------------------------------------------------------------------------=! - MODULE orthogonalize + MODULE orthogonalize !=----------------------------------------------------------------------------=! - USE kinds - USE io_global, ONLY: ionode - USE parallel_toolkit, ONLY: matmulp, cmatmulp, & - pdspev_drv, dspev_drv, pzhpev_drv, zhpev_drv - USE orthogonalize_base, ONLY: sqr_matmul, diagonalize_rho, & - BACKRHOSET, SIGRHOSET, BACKRHOSET2, SIGRHOSET2, ortho_iterate + USE kinds, ONLY: DP IMPLICIT NONE @@ -24,66 +19,90 @@ PRIVATE - INTEGER :: ortho_tune = 16 - - REAL(DP) :: one, zero, two, mone, mtwo - PARAMETER ( one = 1.0d0, zero = 0.0d0, two = 2.0d0, mone = -1.0d0 ) - PARAMETER ( mtwo = -2.0d0 ) - COMPLEX(DP) :: cone, czero, mcone - PARAMETER ( cone = (1.0d0, 0.0d0), czero = (0.0d0, 0.0d0) ) - PARAMETER ( mcone = (-1.0d0, 0.0d0) ) - REAL(DP) :: small = 1.0d-14 - INTERFACE ortho - MODULE PROCEDURE ortho_s, ortho_v, ortho_m + MODULE PROCEDURE ortho_s, ortho_v, ortho_m, ortho_cp END INTERFACE PUBLIC :: ortho !=----------------------------------------------------------------------------=! - CONTAINS + CONTAINS !=----------------------------------------------------------------------------=! - SUBROUTINE ortho_s( ispin, c0, cp, cdesc, pmss, emass, success) + SUBROUTINE ortho_s( ispin, c0, cp, cdesc, pmss, emass, success ) - USE control_flags, ONLY: ortho_eps, ortho_max - USE wave_types, ONLY: wave_descriptor - USE mp_global, ONLY: nproc + USE control_flags, ONLY: ortho_eps, ortho_max + USE wave_types, ONLY: wave_descriptor + USE mp_global, ONLY: nproc + USE orthogonalize_base, ONLY: updatc, calphi - COMPLEX(DP), INTENT(INOUT) :: c0(:,:), cp(:,:) - TYPE (wave_descriptor), INTENT(IN) :: cdesc - REAL(DP) :: pmss(:), emass - LOGICAL, INTENT(OUT), OPTIONAL :: success - INTEGER, INTENT(IN) :: ispin - INTEGER :: iter + COMPLEX(DP), INTENT(INOUT) :: c0(:,:), cp(:,:) + TYPE (wave_descriptor), INTENT(IN) :: cdesc + REAL(DP) :: pmss(:), emass + LOGICAL, INTENT(OUT), OPTIONAL :: success + INTEGER, INTENT(IN) :: ispin + ! + INTEGER :: iter + REAL(DP) :: diff, dum(2,2) + COMPLEX(DP) :: cdum(2,2) + REAL(DP), ALLOCATABLE :: x0(:,:) + REAL(DP), ALLOCATABLE :: aux(:) + COMPLEX(DP), ALLOCATABLE :: phi(:,:) + INTEGER :: n, ngw, info + + CALL start_clock( 'ortho' ) - IF( cdesc%gamma ) THEN #if defined __SCALAPACK - iter = ortho_scalapack( ispin, c0, cp, cdesc, pmss, emass) -#else - IF( ( nproc > 1 ) .AND. ( ( cdesc%nbt( ispin ) / nproc ) >= ortho_tune ) ) THEN - iter = ortho_gamma_p( ispin, c0, cp, cdesc, pmss, emass) - ELSE - iter = ortho_gamma( ispin, c0, cp, cdesc, pmss, emass) - END IF -#endif - ELSE - iter = ortho_kp(c0, cp, pmss, emass) - END IF - IF( PRESENT( success ) ) THEN + iter = ortho_scalapack( ispin, c0, cp, cdesc, pmss, emass) + +#else + + n = cdesc%nbl( ispin ) + ngw = cdesc%ngwl + + ! ... Scale wave functions + + ALLOCATE( phi( SIZE( c0, 1 ), SIZE( c0, 2 ) ), STAT = info ) + IF( info /= 0 ) CALL errore( ' ortho ', ' allocating phi ', 3 ) + + ALLOCATE( aux( ngw ) ) + aux(:) = emass / pmss(:) ! ema0bg + + CALL calphi( c0, SIZE(c0,1), dum, 1, cdum, phi, n, aux ) + + DEALLOCATE( aux ) + + ALLOCATE( x0( n, n ) ) + + ! CALL ortho_gamma( cp, phi, x0, diff, iter, n, n, 1 ) + CALL ortho_gamma( 1, cp, SIZE(cp,1), phi, dum, dum, 2, dum, dum, & + x0, n, diff, iter, n, n, 1 ) + + CALL updatc( 1.0d0, n, x0, n, phi, SIZE(phi,1), dum, 1, dum, dum, cp ) + + DEALLOCATE( phi, x0 ) + +#endif + + IF( PRESENT( success ) ) THEN success = .TRUE. - END IF - IF ( iter > ortho_max ) THEN - IF( PRESENT( success ) ) THEN - success = .FALSE. - ELSE - call errore(' ortho ',' itermax ',iter) - END IF - END IF - RETURN - END SUBROUTINE ortho_s + END IF + ! + IF ( iter > ortho_max ) THEN + IF( PRESENT( success ) ) THEN + success = .FALSE. + ELSE + call errore(' ortho ',' itermax ',iter) + END IF + END IF + ! + CALL stop_clock( 'ortho' ) + ! + RETURN + ! + END SUBROUTINE ortho_s !=----------------------------------------------------------------------------=! @@ -121,883 +140,238 @@ !=----------------------------------------------------------------------------=! -! BEGIN manual - INTEGER FUNCTION ortho_gamma( ispin, c0, cp, cdesc, pmss, emass ) -! INPUT: -! C0 (ORTHONORMAL) -! CP (NON-ORTHONORMAL) -! OUTPUT: -! CP (ORTHONORMAL) -! -! Version for preconditioned equations of motion -! (following f.tassone, f.mauri and r.car ...) -! Replicated data parallel driver -! ---------------------------------------------- -! END manual + SUBROUTINE ortho_gamma( iopt, cp, ngwx, phi, becp, qbecp, nkbx, bephi, qbephi, & + x0, nx, diff, iter, n, nss, istart ) + ! + ! + ! - USE mp_global, ONLY: nproc, mpime - USE wave_types, ONLY: wave_descriptor - USE control_flags, ONLY: ortho_eps, ortho_max - USE time_step, ONLY: delt - USE orthogonalize_base, ONLY: rhoset, sigset, tauset + USE mp_global, ONLY: nproc, mpime + USE orthogonalize_base, ONLY: rhoset, sigset, tauset, ortho_iterate, & + ortho_alt_iterate, updatc, diagonalize_rho IMPLICIT NONE -! ... Arguments - COMPLEX(DP), INTENT(INOUT) :: c0(:,:), cp(:,:) - TYPE (wave_descriptor), INTENT(IN) :: cdesc - REAL(DP), INTENT(IN) :: pmss(:), emass - INTEGER, INTENT(IN) :: ispin + ! ... Arguments - -! ... Functions - INTEGER :: IDAMAX - -! ... Locals + INTEGER, INTENT(IN) :: iopt + INTEGER, INTENT(IN) :: ngwx, nx, nkbx + INTEGER, INTENT(IN) :: n, nss, istart + COMPLEX(DP) :: phi( ngwx, n ), cp( ngwx, n ) + REAL(DP) :: bephi( nkbx, n ), becp( nkbx, n ) + REAL(DP) :: qbephi( nkbx, n ), qbecp( nkbx, n ) + REAL(DP) :: x0( nx, nx ) + INTEGER, INTENT(OUT) :: iter + REAL(DP), INTENT(OUT) :: diff - REAL(DP), ALLOCATABLE :: s(:,:), sig(:,:), rho(:,:), tmass(:,:), temp(:,:) - REAL(DP), ALLOCATABLE :: x0(:,:), temp1(:,:) - REAL(DP), ALLOCATABLE :: x1(:,:), rhoa(:,:) - REAL(DP), ALLOCATABLE :: sigd(:), rhod(:), aux(:) - REAL(DP) :: pwrk(1) - REAL(DP) :: difgam, rhosigd - REAL(DP) :: fact, one_by_emass, den - INTEGER :: nrl,is,jl, n, ngw, nx, naux, i, j, iopt, k, info, iter, gstart - LOGICAL :: gzero - REAL(DP) :: sqrtfact, dum(2,2) + ! ... Locals - COMPLEX(DP), ALLOCATABLE :: phi(:,:) + REAL(DP), ALLOCATABLE :: s(:,:), sig(:,:), rho(:,:), tau(:,:), temp(:,:) + REAL(DP), ALLOCATABLE :: rhoa(:,:), rhod(:) + INTEGER :: i, j, info -! ... Subroutine body + ! ... Subroutine body - CALL start_clock( 'ortho_r' ) ! _r stay for replicated data - n = cdesc%nbl( ispin ) - nx = cdesc%nbl( ispin ) - ngw = cdesc%ngwl - - IF( n < 1 ) THEN - ortho_gamma = 0 - RETURN - END IF - - ALLOCATE( s(n,n), sig(n,n), rho(n,n), tmass(n,n), temp(n,n), STAT = info ) + ALLOCATE( s(nx,nx), sig(nx,nx), rho(nx,nx), tau(nx,nx), temp(nx,nx), STAT = info ) IF( info /= 0 ) CALL errore( ' ortho ', ' allocating matrixes ', 1 ) - - ALLOCATE( x0(n,n), x1(n,n), rhoa(n,n), temp1(n,n), sigd(n), rhod(n), STAT = info ) + ALLOCATE( rhoa(nx,nx), rhod(nx), STAT = info ) IF( info /= 0 ) CALL errore( ' ortho ', ' allocating matrixes ', 2 ) - ALLOCATE( phi( SIZE( c0, 1 ), SIZE( c0, 2 ) ), STAT = info ) - IF( info /= 0 ) CALL errore( ' ortho ', ' allocating phi ', 3 ) + ! ... Initialize rho, sig and tau -! ... Scale wave functions + ! + ! rho = + ! sig = 1- + ! tau = + ! + CALL rhoset( cp, ngwx, phi, bephi, nkbx, qbecp, n, nss, istart, rho, nx ) + ! + CALL sigset( cp, ngwx, becp, nkbx, qbecp, n, nss, istart, sig, nx ) + ! + CALL tauset( phi, ngwx, bephi, nkbx, qbephi, n, nss, istart, tau, nx ) + ! + DO j = 1, nss + DO i = 1, nss - ALLOCATE( aux( ngw ) ) - aux(:) = emass / pmss(:) ! ema0bg + rhoa(i,j) = 0.5d0*(rho(i,j)-rho(j,i)) + temp(i,j) = 0.5d0*(rho(i,j)+rho(j,i)) + ! + ! on some machines (IBM RS/6000 for instance) the following test allows + ! to distinguish between Numbers and Sodium Nitride (NaN, Not a Number). + ! If a matrix of Not-Numbers is passed to rs, the most likely outcome is + ! that the program goes on forever doing nothing and writing nothing. + ! + IF (temp(i,j) /= temp(i,j)) CALL errore('ortho','ortho went bananas',1) - CALL calphi( c0, SIZE(c0,1), aux, dum, 1, dum, phi, n ) - - DEALLOCATE( aux ) - -! ... Initialize rho and sig - - CALL rhoset( cp, SIZE(cp,1), phi, dum, 1, dum, n, n, 1, rho, n ) - CALL tauset( phi, SIZE(phi,1), dum, 1, dum, n, n, 1, tmass, n ) - CALL sigset( cp, SIZE(cp,1), dum, 1, dum, n, n, 1, sig, n ) - - call mytranspose(rho, nx, temp1, NX, N, N) - DO j = 1, n - DO i = 1, n - rhoa(i,j) = 0.5d0*(rho(i,j)-temp1(i,j)) - temp(i,j) = 0.5d0*(rho(i,j)+temp1(i,j)) -! temp1(i,j) = temp(i,j) ENDDO ENDDO -! ... Diagonalize Matrix symmetric part of rho -! ... temp = ( rho(i,j) + rho(j,i) ) / 2 + ! ... Diagonalize Matrix symmetric part of rho (temp) - CALL diagonalize_rho( n, temp, rhod, s ) + CALL start_clock( 'rsg' ) -! ... "s" is the matrix of eigenvectors, "rhod" is the array of eigenvalues + CALL diagonalize_rho( nss, temp, rhod, s ) -! temp = 0.0d0 -! CALL ortho_iterate( s, rhod, temp, sig, rhoa, temp1, tmass, n, n, ortho_max, ortho_eps ) -! -! #ifdef __PIPPO -! -! ... Transform "sig", "rhoa" and "tmass" in the new basis through matrix "s" -! - CALL sqr_matmul( 'N', 'N', SIG, S, TEMP ) - CALL sqr_matmul( 'T', 'N', S, TEMP, SIG ) - CALL sqr_matmul( 'N', 'N', RHOA, S, TEMP ) - CALL sqr_matmul( 'T', 'N', S, TEMP, RHOA ) - CALL sqr_matmul( 'N', 'N', TMASS, S, TEMP ) - CALL sqr_matmul( 'T', 'N', S, TEMP, TMASS ) + CALL stop_clock( 'rsg' ) -! -! ... Initialize x0 -! - DO J = 1, N - DO I = 1, N - den = (RHOD(I)+RHOD(J)) - IF( ABS( den ) <= small ) den = SIGN( small, den ) - X0(I,J) = SIG(I,J) / den + ! ... "s" is the matrix of eigenvectors, "rhod" is the array of eigenvalues + + DO j = 1, nss + DO i = 1, nss + temp(i,j) = 0.5d0*(rho(i,j)+rho(j,i)) ENDDO ENDDO -! -! ... Starting iteration -! - - ITERATIVE_LOOP: DO iter = 0, ortho_max - - ! WRITE(6,*) ' ORTHO LOOP 1 ', SUM(X0) ! DEBUG - - CALL sqr_matmul( 'N', 'N', X0, RHOA, TEMP1 ) - call mytranspose( TEMP1, NX, TEMP, NX, N, N ) - DO J=1,N - DO I=1,N - TEMP1(I,J) = TEMP1(I,J) + TEMP(I,J) - ENDDO - ENDDO -! - CALL sqr_matmul( 'T', 'N', TMASS, X0, TEMP ) - DO I = 1, N - SIGD(I) = TEMP(I,I) - TEMP(I,I) = -SIGD(I) - ENDDO - - CALL sqr_matmul( 'T', 'N', X0, TEMP, X1 ) - call mytranspose( X1, NX, TEMP, NX, N, N ) - -! ... X1 = SIG - TEMP1 - 0.5d0 * ( X1 + X1^t ) - - difgam = zero - DO j = 1, n - DO i = 1, n - den = (rhod(i)+sigd(i)+rhod(j)+sigd(j)) - IF( ABS( den ) <= small ) den = SIGN( small, den ) - x1(i,j) = sig(i,j) - temp1(i,j) - 0.5_DP * (x1(i,j)+temp(i,j)) - x1(i,j) = x1(i,j) / den - difgam = MAX( ABS(x1(i,j)-x0(i,j)), difgam ) - END DO - END DO - - x0 = x1 - - IF( difgam < ortho_eps ) EXIT ITERATIVE_LOOP - - END DO ITERATIVE_LOOP - -! -! ... Transform x0 back to the original basis - - CALL sqr_matmul( 'N', 'N', S, X0, TEMP ) - CALL sqr_matmul( 'N', 'T', S, TEMP, X0 ) - -! #endif - - - CALL DGEMM( 'N', 'N', 2*ngw, n, n, one, phi(1,1), 2*SIZE(phi,1), x0(1,1), n, & - one, cp(1,1), 2*SIZE(cp,1) ) - - - DEALLOCATE(x0, x1, rhoa, temp1, sigd, rhod) - - DEALLOCATE(s, sig, rho, tmass, temp ) - - DEALLOCATE( phi ) - - ortho_gamma = iter - - CALL stop_clock( 'ortho_r' ) - - RETURN - END FUNCTION ortho_gamma - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION ortho_gamma_p( ispin, c0, cp, cdesc, pmss, emass ) - -! INPUT: -! C0 (ORTHONORMAL) -! CP (NON-ORTHONORMAL) -! OUTPUT: -! CP (ORTHONORMAL) -! -! Version for preconditioned equations of motion -! (following f.tassone, f.mauri and r.car ...) -! Distributed data parallel driver -! ---------------------------------------------- -! END manual - - USE parallel_types, ONLY: processors_grid, descriptor, & - real_parallel_matrix, parallel_allocate, parallel_deallocate, & - BLOCK_CYCLIC_SHAPE, CYCLIC_SHAPE, REPLICATED_DATA_SHAPE - USE descriptors_module, ONLY: desc_init - USE processors_grid_module, ONLY: grid_init - USE mp_global, ONLY: nproc, mpime, group - USE mp, ONLY: mp_sum - USE wave_types, ONLY: wave_descriptor - USE control_flags, ONLY: ortho_eps, ortho_max - - IMPLICIT NONE - -! ... Arguments - COMPLEX(DP), INTENT(INOUT) :: c0(:,:), cp(:,:) - TYPE (wave_descriptor), INTENT(IN) :: cdesc - REAL(DP), INTENT(IN) :: pmss(:), emass - INTEGER, INTENT(IN) :: ispin - -! ... Functions - INTEGER IDAMAX - -! ... Locals - - REAL(DP), ALLOCATABLE :: S(:,:), TEMP(:,:) - REAL(DP), ALLOCATABLE :: x0(:,:), temp1(:,:) - REAL(DP), ALLOCATABLE :: x1(:,:), rhoa(:,:) - REAL(DP), ALLOCATABLE :: sigd(:), rhod(:), aux(:) - REAL(DP) :: DIFGAM, RHOSIGD - REAL(DP) :: fact, den - integer :: nrl, n, ngw, I, ii, J, K, ITER - - TYPE (real_parallel_matrix) :: sigt, rhot, tmasst - TYPE (processors_grid) :: grid - TYPE (descriptor), POINTER :: desc - -! ... Subroutine body - - CALL start_clock( 'ortho_d' ) ! _d stay for distributed data - - n = cdesc%nbl( ispin ) - - IF( n < 1 ) THEN - ortho_gamma_p = 0 - RETURN + IF( iopt == 0 ) THEN + CALL ortho_iterate( iter, diff, s, rhod, x0, sig, rhoa, temp, tau, nx, nss ) + ELSE + CALL ortho_alt_iterate( iter, diff, s, rhod, x0, sig, rhoa, tau, nx, nss ) END IF - - ngw = cdesc%ngwl - nrl = n/nproc - IF( mpime < MOD(n,nproc) ) THEN - nrl = nrl + 1 - end if - - ALLOCATE( desc ) - - CALL grid_init(grid, group, nproc , mpime, nproc, 1, 1, mpime, 0, 0) - CALL desc_init(desc, 1, n, n, 1, n, 0, 0, grid, CYCLIC_SHAPE, REPLICATED_DATA_SHAPE, nrl) - - ALLOCATE( s(nrl, n), temp(nrl, n), x0(nrl, n), temp1(nrl, n), x1(nrl, n), rhoa(nrl, n) ) - ALLOCATE( rhod(n), sigd(n) ) - - CALL parallel_allocate(sigt, desc) - CALL parallel_allocate(tmasst, desc) - CALL parallel_allocate(rhot, desc) - -!.....INITIALIZE RHO AND SIG - - CALL SIGRHOSET2( ngw, n, CP(:,:), C0(:,:), SIGT, RHOT, TMASST, PMSS, EMASS, cdesc%gzero) - CALL mytrasp_dati(rhot%m, SIZE(rhot%m,1), 'R', temp1, nrl, 'R', n, mpime, nproc) - - DO j = 1, N - DO i = 1, nrl - rhoa(i,j) = 0.5d0*(rhot%m(i,j)-temp1(i,j)) - temp(i,j) = 0.5d0*(rhot%m(i,j)+temp1(i,j)) - ENDDO - ENDDO - - CALL pdspev_drv( 'V', temp, nrl, rhod, s, nrl, nrl, n, nproc, mpime) - -! -! ... TRANSFORM SIG, RHOA AND TMASS IN THE NEW BASIS THROUGH MATRIX S -! - CALL mymatmul(sigt%m, nrl, 'N', 'R', s, nrl, 'N', 'R', temp, nrl, 'R', n, mpime, nproc) - CALL mymatmul(s, nrl, 'T', 'R', temp, nrl, 'N', 'R', sigt%m, nrl, 'R', n, mpime, nproc) - - CALL mymatmul(rhoa, nrl, 'N', 'R', s, nrl, 'N', 'R', temp, nrl, 'R', n, mpime, nproc) - CALL mymatmul(s, nrl, 'T', 'R', temp, nrl, 'N', 'R', rhoa, nrl, 'R', n, mpime, nproc) - - CALL mymatmul(tmasst%m, nrl, 'N', 'R', s, nrl, 'N', 'R', temp, nrl, 'R', n, mpime, nproc) - CALL mymatmul(s, nrl, 'T', 'R', temp, nrl, 'N', 'R', tmasst%m, nrl, 'R', n, mpime, nproc) - -! -! ... INITIALIZE X0 -! - DO J = 1, N - ii = mpime + 1 - DO I = 1, nrl - den = (RHOD(ii)+RHOD(j)) - IF( ABS( den ) <= small ) den = SIGN( small, den ) - X0(I,J) = SIGT%M(I,J) / den - ii = ii + nproc - ENDDO - ENDDO - - !WRITE(6,*) ' ORTHO X0 ', SUM(X0) ! DEBUG -! -! ... STARTING ITERATION -! - - ITERATIVE_LOOP: DO iter = 0, ortho_max - - CALL mymatmul(x0, nrl, 'N', 'R', rhoa, nrl, 'N', 'R', temp1, nrl, 'R', n, mpime, nproc) - CALL mytrasp_dati(temp1, nrl, 'R', temp, nrl, 'R', n, mpime, nproc) - DO J=1,N - DO I=1,nrl - TEMP1(I,J) = TEMP1(I,J) + TEMP(I,J) - ENDDO - ENDDO -! - CALL mymatmul(tmasst%m, nrl, 'T', 'R', x0, nrl, 'N', 'R', temp, nrl, 'R', n, mpime, nproc) - sigd = 0.0d0 - ii = mpime + 1 - DO I=1,nrl - SIGD(ii) = TEMP(i,ii) - TEMP(i,ii) = -SIGD(ii) - ii = ii + nproc - ENDDO - CALL mp_sum(sigd) - - CALL mymatmul(x0, nrl, 'T', 'R', temp, nrl, 'N', 'R', x1, nrl, 'R', n, mpime, nproc) - CALL mytrasp_dati(x1, nrl, 'R', temp, nrl, 'R', n, mpime, nproc) - -! ... X1 = SIG - TEMP1 - 0.5d0 * ( X1 + X1^t ) - - difgam = zero - DO j = 1, n - ii = mpime + 1 - DO i = 1, nrl - den = (rhod(ii)+sigd(ii)+rhod(j)+sigd(j)) - IF( ABS( den ) <= small ) den = SIGN( small, den ) - x1(i,j) = sigt%m(i,j) - temp1(i,j) - 0.5_DP * (x1(i,j)+temp(i,j)) - x1(i,j) = x1(i,j) / den - difgam = MAX( ABS(x1(i,j)-x0(i,j)), difgam ) - ii = ii + nproc - END DO - END DO - - CALL mp_sum(difgam) - - IF(difgam .LE. ortho_eps) EXIT ITERATIVE_LOOP - x0 = x1 - - END DO ITERATIVE_LOOP - - -! -! ... TRANSFORM X1 BACK TO THE ORIGINAL BASIS - - CALL mymatmul(s, nrl, 'N', 'R', x1, nrl, 'N', 'R', temp, nrl, 'R', n, mpime, nproc) - CALL mymatmul(s, nrl, 'N', 'R', temp, nrl, 'T', 'R', sigt%m, nrl, 'R', n, mpime, nproc) - - !WRITE(6,*) ' ORTHO CP a ', SUM(CP) ! DEBUG - -!.....RESTORE C0 -! - CALL backrhoset2( ngw, n, CP(:,:), C0(:,:), sigt, PMSS, EMASS) -! - !WRITE(6,*) ' ORTHO CP c ', SUM(CP) ! DEBUG - - DEALLOCATE( s, temp, x0, temp1, x1, rhoa, rhod, sigd ) - CALL parallel_deallocate(sigt) - CALL parallel_deallocate(tmasst) - CALL parallel_deallocate(rhot) - DEALLOCATE( desc ) - - CALL stop_clock( 'ortho_d' ) - - ortho_gamma_p = iter - - RETURN - - END FUNCTION ortho_gamma_p - - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION ortho_kp(C0,CP,PMSS,EMASS) - -! INPUT: -! C0 (ORTHONORMAL) -! CP (NON-ORTHONORMAL) -! OUTPUT: -! X1 = DT2/EMASS * LAMBDA -! CP (ORTHONORMAL) -! -! Version for preconditioned equations of motion -! (following f.tassone, f.mauri and r.car ...) -! Replicated data parallel driver for complex wave functions -!----------------------------------------------------------------------! -! END manual - -#if defined __SHMEM - - USE shmem_include - -#endif - - USE control_flags, ONLY: ortho_eps, ortho_max - USE orthogonalize_base, ONLY: rhoset, sigset - - IMPLICIT NONE - - -! ... Arguments - COMPLEX(DP) :: C0(:,:), CP(:,:) - REAL(DP) :: PMSS(:), EMASS - - -! ... Locals - -#if defined __SHMEM - - pointer (p_source,S) - COMPLEX(DP) S( SIZE(c0,2), SIZE(c0,2)) - pointer (p_sig,sig) - COMPLEX(DP) SIG( SIZE(c0,2), SIZE(c0,2)) - pointer (p_rho,rho) - COMPLEX(DP) RHO( SIZE(c0,2), SIZE(c0,2)) - pointer (p_tmass,tmass) - COMPLEX(DP) TMASS( SIZE(c0,2), SIZE(c0,2)) - pointer (p_target,TEMP) - COMPLEX(DP) TEMP( SIZE(c0,2), SIZE(c0,2)) - integer err - pointer (p_pWrk,pWrk) - COMPLEX(DP) pWrk(1) - -#else - - COMPLEX(DP) SIG( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) RHO( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) S( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) TEMP( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) TMASS( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) pWrk(1) - -#endif - - COMPLEX(DP) X0( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) TEMP1( SIZE(c0,2),MAX( SIZE(c0,2),4)) - COMPLEX(DP) BLAM( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) CLAM( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) X1( SIZE(c0,2), SIZE(c0,2)) - COMPLEX(DP) RHOA( SIZE(c0,2), SIZE(c0,2)) - REAL(DP) SIGD( SIZE(c0,2)) - REAL(DP) RHOD( SIZE(c0,2)) - COMPLEX(DP), ALLOCATABLE :: AUX(:) - COMPLEX(DP), ALLOCATABLE :: DIAG(:,:) - COMPLEX(DP), ALLOCATABLE :: vv(:,:) - COMPLEX(DP), ALLOCATABLE :: sd(:) - - INTEGER :: IDAMAX - INTEGER :: N, NGW, NX, I, J, K, ITER - REAL(DP) DIFGAM,RHOSIGD - -! ... Subroutine body - - CALL start_clock( 'ortho_kp' ) - - N = SIZE( c0, 2 ) - NX = SIZE( c0, 2 ) - NGW = SIZE( c0, 1 ) - -#if defined __SHMEM - CALL SHPALLOC(p_pWrk, MAX(2*nx*nx,SHMEM_REDUCE_MIN_WRKDATA_SIZE), err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' ALLOC OF PWRK FAILED ' ,0) - END IF - CALL SHPALLOC(p_sig, 2*nx*nx , err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' ALLOC OF TMASS FAILED ' ,0) - END IF - CALL SHPALLOC(p_tmass, 2*nx*nx , err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' ALLOC OF TMASS FAILED ' ,0) - END IF - CALL SHPALLOC(p_rho, 2*nx*nx , err, 0) - WRITE(*,*)'SHPALLOC RHO done.', 2*nx*nx - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' ALLOC OF RHO FAILED ' ,0) - END IF - CALL SHPALLOC(p_source, 2*nx*nx , err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' ALLOC OF SOURCE FAILED ' ,0) - END IF - CALL SHPALLOC(p_target, 2*nx*nx , err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' ALLOC OF TARGET FAILED ' ,0) - END IF -#endif - - -!.....INITIALIZE RHO AND SIG - - ALLOCATE(AUX(NGW)) - AUX(:) = CMPLX( EMASS / PMSS(:), 0.0_DP) - DO I=1,N - C0(:,I) = C0(:,I) * AUX(:) - END DO - DEALLOCATE(AUX) - - CALL rhoset( ngw, nx, C0, CP, RHO, TMASS ) - CALL sigset( ngw, nx, CP, SIG ) - - DO J=1,N - DO I=1,N -! ... Antisymmetric rho - RHOA(I,J) = 0.5D0*(RHO(I,J) - CONJG(RHO(J,I))) -! ... Symmetric rho - temp(i,j) = rhoa(i,j) + CONJG(rho(j,i)) - ENDDO - ENDDO - -!.....DIAGONALIZATION OF RHOS - - CALL diagonalize_rho(temp,rhod,s) - -! -! ... TRANSFORM SIG, RHOA AND TMASS IN THE NEW BASIS THROUGH MATRIX S -! - CALL sqr_matmul('N','N',SIG,S,TEMP) - CALL sqr_matmul('C','N',S,TEMP,SIG) - CALL sqr_matmul('N','N',RHOA,S,TEMP) - CALL sqr_matmul('C','N',S,TEMP,RHOA) - CALL sqr_matmul('N','N',TMASS,S,TEMP) - CALL sqr_matmul('C','N',S,TEMP,TMASS) - -! -! ... INITIALIZE X0 -! - DO J=1,N - DO I=1,N - X0(I,J) = SIG(I,J)/(RHOD(I)+RHOD(J)) - ENDDO - ENDDO - -!---------------------------------------------------------------------- -! - ITERATIVE_LOOP: DO iter = 0, ortho_max - - CALL sqr_matmul('N','N',X0,RHOA,TEMP) - CALL sqr_matmul('C','N',RHOA,X0,TEMP1) -! - DO J=1,N - DO I=1,N - BLAM(I,J) = TEMP(I,J) + TEMP1(I,J) - ENDDO - ENDDO -! - CALL sqr_matmul('N','N',TMASS,X0,TEMP) - - !DO I=1,N - ! SIGD(I) = REAL(TEMP(I,I)) - ! TEMP(I,I) = -REAL(TEMP(I,I)) - !ENDDO - - CALL sqr_matmul('N','N',X0,TEMP,CLAM) -! -! X1 = SIG - BLAM - CLAM -! X1 = 1 - A - L Ba - Ba' L - L C L -! - difgam = 0.0d0 - DO J=1,N - DO I=1,N - X1(I,J) = SIG(I,J) - BLAM(I,J) - CLAM(I,J) - X1(I,J) = X1(I,J) / ( RHOD(I)+RHOD(J) ) ! +SIGD(I)+SIGD(J)) - difgam=max(abs( DBLE(X1(I,J))- DBLE(X0(I,J))),difgam) - difgam=max(abs(AIMAG(X1(I,J))-AIMAG(X0(I,J))),difgam) - ENDDO - ENDDO - - IF( difgam .LE. ortho_eps ) EXIT ITERATIVE_LOOP - x0 = x1 - - END DO ITERATIVE_LOOP - -! -! ... TRANSFORM X1 BACK TO THE ORIGINAL BASIS -! - CALL sqr_matmul('N','N',S,X1,TEMP) - CALL sqr_matmul('N','C',S,TEMP,X1) -! - CALL ZGEMM('N','N',NGW,N,N,cone,C0,SIZE(c0,1),X1,N,cone,CP,SIZE(cp,1)) -! -!.....RESTORE C0 -! - ALLOCATE(AUX(NGW)) - AUX(:) = CMPLX( PMSS(:) / EMASS ,0.0d0) - DO I=1,N - C0(:,I) = C0(:,I) * AUX(:) - END DO - DEALLOCATE(AUX) - -#if defined __SHMEM - call shmem_barrier_all - CALL SHPDEALLC(p_pwrk, err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' DEALLOC OF PWRK FAILED ' ,0) - END IF - CALL SHPDEALLC(p_sig, err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' DEALLOC OF TMASS FAILED ' ,0) - END IF - CALL SHPDEALLC(p_tmass, err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' DEALLOC OF TMASS FAILED ' ,0) - END IF - CALL SHPDEALLC(p_rho, err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' DEALLOC OF RHO FAILED ' ,0) - END IF - CALL SHPDEALLC(p_source, err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' DEALLOC OF SOURCE FAILED ' ,0) - END IF - CALL SHPDEALLC(p_target, err, 0) - IF(ERR.NE.0) THEN - CALL errore(' ORTHO ',' DEALLOC OF TARGET FAILED ' ,0) - END IF -#endif - - CALL stop_clock( 'ortho_kp' ) - - ortho_kp = iter - RETURN - END FUNCTION ortho_kp - - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION ortho_scalapack( ispin, C0, CP, cdesc, PMSS, EMASS ) - -! INPUT: -! C0 (ORTHONORMAL) -! CP (NON-ORTHONORMAL) -! OUTPUT: -! X1 = DT2/EMASS * LAMBDA -! CP (ORTHONORMAL) -! -! Version for preconditioned equations of motion -! (following f.tassone, f.mauri and r.car ...) -! Scalapack driver -!----------------------------------------------------------------------! -! END manual - - USE wave_types, ONLY: wave_descriptor - USE parallel_types, ONLY: processors_grid, descriptor, & - real_parallel_matrix, parallel_allocate, parallel_deallocate - USE descriptors_module, ONLY: desc_init_blacs, local_dimension - USE processors_grid_module, ONLY: get_blacs_grid, free_blacs_grid, & - get_grid_coor, get_grid_dims - USE blacs, ONLY: start_blacs, stop_blacs - USE scalapack - USE mp, ONLY: mp_sum, mp_max - USE control_flags, ONLY: ortho_eps, ortho_max - - - IMPLICIT NONE - -! ... Arguments - COMPLEX(DP), INTENT(INOUT) :: c0(:,:), cp(:,:) - TYPE (wave_descriptor), INTENT(IN) :: cdesc - REAL (DP) :: PMSS(:), EMASS - INTEGER, INTENT(IN) :: ispin - - -! ... Locals - - TYPE (processors_grid) :: grid - TYPE (descriptor), POINTER :: desc - - INTEGER IDAMAX - INTEGER I,J,K,II,JJ,IP,JP - INTEGER ITER - REAL (DP) :: fact,ONE_BY_EMASS - -! .. Local Scalars .. - INTEGER :: MYCOL, MYROW, NB, NPCOL, NPROW, NRL, NCL, RSRC, CSRC, N - INTEGER :: npz, mez, ngw - INTEGER :: INDXG2L, INDXL2G, INDXG2P - LOGICAL :: gzero -! .. -! .. Local Arrays .. - - TYPE (real_parallel_matrix) :: st, sigt, rhoat, tmasst, tempt, & - temp1t, x0t, x1t - - REAL (DP) :: SIGD( SIZE( c0, 2 ) ) - REAL (DP) :: RHOD( SIZE( c0, 2 ) ) - REAL (DP) :: DIFGAM - - - -! ... Subroutine body - - CALL start_clock( 'ortho_scal' ) - - n = cdesc%nbl( ispin ) - ngw = cdesc%ngwl - - IF( n < 1 ) THEN - ortho_scalapack = 0 - RETURN - END IF - - -! ... Initialize the BLACS -! CALL start_blacs() - - CALL get_blacs_grid(grid) - CALL get_grid_dims(grid, nprow, npcol, npz) - CALL get_grid_coor(grid, myrow, mycol, mez) - CALL blockset( NB, 0, N, nprow, npcol) - - ALLOCATE( desc ) - CALL desc_init_blacs(desc, 1, N, N, NB, NB, 0, 0, grid) - - RSRC = desc%ipexs - CSRC = desc%ipeys -! -!.....INITIALIZE RHO AND SIG -! - CALL parallel_allocate(sigt,desc) - CALL parallel_allocate(tmasst,desc) - CALL parallel_allocate(st,desc) - CALL parallel_allocate(rhoat,desc) - CALL parallel_allocate(tempt,desc) - CALL parallel_allocate(temp1t,desc) - CALL parallel_allocate(x0t,desc) - CALL parallel_allocate(x1t,desc) - - - CALL SIGRHOSET( ngw, n, CP(:,:), C0(:,:), SIGT, RHOAT, TMASST, PMSS, EMASS, cdesc%gzero) - -!.....DIAGONALIZATION OF RHOS - - NRL = local_dimension( desc, 'R' ) - NCL = local_dimension( desc, 'C' ) - -! TEMP = (RHOA(i,j)+RHOA(j,i))/2 SYMMETRIC PART -! RHOA = (RHOA(i,j)-RHOA(j,i))/2 ANTISYMMETRIC PART - - CALL ptranspose(rhoat, tempt) - DO J = 1, NCL - DO I = 1, NRL - TEMPT%m(i,j) = 0.5_DP * ( rhoat%m(i,j) + tempt%m(i,j) ) - rhoat%m(i,j) = rhoat%m(i,j) - tempt%m(i,j) + ! + DO i=1,nss + DO j=1,nss + IF (x0(i,j) /= x0(i,j)) CALL errore('ortho','ortho went bananas',2) END DO END DO + DEALLOCATE( rhoa, rhod, s, sig, rho, tau, temp ) - CALL pdiagonalize('U',tempt,rhod,st) - - ! ... TRANSFORM SIG, RHOA AND TMASS IN THE NEW BASIS THROUGH MATRIX S - - CALL pmatmul(sigt,st,tempt,'n','n') - CALL pmatmul(st,tempt,sigt,'t','n') - CALL pmatmul(rhoat,st,tempt,'n','n') - CALL pmatmul(st,tempt,rhoat,'t','n') - CALL pmatmul(tmasst,st,tempt,'n','n') - CALL pmatmul(st,tempt,tmasst,'t','n') - - ! ... INITIALIZE X0 - - DO J=1,NCL - DO I=1,NRL - II = INDXL2G( I, NB, MYROW, 0, NPROW ) - JJ = INDXL2G( J, NB, MYCOL, 0, NPCOL ) - X0T%m(I,J) = SIGT%m(I,J) / (RHOD(II)+RHOD(JJ)) - ENDDO - ENDDO - - ! - - ITERATIVE_LOOP: DO iter = 0, ortho_max - - CALL pmatmul(x0t,rhoat,tempt,'n','n') - - ! ... TEMP1(i,j) = TEMP(i,j) + TEMP(j,i) - - CALL ptranspose(tempt,temp1t) - DO J=1,NCL - DO I=1,NRL - TEMP1T%m(I,J) = TEMP1T%m(I,J) + TEMPT%m(I,J) - ENDDO - ENDDO -! - CALL pmatmul(tmasst,x0t,tempt,'t','n') - - DO I=1,N - SIGD(I) = 0.0_DP - II = INDXG2L( I, NB, MYROW, 0, NPROW ) - JJ = INDXG2L( I, NB, MYCOL, 0, NPCOL ) - IP = INDXG2P( I, NB, MYROW, 0, NPROW ) - JP = INDXG2P( I, NB, MYCOL, 0, NPCOL ) - IF( ( IP .eq. MYROW ) .and. ( JP .eq. MYCOL ) ) THEN - SIGD(I) = TEMPT%m(II,JJ) - TEMPT%m(II,JJ) = -TEMPT%m(II,JJ) - END IF - ENDDO - CALL mp_sum( SIGD ) - - CALL pmatmul(x0t,tempt,x1t,'t','n') - call ptranspose(x1t,tempt) - - ! ... X1 = SIG - TEMP1 - 0.5d0 * ( X1 + TEMP) - - difgam = 0.0d0 - DO J=1,NCL - DO I=1,NRL - II = INDXL2G( I, NB, MYROW, 0, NPROW ) - JJ = INDXL2G( J, NB, MYCOL, 0, NPCOL ) - X1T%m(I,J) = 0.5d0 * (X1T%m(I,J) + TEMPT%m(I,J)) - X1T%m(I,J) = SIGT%m(I,J) - TEMP1T%m(I,J) - X1T%m(I,J) - X1T%m(I,J) = X1T%m(I,J) / (RHOD(II)+SIGD(II)+RHOD(JJ)+SIGD(JJ)) - difgam = max(abs(X1T%m(I,J)-X0T%m(I,J)),difgam) - ENDDO - ENDDO - call mp_max( difgam ) - - IF( difgam .LE. ortho_eps ) EXIT ITERATIVE_LOOP - x0t%m = x1t%m - - END DO ITERATIVE_LOOP - - - ! ... TRANSFORM X1 BACK TO THE ORIGINAL BASIS - - CALL pmatmul(st,x1t,tempt,'n','n') - CALL pmatmul(st,tempt,x1t,'n','t') - - CALL backrhoset( ngw, n, CP(:,:), C0(:,:), X1T, PMSS, EMASS ) - - CALL parallel_deallocate(st) - CALL parallel_deallocate(sigt) - CALL parallel_deallocate(rhoat) - CALL parallel_deallocate(tmasst) - CALL parallel_deallocate(tempt) - CALL parallel_deallocate(temp1t) - CALL parallel_deallocate(x0t) - CALL parallel_deallocate(x1t) - - DEALLOCATE( desc ) - - CALL free_blacs_grid(grid) - - CALL stop_clock( 'ortho_scal' ) - - ortho_scalapack = iter RETURN - END FUNCTION ortho_scalapack -! + END SUBROUTINE ortho_gamma + + !=----------------------------------------------------------------------------=! - END MODULE orthogonalize + + + + SUBROUTINE ortho_cp( eigr, cp, phi, x0, diff, iter, ccc, bephi, becp ) + ! + ! input = cp (non-orthonormal), beta + ! input = phi |phi>=s'|c0> + ! output= cp (orthonormal with s( r(t+dt) ) ) + ! output= bephi, becp + ! the method used is similar to the version in les houches 1988 + ! 'simple molecular systems at..' p. 462-463 (18-22) + ! xcx + b x + b^t x^t + a = 1 + ! where c = b = a = + ! where s=s(r(t+dt)) and s'=s(r(t)) + ! for vanderbilt pseudo pot - kl & ap + ! + USE kinds, ONLY: DP + USE ions_base, ONLY: na, nat + USE cvan, ONLY: ish, nvb + USE uspp, ONLY: nkb, qq + USE uspp_param, ONLY: nh + USE electrons_base, ONLY: nbsp, nbspx, nudx, nspin, nupdwn, iupdwn, f + USE gvecw, ONLY: ngw + USE control_flags, ONLY: iprint, iprsta, ortho_max + USE io_global, ONLY: stdout, ionode + ! + IMPLICIT NONE +! + COMPLEX(DP) :: cp(ngw,nbsp), phi(ngw,nbsp), eigr(ngw,nat) + REAL(DP) :: x0( nbspx, nbspx ), diff, ccc + INTEGER :: iter + REAL(DP) :: bephi(nkb,nbsp), becp(nkb,nbsp) +! + REAL(DP), ALLOCATABLE :: xloc(:,:) + REAL(DP), ALLOCATABLE:: qbephi(:,:), qbecp(:,:) + + INTEGER :: ngwx, nkbx + INTEGER :: istart, nss, ifail, i, j, iss, iv, jv, ia, is, inl, jnl + + ngwx = ngw + nkbx = nkb + ! + ! calculation of becp and bephi + ! + CALL start_clock( 'ortho' ) + + CALL nlsm1( nbsp, 1, nvb, eigr, cp, becp ) + CALL nlsm1( nbsp, 1, nvb, eigr, phi, bephi ) + ! + ! calculation of qbephi and qbecp + ! + ALLOCATE( qbephi( nkbx, nbsp ) ) + ALLOCATE( qbecp ( nkbx, nbsp ) ) + ! + qbephi = 0.d0 + qbecp = 0.d0 + ! + DO is=1,nvb + DO iv=1,nh(is) + DO jv=1,nh(is) + IF(ABS(qq(iv,jv,is)).GT.1.e-5) THEN + DO ia=1,na(is) + inl=ish(is)+(iv-1)*na(is)+ia + jnl=ish(is)+(jv-1)*na(is)+ia + DO i=1,nbsp + qbephi(inl,i)= qbephi(inl,i) & + & +qq(iv,jv,is)*bephi(jnl,i) + qbecp (inl,i)=qbecp (inl,i) & + & +qq(iv,jv,is)*becp (jnl,i) + END DO + END DO + ENDIF + END DO + END DO + END DO + ! + ALLOCATE( xloc( nudx, nudx ) ) + ! + DO iss = 1, nspin + + nss = nupdwn(iss) + istart = iupdwn(iss) + + DO j=1,nss + DO i=1,nss + xloc(i,j) = x0( istart-1+i, istart-1+j ) * ccc + END DO + END DO + + CALL ortho_gamma( 0, cp, ngwx, phi, becp, qbecp, nkbx, bephi, qbephi, & + xloc, nudx, diff, iter, nbsp, nss, istart ) + + IF( iter > ortho_max ) THEN + WRITE( stdout, * ) ' diff= ',diff,' iter= ',iter + CALL errore('ortho','max number of iterations exceeded',iter) + END IF + + IF( iprsta > 4 ) THEN + WRITE( stdout,*) + WRITE( stdout,'(26x,a)') ' lambda ' + DO i=1,nss + WRITE( stdout,'(7f11.6)') (xloc(i,j)/f(i+istart-1),j=1,nss) + END DO + ENDIF + IF( iprsta > 2 ) THEN + WRITE( stdout,*) ' diff= ',diff,' iter= ',iter + ENDIF + ! + ! lagrange multipliers + ! + DO i=1,nss + DO j=1,nss + x0( istart-1+i, istart-1+j ) = xloc(i,j) / ccc + END DO + END DO +! + END DO +! + DEALLOCATE( xloc ) + DEALLOCATE(qbecp ) + DEALLOCATE(qbephi) +! + CALL stop_clock( 'ortho' ) + RETURN + END SUBROUTINE ortho_cp + + +!=----------------------------------------------------------------------------=! + END MODULE orthogonalize !=----------------------------------------------------------------------------=! diff --git a/CPV/ortho_base.f90 b/CPV/ortho_base.f90 index fd2792724..14b0318b2 100644 --- a/CPV/ortho_base.f90 +++ b/CPV/ortho_base.f90 @@ -7,11 +7,14 @@ ! #include "f_defs.h" - MODULE orthogonalize_base + +MODULE orthogonalize_base + USE kinds - USE parallel_toolkit, ONLY: matmulp, cmatmulp, pdspev_drv, dspev_drv, & - pzhpev_drv, zhpev_drv + USE parallel_toolkit, ONLY: pdspev_drv, dspev_drv, & + pzhpev_drv, zhpev_drv, & + rep_matmul_drv IMPLICIT NONE @@ -27,131 +30,195 @@ PARAMETER ( mcone = (-1.0d0, 0.0d0) ) REAL(DP) :: small = 1.0d-14 -#if defined __AIX - INTEGER, PARAMETER :: nrlx_tune = 128 -#else - INTEGER, PARAMETER :: nrlx_tune = 4 -#endif + INTERFACE sqr_matmul + MODULE PROCEDURE sqr_dmatmul + END INTERFACE - INTERFACE sqr_matmul - MODULE PROCEDURE sqr_dmatmul, sqr_cmatmul - END INTERFACE - - INTERFACE sigset - MODULE PROCEDURE rsigset, csigset, sigset_cp - END INTERFACE - INTERFACE rhoset - MODULE PROCEDURE rrhoset, crhoset, rhoset_cp - END INTERFACE - - INTERFACE diagonalize_rho + INTERFACE diagonalize_rho MODULE PROCEDURE diagonalize_rrho, diagonalize_crho - END INTERFACE + END INTERFACE - PUBLIC :: sqr_matmul, sigset, rhoset, tauset, diagonalize_rho - PUBLIC :: backrhoset2, sigrhoset2, backrhoset, sigrhoset - PUBLIC :: ortho_iterate + PUBLIC :: sigset, rhoset, tauset, diagonalize_rho + PUBLIC :: ortho_iterate + PUBLIC :: ortho_alt_iterate + PUBLIC :: updatc, calphi - CONTAINS +CONTAINS - SUBROUTINE sqr_dmatmul(transa,transb,a,b,c) - ! ... Multiply square matrices A, B and return the result in C - USE mp_global, ONLY: nproc - REAL(DP) :: c(:,:), a(:,:), b(:,:) - CHARACTER*1 :: transa, transb - INTEGER :: n - n = SIZE(c,1) - IF ( ( nproc > 1 ) .AND. ( n >= nproc ) ) THEN - CALL matmulp( transa, transb, a, b, c, n ) - ELSE - CALL DGEMM( transa, transb, n, n, n, one, a(1,1), n, b(1,1), n, zero, c(1,1), n) + SUBROUTINE sqr_dmatmul( transa, transb, n, a, b, c ) + + ! ... Multiply square matrices A, B and return the result in C + + USE control_flags, ONLY: iprsta + USE mp_global, ONLY: nproc, mpime, root, group + USE io_global, ONLY: ionode, stdout + USE mp, ONLY: mp_bcast + + REAL(DP) :: c(:,:), a(:,:), b(:,:) + CHARACTER(LEN=1), INTENT(IN) :: transa, transb + INTEGER, INTENT(IN) :: n + + LOGICAL :: lpdrv + INTEGER, SAVE :: calls_cnt = 0 + REAL(DP) :: t1 + REAL(DP), SAVE :: tser, tpar + REAL(DP), EXTERNAL :: cclock + ! + calls_cnt = calls_cnt + 1 + + IF( nproc == 1 ) THEN + lpdrv = .FALSE. ! with one proc do not use parallel diag + ELSE IF ( calls_cnt == 1 ) THEN + lpdrv = .TRUE. ! use parallel diag the first call to take the time + ELSE IF ( calls_cnt == 2 ) THEN + lpdrv = .FALSE. ! use seria diag the second call to take the time + ELSE IF ( tpar < tser ) THEN + lpdrv = .TRUE. ! use para diag if it is faster + IF( calls_cnt == 3 .AND. ionode .AND. iprsta > 1 ) WRITE( stdout, 10 ) tpar, tser + ELSE + lpdrv = .FALSE. ! use scalar otherwise + IF( calls_cnt == 3 .AND. ionode .AND. iprsta > 1 ) WRITE( stdout, 20 ) tpar, tser + END IF + +10 FORMAT(3X,'ortho matmul, time for parallel and serial driver = ', 2D9.2, /, & + 3X,'using parallel driver' ) +20 FORMAT(3X,'ortho matmul, time for parallel and serial driver = ', 2D9.2, /, & + 3X,'using serial driver' ) + + IF ( lpdrv ) THEN + + IF( calls_cnt < 3 ) t1 = cclock() + + CALL rep_matmul_drv( transa, transb, n, n, n, one, A, SIZE(a,1), B, SIZE(b,1), zero, C, SIZE(c,1), group ) + + IF( calls_cnt < 3 ) THEN + tpar = cclock() - t1 + CALL mp_bcast( tpar, root, group ) END IF - RETURN - END SUBROUTINE sqr_dmatmul + + ELSE + + IF( calls_cnt < 3 ) t1 = cclock() + + CALL DGEMM( transa, transb, n, n, n, one, a, SIZE(a,1), b, SIZE(b,1), zero, c, SIZE(c,1) ) + + IF( calls_cnt < 3 ) THEN + tser = cclock() - t1 + CALL mp_bcast( tser, root, group ) + END IF + + END IF + ! + RETURN + END SUBROUTINE sqr_dmatmul ! ---------------------------------------------- - SUBROUTINE sqr_cmatmul(transa,transb,a,b,c) - ! ... Multiply square matrices A, B and return the result in C - USE mp_global, ONLY: nproc - COMPLEX(DP) :: c(:,:), a(:,:), b(:,:) - CHARACTER*1 transa, transb - INTEGER :: n - n = SIZE(c,1) - IF ((nproc > 1 ).AND. (n >= nproc)) THEN - CALL cmatmulp(transa,transb,A,B,C,n) - ELSE - CALL ZGEMM(transa,transb,n,n,n,cone,a(1,1),n,b(1,1),n,czero,c(1,1),n) - END IF - RETURN - END SUBROUTINE sqr_cmatmul - - -! ---------------------------------------------- - - - SUBROUTINE diagonalize_rrho( n, rhos, rhod, s, use_pdrv ) + SUBROUTINE diagonalize_rrho( n, rhos, rhod, s ) ! Diagonalization of rhos - USE mp_global, ONLY: nproc, mpime - USE mp, ONLY: mp_sum - ! - REAL(DP), INTENT(IN) :: rhos(:,:) ! input symmetric matrix - REAL(DP) :: rhod(:) ! output eigenvalues - REAL(DP) :: s(:,:) ! output eigenvectors - INTEGER, INTENT(IN) :: n ! matrix dimension - LOGICAL, OPTIONAL, & - INTENT(IN) :: use_pdrv ! if true use parallel driver + USE control_flags, ONLY: iprsta + USE mp_global, ONLY: nproc, mpime, group, root + USE io_global, ONLY: ionode, stdout + USE mp, ONLY: mp_sum, mp_bcast + ! + REAL(DP), INTENT(IN) :: rhos(:,:) ! input symmetric matrix + REAL(DP) :: rhod(:) ! output eigenvalues + REAL(DP) :: s(:,:) ! output eigenvectors + INTEGER, INTENT(IN) :: n ! matrix dimension - REAL(DP), ALLOCATABLE :: aux(:) - REAL(DP), ALLOCATABLE :: diag(:,:) - REAL(DP), ALLOCATABLE :: vv(:,:) - ! - INTEGER :: nrl - LOGICAL :: lpdrv + REAL(DP), ALLOCATABLE :: aux(:) + REAL(DP), ALLOCATABLE :: diag(:,:) + REAL(DP), ALLOCATABLE :: vv(:,:) + ! + INTEGER :: nrl + LOGICAL :: lpdrv + INTEGER, SAVE :: calls_cnt = 0 + REAL(DP) :: t1 + REAL(DP), SAVE :: tser, tpar + REAL(DP), EXTERNAL :: cclock - lpdrv = .FALSE. - - IF( PRESENT( use_pdrv ) ) lpdrv = use_pdrv + calls_cnt = calls_cnt + 1 + + IF( nproc == 1 ) THEN + lpdrv = .FALSE. ! with one proc do not use parallel diag + ELSE IF ( calls_cnt == 1 ) THEN + lpdrv = .TRUE. ! use parallel diag the first call to take the time + ELSE IF ( calls_cnt == 2 ) THEN + lpdrv = .FALSE. ! use seria diag the second call to take the time + ELSE IF ( tpar < tser ) THEN + lpdrv = .TRUE. ! use para diag if it is faster + IF( calls_cnt == 3 .AND. ionode .AND. iprsta > 1 ) WRITE( stdout, 10 ) tpar, tser + ELSE + lpdrv = .FALSE. ! use scalar otherwise + IF( calls_cnt == 3 .AND. ionode .AND. iprsta > 1 ) WRITE( stdout, 20 ) tpar, tser + END IF + +10 FORMAT(3X,'ortho diag, time for parallel and serial driver = ', 2D9.2, /, & + 3X,'using parallel driver' ) +20 FORMAT(3X,'ortho diag, time for parallel and serial driver = ', 2D9.2, /, & + 3X,'using serial driver' ) - IF( SIZE( rhos, 1 ) /= SIZE( s, 1 ) .OR. SIZE( rhos, 2 ) /= SIZE( s, 2 ) ) & - CALL errore(" diagonalize_rho ", " input matrixes size do not match ", 1 ) + IF( SIZE( rhos, 1 ) /= SIZE( s, 1 ) .OR. SIZE( rhos, 2 ) /= SIZE( s, 2 ) ) & + CALL errore(" diagonalize_rho ", " input matrixes size do not match ", 1 ) - IF ( ( nproc > 1 ) .AND. lpdrv ) THEN - ! distribute matrix rows to processors - ! - nrl = n / nproc - IF( mpime < MOD( n, nproc ) ) THEN - nrl = nrl + 1 - end if + IF ( lpdrv ) THEN - ALLOCATE( diag( nrl, n ), vv( nrl, n ) ) + IF( calls_cnt < 3 ) t1 = cclock() - CALL prpack( n, diag, rhos) - CALL pdspev_drv( 'V', diag, nrl, rhod, vv, nrl, nrl, n, nproc, mpime) - CALL prunpack( n, s, vv) + ! distribute matrix rows to processors + ! - DEALLOCATE( diag, vv ) + nrl = n / nproc + IF( mpime < MOD( n, nproc ) ) THEN + nrl = nrl + 1 + end if - CALL mp_sum( s ) + ALLOCATE( diag( nrl, n ), vv( nrl, n ) ) - ELSE + CALL prpack( n, diag, rhos) + CALL pdspev_drv( 'V', diag, nrl, rhod, vv, nrl, nrl, n, nproc, mpime) + CALL prunpack( n, s, vv) - ALLOCATE( aux( n * ( n + 1 ) / 2 ) ) + DEALLOCATE( diag, vv ) - CALL rpack( n, aux, rhos ) ! pack lower triangle of rho into aux + CALL mp_sum( s ) - CALL dspev_drv( 'V', 'L', n, aux, rhod, s, SIZE(s,1) ) + IF( calls_cnt < 3 ) THEN - DEALLOCATE( aux ) + tpar = cclock() - t1 - END IF + CALL mp_bcast( tpar, root, group ) + + END IF + + ELSE + + IF( calls_cnt < 3 ) t1 = cclock() + + ALLOCATE( aux( n * ( n + 1 ) / 2 ) ) + + CALL rpack( n, aux, rhos ) ! pack lower triangle of rho into aux + + CALL dspev_drv( 'V', 'L', n, aux, rhod, s, SIZE(s,1) ) + + DEALLOCATE( aux ) + + IF( calls_cnt < 3 ) THEN + + tser = cclock() - t1 + + CALL mp_bcast( tser, root, group ) + + END IF + + END IF RETURN END SUBROUTINE diagonalize_rrho @@ -160,7 +227,7 @@ ! ---------------------------------------------- - SUBROUTINE diagonalize_crho(a,d,ev) + SUBROUTINE diagonalize_crho( n, a, d, ev, use_pdrv ) ! this routine calls the appropriate Lapack routine for diagonalizing a ! complex Hermitian matrix @@ -169,10 +236,13 @@ USE mp, ONLY: mp_sum IMPLICIT NONE - REAL(DP) :: d(:) - COMPLEX(DP) :: a(:,:), ev(:,:) + REAL(DP) :: d(:) + COMPLEX(DP) :: a(:,:), ev(:,:) + LOGICAL, OPTIONAL, & + INTENT(IN) :: use_pdrv ! if true use parallel driver + INTEGER, INTENT(IN) :: n - INTEGER :: n, nrl + INTEGER :: nrl COMPLEX(DP), ALLOCATABLE :: aloc(:) COMPLEX(DP), ALLOCATABLE :: ap(:,:) @@ -180,22 +250,13 @@ ! ... end of declarations ! ---------------------------------------------- + LOGICAL :: lpdrv - n = SIZE(a, 1) + lpdrv = .FALSE. + + IF( PRESENT( use_pdrv ) ) lpdrv = use_pdrv - IF((nproc.EQ.2) .OR. (n.LT.nproc) .OR. (n.LT.256)) THEN - - ALLOCATE(aloc(n*(n+1)/2)) - -! ... copy the lower-diagonal part of the matrix according to the -! ... Lapack packed storage scheme for Hermitian matrices - CALL zpack(aloc, a) -! ... call the Lapack routine - CALL zhpev_drv('V', 'L', n, aloc, d, ev, n) - - DEALLOCATE(aloc) - - ELSE + IF ( ( nproc > 1 ) .AND. use_pdrv ) THEN nrl = n/nproc IF(mpime.LT.MOD(n,nproc)) THEN @@ -211,57 +272,27 @@ DEALLOCATE(ap, vp) + ELSE + + ALLOCATE(aloc(n*(n+1)/2)) + + ! ... copy the lower-diagonal part of the matrix according to the + ! ... Lapack packed storage scheme for Hermitian matrices + + CALL zpack(aloc, a) + + ! ... call the Lapack routine + + CALL zhpev_drv('V', 'L', n, aloc, d, ev, n) + + DEALLOCATE(aloc) + END IF RETURN END SUBROUTINE diagonalize_crho -! ---------------------------------------------- -! BEGIN manual - - SUBROUTINE rsigset ( gstart, ngw, nb, cp, sig ) - -! SIG = REAL PART OF ONE-2.0*ADJ(CP)*CP+CP(*,1)*ADJ(CP(*,1)) -! WHERE CP(*,1) IS REAL, AND THEREFORE TRANS() IS USED IN PLACE OF ADJ() -! ---------------------------------------------- -! END manual - - USE mp_global, ONLY: nproc, group - USE mp, ONLY: mp_sum - - IMPLICIT NONE - - COMPLEX(DP) :: CP(:,:) - REAL(DP) :: SIG(:,:) - INTEGER, INTENT(IN) :: nb, ngw, gstart - INTEGER :: i, ldc, twongw, j, k, lds, n - - ldc = 2 * SIZE( cp, 1 ) - lds = SIZE( sig, 1 ) - twongw = 2*ngw - n = nb - - CALL DGEMM('T','N', n, n, twongw, -2.0d0, cp(1,1), ldc, cp(1,1), ldc, zero, sig(1,1), lds) - DO i = 1, n - sig(i,i) = sig(i,i) + one / DBLE(nproc) - END DO - -! WRITE( stdout,*) ' SIGSET 2 ', SUM(sig) ! DEBUG - - IF ( gstart == 2 ) THEN - DO j=1,n - DO i=1,n - sig(i,j) = sig(i,j) + DBLE(cp(1,i))*DBLE(cp(1,j)) - END DO - END DO - END IF - - CALL mp_sum( sig, group ) - - RETURN - END SUBROUTINE rsigset - ! ---------------------------------------------- SUBROUTINE csigset( ngw, nx, cp, sig ) @@ -303,59 +334,6 @@ RETURN END SUBROUTINE csigset -! ---------------------------------------------- - - SUBROUTINE rrhoset ( gstart, ngw, nb, c0, cp, rho, tmass ) - - ! RHO = REAL PART OF 2*ADJ(C0/PMSS)*CP + - ! C0(*,1)/PMSS*TRANS(CP(*,1)) - ! (CP(*,1) AND C0(*,1) REAL!) - ! - ! TMASS = REAL PART OF 2*ADJ(C0/PMSS)*C0/PMSS + ... - ! - ! RHO AND TMASS ARE PLACED IN COMMON /HOPE/ - - USE mp_global, ONLY: nproc - USE mp, ONLY: mp_sum - - IMPLICIT NONE - - COMPLEX(DP) :: CP(:,:), C0(:,:) - REAL(DP) :: RHO(:,:), TMASS(:,:) - INTEGER, INTENT(IN) :: ngw, nb, gstart - - INTEGER :: i, j, ldc, ldr, tngw, n - - ldc = 2*SIZE( cp, 1 ) - ldr = SIZE( rho, 1 ) - tngw = 2*ngw - n = nb - - CALL DGEMM('T','N',n,n,tngw,two,c0(1,1),ldc,cp(1,1),ldc,zero,rho(1,1),ldr) - CALL DGEMM('T','N',n,n,tngw,two,c0(1,1),ldc,c0(1,1),ldc,zero,tmass(1,1),ldr) - - IF (gstart == 2) THEN - DO j=1,n - DO i=1,n - rho(i,j) = rho(i,j) - DBLE(c0(1,i))*DBLE(cp(1,j)) - END DO - END DO - END IF - - IF (gstart == 2) THEN - DO j=1,n - DO i=1,n - tmass(i,j) = tmass(i,j) - DBLE(c0(1,i))*DBLE(c0(1,j)) - END DO - END DO - END IF - - CALL mp_sum( rho ) - CALL mp_sum( tmass ) - - RETURN - END SUBROUTINE rrhoset - ! ---------------------------------------------- SUBROUTINE crhoset( ngw, nx, c0, cp, rho, tmass ) @@ -438,6 +416,9 @@ COMPLEX (DP) :: C0ji INTEGER :: ldc + INTEGER :: NUMROC + EXTERNAL NUMROC + ! ! SUBROUTINE BODY ! @@ -588,6 +569,7 @@ INTEGER :: ip, ldc INTEGER :: nngw, npz, mez, nproc, mpime INTEGER :: nrl_ip, nrlx + INTEGER :: nrlx_tune = 256 REAL(DP), ALLOCATABLE :: RTMP(:,:,:) REAL(DP), ALLOCATABLE :: ebpmss(:) REAL(DP) :: sqrtfact @@ -772,6 +754,9 @@ REAL (DP) :: FACT,ONE_BY_EMASS REAL (DP), allocatable :: SIGTMP(:) + INTEGER :: NUMROC + EXTERNAL NUMROC + ! ! SUBROUTINE BODY ! @@ -869,6 +854,7 @@ INTEGER NRL, nrl_ip, n, ii, jj INTEGER ip, nngw, nrlx INTEGER npz, mez, mpime, nproc, ldc + INTEGER :: nrlx_tune = 256 REAL (DP) :: DDOT REAL (DP) :: FACT,ONE_BY_EMASS @@ -1073,16 +1059,15 @@ !=----------------------------------------------------------------------------=! - SUBROUTINE ortho_iterate( u, diag, xloc, sig, rhor, rhos, tau, nx, nss, max, eps ) + SUBROUTINE ortho_iterate( iter, diff, u, diag, xloc, sig, rhor, rhos, tau, nx, nss ) - USE kinds, ONLY: DP - USE io_global, ONLY: stdout + USE kinds, ONLY: DP + USE io_global, ONLY: stdout + USE control_flags, ONLY: ortho_eps, ortho_max IMPLICIT NONE - INTEGER, INTENT(IN) :: max INTEGER, INTENT(IN) :: nx, nss - REAL(DP), INTENT(IN) :: eps REAL(DP) :: u( nx, nx ) REAL(DP) :: diag( nx ) REAL(DP) :: xloc( nx, nx ) @@ -1090,16 +1075,17 @@ REAL(DP) :: rhos( nx, nx ) REAL(DP) :: tau( nx, nx ) REAL(DP) :: sig( nx, nx ) + INTEGER, INTENT(OUT) :: iter + REAL(DP), INTENT(OUT) :: diff - INTEGER :: iter, i, j + INTEGER :: i, j REAL(DP), ALLOCATABLE :: tmp1(:,:), tmp2(:,:), dd(:,:) REAL(DP), ALLOCATABLE :: con(:,:), x1(:,:) - REAL(DP) :: diff ALLOCATE( tmp1(nx,nx), tmp2(nx,nx), dd(nx,nx), x1(nx,nx), con(nx,nx) ) - DO iter = 1, max + ITERATIVE_LOOP: DO iter = 1, ortho_max ! ! the following 4 MXMA-calls do the following matrix ! multiplications: @@ -1128,7 +1114,7 @@ END DO END DO - IF( diff <= eps ) go to 20 + IF( diff < ortho_eps ) EXIT ITERATIVE_LOOP ! ! the following two MXMA-calls do: @@ -1153,12 +1139,7 @@ CALL MXMA(tmp1,1,nx, u,nx,1,tmp2,1,nx,nss,nss,nss) CALL MXMA( u,1,nx,tmp2,1,nx,xloc,1,nx,nss,nss,nss) ! - END DO - - WRITE( stdout,*) ' diff= ',diff,' iter= ',iter - CALL errore('ortho','max number of iterations exceeded',iter) - -20 CONTINUE + END DO ITERATIVE_LOOP DEALLOCATE( tmp1, tmp2, dd, x1, con ) @@ -1166,9 +1147,120 @@ END SUBROUTINE ortho_iterate +!=----------------------------------------------------------------------------=! +! +! Alternative iterative cycle +! +!=----------------------------------------------------------------------------=! + + + SUBROUTINE ortho_alt_iterate( iter, diff, u, diag, xloc, sig, rhor, tau, nx, n ) + + USE kinds, ONLY: DP + USE io_global, ONLY: stdout + USE control_flags, ONLY: ortho_eps, ortho_max + USE mp_global, ONLY: group + + IMPLICIT NONE + + INTEGER, INTENT(IN) :: nx, n + REAL(DP) :: u( nx, nx ) + REAL(DP) :: diag( nx ) + REAL(DP) :: xloc( nx, nx ) + REAL(DP) :: rhor( nx, nx ) + REAL(DP) :: tau( nx, nx ) + REAL(DP) :: sig( nx, nx ) + INTEGER, INTENT(OUT) :: iter + REAL(DP), INTENT(OUT) :: diff + + INTEGER :: i, j + REAL(DP), ALLOCATABLE :: tmp1(:,:), tmp2(:,:) + REAL(DP), ALLOCATABLE :: x1(:,:) + REAL(DP), ALLOCATABLE :: sigd(:) + REAL(DP) :: den, dx + + ALLOCATE( tmp1(nx,nx), tmp2(nx,nx), x1(nx,nx), sigd(nx) ) + + + ! + ! ... Transform "sig", "rhoa" and "tau" in the new basis through matrix "s" + ! + CALL sqr_matmul( 'N', 'N', n, sig, u, tmp1 ) + CALL sqr_matmul( 'T', 'N', n, u, tmp1, sig ) + CALL sqr_matmul( 'N', 'N', n, rhor, u, tmp1 ) + CALL sqr_matmul( 'T', 'N', n, u, tmp1, rhor ) + CALL sqr_matmul( 'N', 'N', n, tau, u, tmp1 ) + CALL sqr_matmul( 'T', 'N', n, u, tmp1, tau ) + ! + ! ... Initialize x0 + ! + DO J = 1, N + DO I = 1, N + den = (diag(i)+diag(j)) + IF( ABS( den ) <= small ) den = SIGN( small, den ) + xloc(i,j) = sig(i,j) / den + ENDDO + ENDDO + + ! + ! ... Starting iteration + ! + + ITERATIVE_LOOP: DO iter = 0, ortho_max + + CALL sqr_matmul( 'N', 'N', n, xloc, rhor, tmp2 ) + call mytranspose( tmp2, NX, tmp1, NX, N, N ) + DO J=1,N + DO I=1,N + tmp2(I,J) = tmp2(I,J) + tmp1(I,J) + ENDDO + ENDDO +! + CALL sqr_matmul( 'T', 'N', n, tau, xloc, tmp1 ) + ! + DO I = 1, N + SIGD(I) = tmp1(I,I) + tmp1(I,I) = -SIGD(I) + ENDDO + + CALL sqr_matmul( 'T', 'N', n, xloc, tmp1, X1 ) + ! + call mytranspose( X1, NX, tmp1, NX, N, N ) + + ! ... X1 = SIG - tmp2 - 0.5d0 * ( X1 + X1^t ) + + diff = 0.0d0 + ! + DO j = 1, n + DO i = 1, n + ! + den = ( diag(i) + sigd(i) + diag(j) + sigd(j) ) + IF( ABS( den ) <= small ) den = SIGN( small, den ) + x1(i,j) = sig(i,j) - tmp2(i,j) - 0.5d0 * (x1(i,j)+tmp1(i,j)) + x1(i,j) = x1(i,j) / den + diff = MAX( ABS( x1(i,j) - xloc(i,j) ), diff ) + xloc(i,j) = x1(i,j) + END DO + END DO + + IF( diff < ortho_eps ) EXIT ITERATIVE_LOOP + + END DO ITERATIVE_LOOP + ! + ! ... Transform x0 back to the original basis + + CALL sqr_matmul( 'N', 'N', n, u, xloc, tmp1 ) + CALL sqr_matmul( 'N', 'T', n, u, tmp1, xloc ) + + DEALLOCATE( tmp1, tmp2, x1 ) + + RETURN + END SUBROUTINE ortho_alt_iterate + + !------------------------------------------------------------------------- - SUBROUTINE sigset_cp( cp, ngwx, becp, nkbx, qbecp, n, nss, ist, sig, nx ) + SUBROUTINE sigset( cp, ngwx, becp, nkbx, qbecp, n, nss, ist, sig, nx ) !----------------------------------------------------------------------- ! input: cp (non-orthonormal), becp, qbecp ! computes the matrix @@ -1182,6 +1274,8 @@ USE gvecw, ONLY: ngw USE reciprocal_vectors, ONLY: gstart USE mp, ONLY: mp_sum + USE control_flags, ONLY: iprsta + USE io_global, ONLY: stdout ! IMPLICIT NONE ! @@ -1192,18 +1286,9 @@ INTEGER :: i, j REAL(DP), ALLOCATABLE :: tmp1(:,:) ! - sig = 0.d0 - ! - CALL MXMA( cp( 1, ist ), 2*ngwx, 1, cp( 1, ist ), 1, 2*ngwx, & - & sig, 1, nx, nss, 2*ngw, nss ) - ! - ! q >= 0 components with weight 2.0 - ! - DO j=1,nss - DO i=1,nss - sig(i,j) = -2.0d0 * sig(i,j) - END DO - END DO + + CALL DGEMM( 'T', 'N', nss, nss, 2*ngw, -2.0d0, cp( 1, ist ), 2*ngwx, & + cp( 1, ist ), 2*ngwx, 0.0d0, sig, nx) ! ! q = 0 components has weight 1.0 ! @@ -1238,13 +1323,22 @@ DEALLOCATE( tmp1 ) ENDIF + + IF(iprsta.GT.4) THEN + WRITE( stdout,*) + WRITE( stdout,'(26x,a)') ' sig ' + DO i=1,nss + WRITE( stdout,'(7f11.6)') (sig(i,j),j=1,nss) + END DO + ENDIF + ! RETURN - END SUBROUTINE sigset_cp + END SUBROUTINE sigset ! !----------------------------------------------------------------------- - SUBROUTINE rhoset_cp( cp, ngwx, phi, bephi, nkbx, qbecp, n, nss, ist, rho, nx ) + SUBROUTINE rhoset( cp, ngwx, phi, bephi, nkbx, qbecp, n, nss, ist, rho, nx ) !----------------------------------------------------------------------- ! input: cp (non-orthonormal), phi, bephi, qbecp ! computes the matrix @@ -1259,6 +1353,8 @@ USE cvan, ONLY: nvb USE kinds, ONLY: DP USE mp, ONLY: mp_sum + USE control_flags, ONLY: iprsta + USE io_global, ONLY: stdout ! IMPLICIT NONE ! @@ -1269,20 +1365,12 @@ INTEGER :: i, j REAL(DP), ALLOCATABLE :: tmp1(:,:) ! - rho (:,:) = 0.d0 - ! ! ! - CALL MXMA( phi( 1, ist ), 2*ngwx, 1, cp( 1, ist ), 1, 2*ngwx, & - & rho, 1, nx, nss, 2*ngw, nss ) ! - ! q >= 0 components with weight 2.0 - ! - DO j=1,nss - DO i=1,nss - rho(i,j)=2.*rho(i,j) - END DO - END DO + + CALL DGEMM( 'T', 'N', nss, nss, 2*ngw, 2.0d0, phi( 1, ist ), 2*ngwx, & + cp( 1, ist ), 2*ngwx, 0.0d0, rho, nx) ! ! q = 0 components has weight 1.0 ! @@ -1313,12 +1401,20 @@ DEALLOCATE( tmp1 ) ENDIF + + IF(iprsta.GT.4) THEN + WRITE( stdout,*) + WRITE( stdout,'(26x,a)') ' rho ' + DO i=1,nss + WRITE( stdout,'(7f11.6)') (rho(i,j),j=1,nss) + END DO + ENDIF ! RETURN - END SUBROUTINE rhoset_cp + END SUBROUTINE rhoset !------------------------------------------------------------------------- - SUBROUTINE tauset( phi, ngwx, bephi, nkbx, qbephi, n, nss, ist, tau, nx ) + SUBROUTINE tauset( phi, ngwx, bephi, nkbx, qbephi, n, nss, ist, tau, nx ) !----------------------------------------------------------------------- ! input: phi ! computes the matrix @@ -1332,6 +1428,8 @@ USE gvecw, ONLY: ngw USE reciprocal_vectors, ONLY: gstart USE mp, ONLY: mp_sum + USE control_flags, ONLY: iprsta + USE io_global, ONLY: stdout ! IMPLICIT NONE INTEGER :: nss, ist, ngwx, nkbx, n, nx @@ -1340,19 +1438,9 @@ ! INTEGER :: i, j REAL(DP), ALLOCATABLE :: tmp1( :, : ) -! - tau = 0.0d0 ! - CALL MXMA( phi( 1, ist ), 2*ngwx, 1, phi( 1, ist ), 1, 2*ngwx, & - & tau, 1, nx, nss, 2*ngw, nss ) - ! - ! q >= 0 components with weight 2.0 - ! - DO j=1,nss - DO i=1,nss - tau(i,j) = 2.0d0 * tau(i,j) - END DO - END DO + CALL DGEMM( 'T', 'N', nss, nss, 2*ngw, 2.0d0, phi( 1, ist ), 2*ngwx, & + phi( 1, ist ), 2*ngwx, 0.0d0, tau, nx) ! ! q = 0 components has weight 1.0 ! @@ -1383,9 +1471,236 @@ DEALLOCATE( tmp1 ) ENDIF + + IF(iprsta.GT.4) THEN + WRITE( stdout,*) + WRITE( stdout,'(26x,a)') ' tau ' + DO i=1,nss + WRITE( stdout,'(7f11.6)') (tau(i,j),j=1,nss) + END DO + ENDIF ! RETURN - END SUBROUTINE tauset + END SUBROUTINE tauset +! +!------------------------------------------------------------------------- + SUBROUTINE updatc( ccc, n, x0, nx, phi, ngwx, bephi, nkbx, becp, bec, cp ) +!----------------------------------------------------------------------- +! + ! input ccc : dt**2/emass OR 1.0d0 demending on ortho + ! input x0 : converged lambdas from ortho-loop (unchanged in output) + ! input cp : non-orthonormal cp=c0+dh/dc*ccc + ! input bec : + ! input phi + ! output cp : orthonormal cp=cp+lambda*phi + ! output bec: bec=becp+lambda*bephi + ! + USE kinds, ONLY: DP + USE ions_base, ONLY: nsp, na + USE io_global, ONLY: stdout + USE cvan, ONLY: nvb, ish + USE uspp, ONLY: nkb, nkbus + USE uspp_param, ONLY: nh + USE gvecw, ONLY: ngw + USE control_flags, ONLY: iprint, iprsta +! + IMPLICIT NONE +! + INTEGER, INTENT(IN) :: n, nx, ngwx, nkbx + COMPLEX(DP) :: cp( ngwx, n ), phi( ngwx, n ) + REAL(DP), INTENT(IN) :: ccc + REAL(DP) :: bec( nkbx, n ), x0( nx, nx ) + REAL(DP) :: bephi( nkbx, n ), becp( nkbx, n ) + + ! local variables + + INTEGER :: i, j, ig, is, iv, ia, inl + REAL(DP), ALLOCATABLE :: wtemp(:,:) + ! + ! lagrange multipliers + ! + CALL start_clock( 'updatc' ) + + IF ( ccc /= 1.0d0 ) THEN + DO j = 1, n + CALL DSCAL( n, ccc, x0(1,j), 1 ) + END DO + END IF + ! + CALL DGEMM( 'N', 'N', 2*ngw, n, n, 1.0d0, phi, 2*ngwx, x0, nx, 1.0d0, cp, 2*ngwx ) + ! + ! updating of the + ! + ! bec of vanderbilt species are updated + ! + IF( nvb > 0 )THEN + + ALLOCATE( wtemp( n, nkb ) ) + + CALL MXMA(x0,1,nx,bephi,nkb,1,wtemp,1,n,n,n,nkbus) +! + DO i=1,n + DO inl=1,nkbus + bec(inl,i)=wtemp(i,inl)+becp(inl,i) + END DO + END DO + + DEALLOCATE( wtemp ) + + ENDIF +! + IF ( iprsta > 2 ) THEN + WRITE( stdout,*) + DO is=1,nsp + IF(nsp.GT.1) THEN + WRITE( stdout,'(33x,a,i4)') ' updatc: bec (is)',is + WRITE( stdout,'(8f9.4)') & + & ((bec(ish(is)+(iv-1)*na(is)+1,i),iv=1,nh(is)),i=1,n) + ELSE + DO ia=1,na(is) + WRITE( stdout,'(33x,a,i4)') ' updatc: bec (ia)',ia + WRITE( stdout,'(8f9.4)') & + & ((bec(ish(is)+(iv-1)*na(is)+ia,i),iv=1,nh(is)),i=1,n) + END DO + END IF + WRITE( stdout,*) + END DO + ENDIF +! + IF ( ccc /= 1.0d0 ) THEN + DO j=1,n + CALL DSCAL(n,1.0/ccc,x0(1,j),1) + END DO + END IF +! + CALL stop_clock( 'updatc' ) +! + RETURN + END SUBROUTINE updatc + + +!------------------------------------------------------------------------- + SUBROUTINE calphi( c0, ngwx, bec, nkbx, betae, phi, n, ema0bg ) +!----------------------------------------------------------------------- +! input: c0 (orthonormal with s(r(t)), bec=, betae=|beta> +! computes the matrix phi (with the old positions) +! where |phi> = s'|c0> = |c0> + sum q_ij |i> +! where s'=s(r(t)) +! + USE kinds, ONLY: DP + USE ions_base, ONLY: na, nsp + USE io_global, ONLY: stdout + USE cvan, ONLY: ish, nvb + USE uspp_param, ONLY: nh + USE uspp, ONLY: nhsavb=>nkbus, qq + USE gvecw, ONLY: ngw + USE constants, ONLY: pi, fpi + USE control_flags, ONLY: iprint, iprsta + USE mp, ONLY: mp_sum +! + IMPLICIT NONE + + INTEGER, INTENT(IN) :: ngwx, nkbx, n + COMPLEX(DP) :: c0( ngwx, n ), phi( ngwx, n ), betae( ngwx, nkbx ) + REAL(DP) :: bec( nkbx, n ), emtot + REAL(DP), OPTIONAL :: ema0bg( ngwx ) + + ! local variables + ! + INTEGER :: is, iv, jv, ia, inl, jnl, i, j + REAL(DP), ALLOCATABLE :: qtemp( : , : ) +! + CALL start_clock( 'calphi' ) + + + phi(:,:) = (0.d0, 0.d0) +! + IF ( nvb > 0 ) THEN + + ALLOCATE( qtemp( nhsavb, n ) ) + + qtemp (:,:) = 0.d0 + DO is=1,nvb + DO iv=1,nh(is) + DO jv=1,nh(is) + IF(ABS(qq(iv,jv,is)) > 1.e-5) THEN + DO ia=1,na(is) + inl=ish(is)+(iv-1)*na(is)+ia + jnl=ish(is)+(jv-1)*na(is)+ia + DO i=1,n + qtemp(inl,i) = qtemp(inl,i) + & + & qq(iv,jv,is)*bec(jnl,i) + END DO + END DO + ENDIF + END DO + END DO + END DO +! + CALL MXMA & + & ( betae, 1, 2*ngwx, qtemp, 1, nhsavb, phi, 1, 2*ngwx, 2*ngw, nhsavb, n ) + + DEALLOCATE( qtemp ) + + END IF +! + IF( PRESENT( ema0bg ) ) THEN + DO j=1,n + DO i=1,ngw + phi(i,j)=(phi(i,j)+c0(i,j))*ema0bg(i) + END DO + END DO + ELSE + DO j=1,n + DO i=1,ngw + phi(i,j)=phi(i,j)+c0(i,j) + END DO + END DO + END IF + + ! + + IF(iprsta > 2) THEN + emtot=0.0d0 + IF( PRESENT( ema0bg ) ) THEN + DO j=1,n + DO i=1,ngw + emtot=emtot +2.0d0*DBLE(phi(i,j)*CONJG(c0(i,j)))*ema0bg(i)**(-2.0d0) + END DO + END DO + ELSE + DO j=1,n + DO i=1,ngw + emtot=emtot +2.0d0*DBLE(phi(i,j)*CONJG(c0(i,j))) + END DO + END DO + END IF + emtot=emtot/n + + CALL mp_sum( emtot ) + + WRITE( stdout,*) 'in calphi sqrt(emtot)=',SQRT(emtot) + WRITE( stdout,*) + DO is=1,nsp + IF(nsp > 1) THEN + WRITE( stdout,'(33x,a,i4)') ' calphi: bec (is)',is + WRITE( stdout,'(8f9.4)') & + & ((bec(ish(is)+(iv-1)*na(is)+1,i),iv=1,nh(is)),i=1,n) + ELSE + DO ia=1,na(is) + WRITE( stdout,'(33x,a,i4)') ' calphi: bec (ia)',ia + WRITE( stdout,'(8f9.4)') & + & ((bec(ish(is)+(iv-1)*na(is)+ia,i),iv=1,nh(is)),i=1,n) + END DO + END IF + END DO + ENDIF + + + CALL stop_clock( 'calphi' ) +! + RETURN + END SUBROUTINE calphi END MODULE orthogonalize_base diff --git a/CPV/para.f90 b/CPV/para.f90 index 823a29800..f40e1602a 100644 --- a/CPV/para.f90 +++ b/CPV/para.f90 @@ -88,7 +88,7 @@ end module para_mod ! ! !---------------------------------------------------------------------- - subroutine read_rho(unit,nspin,rhor) + subroutine read_rho( unit, nspin, rhor ) !---------------------------------------------------------------------- ! ! read rhor(nnr,nspin) from file diff --git a/CPV/potentials.f90 b/CPV/potentials.f90 index 515219d82..a39178c8c 100644 --- a/CPV/potentials.f90 +++ b/CPV/potentials.f90 @@ -176,7 +176,7 @@ ! ------------------------------------------------------------------------- SUBROUTINE kspotential & - ( nfi, tprint, tforce, tstress, rhoe, desc, atoms, bec, becdr, eigr, & + ( nfi, tprint, tforce, tstress, rhoe, atoms, bec, becdr, eigr, & ei1, ei2, ei3, sfac, c0, cdesc, tcel, ht, fi, vpot, edft, timepre ) USE charge_density, ONLY: rhoofr @@ -185,7 +185,6 @@ USE cell_module, ONLY: boxdimensions USE atoms_type_module, ONLY: atoms_type USE wave_types, ONLY: wave_descriptor - USE charge_types, ONLY: charge_descriptor ! ... declare subroutine arguments LOGICAL :: tcel @@ -193,7 +192,7 @@ TYPE (atoms_type), INTENT(INOUT) :: atoms COMPLEX(DP), INTENT(INOUT) :: c0(:,:,:,:) TYPE (wave_descriptor), INTENT(IN) :: cdesc - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) COMPLEX(DP) :: ei1(:,:) COMPLEX(DP) :: ei2(:,:) COMPLEX(DP) :: ei3(:,:) @@ -203,17 +202,16 @@ REAL(DP) :: bec(:,:) REAL(DP) :: becdr(:,:,:) TYPE (dft_energy_type) :: edft - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) COMPLEX(DP), INTENT(IN) :: sfac(:,:) LOGICAL, INTENT(IN) :: tforce, tstress, tprint REAL(DP), INTENT(OUT) :: timepre - TYPE (charge_descriptor), INTENT(IN) :: desc edft%enl = nlrh_m( c0, cdesc, tforce, atoms, fi, bec, becdr, eigr ) - CALL rhoofr( nfi, c0, cdesc, fi, rhoe, desc, ht ) + CALL rhoofr( nfi, c0, cdesc, fi, rhoe, ht ) - CALL vofrhos( tprint, tforce, tstress, rhoe, desc, atoms, vpot, bec, & + CALL vofrhos( tprint, tforce, tstress, rhoe, atoms, vpot, bec, & c0, cdesc, fi, eigr, ei1, ei2, ei3, sfac, timepre, & ht, edft ) @@ -223,7 +221,7 @@ !=----------------------------------------------------------------------------=! SUBROUTINE vofrhos & - ( tprint, tforce, tstress, rhoe, desc, atoms, vpot, bec, c0, cdesc, fi, & + ( tprint, tforce, tstress, rhoe, atoms, vpot, bec, c0, cdesc, fi, & eigr, ei1, ei2, ei3, sfac, timepre, box, edft ) ! this routine computes: @@ -260,7 +258,7 @@ ! ... include modules - USE control_flags, ONLY: tscreen, tchi2, iprsta + USE control_flags, ONLY: tscreen, tchi2, iprsta, force_pairing USE mp_global, ONLY: nproc, mpime, root, group USE mp, ONLY: mp_sum USE cell_module, ONLY: boxdimensions @@ -274,9 +272,6 @@ USE charge_density, ONLY: gradrho USE chi2, ONLY: rhochi, allocate_chi2, deallocate_chi2 USE vanderwaals, ONLY: tvdw, vdw - USE charge_density, ONLY: checkrho - USE charge_types, ONLY: charge_descriptor - USE wave_functions, ONLY: dft_kinetic_energy USE wave_types, ONLY: wave_descriptor USE io_global, ONLY: ionode, stdout USE sic_module, ONLY: self_interaction, sic_epsilon, sic_alpha !!TO ADD!!! @@ -285,15 +280,17 @@ USE atom, ONLY: nlcc USE core, ONLY: nlcc_any, rhocg USE core, ONLY: add_core_charge, core_charge_forces + ! USE reciprocal_vectors, ONLY: gx USE atoms_type_module, ONLY: atoms_type USE exchange_correlation, ONLY: exch_corr_energy + use grid_dimensions, only: nr1, nr2, nr3, nnrx IMPLICIT NONE ! ... declare subroutine arguments LOGICAL, INTENT(IN) :: tprint, tforce, tstress - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) REAL(DP), INTENT(IN) :: fi(:,:,:) REAL(DP) :: bec(:,:) COMPLEX(DP) :: ei1(:,:) @@ -303,10 +300,9 @@ COMPLEX(DP), INTENT(IN) :: c0(:,:,:,:) TYPE (atoms_type), INTENT(INOUT) :: atoms TYPE (wave_descriptor), INTENT(IN) :: cdesc - TYPE (charge_descriptor), INTENT(IN) :: desc TYPE (boxdimensions), INTENT(INOUT) :: box TYPE (dft_energy_type) :: edft - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) COMPLEX(DP), INTENT(IN) :: sfac(:,:) TYPE (dft_energy_type) :: edft_self @@ -321,16 +317,16 @@ COMPLEX(DP), ALLOCATABLE :: vloc(:), self_vloc(:) COMPLEX(DP), ALLOCATABLE :: rho12(:), rhoeg(:,:), self_rhoeg(:) COMPLEX(DP), ALLOCATABLE :: rhoetg(:,:) - REAL(DP), ALLOCATABLE :: rhoetr(:,:,:,:) + REAL(DP), ALLOCATABLE :: rhoetr(:,:) REAL(DP), ALLOCATABLE :: fion_vdw(:,:) - REAL(DP), ALLOCATABLE :: grho(:,:,:,:,:) - REAL(DP), ALLOCATABLE :: v2xc(:,:,:,:,:) + REAL(DP), ALLOCATABLE :: grho(:,:,:) + REAL(DP), ALLOCATABLE :: v2xc(:,:,:) REAL(DP), ALLOCATABLE :: fion(:,:) - REAL(DP), ALLOCATABLE :: self_rho(:,:,:,:) - REAL(DP), ALLOCATABLE :: self_vpot(:,:,:,:) - REAL(DP), ALLOCATABLE :: self_grho(:,:,:,:,:) - REAL(DP), ALLOCATABLE :: self_v2xc(:,:,:,:,:) + REAL(DP), ALLOCATABLE :: self_rho(:,:) + REAL(DP), ALLOCATABLE :: self_vpot(:,:) + REAL(DP), ALLOCATABLE :: self_grho(:,:,:) + REAL(DP), ALLOCATABLE :: self_v2xc(:,:,:) REAL(DP) :: pail(3,3) @@ -347,36 +343,22 @@ LOGICAL :: ttscreen, ttsic, tgc - INTEGER ig1, ig2, ig3, is, ia, ig, isc, iflag, ispin - INTEGER ik, i, j, k, isa, idum, nspin - INTEGER nr1_l, nr2_l, nr3_l, nr1_g, nr2_g, nr3_g, nnr_l - INTEGER :: nr1x, nr2x, nr3x + INTEGER ig1, ig2, ig3, is, ia, ig, isc, iflag, iss + INTEGER ik, i, j, k, isa, idum, nspin, iswfc INTEGER :: ierr DATA iflag / 0 / SAVE iflag, desr + REAL(DP), EXTERNAL :: enkin + ! end of declarations ! ---------------------------------------------- IF(timing) s0 = cclock() - nspin = desc % nspin + nspin = SIZE( rhoe, 2 ) - nr1_l = desc % nxl - nr2_l = desc % nyl - nr3_l = desc % nzl - - nr1_g = desc % nx - nr2_g = desc % ny - nr3_g = desc % nz - - nnr_l = nr1_l * nr2_l * nr3_l - - nr1x = dfftp%nr1x - nr2x = dfftp%nr2x - nr3x = dfftp%npl - edft%evdw = 0.0d0 ! ! ttscreen = .TRUE. @@ -392,7 +374,9 @@ CALL allocate_chi2(ngm) END IF - ALLOCATE( rhoetr( nr1x, nr2x, nr3x, nspin) ) + ALLOCATE( rhoetr( nnrx, nspin ) ) + rhoetr = 0.0d0 + ALLOCATE( fion( 3, atoms%nat ) ) fion = atoms%for( 1:3, 1:atoms%nat ) @@ -400,44 +384,43 @@ pail = box%pail IF(tgc) THEN - ALLOCATE( grho( nr1x, nr2x, nr3x, 3, nspin ) ) - ALLOCATE( v2xc( nr1x, nr2x, nr3x, nspin, nspin) ) + ALLOCATE( grho( nnrx, 3, nspin ) ) + ALLOCATE( v2xc( nnrx, nspin, nspin) ) ELSE - ALLOCATE( grho( 1, 1, 1, 1, 1 ) ) - ALLOCATE( v2xc( 1, 1, 1, 1, 1 ) ) + ALLOCATE( grho( 1, 1, 1 ) ) + ALLOCATE( v2xc( 1, 1, 1 ) ) END IF + grho = 0.0d0 + v2xc = 0.0d0 ALLOCATE( rhoeg(ngm, nspin) ) ALLOCATE( rhoetg(ngm, nspin) ) - !edft%self_sxc = 0.d0 - !edft%sxc = 0.d0 - !edft%self_ehte = 0.d0 - !edft%eht = 0.d0 - - IF( ttsic ) THEN IF ( tgc ) THEN - ALLOCATE(self_grho( nr1x, nr2x, nr3x, 3, nspin ), STAT = ierr) + ALLOCATE(self_grho( nnrx, 3, nspin ), STAT = ierr) IF( ierr /= 0 ) CALL errore(' vofrhos ', ' allocating self_grho ', ierr) - ALLOCATE(self_v2xc( nr1x, nr2x, nr3x, nspin, nspin ), STAT = ierr) + ALLOCATE(self_v2xc( nnrx, nspin, nspin ), STAT = ierr) IF( ierr /= 0 ) CALL errore(' vofrhos ', ' allocating self_v2xc ', ierr) + self_grho = 0.D0 self_v2xc = 0.D0 END IF !on tgc - ALLOCATE (self_vpot( nr1x, nr2x, nr3x, 2 ), STAT = ierr) + ALLOCATE (self_vpot( nnrx, 2 ), STAT = ierr) IF( ierr /= 0 ) CALL errore(' vofrhos ', ' allocating self_vpot ', ierr) self_vpot = 0.D0 - ALLOCATE (self_rho( nr1x, nr2x, nr3x, 2), STAT = ierr) + ALLOCATE (self_rho( nnrx, 2), STAT = ierr) IF( ierr /= 0 ) CALL errore(' vofrhos ', ' allocating self_rho ', ierr) + self_rho = 0.D0 + END IF !on self_interaction IF(timing) s1 = cclock() @@ -446,7 +429,12 @@ edft%ekin = 0.0_DP edft%emkin = 0.0_DP - edft%ekin = dft_kinetic_energy(c0, cdesc, fi, edft%emkin) + + DO iss = 1, nspin + iswfc = iss + IF( force_pairing ) iswfc = 1 + edft%ekin = edft%ekin + enkin( c0(1,1,1,iswfc), SIZE(c0,1), fi(1,1,iss), cdesc%nbl(iss) ) + END DO IF(tprint) THEN IF( ionode .AND. ttscreen ) & @@ -454,11 +442,6 @@ END IF -! ... reciprocal-space vectors are in units of alat/(2 pi) so a -! ... multiplicative factor (2 pi/alat)**2 is required - edft%ekin = edft%ekin * tpiba2 - edft%emkin = edft%emkin * tpiba2 - IF( tstress .OR. tforce .OR. iflag == 0 ) THEN CALL vofesr( edft%esr, desr, fion, atoms, tstress, box ) IF( iflag == 0 ) & @@ -469,23 +452,26 @@ IF(timing) s2 = cclock() -! ... FFT: rho(r) --> rho(g) - DO ispin = 1, nspin + DO iss = 1, nspin - CALL pfwfft( rhoeg(:,ispin), rhoe(:,:,:,ispin) ) + ! ... FFT: rho(r) --> rho(g) + + CALL pfwfft( rhoeg(:,iss), rhoe(:,iss) ) -! ... add core contribution to the charge + ! ... add core contribution to the charge - CALL ZCOPY( SIZE(rhoetg,1), rhoeg(1,ispin), 1, rhoetg(1,ispin), 1 ) - CALL DCOPY( SIZE(rhoe(:,:,:,ispin)), rhoe(1,1,1,ispin), 1, rhoetr(1,1,1,ispin), 1 ) + CALL ZCOPY( SIZE(rhoeg,1) , rhoeg(1,iss), 1, rhoetg(1,iss), 1 ) + CALL DCOPY( nnrx, rhoe(1,iss), 1, rhoetr(1,iss), 1 ) IF( nlcc_any ) THEN ! ... add core correction ! ... rhoetg = rhoeg + cc ! ... rhoetr = rhoe + cc - CALL add_core_charge( rhoetg(:,ispin), rhoetr(:,:,:,ispin), sfac, rhocg, atoms%nsp ) + + CALL add_core_charge( rhoetg(:,iss), rhoetr(:,iss), sfac, rhocg, atoms%nsp ) + ELSE ! ... no core correction @@ -493,6 +479,7 @@ ! ... rhoetr = rhoe ! ... chi2 + IF(tchi2) THEN IF(nspin.GT.1) CALL errore(' vofrho ',' spin + tchi ',nspin) rhochi = rhoeg(:,1) @@ -501,7 +488,7 @@ END IF IF(tgc) THEN - CALL gradrho( rhoetg(:,ispin), grho(:,:,:,:,ispin), gx ) + CALL gradrho( rhoetg(:,iss), grho(:,:,iss), gx ) END IF END DO @@ -518,26 +505,26 @@ ! IF ( ttsic ) THEN - self_rho(:,:,:,1) = rhoetr(:,:,:,2) - self_rho(:,:,:,2) = rhoetr(:,:,:,2) + self_rho(:,1) = rhoetr(:,2) + self_rho(:,2) = rhoetr(:,2) IF (tgc) THEN - self_grho(:,:,:,:,1) = grho(:,:,:,:,2) - self_grho(:,:,:,:,2) = grho(:,:,:,:,2) + self_grho(:,:,1) = grho(:,:,2) + self_grho(:,:,2) = grho(:,:,2) ENDIF CALL exch_corr_energy( self_rho, rhoetg, self_grho, self_vpot, & self_sxcp, self_vxc, self_v2xc ) - vpot (:,:,:,1) = ( 1.0d0 - sic_alpha ) * vpot(:,:,:,1) + vpot (:,1) = ( 1.0d0 - sic_alpha ) * vpot(:,1) ! - vpot (:,:,:,2) = ( 1.0d0 - sic_alpha ) * vpot(:,:,:,2) + sic_alpha * ( self_vpot(:,:,:,2) + self_vpot(:,:,:,1) ) + vpot (:,2) = ( 1.0d0 - sic_alpha ) * vpot(:,2) + sic_alpha * ( self_vpot(:,2) + self_vpot(:,1) ) IF (tgc) THEN ! - v2xc(:,:,:,1,1) = ( 1.0d0 - sic_alpha ) * v2xc(:,:,:,1,1) + v2xc(:,1,1) = ( 1.0d0 - sic_alpha ) * v2xc(:,1,1) ! - v2xc(:,:,:,2,2) = ( 1.0d0 - sic_alpha ) * v2xc(:,:,:,2,2) +sic_alpha * ( self_v2xc(:,:,:,2,2) + self_v2xc(:,:,:,1,1) ) + v2xc(:,2,2) = ( 1.0d0 - sic_alpha ) * v2xc(:,2,2) +sic_alpha * ( self_v2xc(:,2,2) + self_v2xc(:,1,1) ) ! END IF @@ -552,14 +539,14 @@ END IF IF ( tstress ) THEN - strvxc = ( edft%sxc - vxc ) * omega / DBLE( nr1_g * nr2_g * nr3_g ) + strvxc = ( edft%sxc - vxc ) * omega / DBLE( nr1 * nr2 * nr3 ) END IF IF( nlcc_any ) THEN ! ... xc potential (vpot) from real to G space, to compute nlcc forces ! ... rhoetg = fwfft(vpot) - DO ispin = 1, nspin - CALL pfwfft( rhoetg(:,ispin), vpot(:,:,:,ispin) ) + DO iss = 1, nspin + CALL pfwfft( rhoetg(:,iss), vpot(:,iss) ) END DO ! ... now rhoetg contains the xc potential IF (tforce) THEN @@ -584,7 +571,7 @@ ! CALL vofloc(ttscreen, tforce, edft%ehte, edft%ehti, ehp, & eps, vloc, rhoeg, fion, atoms, rhops, vps, eigr, & - ei1, ei2, ei3, sfac, box, desc ) + ei1, ei2, ei3, sfac, box ) ! edft%self_ehte = 0.d0 @@ -600,16 +587,16 @@ ! working on the total charge density - CALL self_vofloc( ttscreen, self_ehtep, self_vloc, self_rhoeg, box, desc) + CALL self_vofloc( ttscreen, self_ehtep, self_vloc, self_rhoeg, box ) ! - CALL pinvfft( self_vpot(:,:,:,1), self_vloc(:)) + CALL pinvfft( self_vpot(:,1), self_vloc(:)) - self_vpot(:,:,:,1) = sic_epsilon * self_vpot(:,:,:,1) + self_vpot(:,1) = sic_epsilon * self_vpot(:,1) ! edft%self_ehte = sic_epsilon * DBLE( self_ehtep ) - vpot(:,:,:,1) = vpot(:,:,:,1) - self_vpot(:,:,:,1) - vpot(:,:,:,2) = vpot(:,:,:,2) + self_vpot(:,:,:,1) + vpot(:,1) = vpot(:,1) - self_vpot(:,1) + vpot(:,2) = vpot(:,2) + self_vpot(:,1) DEALLOCATE( self_vloc, self_rhoeg ) @@ -628,13 +615,13 @@ IF(timing) s5 = cclock() - DO ispin = 1, nspin + DO iss = 1, nspin ! ... add hartree end local pseudo potentials ( invfft(vloc) ) ! ... to xc potential (vpot). ! ... vpot = vpot + invfft(vloc) - CALL pinvfft( vpot(:,:,:,ispin), vloc(:), 1.0d0 ) + CALL pinvfft( vpot(:,iss), vloc(:), 1.0d0 ) END DO @@ -660,10 +647,10 @@ CALL mp_sum(edft%ehte, group) CALL mp_sum(edft%ehti, group) CALL mp_sum(edft%self_ehte, group) - CALL mp_sum(edft%ekin, group) + ! CALL mp_sum(edft%ekin, group) ! already summed up CALL mp_sum(edft%emkin, group) - CALL total_energy(edft,omega,vxc,eps,self_vxc,nr1_g*nr2_g*nr3_g) + CALL total_energy(edft,omega,vxc,eps,self_vxc,nr1*nr2*nr3) !fran: the output is introduced only in the print_energies.f90 !fran: in this way you print only each print_step @@ -761,21 +748,20 @@ !=----------------------------------------------------------------------------=! - SUBROUTINE cluster_bc( screen_coul, hg, box, desc ) + SUBROUTINE cluster_bc( screen_coul, hg, box ) USE green_functions, ONLY: greenf USE mp_global, ONLY: mpime USE fft, ONLY : pfwfft USE fft_base, ONLY: dfftp - USE charge_types, ONLY: charge_descriptor USE processors_grid_module, ONLY: get_grid_info USE cell_module, ONLY: boxdimensions, s_to_r, alat USE constants, ONLY: gsmall, pi USE cell_base, ONLY: tpiba2 + use grid_dimensions, only: nr1, nr2, nr3, nr1l, nr2l, nr3l, nnrx REAL(DP), INTENT(IN) :: hg(:) TYPE (boxdimensions), INTENT(IN) :: box - TYPE (charge_descriptor), INTENT(IN) :: desc COMPLEX(DP) :: screen_coul(:) ! ... declare external function @@ -783,21 +769,12 @@ EXTERNAL erf, erfc ! ... Locals - REAL(DP), ALLOCATABLE :: grr(:,:,:) + REAL(DP), ALLOCATABLE :: grr(:) COMPLEX(DP), ALLOCATABLE :: grg(:) REAL(DP) :: rc, r(3), s(3), rmod, g2, rc2, arg, omega, fact - INTEGER :: ig, i, j, k - INTEGER :: nr1_l, nr2_l, nr3_l, nr1_g, nr2_g, nr3_g + INTEGER :: ig, i, j, k, ir INTEGER :: ir1, ir2, ir3 - - nr1_l = desc % nxl - nr2_l = desc % nyl - nr3_l = desc % nzl - nr1_g = desc % nx - nr2_g = desc % ny - nr3_g = desc % nz - ir1 = 1 ir2 = 1 ir3 = 1 @@ -805,28 +782,31 @@ ir3 = ir3 + dfftp%npp( k ) END DO - ALLOCATE( grr( dfftp%nr1x, dfftp%nr2x, dfftp%npl ) ) + ALLOCATE( grr( nnrx ) ) ALLOCATE( grg( SIZE( screen_coul ) ) ) + grr = 0.0d0 + ! ... Martina and Tuckerman convergence criterium rc = 7.0d0 / alat rc2 = rc**2 omega = box%deth - fact = omega / ( nr1_g * nr2_g * nr3_g ) - IF( MOD(nr1_g * nr2_g * nr3_g, 2) /= 0 ) fact = -fact + fact = omega / ( nr1 * nr2 * nr3 ) + IF( MOD(nr1 * nr2 * nr3, 2) /= 0 ) fact = -fact - DO k = 1, nr3_l - s(3) = DBLE ( (k-1) + (ir3 - 1) ) / nr3_g - 0.5d0 - DO j = 1, nr2_l - s(2) = DBLE ( (j-1) + (ir2 - 1) ) / nr2_g - 0.5d0 - DO i = 1, nr1_l - s(1) = DBLE ( (i-1) + (ir1 - 1) ) / nr1_g - 0.5d0 + DO k = 1, nr3l + s(3) = DBLE ( (k-1) + (ir3 - 1) ) / nr3 - 0.5d0 + DO j = 1, nr2l + s(2) = DBLE ( (j-1) + (ir2 - 1) ) / nr2 - 0.5d0 + DO i = 1, nr1l + s(1) = DBLE ( (i-1) + (ir1 - 1) ) / nr1 - 0.5d0 CALL S_TO_R(S, R, box) rmod = SQRT( r(1)**2 + r(2)**2 + r(3)**2 ) + ir = i + (j-1)*dfftp%nr1x + (k-1)*dfftp%nr1x*dfftp%nr2x IF( rmod < gsmall ) THEN - grr(i,j,k) = fact * 2.0d0 * rc / SQRT( pi ) + grr( ir ) = fact * 2.0d0 * rc / SQRT( pi ) ELSE - grr(i,j,k) = fact * erf( rc * rmod ) / rmod + grr( ir ) = fact * erf( rc * rmod ) / rmod END IF END DO END DO @@ -856,11 +836,11 @@ ! BEGIN manual SUBROUTINE vofloc(tscreen, tforce, ehte, ehti, eh, eps, vloc, rhoeg, & - fion, atoms, rhops, vps, eigr, ei1, ei2, ei3, sfac, ht, desc ) + fion, atoms, rhops, vps, eigr, ei1, ei2, ei3, sfac, ht ) ! this routine computes: ! omega = ht%deth -! rho_e(ig) = (sum over ispin) rhoeg(ig,ispin) +! rho_e(ig) = (sum over iss) rhoeg(ig,iss) ! rho_I(ig) = (sum over is) sfac(is,ig) * rhops(ig,is) ! vloc_h(ig) = fpi / ( g(ig) * tpiba2 ) * { rho_e(ig) + rho_I(ig) } ! vloc_ps(ig) = (sum over is) sfac(is,ig) * vps(ig,is) @@ -884,7 +864,7 @@ ! tx_ps(ig,is) = vps(ig,is) * CONJG( rho_e(ig) ) ! gx(ig) = CMPLX(0.D0, gx(1,ig)) * tpiba ! fion(x,isa) = fion(x,isa) + -! Fact * omega * ( sum over ig, ispin) (tx_h(ig,is) + tx_ps(ig,is)) * +! Fact * omega * ( sum over ig, iss) (tx_h(ig,is) + tx_ps(ig,is)) * ! gx(ig) * eigrx(ig,isa) * eigry(ig,isa) * eigrz(ig,isa) ! if Gamma symmetry Fact = 2.0 else Fact = 1 ! @@ -895,7 +875,6 @@ USE control_flags, ONLY: gamma_only USE cell_base, ONLY: tpiba2, tpiba USE cell_module, ONLY: boxdimensions - USE charge_types, ONLY: charge_descriptor USE atoms_type_module, ONLY: atoms_type USE io_global, ONLY: stdout USE grid_dimensions, ONLY: nr1, nr2, nr3 @@ -910,7 +889,6 @@ TYPE (atoms_type) :: atoms TYPE (boxdimensions), INTENT(in) :: ht - TYPE (charge_descriptor), INTENT(IN) :: desc LOGICAL :: tforce LOGICAL :: tscreen REAL(DP) :: fion(:,:) @@ -927,7 +905,7 @@ ! ... Locals - INTEGER :: is, ia, isa, ig, ig1, ig2, ig3, nspin, ispin + INTEGER :: is, ia, isa, ig, ig1, ig2, ig3, nspin, iss REAL(DP) :: fpibg, cost, omega COMPLEX(DP) :: cxc, rhet, rhog, vp, rp, gxc, gyc, gzc COMPLEX(DP) :: teigr, cnvg, cvn, tx, ty, tz, vscreen @@ -944,7 +922,7 @@ IF( tscreen ) THEN ALLOCATE( screen_coul( ngm ) ) - CALL cluster_bc( screen_coul, g, ht, desc ) + CALL cluster_bc( screen_coul, g, ht ) END IF !======================================================================= @@ -1020,9 +998,9 @@ IF(TFORCE) THEN ! ... each processor add its own contribution to the array FION IF( gamma_only ) THEN - cost = 2.D0 * ht%deth * tpiba + cost = 2.D0 * omega * tpiba ELSE - cost = ht%deth * tpiba + cost = omega * tpiba END IF FION = FION + DBLE(ftmp) * cost END IF @@ -1046,11 +1024,11 @@ eh = eh + vscreen * rhog * CONJG(rhog) ehte = ehte + vscreen * DBLE(rhet * CONJG(rhet)) ehti = ehti + vscreen * DBLE( rp * CONJG(rp)) - DO ispin = 1, nspin + DO iss = 1, nspin IF( gamma_only ) THEN - eps = eps + vp * CONJG(RHOEG(1,ispin)) * 0.5d0 + eps = eps + vp * CONJG(RHOEG(1,iss)) * 0.5d0 ELSE - eps = eps + vp * CONJG(RHOEG(1,ispin)) + eps = eps + vp * CONJG(RHOEG(1,iss)) END IF END DO END IF @@ -1079,8 +1057,7 @@ USE cell_module, ONLY: s_to_r, boxdimensions, pbcs USE mp_global, ONLY: nproc, mpime, group USE mp, ONLY: mp_sum - USE parallel_types, ONLY: BLOCK_PARTITION_SHAPE - USE descriptors_module, ONLY: global_index, local_dimension + USE parallel_types, ONLY: BLOCK_PARTITION_DIST USE atoms_type_module, ONLY: atoms_type USE ions_base, ONLY: rcmax, zv @@ -1099,6 +1076,9 @@ REAL(DP) :: erf, erfc EXTERNAL erf, erfc + INTEGER :: ldim_block, gind_block + EXTERNAL ldim_block, gind_block + ! ... LOCALS @@ -1188,10 +1168,8 @@ ESR = 0.0_DP DESR = 0.0_DP - ! NA_LOC = LOCALDIM(npt,NPROC,ME) - NA_LOC = local_dimension( npt, 1, mpime, 0, nproc, BLOCK_PARTITION_SHAPE) - ! IA_S = GLOBALINDEX(1,npt,NPROC,ME) - IA_S = global_index( 1, npt, 1, mpime, 0, nproc, BLOCK_PARTITION_SHAPE ) + NA_LOC = ldim_block( npt, nproc, mpime) + IA_S = gind_block( 1, npt, nproc, mpime ) IA_E = IA_S + NA_LOC - 1 DO ia = ia_s, ia_e @@ -1286,7 +1264,7 @@ ! ---------------------------------------------- ! BEGIN manual - SUBROUTINE self_vofloc(tscreen, ehte, vloc, rhoeg, ht, desc) + SUBROUTINE self_vofloc(tscreen, ehte, vloc, rhoeg, ht) ! adds the hartree part of the self interaction ! @@ -1296,7 +1274,6 @@ USE constants, ONLY: fpi USE control_flags, ONLY: gamma_only USE cell_module, ONLY: boxdimensions - USE charge_types, ONLY: charge_descriptor USE cell_base, ONLY: tpiba2 USE gvecp, ONLY: ngm USE reciprocal_vectors, ONLY: gstart, g @@ -1305,7 +1282,6 @@ ! ... Arguments TYPE (boxdimensions), INTENT(in) :: ht - TYPE (charge_descriptor), INTENT(IN) :: desc LOGICAL :: tscreen COMPLEX(DP) :: vloc(:) COMPLEX(DP) :: rhoeg(:) @@ -1324,7 +1300,7 @@ IF( tscreen ) THEN ALLOCATE( screen_coul( ngm ) ) - CALL cluster_bc( screen_coul, g, ht, desc ) + CALL cluster_bc( screen_coul, g, ht ) END IF !======================================================================= @@ -1375,7 +1351,7 @@ - SUBROUTINE localisation( wfc, atoms_m, ht, desc) + SUBROUTINE localisation( wfc, atoms_m, ht) ! adds the hartree part of the self interaction ! @@ -1385,7 +1361,6 @@ USE constants, ONLY: fpi USE control_flags, ONLY: gamma_only USE cell_module, ONLY: boxdimensions, s_to_r - USE charge_types, ONLY: charge_descriptor USE atoms_type_module, ONLY: atoms_type USE fft, ONLY : pw_invfft, pfwfft, pinvfft USE sic_module, ONLY: ind_localisation, nat_localisation, print_localisation @@ -1395,6 +1370,7 @@ USE cell_base, ONLY: tpiba2 USE reciprocal_vectors, ONLY: gstart, g USE gvecp, ONLY: ngm + use grid_dimensions, only: nr1, nr2, nr3, nr1l, nr2l, nr3l, nnrx IMPLICIT NONE @@ -1403,7 +1379,6 @@ COMPLEX(DP), INTENT(IN) :: wfc(:) TYPE (atoms_type), INTENT(in) :: atoms_m TYPE (boxdimensions), INTENT(in) :: ht - TYPE (charge_descriptor), INTENT(IN) :: desc ! ... Locals @@ -1411,42 +1386,36 @@ REAL(DP) :: ehte INTEGER :: ig, at, ia, is, isa_input, isa_sorted, isa_loc REAL(DP) :: fpibg, omega, aRe, aR2, R(3) - INTEGER :: Xmin, Ymin, Zmin, Xmax, Ymax, Zmax - INTEGER :: nr1_l, nr2_l, nr3_l + INTEGER :: Xmin, Ymin, Zmin, Xmax, Ymax, Zmax, i,j,k, ir REAL(DP) :: work, work2 COMPLEX(DP) :: rhog - REAL(DP), ALLOCATABLE :: density(:,:,:), psi(:,:,:) - COMPLEX(DP), ALLOCATABLE :: k_density(:), cpsi(:,:,:) + REAL(DP), ALLOCATABLE :: density(:), psi(:) + COMPLEX(DP), ALLOCATABLE :: k_density(:), cpsi(:) COMPLEX(DP) :: vscreen COMPLEX(DP), ALLOCATABLE :: screen_coul(:) - INTEGER :: nr1x, nr2x, nr3x ! ... Subroutine body ... IF( .FALSE. ) THEN ALLOCATE( screen_coul( ngm ) ) - CALL cluster_bc( screen_coul, g, ht, desc ) + CALL cluster_bc( screen_coul, g, ht ) END IF - nr1x = dfftp%nr1x - nr2x = dfftp%nr2x - nr3x = dfftp%npl - omega = ht%deth - nr1_l = desc % nxl - nr2_l = desc % nyl - nr3_l = desc % nzl - - ALLOCATE( density( nr1x, nr2x, nr3x ) ) - ALLOCATE( psi( nr1x, nr2x, nr3x ) ) - ALLOCATE( cpsi( nr1x, nr2x, nr3x ) ) + ALLOCATE( density( nnrx ) ) + ALLOCATE( psi( nnrx ) ) ALLOCATE( k_density( ngm ) ) - CALL pw_invfft( cpsi(:,:,:), wfc(:), wfc(:) ) + ALLOCATE( cpsi( nnrx ) ) + cpsi = 0.0d0 + + CALL pw_invfft( cpsi(:), wfc(:), wfc(:) ) + psi = DBLE( cpsi ) + DEALLOCATE( cpsi ) isa_sorted = 0 @@ -1473,34 +1442,44 @@ !WRITE(6,*) 'ATOM ', ind_localisation( isa_input ) !WRITE(6,*) 'POS ', atoms_m%taus( :, isa_sorted ) - work = nr1_l + work = nr1l work2 = sic_rloc * work work = work * R(1) - work2 Xmin = FLOOR(work) work = work + 2*work2 Xmax = FLOOR(work) - IF ( Xmax > nr1_l ) Xmax = nr1_l + IF ( Xmax > nr1l ) Xmax = nr1l IF ( Xmin < 1 ) Xmin = 1 - work = nr2_l + + work = nr2l work2 = sic_rloc * work work = work * R(2) - work2 Ymin = FLOOR(work) work = work + 2*work2 Ymax = FLOOR(work) - IF ( Ymax > nr2_l ) Ymax = nr2_l + IF ( Ymax > nr2l ) Ymax = nr2l IF ( Ymin < 1 ) Ymin = 1 - work = nr3_l + + work = nr3l work2 = sic_rloc * work work = work * R(3) - work2 Zmin = FLOOR(work) work = work + 2*work2 Zmax = FLOOR(work) - IF ( Zmax > nr3_l ) Zmax = nr3_l + IF ( Zmax > nr3l ) Zmax = nr3l IF ( Zmin < 1 ) Zmin = 1 density = 0.D0 - density( Xmin:Xmax, Ymin:Ymax, Zmin:Zmax ) = & - psi( Xmin:Xmax, Ymin:Ymax, Zmin:Zmax ) * psi( Xmin:Xmax, Ymin:Ymax, Zmin:Zmax ) + + DO k = Zmin, Zmax + DO j = Ymin, Ymax + DO i = Xmin, Xmax + ir = i + (j-1)*dfftp%nr1x + (k-1)*dfftp%nr1x*dfftp%nr2x + density( ir ) = psi( ir ) * psi( ir ) + END DO + END DO + END DO + CALL pfwfft( k_density, density ) ! ... G /= 0 elements diff --git a/CPV/print_out.f90 b/CPV/print_out.f90 index badf0b3bf..f1858f1e0 100644 --- a/CPV/print_out.f90 +++ b/CPV/print_out.f90 @@ -594,17 +594,15 @@ !=----------------------------------------------------------------------------=! - SUBROUTINE print_sfac( rhoe, desc, sfac ) + SUBROUTINE print_sfac( rhoe, sfac ) USE mp_global, ONLY: mpime, nproc, group USE mp, ONLY: mp_max, mp_get, mp_put USE fft, ONLY : pfwfft, pinvfft - USE charge_types, ONLY: charge_descriptor USE reciprocal_vectors, ONLY: ig_l2g, gx, g USE gvecp, ONLY: ngm - TYPE (charge_descriptor), INTENT(IN) :: desc - REAL(DP), INTENT(IN) :: rhoe(:,:,:,:) + REAL(DP), INTENT(IN) :: rhoe(:,:) COMPLEX(DP), INTENT(IN) :: sfac(:,:) INTEGER :: nspin, ispin, ip, nsp, ngx_l, ng, is, ig @@ -615,7 +613,7 @@ INTEGER , ALLOCATABLE :: ig_rcv(:) COMPLEX(DP), ALLOCATABLE :: sfac_rcv(:,:) - nspin = SIZE(rhoe,4) + nspin = SIZE(rhoe,2) nsp = SIZE(sfac,2) ngx_l = ngm CALL mp_max(ngx_l, group) @@ -627,7 +625,7 @@ ALLOCATE(sfac_rcv(ngx_l,nsp)) ! ... FFT: rho(r) --> rho(g) DO ispin = 1, nspin - CALL pfwfft(rhoeg(:,ispin),rhoe(:,:,:,ispin)) + CALL pfwfft(rhoeg(:,ispin),rhoe(:,ispin)) END DO IF( ionode ) THEN OPEN(sfacunit, FILE=TRIM(sfac_file), STATUS='UNKNOWN') @@ -673,20 +671,15 @@ !=----------------------------------------------------------------------------=! - SUBROUTINE printacc( nfi, rhoe, desc, atoms, ht, nstep_run, avgs, avgs_run ) + SUBROUTINE printacc( nfi, nstep_run, avgs, avgs_run ) USE cell_module, ONLY: boxdimensions USE atoms_type_module, ONLY: atoms_type - USE charge_types, ONLY: charge_descriptor IMPLICIT NONE INTEGER, INTENT(IN) :: nfi, nstep_run - REAL(DP), intent(in) :: rhoe(:,:,:,:) - TYPE (charge_descriptor), intent(in) :: desc REAL (DP) :: avgs(:), avgs_run(:) - TYPE (atoms_type) :: atoms - TYPE (boxdimensions), intent(in) :: ht IF ( nfi < 1 ) THEN RETURN @@ -744,7 +737,7 @@ use kinds, only: DP use ensemble_dft, only: tens, ismear, z0, c0diag, becdiag, dval, zaux, e0, zx - use electrons_base, only: nx => nbspx, n => nbsp, ispin => fspin, f, nspin + use electrons_base, only: nx => nbspx, n => nbsp, ispin, f, nspin use electrons_base, only: nel, iupdwn, nupdwn, nudx, nelt use energies, only: enl, ekin use ions_base, only: nsp diff --git a/CPV/qmatrixd.f90 b/CPV/qmatrixd.f90 index 1e5923a6d..fe7e2a82c 100644 --- a/CPV/qmatrixd.f90 +++ b/CPV/qmatrixd.f90 @@ -33,7 +33,7 @@ subroutine qmatrixd(c0, bec0,ctable, gqq, qmat, detq) use reciprocal_vectors, only: ng0 => gstart use uspp_param, only: nh, nhm use uspp, only : nhsa=> nkb - use electrons_base, only: nx => nbspx, n => nbsp, fspin + use electrons_base, only: nx => nbspx, n => nbsp, ispin use mp, only: mp_sum @@ -80,7 +80,7 @@ subroutine qmatrixd(c0, bec0,ctable, gqq, qmat, detq) ! first the local part sca=(0.,0.) - if(fspin(ix) == fspin(jx) ) then + if(ispin(ix) == ispin(jx) ) then !#ifdef NEC ! *vdir nodep @@ -133,7 +133,7 @@ subroutine qmatrixd(c0, bec0,ctable, gqq, qmat, detq) ! now the non local vanderbilt part sca =(0.,0.) - if(fspin(ix)==fspin(jx)) then + if(ispin(ix)==ispin(jx)) then do is=1,nvb!loop on vanderbilt species do ia=1,na(is)!loop on atoms do iv=1,nh(is)!loop on projectors diff --git a/CPV/restart.f90 b/CPV/restart.f90 index 60486c498..d92822f1a 100644 --- a/CPV/restart.f90 +++ b/CPV/restart.f90 @@ -230,7 +230,7 @@ SUBROUTINE writefile_fpmd( nfi, trutime, c0, cm, cdesc, occ, & atoms_0, atoms_m, acc, taui, cdmi, & - ht_m, ht_0, rho, desc, vpot) + ht_m, ht_0, rho, vpot) USE cell_module, only: boxdimensions, r_to_s USE brillouin, only: kpoints, kp @@ -240,7 +240,6 @@ USE atoms_type_module, ONLY: atoms_type USE io_global, ONLY: ionode, ionode_id USE io_global, ONLY: stdout - USE charge_types, ONLY: charge_descriptor USE electrons_nose, ONLY: xnhe0, xnhem, vnhe USE electrons_base, ONLY: nbsp, nspin USE cell_nose, ONLY: xnhh0, xnhhm, vnhh @@ -257,17 +256,15 @@ REAL(DP), INTENT(IN) :: occ(:,:,:) TYPE (boxdimensions), INTENT(IN) :: ht_m, ht_0 TYPE (atoms_type), INTENT(IN) :: atoms_0, atoms_m - REAL(DP), INTENT(IN) :: rho(:,:,:,:) - TYPE (charge_descriptor), INTENT(IN) :: desc + REAL(DP), INTENT(IN) :: rho(:,:) TYPE (wave_descriptor) :: cdesc - REAL(DP), INTENT(INOUT) :: vpot(:,:,:,:) + REAL(DP), INTENT(INOUT) :: vpot(:,:) REAL(DP), INTENT(IN) :: taui(:,:) REAL(DP), INTENT(IN) :: acc(:), cdmi(:) REAL(DP), INTENT(IN) :: trutime REAL(DP), ALLOCATABLE :: lambda(:,:) - REAL(DP), ALLOCATABLE :: rhow(:,:) REAL(DP) :: ekincm INTEGER :: i, j, k, iss, ir @@ -278,36 +275,16 @@ ! properties on the writefile subroutine ALLOCATE( lambda(nbsp,nbsp) ) - ALLOCATE( rhow( nr1x * nr2x * SIZE( rho, 3 ), nspin ) ) lambda = 0.0d0 ekincm = 0.0d0 ! ! - IF( SIZE( rho, 1 ) /= nr1x .OR. SIZE( rho, 2 ) /= nr2x ) THEN - WRITE( stdout, * ) nr1x, nr2x - WRITE( stdout, * ) SIZE( rho, 1 ), SIZE( rho, 2 ) - CALL errore( ' writefile_fpmd ', ' rho dimensions do not correspond ', 1 ) - END IF - ! - DO iss = 1, nspin - ir = 0 - DO k = 1, SIZE( rho, 3 ) - DO j = 1, nr2x - DO i = 1, nr1x - ir = ir + 1 - rhow( ir, iss ) = rho( i, j, k, iss ) - END DO - END DO - END DO - END DO - CALL cp_writefile( ndw, scradir, .TRUE., nfi, trutime, acc, kp%nkpt, kp%xk, kp%weight, & ht_0%a, ht_m%a, ht_0%hvel, ht_0%gvel, xnhh0, xnhhm, vnhh, taui, cdmi, & atoms_0%taus, atoms_0%vels, atoms_m%taus, atoms_m%vels, atoms_0%for, vnhp, & xnhp0, xnhpm, nhpcl, nhpdim, occ, occ, lambda, lambda, & - xnhe0, xnhem, vnhe, ekincm, ei, rhow, c04 = c0, cm4 = cm ) + xnhe0, xnhem, vnhe, ekincm, ei, rho, c04 = c0, cm4 = cm ) - DEALLOCATE( rhow ) DEALLOCATE( lambda ) RETURN @@ -319,7 +296,7 @@ SUBROUTINE readfile_fpmd( nfi, trutime, & c0, cm, cdesc, occ, atoms_0, atoms_m, acc, taui, cdmi, & - ht_m, ht_0, rho, desc, vpot ) + ht_m, ht_0, rho, vpot ) use electrons_base, only: nbsp USE cell_module, only: boxdimensions, cell_init, r_to_s, s_to_r @@ -336,7 +313,6 @@ USE gvecw, ONLY: ecutwfc => ecutw USE gvecp, ONLY: ecutrho => ecutp USE fft, ONLY : pfwfft, pinvfft - USE charge_types, ONLY: charge_descriptor USE ions_base, ONLY: nat, nsp, na USE electrons_module, ONLY: nspin USE control_flags, ONLY: twfcollect, force_pairing @@ -354,10 +330,9 @@ REAL(DP), INTENT(INOUT) :: occ(:,:,:) TYPE (boxdimensions), INTENT(INOUT) :: ht_m, ht_0 TYPE (atoms_type), INTENT(INOUT) :: atoms_0, atoms_m - REAL(DP), INTENT(INOUT) :: rho(:,:,:,:) - TYPE (charge_descriptor), INTENT(IN) :: desc + REAL(DP), INTENT(INOUT) :: rho(:,:) TYPE (wave_descriptor) :: cdesc - REAL(DP), INTENT(INOUT) :: vpot(:,:,:,:) + REAL(DP), INTENT(INOUT) :: vpot(:,:) REAL(DP), INTENT(OUT) :: taui(:,:) REAL(DP), INTENT(OUT) :: acc(:), cdmi(:) diff --git a/CPV/restart_sub.f90 b/CPV/restart_sub.f90 index 15be4e732..1a4191bce 100644 --- a/CPV/restart_sub.f90 +++ b/CPV/restart_sub.f90 @@ -72,6 +72,8 @@ MODULE from_restart_module USE cell_nose, ONLY : xnhh0, xnhhm, vnhh, cell_nosezero USE phase_factors_module, ONLY : strucf USE cg_module, ONLY : tcg + USE orthogonalize, ONLY : ortho + USE orthogonalize_base, ONLY : updatc, calphi ! COMPLEX(DP) :: eigr(:,:), ei1(:,:), ei2(:,:), ei3(:,:) COMPLEX(DP) :: eigrb(:,:) @@ -239,7 +241,7 @@ MODULE from_restart_module ! ! ... calphi calculates phi; the electron mass rises with g**2 ! - CALL calphi( c0, ngw, ema0bg, bec, nkb, vkb, phi, nbsp ) + CALL calphi( c0, ngw, bec, nkb, vkb, phi, nbsp, ema0bg ) ! ! ... begin try and error loop ( only one step! ) ! @@ -252,10 +254,11 @@ MODULE from_restart_module ! IF ( tortho ) THEN ! - CALL ortho( eigr, cm, phi, lambda, bigr, iter, & - dt2bye, ortho_eps, ortho_max, delt0, bephi, becp ) + CALL ortho( eigr, cm(:,:,1,1), phi(:,:,1,1), lambda, bigr, iter, & + dt2bye, bephi, becp ) ! - CALL updatc( dt2bye, lambda, phi, bephi, becp, bec, cm ) + CALL updatc( dt2bye, nbsp, lambda, SIZE(lambda,1), phi, SIZE(phi,1), & + bephi, SIZE(bephi,1), becp, bec, cm ) ! ELSE ! @@ -414,7 +417,7 @@ MODULE from_restart_module END SUBROUTINE from_restart_sm ! !-------------------------------------------------------------------------- - SUBROUTINE from_restart_fpmd( nfi, acc, rhoe, desc, cm, c0, cdesc, & + SUBROUTINE from_restart_fpmd( nfi, acc, rhoe, cm, c0, cdesc, & eigr, ei1, ei2, ei3, sfac, fi, ht_m, ht_0, & atoms_m, atoms_0, bec, becdr, vpot, edft ) !-------------------------------------------------------------------------- @@ -454,7 +457,6 @@ MODULE from_restart_module tprnfor, tpre USE parameters, ONLY : nacx USE atoms_type_module, ONLY : atoms_type - USE charge_types, ONLY : charge_descriptor USE ions_base, ONLY : vel_srt, tau_units USE runcp_module, ONLY : runcp_ncpp USE grid_dimensions, ONLY : nr1, nr2, nr3 @@ -476,12 +478,11 @@ MODULE from_restart_module COMPLEX(DP), INTENT(INOUT) :: cm(:,:,:,:), c0(:,:,:,:) REAL(DP) :: fi(:,:,:) TYPE(boxdimensions) :: ht_m, ht_0 - REAL(DP) :: rhoe(:,:,:,:) - TYPE(charge_descriptor) :: desc + REAL(DP) :: rhoe(:,:) TYPE(wave_descriptor) :: cdesc REAL(DP) :: bec(:,:) REAL(DP) :: becdr(:,:,:) - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) TYPE(dft_energy_type) :: edft ! INTEGER :: ig, ib, i, j, k, ik, nb, is, ia, ierr, isa, iss @@ -638,9 +639,9 @@ MODULE from_restart_module ! edft%enl = nlrh_m( c0, cdesc, ttforce, atoms_0, fi, bec, becdr, eigr ) ! - CALL rhoofr( nfi, c0, cdesc, fi, rhoe, desc, ht_0 ) + CALL rhoofr( nfi, c0, cdesc, fi, rhoe, ht_0 ) ! - CALL vofrhos( .true. , ttforce, tstress, rhoe, desc, & + CALL vofrhos( .true. , ttforce, tstress, rhoe, & atoms_0, vpot, bec, c0, cdesc, fi, eigr, & ei1, ei2, ei3, sfac, timepre, ht_0, edft ) ! diff --git a/CPV/rsmesh.f90 b/CPV/rsmesh.f90 index 6a2df4f01..e33188841 100644 --- a/CPV/rsmesh.f90 +++ b/CPV/rsmesh.f90 @@ -18,9 +18,8 @@ !=----------------------------------------------------------------------------=! USE kinds - USE parallel_types, ONLY: processors_grid, descriptor, BLOCK_PARTITION_SHAPE + USE parallel_types, ONLY: processors_grid, descriptor, BLOCK_PARTITION_DIST USE processors_grid_module, ONLY: grid_init, get_grid_info, calculate_grid_dims - USE descriptors_module, ONLY: desc_init, get_local_dims USE grid_dimensions, ONLY: nr1, nr2, nr3, nr1x, nr2x, nr3x USE grid_dimensions, ONLY: nr1l, nr2l, nr3l, nnrx USE smooth_grid_dimensions, ONLY: nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx diff --git a/CPV/runcg.f90 b/CPV/runcg.f90 index fb3a046e8..d61881fa5 100644 --- a/CPV/runcg.f90 +++ b/CPV/runcg.f90 @@ -53,7 +53,7 @@ ! ----------------------------------------------------------------------- ! BEGIN manual - SUBROUTINE runcg_new(tortho, tprint, rhoe, desc, atoms_0, & + SUBROUTINE runcg_new(tortho, tprint, rhoe, atoms_0, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cp, cdesc, tcel, ht0, occ, ei, & vpot, doions, edft, maxnstep, cgthr, tconv ) @@ -66,7 +66,6 @@ USE energies, ONLY: dft_energy_type, print_energies USE electrons_module, ONLY: pmss, eigs, nb_l USE cp_electronic_mass, ONLY: emass - USE descriptors_module, ONLY: get_local_dims, owner_of, local_index USE wave_functions, ONLY: cp_kinetic_energy, proj, fixwave USE wave_base, ONLY: dotp, hpsi USE wave_constrains, ONLY: update_lambda @@ -80,7 +79,6 @@ USE potentials, ONLY: kspotential USE time_step, ONLY: delt USE atoms_type_module, ONLY: atoms_type - USE charge_types, ONLY: charge_descriptor USE control_flags, ONLY: force_pairing USE environment, ONLY: start_cclock_val USE reciprocal_space_mesh, ONLY: gkmask_l @@ -93,8 +91,7 @@ TYPE (atoms_type) :: atoms_0 COMPLEX(DP), INTENT(INOUT) :: c0(:,:,:,:), cm(:,:,:,:), cp(:,:,:,:) TYPE (wave_descriptor) :: cdesc - TYPE (charge_descriptor) :: desc - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) COMPLEX(DP) :: eigr(:,:) COMPLEX(DP) :: ei1(:,:) COMPLEX(DP) :: ei2(:,:) @@ -109,7 +106,7 @@ REAL(DP) :: cgthr REAL(DP) :: ei(:,:,:) - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) ! ... declare other variables LOGICAL :: ttsde, ttprint, ttforce, ttstress, gzero @@ -178,7 +175,7 @@ s1 = cclock() - CALL kspotential( 1, ttprint, ttforce, ttstress, rhoe, desc, & + CALL kspotential( 1, ttprint, ttforce, ttstress, rhoe, & atoms_0, bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cdesc, tcel, ht0, occ, vpot, edft, timepre ) s2 = cclock() @@ -189,7 +186,7 @@ ! ... |d H / dPsi_j > = H |Psi_j> - Sum{i} |Psi_i> CALL dforce_all( ispin, c0(:,:,1,ispin), cdesc, occ(:,1,ispin), cp(:,:,1,ispin), & - vpot(:,:,:,ispin), eigr, bec ) + vpot(:,ispin), eigr, bec ) ! ... Project the gradient IF( gamma_symmetry ) THEN @@ -237,7 +234,7 @@ ! perform line minimization in the direction of "hacca" - CALL CGLINMIN(emin, demin, tbad, edft, cp, c0, cdesc, occ, vpot, rhoe, desc, hacca, & + CALL CGLINMIN(emin, demin, tbad, edft, cp, c0, cdesc, occ, vpot, rhoe, hacca, & atoms_0, ht0, bec, becdr, eigr, ei1, ei2, ei3, sfac) ! CALL print_energies( edft ) @@ -323,7 +320,7 @@ DO ispin = 1, nspin CALL dforce_all( ispin, c0(:,:,1,ispin), cdesc, occ(:,1,ispin), hacca(:,:,1,ispin), & - vpot(:,:,:,ispin), eigr, bec ) + vpot(:,ispin), eigr, bec ) nb_g( ispin ) = cdesc%nbt( ispin ) @@ -363,7 +360,7 @@ ! ! ---------------------------------------------------------------------- ! - SUBROUTINE cglinmin(emin, ediff, tbad, edft, cp, c, cdesc, occ, vpot, rhoe, desc, hacca, & + SUBROUTINE cglinmin(emin, ediff, tbad, edft, cp, c, cdesc, occ, vpot, rhoe, hacca, & atoms, ht, bec, becdr, eigr, ei1, ei2, ei3, sfac) ! ... declare modules @@ -376,7 +373,6 @@ USE cell_module, ONLY: boxdimensions USE potentials, ONLY: kspotential USE atoms_type_module, ONLY: atoms_type - USE charge_types, ONLY: charge_descriptor USE reciprocal_space_mesh, ONLY: gkmask_l USE uspp, ONLY : vkb, nkb @@ -389,8 +385,7 @@ COMPLEX(DP), INTENT(IN) :: c(:,:,:,:) COMPLEX(DP), INTENT(INOUT) :: cp(:,:,:,:) TYPE (wave_descriptor), INTENT(IN) :: cdesc - TYPE (charge_descriptor) :: desc - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) COMPLEX(DP) :: sfac(:,:) COMPLEX(DP) :: eigr(:,:) COMPLEX(DP) :: ei1(:,:) @@ -402,7 +397,7 @@ REAL(DP) :: becdr(:,:,:) TYPE (dft_energy_type) :: edft COMPLEX (DP) :: hacca(:,:,:,:) - REAL (DP), INTENT(in) :: vpot(:,:,:,:) + REAL (DP), INTENT(in) :: vpot(:,:) ! ! ... LOCALS @@ -605,7 +600,7 @@ END DO - CALL kspotential( 1, ttprint, ttforce, ttstress, rhoe, desc, & + CALL kspotential( 1, ttprint, ttforce, ttstress, rhoe, & atoms, bec, becdr, eigr, ei1, ei2, ei3, sfac, cp, cdesc, tcel, ht, occ, vpot, edft, timepre ) cgenergy = edft%etot diff --git a/CPV/runcg_ion.f90 b/CPV/runcg_ion.f90 index befda1bfa..dfe10c625 100644 --- a/CPV/runcg_ion.f90 +++ b/CPV/runcg_ion.f90 @@ -29,7 +29,7 @@ ! ----------------------------------------------------------------------- ! BEGIN manual - SUBROUTINE runcg_ion(nfi, tortho, tprint, rhoe, desc, atomsp, atoms0, atomsm, & + SUBROUTINE runcg_ion(nfi, tortho, tprint, rhoe, atomsp, atoms0, atomsm, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cp, cdesc, tcel, ht, occ, ei, & vpot, doions, edft, etol, ftol, maxiter, sdthr, maxnstep ) @@ -52,7 +52,6 @@ USE atoms_type_module, ONLY: atoms_type USE print_out_module USE parameters, ONLY: nacx - USE charge_types, ONLY: charge_descriptor USE runsd_module, ONLY: runsd IMPLICIT NONE @@ -66,8 +65,7 @@ TYPE (atoms_type) :: atomsm COMPLEX(DP), INTENT(INOUT) :: c0(:,:,:,:), cm(:,:,:,:), cp(:,:,:,:) TYPE (wave_descriptor) :: cdesc - TYPE (charge_descriptor) :: desc - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) REAL(DP) :: bec(:,:) REAL(DP) :: becdr(:,:,:) COMPLEX(DP) :: eigr(:,:) @@ -80,7 +78,7 @@ TYPE (dft_energy_type) :: edft REAL(DP) :: ei(:,:,:) - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) INTEGER, INTENT(IN) :: maxnstep, maxiter REAL(DP), INTENT(IN) :: sdthr, etol, ftol @@ -156,7 +154,7 @@ s1 = cclock() old_clock_value = s1 - CALL runsd(ttortho, ttprint, ttforce, rhoe, desc, atoms0, & + CALL runsd(ttortho, ttprint, ttforce, rhoe, atoms0, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cp, cdesc, tcel, ht, occ, ei, & vpot, doions, edft, maxnstep, sdthr ) @@ -184,7 +182,7 @@ IF(ionode) & WRITE( stdout,fmt="(/,8X,'cgion: iter',I5,' line minimization along gradient starting')") iter - CALL CGLINMIN(fret, edft, cp, c0, cm, cdesc, occ, ei, vpot, rhoe, desc, xi, atomsp, atoms0, & + CALL CGLINMIN(fret, edft, cp, c0, cm, cdesc, occ, ei, vpot, rhoe, xi, atomsp, atoms0, & ht, bec, becdr, eigr, ei1, ei2, ei3, sfac, maxnstep, sdthr, displ) IF( tbad ) THEN @@ -199,7 +197,7 @@ IF( ionode ) WRITE( stdout, fmt='(8X,"cgion: bad step")') ! perform steepest descent displ = displ / 2.0d0 - CALL runsd(ttortho, ttprint, ttforce, rhoe, desc, atoms0, & + CALL runsd(ttortho, ttprint, ttforce, rhoe, atoms0, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cp, cdesc, tcel, ht, occ, ei, & vpot, doions, edft, maxnstep, sdthr ) @@ -289,7 +287,7 @@ ! ---------------------------------------------------------------------- ! SUBROUTINE cglinmin(emin, edft, cp, c0, cm, cdesc, occ, ei, vpot, & - rhoe, desc, hacca, atomsp, atoms0, ht, bec, becdr, eigr, ei1, ei2, ei3, sfac, & + rhoe, hacca, atomsp, atoms0, ht, bec, becdr, eigr, ei1, ei2, ei3, sfac, & maxnstep, sdthr, displ) ! ... declare modules @@ -302,7 +300,6 @@ USE cell_module, ONLY: boxdimensions, r_to_s USE atoms_type_module, ONLY: atoms_type USE check_stop, ONLY: check_stop_now - USE charge_types, ONLY: charge_descriptor USE runsd_module, ONLY: runsd IMPLICIT NONE @@ -315,8 +312,7 @@ COMPLEX(DP), INTENT(INOUT) :: cp(:,:,:,:) COMPLEX(DP), INTENT(INOUT) :: cm(:,:,:,:) TYPE (wave_descriptor) :: cdesc - TYPE (charge_descriptor) :: desc - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) COMPLEX(DP) :: eigr(:,:) COMPLEX(DP) :: ei1(:,:) COMPLEX(DP) :: ei2(:,:) @@ -326,7 +322,7 @@ REAL(DP) :: occ(:,:,:) TYPE (dft_energy_type) :: edft REAL (DP) :: hacca(:,:) - REAL (DP), INTENT(in) :: vpot(:,:,:,:) + REAL (DP), INTENT(in) :: vpot(:,:) REAL(DP) :: bec(:,:) REAL(DP) :: becdr(:,:,:) @@ -546,7 +542,7 @@ ! ... Calculate Forces (fion) and DFT Total Energy (edft) for the new ionic ! ... positions (atomsp) - CALL runsd(ttortho, ttprint, ttforce, rhoe, desc, atomsp, & + CALL runsd(ttortho, ttprint, ttforce, rhoe, atomsp, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cp, cdesc, tcel, ht, occ, ei, & vpot, doions, edft, maxnstep, sdthr ) diff --git a/CPV/runcp.f90 b/CPV/runcp.f90 index 8aba5336b..715fe8590 100644 --- a/CPV/runcp.f90 +++ b/CPV/runcp.f90 @@ -44,7 +44,6 @@ USE mp, ONLY: mp_sum USE electrons_module, ONLY: pmss, eigs, nb_l USE cp_electronic_mass, ONLY: emass - USE descriptors_module, ONLY: get_local_dims, owner_of, local_index USE wave_functions, ONLY : cp_kinetic_energy USE wave_base, ONLY: hpsi USE cell_module, ONLY: boxdimensions @@ -68,7 +67,7 @@ REAL(DP), INTENT(IN) :: fi(:,:,:) REAL(DP), INTENT(IN) :: bec(:,:) TYPE (boxdimensions), INTENT(IN) :: ht - REAL (DP) :: vpot(:,:,:,:) + REAL (DP) :: vpot(:,:) REAL(DP) :: ei(:,:,:) REAL(DP) :: timerd, timeorto REAL(DP) :: ekinc(:) @@ -191,7 +190,7 @@ TYPE (wave_descriptor), INTENT(IN) :: cdesc COMPLEX(DP) :: eigr(:,:) REAL(DP), INTENT(IN) :: fi(:,:,:) - REAL (DP) :: vpot(:,:,:,:) + REAL (DP) :: vpot(:,:) REAL (DP), INTENT(IN) :: bec(:,:) REAL(DP), INTENT(IN) :: fccc LOGICAL, OPTIONAL, INTENT(IN) :: lambda, fromscra, diis, restart @@ -255,7 +254,7 @@ DO i = 1, nb, 2 - CALL dforce( i, is, c0(:,:,1,is), cdesc, fi(:,1,is), c2, c3, vpot(:,:,:,is), eigr, bec ) + CALL dforce( i, is, c0(:,:,1,is), cdesc, fi(:,1,is), c2, c3, vpot(:,is), eigr, bec ) IF( tlam ) THEN CALL update_lambda( i, gam( :, :,is), c0(:,:,1,is), cdesc, c2 ) @@ -286,7 +285,7 @@ nb = nx - CALL dforce( nx, is, c0(:,:,1,is), cdesc, fi(:,1,is), c2, c3, vpot(:,:,:,is), eigr, bec ) + CALL dforce( nx, is, c0(:,:,1,is), cdesc, fi(:,1,is), c2, c3, vpot(:,is), eigr, bec ) IF( tlam ) THEN CALL update_lambda( nb, gam( :, :,is), c0(:,:,1,is), cdesc, c2 ) @@ -337,7 +336,6 @@ USE mp, ONLY: mp_sum USE electrons_module, ONLY: pmss, eigs, nb_l, nupdwn, nspin USE cp_electronic_mass, ONLY: emass - USE descriptors_module, ONLY: get_local_dims, owner_of, local_index USE wave_functions, ONLY : cp_kinetic_energy USE wave_base, ONLY: wave_steepest, wave_verlet USE wave_base, ONLY: hpsi @@ -362,7 +360,7 @@ COMPLEX(DP) :: eigr(:,:) REAL(DP), INTENT(INOUT) :: fi(:,:,:) TYPE (boxdimensions), INTENT(IN) :: ht - REAL (DP) :: vpot(:,:,:,:) + REAL (DP) :: vpot(:,:) REAL(DP) :: ei(:,:,:) REAL(DP), INTENT(IN) :: bec(:,:) REAL(DP) :: timerd, timeorto @@ -457,8 +455,8 @@ DO i = 1, nb, 2 ! - CALL dforce( i, 2, c0(:,:,1,1), cdesc, fi(:,1,1), c2, c3, vpot(:,:,:,1), eigr, bec ) - CALL dforce( i, 2, c0(:,:,1,1), cdesc, fi(:,1,1), c4, c5, vpot(:,:,:,2), eigr, bec ) + CALL dforce( i, 2, c0(:,:,1,1), cdesc, fi(:,1,1), c2, c3, vpot(:,1), eigr, bec ) + CALL dforce( i, 2, c0(:,:,1,1), cdesc, fi(:,1,1), c4, c5, vpot(:,2), eigr, bec ) ! c2 = occup(i , ik)* (c2 + c4) c3 = occup(i+1, ik)* (c3 + c5) @@ -490,8 +488,8 @@ ! nb = n_unp - 1 ! - CALL dforce( nb, 2, c0(:,:,1,1), cdesc, fi(:,1,1), c2, c3, vpot(:,:,:,1), eigr, bec ) - CALL dforce( nb, 2, c0(:,:,1,1), cdesc, fi(:,1,2), c4, c5, vpot(:,:,:,2), eigr, bec ) + CALL dforce( nb, 2, c0(:,:,1,1), cdesc, fi(:,1,1), c2, c3, vpot(:,1), eigr, bec ) + CALL dforce( nb, 2, c0(:,:,1,1), cdesc, fi(:,1,2), c4, c5, vpot(:,2), eigr, bec ) c2 = occup(nb , ik)* (c2 + c4) @@ -509,7 +507,7 @@ END IF ! - CALL dforce( n_unp, 1, c0(:,:,1,1), cdesc, fi(:,1,1), c2, c3, vpot(:,:,:,1), eigr, bec ) + CALL dforce( n_unp, 1, c0(:,:,1,1), cdesc, fi(:,1,1), c2, c3, vpot(:,1), eigr, bec ) intermed = -2.d0 * sum( c2 * conjg( c0(:, n_unp, ik, 1 ) ) ) intermed3 = sum(c0(:,n_unp, ik, 1) * conjg( c0(:, n_unp, ik, 1))) diff --git a/CPV/rundiis.f90 b/CPV/rundiis.f90 index d51351886..fc7754466 100644 --- a/CPV/rundiis.f90 +++ b/CPV/rundiis.f90 @@ -27,7 +27,7 @@ ! ---------------------------------------------- ! BEGIN manual - SUBROUTINE rundiis(tprint, rhoe, desc, atoms, & + SUBROUTINE rundiis(tprint, rhoe, atoms, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cgrad, cdesc, tcel, ht0, fi, eig, & vpot, doions, edft ) @@ -94,7 +94,6 @@ USE brillouin, ONLY: kpoints, kp USE wave_types, ONLY: wave_descriptor USE atoms_type_module, ONLY: atoms_type - USE charge_types, ONLY: charge_descriptor USE control_flags, ONLY: force_pairing use grid_dimensions, only: nr1, nr2, nr3 USE reciprocal_vectors, ONLY: mill_l @@ -109,7 +108,7 @@ TYPE (atoms_type) :: atoms COMPLEX(DP), INTENT(INOUT) :: c0(:,:,:,:), cm(:,:,:,:), cgrad(:,:,:,:) TYPE (wave_descriptor) :: cdesc - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) REAL(DP) :: bec(:,:) REAL(DP) :: becdr(:,:,:) COMPLEX(DP) :: sfac(:,:) @@ -117,13 +116,12 @@ COMPLEX(DP) :: ei1(:,:) COMPLEX(DP) :: ei2(:,:) COMPLEX(DP) :: ei3(:,:) - TYPE (charge_descriptor) :: desc TYPE (boxdimensions), INTENT(INOUT) :: ht0 REAL(DP) :: fi(:,:,:) TYPE (dft_energy_type) :: edft REAL(DP) :: eig(:,:,:) - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) ! ... declare other variables INTEGER ig, ib, j, k, ik, ngw, i, is, nrt, istate, nrl, ndiis, nowv @@ -197,8 +195,8 @@ CALL phfacs( ei1, ei2, ei3, eigr, mill_l, atoms%taus, nr1, nr2, nr3, atoms%nat ) CALL strucf( sfac, ei1, ei2, ei3, mill_l, ngm ) - CALL rhoofr( 1, c0, cdesc, fi, rhoe, desc, ht0) - CALL newrho(rhoe(:,:,:,1), drho, 0) ! memorize density + CALL rhoofr( 1, c0, cdesc, fi, rhoe, ht0) + CALL newrho(rhoe(:,1), drho, 0) ! memorize density CALL phfacs( ei1, ei2, ei3, eigr, mill_l, atoms%taus, nr1, nr2, nr3, atoms%nat ) CALL strucf( sfac, ei1, ei2, ei3, mill_l, ngm ) CALL guessc0( .NOT. kp%gamma_only, bec, c0, cm, cdesc) @@ -236,12 +234,12 @@ ! ... self consistent energy edft%enl = nlrh_m(c0, cdesc, tforce, atoms, fi, bec, becdr, eigr) - CALL rhoofr( 1, c0, cdesc, fi, rhoe, desc, ht0) - CALL vofrhos(.FALSE., tforce, tstress, rhoe, desc, atoms, & + CALL rhoofr( 1, c0, cdesc, fi, rhoe, ht0) + CALL vofrhos(.FALSE., tforce, tstress, rhoe, atoms, & vpot, bec, c0, cdesc, fi, eigr, ei1, ei2, ei3, sfac, timepre, ht0, edft) ! ... density upgrade - CALL newrho(rhoe(:,:,:,1), drho, idiis) + CALL newrho(rhoe(:,1), drho, idiis) IF (ionode) WRITE( stdout,45) idiis, edft%etot, drho dene = abs(edft%etot - etot_m) etot_m = edft%etot @@ -250,7 +248,7 @@ ! ... recalculate potential edft%enl = nlrh_m(c0, cdesc, tforce, atoms, fi, bec, becdr, eigr) - CALL vofrhos(.FALSE., tforce, tstress, rhoe, desc, atoms, & + CALL vofrhos(.FALSE., tforce, tstress, rhoe, atoms, & vpot, bec, c0, cdesc, fi, eigr, ei1, ei2, ei3, sfac, timepre, ht0, edft) IF( idiis /= 1 )THEN @@ -269,7 +267,7 @@ edft%enl = nlrh_m(c0, cdesc, tforce, atoms, fs, bec, becdr, eigr) - CALL dforce_all( 1, c0(:,:,1,1), cdesc, fi(:,1,1), cgrad(:,:,1,1), vpot(:,:,:,1), eigr, bec ) + CALL dforce_all( 1, c0(:,:,1,1), cdesc, fi(:,1,1), cgrad(:,:,1,1), vpot(:,1), eigr, bec ) IF(.NOT.kp%gamma_only) THEN DO ik = 1, kp%nkpt @@ -285,7 +283,7 @@ call entropy_s(fi(1,1,1),temp_elec,cdesc%nbl(1),edft%ent) edft%enl = nlrh_m(c0, cdesc, tforce, atoms, fs, bec, becdr, eigr) - CALL dforce_all( 1, c0(:,:,1,1), cdesc, fi(:,1,1), cgrad(:,:,1,1), vpot(:,:,:,1), eigr, bec ) + CALL dforce_all( 1, c0(:,:,1,1), cdesc, fi(:,1,1), cgrad(:,:,1,1), vpot(:,1), eigr, bec ) DO ik = 1, kp%nkpt DO ib = 1, cdesc%nbl( 1 ) @@ -298,7 +296,7 @@ ! ... DIIS on c0 at FIXED potential edft%enl = nlrh_m(c0, cdesc, tforce, atoms, fs, bec, becdr, eigr) - CALL dforce_all( 1, c0(:,:,1,1), cdesc, fi(:,1,1), cgrad(:,:,1,1), vpot(:,:,:,1), eigr, bec ) + CALL dforce_all( 1, c0(:,:,1,1), cdesc, fi(:,1,1), cgrad(:,:,1,1), vpot(:,1), eigr, bec ) IF( kp%gamma_only ) THEN CALL proj( 1, cgrad(:,:,1,1), cdesc, c0(:,:,1,1), cdesc, lambda) @@ -388,7 +386,7 @@ ! ---------------------------------------------- ! BEGIN manual - SUBROUTINE runsdiis(tprint, rhoe, desc, atoms, & + SUBROUTINE runsdiis(tprint, rhoe, atoms, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cgrad, cdesc, tcel, ht0, fi, eig, & vpot, doions, edft ) @@ -451,7 +449,6 @@ USE brillouin, ONLY: kpoints, kp USE wave_types USE atoms_type_module, ONLY: atoms_type - USE charge_types, ONLY: charge_descriptor USE local_pseudo, ONLY: vps USE uspp, ONLY : vkb, nkb @@ -462,8 +459,7 @@ TYPE (atoms_type) :: atoms COMPLEX(DP), INTENT(INOUT) :: c0(:,:,:,:), cm(:,:,:,:), cgrad(:,:,:,:) TYPE (wave_descriptor) :: cdesc - REAL(DP) :: rhoe(:,:,:,:) - TYPE (charge_descriptor) :: desc + REAL(DP) :: rhoe(:,:) COMPLEX(DP) :: eigr(:,:) COMPLEX(DP) :: ei1(:,:) COMPLEX(DP) :: ei2(:,:) @@ -476,7 +472,7 @@ TYPE (dft_energy_type) :: edft REAL(DP) :: eig(:,:,:) - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) ! ... declare other variables LOGICAL :: tlimit, tsteep @@ -537,7 +533,7 @@ EXIT DIIS_LOOP END IF - CALL kspotential( 1, .FALSE., tforce, tstress, rhoe, desc, & + CALL kspotential( 1, .FALSE., tforce, tstress, rhoe, & atoms, bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cdesc, tcel, ht0, fi, vpot, edft, timepre ) s0 = cclock() @@ -582,7 +578,7 @@ ! ... so on). CALL dforce_all( ispin, c0(:,:,1,ispin), cdesc, fi(:,1,ispin), cgrad(:,:,1,ispin), & - vpot(:,:,:,ispin), eigr, bec ) + vpot(:,ispin), eigr, bec ) IF(.NOT.kp%gamma_only) THEN DO ik = 1, kp%nkpt @@ -695,7 +691,6 @@ USE constants, ONLY: au USE cell_base, ONLY: tpiba2 USE electrons_module, ONLY: eigs, ei, pmss, emass, nb_l, ib_owner, ib_local - USE descriptors_module, ONLY: get_local_dims, owner_of, local_index USE forces, ONLY: dforce_all USE brillouin, ONLY: kpoints, kp USE orthogonalize @@ -714,7 +709,7 @@ COMPLEX(DP), INTENT(inout) :: c(:,:,:,:) COMPLEX(DP), INTENT(inout) :: eforce(:,:,:,:) TYPE (wave_descriptor), INTENT(in) :: cdesc - REAL (DP), INTENT(in) :: vpot(:,:,:,:), fi(:,:,:) + REAL (DP), INTENT(in) :: vpot(:,:), fi(:,:,:) REAL (DP) :: bec(:,:) LOGICAL, INTENT(IN) :: TORTHO COMPLEX(DP) :: eigr(:,:) @@ -749,7 +744,7 @@ ! ... Calculate | dH / dpsi(j) > CALL dforce_all( ispin, c(:,:,1,ispin), cdesc, fi(:,1,ispin), eforce(:,:,1,ispin), & - vpot(:,:,:,ispin), eigr, bec ) + vpot(:,ispin), eigr, bec ) DO ik = 1, kp%nkpt diff --git a/CPV/runsd.f90 b/CPV/runsd.f90 index 9b7d29896..830bca659 100644 --- a/CPV/runsd.f90 +++ b/CPV/runsd.f90 @@ -28,7 +28,7 @@ ! ----------------------------------------------------------------------- ! BEGIN manual - SUBROUTINE runsd(tortho, tprint, tforce, rhoe, desc, atoms_0, & + SUBROUTINE runsd(tortho, tprint, tforce, rhoe, atoms_0, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cm, cp, cdesc, tcel, ht0, occ, ei, & vpot, doions, edft, maxnstep, sdthr ) @@ -47,7 +47,6 @@ USE atoms_type_module, ONLY: atoms_type USE runcp_module, ONLY: runcp USE phase_factors_module, ONLY: strucf, phfacs - USE charge_types, ONLY: charge_descriptor USE control_flags, ONLY: force_pairing use grid_dimensions, only: nr1, nr2, nr3 USE reciprocal_vectors, ONLY: mill_l @@ -61,9 +60,8 @@ TYPE (atoms_type), INTENT(INOUT) :: atoms_0 COMPLEX(DP), INTENT(INOUT) :: c0(:,:,:,:), cm(:,:,:,:), cp(:,:,:,:) TYPE (wave_descriptor) :: cdesc - REAL(DP) :: rhoe(:,:,:,:) + REAL(DP) :: rhoe(:,:) COMPLEX(DP) :: sfac(:,:) - TYPE (charge_descriptor) :: desc COMPLEX(DP) :: eigr(:,:) COMPLEX(DP) :: ei1(:,:) COMPLEX(DP) :: ei2(:,:) @@ -75,7 +73,7 @@ TYPE (dft_energy_type) :: edft REAL(DP) :: ei(:,:,:) - REAL(DP) :: vpot(:,:,:,:) + REAL(DP) :: vpot(:,:) INTEGER :: maxnstep ! maximum number of iteration REAL(DP) :: sdthr ! threshold for convergence @@ -127,7 +125,7 @@ s1 = cclock() - CALL kspotential( 1, ttprint, ttforce, ttstress, rhoe, desc, atoms_0, & + CALL kspotential( 1, ttprint, ttforce, ttstress, rhoe, atoms_0, & bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cdesc, tcel, ht0, & occ, vpot, edft, timepre ) @@ -171,7 +169,7 @@ IF( tforce ) THEN atoms_0%for = 0.0d0 - CALL kspotential( 1, ttprint, tforce, ttstress, rhoe, desc, & + CALL kspotential( 1, ttprint, tforce, ttstress, rhoe, & atoms_0, bec, becdr, eigr, ei1, ei2, ei3, sfac, c0, cdesc, tcel, ht0, occ, vpot, edft, timepre ) IF(ionode ) THEN WRITE( stdout,fmt="(12X,'runsd: fion and edft calculated = ',F14.6)") edft%etot diff --git a/CPV/smcp.f90 b/CPV/smcp.f90 index a28f07ad7..13a743270 100644 --- a/CPV/smcp.f90 +++ b/CPV/smcp.f90 @@ -93,6 +93,8 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out ) USE mp_global, ONLY : mp_global_start USE mp, ONLY : mp_sum USE fft_base, ONLY : dfftp + USE orthogonalize, ONLY : ortho + USE orthogonalize_base, ONLY : updatc, calphi ! #if ! defined __NOSMD ! @@ -612,8 +614,8 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out ) ! imposing the orthogonality ! ========================================================== ! - CALL calphi( rep_el(sm_k)%cm, ngw, ema0bg,rep_el(sm_k)%bec, nkb, & - & vkb,rep_el(sm_k)%phi, nbsp ) + CALL calphi( rep_el(sm_k)%cm, ngw, rep_el(sm_k)%bec, nkb, & + & vkb,rep_el(sm_k)%phi, nbsp, ema0bg ) ! ! IF(ionode) WRITE( sm_file,*) ' out from calphi' @@ -621,7 +623,7 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out ) ! IF(tortho) THEN CALL ortho (eigr,rep_el(sm_k)%c0,rep_el(sm_k)%phi,rep_el(sm_k)%lambda, & - & bigr,iter,ccc(sm_k),ortho_eps,ortho_max,delt,bephi,becp) + & bigr,iter,ccc(sm_k),bephi,becp) ELSE CALL gram( vkb, rep_el(sm_k)%bec, nkb, rep_el(sm_k)%c0, ngw, nbsp ) ! @@ -643,8 +645,9 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out ) ENDIF ! IF(tortho) THEN - CALL updatc(ccc(sm_k),rep_el(sm_k)%lambda,rep_el(sm_k)%phi, & - & bephi,becp,rep_el(sm_k)%bec,rep_el(sm_k)%c0) + CALL updatc( ccc(sm_k), nbsp, rep_el(sm_k)%lambda, SIZE( rep_el(sm_k)%lambda, 1 ), & + rep_el(sm_k)%phi, SIZE( rep_el(sm_k)%phi, 1 ), bephi, SIZE(bephi,1), & + becp,rep_el(sm_k)%bec,rep_el(sm_k)%c0) ! IF(ionode) WRITE( sm_file,*) ' out from updatc' ENDIF @@ -959,7 +962,7 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out ) ! calphi calculates phi ! the electron mass rises with g**2 ! - CALL calphi( rep_el(sm_k)%c0, ngw, ema0bg, rep_el(sm_k)%bec, nkb, vkb, rep_el(sm_k)%phi, nbsp ) + CALL calphi( rep_el(sm_k)%c0, ngw, rep_el(sm_k)%bec, nkb, vkb, rep_el(sm_k)%phi, nbsp, ema0bg ) ! ! begin try and error loop (only one step!) ! @@ -1171,7 +1174,7 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out ) IF(tortho) THEN CALL ortho & & (eigr,rep_el(sm_k)%cm,rep_el(sm_k)%phi,rep_el(sm_k)%lambda, & - & bigr,iter,ccc(sm_k),ortho_eps,ortho_max,delt,bephi,becp) + & bigr,iter,ccc(sm_k),bephi,becp) ELSE CALL gram( vkb, rep_el(sm_k)%bec, nkb, rep_el(sm_k)%cm, ngw, nbsp ) IF(iprsta.GT.4) CALL dotcsc(eigr,rep_el(sm_k)%cm) @@ -1183,8 +1186,10 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out ) ! IF(iprsta.GE.3) CALL print_lambda( rep_el(sm_k)%lambda, nbsp, 9, 1.0d0 ) ! - IF(tortho) CALL updatc(ccc(sm_k),rep_el(sm_k)%lambda,rep_el(sm_k)%phi,bephi, & - & becp,rep_el(sm_k)%bec,rep_el(sm_k)%cm) + IF(tortho) & + CALL updatc( ccc(sm_k), nbsp, rep_el(sm_k)%lambda, SIZE(rep_el(sm_k)%lambda,1), & + rep_el(sm_k)%phi, SIZE(rep_el(sm_k)%phi,1), bephi, SIZE(bephi,1), & + becp, rep_el(sm_k)%bec, rep_el(sm_k)%cm ) ! CALL calbec (nvb+1,nsp,eigr,rep_el(sm_k)%cm,rep_el(sm_k)%bec) IF (tpre) CALL caldbec(ngw,nkb,nbsp,1,nsp,eigr,rep_el(sm_k)%cm,dbec,.true.) diff --git a/CPV/stress.f90 b/CPV/stress.f90 index 236d9ad60..ae65ff044 100644 --- a/CPV/stress.f90 +++ b/CPV/stress.f90 @@ -85,7 +85,7 @@ ! ... declare subroutine arguments REAL(DP) :: pail(:,:), desr(:), strvxc - REAL(DP) :: grho(:,:,:,:,:), v2xc(:,:,:,:,:) + REAL(DP) :: grho(:,:,:), v2xc(:,:,:) REAL(DP) :: bec(:,:) COMPLEX(DP) :: rhoeg(:,:), vxc(:,:) COMPLEX(DP), INTENT(IN) :: sfac(:,:) diff --git a/CPV/turbo.f90 b/CPV/turbo.f90 index e12f5e40e..57754301f 100644 --- a/CPV/turbo.f90 +++ b/CPV/turbo.f90 @@ -15,7 +15,7 @@ LOGICAL :: TTURBO INTEGER :: NTURBO - COMPLEX(DP), ALLOCATABLE :: turbo_states(:,:,:,:) + COMPLEX(DP), ALLOCATABLE :: turbo_states(:,:) PUBLIC :: tturbo, nturbo, turbo_states, turbo_init, allocate_turbo PUBLIC :: deallocate_turbo @@ -35,19 +35,19 @@ RETURN END SUBROUTINE turbo_init - SUBROUTINE allocate_turbo( nr1, nr2, nr3 ) + SUBROUTINE allocate_turbo( nnr ) USE io_global, ONLY: ionode USE io_global, ONLY: stdout USE mp, ONLY: mp_sum - INTEGER :: nr1,nr2,nr3 + INTEGER :: nnr INTEGER :: ierr IF( ionode ) THEN WRITE( stdout,fmt='(/,3X,"TURBO: allocating ",I10," bytes ")') & - 16*nr1*nr2*nr3*nturbo + 16*nnr*nturbo END IF IF( .NOT. ALLOCATED( turbo_states ) ) THEN - ALLOCATE( turbo_states( nr1, nr2, nr3, nturbo ), STAT = ierr) + ALLOCATE( turbo_states( nnr, nturbo ), STAT = ierr) CALL mp_sum(ierr) IF( ierr /= 0 ) THEN IF( ionode ) THEN diff --git a/CPV/wave.f90 b/CPV/wave.f90 index 0c7d59d0a..41e29b392 100644 --- a/CPV/wave.f90 +++ b/CPV/wave.f90 @@ -7,27 +7,11 @@ ! #include "f_defs.h" -! ---------------------------------------------- -! BEGIN manual !=----------------------------------------------------------------------------=! MODULE wave_functions !=----------------------------------------------------------------------------=! -! (describe briefly what this module does...) -! ---------------------------------------------- -! routines in this module: -! REAL(DP) FUNCTION dft_kinetic_energy(c,hg,f,nb) -! REAL(DP) FUNCTION cp_kinetic_energy(cp,cm,pmss,emass,delt) -! SUBROUTINE update_wave_functions(cm,c0,cp) -! SUBROUTINE crot_gamma (c0,lambda,eig) -! SUBROUTINE crot_kp (ik,c0,lambda,eig) -! SUBROUTINE proj_gamma(a,b,lambda) -! SUBROUTINE proj_kp(ik,a,b,lambda) -! ---------------------------------------------- -! END manual - - ! ... include modules USE kinds @@ -47,202 +31,13 @@ MODULE PROCEDURE fixwave_s, fixwave_v, fixwave_m END INTERFACE - PUBLIC :: dft_kinetic_energy, cp_kinetic_energy + PUBLIC :: cp_kinetic_energy PUBLIC :: update_wave_functions, wave_rand_init -! end of module-scope declarations -! ---------------------------------------------- - !=----------------------------------------------------------------------------=! CONTAINS !=----------------------------------------------------------------------------=! -! subroutines -! ---------------------------------------------- -! ---------------------------------------------- - - - REAL(DP) FUNCTION dft_kinetic_energy(c0, cdesc, f, xmkin) - -! This function compute the Total Quanto-Mechanical Kinetic Energy of the Kohn-Sham -! wave function -! ---------------------------------------------- - - USE cell_base, ONLY: tpiba2 - USE brillouin, ONLY: kpoints, kp - USE wave_types, ONLY: wave_descriptor - USE electrons_module, ONLY: pmss - USE control_flags, ONLY: force_pairing, gamma_only - USE reciprocal_space_mesh, ONLY: gkcutz_l - USE reciprocal_vectors, ONLY: ggp - - IMPLICIT NONE - - COMPLEX(DP), INTENT(IN) :: c0(:,:,:,:) ! wave functions coefficients - TYPE (wave_descriptor), INTENT(IN) :: cdesc ! descriptor of c0 - REAL(DP), INTENT(IN) :: f(:,:,:) ! occupation numbers - REAL(DP), OPTIONAL, INTENT(INOUT) :: xmkin - - INTEGER :: ib, ik, ispin, ispin_wfc - REAL(DP) :: xkin, fact, xkins, xmkins - -! ... end of declarations -! ---------------------------------------------- - - - xkin = 0.d0 - - DO ispin = 1, cdesc%nspin - - ispin_wfc = ispin - IF( force_pairing ) ispin_wfc = 1 - - DO ik = 1, cdesc%nkl - - fact = kp%weight(ik) - IF( cdesc%gamma ) THEN - fact = fact * 2.d0 - END IF - - IF( cdesc%gamma ) THEN - xkins = dft_kinetic_energy_s( ispin, c0(:,:,ik,ispin_wfc), cdesc, ggp, f(:,ik,ispin) ) - IF( PRESENT( xmkin ) ) THEN - xmkins = dft_weighted_kinene( ispin, c0(:,:,ik,ispin_wfc), cdesc, ggp, f(:,ik,ispin) ) - END IF - ELSE - xkins = dft_kinetic_energy_s( ispin, c0(:,:,ik,ispin_wfc), cdesc, gkcutz_l(:,ik), f(:,ik,ispin) ) - IF( PRESENT( xmkin ) ) THEN - xmkins = dft_weighted_kinene( ispin, c0(:,:,ik,ispin_wfc), cdesc, gkcutz_l(:,ik), f(:,ik,ispin) ) - END IF - ENDIF - - xkin = xkin + fact * xkins - - IF( PRESENT( xmkin ) ) THEN - xmkin = xmkin + fact * xmkins - END IF - - END DO - - END DO - - dft_kinetic_energy = xkin - - RETURN - END FUNCTION dft_kinetic_energy - -!=----------------------------------------------------------------------------=! - - - REAL(DP) FUNCTION dft_weighted_kinene( ispin, c, cdesc, g2, fi) - -! (describe briefly what this routine does...) -! ---------------------------------------------- - - USE wave_types, ONLY: wave_descriptor - USE electrons_module, ONLY: pmss - - COMPLEX(DP), INTENT(IN) :: c(:,:) - INTEGER, INTENT( IN ) :: ispin - TYPE (wave_descriptor), INTENT(IN) :: cdesc - REAL (DP), INTENT(IN) :: fi(:), g2(:) - INTEGER ib, ig - REAL(DP) skm, xmkin -! ... end of declarations - - xmkin = 0.0d0 - - IF( cdesc%nbl( ispin ) > SIZE( c, 2 ) .OR. & - cdesc%nbl( ispin ) > SIZE( fi ) ) & - CALL errore( ' dft_weighted_kinene ', ' wrong sizes ', 1 ) - IF( cdesc%ngwl > SIZE( c, 1 ) .OR. & - cdesc%ngwl > SIZE( g2 ) .OR. & - cdesc%ngwl > SIZE( pmss ) ) & - CALL errore( ' dft_weighted_kinene ', ' wrong sizes ', 2 ) - - IF( cdesc%gamma .AND. cdesc%gzero ) THEN - - DO ib = 1, cdesc%nbl( ispin ) - skm = 0.d0 - DO ig = 2, cdesc%ngwl - skm = skm + g2(ig) * DBLE( CONJG(c(ig,ib)) * c(ig,ib) ) * pmss(ig) - END DO - skm = skm + g2(1) * DBLE( c(1,ib) )**2 * pmss(1) / 2.0d0 - xmkin = xmkin + fi(ib) * skm * 0.5d0 - END DO - - ELSE - - DO ib = 1, cdesc%nbl( ispin ) - skm = 0.d0 - DO ig = 1, cdesc%ngwl - skm = skm + g2(ig) * DBLE( CONJG( c( ig, ib ) ) * c( ig, ib ) ) * pmss(ig) - END DO - xmkin = xmkin + fi(ib) * skm * 0.5d0 - END DO - - END IF - - dft_weighted_kinene = xmkin - - RETURN - END FUNCTION dft_weighted_kinene - -!=----------------------------------------------------------------------------=! - - REAL(DP) FUNCTION dft_kinetic_energy_s( ispin, c, cdesc, g2, fi) - -! (describe briefly what this routine does...) -! ---------------------------------------------- - - USE wave_types, ONLY: wave_descriptor - COMPLEX(DP), INTENT(IN) :: c(:,:) - INTEGER, INTENT( IN ) :: ispin - TYPE (wave_descriptor), INTENT(IN) :: cdesc - REAL (DP), INTENT(IN) :: fi(:), g2(:) - INTEGER ib, ig, igs - REAL(DP) sk1, xkin -! ... end of declarations - - xkin = 0.0d0 - - IF( cdesc%nbl( ispin ) > SIZE( c, 2 ) .OR. & - cdesc%nbl( ispin ) > SIZE( fi ) ) & - CALL errore( ' dft_total_charge ', ' wrong sizes ', 1 ) - IF( cdesc%ngwl > SIZE( c, 1 ) .OR. & - cdesc%ngwl > SIZE( g2 ) ) & - CALL errore( ' dft_total_charge ', ' wrong sizes ', 2 ) - - IF( cdesc%gamma .AND. cdesc%gzero ) THEN - - DO ib = 1, cdesc%nbl( ispin ) - sk1 = 0.d0 - DO ig = 2, cdesc%ngwl - sk1 = sk1 + g2(ig) * DBLE( CONJG( c(ig,ib) ) * c(ig,ib) ) - END DO - sk1 = sk1 + g2(1) * DBLE( c(1,ib) )**2 / 2.0d0 - xkin = xkin + fi(ib) * sk1 * 0.5d0 - END DO - - ELSE - - DO ib = 1, cdesc%nbl( ispin ) - sk1 = 0.d0 - DO ig = 1, cdesc%ngwl - sk1 = sk1 + g2(ig) * DBLE( CONJG( c(ig,ib) ) * c(ig,ib) ) - END DO - xkin = xkin + fi(ib) * sk1 * 0.5d0 - END DO - - END IF - - dft_kinetic_energy_s = xkin - - RETURN - END FUNCTION dft_kinetic_energy_s - - -!=----------------------------------------------------------------------------=! SUBROUTINE fixwave_s ( ispin, c, cdesc, kmask ) diff --git a/CPV/wf.f90 b/CPV/wf.f90 index 0459fff27..311143acb 100644 --- a/CPV/wf.f90 +++ b/CPV/wf.f90 @@ -3569,7 +3569,7 @@ SUBROUTINE dforce_field( bec, deeq, betae, i, c, ca, df, da, v, v1 ) USE cvan, ONLY : ish USE smooth_grid_dimensions, ONLY : nr1s, nr2s, nr3s, & nr1sx, nr2sx, nr3sx, nnrsx - USE electrons_base, ONLY : nbspx, nbsp, nspin, f, fspin + USE electrons_base, ONLY : nbspx, nbsp, nspin, f, ispin USE constants, ONLY : pi, fpi USE ions_base, ONLY : nsp, na, nat USE gvecw, ONLY : ggp @@ -3626,12 +3626,12 @@ SUBROUTINE dforce_field( bec, deeq, betae, i, c, ca, df, da, v, v1 ) ! ENDIF ! - iss1=fspin(i) + iss1=ispin(i) ! ! the following avoids a potential out-of-bounds error ! IF (i.NE.nbsp) THEN - iss2=fspin(i+1) + iss2=ispin(i+1) ELSE iss2=iss1 END IF @@ -3874,7 +3874,7 @@ SUBROUTINE rhoiofr( nfi, c, irb, eigrb, bec, & USE cell_base, ONLY : omega USE smooth_grid_dimensions, ONLY : nr1s, nr2s, nr3s, & nr1sx, nr2sx, nr3sx, nnrsx - USE electrons_base, ONLY : nbspx, nbsp, nspin, f, fspin + USE electrons_base, ONLY : nbspx, nbsp, nspin, f, ispin USE constants, ONLY : pi, fpi USE wannier_base, ONLY : iwf USE dener, ONLY : dekin, denl @@ -3914,7 +3914,7 @@ SUBROUTINE rhoiofr( nfi, c, irb, eigrb, bec, & ! ================================================================== ! calculation of kinetic energy ekin ! ================================================================== - ekin=enkin(c) + ekin=enkin(c,ngw,f,nbsp) IF(tpre) CALL denkin(c,dekin) ! ! ================================================================== @@ -4018,11 +4018,11 @@ SUBROUTINE rhoiofr( nfi, c, irb, eigrb, bec, & #else IF(tbuff) WRITE(21,iostat=ios) psis #endif - ! iss1=fspin(i) + ! iss1=ispin(i) iss1=1 sa1=f(i)/omega ! if (i.ne.nbsp) then - ! iss2=fspin(i+1) + ! iss2=ispin(i+1) ! sa2=f(i+1)/omega ! else iss2=iss1 ! carlo diff --git a/Modules/descriptors.f90 b/Modules/descriptors.f90 index a608de033..c94d220e3 100644 --- a/Modules/descriptors.f90 +++ b/Modules/descriptors.f90 @@ -12,84 +12,16 @@ IMPLICIT NONE SAVE - INTERFACE desc_init - MODULE PROCEDURE desc_init_1d, desc_init_2d, desc_init_3d - END INTERFACE - INTERFACE global_index - MODULE PROCEDURE globalindex_desc, globalindex_shape - END INTERFACE - INTERFACE local_index - MODULE PROCEDURE localindex_desc, localindex_shape - END INTERFACE - INTERFACE local_dimension - MODULE PROCEDURE localdim_desc, localdim_shape - END INTERFACE - INTERFACE owner_of - MODULE PROCEDURE ownerof_desc, ownerof_shape - END INTERFACE - INTERFACE get_local_dims - MODULE PROCEDURE desc_ldims - END INTERFACE - INTERFACE get_global_dims - MODULE PROCEDURE desc_gdims - END INTERFACE - - INTEGER NUMROC - EXTERNAL NUMROC + INTEGER ldim_block, ldim_cyclic, ldim_block_cyclic + INTEGER lind_block, lind_cyclic, lind_block_cyclic + EXTERNAL ldim_block, ldim_cyclic, ldim_block_cyclic + EXTERNAL lind_block, lind_cyclic, lind_block_cyclic CONTAINS -!=----------------------------------------------------------------------------=! -! BEGIN manual -! - SUBROUTINE desc_init_1d(desc, matrix_type, rows, & - row_block, row_src_pe, grid, row_shape) -! -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor) :: desc - INTEGER, INTENT(IN) :: matrix_type - TYPE (processors_grid), INTENT(IN) :: grid - INTEGER, INTENT(IN) :: rows - INTEGER, INTENT(IN) :: row_block - INTEGER, INTENT(IN) :: row_src_pe - INTEGER, INTENT(IN) :: row_shape - - desc%matrix_type = matrix_type - desc%grid = grid - - CALL desc_init_x(desc%nx, desc%xshape, desc%nxl, & - desc%nxblk, desc%ixl, desc%ipexs, rows, row_shape, & - row_block, row_src_pe, grid%mex, grid%npx) - - desc%ny = 1 - desc%nyl = 1 - desc%nyblk = 1 - desc%ipeys = 0 - desc%yshape = REPLICATED_DATA_SHAPE - - desc%nz = 1 - desc%nzl = 1 - desc%nzblk = 1 - desc%ipezs = 0 - desc%zshape = REPLICATED_DATA_SHAPE - - desc%ldx = 1 - desc%ldy = 1 - RETURN - END SUBROUTINE desc_init_1d - -!=----------------------------------------------------------------------------=! -! BEGIN manual - SUBROUTINE desc_init_blacs(desc, matrix_type, rows, columns, & row_block, column_block, row_src_pe, column_src_pe, grid, local_ld) -! -! -! END manual -!=----------------------------------------------------------------------------=! TYPE (descriptor) :: desc INTEGER, INTENT(IN) :: matrix_type @@ -105,202 +37,74 @@ desc%matrix_type = matrix_type desc%grid = grid - CALL desc_init_x(desc%nx, desc%xshape, desc%nxl, & + CALL desc_init_x(desc%nx, desc%xdist, desc%nxl, & desc%nxblk, desc%ixl, desc%ipexs, rows, & - BLOCK_CYCLIC_SHAPE, row_block, row_src_pe, grid%mex, & + BLOCK_CYCLIC_DIST, row_block, row_src_pe, grid%mex, & grid%npx) - CALL desc_init_x(desc%ny, desc%yshape, & + CALL desc_init_x(desc%ny, desc%ydist, & desc%nyl, desc%nyblk, desc%iyl, & - desc%ipeys, columns, BLOCK_CYCLIC_SHAPE, column_block, & + desc%ipeys, columns, BLOCK_CYCLIC_DIST, column_block, & column_src_pe, grid%mey, grid%npy) desc%nz = 1 desc%nzl = 1 desc%nzblk = 1 desc%ipezs = 0 - desc%zshape = REPLICATED_DATA_SHAPE + desc%zdist = REPLICATED_DATA_DIST IF(PRESENT(local_ld)) THEN desc%ldx = local_ld ELSE - desc%ldx = localdim_shape( rows, row_block, grid%mex, & - row_src_pe, grid%npx, desc%xshape) + desc%ldx = ldim_block_cyclic( rows, row_block, grid%npx, grid%mex ) END IF desc%ldy = 1 RETURN END SUBROUTINE desc_init_blacs -!=----------------------------------------------------------------------------=! -! BEGIN manual - SUBROUTINE desc_init_2d(desc, matrix_type, rows, columns, & - row_block, column_block, row_src_pe, & - column_src_pe, grid, row_shape, column_shape, local_ld) -! -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor) :: desc - INTEGER, INTENT(IN) :: matrix_type - TYPE (processors_grid), INTENT(IN) :: grid - INTEGER, INTENT(IN) :: rows - INTEGER, INTENT(IN) :: columns - INTEGER, INTENT(IN) :: row_block - INTEGER, INTENT(IN) :: column_block - INTEGER, INTENT(IN) :: row_src_pe - INTEGER, INTENT(IN) :: column_src_pe - INTEGER, INTENT(IN) :: row_shape - INTEGER, INTENT(IN) :: column_shape - INTEGER, INTENT(IN), OPTIONAL :: local_ld - - LOGICAL :: debug = .FALSE. - - desc%matrix_type = matrix_type - desc%grid = grid - - CALL desc_init_x(desc%nx, desc%xshape, desc%nxl, & - desc%nxblk, desc%ixl, desc%ipexs, rows, row_shape, & - row_block, row_src_pe, grid%mex, grid%npx) - CALL desc_init_x(desc%ny, desc%yshape, & - desc%nyl, desc%nyblk, desc%iyl, & - desc%ipeys, columns, column_shape, column_block, & - column_src_pe, grid%mey, grid%npy) - - IF( debug ) THEN - WRITE( stdout,fmt="(' desc%nx = ', I6 )") desc%nx - WRITE( stdout,fmt="(' desc%xshape = ', I6 )") desc%xshape - WRITE( stdout,fmt="(' desc%nxl = ', I6 )") desc%nxl - WRITE( stdout,fmt="(' desc%nxblk = ', I6 )") desc%nxblk - WRITE( stdout,fmt="(' desc%ixl = ', I6 )") desc%ixl - WRITE( stdout,fmt="(' desc%ipexs = ', I6 )") desc%ipexs - - WRITE( stdout,fmt="(' desc%ny = ', I6 )") desc%ny - WRITE( stdout,fmt="(' desc%yshape = ', I6 )") desc%yshape - WRITE( stdout,fmt="(' desc%nyl = ', I6 )") desc%nyl - WRITE( stdout,fmt="(' desc%nyblk = ', I6 )") desc%nyblk - WRITE( stdout,fmt="(' desc%iyl = ', I6 )") desc%iyl - WRITE( stdout,fmt="(' desc%ipeys = ', I6 )") desc%ipeys - END IF - - desc%nz = 1 - desc%nzl = 1 - desc%nzblk = 1 - desc%ipezs = 0 - desc%zshape = REPLICATED_DATA_SHAPE - - IF(PRESENT(local_ld)) THEN - desc%ldx = local_ld - ELSE - desc%ldx = localdim_shape( rows, row_block, grid%mex, & - row_src_pe, grid%npx, desc%xshape) - END IF - desc%ldy = 1 - RETURN - END SUBROUTINE desc_init_2d - - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - SUBROUTINE desc_init_3d(desc, matrix_type, rows, columns, & - planes, row_block, column_block, plane_block, row_src_pe, & - column_src_pe, plane_src_pe, grid, row_shape, column_shape, & - plane_shape, local_ld, local_sub_ld) -! -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor) :: desc - INTEGER, INTENT(IN) :: matrix_type - TYPE (processors_grid), INTENT(IN) :: grid - INTEGER, INTENT(IN) :: rows - INTEGER, INTENT(IN) :: columns - INTEGER, INTENT(IN) :: planes - INTEGER, INTENT(IN) :: row_block - INTEGER, INTENT(IN) :: column_block - INTEGER, INTENT(IN) :: plane_block - INTEGER, INTENT(IN) :: row_src_pe - INTEGER, INTENT(IN) :: column_src_pe - INTEGER, INTENT(IN) :: plane_src_pe - INTEGER, INTENT(IN) :: row_shape - INTEGER, INTENT(IN) :: column_shape - INTEGER, INTENT(IN) :: plane_shape - INTEGER, INTENT(IN), OPTIONAL :: local_ld - INTEGER, INTENT(IN), OPTIONAL :: local_sub_ld - - desc%matrix_type = matrix_type - desc%grid = grid - - CALL desc_init_x(desc%nx, desc%xshape, desc%nxl, & - desc%nxblk, desc%ixl, desc%ipexs, rows, row_shape, & - row_block, row_src_pe, grid%mex, grid%npx) - CALL desc_init_x(desc%ny, desc%yshape, & - desc%nyl, desc%nyblk, desc%iyl, & - desc%ipeys, columns, column_shape, column_block, & - column_src_pe, grid%mey, grid%npy) - CALL desc_init_x(desc%nz, desc%zshape, & - desc%nzl, desc%nzblk, desc%izl, & - desc%ipezs, planes, plane_shape, plane_block, & - plane_src_pe, grid%mez, grid%npz) - - IF(PRESENT(local_ld)) THEN - desc%ldx = local_ld - ELSE - desc%ldx = localdim_shape( rows, row_block, grid%mex, & - row_src_pe, grid%npx, desc%xshape) - END IF - IF(PRESENT(local_sub_ld)) THEN - desc%ldy = local_sub_ld - ELSE - desc%ldy = localdim_shape( columns, column_block, & - grid%mey, column_src_pe, grid%npy, & - desc%yshape) - END IF - RETURN - END SUBROUTINE desc_init_3d - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - SUBROUTINE desc_init_x(desc_nxs, desc_nx_shape, desc_local_nxs, & - desc_nx_block, desc_ix, desc_nx_src_pe, nxs, nx_shape, nx_block, & + SUBROUTINE desc_init_x(desc_nxs, desc_nx_dist, desc_local_nxs, & + desc_nx_block, desc_ix, desc_nx_src_pe, nxs, nx_dist, nx_block, & nx_src_pe, mype, npes) ! -! END manual -!=----------------------------------------------------------------------------=! IMPLICIT NONE INTEGER, INTENT(OUT) :: desc_nxs - INTEGER, INTENT(OUT) :: desc_nx_shape + INTEGER, INTENT(OUT) :: desc_nx_dist INTEGER, INTENT(OUT) :: desc_local_nxs INTEGER, INTENT(OUT) :: desc_nx_block INTEGER, INTENT(OUT) :: desc_ix INTEGER, INTENT(OUT) :: desc_nx_src_pe INTEGER, INTENT(IN) :: nxs - INTEGER, INTENT(IN) :: nx_shape + INTEGER, INTENT(IN) :: nx_dist INTEGER, INTENT(IN) :: nx_block INTEGER, INTENT(IN) :: nx_src_pe INTEGER, INTENT(IN) :: mype INTEGER, INTENT(IN) :: npes - desc_nxs = nxs - desc_nx_shape = nx_shape - desc_local_nxs = localdim_shape( nxs, nx_block, mype, nx_src_pe, npes, nx_shape) - desc_ix = localindex_shape( 1, nxs, nx_block, mype, npes, nx_shape) + desc_nxs = nxs + desc_nx_dist = nx_dist - SELECT CASE (nx_shape) - CASE ( BLOCK_CYCLIC_SHAPE ) - desc_nx_block = nx_block + SELECT CASE (nx_dist) + CASE ( BLOCK_CYCLIC_DIST ) + desc_local_nxs = ldim_block_cyclic( nxs, nx_block, npes, mype ) + desc_ix = lind_block_cyclic( 1, nxs, nx_block, npes, mype) + desc_nx_block = nx_block desc_nx_src_pe = nx_src_pe - CASE ( BLOCK_PARTITION_SHAPE ) + CASE ( BLOCK_PARTITION_DIST ) + desc_local_nxs = ldim_block( nxs, npes, mype ) + desc_ix = lind_block( 1, nxs, npes, mype) desc_nx_block = desc_local_nxs desc_nx_src_pe = 0 - CASE ( CYCLIC_SHAPE ) + CASE ( CYCLIC_DIST ) + desc_local_nxs = ldim_cyclic( nxs, npes, mype ) + desc_ix = lind_cyclic( 1, nxs, npes, mype) desc_nx_block = 1 desc_nx_src_pe = 0 - CASE ( REPLICATED_DATA_SHAPE ) - desc_nx_block = nxs + CASE ( REPLICATED_DATA_DIST ) + desc_local_nxs = nxs + desc_ix = 1 + desc_nx_block = nxs desc_nx_src_pe = mype END SELECT @@ -329,324 +133,5 @@ RETURN END SUBROUTINE pblas_descriptor -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION globalindex_shape( lind, n, nb, me, isrc, np, pshape ) - -! This function computes the global index of a distributed array entry -! pointed to by the local index lind of the process indicated by me. -! lind local index of the distributed matrix entry. -! N is the size of the global array. -! NB size of the blocks the distributed matrix is split into. -! me The coordinate of the process whose local array row or -! column is to be determined. -! isrc The coordinate of the process that possesses the first -! row/column of the distributed matrix. -! np The total number processes over which the distributed -! matrix is distributed. -! -! END manual -!=----------------------------------------------------------------------------=! - - - INTEGER, INTENT(IN) :: lind, n, nb, me, isrc, np, pshape - INTEGER r, q - - IF( pshape .EQ. BLOCK_PARTITION_SHAPE ) THEN - - q = INT(n/np) - r = MOD(n,np) - IF( me < r ) THEN - GLOBALINDEX_SHAPE = (Q+1)*me + lind - ELSE - GLOBALINDEX_SHAPE = Q*me + R + lind - END IF - - ELSE IF ( pshape .EQ. BLOCK_CYCLIC_SHAPE ) THEN - - GLOBALINDEX_SHAPE = np*NB*((lind-1)/NB) + & - MOD(lind-1,NB) + MOD(np+me-isrc, np)*NB + 1 - - ELSE IF ( pshape .EQ. CYCLIC_SHAPE ) THEN - - GLOBALINDEX_SHAPE = (lind-1) * np + me + 1 - - ELSE - - GLOBALINDEX_SHAPE = lind - - END IF - RETURN - END FUNCTION globalindex_shape - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION globalindex_desc( lind, desc, what ) - -! END manual -!=----------------------------------------------------------------------------=! - - INTEGER, INTENT(IN) :: lind - TYPE (descriptor) :: desc - CHARACTER(LEN=*) :: what - INTEGER N, nb, src_pe, my_pe, np, pshape - IF ( what(1:1) .EQ. 'R' .OR. what(1:1) .EQ. 'r' ) THEN - NB = desc%nxblk; N = desc%nx; - np = desc%grid%npx; src_pe = desc%ipexs; - my_pe = desc%grid%mex; pshape = desc%xshape - ELSE IF ( what(1:1) .EQ. 'C' .OR. what(1:1) .EQ. 'c' ) THEN - NB = desc%nyblk; N = desc%ny; - np = desc%grid%npy; src_pe = desc%ipeys; - my_pe = desc%grid%mey; pshape = desc%yshape - ELSE IF ( what(1:1) .EQ. 'P' .OR. what(1:1) .EQ. 'p' ) THEN - NB = desc%nzblk; N = desc%nz; - np = desc%grid%npz; src_pe = desc%ipezs; - my_pe = desc%grid%mez; pshape = desc%zshape - END IF - globalindex_desc = globalindex_shape(lind, n, nb, my_pe, src_pe, np, pshape ) - RETURN - END FUNCTION globalindex_desc - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION localdim_shape( n, nb, me, isrc, np, pshape) - -! N = Global dimension of the array -! NB = Size of the blocks ( meaningful only for BLOCK_CYCLIC_SHAPE ) -! me = Index of the callig processor -! isrc = Index of the processor owning the first element of the array -! np = Number of processors among which the array is subdivided -! pshape = Shape of the distributed data -! -! This function return the number of array elements owned -! by the callig processor -! -! END manual -!=----------------------------------------------------------------------------=! - - IMPLICIT NONE - INTEGER, INTENT(IN) :: n, nb, me, isrc, np, pshape - - IF( pshape .EQ. BLOCK_PARTITION_SHAPE ) THEN - - LOCALDIM_SHAPE = INT(N/np) - IF( me < MOD(N,np) ) LOCALDIM_SHAPE = LOCALDIM_SHAPE + 1 - - ELSE IF( pshape .EQ. BLOCK_CYCLIC_SHAPE ) THEN - - LOCALDIM_SHAPE = NUMROC( N, NB, me, isrc, np ) - - ELSE IF( pshape .EQ. CYCLIC_SHAPE ) THEN - - LOCALDIM_SHAPE = INT(N/np) - IF( me < MOD(N,np) ) LOCALDIM_SHAPE = LOCALDIM_SHAPE + 1 - - ELSE - - LOCALDIM_SHAPE = n - - END IF - RETURN - END FUNCTION localdim_shape - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION localdim_desc( desc, what ) - -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor) :: desc - CHARACTER(LEN=*) :: what - INTEGER n, nb, src_pe, my_pe, np, pshape - IF ( what(1:1) .EQ. 'R' .OR. what(1:1) .EQ. 'r' ) THEN - NB = desc%nxblk; N = desc%nx; - np = desc%grid%npx; src_pe = desc%ipexs; - my_pe = desc%grid%mex; pshape = desc%xshape - ELSE IF ( what(1:1) .EQ. 'C' .OR. what(1:1) .EQ. 'c' ) THEN - NB = desc%nyblk; N = desc%ny; - np = desc%grid%npy; src_pe = desc%ipeys; - my_pe = desc%grid%mey; pshape = desc%yshape - ELSE IF ( what(1:1) .EQ. 'P' .OR. what(1:1) .EQ. 'p' ) THEN - NB = desc%nzblk; N = desc%nz; - np = desc%grid%npz; src_pe = desc%ipezs; - my_pe = desc%grid%mez; pshape = desc%zshape - END IF - localdim_desc = localdim_shape( N, NB, my_pe, src_pe, np, pshape) - RETURN - END FUNCTION localdim_desc - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION localindex_shape(ig, n, nb, me, np, pshape) - -! ig global index of the x dimension of array element -! n dimension of the global array -! nb dimension of the block the global array is split into. -! np number of processors onto which the array is distributed -! -! This function return the index of the element in the local block -! -! END manual -!=----------------------------------------------------------------------------=! - - INTEGER ig, n, np, pshape, nb, me, q, r - - IF( pshape .EQ. BLOCK_PARTITION_SHAPE ) THEN - - q = INT(n/np) - r = MOD(n,np) - IF( me < r ) THEN - LOCALINDEX_SHAPE = ig - (q+1) * me - ELSE - LOCALINDEX_SHAPE = ig - (q+1) * r - q * (me - r) - END IF - - ELSE IF ( pshape .EQ. BLOCK_CYCLIC_SHAPE ) THEN - - LOCALINDEX_SHAPE = NB*((IG-1)/(NB*NP))+MOD(IG-1,NB)+1 - - ELSE IF ( pshape .EQ. CYCLIC_SHAPE ) THEN - - LOCALINDEX_SHAPE = (ig-1)/np + 1 - - ELSE - LOCALINDEX_SHAPE = ig - END IF - RETURN - END FUNCTION localindex_shape - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION localindex_desc(ig, desc, what ) - -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor) :: desc - CHARACTER(LEN=*) :: what - INTEGER ig, n, nb, np, pshape, me - IF ( what(1:1) .EQ. 'R' .OR. what(1:1) .EQ. 'r' ) THEN - NB = desc%nxblk; N = desc%nx; - np = desc%grid%npx; pshape = desc%xshape - me = desc%grid%mex - ELSE IF ( what(1:1) .EQ. 'C' .OR. what(1:1) .EQ. 'c' ) THEN - NB = desc%nyblk; N = desc%ny; - np = desc%grid%npy; pshape = desc%yshape - me = desc%grid%mey - ELSE IF ( what(1:1) .EQ. 'P' .OR. what(1:1) .EQ. 'p' ) THEN - NB = desc%nzblk; N = desc%nz; - np = desc%grid%npz; pshape = desc%zshape - me = desc%grid%mez - END IF - localindex_desc = localindex_shape(ig,n,nb,me,np,pshape) - RETURN - END FUNCTION localindex_desc - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION ownerof_shape(ig,n,nb,src_pe,np,pshape) -! -! ig global index of the x dimension of array element -! n dimension of the global array -! nb dimension of the block -! src_pe index of the processor owning the first element of the array -! at the moment meaningfull only for pshape = BLOCK_CYCLIC_SHAPE -! np number of processors -! -! This function return the index of the processor owning the array element -! whose global index is "ig" -! -! END manual -!=----------------------------------------------------------------------------=! - - IMPLICIT NONE - INTEGER ig, n, nb, np, pshape, src_pe, r, q - IF( pshape .EQ. BLOCK_PARTITION_SHAPE ) THEN - q = INT(n/np); r = MOD(n,np) - IF ( ig <= ((q+1)*r) ) THEN - ownerof_shape = INT((ig-1)/(q+1)) - ELSE - ownerof_shape = INT((ig-1-r*(q+1))/q)+r - END IF - ELSE IF( pshape .EQ. BLOCK_CYCLIC_SHAPE ) THEN - ownerof_shape = MOD( src_pe + (ig - 1) / NB, NP ) - ELSE IF( pshape .EQ. CYCLIC_SHAPE ) THEN - ownerof_shape = MOD( ig-1, np ) - END IF - RETURN - END FUNCTION ownerof_shape - - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - INTEGER FUNCTION ownerof_desc(ig, desc, what ) - -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor) :: desc - CHARACTER(LEN=*) :: what - INTEGER ig, n, nb, src_pe, np, pshape - IF ( what(1:1) .EQ. 'R' .OR. what(1:1) .EQ. 'r' ) THEN - NB = desc%nxblk; N = desc%nx; - np = desc%grid%npx; pshape = desc%xshape - src_pe = desc%ipexs - ELSE IF ( what(1:1) .EQ. 'C' .OR. what(1:1) .EQ. 'c' ) THEN - NB = desc%nyblk; N = desc%ny; - np = desc%grid%npy; pshape = desc%yshape - src_pe = desc%ipeys - ELSE IF ( what(1:1) .EQ. 'P' .OR. what(1:1) .EQ. 'p' ) THEN - NB = desc%nzblk; N = desc%nz; - np = desc%grid%npz; pshape = desc%zshape - src_pe = desc%ipezs - END IF - ownerof_desc = ownerof_shape(ig, n, nb, src_pe, np, pshape) - RETURN - END FUNCTION ownerof_desc - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - SUBROUTINE desc_gdims(d, nx, ny, nz ) - -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor), INTENT(IN) :: d - INTEGER, INTENT(OUT) :: nx, ny, nz - nx = d%nx - ny = d%ny - nz = d%nz - RETURN - END SUBROUTINE desc_gdims - -!=----------------------------------------------------------------------------=! -! BEGIN manual - - SUBROUTINE desc_ldims(d, nxl, nyl, nzl ) - -! END manual -!=----------------------------------------------------------------------------=! - - TYPE (descriptor), INTENT(IN) :: d - INTEGER, INTENT(OUT) :: nxl - INTEGER, OPTIONAL, INTENT(OUT) :: nyl, nzl - nxl = d%nxl - IF( PRESENT( nyl ) ) nyl = d%nyl - IF( PRESENT( nzl ) ) nzl = d%nzl - - RETURN - END SUBROUTINE desc_ldims - - END MODULE descriptors_module diff --git a/Modules/electrons_base.f90 b/Modules/electrons_base.f90 index 731f84bb8..80b2ab517 100644 --- a/Modules/electrons_base.f90 +++ b/Modules/electrons_base.f90 @@ -31,7 +31,7 @@ REAL(DP), ALLOCATABLE :: f(:) ! occupation numbers ( at gamma ) REAL(DP) :: qbac = 0.0d0 ! background neutralizing charge - INTEGER, ALLOCATABLE :: fspin(:) ! spin of each state + INTEGER, ALLOCATABLE :: ispin(:) ! spin of each state ! !------------------------------------------------------------------------------! CONTAINS @@ -113,9 +113,9 @@ END IF ALLOCATE( f ( nbspx ) ) - ALLOCATE( fspin ( nbspx ) ) + ALLOCATE( ispin ( nbspx ) ) f = 0.0d0 - fspin = 0 + ispin = 0 iupdwn ( 1 ) = 1 nel = 0 @@ -265,7 +265,7 @@ do iss = 1, nspin do in = iupdwn(iss), iupdwn(iss) - 1 + nupdwn(iss) - fspin(in) = iss + ispin(in) = iss end do end do @@ -402,7 +402,7 @@ SUBROUTINE deallocate_elct() IF( ALLOCATED( f ) ) DEALLOCATE( f ) - IF( ALLOCATED( fspin ) ) DEALLOCATE( fspin ) + IF( ALLOCATED( ispin ) ) DEALLOCATE( ispin ) telectrons_base_initval = .FALSE. RETURN END SUBROUTINE deallocate_elct diff --git a/Modules/parallel_types.f90 b/Modules/parallel_types.f90 index 3c3f9e412..1fea0de2c 100644 --- a/Modules/parallel_types.f90 +++ b/Modules/parallel_types.f90 @@ -27,12 +27,13 @@ ! 0 <= mez < npz-1 END TYPE -! ... Valid values for data shape - INTEGER, PARAMETER :: BLOCK_CYCLIC_SHAPE = 1 - INTEGER, PARAMETER :: BLOCK_PARTITION_SHAPE = 2 - INTEGER, PARAMETER :: FREE_PATTERN_SHAPE = 3 - INTEGER, PARAMETER :: REPLICATED_DATA_SHAPE = 4 - INTEGER, PARAMETER :: CYCLIC_SHAPE = 5 + ! ... Valid values for data distribution + ! + INTEGER, PARAMETER :: BLOCK_CYCLIC_DIST = 1 + INTEGER, PARAMETER :: BLOCK_PARTITION_DIST = 2 + INTEGER, PARAMETER :: FREE_PATTERN_DIST = 3 + INTEGER, PARAMETER :: REPLICATED_DATA_DIST = 4 + INTEGER, PARAMETER :: CYCLIC_DIST = 5 ! ---------------------------------------------- ! BEGIN manual @@ -40,18 +41,18 @@ ! Given the Array |a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11| ! and three processors P0, P1, P2 ! -! in the BLOCK_PARTITION_SHAPE scheme, the Array is partitioned +! in the BLOCK_PARTITION_DIST scheme, the Array is partitioned ! as follow ! P0 P1 P2 ! |a1 a2 a3 a4| |a5 a6 a7 a8| |a9 a10 a11| ! -! in the BLOCK_CYCLIC_SHAPE scheme the Array is first partitioned +! in the BLOCK_CYCLIC_DIST scheme the Array is first partitioned ! into blocks (i.e. of size 2) |a1 a2|a3 a4|a5 a6|a7 a8|a9 a10|a11| ! Then the block are distributed cyclically among P0, P1 and P2 ! P0 P1 P2 ! |a1 a2|a7 a8| |a3 a4|a9 a10| |a5 a6|a11| ! -! in the CYCLIC_SHAPE scheme the Array elements are distributed round robin +! in the CYCLIC_DIST scheme the Array elements are distributed round robin ! among P0, P1 and P2 ! P0 P1 P2 ! |a1 a4 a7 a10| |a2 a5 a8 a11| |a3 a6 a9| @@ -67,7 +68,7 @@ INTEGER :: nx ! rows, number of rows in the global array INTEGER :: ny ! columns, number of columns in the global array INTEGER :: nz ! planes, number of planes in the global array - INTEGER :: nxblk ! row_block, if shape = BLOCK_CICLYC_SHAPE, + INTEGER :: nxblk ! row_block, if DIST = BLOCK_CICLYC_DIST, ! this value represent the blocking factor ! used to distribute the rows of the array, ! otherwise this is the size of local block of rows @@ -90,9 +91,9 @@ ! of the array INTEGER :: ldz ! - INTEGER :: xshape ! row_shape - INTEGER :: yshape ! column_shape - INTEGER :: zshape ! plane_shape + INTEGER :: xdist ! row_dist + INTEGER :: ydist ! column_dist + INTEGER :: zdist ! plane_dist END TYPE @@ -150,8 +151,8 @@ complex_parallel_vector, complex_parallel_matrix, & complex_parallel_tensor, parallel_allocate, parallel_deallocate - PUBLIC :: BLOCK_CYCLIC_SHAPE, BLOCK_PARTITION_SHAPE, & - FREE_PATTERN_SHAPE, REPLICATED_DATA_SHAPE, CYCLIC_SHAPE + PUBLIC :: BLOCK_CYCLIC_DIST, BLOCK_PARTITION_DIST, & + FREE_PATTERN_DIST, REPLICATED_DATA_DIST, CYCLIC_DIST INTERFACE parallel_allocate MODULE PROCEDURE allocate_real_vector, allocate_real_matrix, & diff --git a/Modules/ptoolkit.f90 b/Modules/ptoolkit.f90 index 549f08b32..276f210bd 100644 --- a/Modules/ptoolkit.f90 +++ b/Modules/ptoolkit.f90 @@ -8,30 +8,11 @@ #include "f_defs.h" -! BEGIN manual - !==----------------------------------------------==! MODULE parallel_toolkit !==----------------------------------------------==! -! (describe briefly what this module does...) -! ---------------------------------------------- -! routines in this module: -! MATMULP1(TRANSA, TRANSB, A, B, C, N) -! MATMULP(TRANSA, TRANSB, A, B, C, N) -! CMATMULP(TRANSA,TRANSB,A,B,C,N) -! PTREDV(A,LDA,D,E,V,LDV,NRL,N,NPROC,ME) -! ptqliv(d,e,n,z,ldz,nrl) -! peigsrtv(d,v,ldv,n,nrl) -! diagonalize(IOPT,A,D,EV,N,NPROC,MPIME) -! pdspev_drv( JOBZ, ap, lda, w, z, ldz, nrl, n, nproc, mpime) -! dspev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ ) -! cdiagonalize(iflg,a,d,ev,n,nproc,mpime) -! PZHPTRD( N, NRL, AP, LDA, D, E, TAU, NPROC, ME) -! PZUPGTR( N, NRL, AP, LDA, TAU, Q, LDQ, NPROC, ME) -! END manual - USE io_global, ONLY : stdout USE parallel_include @@ -40,487 +21,17 @@ SAVE PRIVATE - PUBLIC :: matmulp, cmatmulp, pdspev_drv, dspev_drv, & + PUBLIC :: pdspev_drv, dspev_drv, & diagonalize, pzhpev_drv, zhpev_drv, cdiagonalize + PUBLIC :: rep_matmul_drv !==----------------------------------------------==! CONTAINS !==----------------------------------------------==! - SUBROUTINE matmulp1(transa, transb, a, b, c, n) - - ! - ! Parallel driver for matrix multiplication of square matrixes - ! Compute: - ! C = OP( A ) * OP( B ) - ! - ! TRANSA = 'N', OP( A ) = A - ! TRANSA = 'T', OP( A ) = A' - ! TRANSB = 'N', OP( B ) = B - ! TRANSB = 'T', OP( B ) = B' - ! - ! N is the dimension of the matrixes - ! - ! NOTE: All matrixes should be replicated on all processors - ! - ! Writte by Carlo Cavazzoni - ! - - USE kinds - - IMPLICIT NONE - -#if defined __PARA -# if defined __SHMEM - include 'mpp/shmem.fh' -# endif -#endif - - INTEGER :: N - REAL(DP) :: A(N,*), C(N,*), B(N,*) - - CHARACTER*1, INTENT(IN) :: TRANSA, TRANSB - -#if defined __MPI - - INTEGER, PARAMETER :: matmul_size = 2**20 ! 1Mb 2^20 - - INTEGER :: ISTATUS( MPI_STATUS_SIZE ) - - INTEGER :: ME, I, II, J, JJ, IP, SOUR, DEST, INFO, IERR, ioff, ldx - INTEGER :: NB, IB_S, NB_SOUR, IB_SOUR, IBUF - INTEGER :: nproc, mpime, q, r - - REAL(DP) :: auxa( MATMUL_SIZE ) - REAL(DP) :: auxb( MATMUL_SIZE ) - - SAVE :: auxa, auxb - - ! - ! ... BODY - ! - - CALL MPI_COMM_SIZE( MPI_COMM_WORLD, NPROC, IERR ) - CALL MPI_COMM_RANK( MPI_COMM_WORLD, MPIME, IERR ) - - ME = MPIME + 1 - Q = INT(N/NPROC) - R = MOD(N,NPROC) - - ! ... Find out the number of elements in the local block - - NB = Q - IF(ME .LE. R ) NB = NB+1 - - ! ... Find out the global index of the local first row - - IF(ME .LE. R) THEN - ib_s = (Q+1)*(ME-1) + 1 - ELSE - ib_s = Q*(ME-1) + R + 1 - END IF - - if ( n*(nb+1) > matmul_size ) then - call errore('pmatmul','n*(nb+1)>matmul_size',n*(nb+1)) - end if - - ldx = n / nproc + 1 - - ! ... Distribute blocks of A across processors - - IF( TRANSA == 'N' .OR. TRANSA == 'n' ) THEN - ibuf=0 - ioff=IB_S-1 - DO J = 1,N - DO I = 1,NB - auxa(ibuf+I) = A(I+ioff,J) - END DO - ibuf = ibuf + ldx - END DO - ELSE - ibuf = 0 - ioff=IB_S-1 - DO J = 1,N - DO I = 1,NB - auxa(ibuf+I) = A(J,I+ioff) - END DO - ibuf = ibuf + ldx - END DO - END IF - - ! ... Use scalar lapack driver - - CALL DGEMM( 'N', transb, NB, N, N, 1.0d0, auxa(1), ldx, B(1,1), N, 0.0d0, auxb(1), ldx ) - - ibuf = 0 - ioff = IB_S - 1 - DO J = 1, N - DO I = 1, NB - C( I + ioff, J ) = auxb( ibuf + I ) - END DO - ibuf = ibuf + ldx - END DO - - ! ... Here processors exchange blocks - - DO IP = 1, NPROC - 1 - SOUR = MOD(ME-IP-1+NPROC,NPROC)+1 - DEST = MOD(ME+IP-1 ,NPROC)+1 - - ! ... Find out the number of elements in the block of processor SOUR - - NB_SOUR = q - IF(SOUR .LE. r ) NB_SOUR = NB_SOUR+1 - - ! ... Find out the global index of the first row owned by SOUR - - IF(SOUR .LE. R) THEN - ib_sour = (Q+1)*(SOUR-1) + 1 - ELSE - ib_sour = Q*(SOUR-1) + R + 1 - END IF - -# if defined __SHMEM - - call shmem_barrier_all - call shmem_get64(auxa, auxb, ldx*n, sour-1) - -# elif defined __MPI - - CALL MPI_SENDRECV(auxb,ldx*n,mpi_double_precision,DEST-1,ip, & - auxa, ldx*n, mpi_double_precision,SOUR-1, ip, & - MPI_COMM_WORLD,ISTATUS,IERR) - -# endif - - IBUF = 0 - ioff = IB_SOUR - 1 - DO J = 1, N - DO I = 1, NB_SOUR - C( I + ioff, J ) = AUXA( IBUF + I ) - END DO - IBUF = IBUF + ldx - END DO - - END DO - -# if defined __SHMEM - call shmem_barrier_all -# endif - -#else - - CALL DGEMM(TRANSA,TRANSB,N,N,N,1.0d0,A(1,1),N,B(1,1),N,0.0d0,C(1,1),N) - -#endif - - RETURN - END SUBROUTINE matmulp1 - - -!=----------------------------------------------------------------------------=! - - - SUBROUTINE matmulp( transa, transb, a, b, c, n ) - - ! - ! Parallel driver for matrix multiplication of square matrixes - ! Compute: - ! C = OP( A ) * OP( B ) - ! - ! TRANSA = 'N', OP( A ) = A - ! TRANSA = 'T', OP( A ) = A' - ! TRANSB = 'N', OP( B ) = B - ! TRANSB = 'T', OP( B ) = B' - ! - ! N is the dimension of the matrixes - ! - ! NOTE: All matrixes should be replicated on all processors - ! - ! Writte by Carlo Cavazzoni - ! - - USE kinds - - IMPLICIT NONE - - INTEGER :: N - REAL(DP) :: A(N,*), C(N,*), B(N,*) - - CHARACTER*1, INTENT(IN) :: TRANSA, TRANSB - -#if defined __MPI - - INTEGER, PARAMETER :: matmul_size = 2**20 ! 1Mb 2^20 - - INTEGER :: ME, I, II, J, JJ, IP, SOUR, DEST, INFO, IERR, ioff, ldx - INTEGER :: NB, IB_S, NB_SOUR, IB_SOUR, IBUF - INTEGER :: nproc, mpime, q, r - - REAL(DP) :: auxa( MATMUL_SIZE ) - REAL(DP) :: auxb( MATMUL_SIZE ) - - SAVE :: auxa, auxb - - ! - ! ... BODY - ! - - CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NPROC, IERR) - CALL MPI_COMM_RANK(MPI_COMM_WORLD, MPIME, IERR) - - ME = MPIME + 1 - Q = INT(N/NPROC) - R = MOD(N,NPROC) - - ! ... Find out the number of elements in the local block - - NB = Q - IF(ME .LE. R ) NB = NB+1 - - ! ... Find out the global index of the local first row - - IF(ME .LE. R) THEN - ib_s = (Q+1)*(ME-1) + 1 - ELSE - ib_s = Q*(ME-1) + R + 1 - END IF - - - IF ( n*(nb+1) > matmul_size ) THEN - call errore( ' pmatmul ', ' n*(nb+1) > matmul_size ', n*(nb+1) ) - END IF - - ldx = n/nproc + 1 - - IF(TRANSA == 'N' .OR. TRANSA == 'n' ) THEN - ibuf = 0 - ioff = IB_S - 1 - DO J = 1, N - DO I = 1, NB - auxa( ibuf + I ) = A( I + ioff, J ) - END DO - ibuf = ibuf + ldx - END DO - ELSE - ioff = IB_S - 1 - call mytranspose( A( 1, ioff + 1 ), n, auxa(1), ldx, n, nb) - END IF - - CALL DGEMM('N',transb,NB,N,N,1.0d0,auxa(1),ldx,B(1,1),N,0.0d0,auxb(1),ldx) - - ! ... Here processors exchange blocks - - DO IP = 0, NPROC-1 - - ! ... Find out the number of elements in the block of processor SOUR - - NB_SOUR = q - IF( (IP+1) .LE. r ) NB_SOUR = NB_SOUR+1 - - ! ... Find out the global index of the first row owned by SOUR - - IF( (IP+1) .LE. r ) THEN - ib_sour = (Q+1)*IP + 1 - ELSE - ib_sour = Q*IP + R + 1 - END IF - - IF( mpime == ip ) auxa(1:n*ldx) = auxb(1:n*ldx) - CALL MPI_BCAST( auxa(1), ldx*n, mpi_double_precision, ip, MPI_COMM_WORLD, IERR) - - IBUF = 0 - ioff = IB_SOUR - 1 - DO J = 1, N - DO I = 1, NB_SOUR - C( I + ioff, J ) = AUXA( IBUF + I ) - END DO - IBUF = IBUF + ldx - END DO - - END DO - -#else - - CALL DGEMM(TRANSA, TRANSB, N, N, N, 1.0d0, A(1,1), N, B(1,1), N, 0.0d0, C(1,1), N) - -#endif - - RETURN - END SUBROUTINE matmulp - -!==----------------------------------------------==! - - - SUBROUTINE cmatmulp(transa,transb,a,b,c,n) - - ! - ! Parallel driver for matrix multiplication of square matrixes - ! Compute: - ! C = OP( A ) * OP( B ) - ! - ! TRANSA = 'N', OP( A ) = A - ! TRANSA = 'T', OP( A ) = A' - ! TRANSB = 'N', OP( B ) = B - ! TRANSB = 'T', OP( B ) = B' - ! - ! N is the dimension of the matrixes - ! - ! NOTE: All matrixes should be replicated on all processors - ! - ! Writte by Carlo Cavazzoni - - USE kinds - - IMPLICIT NONE - -#if defined __PARA -# if defined __SHMEM - include 'mpp/shmem.fh' -# endif -#endif - - INTEGER :: N - COMPLEX(DP) :: A(N,*), C(N,*), B(N,*) - - CHARACTER*1 :: TRANSA, TRANSB - COMPLEX(DP) :: zero = (0.0d0,0.0d0) - COMPLEX(DP) :: one = (1.0d0,0.0d0) - -#if defined __MPI - - INTEGER, PARAMETER :: matmul_size = 2**20 ! 1Mb 2^20 - - INTEGER :: ISTATUS(MPI_STATUS_SIZE) - - INTEGER :: ME, I, II, J, JJ, IP, SOUR, DEST, INFO, IERR, ioff - INTEGER :: NB_SOUR,IB_SOUR,IBUF,NB,IB_S,LDX - INTEGER :: nproc, mpime, r, q - COMPLEX(DP) :: auxa(MATMUL_SIZE) - COMPLEX(DP) :: auxb(MATMUL_SIZE) - - save :: auxa, auxb - - ! - ! ... BODY - ! - - CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NPROC, IERR) - CALL MPI_COMM_RANK(MPI_COMM_WORLD, MPIME, IERR) - - ME = MPIME + 1 - LDX = N/NPROC + 1 - - Q = INT(N/NPROC) - R = MOD(N,NPROC) - - ! ... Find out the number of elements in the local block - - NB = Q - IF(ME .LE. R ) NB = NB+1 - - ! ... Find out the global index of the local first row - - IF(ME .LE. R) THEN - ib_s = (Q+1)*(ME-1) + 1 - ELSE - ib_s = Q*(ME-1) + R + 1 - END IF - - if( n*(ldx+1) > matmul_size ) then - call errore('pmatmul','n*(ldx+1)>matmul_size',n*(ldx+1)) - end if - - ! ... Distribute blocks of A across processors - - IF( transa == 'N' .OR. transa == 'n' ) THEN - ibuf = 0 - ioff = ib_s - 1 - DO j = 1, n - DO i = 1, nb - auxa( ibuf + i ) = a( i + ioff, j ) - END DO - ibuf = ibuf + ldx - END DO - ELSE - ibuf = 0 - ioff = ib_s - 1 - DO j = 1, n - DO i = 1, nb - auxa( ibuf + i ) = CONJG( a( j, i + ioff ) ) - END DO - ibuf = ibuf + ldx - END DO - END IF - - ! ... Now use the scalar driver with the local block of matrix A - - CALL ZGEMM('N',TRANSB,NB,N,N,one,auxa(1),ldx,B(1,1),N,zero,auxb(1),ldx) - - ibuf = 0 - ioff = IB_S - 1 - DO J = 1, N - DO I = 1, NB - C( I + ioff, J ) = auxb( ibuf + I ) - END DO - ibuf = ibuf + ldx - END DO - - ! Here processors exchange blocks - - DO IP = 1, NPROC - 1 - - SOUR = MOD( ME - IP - 1 + NPROC, NPROC ) + 1 - DEST = MOD( ME + IP - 1 , NPROC ) + 1 - - ! Find out the number of elements in the block of processor SOUR - - NB_SOUR = q - IF(SOUR .LE. r ) NB_SOUR = NB_SOUR+1 - - ! Find out the global index of the first row owned by SOUR - - IF(SOUR .LE. R) THEN - ib_sour = (Q+1)*(SOUR-1) + 1 - ELSE - ib_sour = Q*(SOUR-1) + R + 1 - END IF - -# if defined __SHMEM - call shmem_barrier_all - call shmem_get64(auxa,auxb,2*ldx*N,sour-1) -# else - CALL MPI_SENDRECV(auxb(1), ldx*N, mpi_double_complex, DEST-1, ip, & - auxa(1), ldx*N, mpi_double_complex, SOUR-1, ip, & - MPI_COMM_WORLD,ISTATUS, IERR) -# endif - - IBUF = 0 - ioff = IB_SOUR - 1 - DO J = 1, N - DO I = 1, NB_SOUR - C( I + ioff, J ) = AUXA( IBUF + I ) - END DO - IBUF = IBUF + ldx - END DO - END DO - -# if defined __SHMEM - CALL shmem_barrier_all -# endif - -#else - - CALL ZGEMM(TRANSA, TRANSB, B, N, N, one, A(1,1), N, B(1,1), N, zero, C(1,1), N) - -#endif - - RETURN - END SUBROUTINE cmatmulp - -!==----------------------------------------------==! + SUBROUTINE ptredv( a, lda, d, e, v, ldv, nrl, n, nproc, me ) ! ! Parallel version of the famous HOUSEHOLDER tridiagonalization @@ -539,8 +50,6 @@ ! T.L. FREEMAN AND C.PHILLIPS, ! PRENTICE HALL INTERNATIONAL (1992). ! -! - SUBROUTINE ptredv(a,lda,d,e,v,ldv,nrl,n,nproc,me) ! ! ! INPUTS : @@ -935,11 +444,14 @@ D(1:N) = U(1:N) #endif - RETURN - END SUBROUTINE ptredv + RETURN + END SUBROUTINE ptredv !==----------------------------------------------==! + + SUBROUTINE ptqliv( d, e, n, z, ldz, nrl ) + ! ! Modified QL algorithm for CRAY T3E PARALLEL MACHINE ! calculate the eigenvectors and eigenvalues of a matrix reduced to @@ -967,8 +479,6 @@ ! in fact there is no communication at all. ! ! - SUBROUTINE ptqliv(d,e,n,z,ldz,nrl) - ! INPUTS : ! ! D(N) Diagonal elements of the tridiagonal matrix @@ -1141,6 +651,9 @@ !==----------------------------------------------==! + + SUBROUTINE peigsrtv(d,v,ldv,n,nrl) + ! ! This routine sort eigenvalues and eigenvectors ! generated by PTREDV and PTQLIV. @@ -1148,10 +661,6 @@ ! AUTHOR : Carlo Cavazzoni - SISSA 1997 ! comments and suggestions to : cava@sissa.it ! -! - - SUBROUTINE peigsrtv(d,v,ldv,n,nrl) - USE kinds IMPLICIT NONE @@ -1373,21 +882,12 @@ !==----------------------------------------------==! -! AB INITIO COSTANT PRESSURE MOLECULAR DYNAMICS -! ---------------------------------------------- -! Car-Parrinello Parallel Program -! Carlo Cavazzoni - Gerardo Ballabio -! SISSA, Trieste, Italy - 1997-99 -! Last modified: Mon Nov 15 10:47:13 MET 1999 -! ---------------------------------------------- -! BEGIN manual - SUBROUTINE cdiagonalize( iflg, a, d, ev, n, nproc, mpime ) + SUBROUTINE cdiagonalize( iflg, a, d, ev, n, nproc, mpime ) -! this routine calls the appropriate Lapack routine for diagonalizing a -! complex Hermitian matrix -! ---------------------------------------------- -! END manual + ! this routine calls the appropriate Lapack routine for diagonalizing a + ! complex Hermitian matrix + ! USE kinds @@ -1520,28 +1020,28 @@ !==----------------------------------------------==! - SUBROUTINE pzhptrd( n, nrl, ap, lda, d, e, tau, nproc, me) -! -! Parallel MPI version of the LAPACK routine ZHPTRD -! -! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA -! Dicember 12, 1999 -! -! REFERENCES : -! -! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING. -! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING, -! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE. -! -! PARALLEL NUMERICAL ALGORITHMS, -! T.L. FREEMAN AND C.PHILLIPS, -! PRENTICE HALL INTERNATIONAL (1992). -! -! LAPACK routine (version 2.0) -- -! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., -! Courant Institute, Argonne National Lab, and Rice University + SUBROUTINE pzhptrd( n, nrl, ap, lda, d, e, tau, nproc, me) -! + ! + ! Parallel MPI version of the LAPACK routine ZHPTRD + ! + ! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA + ! Dicember 12, 1999 + ! + ! REFERENCES : + ! + ! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING. + ! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING, + ! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE. + ! + ! PARALLEL NUMERICAL ALGORITHMS, + ! T.L. FREEMAN AND C.PHILLIPS, + ! PRENTICE HALL INTERNATIONAL (1992). + ! + ! LAPACK routine (version 2.0) -- + ! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + ! Courant Institute, Argonne National Lab, and Rice University + ! USE kinds @@ -1988,7 +1488,8 @@ !==----------------------------------------------==! - SUBROUTINE pzupgtr( n, nrl, ap, lda, tau, q, ldq, nproc, me) + SUBROUTINE pzupgtr( n, nrl, ap, lda, tau, q, ldq, nproc, me) + ! ! Parallel MPI version of the LAPACK routine ZUPGTR ! @@ -2789,7 +2290,7 @@ !==----------------------------------------------==! - SUBROUTINE zhpev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ ) + SUBROUTINE zhpev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ ) USE kinds IMPLICIT NONE @@ -2822,11 +2323,11 @@ #endif RETURN - END SUBROUTINE zhpev_drv + END SUBROUTINE zhpev_drv !==----------------------------------------------==! - SUBROUTINE pzhpev_drv( JOBZ, ap, lda, w, z, ldz, nrl, n, nproc, mpime) + SUBROUTINE pzhpev_drv( JOBZ, ap, lda, w, z, ldz, nrl, n, nproc, mpime) USE kinds IMPLICIT NONE CHARACTER :: JOBZ @@ -2843,9 +2344,706 @@ CALL pzsteqr( jobz, n, nrl, w, rwork, z, ldz, nproc, mpime) RETURN - END SUBROUTINE pzhpev_drv + END SUBROUTINE pzhpev_drv + +!==----------------------------------------------==! +! +! My parallel blas +! +!==----------------------------------------------==! + + +SUBROUTINE mattr_drv( m, k, a, lda, b, ldb, nb, dims, coor, comm ) + ! + ! Compute B as the transpose of matrix A + ! A and B are distributed on a 2D cartesian processor + ! grid in a block cyclic way (as in scalapack), + ! using a block size of NB + ! + ! B := A' + ! + ! A is a K by M matrix + ! B is an M by K matrix + ! + implicit none + ! + INTEGER, INTENT(IN) :: m, k + INTEGER, INTENT(IN) :: lda, ldb + REAL*8 :: a(lda,*), b(ldb,*) + INTEGER, INTENT(IN) :: nb, dims(2), coor(2), comm + ! +#if defined __MPI + + include 'mpif.h' + + ! + integer ierr + integer ndims, rowid, colid + integer coosrc(2), coodst(2), ipsrc, ipdst, mpime + integer ihsnd, ihrcv + logical periods(2) + ! + integer :: iu + integer :: i, j, nk, nm + integer :: ii, jj + integer :: isrc, jsrc + integer :: idst, jdst + integer :: itag + integer :: nmb, nkb + integer :: istatus( MPI_STATUS_SIZE ) + real*8, allocatable :: abuf(:,:) + ! + integer :: numroc + integer :: indxg2l + external :: numroc, indxg2l + + ! + CALL GRID2D_RANK( dims(1), dims(2), coor(1), coor(2), mpime ) + ! + iu = 200 + mpime + ! + ! Compute the global number of blocks for matrix dimension + ! + nmb = ( m + nb - 1 ) / nb + nkb = ( k + nb - 1 ) / nb + ! + ! WRITE(iu,*) 'TR',coor(1),coor(2),' m,k,nmb,nkb,nb = ',m,k,nmb,nkb,nb + ! WRITE(iu,*) 'TR',coor(1),coor(2),' ---------------- ' + ! + ALLOCATE( abuf( nb, nb ) ) + ! + DO i = 1, nmb + DO j = 1, nkb + ! + itag = j + nkb * (i-1) + ! + coosrc(1) = MOD( (j-1), dims(1) ) + coosrc(2) = MOD( (i-1), dims(2) ) + ! + coodst(1) = MOD( (i-1), dims(1) ) + coodst(2) = MOD( (j-1), dims(2) ) + ! + CALL GRID2D_RANK( dims(1), dims(2), coosrc(1), coosrc(2), ipsrc ) + CALL GRID2D_RANK( dims(1), dims(2), coodst(1), coodst(2), ipdst ) + ! + jsrc = INDXG2L( 1 + (j-1)*nb, nb, coor(1), 0, dims(1) ) + isrc = INDXG2L( 1 + (i-1)*nb, nb, coor(2), 0, dims(2) ) + ! + jdst = INDXG2L( 1 + (j-1)*nb, nb, coor(2), 0, dims(2) ) + idst = INDXG2L( 1 + (i-1)*nb, nb, coor(1), 0, dims(1) ) + ! + nk = MIN( nb, k - (j-1)*nb ) ! number of element in the block + nm = MIN( nb, m - (i-1)*nb ) ! number of element in the block + ! + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' cccsrc,ipsrc = ', coosrc(1), coosrc(2), ipsrc + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' cccdst,ipdst = ', coodst(1), coodst(2), ipdst + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' i,j,nm,nk = ', i,j,nm,nk + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' isrc,jsrc = ', isrc,jsrc + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' idst,jdst = ', idst,jdst + ! + IF( ipsrc == ipdst ) THEN + IF( ipsrc == mpime ) THEN + DO ii = 1, nm + DO jj = 1, nk + b( idst + ii - 1, jdst + jj - 1 ) = a( jsrc + jj - 1, isrc + ii - 1 ) + END DO + END DO + END IF + ELSE + IF( ipsrc == mpime ) THEN + DO ii = 1, nm + DO jj = 1, nk + abuf( ii, jj ) = a( jsrc + jj - 1, isrc + ii - 1 ) + ! + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' ii,jj,abuf_snd = ', ii,jj,abuf( ii, jj ) + ! + END DO + END DO + CALL MPI_ISEND( abuf, nb*nb, MPI_DOUBLE_PRECISION, ipdst, itag, comm, ihsnd, ierr ) + CALL mpi_wait(ihsnd, istatus, ierr) + ELSE IF( ipdst == mpime ) THEN + CALL MPI_IRECV( abuf, nb*nb, MPI_DOUBLE_PRECISION, ipsrc, itag, comm, ihrcv, ierr ) + CALL mpi_wait(ihrcv, istatus, ierr) + DO jj = 1, nk + DO ii = 1, nm + ! + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' ii,jj,abuf_rcv = ', ii,jj,abuf( ii, jj ) + ! + b( idst + ii - 1, jdst + jj - 1 ) = abuf( ii, jj ) + END DO + END DO + END IF + END IF + ! + ! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' ---------------- ' + ! + END DO + END DO + +#else + + INTEGER :: i, j + + DO j = 1, k + DO i = 1, m + B( i, j ) = A( j, i ) + END DO + END DO + +#endif + + RETURN + +END SUBROUTINE mattr_drv + + +! --------------------------------------------------------------------------------- + +SUBROUTINE matsplit_drv( m, k, ar, ldar, a, lda, nb, dims, coor, comm ) + ! + implicit none + ! + INTEGER, INTENT(IN) :: m, k + INTEGER, INTENT(IN) :: ldar + REAL*8 :: ar(ldar,*) ! matrix to be splitted, replicated on all proc + INTEGER, INTENT(IN) :: lda + REAL*8 :: a(lda,*) + INTEGER, INTENT(IN) :: nb, coor(2), dims(2), comm + ! + INTEGER :: i, j, nra, nca, ii, jj + ! + INTEGER :: numroc, INDXL2G + EXTERNAL :: numroc, INDXL2G + + nra = NUMROC( m, nb, coor(1), 0, dims(1) ) ! total number of local row for matrix A, C + nca = NUMROC( k, nb, coor(2), 0, dims(2) ) ! total number of local columns of A + + do j = 1, nca + jj = INDXL2G( j, NB, coor(2), 0, dims(2) ) + do i = 1, nra + ii = INDXL2G( i, NB, coor(1), 0, dims(1) ) + a( i, j ) = ar( ii, jj ) + end do + end do + + RETURN + +END SUBROUTINE matsplit_drv + + + +! --------------------------------------------------------------------------------- + +SUBROUTINE matmerge_drv( m, k, a, lda, ar, ldar, nb, dims, coor, comm ) + ! + implicit none + ! + INTEGER, INTENT(IN) :: m, k + INTEGER, INTENT(IN) :: ldar + REAL*8 :: ar(ldar,*) ! matrix to be merged, replicated on all proc + INTEGER, INTENT(IN) :: lda + REAL*8 :: a(lda,*) + INTEGER, INTENT(IN) :: nb, coor(2), dims(2), comm + ! + INTEGER :: i, j, ii, jj, ierr + +#if defined __MPI + + include 'mpif.h' + ! + + INTEGER :: jsrc, isrc, ipsrc, coosrc(2) + INTEGER :: nmb, nkb, nk, nm, mpime + + REAL*8, ALLOCATABLE :: buf(:,:) + ! + INTEGER :: INDXG2L + EXTERNAL :: INDXG2L + ! + CALL GRID2D_RANK( dims(1), dims(2), coor(1), coor(2), mpime ) + + nmb = ( m + nb - 1 ) / nb + nkb = ( k + nb - 1 ) / nb + + ALLOCATE( buf( nb, nb ) ) + + DO j = 1, nkb + DO i = 1, nmb + ! + coosrc(1) = MOD( (i-1), dims(1) ) + coosrc(2) = MOD( (j-1), dims(2) ) + ! + CALL GRID2D_RANK( dims(1), dims(2), coosrc(1), coosrc(2), ipsrc ) + ! + isrc = INDXG2L( 1 + (i-1)*nb, nb, coor(1), 0, dims(1) ) + jsrc = INDXG2L( 1 + (j-1)*nb, nb, coor(2), 0, dims(2) ) + ! + nm = MIN( nb, m - (i-1)*nb ) ! number of element in the block + nk = MIN( nb, k - (j-1)*nb ) ! number of element in the block + + IF( ipsrc == mpime ) THEN + DO jj = 1, nk + DO ii = 1, nm + buf( ii, jj ) = a( isrc + ii - 1, jsrc + jj - 1 ) + END DO + END DO + ENDIF + ! + CALL MPI_BCAST( buf, nb*nb, MPI_DOUBLE_PRECISION, ipsrc, comm, ierr ) + ! + do jj = 1, nk + do ii = 1, nm + ar( ii + (i-1)*nb, jj + (j-1)*nb ) = buf( ii, jj ) + end do + end do + ! + END DO + END DO + ! + DEALLOCATE( buf ) + +#else + + DO j = 1, k + DO i = 1, m + ar( i, j ) = a( i, j ) + END DO + END DO + +#endif + + RETURN +END SUBROUTINE matmerge_drv + + + +! --------------------------------------------------------------------------------- + + +SUBROUTINE matscal_drv( m, n, beta, c, ldc, nb, dims, coor, comm ) + ! + implicit none + ! + INTEGER, INTENT(IN) :: m, n + REAL*8, INTENT(IN) :: beta + INTEGER, INTENT(IN) :: ldc + REAL*8 :: c(ldc,*) + INTEGER, INTENT(IN) :: nb, coor(2), dims(2), comm + ! + INTEGER :: i, j, nr, nc, ierr + ! + INTEGER :: numroc + EXTERNAL :: numroc + + nr = NUMROC( m, nb, coor(1), 0, dims(1) ) ! local row of C + nc = NUMROC( n, nb, coor(2), 0, dims(2) ) ! local colum of C + + IF( beta == 0.0d0 ) THEN + do j = 1, nc + do i = 1, nr + c(i,j) = 0.0d0 + end do + end do + ELSE + do j = 1, nc + do i = 1, nr + c(i,j) = beta * c(i,j) + end do + end do + END IF + + RETURN + +END SUBROUTINE + + +! --------------------------------------------------------------------------------- + + +SUBROUTINE matmul_drv( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC, nb, dims, coor, comm ) + ! + implicit none + ! + CHARACTER(LEN=1), INTENT(IN) :: transa, transb + INTEGER, INTENT(IN) :: m, n, k + REAL*8, INTENT(IN) :: alpha, beta + INTEGER, INTENT(IN) :: lda, ldb, ldc + REAL*8 :: a(lda,*), b(ldb,*), c(ldc,*) + INTEGER, INTENT(IN) :: nb, dims(2), coor(2), comm + ! + ! DGEMM PERFORMS ONE OF THE MATRIX-MATRIX OPERATIONS + ! + ! C := ALPHA*OP( A )*OP( B ) + BETA*C, + ! + ! WHERE OP( X ) IS ONE OF + ! + ! OP( X ) = X OR OP( X ) = X', + ! + ! ALPHA AND BETA ARE SCALARS, AND A, B AND C ARE MATRICES, WITH OP( A ) + ! AN M BY K MATRIX, OP( B ) A K BY N MATRIX AND C AN M BY N MATRIX. + ! + ! + ! + +#if defined __MPI + + include 'mpif.h' + ! + integer ierr + integer ndims, rowid, colid + integer comm_row, comm_col + ! + integer :: ib, jb, kb, ibl, kbl, jbl + integer :: i, j, kk, ni, nj, nk, nm, il, jl + integer :: nnb, nmb, nkb + integer :: nr, nra, nca, nc, nrb, ncb, ii, jj + integer :: nrt, ncat, nct, nrbt + real*8, allocatable :: abuf(:,:), bbuf(:,:) + real*8, allocatable :: at(:,:) + real*8, allocatable :: bt(:,:) + ! + integer :: numroc + integer :: indxg2l + external :: numroc, indxg2l + ! + IF( dims(1) * dims(2) == 1 ) THEN + + ! if there is only one proc no need of using parallel alg. + + call dgemm( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + + RETURN + + END IF + ! + + CALL MPI_COMM_SPLIT( COMM, coor(2), coor(1), COMM_COL, IERR ) + CALL MPI_COMM_RANK( COMM_COL, rowid, IERR ) + ! + CALL MPI_COMM_SPLIT( COMM, coor(1), coor(2), COMM_ROW, IERR ) + CALL MPI_COMM_RANK( COMM_ROW, colid, IERR ) + ! + ! Compute the global number of blocks for matrix dimension + ! + nmb = ( m + nb - 1 ) / nb + ! + nnb = ( n + nb - 1 ) / nb + ! + nkb = ( k + nb - 1 ) / nb + ! + ! Compute the total number of local row for matrix A, C + ! + nr = NUMROC( m, nb, coor(1), 0, dims(1) ) ! local row of C + ! + nra = NUMROC( m, nb, coor(1), 0, dims(1) ) ! local row of OP( A ) + nca = NUMROC( k, nb, coor(2), 0, dims(2) ) ! local columns of OP( A ) + ! + nrb = NUMROC( k, nb, coor(1), 0, dims(1) ) ! local row of OP( B ) + ncb = NUMROC( n, nb, coor(2), 0, dims(2) ) ! local colum of OP( B ) + ! + nc = NUMROC( n, nb, coor(2), 0, dims(2) ) ! local colum of C + ! + IF( transa == 'T' .OR. transa == 't' ) THEN + ! + ALLOCATE( at( nra, nca ) ) + ! + CALL mattr_drv( m, k, a, lda, at, nra, nb, dims, coor, comm ) + ! + END IF + ! + IF( transb == 'T' .OR. transb == 't' ) THEN + ! + ALLOCATE( bt( nrb, ncb ) ) + ! + CALL mattr_drv( k, n, b, ldb, bt, nrb, nb, dims, coor, comm ) + ! + END IF + ! + ! Scale matrix C + ! + CALL matscal_drv( m, n, beta, c, ldc, nb, dims, coor, comm ) + ! + ! loop over the rows/columns blocks of matrix OP(A)/OP(B) + ! + do kb = 1, nkb + ! + kk = ( kb - 1 ) * nb + 1 ! first element of the block (global index) + nk = MIN( nb, k - kk + 1 ) ! number of element in the block + + colid = MOD( (kb-1), dims(2) ) ! processor owning the block + rowid = MOD( (kb-1), dims(1) ) + + allocate( abuf( nr, nk ) ) + + if( colid == coor(2) ) then + nrt = 0 + ibl = 0 + kbl = INDXG2L( 1 + (kb-1)*nb, nb, coor(2), 0, dims(2) ) + do ib = 1 + coor(1), nmb, dims(1) + i = ( ib - 1 ) * nb + 1 + ni = MIN( nb, m - i + 1 ) + IF( transa == 'T' .OR. transa == 't' ) THEN + do jj = 1, nk + do ii = 1, ni + abuf( ii + nrt, jj ) = at( ii + ibl*nb, jj + kbl - 1 ) + end do + end do + ELSE + do jj = 1, nk + do ii = 1, ni + abuf( ii + nrt, jj ) = a( ii + ibl*nb, jj + kbl - 1 ) + end do + end do + END IF + nrt = nrt + ni + ibl = ibl + 1 + end do + end if + CALL MPI_BCAST( abuf(1,1), nr*nk, MPI_DOUBLE_PRECISION, colid, COMM_ROW, IERR ) + + allocate( bbuf( nk, nc ) ) + + if( rowid == coor(1) ) then + nct = 0 + jbl = 0 + kbl = INDXG2L( 1 + (kb-1)*nb, nb, coor(1), 0, dims(1) ) + do jb = 1 + coor(2), nnb, dims(2) + j = ( jb - 1 ) * nb + 1 + nj = MIN( nb, n - j + 1 ) + IF( transb == 'T' .OR. transb == 't' ) THEN + do jj = 1, nj + do ii = 1, nk + bbuf( ii, jj + nct ) = bt( ii + kbl - 1, jj + jbl*nb ) + end do + end do + ELSE + do jj = 1, nj + do ii = 1, nk + bbuf( ii, jj + nct ) = b( ii + kbl - 1, jj + jbl*nb ) + end do + end do + END IF + nct = nct + nj + jbl = jbl + 1 + end do + end if + + CALL MPI_BCAST( bbuf(1,1), nk*nc, MPI_DOUBLE_PRECISION, rowid, COMM_COL, IERR ) + + ii = 1 + do ib = 1 + coor(1), nmb, dims(1) + i = ( ib - 1 ) * nb + 1 + il = INDXG2L( i, nb, coor(1), 0, dims(1) ) + ni = MIN( nb, m - i + 1 ) + jj = 1 + do jb = 1 + coor(2), nnb, dims(2) + j = ( jb - 1 ) * nb + 1 + jl = INDXG2L( j, nb, coor(2), 0, dims(2) ) + nj = MIN( nb, n - j + 1 ) + call dgemm( 'n', 'n', ni, nj, nk, alpha, abuf( ii, 1 ), nra, bbuf( 1, jj ), nk, 1.0d0, c( il, jl ), ldc ) + jj = jj + nj + end do + ii = ii + ni + end do + + deallocate( abuf ) + deallocate( bbuf ) + + end do + + IF( ALLOCATED( at ) ) DEALLOCATE( at ) + IF( ALLOCATED( bt ) ) DEALLOCATE( bt ) + + +#else + + ! if we are not compiling with __MPI this is equivalent to a blas call + + call dgemm( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + +#endif + + + RETURN + +END SUBROUTINE + + +!==----------------------------------------------==! +! +! Copyright (C) 2005 Carlo Cavazzoni +! This file is distributed under the terms of the +! GNU General Public License. See the file `License' +! in the root directory of the present distribution, +! or http://www.gnu.org/copyleft/gpl.txt . +! + + +SUBROUTINE rep_matmul_drv( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC, comm ) + ! + ! Parallel matrix multiplication with replicated matrix + ! + implicit none + ! + CHARACTER(LEN=1), INTENT(IN) :: transa, transb + INTEGER, INTENT(IN) :: m, n, k + REAL*8, INTENT(IN) :: alpha, beta + INTEGER, INTENT(IN) :: lda, ldb, ldc + REAL*8 :: a(lda,*), b(ldb,*), c(ldc,*) + INTEGER, INTENT(IN) :: comm + ! + ! DGEMM PERFORMS ONE OF THE MATRIX-MATRIX OPERATIONS + ! + ! C := ALPHA*OP( A )*OP( B ) + BETA*C, + ! + ! WHERE OP( X ) IS ONE OF + ! + ! OP( X ) = X OR OP( X ) = X', + ! + ! ALPHA AND BETA ARE SCALARS, AND A, B AND C ARE MATRICES, WITH OP( A ) + ! AN M BY K MATRIX, OP( B ) A K BY N MATRIX AND C AN M BY N MATRIX. + ! + ! + ! + +#if defined __MPI + + include 'mpif.h' + ! + + INTEGER :: ME, I, II, J, JJ, IP, SOUR, DEST, INFO, IERR, ioff, ldx + INTEGER :: NB, IB_S, NB_SOUR, IB_SOUR, IBUF + INTEGER :: nproc, mpime, q, r + + REAL*8, ALLOCATABLE :: auxa( : ) + REAL*8, ALLOCATABLE :: auxc( : ) + + ! + ! ... BODY + ! + + CALL MPI_COMM_SIZE(comm, NPROC, IERR) + CALL MPI_COMM_RANK(comm, MPIME, IERR) + + IF ( NPROC == 1 ) THEN + + ! if there is only one proc no need of using parallel alg. + + CALL DGEMM(TRANSA, TRANSB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc) + + RETURN + + END IF + + ME = MPIME + 1 + Q = INT( m / NPROC ) + R = MOD( m , NPROC ) + + ! ... Find out the number of elements in the local block + ! along "M" first dimension os matrix A + + NB = Q + IF( ME <= R ) NB = NB + 1 + + ! ... Find out the global index of the local first row + + IF( ME <= R ) THEN + ib_s = (Q+1)*(ME-1) + 1 + ELSE + ib_s = Q*(ME-1) + R + 1 + END IF + + ldx = m / nproc + 1 + + ALLOCATE( auxa( MAX( n, m ) * ldx ) ) + ALLOCATE( auxc( MAX( n, m ) * ldx ) ) + + IF( TRANSA == 'N' .OR. TRANSA == 'n' ) THEN + ibuf = 0 + ioff = ib_s - 1 + DO J = 1, k + DO I = 1, NB + auxa( ibuf + I ) = A( I + ioff, J ) + END DO + ibuf = ibuf + ldx + END DO + ELSE + ibuf = 0 + ioff = ib_s - 1 + DO J = 1, k + DO I = 1, NB + auxa( ibuf + I ) = A( J, I + ioff ) + END DO + ibuf = ibuf + ldx + END DO + !ioff = ib_s - 1 + !call mytranspose( A( 1, ioff + 1 ), lda, auxa(1), ldx, m, nb) + END IF + + IF( beta /= 0.0d0 ) THEN + ibuf = 0 + ioff = ib_s - 1 + DO J = 1, n + DO I = 1, NB + auxc( ibuf + I ) = C( I + ioff, J ) + END DO + ibuf = ibuf + ldx + END DO + END IF + + CALL DGEMM( 'N', transb, nb, n, k, alpha, auxa(1), ldx, B, ldb, beta, auxc(1), ldx ) + + ! ... Here processors exchange blocks + + DO IP = 0, NPROC-1 + + ! ... Find out the number of elements in the block of processor SOUR + + NB_SOUR = q + IF( (IP+1) .LE. r ) NB_SOUR = NB_SOUR+1 + + ! ... Find out the global index of the first row owned by SOUR + + IF( (IP+1) .LE. r ) THEN + ib_sour = (Q+1)*IP + 1 + ELSE + ib_sour = Q*IP + R + 1 + END IF + + IF( mpime == ip ) auxa(1:n*ldx) = auxc(1:n*ldx) + + CALL MPI_BCAST( auxa(1), ldx*n, mpi_double_precision, ip, comm, IERR) + + IBUF = 0 + ioff = IB_SOUR - 1 + DO J = 1, N + DO I = 1, NB_SOUR + C( I + ioff, J ) = AUXA( IBUF + I ) + END DO + IBUF = IBUF + ldx + END DO + + END DO + + DEALLOCATE( auxa, auxc ) + + +#else + + ! if we are not compiling with __MPI this is equivalent to a blas call + + CALL DGEMM(TRANSA, TRANSB, m, N, k, alpha, A, lda, B, ldb, beta, C, ldc) + +#endif + + + + RETURN + +END SUBROUTINE rep_matmul_drv + !==----------------------------------------------==! END MODULE parallel_toolkit !==----------------------------------------------==! diff --git a/Modules/read_namelists.f90 b/Modules/read_namelists.f90 index 711579314..741e704fb 100644 --- a/Modules/read_namelists.f90 +++ b/Modules/read_namelists.f90 @@ -1062,8 +1062,8 @@ MODULE read_namelists_module IF( calculation == ' ' ) & CALL errore( sub_name,' calculation not specified ',1) IF( prog == 'CP' ) THEN - IF( calculation == 'nscf' .OR. calculation == 'phonon' ) & - CALL errore( sub_name,' calculation '//TRIM(calculation)// & + IF( calculation == 'phonon' ) & + CALL errore( sub_name,' calculation '//calculation// & & ' not implemented ',1) END IF IF( ndr < 50 ) & @@ -1573,9 +1573,9 @@ MODULE read_namelists_module cell_dynamics = 'none' END IF CASE ('nscf') - IF( prog == 'CP' ) & - CALL errore( sub_name,' calculation '//TRIM(calculation)// & - & ' not implemented ',1) + ! IF( prog == 'CP' ) & + ! CALL errore( sub_name,' calculation '//calculation// & + ! & ' not implemented ',1) IF( prog == 'CP' ) occupations = 'bogus' IF( prog == 'CP' ) electron_dynamics = 'damp' IF( prog == 'PW' ) startingpot = 'file' diff --git a/Modules/xml_io_base.f90 b/Modules/xml_io_base.f90 index 06e2d6e41..b897cbe8b 100644 --- a/Modules/xml_io_base.f90 +++ b/Modules/xml_io_base.f90 @@ -1000,8 +1000,8 @@ MODULE xml_io_base ! CALL mp_bcast( ierr, ionode_id ) ! - CALL errore( 'read_rho_xml', & - 'cannot open rho_file file for writing', ierr ) + CALL errore( ' read_rho_xml ', & + 'cannot open ' // rho_file // ' file for reading', ierr ) ! IF ( ionode ) THEN ! diff --git a/flib/Makefile b/flib/Makefile index 3ac5acff1..834610c38 100644 --- a/flib/Makefile +++ b/flib/Makefile @@ -48,7 +48,6 @@ infog1l.o \ infog2l.o \ localdim.o \ localindex.o \ -matmul.o \ npreroc.o \ numroc.o \ ownerof.o \ diff --git a/flib/gridsetup.f90 b/flib/gridsetup.f90 index 8b714237f..487e1018e 100644 --- a/flib/gridsetup.f90 +++ b/flib/gridsetup.f90 @@ -10,31 +10,40 @@ !----------------------------------------------------------------------- ! - subroutine GRIDSETUP(NPROC,NPROW,NPCOL) +SUBROUTINE GRID2D_DIMS( nproc, nprow, npcol ) + ! + ! This subroutine factorizes the number of processors (NPROC) + ! into NPROW and NPCOL, that are the sizes of the 2D processors mesh. + ! + ! Written by Carlo Cavazzoni + ! + IMPLICIT NONE + INTEGER, INTENT(IN) :: nproc + INTEGER, INTENT(OUT) :: nprow, npcol + integer sqrtnp,i + sqrtnp = int( sqrt( dble(nproc) ) + 1 ) + do i=1,sqrtnp + if(mod(nproc,i).eq.0) nprow = i + end do + npcol = nproc/nprow + RETURN +END SUBROUTINE -! -! This subroutine factorizes the number of processors (NPROC) -! into NPROW and NPCOL, that are the sizes of the 2D processors mesh. -! -! Written by Carlo Cavazzoni -! - - IMPLICIT NONE - - integer nproc,nprow,npcol - integer sqrtnp,i - - if(nproc.lt.2) then - npcol = 1 - nprow = 1 - else - sqrtnp = int( sqrt( DBLE(nproc) ) + 1 ) - do i=1,sqrtnp - if(mod(nproc,i).eq.0) nprow = i - end do - npcol = nproc/nprow - endif - - return - end subroutine gridsetup +SUBROUTINE GRID2D_COORDS( rank, nprow, npcol, row, col ) + IMPLICIT NONE + INTEGER, INTENT(IN) :: rank ! process index starting from 0 + INTEGER, INTENT(IN) :: nprow, npcol ! dimensions of the processor grid + INTEGER, INTENT(OUT) :: row, col + row = MOD( rank, nprow ) + col = rank / nprow + RETURN +END SUBROUTINE +SUBROUTINE GRID2D_RANK( nprow, npcol, row, col, rank ) + IMPLICIT NONE + INTEGER, INTENT(OUT) :: rank ! process index starting from 0 + INTEGER, INTENT(IN) :: nprow, npcol ! dimensions of the processor grid + INTEGER, INTENT(IN) :: row, col + rank = row + col * nprow + RETURN +END SUBROUTINE diff --git a/flib/localindex.f90 b/flib/localindex.f90 index e37a0e054..f47d8a365 100644 --- a/flib/localindex.f90 +++ b/flib/localindex.f90 @@ -122,3 +122,83 @@ ! End of INDXG2L ! END FUNCTION lind_block_cyclic + + +!=----------------------------------------------------------------------------=! + + + INTEGER FUNCTION gind_cyclic( lind, n, np, me ) + +! This function computes the global index of a distributed array entry +! pointed to by the local index lind of the process indicated by me. +! lind local index of the distributed matrix entry. +! N is the size of the global array. +! me The coordinate of the process whose local array row or +! column is to be determined. +! np The total number processes over which the distributed +! matrix is distributed. +! + + INTEGER, INTENT(IN) :: lind, n, me, np + INTEGER r, q + + gind_cyclic = (lind-1) * np + me + 1 + + RETURN + END FUNCTION gind_cyclic + + +!=----------------------------------------------------------------------------=! + + + INTEGER FUNCTION gind_block( lind, n, np, me ) + +! This function computes the global index of a distributed array entry +! pointed to by the local index lind of the process indicated by me. +! lind local index of the distributed matrix entry. +! N is the size of the global array. +! me The coordinate of the process whose local array row or +! column is to be determined. +! np The total number processes over which the distributed +! matrix is distributed. + + + INTEGER, INTENT(IN) :: lind, n, me, np + INTEGER r, q + + q = INT(n/np) + r = MOD(n,np) + IF( me < r ) THEN + gind_block = (Q+1)*me + lind + ELSE + gind_block = Q*me + R + lind + END IF + + RETURN + END FUNCTION gind_block + +!=----------------------------------------------------------------------------=! + + INTEGER FUNCTION gind_block_cyclic( lind, n, nb, np, me ) + +! This function computes the global index of a distributed array entry +! pointed to by the local index lind of the process indicated by me. +! lind local index of the distributed matrix entry. +! N is the size of the global array. +! NB size of the blocks the distributed matrix is split into. +! me The coordinate of the process whose local array row or +! column is to be determined. +! np The total number processes over which the distributed +! matrix is distributed. + + + INTEGER, INTENT(IN) :: lind, n, nb, me, np + INTEGER r, q, isrc + + isrc = 0 + gind_block_cyclic = np*NB*((lind-1)/NB) + & + MOD(lind-1,NB) + MOD(np+me-isrc, np)*NB + 1 + + RETURN + END FUNCTION gind_block_cyclic + diff --git a/pwtools/matdyn.f90 b/pwtools/matdyn.f90 index 14e9a0e5b..41d2cb60e 100644 --- a/pwtools/matdyn.f90 +++ b/pwtools/matdyn.f90 @@ -1614,7 +1614,8 @@ SUBROUTINE a2Fdos & ! INTEGER, INTENT(in) :: nat, nq, nr1, nr2, nr3, ibrav, ndos, ntetra, & tetra(4, ntetra) - LOGICAL, INTENT(in) :: dos, asr + LOGICAL, INTENT(in) :: dos + CHARACTER(LEN=*), INTENT(IN) :: asr REAL(DP), INTENT(in) :: freq(3*nat,nq), q(3,nq), at(3,3), bg(3,3), & tau(3,nat), alat, Emin, DeltaE ! @@ -1664,7 +1665,7 @@ SUBROUTINE a2Fdos & filea2F = 60 + isig CALL readfg ( filea2F, nr1, nr2, nr3, nat, frcg ) ! - if (asr /= 'no') then + if ( asr /= 'no') then CALL set_asr (asr, nr1, nr2, nr3, frcg, zeu, nat_blk, ibrav, tau_blk) endif !