diff --git a/CPV/src/cg_sub.f90 b/CPV/src/cg_sub.f90 index f0e90882c..2c07e2087 100644 --- a/CPV/src/cg_sub.f90 +++ b/CPV/src/cg_sub.f90 @@ -53,7 +53,6 @@ use orthogonalize_base, ONLY : calphi_bgrp use cp_interfaces, ONLY : rhoofr, dforce, compute_stress, vofrho, nlfl_bgrp, prefor use cp_interfaces, ONLY : nlsm2_bgrp, calbec, caldbec_bgrp, nlfq_bgrp - use cp_interfaces, ONLY : collect_lambda, distribute_lambda USE cp_main_variables, ONLY : descla, drhor, drhog USE descriptors, ONLY : la_descriptor, ldim_cyclic USE mp_global, ONLY: me_image, my_image_id, nbgrp @@ -93,6 +92,7 @@ complex(dp) :: phi( ngw, nbspx ) real(dp) :: dbec(nhsa,nbspx,3,3) ! + include 'laxlib.fh' ! integer :: i, j, ig, k, is, iss,ia, iv, jv, il, ii, jj, kk, ip, nrlx integer :: inl, jnl, niter, istart, nss, nrl, me_rot, np_rot , comm diff --git a/CPV/src/cglib.f90 b/CPV/src/cglib.f90 index f193f7473..b841f5179 100644 --- a/CPV/src/cglib.f90 +++ b/CPV/src/cglib.f90 @@ -122,39 +122,6 @@ end subroutine rotate -!----------------------------------------------------------------------- - subroutine ddiag(nx,n,amat,dval,dvec,iflag) -!----------------------------------------------------------------------- -! - use dspev_module, only: dspev_drv - use kinds , only : dp - - implicit none - - integer nx,n,ndim,iflag,k,i,j - real(dp) dval(n) - real(dp) amat(nx,n), dvec(nx,n) - real(dp), allocatable:: ap(:) - - ndim=(n*(n+1))/2 - allocate(ap(ndim)) - ap(:)=0.d0 - - k=0 - do j=1,n - do i=1,j - k=k+1 - ap(k)=amat(i,j) - end do - end do - - CALL dspev_drv( 'V', 'U', n, ap, dval, dvec, nx ) - - deallocate(ap) - - return - end subroutine ddiag - subroutine minparabola(ene0,dene0,ene1,passop,passo,stima) !this subroutines finds the minimum of a quadratic real function @@ -206,7 +173,6 @@ subroutine pc2(a,beca,b,becb) use uspp_param, only: nh, nvb, ish use uspp, only :nhsa=>nkb use uspp, only :qq_nt - use parallel_toolkit, only : rep_matmul_drv implicit none @@ -776,7 +742,6 @@ SUBROUTINE para_dgemm( transa, transb, m, n, k, & ! ... trivial parallelization (splitting matrix B by columns) of dgemm ! USE kinds, ONLY : DP - USE parallel_toolkit ! IMPLICIT NONE ! diff --git a/CPV/src/cp_interfaces.f90 b/CPV/src/cp_interfaces.f90 index 92e6a342a..3a4b1748a 100644 --- a/CPV/src/cp_interfaces.f90 +++ b/CPV/src/cp_interfaces.f90 @@ -81,8 +81,6 @@ PUBLIC :: set_eitot PUBLIC :: set_evtot ! - PUBLIC :: print_lambda - ! PUBLIC :: move_electrons ! PUBLIC :: compute_stress @@ -106,7 +104,6 @@ PUBLIC :: dennl PUBLIC :: nlfq_bgrp PUBLIC :: collect_bec - PUBLIC :: distribute_lambda ! ------------------------------------ ! @@ -806,19 +803,6 @@ END SUBROUTINE END INTERFACE - - INTERFACE print_lambda - SUBROUTINE print_lambda_x( lambda, descla, n, nshow, ccc, iunit ) - USE kinds, ONLY : DP - USE descriptors, ONLY: la_descriptor - IMPLICIT NONE - REAL(DP), INTENT(IN) :: lambda(:,:,:), ccc - TYPE(la_descriptor), INTENT(IN) :: descla(:) - INTEGER, INTENT(IN) :: n, nshow - INTEGER, INTENT(IN), OPTIONAL :: iunit - END SUBROUTINE - END INTERFACE - INTERFACE protate SUBROUTINE protate_x ( c0, bec, c0rot, becrot, ngwl, nss, noff, lambda, nrl, & na, nsp, ish, nh, np_rot, me_rot, comm_rot ) @@ -1024,62 +1008,6 @@ END SUBROUTINE collect_bec_x END INTERFACE - INTERFACE distribute_lambda - SUBROUTINE distribute_lambda_x( lambda_repl, lambda_dist, desc ) - USE kinds, ONLY : DP - USE descriptors, ONLY : la_descriptor - REAL(DP), INTENT(IN) :: lambda_repl(:,:) - REAL(DP), INTENT(OUT) :: lambda_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - END SUBROUTINE distribute_lambda_x - END INTERFACE - - PUBLIC :: collect_lambda - INTERFACE collect_lambda - SUBROUTINE collect_lambda_x( lambda_repl, lambda_dist, desc ) - USE kinds, ONLY : DP - USE descriptors, ONLY : la_descriptor - REAL(DP), INTENT(OUT) :: lambda_repl(:,:) - REAL(DP), INTENT(IN) :: lambda_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - END SUBROUTINE collect_lambda_x - END INTERFACE - - PUBLIC :: setval_lambda - INTERFACE setval_lambda - SUBROUTINE setval_lambda_x( lambda_dist, i, j, val, desc ) - USE kinds, ONLY : DP - USE descriptors, ONLY : la_descriptor - REAL(DP), INTENT(OUT) :: lambda_dist(:,:) - INTEGER, INTENT(IN) :: i, j - REAL(DP), INTENT(IN) :: val - TYPE(la_descriptor), INTENT(IN) :: desc - END SUBROUTINE setval_lambda_x - END INTERFACE - - PUBLIC :: distribute_zmat - INTERFACE distribute_zmat - SUBROUTINE distribute_zmat_x( zmat_repl, zmat_dist, desc ) - USE kinds, ONLY : DP - USE descriptors, ONLY : la_descriptor - REAL(DP), INTENT(IN) :: zmat_repl(:,:) - REAL(DP), INTENT(OUT) :: zmat_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - END SUBROUTINE distribute_zmat_x - END INTERFACE - - PUBLIC :: collect_zmat - INTERFACE collect_zmat - SUBROUTINE collect_zmat_x( zmat_repl, zmat_dist, desc ) - USE kinds, ONLY : DP - USE descriptors, ONLY : la_descriptor - REAL(DP), INTENT(OUT) :: zmat_repl(:,:) - REAL(DP), INTENT(IN) :: zmat_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - END SUBROUTINE collect_zmat_x - END INTERFACE - - !=----------------------------------------------------------------------------=! !=----------------------------------------------------------------------------=! diff --git a/CPV/src/cp_restart_new.f90 b/CPV/src/cp_restart_new.f90 index ad353b005..1be110def 100644 --- a/CPV/src/cp_restart_new.f90 +++ b/CPV/src/cp_restart_new.f90 @@ -69,7 +69,6 @@ MODULE cp_restart_new nproc_image USE mp_bands, ONLY : my_bgrp_id, intra_bgrp_comm, & root_bgrp, root_bgrp_id - USE mp_diag, ONLY : nproc_ortho USE run_info, ONLY : title USE gvect, ONLY : ngm, ngm_g, ecutrho USE gvecs, ONLY : ngms_g, ecuts @@ -1608,7 +1607,6 @@ MODULE cp_restart_new USE mp_images, ONLY : intra_image_comm USE io_global, ONLY : ionode, ionode_id USE cp_main_variables, ONLY : descla - USE cp_interfaces, ONLY : collect_lambda ! IMPLICIT NONE CHARACTER(LEN=*), INTENT(in) :: filename @@ -1616,6 +1614,8 @@ MODULE cp_restart_new REAL(dp), INTENT(in) :: lambda(:,:) INTEGER, INTENT(out) :: ierr ! + include 'laxlib.fh' + ! REAL(dp), ALLOCATABLE :: mrepl(:,:) ! IF ( ionode ) OPEN( unit=iunpun, file =TRIM(filename), & @@ -1647,9 +1647,11 @@ MODULE cp_restart_new USE mp_images, ONLY : intra_image_comm USE io_global, ONLY : ionode, ionode_id USE cp_main_variables, ONLY : descla - USE cp_interfaces, ONLY : distribute_lambda ! IMPLICIT NONE + + include 'laxlib.fh' + CHARACTER(LEN=*), INTENT(in) :: filename INTEGER, INTENT(in) :: iunpun, iss, nspin, nudx REAL(dp), INTENT(out) :: lambda(:,:) @@ -1690,10 +1692,12 @@ MODULE cp_restart_new USE mp_images, ONLY : intra_image_comm USE io_global, ONLY : ionode, ionode_id USE cp_main_variables, ONLY : descla - USE cp_interfaces, ONLY : collect_zmat USE electrons_base,ONLY: nspin, nudx ! IMPLICIT NONE + + include 'laxlib.fh' + REAL(dp), INTENT(in) :: mat_z(:,:,:) INTEGER, INTENT(in) :: ndw INTEGER, INTENT(out) :: ierr @@ -1742,10 +1746,12 @@ MODULE cp_restart_new USE mp_images, ONLY : intra_image_comm USE io_global, ONLY : ionode, ionode_id USE cp_main_variables, ONLY : descla - USE cp_interfaces, ONLY : distribute_zmat USE electrons_base,ONLY: nspin, nudx ! IMPLICIT NONE + + include 'laxlib.fh' + REAL(dp), INTENT(out) :: mat_z(:,:,:) INTEGER, INTENT(in) :: ndr INTEGER, INTENT(out) :: ierr diff --git a/CPV/src/cplib.f90 b/CPV/src/cplib.f90 index 94f047300..68a3761d3 100644 --- a/CPV/src/cplib.f90 +++ b/CPV/src/cplib.f90 @@ -1421,46 +1421,6 @@ subroutine dylmr2_( nylm, ngy, g, gg, ainv, dylm ) ! end subroutine dylmr2_ - -SUBROUTINE print_lambda_x( lambda, descla, n, nshow, ccc, iunit ) - USE kinds, ONLY : DP - USE descriptors, ONLY: la_descriptor - USE io_global, ONLY: stdout, ionode - USE cp_interfaces, ONLY: collect_lambda - USE electrons_base, ONLY: nudx - IMPLICIT NONE - real(DP), intent(in) :: lambda(:,:,:), ccc - TYPE(la_descriptor), INTENT(IN) :: descla(:) - integer, intent(in) :: n, nshow - integer, intent(in), optional :: iunit - ! - integer :: nnn, j, un, i, is - real(DP), allocatable :: lambda_repl(:,:) - if( present( iunit ) ) then - un = iunit - else - un = stdout - end if - nnn = min( nudx, nshow ) - ALLOCATE( lambda_repl( nudx, nudx ) ) - IF( ionode ) WRITE( un,*) - DO is = 1, SIZE( lambda, 3 ) - CALL collect_lambda( lambda_repl, lambda(:,:,is), descla(is) ) - IF( ionode ) THEN - WRITE( un,3370) ' lambda nudx, spin = ', nudx, is - IF( nnn < n ) WRITE( un,3370) ' print only first ', nnn - DO i=1,nnn - WRITE( un,3380) (lambda_repl(i,j)*ccc,j=1,nnn) - END DO - END IF - END DO - DEALLOCATE( lambda_repl ) -3370 FORMAT(26x,a,2i4) -3380 FORMAT(9f8.4) - RETURN -END SUBROUTINE print_lambda_x -!----------------------------------------------------------------------- -! !----------------------------------------------------------------------- SUBROUTINE denlcc_x( nnr, nspin, vxcr, sfac, drhocg, dcc ) !----------------------------------------------------------------------- @@ -2038,27 +1998,6 @@ END SUBROUTINE print_lambda_x ! RETURN END SUBROUTINE collect_bec_x - -!------------------------------------------------------------------------ - SUBROUTINE distribute_lambda_x( lambda_repl, lambda_dist, desc ) -!------------------------------------------------------------------------ - USE kinds, ONLY : DP - USE descriptors - REAL(DP), INTENT(IN) :: lambda_repl(:,:) - REAL(DP), INTENT(OUT) :: lambda_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - INTEGER :: i, j, ic, ir - IF( desc%active_node > 0 ) THEN - ir = desc%ir - ic = desc%ic - DO j = 1, desc%nc - DO i = 1, desc%nr - lambda_dist( i, j ) = lambda_repl( i + ir - 1, j + ic - 1 ) - END DO - END DO - END IF - RETURN - END SUBROUTINE distribute_lambda_x ! !------------------------------------------------------------------------ SUBROUTINE distribute_bec_x( bec_repl, bec_dist, desc, nspin ) @@ -2092,102 +2031,3 @@ END SUBROUTINE print_lambda_x END IF RETURN END SUBROUTINE distribute_bec_x - ! -!------------------------------------------------------------------------ - SUBROUTINE distribute_zmat_x( zmat_repl, zmat_dist, desc ) -!------------------------------------------------------------------------ - USE kinds, ONLY : DP - USE descriptors - REAL(DP), INTENT(IN) :: zmat_repl(:,:) - REAL(DP), INTENT(OUT) :: zmat_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - INTEGER :: i, ii, j, me, np - me = desc%mype - np = desc%npc * desc%npr - IF( desc%active_node > 0 ) THEN - DO j = 1, desc%n - ii = me + 1 - DO i = 1, desc%nrl - zmat_dist( i, j ) = zmat_repl( ii, j ) - ii = ii + np - END DO - END DO - END IF - RETURN - END SUBROUTINE distribute_zmat_x - ! -!------------------------------------------------------------------------ - SUBROUTINE collect_lambda_x( lambda_repl, lambda_dist, desc ) -!------------------------------------------------------------------------ - USE kinds, ONLY : DP - USE mp_global, ONLY: intra_bgrp_comm - USE mp, ONLY: mp_sum - USE descriptors - REAL(DP), INTENT(OUT) :: lambda_repl(:,:) - REAL(DP), INTENT(IN) :: lambda_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - INTEGER :: i, j, ic, ir - lambda_repl = 0.0d0 - IF( desc%active_node > 0 ) THEN - ir = desc%ir - ic = desc%ic - DO j = 1, desc%nc - DO i = 1, desc%nr - lambda_repl( i + ir - 1, j + ic - 1 ) = lambda_dist( i, j ) - END DO - END DO - END IF - CALL mp_sum( lambda_repl, intra_bgrp_comm ) - RETURN - END SUBROUTINE collect_lambda_x - ! -!------------------------------------------------------------------------ - SUBROUTINE collect_zmat_x( zmat_repl, zmat_dist, desc ) -!------------------------------------------------------------------------ - USE kinds, ONLY : DP - USE mp_global, ONLY: intra_bgrp_comm - USE mp, ONLY: mp_sum - USE descriptors - REAL(DP), INTENT(OUT) :: zmat_repl(:,:) - REAL(DP), INTENT(IN) :: zmat_dist(:,:) - TYPE(la_descriptor), INTENT(IN) :: desc - INTEGER :: i, ii, j, me, np, nrl - zmat_repl = 0.0d0 - me = desc%mype - np = desc%npc * desc%npr - nrl = desc%nrl - IF( desc%active_node > 0 ) THEN - DO j = 1, desc%n - ii = me + 1 - DO i = 1, nrl - zmat_repl( ii, j ) = zmat_dist( i, j ) - ii = ii + np - END DO - END DO - END IF - CALL mp_sum( zmat_repl, intra_bgrp_comm ) - RETURN - END SUBROUTINE collect_zmat_x - ! -!------------------------------------------------------------------------ - SUBROUTINE setval_lambda_x( lambda_dist, i, j, val, desc ) -!------------------------------------------------------------------------ - USE kinds, ONLY : DP - USE descriptors - REAL(DP), INTENT(OUT) :: lambda_dist(:,:) - INTEGER, INTENT(IN) :: i, j - REAL(DP), INTENT(IN) :: val - TYPE(la_descriptor), INTENT(IN) :: desc - IF( desc%active_node > 0 ) THEN - IF( ( i >= desc%ir ) .AND. ( i - desc%ir + 1 <= desc%nr ) ) THEN - IF( ( j >= desc%ic ) .AND. ( j - desc%ic + 1 <= desc%nc ) ) THEN - lambda_dist( i - desc%ir + 1, j - desc%ic + 1 ) = val - END IF - END IF - END IF - RETURN - END SUBROUTINE setval_lambda_x - - -!------------------------------------------------------------------------ - diff --git a/CPV/src/cpr.f90 b/CPV/src/cpr.f90 index 65219003c..d6ed17172 100644 --- a/CPV/src/cpr.f90 +++ b/CPV/src/cpr.f90 @@ -91,7 +91,7 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out ) USE gvecw, ONLY : ecutwfc USE gvect, ONLY : ecutrho USE time_step, ONLY : delt, tps, dt2, twodelt - USE cp_interfaces, ONLY : cp_print_rho, nlfh, print_lambda, prefor, dotcsc + USE cp_interfaces, ONLY : cp_print_rho, nlfh, prefor, dotcsc USE cp_main_variables, ONLY : acc, lambda, lambdam, lambdap, & ema0bg, sfac, eigr, iprint_stdout, & irb, taub, eigrb, rhog, rhos, & @@ -121,6 +121,8 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out ) ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... input/output variables ! REAL(DP), INTENT(OUT) :: tau_out(3,nat) @@ -564,7 +566,7 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out ) ! ! correction to displacement of ions ! - IF ( iverbosity > 1 ) CALL print_lambda( lambda, descla, nbsp, 9, 1.D0 ) + IF ( iverbosity > 1 ) CALL print_lambda( lambda, descla, nbsp, 9, nudx, 1.D0, ionode, stdout ) ! IF ( tortho ) THEN CALL updatc( ccc, lambda, phi_bgrp, bephi, becp_bgrp, bec_bgrp, cm_bgrp, descla ) @@ -972,7 +974,7 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out ) xnhp0, xnhpm, vnhp, nhpcl,nhpdim,ekincm, xnhh0, xnhhm, & vnhh, velh, fion, tps, z0t, f, rhor ) ! - IF( iverbosity > 1 ) CALL print_lambda( lambda, descla, nbsp, nbsp, 1.D0 ) + IF( iverbosity > 1 ) CALL print_lambda( lambda, descla, nbsp, nbsp, nudx, 1.D0, ionode, stdout ) ! IF (lda_plus_u) DEALLOCATE( forceh ) diff --git a/CPV/src/cprstart.f90 b/CPV/src/cprstart.f90 index eb302030e..b598939fd 100644 --- a/CPV/src/cprstart.f90 +++ b/CPV/src/cprstart.f90 @@ -27,11 +27,13 @@ PROGRAM main USE mp_images, ONLY : intra_image_comm USE mp_pools, ONLY : intra_pool_comm USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm - USE mp_diag, ONLY : mp_start_diag USE read_input, ONLY : read_input_file USE command_line_options, ONLY : input_file_, ndiag_ ! IMPLICIT NONE + ! + include 'laxlib.fh' + ! LOGICAL :: diag_in_band_group = .true. ! ! ... program starts here @@ -39,7 +41,7 @@ PROGRAM main ! ... initialize MPI (parallel processing handling) ! CALL mp_startup ( ) - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = diag_in_band_group ) CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & inter_bgrp_comm ) @@ -74,10 +76,7 @@ PROGRAM main ! CALL cpr_loop( 1 ) ! - CALL laxlib_free_ortho_group () - CALL stop_run() - CALL do_stop( .TRUE. ) - ! - STOP + CALL laxlib_end() + CALL stop_cp_run() ! END PROGRAM main diff --git a/CPV/src/eigs0.f90 b/CPV/src/eigs0.f90 index 8234411b4..82fb868be 100644 --- a/CPV/src/eigs0.f90 +++ b/CPV/src/eigs0.f90 @@ -15,13 +15,13 @@ use kinds, only : DP use io_global, only : stdout use constants, only : autoev - use dspev_module, only : dspev_drv, pdspev_drv USE sic_module, only : self_interaction USE descriptors, ONLY : la_descriptor USE mp, only : mp_sum, mp_bcast USE mp_global, only : intra_bgrp_comm, root_bgrp, me_bgrp implicit none + include 'laxlib.fh' ! input logical, intent(in) :: tprint, lf integer, intent(in) :: nspin, nx, nudx, nupdwn(nspin), iupdwn(nspin), nlam @@ -30,7 +30,7 @@ real(DP), intent(out) :: ei( nudx, nspin ) ! local variables real(DP), allocatable :: ap(:), wr(:) - real(DP) zr(1) + real(DP) zr(1,1) integer :: iss, j, i, ierr, k, n, ndim, nspin_eig, npaired INTEGER :: ir, ic, nr, nc, nrl, nrlx, comm, np, me logical :: tsic diff --git a/CPV/src/electrons.f90 b/CPV/src/electrons.f90 index 7f0156f59..4e2b719ab 100644 --- a/CPV/src/electrons.f90 +++ b/CPV/src/electrons.f90 @@ -9,7 +9,6 @@ MODULE electrons_module !=----------------------------------------------------------------------------=! USE kinds - USE dspev_module, ONLY: pdspev_drv, dspev_drv USE electrons_base, ONLY: nbnd, nbndx, nbsp, nbspx, nspin, nel, nelt, & nupdwn, iupdwn, telectrons_base_initval, f, & nudx, nupdwn_bgrp, iupdwn_bgrp, nudx_bgrp, & diff --git a/CPV/src/fromscra.f90 b/CPV/src/fromscra.f90 index ef108b58c..5faa4441b 100644 --- a/CPV/src/fromscra.f90 +++ b/CPV/src/fromscra.f90 @@ -22,7 +22,7 @@ SUBROUTINE from_scratch( ) cell_force, velh, at, alat USE cell_nose, ONLY : xnhh0, xnhhm, vnhh USE electrons_nose, ONLY : xnhe0, xnhem, vnhe - use electrons_base, ONLY : nbsp, f, nspin, nupdwn, iupdwn, nbsp_bgrp, nbspx_bgrp, nbspx + use electrons_base, ONLY : nbsp, f, nspin, nupdwn, iupdwn, nbsp_bgrp, nbspx_bgrp, nbspx, nudx USE electrons_module, ONLY : occn_info, distribute_c, collect_c, distribute_b, collect_b USE energies, ONLY : entropy, eself, enl, ekin, enthal, etot, ekincm USE energies, ONLY : dft_energy_type, debug_energies @@ -43,7 +43,7 @@ SUBROUTINE from_scratch( ) strucf, phfacs, nlfh, vofrho, nlfl_bgrp, prefor USE cp_interfaces, ONLY : rhoofr, ortho, wave_rand_init, elec_fakekine USE cp_interfaces, ONLY : compute_stress, dotcsc, calbec_bgrp, caldbec_bgrp - USE cp_interfaces, ONLY : print_lambda, nlfq_bgrp, setval_lambda + USE cp_interfaces, ONLY : nlfq_bgrp USE printout_base, ONLY : printout_pos USE orthogonalize_base, ONLY : updatc, calphi_bgrp USE wave_base, ONLY : wave_steepest @@ -60,6 +60,8 @@ SUBROUTINE from_scratch( ) ! IMPLICIT NONE ! + include 'laxlib.fh' + ! REAL(DP), ALLOCATABLE :: emadt2(:), emaver(:) REAL(DP) :: verl1, verl2 REAL(DP) :: bigr, dum @@ -258,7 +260,8 @@ SUBROUTINE from_scratch( ) CALL nlfl_bgrp( bec_bgrp, becdr_bgrp, lambda, descla, fion ) END IF - if ( iverbosity > 1 ) CALL print_lambda( lambda, descla, nbsp, 9, ccc ) + if ( iverbosity > 1 ) & + CALL print_lambda( lambda, descla, nbsp, 9, nudx, ccc, ionode, stdout ) ! if ( tstress ) CALL nlfh( stress, bec_bgrp, dbec, lambda, descla ) diff --git a/CPV/src/inner_loop_cold.f90 b/CPV/src/inner_loop_cold.f90 index 46bc76335..8c51469ec 100644 --- a/CPV/src/inner_loop_cold.f90 +++ b/CPV/src/inner_loop_cold.f90 @@ -40,7 +40,6 @@ USE local_pseudo, ONLY: vps, rhops USE io_global, ONLY: stdout, ionode, ionode_id USE mp_bands, ONLY: intra_bgrp_comm - USE mp_diag, ONLY: leg_ortho USE dener USE uspp, ONLY: nhsa=> nkb, betae => vkb, & rhovan => becsum, deeq, nlcc_any @@ -53,12 +52,11 @@ USE cg_module, ONLY: itercg USE cp_main_variables, ONLY: descla, drhor, drhog USE descriptors, ONLY: descla_init , la_descriptor - USE dspev_module, ONLY: pdspev_drv, dspev_drv - - ! IMPLICIT NONE + include 'laxlib.fh' + !input variables INTEGER :: nfi LOGICAL :: tfirst @@ -95,7 +93,7 @@ INTEGER :: np(2), coor_ip(2), ipr, ipc, nr, nc, ir, ic, ii, jj, root, j TYPE(la_descriptor) :: desc_ip - INTEGER :: np_rot, me_rot, comm_rot, nrlx + INTEGER :: np_rot, me_rot, comm_rot, nrlx, leg_ortho CALL start_clock( 'inner_loop') @@ -103,6 +101,7 @@ allocate(c0hc0(nrcx, nrcx, nspin)) allocate(h0c0(ngw,nx)) + CALL laxlib_getval( leg_ortho = leg_ortho ) lambdap=0.3d0!small step for free-energy calculation @@ -532,12 +531,11 @@ USE cg_module, ONLY: itercg USE cp_main_variables, ONLY: descla USE descriptors, ONLY: la_descriptor, descla_init - USE dspev_module, ONLY: pdspev_drv, dspev_drv - - ! IMPLICIT NONE + include 'laxlib.fh' + COMPLEX(kind=DP) :: c0( ngw, n ) REAL(kind=DP) :: bec( nhsa, n ) REAL(kind=DP) :: psihpsi( nrcx, nrcx, nspin ) @@ -573,7 +571,7 @@ CALL blk2cyc_redist( nss, epsi0, nrl, nss, psihpsi(1,1,is), SIZE(psihpsi,1), SIZE(psihpsi,2), descla(is) ) - CALL pdspev_drv( 'V', epsi0, nrl, dval, zaux, nrl, nrl, nss, np_rot, me_rot, comm_rot ) + CALL dspev_drv( 'V', epsi0, nrl, dval, zaux, nrl, nrl, nss, np_rot, me_rot, comm_rot ) ! IF( me_rot /= 0 ) dval = 0.0d0 ! diff --git a/CPV/src/ldaU.f90 b/CPV/src/ldaU.f90 index 9272fed0f..be9faa3ec 100644 --- a/CPV/src/ldaU.f90 +++ b/CPV/src/ldaU.f90 @@ -329,11 +329,12 @@ use gvecw, only: ngw USE ldaU_cp, ONLY: Hubbard_U, Hubbard_l, ldmx USE ldaU_cp, ONLY: nwfcU, ns, e_hubbard - use dspev_module, only : dspev_drv USE step_penalty, ONLY: write_pen implicit none + include 'laxlib.fh' + integer :: is, isp, ia, m1, m2, iat, err, k real(DP), allocatable :: ftemp1(:), ftemp2(:), f1 (:), vet (:,:) diff --git a/CPV/src/ldaUpen.f90 b/CPV/src/ldaUpen.f90 index 433e92600..06410dba2 100644 --- a/CPV/src/ldaUpen.f90 +++ b/CPV/src/ldaUpen.f90 @@ -80,9 +80,9 @@ CONTAINS USE gvecw, ONLY: ngw USE electrons_base, ONLY: nspin, n => nbsp, nx => nbspx, ispin, f USE ldaU_cp, ONLY: Hubbard_U, Hubbard_l, ldmx, nwfcU, ns - USE dspev_module, ONLY: dspev_drv ! IMPLICIT NONE + include 'laxlib.fh' INTEGER, intent(in) :: offset(nsp,nat) REAL(dp), intent(in) :: proj(nwfcU,n) COMPLEX(dp), intent(in) :: swfc(ngw,nwfcU) @@ -151,9 +151,9 @@ CONTAINS USE gvecw, ONLY: ngw USE electrons_base, ONLY: nspin, n => nbsp, nx => nbspx, ispin, f USE ldaU_cp, ONLY: Hubbard_U, Hubbard_l, ldmx, nwfcU, ns - USE dspev_module, ONLY: dspev_drv ! IMPLICIT NONE + include 'laxlib.fh' INTEGER, intent(in) :: is, iat REAL(dp), intent(in) :: dns(ldmx,ldmx,nspin,nat) REAL(dp), intent(inout) :: forceh diff --git a/CPV/src/mainvar.f90 b/CPV/src/mainvar.f90 index f1f034d93..914eb9db2 100644 --- a/CPV/src/mainvar.f90 +++ b/CPV/src/mainvar.f90 @@ -114,10 +114,13 @@ MODULE cp_main_variables gstart, nudx, tpre, nbspx_bgrp ) !------------------------------------------------------------------------ ! - USE mp_diag, ONLY: np_ortho, me_ortho, ortho_comm, ortho_comm_id, ortho_cntx USE mp_bands, ONLY: intra_bgrp_comm, me_bgrp USE mp, ONLY: mp_max, mp_min - USE descriptors, ONLY: la_descriptor, descla_init + USE descriptors, ONLY: descla_init + ! + IMPLICIT NONE + ! + include 'laxlib.fh' ! INTEGER, INTENT(IN) :: ngw, ngw_g, ngb, ngs, ng, nr1,nr2,nr3, & nnr, nrxxs, nat, nax, nsp, nspin, & @@ -129,6 +132,10 @@ MODULE cp_main_variables ! INTEGER :: iss, ierr, nlam, nrcx LOGICAL :: gzero + INTEGER :: np_ortho(2), me_ortho(2), ortho_comm, ortho_comm_id, ortho_cntx + ! + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + ortho_comm_id = ortho_comm_id, ortho_cntx = ortho_cntx ) ! ! ... allocation of all arrays not already allocated in init and nlinit ! diff --git a/CPV/src/manycp.f90 b/CPV/src/manycp.f90 index 5e75e3d21..799cfbc9b 100644 --- a/CPV/src/manycp.f90 +++ b/CPV/src/manycp.f90 @@ -32,13 +32,14 @@ PROGRAM manycp USE mp_images, ONLY : intra_image_comm, my_image_id USE mp_pools, ONLY : intra_pool_comm USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm - USE mp_diag, ONLY : mp_start_diag USE read_input, ONLY : read_input_file USE check_stop, ONLY : check_stop_init USE command_line_options, ONLY: input_file_, ndiag_ ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER :: i LOGICAL :: opnd, diag_in_band_group = .true. CHARACTER(LEN=256) :: filin, filout @@ -47,7 +48,7 @@ PROGRAM manycp ! ! CALL mp_startup ( start_images=.true. ) - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = diag_in_band_group ) CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & inter_bgrp_comm ) @@ -108,10 +109,7 @@ PROGRAM manycp ! CALL cpr_loop( 1 ) ! - CALL laxlib_free_ortho_group () - CALL stop_run( ) - CALL do_stop( .TRUE. ) - ! - STOP + CALL laxlib_end () + CALL stop_cp_run( ) ! END PROGRAM manycp diff --git a/CPV/src/ortho_base.f90 b/CPV/src/ortho_base.f90 index f8729d1a1..7ea77bb19 100644 --- a/CPV/src/ortho_base.f90 +++ b/CPV/src/ortho_base.f90 @@ -43,7 +43,6 @@ CONTAINS SUBROUTINE mesure_diag_perf( n ) ! USE mp_bands, ONLY: nproc_bgrp, me_bgrp, intra_bgrp_comm, root_bgrp - USE mp_diag, ONLY: nproc_ortho, np_ortho, me_ortho, ortho_comm, ortho_cntx, ortho_comm_id USE io_global, ONLY: ionode, stdout USE mp, ONLY: mp_sum, mp_bcast, mp_barrier USE mp, ONLY: mp_max @@ -51,12 +50,18 @@ CONTAINS ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, INTENT(IN) :: n REAL(DP), ALLOCATABLE :: s(:,:), a(:,:), d(:) REAL(DP) :: t1, tpar, tser INTEGER :: nr, nc, ir, ic, nx TYPE(la_descriptor) :: desc INTEGER, PARAMETER :: paradim = 1000 + INTEGER :: nproc_ortho, np_ortho(2), me_ortho(2), ortho_comm, ortho_comm_id, ortho_cntx + ! + CALL laxlib_getval( nproc_ortho = nproc_ortho, np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + ortho_comm_id = ortho_comm_id, ortho_cntx = ortho_cntx ) ! ! Check if number of PEs for orthogonalization/diagonalization is given from the input ! @@ -186,8 +191,6 @@ CONTAINS USE mp_bands, ONLY: nproc_bgrp, me_bgrp, intra_bgrp_comm, & root_bgrp, my_bgrp_id, nbgrp USE mp_images, ONLY: nimage, my_image_id - USE mp_diag, ONLY: ortho_comm, nproc_ortho, np_ortho, & - me_ortho, init_ortho_group, ortho_comm_id, ortho_cntx USE io_global, ONLY: ionode, stdout USE mp, ONLY: mp_sum, mp_bcast, mp_barrier USE mp, ONLY: mp_max @@ -195,22 +198,18 @@ CONTAINS ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, INTENT(IN) :: n ! REAL(DP), ALLOCATABLE :: c(:,:), a(:,:), b(:,:) REAL(DP) :: t1, tcan - INTEGER :: nr, nc, ir, ic, np, lnode + INTEGER :: nr, nc, ir, ic, lnode TYPE(la_descriptor) :: desc + INTEGER :: nproc_ortho, np_ortho(2), me_ortho(2), ortho_comm, ortho_comm_id, ortho_cntx ! - np = MAX( INT( SQRT( DBLE( nproc_ortho ) + 0.1d0 ) ), 1 ) - ! - ! Make ortho group compatible with the number of electronic states - ! - np = MIN( np, n ) - ! - ! Now re-define the ortho group and test the performance - ! - CALL init_ortho_group( np * np, world_comm, intra_bgrp_comm, nimage*nbgrp, my_bgrp_id + nbgrp * my_image_id ) + CALL laxlib_getval( nproc_ortho = nproc_ortho, np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + ortho_comm_id = ortho_comm_id, ortho_cntx = ortho_cntx ) CALL descla_init( desc, n, n, np_ortho, me_ortho, ortho_comm, ortho_cntx, ortho_comm_id ) @@ -242,7 +241,7 @@ CONTAINS IF( ionode ) THEN ! WRITE( stdout, 90 ) - WRITE( stdout, 120 ) tcan, np*np + WRITE( stdout, 120 ) tcan, nproc_ortho 90 FORMAT(/,3X,'Matrix Multiplication Performances') 120 FORMAT(3X,'ortho mmul, time for parallel driver = ', 1F9.5, ' with ', I4, ' procs') ! @@ -602,11 +601,11 @@ CONTAINS USE control_flags, ONLY: iverbosity USE io_global, ONLY: stdout USE mp_bands, ONLY: intra_bgrp_comm, inter_bgrp_comm, my_bgrp_id, nbgrp - USE mp_diag, ONLY: leg_ortho USE descriptors, ONLY: la_descriptor, descla_init - USE parallel_toolkit, ONLY: dsqmsym ! IMPLICIT NONE + + include 'laxlib.fh' ! INTEGER :: nss, ist, ngwx, nkbx, n, ldx, nx COMPLEX(DP) :: cp( ngwx, n ) @@ -618,12 +617,14 @@ CONTAINS INTEGER :: i, j, ipr, ipc, nr, nc, ir, ic, npr, npc INTEGER :: ii, jj, root TYPE(la_descriptor):: desc_ip - INTEGER :: np( 2 ), coor_ip( 2 ) + INTEGER :: np( 2 ), coor_ip( 2 ), leg_ortho ! REAL(DP), ALLOCATABLE :: sigp(:,:) ! IF( nss < 1 ) RETURN + CALL laxlib_getval( leg_ortho = leg_ortho ) + np(1) = desc%npr np(2) = desc%npc @@ -690,7 +691,7 @@ CONTAINS CALL mp_sum( sig, inter_bgrp_comm ) END IF ! - CALL dsqmsym( nss, sig, nx, desc ) + CALL laxlib_dsqmsym( nss, sig, nx, desc ) ! IF( desc%active_node > 0 ) THEN ! @@ -742,12 +743,13 @@ CONTAINS USE kinds, ONLY: DP USE mp, ONLY: mp_root_sum, mp_sum USE mp_bands, ONLY: intra_bgrp_comm, me_bgrp, inter_bgrp_comm, my_bgrp_id, nbgrp - USE mp_diag, ONLY: leg_ortho USE control_flags, ONLY: iverbosity USE io_global, ONLY: stdout USE descriptors, ONLY: la_descriptor, descla_init ! IMPLICIT NONE + + include 'laxlib.fh' ! INTEGER :: nss, ist, ngwx, nkbx, ldx, n COMPLEX(DP) :: cp( ngwx, n ), phi( ngwx, n ) @@ -758,7 +760,7 @@ CONTAINS INTEGER :: i, j, ipr, ipc, nr, nc, ir, ic, npr, npc INTEGER :: ii, jj, root, nx TYPE(la_descriptor) :: desc_ip - INTEGER :: np( 2 ), coor_ip( 2 ) + INTEGER :: np( 2 ), coor_ip( 2 ), leg_ortho REAL(DP), ALLOCATABLE :: rhop(:,:) ! @@ -768,6 +770,8 @@ CONTAINS IF( nss < 1 ) RETURN + CALL laxlib_getval( leg_ortho = leg_ortho ) + np(1) = desc%npr np(2) = desc%npc @@ -882,13 +886,13 @@ CONTAINS USE mp, ONLY: mp_root_sum, mp_sum USE control_flags, ONLY: iverbosity USE io_global, ONLY: stdout - USE mp_diag, ONLY: leg_ortho USE mp_bands, ONLY: intra_bgrp_comm, inter_bgrp_comm, my_bgrp_id, nbgrp USE descriptors, ONLY: la_descriptor, descla_init - USE parallel_toolkit, ONLY: dsqmsym ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER :: nss, ist, ngwx, nkbx, n, ldx, nx COMPLEX(DP) :: phi( ngwx, n ) REAL(DP) :: bephi( nkbx, ldx ), qbephi( nkbx, ldx ) @@ -898,11 +902,13 @@ CONTAINS INTEGER :: i, j, ipr, ipc, nr, nc, ir, ic, npr, npc INTEGER :: ii, jj, root TYPE(la_descriptor) :: desc_ip - INTEGER :: np( 2 ), coor_ip( 2 ) + INTEGER :: np( 2 ), coor_ip( 2 ), leg_ortho REAL(DP), ALLOCATABLE :: taup( :, : ) ! IF( nss < 1 ) RETURN + + CALL laxlib_getval( leg_ortho = leg_ortho ) ! ! get dimensions of the square processor grid ! @@ -979,7 +985,7 @@ CONTAINS CALL mp_sum( tau, inter_bgrp_comm ) END IF ! - CALL dsqmsym( nss, tau, nx, desc ) + CALL laxlib_dsqmsym( nss, tau, nx, desc ) ! IF( desc%active_node > 0 ) THEN ! @@ -1031,11 +1037,12 @@ CONTAINS USE control_flags, ONLY: iverbosity USE mp, ONLY: mp_sum, mp_bcast USE mp_bands, ONLY: intra_bgrp_comm, me_bgrp, inter_bgrp_comm - USE mp_diag, ONLY: leg_ortho USE electrons_base, ONLY: nbspx_bgrp, ibgrp_g2l, nbsp, nspin, nupdwn, iupdwn, nbspx USE descriptors, ONLY: descla_init, la_descriptor ! IMPLICIT NONE + + include 'laxlib.fh' ! TYPE(la_descriptor), INTENT(IN) :: desc( : ) COMPLEX(DP) :: cp_bgrp( :, : ), phi( :, : ) @@ -1051,11 +1058,13 @@ CONTAINS INTEGER :: ibgrp_i, ibgrp_i_first, nbgrp_i, i_first REAL(DP), ALLOCATABLE :: xd(:,:) REAL(DP), ALLOCATABLE :: bephi_tmp(:,:) - INTEGER :: np( 2 ), coor_ip( 2 ) + INTEGER :: np( 2 ), coor_ip( 2 ), leg_ortho TYPE(la_descriptor) :: desc_ip CALL start_clock( 'updatc' ) + CALL laxlib_getval( leg_ortho = leg_ortho ) + DO iss = 1, nspin ! ! size of the local block @@ -1364,19 +1373,22 @@ CONTAINS USE uspp, ONLY: nkb, nkbus USE mp, ONLY: mp_sum USE mp_bands, ONLY: intra_bgrp_comm, me_bgrp, inter_bgrp_comm - USE mp_diag, ONLY: leg_ortho USE electrons_base, ONLY: nbspx_bgrp, ibgrp_g2l, nspin USE descriptors, ONLY: la_descriptor ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, INTENT(IN) :: nrcx TYPE(la_descriptor), INTENT(IN) :: desc( : ) REAL(DP), INTENT(IN) :: bec_bgrp(:,:) REAL(DP), INTENT(OUT) :: bec_ortho(:,:) ! - INTEGER :: ir, nr, i, ibgrp_i, nup + INTEGER :: ir, nr, i, ibgrp_i, nup, leg_ortho ! + CALL laxlib_getval( leg_ortho = leg_ortho ) + bec_ortho = 0.0d0 ! IF( desc( 1 )%active_node > 0 ) THEN diff --git a/CPV/src/restart.f90 b/CPV/src/restart.f90 index 0288b5592..a1ba5ef3c 100644 --- a/CPV/src/restart.f90 +++ b/CPV/src/restart.f90 @@ -238,11 +238,13 @@ USE kinds, ONLY: DP USE electrons_base, ONLY: nupdwn, nspin, iupdwn, nudx USE electrons_module, ONLY: ei - USE cp_interfaces, ONLY: crot, collect_lambda + USE cp_interfaces, ONLY: crot USE descriptors, ONLY: la_descriptor ! IMPLICIT NONE ! + include 'laxlib.fh' + ! COMPLEX(DP), INTENT(IN) :: c0(:,:) COMPLEX(DP), INTENT(OUT) :: ctot(:,:) REAL(DP), INTENT(IN) :: lambda(:,:,:) diff --git a/CPV/src/stop_run.f90 b/CPV/src/stop_run.f90 index 5120b2d34..e669d10a5 100644 --- a/CPV/src/stop_run.f90 +++ b/CPV/src/stop_run.f90 @@ -7,7 +7,7 @@ ! ! !---------------------------------------------------------------------------- -SUBROUTINE stop_run() +SUBROUTINE stop_cp_run() !---------------------------------------------------------------------------- ! ! ... Close all files and synchronize processes before stopping. @@ -19,7 +19,6 @@ SUBROUTINE stop_run() ! IMPLICIT NONE ! - ! CALL environment_end( 'CP' ) ! CALL deallocate_modules_var() @@ -30,17 +29,6 @@ SUBROUTINE stop_run() ! CALL mp_global_end() ! -END SUBROUTINE stop_run - -SUBROUTINE do_stop( flag ) - IMPLICIT NONE + STOP 0 ! - LOGICAL, INTENT(IN) :: flag - ! - IF ( flag ) THEN - STOP - ELSE - STOP 1 - END IF - ! -END SUBROUTINE do_stop +END SUBROUTINE stop_cp_run diff --git a/CPV/src/wannier.f90 b/CPV/src/wannier.f90 index 78cacb708..c70e437ad 100644 --- a/CPV/src/wannier.f90 +++ b/CPV/src/wannier.f90 @@ -385,7 +385,7 @@ MODULE wannier_subroutines ! CALL write_rho_g( rhog ) ! - CALL stop_run( .TRUE. ) + CALL stop_cp_run() ! END IF ! @@ -436,7 +436,7 @@ MODULE wannier_subroutines CALL rhoofr (nfi,cm, irb, eigrb,bec,dbec,rhovan,rhor,drhor,rhog,drhog,rhos,enl,denl,ekin,dekin6,.false.,j) END DO ! - CALL stop_run( .TRUE. ) + CALL stop_cp_run() ! END IF ! @@ -448,7 +448,7 @@ MODULE wannier_subroutines ! CALL wf (calwf,cm,bec,eigr,eigrb,taub,irb,b1,b2,b3,utwf,what1,wfc,jwf,ibrav) ! - CALL stop_run( .TRUE. ) + CALL stop_cp_run( ) ! END IF ! @@ -457,7 +457,7 @@ MODULE wannier_subroutines jwf=iplot(1) CALL wf (calwf,cm,bec,eigr,eigrb,taub,irb,b1,b2,b3,utwf,what1,wfc,jwf,ibrav) ! - CALL stop_run( .TRUE. ) + CALL stop_cp_run( ) ! END IF ! @@ -746,7 +746,7 @@ MODULE wannier_subroutines vnhh, velh, fion, tps, mat_z, occ_f, rho ) ! CALL stop_clock('wf_close_opt') - CALL stop_run( .TRUE. ) + CALL stop_cp_run( ) ! END IF ! diff --git a/CPV/src/wave.f90 b/CPV/src/wave.f90 index 7ca4d36b1..3053f6cd4 100644 --- a/CPV/src/wave.f90 +++ b/CPV/src/wave.f90 @@ -170,7 +170,6 @@ USE kinds, ONLY: DP USE mp, ONLY: mp_bcast USE mp_global, ONLY: nproc_bgrp, me_bgrp, intra_bgrp_comm - USE dspev_module, ONLY: pdspev_drv, dspev_drv IMPLICIT NONE @@ -263,10 +262,11 @@ USE kinds, ONLY: DP USE mp, ONLY: mp_bcast USE mp_global, ONLY: nproc_bgrp, me_bgrp, intra_bgrp_comm - USE dspev_module, ONLY: dspev_drv IMPLICIT NONE + include 'laxlib.fh' + ! ... declare subroutine arguments INTEGER, INTENT(IN) :: ngw, n, nx, noffr, noff diff --git a/CPV/src/wf.f90 b/CPV/src/wf.f90 index 9e3358a02..d5a702bd8 100644 --- a/CPV/src/wf.f90 +++ b/CPV/src/wf.f90 @@ -550,7 +550,7 @@ SUBROUTINE wf( clwf, c, bec, eigr, eigrb, taub, irb, & END DO CLOSE(38) END IF - CALL stop_run( .TRUE. ) + CALL stop_cp_run( ) END IF IF(clwf.EQ.3.OR.clwf.EQ.4) THEN @@ -2795,7 +2795,7 @@ SUBROUTINE write_psi( c, jw ) IF( ionode ) WRITE( stdout, * ) "State Written", jw ! - CALL stop_run( .TRUE. ) + CALL stop_cp_run( ) ! RETURN ! @@ -3025,7 +3025,6 @@ END SUBROUTINE jacobi_rotation USE constants, ONLY : tpi, autoaf => BOHR_RADIUS_ANGS USE mp_global, ONLY : nproc_image, me_image, intra_image_comm USE cp_main_variables, ONLY: descla - USE cp_interfaces, ONLY: distribute_lambda, collect_lambda USE printout_base, ONLY : printout_base_open, printout_base_unit, printout_base_close USE cp_main_variables, ONLY : nfi, iprint_stdout USE time_step, ONLY : tps @@ -3035,6 +3034,8 @@ END SUBROUTINE jacobi_rotation USE io_global, ONLY : ionode, stdout IMPLICIT NONE + + include 'laxlib.fh' INTEGER , INTENT(in) :: nbsp REAL(DP), INTENT(out) :: U(nbsp,nbsp) diff --git a/GWW/pw4gww/diago_cg.f90 b/GWW/pw4gww/diago_cg.f90 index a2bfe99fc..ddd7cd7a6 100644 --- a/GWW/pw4gww/diago_cg.f90 +++ b/GWW/pw4gww/diago_cg.f90 @@ -25,6 +25,8 @@ SUBROUTINE diago_cg(ndim,omat,maxter,max_state,e,ovec,cutoff,ethr,found_state,l_ ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! @@ -154,7 +156,7 @@ SUBROUTINE diago_cg(ndim,omat,maxter,max_state,e,ovec,cutoff,ethr,found_state,l_ write(stdout,*) 'Call rdiaghg' FLUSH(stdout) - CALL rdiaghg( max_state, max_state, hr, sr, max_state, en, hr(1,1,2), me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( max_state, max_state, hr(:,:,1), sr, max_state, en, hr(:,:,2), me_bgrp, root_bgrp, intra_bgrp_comm ) write(stdout,*) 'Done' FLUSH(stdout) diff --git a/GWW/pw4gww/diago_cg_g.f90 b/GWW/pw4gww/diago_cg_g.f90 index 2940a0833..42056775b 100644 --- a/GWW/pw4gww/diago_cg_g.f90 +++ b/GWW/pw4gww/diago_cg_g.f90 @@ -25,6 +25,8 @@ SUBROUTINE diago_cg_g(ndim,omat,smat,maxter,max_state,e,ovec,cutoff,ethr,found_s ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! @@ -147,7 +149,7 @@ SUBROUTINE diago_cg_g(ndim,omat,smat,maxter,max_state,e,ovec,cutoff,ethr,found_s write(stdout,*) 'Call rdiaghg' FLUSH(stdout) - CALL rdiaghg( max_state, max_state, hr, sr, max_state, en, hr(1,1,2), me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( max_state, max_state, hr(:,:,1), sr, max_state, en, hr(:,:,2), me_bgrp, root_bgrp, intra_bgrp_comm ) write(stdout,*) 'Done' FLUSH(stdout) diff --git a/GWW/pw4gww/o_rinitcgg.f90 b/GWW/pw4gww/o_rinitcgg.f90 index 6aa34a37c..a0c5ecd70 100644 --- a/GWW/pw4gww/o_rinitcgg.f90 +++ b/GWW/pw4gww/o_rinitcgg.f90 @@ -26,6 +26,8 @@ SUBROUTINE o_rinitcgg( npwx, npw, nstart, numwp, psi, o_evc, e, numv, v_states,h ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER :: npw, npwx, nstart, numwp ! dimension of the matrix to be diagonalized ! leading dimension of matrix psi, as declared in the calling pgm unit @@ -117,7 +119,7 @@ SUBROUTINE o_rinitcgg( npwx, npw, nstart, numwp, psi, o_evc, e, numv, v_states,h write(stdout,*) 'Call rdiaghg' FLUSH(stdout) - CALL rdiaghg( nstart, numwp, hr, sr, nstart, en, hr(1,1,2), me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nstart, numwp, hr(:,:,1), sr, nstart, en, hr(:,:,2), me_bgrp, root_bgrp, intra_bgrp_comm ) write(stdout,*) 'Done' FLUSH(stdout) diff --git a/HP/src/hp_main.f90 b/HP/src/hp_main.f90 index 09b5b4c89..e36e4a74e 100644 --- a/HP/src/hp_main.f90 +++ b/HP/src/hp_main.f90 @@ -18,7 +18,6 @@ PROGRAM hp_main USE mp_world, ONLY : world_comm USE mp_pools, ONLY : intra_pool_comm USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm - USE mp_diag, ONLY : mp_start_diag USE command_line_options, ONLY : input_file_, ndiag_ USE environment, ONLY : environment_start, environment_end USE ions_base, ONLY : nat, ityp, atm, tau, amass @@ -29,13 +28,15 @@ PROGRAM hp_main ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER :: iq, na, ipol LOGICAL :: do_iq, setup_pw ! ! Initialize MPI, clocks, print initial messages ! CALL mp_startup() - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = .true. ) CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & inter_bgrp_comm ) @@ -226,7 +227,7 @@ PROGRAM hp_main ! CALL environment_end(code) ! - CALL laxlib_free_ortho_group() + CALL laxlib_end() CALL mp_global_end() ! 3336 FORMAT(' ',69('=')) diff --git a/KS_Solvers/CG/rotate_wfc_gamma.f90 b/KS_Solvers/CG/rotate_wfc_gamma.f90 index d7bc8d597..ee58fd733 100644 --- a/KS_Solvers/CG/rotate_wfc_gamma.f90 +++ b/KS_Solvers/CG/rotate_wfc_gamma.f90 @@ -23,6 +23,8 @@ SUBROUTINE rotate_wfc_gamma( h_psi, s_psi, overlap, & ! IMPLICIT NONE ! + INCLUDE 'laxlib.fh' + ! ! ... I/O variables ! INTEGER :: npw, npwx, nstart, nbnd @@ -113,7 +115,7 @@ SUBROUTINE rotate_wfc_gamma( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('rotwfcg:diag'); !write(*,*) 'start rotwfcg:diag' ; FLUSH(6) - CALL rdiaghg( nstart, nbnd, hr, sr, nstart, en, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nstart, nbnd, hr, sr, nstart, en, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) call stop_clock('rotwfcg:diag'); !write(*,*) 'stop rotwfcg:diag' ; FLUSH(6) call start_clock('rotwfcg:evc'); !write(*,*) 'start rotwfcg:evc' ; FLUSH(6) ! @@ -160,15 +162,13 @@ SUBROUTINE protate_wfc_gamma( h_psi, s_psi, overlap, & USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id,& nbgrp, my_bgrp_id USE mp_bands_util, ONLY : gstart ! index of the first nonzero G - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp USE descriptors, ONLY : la_descriptor, descla_init - USE parallel_toolkit, ONLY : dsqmsym USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier - ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! INTEGER :: npw, npwx, nstart, nbnd @@ -198,6 +198,9 @@ SUBROUTINE protate_wfc_gamma( h_psi, s_psi, overlap, & ! flag to distinguish procs involved in linear algebra TYPE(la_descriptor), ALLOCATABLE :: desc_ip( :, : ) INTEGER, ALLOCATABLE :: rank_ip( :, : ) + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp ! EXTERNAL h_psi, s_psi ! h_psi(npwx,npw,nvec,psi,hpsi) @@ -207,6 +210,11 @@ SUBROUTINE protate_wfc_gamma( h_psi, s_psi, overlap, & ! Vectors psi,hpsi,spsi are dimensioned (npwx,npol,nvec) call start_clock('protwfcg'); !write(*,*) 'start protwfcg' ; FLUSH(6) + ! + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + leg_ortho = leg_ortho, ortho_comm_id = ortho_comm_id, ortho_parent_comm = ortho_parent_comm, & + ortho_cntx = ortho_cntx, do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) + ! ALLOCATE( desc_ip( np_ortho(1), np_ortho(2) ) ) ALLOCATE( rank_ip( np_ortho(1), np_ortho(2) ) ) @@ -257,15 +265,15 @@ SUBROUTINE protate_wfc_gamma( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('protwfcg:diag'); !write(*,*) 'start protwfcg:diag' ; FLUSH(6) - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of prdiaghg en and vr are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg en and vr are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL prdiaghg( nstart, hr, sr, nx, en, vr, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nstart, hr, sr, nx, en, vr, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( en, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL prdiaghg( nstart, hr, sr, nx, en, vr, desc ) + CALL pdiaghg( nstart, hr, sr, nx, en, vr, desc ) END IF call stop_clock('protwfcg:diag'); !write(*,*) 'stop protwfcg:diag' ; FLUSH(6) ! @@ -374,7 +382,7 @@ CONTAINS if (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp ! - CALL dsqmsym( nstart, dm, nx, desc ) + CALL laxlib_dsqmsym( nstart, dm, nx, desc ) ! DEALLOCATE( work ) ! diff --git a/KS_Solvers/CG/rotate_wfc_k.f90 b/KS_Solvers/CG/rotate_wfc_k.f90 index c3e5cd4a1..633fbfeca 100644 --- a/KS_Solvers/CG/rotate_wfc_k.f90 +++ b/KS_Solvers/CG/rotate_wfc_k.f90 @@ -20,6 +20,8 @@ SUBROUTINE rotate_wfc_k( h_psi, s_psi, overlap, & ! IMPLICIT NONE ! + INCLUDE 'laxlib.fh' + ! ! ... I/O variables ! INTEGER, INTENT(IN) :: npw, npwx, nstart, nbnd, npol @@ -108,7 +110,7 @@ SUBROUTINE rotate_wfc_k( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('rotwfck:diag'); !write(*,*) 'start rotwfck:diag';FLUSH(6) - CALL cdiaghg( nstart, nbnd, hc, sc, nstart, en, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nstart, nbnd, hc, sc, nstart, en, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) call stop_clock('rotwfck:diag'); !write(*,*) 'stop rotwfck:diag';FLUSH(6) call start_clock('rotwfck:evc'); !write(*,*) 'start rotwfck:evc';FLUSH(6) ! @@ -152,14 +154,13 @@ SUBROUTINE protate_wfc_k( h_psi, s_psi, overlap, & USE cg_param, ONLY : DP USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id,& nbgrp, my_bgrp_id - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp USE descriptors, ONLY : la_descriptor, descla_init - USE parallel_toolkit, ONLY : zsqmher USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! INTEGER :: npw, npwx, nstart, nbnd, npol @@ -191,6 +192,10 @@ SUBROUTINE protate_wfc_k( h_psi, s_psi, overlap, & TYPE(la_descriptor), ALLOCATABLE :: desc_ip( :, : ) INTEGER, ALLOCATABLE :: rank_ip( :, : ) ! + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp + ! EXTERNAL h_psi, s_psi ! h_psi(npwx,npw,nvec,psi,hpsi) ! calculates H|psi> @@ -200,6 +205,10 @@ SUBROUTINE protate_wfc_k( h_psi, s_psi, overlap, & call start_clock('protwfck') ! + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + leg_ortho = leg_ortho, ortho_comm_id = ortho_comm_id, ortho_parent_comm = ortho_parent_comm, & + ortho_cntx = ortho_cntx, do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) + ! ALLOCATE( desc_ip( np_ortho(1), np_ortho(2) ) ) ALLOCATE( rank_ip( np_ortho(1), np_ortho(2) ) ) ! @@ -251,15 +260,15 @@ SUBROUTINE protate_wfc_k( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('protwfck:diag') - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pcdiaghg en and vc are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg en and vc are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL pcdiaghg( nstart, hc, sc, nx, en, vc, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nstart, hc, sc, nx, en, vc, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( en, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL pcdiaghg( nstart, hc, sc, nx, en, vc, desc ) + CALL pdiaghg( nstart, hc, sc, nx, en, vc, desc ) END IF call stop_clock('protwfck:diag') ! @@ -364,7 +373,7 @@ CONTAINS END DO if (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp ! - CALL zsqmher( nstart, dm, nx, desc ) + CALL laxlib_zsqmher( nstart, dm, nx, desc ) ! DEALLOCATE( work ) ! diff --git a/KS_Solvers/Davidson/cegterg.f90 b/KS_Solvers/Davidson/cegterg.f90 index d8594bd27..369f35d54 100644 --- a/KS_Solvers/Davidson/cegterg.f90 +++ b/KS_Solvers/Davidson/cegterg.f90 @@ -29,6 +29,8 @@ SUBROUTINE cegterg( h_psi, s_psi, uspp, g_psi, & ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx, npol ! dimension of the matrix to be diagonalized ! leading dimension of matrix evc, as declared in the calling pgm unit @@ -240,7 +242,7 @@ SUBROUTINE cegterg( h_psi, s_psi, uspp, g_psi, & ! CALL start_clock( 'cegterg:diag' ) IF( my_bgrp_id == root_bgrp_id ) THEN - CALL cdiaghg( nbase, nvec, hc, sc, nvecx, ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nbase, nvec, hc, sc, nvecx, ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) END IF IF( nbgrp > 1 ) THEN CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) @@ -431,7 +433,7 @@ SUBROUTINE cegterg( h_psi, s_psi, uspp, g_psi, & ! CALL start_clock( 'cegterg:diag' ) IF( my_bgrp_id == root_bgrp_id ) THEN - CALL cdiaghg( nbase, nvec, hc, sc, nvecx, ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nbase, nvec, hc, sc, nvecx, ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) END IF IF( nbgrp > 1 ) THEN CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) @@ -580,15 +582,14 @@ SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & ! USE david_param, ONLY : DP, stdout USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp USE descriptors, ONLY : la_descriptor, descla_init , descla_local_dims - USE parallel_toolkit, ONLY : zsqmred, zsqmher, zsqmdst USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier, & mp_size, mp_type_free, mp_allgather ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx, npol ! dimension of the matrix to be diagonalized ! leading dimension of matrix evc, as declared in the calling pgm unit @@ -653,6 +654,10 @@ SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & INTEGER, ALLOCATABLE :: notcnv_ip( : ) INTEGER, ALLOCATABLE :: ic_notcnv( : ) ! + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp + ! REAL(DP), EXTERNAL :: ddot ! EXTERNAL h_psi, s_psi, g_psi @@ -668,6 +673,10 @@ SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & ! CALL start_clock( 'cegterg' ) ! + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + leg_ortho = leg_ortho, ortho_comm_id = ortho_comm_id, ortho_parent_comm = ortho_parent_comm, & + ortho_cntx = ortho_cntx, do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) + ! IF ( nvec > nvecx / 2 ) CALL errore( 'pcegterg', 'nvecx is too small', 1 ) ! ! ... threshold for empty bands @@ -812,15 +821,15 @@ SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & ! Calling block parallel algorithm ! CALL start_clock( 'cegterg:diag' ) - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pcdiaghg ew and vl are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL pcdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL pcdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) END IF CALL stop_clock( 'cegterg:diag' ) ! @@ -914,7 +923,7 @@ SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & IF( ierr /= 0 ) & CALL errore( ' pcegterg ',' cannot allocate hl ', ABS(ierr) ) - CALL zsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, hl, nx, desc ) + CALL laxlib_zsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, hl, nx, desc ) vl = sl DEALLOCATE( sl ) @@ -922,7 +931,7 @@ SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & IF( ierr /= 0 ) & CALL errore( ' pcegterg ',' cannot allocate sl ', ABS(ierr) ) - CALL zsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, sl, nx, desc ) + CALL laxlib_zsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, sl, nx, desc ) DEALLOCATE( vl ) ALLOCATE( vl( nx , nx ), STAT=ierr ) @@ -952,15 +961,15 @@ SUBROUTINE pcegterg(h_psi, s_psi, uspp, g_psi, & ! Call block parallel algorithm ! CALL start_clock( 'cegterg:diag' ) - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pcdiaghg ew and vl are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL pcdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL pcdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) END IF CALL stop_clock( 'cegterg:diag' ) ! @@ -1496,7 +1505,7 @@ CONTAINS ! ! The matrix is hermitianized using upper triangle ! - CALL zsqmher( nbase, dm, nx, desc ) + CALL laxlib_zsqmher( nbase, dm, nx, desc ) ! DEALLOCATE( work ) ! @@ -1558,7 +1567,7 @@ CONTAINS ! END DO ! - CALL zsqmher( nbase+notcnv, dm, nx, desc ) + CALL laxlib_zsqmher( nbase+notcnv, dm, nx, desc ) ! DEALLOCATE( vtmp ) RETURN diff --git a/KS_Solvers/Davidson/regterg.f90 b/KS_Solvers/Davidson/regterg.f90 index 1622771c6..94869b493 100644 --- a/KS_Solvers/Davidson/regterg.f90 +++ b/KS_Solvers/Davidson/regterg.f90 @@ -31,6 +31,8 @@ SUBROUTINE regterg( h_psi, s_psi, uspp, g_psi, & ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx ! dimension of the matrix to be diagonalized ! leading dimension of matrix evc, as declared in the calling pgm unit @@ -205,7 +207,7 @@ SUBROUTINE regterg( h_psi, s_psi, uspp, g_psi, & ! CALL start_clock( 'regterg:diag' ) IF( my_bgrp_id == root_bgrp_id ) THEN - CALL rdiaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) END IF IF( nbgrp > 1 ) THEN CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm ) @@ -366,7 +368,7 @@ SUBROUTINE regterg( h_psi, s_psi, uspp, g_psi, & ! CALL start_clock( 'regterg:diag' ) IF( my_bgrp_id == root_bgrp_id ) THEN - CALL rdiaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nbase, nvec, hr, sr, nvecx, ew, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) END IF IF( nbgrp > 1 ) THEN CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm ) @@ -516,14 +518,13 @@ SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & USE david_param, ONLY : DP, stdout USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id USE mp_bands_util, ONLY : gstart - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp USE descriptors, ONLY : la_descriptor, descla_init, descla_local_dims - USE parallel_toolkit, ONLY : dsqmdst, dsqmcll, dsqmred, dsqmsym USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, INTENT(IN) :: npw, npwx, nvec, nvecx ! dimension of the matrix to be diagonalized ! leading dimension of matrix evc, as declared in the calling pgm unit @@ -585,6 +586,10 @@ SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & INTEGER, ALLOCATABLE :: notcnv_ip( : ) INTEGER, ALLOCATABLE :: ic_notcnv( : ) ! + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp + ! REAL(DP), EXTERNAL :: ddot ! EXTERNAL h_psi, s_psi, g_psi @@ -599,7 +604,11 @@ SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & ! ! CALL start_clock( 'regterg' ) - ! + ! + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + leg_ortho = leg_ortho, ortho_comm_id = ortho_comm_id, ortho_parent_comm = ortho_parent_comm, & + ortho_cntx = ortho_cntx, do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) + ! IF ( nvec > nvecx / 2 ) CALL errore( 'pregter', 'nvecx is too small', 1 ) ! IF ( gstart == -1 ) CALL errore( 'pregter', 'gstart variable not initialized', 1 ) @@ -739,15 +748,15 @@ SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & ! Calling block parallel algorithm ! CALL start_clock( 'regterg:diag' ) - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of prdiaghg ew and vl are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL prdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL prdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) END IF CALL stop_clock( 'regterg:diag' ) ! @@ -829,7 +838,7 @@ SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & IF( ierr /= 0 ) & CALL errore( 'pregterg ',' cannot allocate hl ', ABS(ierr) ) - CALL dsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, hl, nx, desc ) + CALL laxlib_dsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, hl, nx, desc ) vl = sl DEALLOCATE( sl ) @@ -837,7 +846,7 @@ SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & IF( ierr /= 0 ) & CALL errore( 'pregterg ',' cannot allocate sl ', ABS(ierr) ) - CALL dsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, sl, nx, desc ) + CALL laxlib_dsqmred( nbase, vl, desc_old%nrcx, desc_old, nbase+notcnv, sl, nx, desc ) DEALLOCATE( vl ) ALLOCATE( vl( nx , nx ), STAT=ierr ) @@ -867,15 +876,15 @@ SUBROUTINE pregterg(h_psi, s_psi, uspp, g_psi, & ! Call block parallel algorithm ! CALL start_clock( 'regterg:diag' ) - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of prdiaghg ew and vl are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg ew and vl are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL prdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other bnd groups CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( ew, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL prdiaghg( nbase, hl, sl, nx, ew, vl, desc ) + CALL pdiaghg( nbase, hl, sl, nx, ew, vl, desc ) END IF CALL stop_clock( 'regterg:diag' ) ! @@ -1403,7 +1412,7 @@ CONTAINS END DO IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp ! - CALL dsqmsym( nbase, dm, nx, desc ) + CALL laxlib_dsqmsym( nbase, dm, nx, desc ) ! DEALLOCATE( work ) ! @@ -1466,7 +1475,7 @@ CONTAINS ! END DO ! - CALL dsqmsym( nbase+notcnv, dm, nx, desc ) + CALL laxlib_dsqmsym( nbase+notcnv, dm, nx, desc ) ! DEALLOCATE( vtmp ) RETURN diff --git a/KS_Solvers/Davidson_RCI/david_rci.f90 b/KS_Solvers/Davidson_RCI/david_rci.f90 index a6f238d78..061b34e76 100644 --- a/KS_Solvers/Davidson_RCI/david_rci.f90 +++ b/KS_Solvers/Davidson_RCI/david_rci.f90 @@ -130,6 +130,9 @@ contains integer, intent(out) :: task ! Next task to be performed by the calling program ! + ! + include 'laxlib.fh' + ! ! ... LOCAL variables ! INTEGER, PARAMETER :: maxter = 20 @@ -250,7 +253,7 @@ contains ! ... diagonalize the reduced hamiltonian ! IF( my_bgrp_id == root_bgrp_id ) THEN - CALL cdiaghg( nbase, nvec, hc, sc, nvecx, work%ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nbase, nvec, hc, sc, nvecx, work%ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) END IF IF( nbgrp > 1 ) THEN CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) @@ -337,7 +340,7 @@ contains ! ! ... "normalize" correction vectors psi(:,nb1:nbase+notcnv) in ! ... order to improve numerical stability of subspace diagonalization - ! ... (cdiaghg) ew is used as work array : + ! ... (diaghg) ew is used as work array : ! ! ... ew = , i = nbase + 1, nbase + notcnv ! @@ -423,7 +426,7 @@ contains ! ... diagonalize the reduced hamiltonian ! IF( my_bgrp_id == root_bgrp_id ) THEN - CALL cdiaghg( nbase, nvec, hc, sc, nvecx, work%ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nbase, nvec, hc, sc, nvecx, work%ew, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) END IF IF( nbgrp > 1 ) THEN CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) diff --git a/KS_Solvers/PPCG/ppcg_gamma.f90 b/KS_Solvers/PPCG/ppcg_gamma.f90 index cedcb6610..dfe21e093 100644 --- a/KS_Solvers/PPCG/ppcg_gamma.f90 +++ b/KS_Solvers/PPCG/ppcg_gamma.f90 @@ -13,11 +13,11 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, & USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id, & gstart USE descriptors, ONLY : la_descriptor, descla_init, descla_local_dims - USE parallel_toolkit, ONLY : dsqmsym - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp ! IMPLICIT NONE + ! + include 'laxlib.fh' + ! REAL (DP), PARAMETER :: ONE = 1.D0, ZERO = 0.D0 COMPLEX (DP), PARAMETER :: C_ZERO = (0.D0,0.D0) ! @@ -91,6 +91,12 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, & INTEGER, PARAMETER :: blocksz = 256 ! used to optimize some omp parallel do loops INTEGER :: nblock + + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp + + nblock = (npw -1) /blocksz + 1 ! used to optimize some omp parallel do loops res_array = 0.0 @@ -99,6 +105,11 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, & ! ! ... Initialization and validation ! + + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + leg_ortho = leg_ortho, ortho_comm_id = ortho_comm_id, ortho_parent_comm = ortho_parent_comm, & + ortho_cntx = ortho_cntx, do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) + print_info = 0 ! 3 sbsize3 = sbsize*3 npw2 = npw*2 @@ -1281,15 +1292,15 @@ CONTAINS ! ... diagonalize the reduced hamiltonian ! Calling block parallel algorithm ! - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of prdiaghg e and vl are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg e and vl are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id) CALL prdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) + IF( my_bgrp_id == root_bgrp_id) CALL pdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( e, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL prdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) + CALL pdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) END IF ! ! "Rotate" psi to eigenvectors @@ -1460,7 +1471,7 @@ CONTAINS if (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp ! ! CALL dsqmsym( nbnd, dm, nx, desc ) - CALL dsqmsym( k, dm, nx, desc ) + CALL laxlib_dsqmsym( k, dm, nx, desc ) ! DEALLOCATE( work ) ! diff --git a/KS_Solvers/PPCG/ppcg_k.f90 b/KS_Solvers/PPCG/ppcg_k.f90 index c075941f9..2cf9d7971 100644 --- a/KS_Solvers/PPCG/ppcg_k.f90 +++ b/KS_Solvers/PPCG/ppcg_k.f90 @@ -12,11 +12,9 @@ SUBROUTINE ppcg_k( h_psi, s_psi, overlap, precondition, & USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id USE descriptors, ONLY : la_descriptor, descla_init, descla_local_dims - USE parallel_toolkit, ONLY : zsqmher - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp ! IMPLICIT NONE + include 'laxlib.fh' COMPLEX (DP), PARAMETER :: C_ONE = (1.D0,0.D0), C_ZERO = (0.D0,0.D0) ! ! ... I/O variables @@ -92,12 +90,19 @@ SUBROUTINE ppcg_k( h_psi, s_psi, overlap, precondition, & INTEGER, PARAMETER :: blocksz = 256 ! used to optimize some omp parallel do loops INTEGER :: nblock + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp res_array = 0.0 ! CALL start_clock( 'ppcg_k' ) ! ! ... Initialization and validation + + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + leg_ortho = leg_ortho, ortho_comm_id = ortho_comm_id, ortho_parent_comm = ortho_parent_comm, & + ortho_cntx = ortho_cntx, do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) ! print_info = 0 ! 3 sbsize3 = sbsize*3 @@ -1244,15 +1249,15 @@ CONTAINS ! ... diagonalize the reduced hamiltonian ! Calling block parallel algorithm ! - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pcdiaghg e and vl are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg e and vl are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id) CALL pcdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) + IF( my_bgrp_id == root_bgrp_id) CALL pdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vl, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( e, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL pcdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) + CALL pdiaghg( nbnd, Hl, Sl, nx, e, vl, desc ) END IF ! ! "Rotate" psi to eigenvectors @@ -1422,7 +1427,7 @@ CONTAINS if (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp ! ! CALL zsqmher( nbnd, dm, nx, desc ) - CALL zsqmher( k, dm, nx, desc ) + CALL laxlib_zsqmher( k, dm, nx, desc ) ! DEALLOCATE( work ) ! diff --git a/KS_Solvers/PPCG/rotate_wfc_gamma.f90 b/KS_Solvers/PPCG/rotate_wfc_gamma.f90 index e1834c63f..568b24746 100644 --- a/KS_Solvers/PPCG/rotate_wfc_gamma.f90 +++ b/KS_Solvers/PPCG/rotate_wfc_gamma.f90 @@ -22,6 +22,8 @@ SUBROUTINE rotate_wfc_gamma( h_psi, s_psi, overlap, & ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! INTEGER :: npw, npwx, nstart, nbnd @@ -111,7 +113,7 @@ SUBROUTINE rotate_wfc_gamma( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('rotwfcg:diag'); !write(*,*) 'start rotwfcg:diag' ; FLUSH(6) - CALL rdiaghg( nstart, nbnd, hr, sr, nstart, en, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nstart, nbnd, hr, sr, nstart, en, vr, me_bgrp, root_bgrp, intra_bgrp_comm ) call stop_clock('rotwfcg:diag'); !write(*,*) 'stop rotwfcg:diag' ; FLUSH(6) call start_clock('rotwfcg:evc'); !write(*,*) 'start rotwfcg:evc' ; FLUSH(6) ! @@ -157,15 +159,14 @@ SUBROUTINE protate_wfc_gamma( h_psi, s_psi, overlap, & USE ppcg_param, ONLY : DP, gamma_only USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id, nbgrp, my_bgrp_id USE mp_bands_util, ONLY : gstart ! index of the first nonzero G - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp USE descriptors, ONLY : la_descriptor, descla_init USE parallel_toolkit, ONLY : dsqmsym USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier - ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! INTEGER :: npw, npwx, nstart, nbnd @@ -202,9 +203,16 @@ SUBROUTINE protate_wfc_gamma( h_psi, s_psi, overlap, & ! s_psi(npwx,npw,nvec,spsi) ! calculates S|psi> (if needed) ! Vectors psi,hpsi,spsi are dimensioned (npwx,npol,nvec) + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp call start_clock('protwfcg'); !write(*,*) 'start protwfcg' ; FLUSH(6) ! + CALL laxlib_getval( dims = np_ortho, mycoords = me_ortho, comm = ortho_comm, & + leg = leg_ortho, comm_id = ortho_comm_id, parent_comm = ortho_parent_comm, & + blacs_context = ortho_cntx, distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) + ! ALLOCATE( desc_ip( np_ortho(1), np_ortho(2) ) ) ALLOCATE( rank_ip( np_ortho(1), np_ortho(2) ) ) ! @@ -254,15 +262,15 @@ SUBROUTINE protate_wfc_gamma( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('protwfcg:diag'); !write(*,*) 'start protwfcg:diag' ; FLUSH(6) - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of prdiaghg en and vr are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg en and vr are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL prdiaghg( nstart, hr, sr, nx, en, vr, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nstart, hr, sr, nx, en, vr, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vr, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( en, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL prdiaghg( nstart, hr, sr, nx, en, vr, desc ) + CALL pdiaghg( nstart, hr, sr, nx, en, vr, desc ) END IF call stop_clock('protwfcg:diag'); !write(*,*) 'stop protwfcg:diag' ; FLUSH(6) ! diff --git a/KS_Solvers/PPCG/rotate_wfc_k.f90 b/KS_Solvers/PPCG/rotate_wfc_k.f90 index be1736785..7a8f2b747 100644 --- a/KS_Solvers/PPCG/rotate_wfc_k.f90 +++ b/KS_Solvers/PPCG/rotate_wfc_k.f90 @@ -20,6 +20,8 @@ SUBROUTINE rotate_wfc_k( h_psi, s_psi, overlap, & ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! INTEGER, INTENT(IN) :: npw, npwx, nstart, nbnd, npol @@ -108,7 +110,7 @@ SUBROUTINE rotate_wfc_k( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('rotwfck:diag'); !write(*,*) 'start rotwfck:diag';FLUSH(6) - CALL cdiaghg( nstart, nbnd, hc, sc, nstart, en, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) + CALL diaghg( nstart, nbnd, hc, sc, nstart, en, vc, me_bgrp, root_bgrp, intra_bgrp_comm ) call stop_clock('rotwfck:diag'); !write(*,*) 'stop rotwfck:diag';FLUSH(6) call start_clock('rotwfck:evc'); !write(*,*) 'start rotwfck:evc';FLUSH(6) ! @@ -152,14 +154,14 @@ SUBROUTINE protate_wfc_k( h_psi, s_psi, overlap, & USE ppcg_param, ONLY : DP USE mp_bands_util, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp_id,& nbgrp, my_bgrp_id - USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, & - ortho_parent_comm, ortho_cntx, do_distr_diag_inside_bgrp USE descriptors, ONLY : la_descriptor, descla_init USE parallel_toolkit, ONLY : zsqmher USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! ... I/O variables ! INTEGER :: npw, npwx, nstart, nbnd, npol @@ -197,9 +199,16 @@ SUBROUTINE protate_wfc_k( h_psi, s_psi, overlap, & ! s_psi(npwx,npw,nvec,spsi) ! calculates S|psi> (if needed) ! Vectors psi,hpsi,spsi are dimensioned (npwx,npol,nvec) + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, & + ortho_parent_comm, ortho_cntx + LOGICAL :: do_distr_diag_inside_bgrp call start_clock('protwfck') ! + CALL laxlib_getval( dims = np_ortho, mycoords = me_ortho, comm = ortho_comm, & + leg = leg_ortho, comm_id = ortho_comm_id, parent_comm = ortho_parent_comm, & + blacs_context = ortho_cntx, distr_diag_inside_bgrp = do_distr_diag_inside_bgrp ) + ! ALLOCATE( desc_ip( np_ortho(1), np_ortho(2) ) ) ALLOCATE( rank_ip( np_ortho(1), np_ortho(2) ) ) ! @@ -251,15 +260,15 @@ SUBROUTINE protate_wfc_k( h_psi, s_psi, overlap, & ! ... Diagonalize ! call start_clock('protwfck:diag') - IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pcdiaghg en and vc are the same across ortho_parent_comm + IF ( do_distr_diag_inside_bgrp ) THEN ! NB on output of pdiaghg en and vc are the same across ortho_parent_comm ! only the first bgrp performs the diagonalization - IF( my_bgrp_id == root_bgrp_id ) CALL pcdiaghg( nstart, hc, sc, nx, en, vc, desc ) + IF( my_bgrp_id == root_bgrp_id ) CALL pdiaghg( nstart, hc, sc, nx, en, vc, desc ) IF( nbgrp > 1 ) THEN ! results must be brodcast to the other band groups CALL mp_bcast( vc, root_bgrp_id, inter_bgrp_comm ) CALL mp_bcast( en, root_bgrp_id, inter_bgrp_comm ) ENDIF ELSE - CALL pcdiaghg( nstart, hc, sc, nx, en, vc, desc ) + CALL pdiaghg( nstart, hc, sc, nx, en, vc, desc ) END IF call stop_clock('protwfck:diag') ! diff --git a/LAXlib/Makefile b/LAXlib/Makefile index ab608c430..ed53d6805 100644 --- a/LAXlib/Makefile +++ b/LAXlib/Makefile @@ -4,7 +4,8 @@ include ../make.inc MODFLAGS= $(MOD_FLAG)../UtilXlib $(MOD_FLAG)../ELPA/src $(MOD_FLAG)../EIGENSOLVER_GPU/lib_eigsolve $(MOD_FLAG) . -LAX = la_types.o \ +LAX = la_interface_mod.o \ +la_types.o \ la_error.o \ la_helper.o \ cdiaghg.o \ @@ -30,6 +31,8 @@ la_test.x : test.o libqela.a TEST : la_test.x +la_interface_mod.o : laxlib.fh + clean : - /bin/rm -f *.o *.a *.d *.i *~ *_tmp.f90 *.mod *.L *.x diff --git a/LAXlib/cdiaghg.f90 b/LAXlib/cdiaghg.f90 index a8b9436c1..39a85cf92 100644 --- a/LAXlib/cdiaghg.f90 +++ b/LAXlib/cdiaghg.f90 @@ -10,7 +10,7 @@ #define ONE ( 1.D0, 0.D0 ) ! !---------------------------------------------------------------------------- -SUBROUTINE cdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) +SUBROUTINE laxlib_cdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) !---------------------------------------------------------------------------- ! ! ... calculates eigenvalues and eigenvectors of the generalized problem @@ -192,10 +192,10 @@ SUBROUTINE cdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) ! RETURN ! -END SUBROUTINE cdiaghg +END SUBROUTINE laxlib_cdiaghg ! !---------------------------------------------------------------------------- -SUBROUTINE pcdiaghg( n, h, s, ldh, e, v, desc ) +SUBROUTINE laxlib_pcdiaghg( n, h, s, ldh, e, v, desc ) !---------------------------------------------------------------------------- ! ! ... calculates eigenvalues and eigenvectors of the generalized problem @@ -205,9 +205,7 @@ SUBROUTINE pcdiaghg( n, h, s, ldh, e, v, desc ) ! ... Parallel version, with full data distribution ! USE la_param - USE zhpev_module, ONLY : pzhpev_drv, zhpev_drv USE descriptors, ONLY : la_descriptor - USE parallel_toolkit, ONLY : zsqmdst, zsqmcll USE mp_diag, ONLY : ortho_parent_comm #if defined __SCALAPACK USE mp_diag, ONLY : ortho_cntx, me_blacs, np_ortho, me_ortho, ortho_comm @@ -384,11 +382,13 @@ CONTAINS ! SUBROUTINE test_drv_begin() ALLOCATE( tt( n, n ) ) - CALL zsqmcll( n, hh, nx, tt, n, desc, desc%comm ) + CALL laxlib_zsqmcll( n, hh, nx, tt, n, desc, desc%comm ) RETURN END SUBROUTINE test_drv_begin ! SUBROUTINE test_drv_end() + ! + USE la_interface_mod, ONLY: zhpev_drv ! INTEGER :: i, j, k COMPLEX(DP), ALLOCATABLE :: diag(:,:) @@ -423,11 +423,11 @@ CONTAINS IF ( info /= 0 ) & CALL lax_error__( 'test_drv_end', 'error broadcasting array e', ABS( info ) ) #endif - CALL zsqmdst( n, tt, n, hh, nx, desc ) + CALL laxlib_zsqmdst( n, tt, n, hh, nx, desc ) DEALLOCATE( tt ) CALL lax_error__('cdiaghg','stop serial',1) RETURN END SUBROUTINE test_drv_end ! -END SUBROUTINE pcdiaghg +END SUBROUTINE laxlib_pcdiaghg ! diff --git a/LAXlib/dspev_drv.f90 b/LAXlib/dspev_drv.f90 index b21f1d2a6..9946ae41b 100644 --- a/LAXlib/dspev_drv.f90 +++ b/LAXlib/dspev_drv.f90 @@ -10,20 +10,9 @@ MODULE dspev_module USE la_param - IMPLICIT NONE - SAVE - PRIVATE - - PUBLIC :: pdspev_drv, dspev_drv - PUBLIC :: diagonalize_parallel, diagonalize_serial - -#if defined __SCALAPACK - PUBLIC :: pdsyevd_drv -#endif - CONTAINS @@ -600,54 +589,6 @@ CONTAINS ! !==----------------------------------------------==! - SUBROUTINE pdspev_drv( jobz, ap, lda, w, z, ldz, & - nrl, n, nproc, mpime, comm ) - IMPLICIT NONE - CHARACTER, INTENT(IN) :: JOBZ - INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime - INTEGER, INTENT(IN) :: comm - REAL(DP) :: ap( lda, * ), w( * ), z( ldz, * ) - REAL(DP), ALLOCATABLE :: sd( : ) - LOGICAL :: tv - ! - IF( n < 1 ) RETURN - ! - tv = .false. - IF( jobz == 'V' .OR. jobz == 'v' ) tv = .true. - - ALLOCATE ( sd ( n ) ) - CALL ptredv( tv, ap, lda, w, sd, z, ldz, nrl, n, nproc, mpime, comm) - CALL ptqliv( tv, w, sd, n, z, ldz, nrl, mpime, comm) - DEALLOCATE ( sd ) - CALL peigsrtv( tv, w, z, ldz, n, nrl) - - RETURN - END SUBROUTINE pdspev_drv - -!==----------------------------------------------==! - - SUBROUTINE dspev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ ) - IMPLICIT NONE - CHARACTER :: JOBZ, UPLO - INTEGER :: IOPT, INFO, LDZ, N - REAL(DP) :: AP( * ), W( * ), Z( LDZ, * ) - REAL(DP), ALLOCATABLE :: WORK(:) - - IF( n < 1 ) RETURN - - ALLOCATE( work( 3*n ) ) - - CALL DSPEV(jobz, uplo, n, ap(1), w(1), z(1,1), ldz, work, INFO) - IF( info .NE. 0 ) THEN - CALL lax_error__( ' dspev_drv ', ' diagonalization failed ',info ) - END IF - - DEALLOCATE( work ) - - RETURN - END SUBROUTINE dspev_drv - - #if defined __SCALAPACK SUBROUTINE pdsyevd_drv( tv, n, nb, s, lds, w, ortho_cntx, ortho_comm ) @@ -800,6 +741,7 @@ END SUBROUTINE diagonalize_parallel SUBROUTINE diagonalize_serial( n, rhos, rhod ) + USE la_interface_mod, ONLY: dspev_drv IMPLICIT NONE INTEGER, INTENT(IN) :: n REAL(DP) :: rhos(:,:) @@ -837,6 +779,53 @@ SUBROUTINE diagonalize_serial( n, rhos, rhod ) END SUBROUTINE diagonalize_serial - - END MODULE dspev_module + +!==----------------------------------------------==! + + + SUBROUTINE pdspev_drv_x ( jobz, ap, lda, w, z, ldz, nrl, n, nproc, mpime, comm ) + use dspev_module + IMPLICIT NONE + CHARACTER, INTENT(IN) :: JOBZ + INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime + INTEGER, INTENT(IN) :: comm + REAL(DP) :: ap( lda, * ), w( * ), z( ldz, * ) + REAL(DP), ALLOCATABLE :: sd( : ) + LOGICAL :: tv + ! + IF( n < 1 ) RETURN + ! + tv = .false. + IF( jobz == 'V' .OR. jobz == 'v' ) tv = .true. + + ALLOCATE ( sd ( n ) ) + CALL ptredv( tv, ap, lda, w, sd, z, ldz, nrl, n, nproc, mpime, comm) + CALL ptqliv( tv, w, sd, n, z, ldz, nrl, mpime, comm) + DEALLOCATE ( sd ) + CALL peigsrtv( tv, w, z, ldz, n, nrl) + RETURN + END SUBROUTINE pdspev_drv_x + +!==----------------------------------------------==! + + SUBROUTINE dspev_drv_x( JOBZ, UPLO, N, AP, W, Z, LDZ ) + use dspev_module + IMPLICIT NONE + CHARACTER :: JOBZ, UPLO + INTEGER :: IOPT, INFO, LDZ, N + REAL(DP) :: AP( * ), W( * ), Z( LDZ, * ) + REAL(DP), ALLOCATABLE :: WORK(:) + + IF( n < 1 ) RETURN + + ALLOCATE( work( 3*n ) ) + + CALL DSPEV(jobz, uplo, n, ap(1), w(1), z(1,1), ldz, work, INFO) + IF( info .NE. 0 ) THEN + CALL lax_error__( ' dspev_drv ', ' diagonalization failed ',info ) + END IF + DEALLOCATE( work ) + RETURN + END SUBROUTINE dspev_drv_x + diff --git a/LAXlib/la_helper.f90 b/LAXlib/la_helper.f90 index ee065da8c..51c1ae52e 100644 --- a/LAXlib/la_helper.f90 +++ b/LAXlib/la_helper.f90 @@ -1,13 +1,351 @@ -!---------------------------------------------------------------------------- -SUBROUTINE laxlib_free_ortho_group() - !---------------------------------------------------------------------------- - ! - use mp_diag - IMPLICIT NONE -#if defined (__MPI) - CALL clean_ortho_group ( ) -#endif - RETURN - ! -END SUBROUTINE laxlib_free_ortho_group +! +! Copyright (C) 2003-2013 Quantum ESPRESSO group +! This file is distributed under the terms of the +! GNU General Public License. See the file `License' +! in the root directory of the present distribution, +! or http://www.gnu.org/copyleft/gpl.txt . +! +! + +SUBROUTINE laxlib_end() + use mp_diag + CALL laxlib_end_drv ( ) +END SUBROUTINE laxlib_end + + +SUBROUTINE laxlib_getval_ ( nproc_ortho, leg_ortho, np_ortho, me_ortho, ortho_comm, ortho_row_comm, ortho_col_comm, & + ortho_comm_id, ortho_parent_comm, me_blacs, np_blacs, ortho_cntx, world_cntx, do_distr_diag_inside_bgrp ) + use mp_diag, ONLY : & + nproc_ortho_ => nproc_ortho, & + leg_ortho_ => leg_ortho, & + np_ortho_ => np_ortho, & + me_ortho_ => me_ortho, & + ortho_comm_ => ortho_comm, & + ortho_row_comm_ => ortho_row_comm, & + ortho_col_comm_ => ortho_col_comm, & + ortho_comm_id_ => ortho_comm_id, & + ortho_parent_comm_ => ortho_parent_comm, & + me_blacs_ => me_blacs, & + np_blacs_ => np_blacs, & + ortho_cntx_ => ortho_cntx, & + world_cntx_ => world_cntx, & + do_distr_diag_inside_bgrp_ => do_distr_diag_inside_bgrp + IMPLICIT NONE + INTEGER, OPTIONAL, INTENT(OUT) :: nproc_ortho + INTEGER, OPTIONAL, INTENT(OUT) :: leg_ortho + INTEGER, OPTIONAL, INTENT(OUT) :: np_ortho(2) + INTEGER, OPTIONAL, INTENT(OUT) :: me_ortho(2) + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_comm + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_row_comm + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_col_comm + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_comm_id + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_parent_comm + INTEGER, OPTIONAL, INTENT(OUT) :: me_blacs + INTEGER, OPTIONAL, INTENT(OUT) :: np_blacs + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_cntx + INTEGER, OPTIONAL, INTENT(OUT) :: world_cntx + LOGICAL, OPTIONAL, INTENT(OUT) :: do_distr_diag_inside_bgrp + IF( PRESENT(nproc_ortho) ) nproc_ortho = nproc_ortho_ + IF( PRESENT(leg_ortho) ) leg_ortho = leg_ortho_ + IF( PRESENT(np_ortho) ) np_ortho = np_ortho_ + IF( PRESENT(me_ortho) ) me_ortho = me_ortho_ + IF( PRESENT(ortho_comm) ) ortho_comm = ortho_comm_ + IF( PRESENT(ortho_row_comm) ) ortho_row_comm = ortho_row_comm_ + IF( PRESENT(ortho_col_comm) ) ortho_col_comm = ortho_col_comm_ + IF( PRESENT(ortho_comm_id) ) ortho_comm_id = ortho_comm_id_ + IF( PRESENT(ortho_parent_comm) ) ortho_parent_comm = ortho_parent_comm_ + IF( PRESENT(me_blacs) ) me_blacs = me_blacs_ + IF( PRESENT(np_blacs) ) np_blacs = np_blacs_ + IF( PRESENT(ortho_cntx) ) ortho_cntx = ortho_cntx_ + IF( PRESENT(world_cntx) ) world_cntx = world_cntx_ + IF( PRESENT(do_distr_diag_inside_bgrp) ) do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp_ +END SUBROUTINE +! + +!---------------------------------------------------------------------------- + +SUBROUTINE laxlib_start_drv( ndiag_, my_world_comm, parent_comm, do_distr_diag_inside_bgrp_ ) + ! + use mp_diag + ! + ! ... Ortho/diag/linear algebra group initialization + ! + IMPLICIT NONE + ! + INTEGER, INTENT(INOUT) :: ndiag_ ! (IN) input number of procs in the diag group, (OUT) actual number + INTEGER, INTENT(IN) :: my_world_comm ! parallel communicator of the "local" world + INTEGER, INTENT(IN) :: parent_comm ! parallel communicator inside which the distributed linear algebra group + ! communicators are created + LOGICAL, INTENT(IN) :: do_distr_diag_inside_bgrp_ ! comme son nom l'indique + ! + INTEGER :: mpime = 0 ! the global MPI task index (used in clocks) can be set with a laxlib_rank call + ! + INTEGER :: nproc_ortho_try + INTEGER :: parent_nproc ! nproc of the parent group + INTEGER :: world_nproc ! nproc of the world group + INTEGER :: my_parent_id ! id of the parent communicator + INTEGER :: nparent_comm ! mumber of parent communicators + INTEGER :: ierr = 0 + ! + IF( lax_is_initialized ) & + CALL laxlib_end_drv ( ) + + world_nproc = laxlib_size( my_world_comm ) ! the global number of processors in world_comm + mpime = laxlib_rank( my_world_comm ) ! set the global MPI task index (used in clocks) + parent_nproc = laxlib_size( parent_comm )! the number of processors in the current parent communicator + my_parent_id = mpime / parent_nproc ! set the index of the current parent communicator + nparent_comm = world_nproc/parent_nproc ! number of paren communicators + + ! save input value inside the module + do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp_ + + ! +#if defined __SCALAPACK + np_blacs = laxlib_size( my_world_comm ) + me_blacs = laxlib_rank( my_world_comm ) + ! + ! define a 1D grid containing all MPI tasks of the global communicator + ! NOTE: world_cntx has the MPI communicator on entry and the BLACS context on exit + ! BLACS_GRIDINIT() will create a copy of the communicator, which can be + ! later retrieved using CALL BLACS_GET(world_cntx, 10, comm_copy) + ! + world_cntx = my_world_comm + CALL BLACS_GRIDINIT( world_cntx, 'Row', 1, np_blacs ) + ! +#endif + ! + IF( ndiag_ > 0 ) THEN + ! command-line argument -ndiag N or -northo N set to a value N + ! use the command line value ensuring that it falls in the proper range + nproc_ortho_try = MIN( ndiag_ , parent_nproc ) + ELSE + ! no command-line argument -ndiag N or -northo N is present + ! insert here custom architecture specific default definitions +#if defined __SCALAPACK + nproc_ortho_try = MAX( parent_nproc/2, 1 ) +#else + nproc_ortho_try = 1 +#endif + END IF + ! + ! the ortho group for parallel linear algebra is a sub-group of the pool, + ! then there are as many ortho groups as pools. + ! + CALL init_ortho_group ( nproc_ortho_try, my_world_comm, parent_comm, nparent_comm, my_parent_id ) + ! + ! set the number of processors in the diag group to the actual number used + ! + ndiag_ = nproc_ortho + ! + lax_is_initialized = .true. + ! + RETURN + ! +CONTAINS + + SUBROUTINE init_ortho_group ( nproc_try_in, my_world_comm, comm_all, nparent_comm, my_parent_id ) + ! + USE mp_diag + ! + IMPLICIT NONE + + INTEGER, INTENT(IN) :: nproc_try_in, comm_all + INTEGER, INTENT(IN) :: my_world_comm ! parallel communicator of the "local" world + INTEGER, INTENT(IN) :: nparent_comm + INTEGER, INTENT(IN) :: my_parent_id ! id of the parent communicator + + INTEGER :: ierr, color, key, me_all, nproc_all, nproc_try + +#if defined __SCALAPACK + INTEGER, ALLOCATABLE :: blacsmap(:,:) + INTEGER, ALLOCATABLE :: ortho_cntx_pe(:) + INTEGER :: nprow, npcol, myrow, mycol, i, j, k + INTEGER, EXTERNAL :: BLACS_PNUM +#endif + +#if defined __MPI + + me_all = laxlib_rank( comm_all ) + ! + nproc_all = laxlib_size( comm_all ) + ! + nproc_try = MIN( nproc_try_in, nproc_all ) + nproc_try = MAX( nproc_try, 1 ) + + ! find the square closer (but lower) to nproc_try + ! + CALL grid2d_dims( 'S', nproc_try, np_ortho(1), np_ortho(2) ) + ! + ! now, and only now, it is possible to define the number of tasks + ! in the ortho group for parallel linear algebra + ! + nproc_ortho = np_ortho(1) * np_ortho(2) + ! + IF( nproc_all >= 4*nproc_ortho ) THEN + ! + ! here we choose a processor every 4, in order not to stress memory BW + ! on multi core procs, for which further performance enhancements are + ! possible using OpenMP BLAS inside regter/cegter/rdiaghg/cdiaghg + ! (to be implemented) + ! + color = 0 + IF( me_all < 4*nproc_ortho .AND. MOD( me_all, 4 ) == 0 ) color = 1 + ! + leg_ortho = 4 + ! + ELSE IF( nproc_all >= 2*nproc_ortho ) THEN + ! + ! here we choose a processor every 2, in order not to stress memory BW + ! + color = 0 + IF( me_all < 2*nproc_ortho .AND. MOD( me_all, 2 ) == 0 ) color = 1 + ! + leg_ortho = 2 + ! + ELSE + ! + ! here we choose the first processors + ! + color = 0 + IF( me_all < nproc_ortho ) color = 1 + ! + leg_ortho = 1 + ! + END IF + ! + key = me_all + ! + ! initialize the communicator for the new group by splitting the input communicator + ! + CALL laxlib_comm_split ( comm_all, color, key, ortho_comm ) + ! + ! and remember where it comes from + ! + ortho_parent_comm = comm_all + ! + ! Computes coordinates of the processors, in row maior order + ! + me_ortho1 = laxlib_rank( ortho_comm ) + ! + IF( me_all == 0 .AND. me_ortho1 /= 0 ) & + CALL lax_error__( " init_ortho_group ", " wrong root task in ortho group ", ierr ) + ! + if( color == 1 ) then + ortho_comm_id = 1 + CALL GRID2D_COORDS( 'R', me_ortho1, np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2) ) + CALL GRID2D_RANK( 'R', np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2), ierr ) + IF( ierr /= me_ortho1 ) & + CALL lax_error__( " init_ortho_group ", " wrong task coordinates in ortho group ", ierr ) + IF( me_ortho1*leg_ortho /= me_all ) & + CALL lax_error__( " init_ortho_group ", " wrong rank assignment in ortho group ", ierr ) + + CALL laxlib_comm_split( ortho_comm, me_ortho(2), me_ortho(1), ortho_col_comm) + CALL laxlib_comm_split( ortho_comm, me_ortho(1), me_ortho(2), ortho_row_comm) + + else + ortho_comm_id = 0 + me_ortho(1) = me_ortho1 + me_ortho(2) = me_ortho1 + endif + +#if defined __SCALAPACK + ! + ! This part is used to eliminate the image dependency from ortho groups + ! SCALAPACK is now independent from whatever level of parallelization + ! is present on top of pool parallelization + ! + ALLOCATE( ortho_cntx_pe( nparent_comm ) ) + ALLOCATE( blacsmap( np_ortho(1), np_ortho(2) ) ) + + DO j = 1, nparent_comm + + CALL BLACS_GET(world_cntx, 10, ortho_cntx_pe( j ) ) ! retrieve communicator of world context + blacsmap = 0 + nprow = np_ortho(1) + npcol = np_ortho(2) + + IF( ( j == ( my_parent_id + 1 ) ) .and. ( ortho_comm_id > 0 ) ) THEN + + blacsmap( me_ortho(1) + 1, me_ortho(2) + 1 ) = BLACS_PNUM( world_cntx, 0, me_blacs ) + + END IF + + ! All MPI tasks defined in the global communicator take part in the definition of the BLACS grid + + CALL MPI_ALLREDUCE( MPI_IN_PLACE, blacsmap, SIZE(blacsmap), MPI_INTEGER, MPI_SUM, my_world_comm, ierr ) + IF( ierr /= 0 ) & + CALL lax_error__( ' init_ortho_group ', ' problem in MPI_ALLREDUCE of blacsmap ', ierr ) + + CALL BLACS_GRIDMAP( ortho_cntx_pe( j ), blacsmap, nprow, nprow, npcol ) + + CALL BLACS_GRIDINFO( ortho_cntx_pe( j ), nprow, npcol, myrow, mycol ) + + IF( ( j == ( my_parent_id + 1 ) ) .and. ( ortho_comm_id > 0 ) ) THEN + + IF( np_ortho(1) /= nprow ) & + CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong no. of task rows ', 1 ) + IF( np_ortho(2) /= npcol ) & + CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong no. of task columns ', 1 ) + IF( me_ortho(1) /= myrow ) & + CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong task row ID ', 1 ) + IF( me_ortho(2) /= mycol ) & + CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong task columns ID ', 1 ) + + ortho_cntx = ortho_cntx_pe( j ) + + END IF + + END DO + + DEALLOCATE( blacsmap ) + DEALLOCATE( ortho_cntx_pe ) + + ! end SCALAPACK code block + +#endif + +#else + + ortho_comm_id = 1 + +#endif + + RETURN + END SUBROUTINE init_ortho_group + +END SUBROUTINE laxlib_start_drv + +!------------------------------------------------------------------------------! + +SUBROUTINE print_lambda_x( lambda, descla, n, nshow, nudx, ccc, ionode, iunit ) + USE la_param + USE descriptors, ONLY: la_descriptor + USE la_interface_mod, ONLY: collect_lambda + IMPLICIT NONE + real(DP), intent(in) :: lambda(:,:,:), ccc + TYPE(la_descriptor), INTENT(IN) :: descla(:) + integer, intent(in) :: n, nshow, nudx + logical, intent(in) :: ionode + integer, intent(in) :: iunit + ! + integer :: nnn, j, i, is + real(DP), allocatable :: lambda_repl(:,:) + nnn = min( nudx, nshow ) + ALLOCATE( lambda_repl( nudx, nudx ) ) + IF( ionode ) WRITE( iunit,*) + DO is = 1, SIZE( lambda, 3 ) + CALL collect_lambda( lambda_repl, lambda(:,:,is), descla(is) ) + IF( ionode ) THEN + WRITE( iunit,3370) ' lambda nudx, spin = ', nudx, is + IF( nnn < n ) WRITE( iunit,3370) ' print only first ', nnn + DO i=1,nnn + WRITE( iunit,3380) (lambda_repl(i,j)*ccc,j=1,nnn) + END DO + END IF + END DO + DEALLOCATE( lambda_repl ) +3370 FORMAT(26x,a,2i4) +3380 FORMAT(9f8.4) + RETURN +END SUBROUTINE print_lambda_x diff --git a/LAXlib/la_interface_mod.f90 b/LAXlib/la_interface_mod.f90 new file mode 100644 index 000000000..6673c7b5f --- /dev/null +++ b/LAXlib/la_interface_mod.f90 @@ -0,0 +1,5 @@ +MODULE la_interface_mod + IMPLICIT NONE + SAVE + include 'laxlib.fh' +END MODULE diff --git a/LAXlib/laxlib.fh b/LAXlib/laxlib.fh index 106691a65..74692e415 100644 --- a/LAXlib/laxlib.fh +++ b/LAXlib/laxlib.fh @@ -7,18 +7,18 @@ ! ! !---------------------------------------------------------------------------- -INTERFACE -SUBROUTINE rdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) +INTERFACE diaghg +SUBROUTINE laxlib_rdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) !---------------------------------------------------------------------------- ! ... Hv=eSv, with H symmetric matrix, S overlap matrix. ! ... On output both matrix are unchanged ! ! ... LAPACK version - uses both DSYGV and DSYGVX ! - USE la_param - ! IMPLICIT NONE ! + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + ! INTEGER, INTENT(IN) :: n, m, ldh ! dimension of the matrix to be diagonalized ! number of eigenstates to be calculated @@ -33,4 +33,236 @@ SUBROUTINE rdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) INTEGER, INTENT(IN) :: me_bgrp, root_bgrp, intra_bgrp_comm ! END SUBROUTINE +!---------------------------------------------------------------------------- +SUBROUTINE laxlib_cdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) + !---------------------------------------------------------------------------- + ! + ! ... calculates eigenvalues and eigenvectors of the generalized problem + ! ... Hv=eSv, with H hermitean matrix, S overlap matrix. + ! ... On output both matrix are unchanged + ! + ! ... LAPACK version - uses both ZHEGV and ZHEGVX + ! + IMPLICIT NONE + ! + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + ! + INTEGER, INTENT(IN) :: n, m, ldh + ! dimension of the matrix to be diagonalized + ! number of eigenstates to be calculate + ! leading dimension of h, as declared in the calling pgm unit + COMPLEX(DP), INTENT(INOUT) :: h(ldh,n), s(ldh,n) + ! actually intent(in) but compilers don't know and complain + ! matrix to be diagonalized + ! overlap matrix + REAL(DP), INTENT(OUT) :: e(n) + ! eigenvalues + COMPLEX(DP), INTENT(OUT) :: v(ldh,m) + ! eigenvectors (column-wise) + INTEGER, INTENT(IN) :: me_bgrp, root_bgrp, intra_bgrp_comm +END SUBROUTINE END INTERFACE +! +! +! +INTERFACE pdiaghg +!---------------------------------------------------------------------------- +SUBROUTINE laxlib_pcdiaghg( n, h, s, ldh, e, v, desc ) + !---------------------------------------------------------------------------- + ! + ! ... calculates eigenvalues and eigenvectors of the generalized problem + ! ... Hv=eSv, with H hermitean matrix, S overlap matrix. + ! ... On output both matrix are unchanged + ! + ! ... Parallel version, with full data distribution + ! + USE descriptors, ONLY : la_descriptor + ! + IMPLICIT NONE + ! + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + ! + INTEGER, INTENT(IN) :: n, ldh + ! dimension of the matrix to be diagonalized + ! leading dimension of h, as declared in the calling pgm unit + COMPLEX(DP), INTENT(INOUT) :: h(ldh,ldh), s(ldh,ldh) + ! actually intent(in) but compilers don't know and complain + ! matrix to be diagonalized + ! overlap matrix + REAL(DP), INTENT(OUT) :: e(n) + ! eigenvalues + COMPLEX(DP), INTENT(OUT) :: v(ldh,ldh) + ! eigenvectors (column-wise) + TYPE(la_descriptor), INTENT(IN) :: desc +END SUBROUTINE +! +!---------------------------------------------------------------------------- +SUBROUTINE laxlib_prdiaghg( n, h, s, ldh, e, v, desc ) + !---------------------------------------------------------------------------- + ! + ! ... calculates eigenvalues and eigenvectors of the generalized problem + ! ... Hv=eSv, with H symmetric matrix, S overlap matrix. + ! ... On output both matrix are unchanged + ! + ! ... Parallel version with full data distribution + ! + USE descriptors, ONLY : la_descriptor + ! + IMPLICIT NONE + ! + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + ! + INTEGER, INTENT(IN) :: n, ldh + ! dimension of the matrix to be diagonalized and number of eigenstates to be calculated + ! leading dimension of h, as declared in the calling pgm unit + REAL(DP), INTENT(INOUT) :: h(ldh,ldh), s(ldh,ldh) + ! matrix to be diagonalized + ! overlap matrix + ! + REAL(DP), INTENT(OUT) :: e(n) + ! eigenvalues + REAL(DP), INTENT(OUT) :: v(ldh,ldh) + ! eigenvectors (column-wise) + TYPE(la_descriptor), INTENT(IN) :: desc +END SUBROUTINE +END INTERFACE + +INTERFACE laxlib_start + SUBROUTINE laxlib_start_drv( ndiag_, my_world_comm, parent_comm, do_distr_diag_inside_bgrp_ ) + IMPLICIT NONE + INTEGER, INTENT(INOUT) :: ndiag_ ! (IN) input number of procs in the diag group, (OUT) actual number + INTEGER, INTENT(IN) :: my_world_comm ! parallel communicator of the "local" world + INTEGER, INTENT(IN) :: parent_comm ! parallel communicator inside which the distributed linear algebra group + ! communicators are created + LOGICAL, INTENT(IN) :: do_distr_diag_inside_bgrp_ ! comme son nom l'indique + END SUBROUTINE +END INTERFACE laxlib_start + +INTERFACE laxlib_getval +SUBROUTINE laxlib_getval_ ( nproc_ortho, leg_ortho, np_ortho, me_ortho, ortho_comm, ortho_row_comm, ortho_col_comm, & + ortho_comm_id, ortho_parent_comm, me_blacs, np_blacs, ortho_cntx, world_cntx, do_distr_diag_inside_bgrp ) + IMPLICIT NONE + INTEGER, OPTIONAL, INTENT(OUT) :: nproc_ortho + INTEGER, OPTIONAL, INTENT(OUT) :: leg_ortho + INTEGER, OPTIONAL, INTENT(OUT) :: np_ortho(2) + INTEGER, OPTIONAL, INTENT(OUT) :: me_ortho(2) + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_comm + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_row_comm + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_col_comm + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_comm_id + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_parent_comm + INTEGER, OPTIONAL, INTENT(OUT) :: me_blacs + INTEGER, OPTIONAL, INTENT(OUT) :: np_blacs + INTEGER, OPTIONAL, INTENT(OUT) :: ortho_cntx + INTEGER, OPTIONAL, INTENT(OUT) :: world_cntx + LOGICAL, OPTIONAL, INTENT(OUT) :: do_distr_diag_inside_bgrp +END SUBROUTINE +END INTERFACE + + INTERFACE print_lambda + SUBROUTINE print_lambda_x( lambda, descla, n, nshow, nudx, ccc, ionode, iunit ) + USE descriptors, ONLY: la_descriptor + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + REAL(DP), INTENT(IN) :: lambda(:,:,:), ccc + TYPE(la_descriptor), INTENT(IN) :: descla(:) + INTEGER, INTENT(IN) :: n, nshow, nudx + LOGICAL, INTENT(IN) :: ionode + INTEGER, INTENT(IN) :: iunit + END SUBROUTINE + END INTERFACE + +INTERFACE dspev_drv + SUBROUTINE dspev_drv_x( JOBZ, UPLO, N, AP, W, Z, LDZ ) + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + CHARACTER :: JOBZ, UPLO + INTEGER :: LDZ, N + REAL(DP) :: AP( * ), W( * ), Z( LDZ, * ) + END SUBROUTINE + SUBROUTINE pdspev_drv_x ( jobz, ap, lda, w, z, ldz, nrl, n, nproc, mpime, comm ) + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + CHARACTER, INTENT(IN) :: JOBZ + INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime + INTEGER, INTENT(IN) :: comm + REAL(DP) :: ap( lda, * ), w( * ), z( ldz, * ) + END SUBROUTINE +END INTERFACE + +INTERFACE zhpev_drv + SUBROUTINE zhpev_drv_x( JOBZ, UPLO, N, AP, W, Z, LDZ ) + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + CHARACTER :: JOBZ, UPLO + INTEGER :: LDZ, N + COMPLEX(DP) :: AP( * ), Z( LDZ, * ) + REAL(DP) :: W( * ) + END SUBROUTINE + SUBROUTINE pzhpev_drv_x( jobz, ap, lda, w, z, ldz, nrl, n, nproc, mpime, comm ) + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + CHARACTER :: JOBZ + INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime + INTEGER, INTENT(IN) :: comm + COMPLEX(DP) :: ap( lda, * ), z( ldz, * ) + REAL(DP) :: w( * ) + END SUBROUTINE +END INTERFACE + + INTERFACE distribute_lambda + SUBROUTINE distribute_lambda_x( lambda_repl, lambda_dist, desc ) + USE descriptors, ONLY : la_descriptor + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + REAL(DP), INTENT(IN) :: lambda_repl(:,:) + REAL(DP), INTENT(OUT) :: lambda_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + END SUBROUTINE distribute_lambda_x + END INTERFACE + + INTERFACE collect_lambda + SUBROUTINE collect_lambda_x( lambda_repl, lambda_dist, desc ) + USE descriptors, ONLY : la_descriptor + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + REAL(DP), INTENT(OUT) :: lambda_repl(:,:) + REAL(DP), INTENT(IN) :: lambda_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + END SUBROUTINE collect_lambda_x + END INTERFACE + + INTERFACE setval_lambda + SUBROUTINE setval_lambda_x( lambda_dist, i, j, val, desc ) + USE descriptors, ONLY : la_descriptor + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + REAL(DP), INTENT(OUT) :: lambda_dist(:,:) + INTEGER, INTENT(IN) :: i, j + REAL(DP), INTENT(IN) :: val + TYPE(la_descriptor), INTENT(IN) :: desc + END SUBROUTINE setval_lambda_x + END INTERFACE + + INTERFACE distribute_zmat + SUBROUTINE distribute_zmat_x( zmat_repl, zmat_dist, desc ) + USE descriptors, ONLY : la_descriptor + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + REAL(DP), INTENT(IN) :: zmat_repl(:,:) + REAL(DP), INTENT(OUT) :: zmat_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + END SUBROUTINE distribute_zmat_x + END INTERFACE + + INTERFACE collect_zmat + SUBROUTINE collect_zmat_x( zmat_repl, zmat_dist, desc ) + USE descriptors, ONLY : la_descriptor + IMPLICIT NONE + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + REAL(DP), INTENT(OUT) :: zmat_repl(:,:) + REAL(DP), INTENT(IN) :: zmat_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + END SUBROUTINE collect_zmat_x + END INTERFACE + diff --git a/LAXlib/make.depend b/LAXlib/make.depend index bbb167c9f..125a54dc9 100644 --- a/LAXlib/make.depend +++ b/LAXlib/make.depend @@ -3,20 +3,27 @@ cdiaghg.o : la_types.o cdiaghg.o : mp_diag.o cdiaghg.o : ptoolkit.o cdiaghg.o : zhpev_drv.o +cdiaghg.o : la_interface_mod.o dspev_drv.o : la_param.o dspev_drv.o : la_types.o la_error.o : la_param.o la_helper.o : mp_diag.o +la_helper.o : la_interface_mod.o +mp_diag.o : la_param.o +mp_diag.o : la_interface_mod.o ptoolkit.o : dspev_drv.o ptoolkit.o : la_param.o ptoolkit.o : la_types.o ptoolkit.o : zhpev_drv.o +ptoolkit.o : la_interface_mod.o rdiaghg.o : dspev_drv.o rdiaghg.o : la_param.o rdiaghg.o : la_types.o rdiaghg.o : mp_diag.o +rdiaghg.o : la_interface_mod.o test.o : dspev_drv.o test.o : la_param.o test.o : la_types.o +test.o : la_interface_mod.o transto.o : la_param.o zhpev_drv.o : la_param.o diff --git a/LAXlib/mp_diag.f90 b/LAXlib/mp_diag.f90 index dc8293f9e..3a491677e 100644 --- a/LAXlib/mp_diag.f90 +++ b/LAXlib/mp_diag.f90 @@ -41,261 +41,18 @@ MODULE mp_diag ! LOGICAL :: do_distr_diag_inside_bgrp = .true. ! whether the distributed diagoalization should be performed ! at the band group level (bgrp) or at its parent level + ! + LOGICAL, SAVE :: lax_is_initialized = .false. + ! CONTAINS ! - !---------------------------------------------------------------------------- - SUBROUTINE mp_start_diag( ndiag_, my_world_comm, parent_comm, do_distr_diag_inside_bgrp_ ) - !--------------------------------------------------------------------------- - ! - ! ... Ortho/diag/linear algebra group initialization - ! - IMPLICIT NONE - ! - INTEGER, INTENT(INOUT) :: ndiag_ ! (IN) input number of procs in the diag group, (OUT) actual number - INTEGER, INTENT(IN) :: my_world_comm ! parallel communicator of the "local" world - INTEGER, INTENT(IN) :: parent_comm ! parallel communicator inside which the distributed linear algebra group - ! communicators are created - LOGICAL, INTENT(IN) :: do_distr_diag_inside_bgrp_ ! comme son nom l'indique - ! - INTEGER :: mpime = 0 ! the global MPI task index (used in clocks) can be set with a laxlib_rank call - ! - INTEGER :: nproc_ortho_try - INTEGER :: parent_nproc ! nproc of the parent group - INTEGER :: world_nproc ! nproc of the world group - INTEGER :: my_parent_id ! id of the parent communicator - INTEGER :: nparent_comm ! mumber of parent communicators - INTEGER :: ierr = 0 - ! - world_nproc = laxlib_size( my_world_comm ) ! the global number of processors in world_comm - mpime = laxlib_rank( my_world_comm ) ! set the global MPI task index (used in clocks) - parent_nproc = laxlib_size( parent_comm )! the number of processors in the current parent communicator - my_parent_id = mpime / parent_nproc ! set the index of the current parent communicator - nparent_comm = world_nproc/parent_nproc ! number of paren communicators - - ! save input value inside the module - do_distr_diag_inside_bgrp = do_distr_diag_inside_bgrp_ - - ! -#if defined __SCALAPACK - np_blacs = laxlib_size( my_world_comm ) - me_blacs = laxlib_rank( my_world_comm ) - ! - ! define a 1D grid containing all MPI tasks of the global communicator - ! NOTE: world_cntx has the MPI communicator on entry and the BLACS context on exit - ! BLACS_GRIDINIT() will create a copy of the communicator, which can be - ! later retrieved using CALL BLACS_GET(world_cntx, 10, comm_copy) - ! - world_cntx = my_world_comm - CALL BLACS_GRIDINIT( world_cntx, 'Row', 1, np_blacs ) - ! -#endif - ! - IF( ndiag_ > 0 ) THEN - ! command-line argument -ndiag N or -northo N set to a value N - ! use the command line value ensuring that it falls in the proper range - nproc_ortho_try = MIN( ndiag_ , parent_nproc ) - ELSE - ! no command-line argument -ndiag N or -northo N is present - ! insert here custom architecture specific default definitions -#if defined __SCALAPACK - nproc_ortho_try = MAX( parent_nproc/2, 1 ) -#else - nproc_ortho_try = 1 -#endif - END IF - ! - ! the ortho group for parallel linear algebra is a sub-group of the pool, - ! then there are as many ortho groups as pools. - ! - CALL init_ortho_group( nproc_ortho_try, my_world_comm, parent_comm, nparent_comm, my_parent_id ) - ! - ! set the number of processors in the diag group to the actual number used - ! - ndiag_ = nproc_ortho - ! - RETURN - ! - END SUBROUTINE mp_start_diag - ! - ! - SUBROUTINE init_ortho_group( nproc_try_in, my_world_comm, comm_all, nparent_comm, my_parent_id ) - ! - IMPLICIT NONE - - INTEGER, INTENT(IN) :: nproc_try_in, comm_all - INTEGER, INTENT(IN) :: my_world_comm ! parallel communicator of the "local" world - INTEGER, INTENT(IN) :: nparent_comm - INTEGER, INTENT(IN) :: my_parent_id ! id of the parent communicator - - LOGICAL, SAVE :: first = .true. - INTEGER :: ierr, color, key, me_all, nproc_all, nproc_try - -#if defined __SCALAPACK - INTEGER, ALLOCATABLE :: blacsmap(:,:) - INTEGER, ALLOCATABLE :: ortho_cntx_pe(:) - INTEGER :: nprow, npcol, myrow, mycol, i, j, k - INTEGER, EXTERNAL :: BLACS_PNUM -#endif - -#if defined __MPI - - me_all = laxlib_rank( comm_all ) - ! - nproc_all = laxlib_size( comm_all ) - ! - nproc_try = MIN( nproc_try_in, nproc_all ) - nproc_try = MAX( nproc_try, 1 ) - - IF( .NOT. first ) CALL clean_ortho_group ( ) - - ! find the square closer (but lower) to nproc_try - ! - CALL grid2d_dims( 'S', nproc_try, np_ortho(1), np_ortho(2) ) - ! - ! now, and only now, it is possible to define the number of tasks - ! in the ortho group for parallel linear algebra - ! - nproc_ortho = np_ortho(1) * np_ortho(2) - ! - IF( nproc_all >= 4*nproc_ortho ) THEN - ! - ! here we choose a processor every 4, in order not to stress memory BW - ! on multi core procs, for which further performance enhancements are - ! possible using OpenMP BLAS inside regter/cegter/rdiaghg/cdiaghg - ! (to be implemented) - ! - color = 0 - IF( me_all < 4*nproc_ortho .AND. MOD( me_all, 4 ) == 0 ) color = 1 - ! - leg_ortho = 4 - ! - ELSE IF( nproc_all >= 2*nproc_ortho ) THEN - ! - ! here we choose a processor every 2, in order not to stress memory BW - ! - color = 0 - IF( me_all < 2*nproc_ortho .AND. MOD( me_all, 2 ) == 0 ) color = 1 - ! - leg_ortho = 2 - ! - ELSE - ! - ! here we choose the first processors - ! - color = 0 - IF( me_all < nproc_ortho ) color = 1 - ! - leg_ortho = 1 - ! - END IF - ! - key = me_all - ! - ! initialize the communicator for the new group by splitting the input communicator - ! - CALL laxlib_comm_split ( comm_all, color, key, ortho_comm ) - ! - ! and remember where it comes from - ! - ortho_parent_comm = comm_all - ! - ! Computes coordinates of the processors, in row maior order - ! - me_ortho1 = laxlib_rank( ortho_comm ) - ! - IF( me_all == 0 .AND. me_ortho1 /= 0 ) & - CALL lax_error__( " init_ortho_group ", " wrong root task in ortho group ", ierr ) - ! - if( color == 1 ) then - ortho_comm_id = 1 - CALL GRID2D_COORDS( 'R', me_ortho1, np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2) ) - CALL GRID2D_RANK( 'R', np_ortho(1), np_ortho(2), me_ortho(1), me_ortho(2), ierr ) - IF( ierr /= me_ortho1 ) & - CALL lax_error__( " init_ortho_group ", " wrong task coordinates in ortho group ", ierr ) - IF( me_ortho1*leg_ortho /= me_all ) & - CALL lax_error__( " init_ortho_group ", " wrong rank assignment in ortho group ", ierr ) - - CALL laxlib_comm_split( ortho_comm, me_ortho(2), me_ortho(1), ortho_col_comm) - CALL laxlib_comm_split( ortho_comm, me_ortho(1), me_ortho(2), ortho_row_comm) - - else - ortho_comm_id = 0 - me_ortho(1) = me_ortho1 - me_ortho(2) = me_ortho1 - endif -#if defined __SCALAPACK - ! - ! This part is used to eliminate the image dependency from ortho groups - ! SCALAPACK is now independent from whatever level of parallelization - ! is present on top of pool parallelization - ! - ALLOCATE( ortho_cntx_pe( nparent_comm ) ) - ALLOCATE( blacsmap( np_ortho(1), np_ortho(2) ) ) - - DO j = 1, nparent_comm - - CALL BLACS_GET(world_cntx, 10, ortho_cntx_pe( j ) ) ! retrieve communicator of world context - blacsmap = 0 - nprow = np_ortho(1) - npcol = np_ortho(2) - - IF( ( j == ( my_parent_id + 1 ) ) .and. ( ortho_comm_id > 0 ) ) THEN - - blacsmap( me_ortho(1) + 1, me_ortho(2) + 1 ) = BLACS_PNUM( world_cntx, 0, me_blacs ) - - END IF - - ! All MPI tasks defined in the global communicator take part in the definition of the BLACS grid - -#if defined(__MPI) - CALL MPI_ALLREDUCE( MPI_IN_PLACE, blacsmap, SIZE(blacsmap), MPI_INTEGER, MPI_SUM, my_world_comm, ierr ) - IF( ierr /= 0 ) & - CALL lax_error__( ' init_ortho_group ', ' problem in MPI_ALLREDUCE of blacsmap ', ierr ) -#endif - - - CALL BLACS_GRIDMAP( ortho_cntx_pe( j ), blacsmap, nprow, nprow, npcol ) - - CALL BLACS_GRIDINFO( ortho_cntx_pe( j ), nprow, npcol, myrow, mycol ) - - IF( ( j == ( my_parent_id + 1 ) ) .and. ( ortho_comm_id > 0 ) ) THEN - - IF( np_ortho(1) /= nprow ) & - CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong no. of task rows ', 1 ) - IF( np_ortho(2) /= npcol ) & - CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong no. of task columns ', 1 ) - IF( me_ortho(1) /= myrow ) & - CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong task row ID ', 1 ) - IF( me_ortho(2) /= mycol ) & - CALL lax_error__( ' init_ortho_group ', ' problem with SCALAPACK, wrong task columns ID ', 1 ) - - ortho_cntx = ortho_cntx_pe( j ) - - END IF - - END DO - - DEALLOCATE( blacsmap ) - DEALLOCATE( ortho_cntx_pe ) - - -#endif - -#else - - ortho_comm_id = 1 - -#endif - - first = .false. - - RETURN - END SUBROUTINE init_ortho_group - ! - SUBROUTINE clean_ortho_group ( ) + SUBROUTINE laxlib_end_drv ( ) ! ! free resources associated to the communicator ! + IF( .not. lax_is_initialized ) & + CALL lax_error__( ' laxlib_end ', ' laxlib was not initialized ', 1 ) + ! CALL laxlib_comm_free( ortho_comm ) IF( ortho_comm_id > 0 ) THEN CALL laxlib_comm_free( ortho_col_comm ) @@ -306,7 +63,27 @@ CONTAINS ortho_cntx = -1 #endif ! - END SUBROUTINE clean_ortho_group + lax_is_initialized = .false. + ! + np_ortho(2) = 1 + me_ortho(2) = 0 + me_ortho1 = 0 + nproc_ortho = 1 + leg_ortho = 1 + ortho_comm = 0 + ortho_row_comm = 0 + ortho_col_comm = 0 + ortho_comm_id= 0 + ortho_parent_comm = 0 +#if defined __SCALAPACK + me_blacs = 0 + np_blacs = 1 +#endif + world_cntx = -1 ! BLACS context of all processor + ortho_cntx = -1 ! BLACS context for ortho_comm + do_distr_diag_inside_bgrp = .true. + ! + END SUBROUTINE laxlib_end_drv ! !------------------------------------------------------------------------------! FUNCTION laxlib_rank( comm ) diff --git a/LAXlib/ptoolkit.f90 b/LAXlib/ptoolkit.f90 index 741eb4285..c70ec383d 100644 --- a/LAXlib/ptoolkit.f90 +++ b/LAXlib/ptoolkit.f90 @@ -5,31 +5,15 @@ ! in the root directory of the present distribution, ! or http://www.gnu.org/copyleft/gpl.txt . ! -!==----------------------------------------------==! - MODULE parallel_toolkit -!==----------------------------------------------==! - - USE la_param - IMPLICIT NONE - SAVE - - PRIVATE - - PUBLIC :: rep_matmul_drv - PUBLIC :: zrep_matmul_drv - PUBLIC :: dsqmdst, dsqmcll, dsqmred, dsqmsym - PUBLIC :: zsqmdst, zsqmcll, zsqmred, zsqmher - - CONTAINS - ! --------------------------------------------------------------------------------- -SUBROUTINE dsqmdst( n, ar, ldar, a, lda, desc ) +SUBROUTINE laxlib_dsqmdst( n, ar, ldar, a, lda, desc ) ! ! Double precision SQuare Matrix DiSTribution ! This sub. take a replicated square matrix "ar" and distribute it ! across processors as described by descriptor "desc" ! + USE la_param USE descriptors ! implicit none @@ -76,15 +60,16 @@ SUBROUTINE dsqmdst( n, ar, ldar, a, lda, desc ) RETURN -END SUBROUTINE dsqmdst +END SUBROUTINE laxlib_dsqmdst -SUBROUTINE zsqmdst( n, ar, ldar, a, lda, desc ) +SUBROUTINE laxlib_zsqmdst( n, ar, ldar, a, lda, desc ) ! ! double complex (Z) SQuare Matrix DiSTribution ! This sub. take a replicated square matrix "ar" and distribute it ! across processors as described by descriptor "desc" ! + USE la_param USE descriptors ! implicit none @@ -131,17 +116,18 @@ SUBROUTINE zsqmdst( n, ar, ldar, a, lda, desc ) RETURN -END SUBROUTINE zsqmdst +END SUBROUTINE laxlib_zsqmdst ! --------------------------------------------------------------------------------- -SUBROUTINE dsqmcll( n, a, lda, ar, ldar, desc, comm ) +SUBROUTINE laxlib_dsqmcll( n, a, lda, ar, ldar, desc, comm ) ! ! Double precision SQuare Matrix CoLLect ! This sub. take a distributed square matrix "a" and collect ! the block assigned to processors into a replicated matrix "ar", ! matrix is distributed as described by descriptor desc ! + USE la_param USE descriptors ! implicit none @@ -223,16 +209,17 @@ SUBROUTINE dsqmcll( n, a, lda, ar, ldar, desc, comm ) #endif RETURN -END SUBROUTINE dsqmcll +END SUBROUTINE laxlib_dsqmcll -SUBROUTINE zsqmcll( n, a, lda, ar, ldar, desc, comm ) +SUBROUTINE laxlib_zsqmcll( n, a, lda, ar, ldar, desc, comm ) ! ! double complex (Z) SQuare Matrix CoLLect ! This sub. take a distributed square matrix "a" and collect ! the block assigned to processors into a replicated matrix "ar", ! matrix is distributed as described by descriptor desc ! + USE la_param USE descriptors ! implicit none @@ -314,15 +301,16 @@ SUBROUTINE zsqmcll( n, a, lda, ar, ldar, desc, comm ) #endif RETURN -END SUBROUTINE zsqmcll +END SUBROUTINE laxlib_zsqmcll ! --------------------------------------------------------------------------------- -SUBROUTINE dsqmwpb( n, a, lda, desc ) +SUBROUTINE laxlib_dsqmwpb( n, a, lda, desc ) ! ! Double precision SQuare Matrix WiPe Border subroutine ! + USE la_param USE descriptors ! IMPLICIT NONE @@ -346,14 +334,15 @@ SUBROUTINE dsqmwpb( n, a, lda, desc ) END DO ! RETURN -END SUBROUTINE dsqmwpb +END SUBROUTINE laxlib_dsqmwpb ! --------------------------------------------------------------------------------- -SUBROUTINE dsqmsym( n, a, lda, desc ) +SUBROUTINE laxlib_dsqmsym( n, a, lda, desc ) ! ! Double precision SQuare Matrix SYMmetrization ! + USE la_param USE descriptors ! IMPLICIT NONE @@ -452,13 +441,14 @@ SUBROUTINE dsqmsym( n, a, lda, desc ) #endif RETURN -END SUBROUTINE dsqmsym +END SUBROUTINE laxlib_dsqmsym -SUBROUTINE zsqmher( n, a, lda, desc ) +SUBROUTINE laxlib_zsqmher( n, a, lda, desc ) ! ! double complex (Z) SQuare Matrix HERmitianize ! + USE la_param USE descriptors ! IMPLICIT NONE @@ -598,13 +588,13 @@ SUBROUTINE zsqmher( n, a, lda, desc ) #endif RETURN -END SUBROUTINE zsqmher +END SUBROUTINE laxlib_zsqmher ! --------------------------------------------------------------------------------- -SUBROUTINE dsqmred( na, a, lda, desca, nb, b, ldb, descb ) +SUBROUTINE laxlib_dsqmred( na, a, lda, desca, nb, b, ldb, descb ) ! ! Double precision SQuare Matrix REDistribution ! @@ -616,6 +606,7 @@ SUBROUTINE dsqmred( na, a, lda, desca, nb, b, ldb, descb ) ! If you want to read, get prepared for an headache! ! Written struggling by Carlo Cavazzoni. ! + USE la_param USE descriptors ! IMPLICIT NONE @@ -989,11 +980,11 @@ SUBROUTINE dsqmred( na, a, lda, desca, nb, b, ldb, descb ) #endif RETURN -END SUBROUTINE dsqmred +END SUBROUTINE laxlib_dsqmred -SUBROUTINE zsqmred( na, a, lda, desca, nb, b, ldb, descb ) +SUBROUTINE laxlib_zsqmred( na, a, lda, desca, nb, b, ldb, descb ) ! ! double complex (Z) SQuare Matrix REDistribution ! @@ -1005,6 +996,7 @@ SUBROUTINE zsqmred( na, a, lda, desca, nb, b, ldb, descb ) ! If you want to read, get prepared for an headache! ! Written struggling by Carlo Cavazzoni. ! + USE la_param USE descriptors ! IMPLICIT NONE @@ -1358,7 +1350,7 @@ SUBROUTINE zsqmred( na, a, lda, desca, nb, b, ldb, descb ) #endif RETURN -END SUBROUTINE zsqmred +END SUBROUTINE laxlib_zsqmred @@ -1370,6 +1362,7 @@ SUBROUTINE rep_matmul_drv( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, ! Parallel matrix multiplication with replicated matrix ! written by Carlo Cavazzoni ! + USE la_param implicit none ! CHARACTER(LEN=1), INTENT(IN) :: transa, transb @@ -1534,6 +1527,7 @@ SUBROUTINE zrep_matmul_drv( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA ! Parallel matrix multiplication with replicated matrix ! written by Carlo Cavazzoni ! + USE la_param implicit none ! CHARACTER(LEN=1), INTENT(IN) :: transa, transb @@ -1692,11 +1686,6 @@ SUBROUTINE zrep_matmul_drv( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA END SUBROUTINE zrep_matmul_drv - -!==----------------------------------------------==! -END MODULE parallel_toolkit -!==----------------------------------------------==! - ! ! !=----------------------------------------------------------------------------=! @@ -4273,7 +4262,7 @@ END SUBROUTINE qe_pdtrtri SUBROUTINE qe_pdsyevd( tv, n, desc, hh, ldh, e ) USE descriptors USE la_param - USE dspev_module, ONLY : pdspev_drv + USE la_interface_mod, ONLY: dspev_drv IMPLICIT NONE LOGICAL, INTENT(IN) :: tv ! if tv is true compute eigenvalues and eigenvectors (not used) @@ -4303,7 +4292,7 @@ SUBROUTINE qe_pdsyevd( tv, n, desc, hh, ldh, e ) ! matrix "hh" is block distributed, matrix diag is cyclic distributed CALL blk2cyc_redist( n, diag, nrlx, n, hh, ldh, ldh, desc ) ! - CALL pdspev_drv( jobv, diag, nrlx, e, vv, nrlx, nrl, n, & + CALL dspev_drv( jobv, diag, nrlx, e, vv, nrlx, nrl, n, & desc%npc * desc%npr, desc%mype, desc%comm ) ! IF( tv ) CALL cyc2blk_redist( n, vv, nrlx, n, hh, ldh, ldh, desc ) @@ -4319,7 +4308,7 @@ END SUBROUTINE SUBROUTINE qe_pzheevd( tv, n, desc, hh, ldh, e ) USE descriptors USE la_param - USE zhpev_module, ONLY : pzhpev_drv + USE la_interface_mod, ONLY: zhpev_drv IMPLICIT NONE LOGICAL, INTENT(IN) :: tv ! if tv is true compute eigenvalues and eigenvectors (not used) @@ -4347,7 +4336,7 @@ SUBROUTINE qe_pzheevd( tv, n, desc, hh, ldh, e ) CALL blk2cyc_zredist( n, diag, nrlx, n, hh, ldh, ldh, desc ) ! - CALL pzhpev_drv( jobv, diag, nrlx, e, vv, nrlx, nrl, n, & + CALL zhpev_drv( jobv, diag, nrlx, e, vv, nrlx, nrl, n, & desc%npc * desc%npr, desc%mype, desc%comm ) ! if( tv ) CALL cyc2blk_zredist( n, vv, nrlx, n, hh, ldh, ldh, desc ) @@ -4529,3 +4518,120 @@ SUBROUTINE sqr_zsetmat( what, n, alpha, a, lda, desc ) ! RETURN END SUBROUTINE sqr_zsetmat + +!------------------------------------------------------------------------ + SUBROUTINE distribute_lambda_x( lambda_repl, lambda_dist, desc ) +!------------------------------------------------------------------------ + USE la_param + USE descriptors + REAL(DP), INTENT(IN) :: lambda_repl(:,:) + REAL(DP), INTENT(OUT) :: lambda_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + INTEGER :: i, j, ic, ir + IF( desc%active_node > 0 ) THEN + ir = desc%ir + ic = desc%ic + DO j = 1, desc%nc + DO i = 1, desc%nr + lambda_dist( i, j ) = lambda_repl( i + ir - 1, j + ic - 1 ) + END DO + END DO + END IF + RETURN + END SUBROUTINE distribute_lambda_x + ! +!------------------------------------------------------------------------ + SUBROUTINE collect_lambda_x( lambda_repl, lambda_dist, desc ) +!------------------------------------------------------------------------ + USE la_param + USE mp_diag, ONLY: ortho_parent_comm + USE descriptors + REAL(DP), INTENT(OUT) :: lambda_repl(:,:) + REAL(DP), INTENT(IN) :: lambda_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + INTEGER :: i, j, ic, ir, ierr + lambda_repl = 0.0d0 + IF( desc%active_node > 0 ) THEN + ir = desc%ir + ic = desc%ic + DO j = 1, desc%nc + DO i = 1, desc%nr + lambda_repl( i + ir - 1, j + ic - 1 ) = lambda_dist( i, j ) + END DO + END DO + END IF + CALL MPI_ALLREDUCE( MPI_IN_PLACE, lambda_repl, SIZE(lambda_repl), MPI_DOUBLE_PRECISION, & + MPI_SUM, ortho_parent_comm, ierr ) + RETURN + END SUBROUTINE collect_lambda_x + +!------------------------------------------------------------------------ + SUBROUTINE collect_zmat_x( zmat_repl, zmat_dist, desc ) +!------------------------------------------------------------------------ + USE la_param + USE mp_diag, ONLY: ortho_parent_comm + USE descriptors + REAL(DP), INTENT(OUT) :: zmat_repl(:,:) + REAL(DP), INTENT(IN) :: zmat_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + INTEGER :: i, ii, j, me, np, nrl + zmat_repl = 0.0d0 + me = desc%mype + np = desc%npc * desc%npr + nrl = desc%nrl + IF( desc%active_node > 0 ) THEN + DO j = 1, desc%n + ii = me + 1 + DO i = 1, nrl + zmat_repl( ii, j ) = zmat_dist( i, j ) + ii = ii + np + END DO + END DO + END IF + CALL MPI_ALLREDUCE( MPI_IN_PLACE, zmat_repl, SIZE(zmat_repl), MPI_DOUBLE_PRECISION, & + MPI_SUM, ortho_parent_comm, ierr ) + RETURN + END SUBROUTINE collect_zmat_x + +!------------------------------------------------------------------------ + SUBROUTINE setval_lambda_x( lambda_dist, i, j, val, desc ) +!------------------------------------------------------------------------ + USE la_param + USE descriptors + REAL(DP), INTENT(OUT) :: lambda_dist(:,:) + INTEGER, INTENT(IN) :: i, j + REAL(DP), INTENT(IN) :: val + TYPE(la_descriptor), INTENT(IN) :: desc + IF( desc%active_node > 0 ) THEN + IF( ( i >= desc%ir ) .AND. ( i - desc%ir + 1 <= desc%nr ) ) THEN + IF( ( j >= desc%ic ) .AND. ( j - desc%ic + 1 <= desc%nc ) ) THEN + lambda_dist( i - desc%ir + 1, j - desc%ic + 1 ) = val + END IF + END IF + END IF + RETURN + END SUBROUTINE setval_lambda_x + +!------------------------------------------------------------------------ + SUBROUTINE distribute_zmat_x( zmat_repl, zmat_dist, desc ) +!------------------------------------------------------------------------ + USE la_param + USE descriptors + REAL(DP), INTENT(IN) :: zmat_repl(:,:) + REAL(DP), INTENT(OUT) :: zmat_dist(:,:) + TYPE(la_descriptor), INTENT(IN) :: desc + INTEGER :: i, ii, j, me, np + me = desc%mype + np = desc%npc * desc%npr + IF( desc%active_node > 0 ) THEN + DO j = 1, desc%n + ii = me + 1 + DO i = 1, desc%nrl + zmat_dist( i, j ) = zmat_repl( ii, j ) + ii = ii + np + END DO + END DO + END IF + RETURN + END SUBROUTINE distribute_zmat_x + ! diff --git a/LAXlib/rdiaghg.f90 b/LAXlib/rdiaghg.f90 index 30212351e..ad2a44b03 100644 --- a/LAXlib/rdiaghg.f90 +++ b/LAXlib/rdiaghg.f90 @@ -7,7 +7,7 @@ ! ! !---------------------------------------------------------------------------- -SUBROUTINE rdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) +SUBROUTINE laxlib_rdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) !---------------------------------------------------------------------------- ! ... Hv=eSv, with H symmetric matrix, S overlap matrix. ! ... On output both matrix are unchanged @@ -180,10 +180,10 @@ SUBROUTINE rdiaghg( n, m, h, s, ldh, e, v, me_bgrp, root_bgrp, intra_bgrp_comm ) ! RETURN ! -END SUBROUTINE rdiaghg +END SUBROUTINE laxlib_rdiaghg ! !---------------------------------------------------------------------------- -SUBROUTINE prdiaghg( n, h, s, ldh, e, v, desc ) +SUBROUTINE laxlib_prdiaghg( n, h, s, ldh, e, v, desc ) !---------------------------------------------------------------------------- ! ! ... calculates eigenvalues and eigenvectors of the generalized problem @@ -350,4 +350,4 @@ SUBROUTINE prdiaghg( n, h, s, ldh, e, v, desc ) ! RETURN ! -END SUBROUTINE prdiaghg +END SUBROUTINE laxlib_prdiaghg diff --git a/LAXlib/zhpev_drv.f90 b/LAXlib/zhpev_drv.f90 index e0c13eab9..420a83f26 100644 --- a/LAXlib/zhpev_drv.f90 +++ b/LAXlib/zhpev_drv.f90 @@ -11,14 +11,6 @@ MODULE zhpev_module IMPLICIT NONE SAVE - PRIVATE - - PUBLIC :: pzhpev_drv, zhpev_drv -#if defined __SCALAPACK - PUBLIC :: pzheevd_drv -#endif - - CONTAINS ! !------------------------------------------------------------------------- @@ -1409,63 +1401,6 @@ CONTAINS ! END SUBROUTINE pzsteqr -!==----------------------------------------------==! - - SUBROUTINE zhpev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ ) - - - IMPLICIT NONE - - CHARACTER :: JOBZ, UPLO - INTEGER :: IOPT, INFO, LDZ, N - COMPLEX(DP) :: AP( * ), Z( LDZ, * ) - REAL(DP) :: W( * ) - REAL(DP), ALLOCATABLE :: RWORK(:) - COMPLEX(DP), ALLOCATABLE :: ZWORK(:) - - ALLOCATE( rwork( MAX(1, 3*n-2) ), zwork( MAX(1, 2*n-1)) ) - CALL ZHPEV(jobz, uplo, n, ap, w, z, ldz, zwork, rwork, INFO) - DEALLOCATE( rwork, zwork ) - IF( INFO .NE. 0 ) THEN - CALL lax_error__( ' dspev_drv ', ' diagonalization failed ',INFO ) - END IF - - RETURN - END SUBROUTINE zhpev_drv - -!==----------------------------------------------==! - - SUBROUTINE pzhpev_drv( jobz, ap, lda, w, z, ldz, & - nrl, n, nproc, mpime, comm ) - - - IMPLICIT NONE - CHARACTER :: JOBZ - INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime - INTEGER, INTENT(IN) :: comm - COMPLEX(DP) :: ap( lda, * ), z( ldz, * ) - REAL(DP) :: w( * ) - REAL(DP), ALLOCATABLE :: rwork( : ) - COMPLEX(DP), ALLOCATABLE :: cwork( : ) - ! - ALLOCATE( rwork( n ) ) - ALLOCATE( cwork( n ) ) - ! - CALL pzhptrd( n, nrl, ap, lda, w, rwork, cwork, nproc, mpime, comm) - - IF( jobz == 'V' .OR. jobz == 'v' ) THEN - CALL pzupgtr( n, nrl, ap, lda, cwork, z, ldz, nproc, mpime, comm) - END IF - - CALL pzsteqr( jobz, n, nrl, w, rwork, z, ldz, nproc, mpime, comm) - - DEALLOCATE( cwork ) - DEALLOCATE( rwork ) - - RETURN - END SUBROUTINE pzhpev_drv - - !==----------------------------------------------==! @@ -1580,3 +1515,61 @@ CONTAINS #endif END MODULE zhpev_module + + +!==----------------------------------------------==! + + + SUBROUTINE zhpev_drv_x( JOBZ, UPLO, N, AP, W, Z, LDZ ) + + use zhpev_module + IMPLICIT NONE + + CHARACTER :: JOBZ, UPLO + INTEGER :: IOPT, INFO, LDZ, N + COMPLEX(DP) :: AP( * ), Z( LDZ, * ) + REAL(DP) :: W( * ) + REAL(DP), ALLOCATABLE :: RWORK(:) + COMPLEX(DP), ALLOCATABLE :: ZWORK(:) + + ALLOCATE( rwork( MAX(1, 3*n-2) ), zwork( MAX(1, 2*n-1)) ) + CALL ZHPEV(jobz, uplo, n, ap, w, z, ldz, zwork, rwork, INFO) + DEALLOCATE( rwork, zwork ) + IF( INFO .NE. 0 ) THEN + CALL lax_error__( ' zhpev_drv ', ' diagonalization failed ',INFO ) + END IF + + RETURN + END SUBROUTINE + +!==----------------------------------------------==! + + SUBROUTINE pzhpev_drv_x( jobz, ap, lda, w, z, ldz, nrl, n, nproc, mpime, comm ) + + use zhpev_module + + IMPLICIT NONE + CHARACTER :: JOBZ + INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime + INTEGER, INTENT(IN) :: comm + COMPLEX(DP) :: ap( lda, * ), z( ldz, * ) + REAL(DP) :: w( * ) + REAL(DP), ALLOCATABLE :: rwork( : ) + COMPLEX(DP), ALLOCATABLE :: cwork( : ) + ! + ALLOCATE( rwork( n ) ) + ALLOCATE( cwork( n ) ) + ! + CALL pzhptrd( n, nrl, ap, lda, w, rwork, cwork, nproc, mpime, comm) + + IF( jobz == 'V' .OR. jobz == 'v' ) THEN + CALL pzupgtr( n, nrl, ap, lda, cwork, z, ldz, nproc, mpime, comm) + END IF + + CALL pzsteqr( jobz, n, nrl, w, rwork, z, ldz, nproc, mpime, comm) + + DEALLOCATE( cwork ) + DEALLOCATE( rwork ) + + RETURN + END SUBROUTINE diff --git a/NEB/src/neb.f90 b/NEB/src/neb.f90 index c36934735..d6b668537 100644 --- a/NEB/src/neb.f90 +++ b/NEB/src/neb.f90 @@ -19,7 +19,6 @@ PROGRAM neb USE mp_world, ONLY : world_comm, mpime, root USE mp_pools, ONLY : intra_pool_comm USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm - USE mp_diag, ONLY : mp_start_diag USE read_input, ONLY : read_input_file USE command_line_options, ONLY : input_file_, ndiag_ ! @@ -36,6 +35,8 @@ PROGRAM neb ! IMPLICIT NONE ! + include 'laxlib.fh' + ! CHARACTER(len=256) :: engine_prefix, parsing_file_name INTEGER :: unit_tmp, i, iimage INTEGER, EXTERNAL :: find_free_unit, input_images_getarg @@ -43,7 +44,7 @@ PROGRAM neb ! ! CALL mp_startup ( start_images=.true. ) - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = .true. ) CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & inter_bgrp_comm ) @@ -111,7 +112,7 @@ PROGRAM neb ! CALL search_mep() ! - CALL laxlib_free_ortho_group() + CALL laxlib_end() CALL stop_run_path( conv_path ) ! STOP diff --git a/PHonon/Gamma/dyndiar.f90 b/PHonon/Gamma/dyndiar.f90 index 35dd3f906..c0ff05c49 100644 --- a/PHonon/Gamma/dyndiar.f90 +++ b/PHonon/Gamma/dyndiar.f90 @@ -18,6 +18,7 @@ SUBROUTINE dyndiar (dyn,nat3,nmodes,u,nat,ityp,amass,w2,dynout) USE io_global, ONLY : stdout USE mp_bands, ONLY : me_bgrp, root_bgrp, intra_bgrp_comm IMPLICIT NONE + include 'laxlib.fh' INTEGER :: nmodes, nat3, nat,ityp(nat), iudyn real(DP):: dyn(nat3,nmodes), u(nat3,nmodes), amass(*) real(DP):: dynout(nat3,nmodes), w2(nat3) @@ -77,7 +78,7 @@ SUBROUTINE dyndiar (dyn,nat3,nmodes,u,nat,ityp,amass,w2,dynout) ! Note that z are eigendisplacements in the base of input ! modes u and that they are normalized as =I ! - CALL rdiaghg (nat3, nmodes, dynout, m, nat3, w2, z, me_bgrp, root_bgrp, intra_bgrp_comm) + CALL diaghg (nat3, nmodes, dynout, m, nat3, w2, z, me_bgrp, root_bgrp, intra_bgrp_comm) ! ! write frequencies ! diff --git a/PHonon/PH/phonon.f90 b/PHonon/PH/phonon.f90 index 06d4f2121..d63a05326 100644 --- a/PHonon/PH/phonon.f90 +++ b/PHonon/PH/phonon.f90 @@ -48,7 +48,6 @@ PROGRAM phonon USE mp_world, ONLY : world_comm USE mp_pools, ONLY : intra_pool_comm USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm - USE mp_diag, ONLY : mp_start_diag USE command_line_options, ONLY : input_file_, ndiag_ ! YAMBO > USE YAMBO, ONLY : elph_yambo,dvscf_yambo @@ -56,6 +55,8 @@ PROGRAM phonon ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER :: iq, ierr LOGICAL :: do_band, do_iq, setup_pw CHARACTER (LEN=9) :: code = 'PHONON' @@ -64,7 +65,7 @@ PROGRAM phonon ! Initialize MPI, clocks, print initial messages ! CALL mp_startup ( start_images=.true. ) - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = .true. ) CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & inter_bgrp_comm ) @@ -101,7 +102,7 @@ PROGRAM phonon ENDIF ! YAMBO < ! - CALL laxlib_free_ortho_group() + CALL laxlib_end() CALL stop_smoothly_ph( .TRUE. ) ! STOP diff --git a/PP/src/projwfc.f90 b/PP/src/projwfc.f90 index f68c3fe4c..a22518bdb 100644 --- a/PP/src/projwfc.f90 +++ b/PP/src/projwfc.f90 @@ -30,7 +30,6 @@ PROGRAM do_projwfc USE mp_images, ONLY : intra_image_comm USE mp_pools, ONLY : intra_pool_comm USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm - USE mp_diag, ONLY : mp_start_diag, nproc_ortho USE command_line_options, ONLY : ndiag_ USE spin_orb, ONLY : lforcet USE wvfct, ONLY : et, nbnd @@ -45,6 +44,8 @@ PROGRAM do_projwfc ! IMPLICIT NONE ! + include 'laxlib.fh' + ! CHARACTER(LEN=256), EXTERNAL :: trimcheck ! CHARACTER (len=256) :: filpdos, filproj, outdir @@ -56,6 +57,7 @@ PROGRAM do_projwfc INTEGER, PARAMETER :: N_MAX_BOXES = 999 INTEGER :: n_proj_boxes, irmin(3,N_MAX_BOXES), irmax(3,N_MAX_BOXES) LOGICAL :: lgww !if .true. use GW QP energies from file bands.dat + INTEGER :: nproc_ortho ! NAMELIST / projwfc / outdir, prefix, ngauss, degauss, lsym, & Emin, Emax, DeltaE, filpdos, filproj, lgww, & @@ -65,10 +67,11 @@ PROGRAM do_projwfc ! initialise environment ! CALL mp_startup ( ) - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = .true. ) CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & inter_bgrp_comm ) + CALL laxlib_getval(nproc_ortho=nproc_ortho) ! CALL environment_start ( 'PROJWFC' ) ! @@ -241,7 +244,7 @@ PROGRAM do_projwfc ENDIF ENDIF ! - CALL laxlib_free_ortho_group() + CALL laxlib_end() CALL environment_end ( 'PROJWFC' ) ! CALL stop_pp @@ -1798,16 +1801,14 @@ SUBROUTINE pprojwave( filproj, lsym, lwrite_ovp, lbinary ) USE spin_orb, ONLY: lspinorb USE mp, ONLY: mp_bcast USE mp_pools, ONLY: root_pool, intra_pool_comm - USE mp_diag, ONLY: ortho_comm, np_ortho, me_ortho, ortho_comm_id, & - leg_ortho, ortho_cntx USE wavefunctions, ONLY: evc - USE parallel_toolkit, ONLY : zsqmred, zsqmher, zsqmdst, zsqmcll, dsqmsym - USE zhpev_module, ONLY : pzhpev_drv, zhpev_drv USE descriptors, ONLY : la_descriptor, descla_init USE projections ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, EXTERNAL :: find_free_unit ! COMPLEX(DP), PARAMETER :: zero = ( 0.0d0, 0.0d0 ) @@ -1845,6 +1846,7 @@ SUBROUTINE pprojwave( filproj, lsym, lwrite_ovp, lbinary ) ! flag to distinguish procs involved in linear algebra INTEGER, ALLOCATABLE :: notcnv_ip( : ) INTEGER, ALLOCATABLE :: ic_notcnv( : ) + INTEGER :: ortho_comm, np_ortho(2), me_ortho(2), ortho_comm_id, leg_ortho, ortho_cntx ! ! INTERFACE @@ -1868,6 +1870,8 @@ SUBROUTINE pprojwave( filproj, lsym, lwrite_ovp, lbinary ) auxname = TRIM(tmp_dir) // TRIM(ADJUSTL(prefix)) // '.AUX' // TRIM(nd_nmbr) OPEN( unit=iunaux, file=trim(auxname), status='unknown', form='unformatted') ! + CALL laxlib_getval( np_ortho = np_ortho, me_ortho = me_ortho, ortho_comm = ortho_comm, & + leg_ortho = leg_ortho, ortho_comm_id = ortho_comm_id, ortho_cntx = ortho_cntx ) ! ALLOCATE( ic_notcnv( np_ortho(2) ) ) ALLOCATE( notcnv_ip( np_ortho(2) ) ) @@ -1978,8 +1982,7 @@ SUBROUTINE pprojwave( filproj, lsym, lwrite_ovp, lbinary ) ! CALL blk2cyc_zredist( natomwfc, diag, nrlx, natomwfc, overlap_d, nx, nx, desc ) ! - CALL pzhpev_drv( 'V', diag, nrlx, e, vv, nrlx, nrl, natomwfc, & - desc%npc * desc%npr, desc%mype, desc%comm ) + CALL zhpev_drv( 'V', diag, nrlx, e, vv, nrlx, nrl, natomwfc, desc%npc * desc%npr, desc%mype, desc%comm ) ! CALL cyc2blk_zredist( natomwfc, vv, nrlx, natomwfc, work_d, nx, nx, desc ) ! @@ -2004,7 +2007,7 @@ SUBROUTINE pprojwave( filproj, lsym, lwrite_ovp, lbinary ) ENDDO ENDDO CALL sqr_zmm_cannon( 'N', 'C', natomwfc, ONE, e_work_d, nx, work_d, nx, ZERO, overlap_d, nx, desc ) - CALL zsqmher( natomwfc, overlap_d, nx, desc ) + CALL laxlib_zsqmher( natomwfc, overlap_d, nx, desc ) DEALLOCATE( e_work_d ) ENDIF ! @@ -2397,7 +2400,7 @@ CONTAINS ! ENDDO ! - CALL zsqmher( n, dm, nx, desc ) + CALL laxlib_zsqmher( n, dm, nx, desc ) ! DEALLOCATE( work ) ! @@ -2467,7 +2470,7 @@ CONTAINS ! ENDDO ! - CALL dsqmsym( n, dm, nx, desc ) + CALL laxlib_dsqmsym( n, dm, nx, desc ) ! DEALLOCATE( work ) ! diff --git a/PW/src/memory_report.f90 b/PW/src/memory_report.f90 index fae8cb092..0783665be 100644 --- a/PW/src/memory_report.f90 +++ b/PW/src/memory_report.f90 @@ -53,13 +53,14 @@ SUBROUTINE memory_report() lxdm, smallmem, tqr, iverbosity USE force_mod, ONLY : lforce, lstres USE ions_base, ONLY : nat, ntyp => nsp, ityp - USE mp_diag, ONLY : np_ortho USE mp_bands, ONLY : nproc_bgrp, nbgrp USE mp_pools, ONLY : npool USE mp_images, ONLY : nproc_image ! IMPLICIT NONE ! + include 'laxlib.fh' + ! INTEGER, PARAMETER :: MB=1024*1024 INTEGER, PARAMETER :: GB=1024*MB INTEGER :: g_fact, mix_type_size, scf_type_size @@ -72,6 +73,7 @@ SUBROUTINE memory_report() ! REAL(dp), PARAMETER :: complex_size=16_dp, real_size=8_dp, int_size=4_dp REAL(dp) :: ram, ram_, ram1, ram2, maxram, totram, add + INTEGER :: np_ortho(2) ! IF ( gamma_only) THEN g_fact = 2 ! use half plane waves or G-vectors @@ -263,6 +265,8 @@ SUBROUTINE memory_report() ! hpsi, spsi, hr and sr matrices, scalar products ! nbnd_l is the estimated dimension of distributed matrices ! + CALL laxlib_getval( np_ortho = np_ortho ) + ! nbnd_l = nbndx/np_ortho(1) ram1 = complex_size/g_fact * ( 3*nbnd_l**2 ) ! hr,sr,vr/hc,sc,vc IF ( iverbosity > 0 ) WRITE( stdout, 1013 ) 'h,s,v(r/c)', ram1/MB diff --git a/PW/src/pwscf.f90 b/PW/src/pwscf.f90 index 3f3386f84..37cd9e8c5 100644 --- a/PW/src/pwscf.f90 +++ b/PW/src/pwscf.f90 @@ -39,12 +39,14 @@ PROGRAM pwscf USE mp_world, ONLY : world_comm USE mp_pools, ONLY : intra_pool_comm USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm - USE mp_diag, ONLY : mp_start_diag USE mp_exx, ONLY : negrp USE read_input, ONLY : read_input_file USE command_line_options, ONLY: input_file_, command_line, ndiag_ ! IMPLICIT NONE + ! + include 'laxlib.fh' + ! CHARACTER(len=256) :: srvaddress !! Get the address of the server CHARACTER(len=256) :: get_server_address @@ -62,12 +64,12 @@ PROGRAM pwscf ! used to be the default : one diag group per bgrp ! with strict hierarchy: POOL > BAND > DIAG ! if using exx groups from mp_exx still use this diag method - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = .true. ) ELSE ! new default: one diag group per pool ( individual k-point level ) ! with band group and diag group both being children of POOL comm - CALL mp_start_diag ( ndiag_, world_comm, intra_pool_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_pool_comm, & do_distr_diag_inside_bgrp_ = .false. ) END IF CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & @@ -107,7 +109,7 @@ PROGRAM pwscf ! END IF ! - CALL laxlib_free_ortho_group() + CALL laxlib_end() CALL stop_run( exit_status ) CALL do_stop( exit_status ) ! diff --git a/PW/src/setup.f90 b/PW/src/setup.f90 index 852cf2f24..f32bbf616 100644 --- a/PW/src/setup.f90 +++ b/PW/src/setup.f90 @@ -620,16 +620,18 @@ END SUBROUTINE setup LOGICAL FUNCTION check_para_diag( nbnd ) ! USE io_global, ONLY : stdout, ionode, ionode_id - USE mp_diag, ONLY : np_ortho, ortho_parent_comm USE mp_bands, ONLY : intra_bgrp_comm USE mp_pools, ONLY : intra_pool_comm USE control_flags, ONLY : gamma_only IMPLICIT NONE + include 'laxlib.fh' + INTEGER, INTENT(IN) :: nbnd LOGICAL, SAVE :: first = .TRUE. LOGICAL, SAVE :: saved_value = .FALSE. + INTEGER :: np_ortho(2), ortho_parent_comm #if defined(__MPI) IF( .NOT. first ) THEN @@ -638,6 +640,8 @@ LOGICAL FUNCTION check_para_diag( nbnd ) END IF first = .FALSE. ! + CALL laxlib_getval( np_ortho = np_ortho, ortho_parent_comm = ortho_parent_comm ) + ! IF( np_ortho(1) > nbnd ) & CALL errore ('check_para_diag', 'Too few bands for required ndiag',nbnd) ! diff --git a/TDDFPT/src/lr_eels_main.f90 b/TDDFPT/src/lr_eels_main.f90 index 863260d58..057b3f5c1 100644 --- a/TDDFPT/src/lr_eels_main.f90 +++ b/TDDFPT/src/lr_eels_main.f90 @@ -33,7 +33,6 @@ PROGRAM lr_eels_main USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm, & ntask_groups USE mp_bands_TDDFPT, ONLY : ibnd_start, ibnd_end - USE mp_diag, ONLY : mp_start_diag USE command_line_options, ONLY : ndiag_ USE wvfct, ONLY : nbnd USE wavefunctions, ONLY : psic @@ -44,6 +43,8 @@ PROGRAM lr_eels_main ! IMPLICIT NONE ! + include 'laxlib.fh' + ! ! Local variables ! INTEGER :: ip, na, pol_index, ibnd @@ -55,7 +56,7 @@ PROGRAM lr_eels_main pol_index = 1 ! CALL mp_startup ( ) - CALL mp_start_diag ( ndiag_, world_comm, intra_bgrp_comm, & + CALL laxlib_start ( ndiag_, world_comm, intra_bgrp_comm, & do_distr_diag_inside_bgrp_ = .true. ) CALL set_mpi_comm_4_solvers( intra_pool_comm, intra_bgrp_comm, & inter_bgrp_comm ) @@ -223,7 +224,7 @@ PROGRAM lr_eels_main ! CALL print_clock_lr() ! - CALL laxlib_free_ortho_group() + CALL laxlib_end() CALL stop_lr( .TRUE. ) ! IF (lr_verbosity > 5) THEN