- removing LA files

git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@12056 c92efa57-630b-4861-b058-cf58834340f0
ccavazzoni 2016-01-23 20:04:03 +00:00
parent 6ac81c8eb2
commit 491b47d6e6
7 changed files with 117 additions and 7086 deletions

View File

@@ -3,7 +3,7 @@
include ../make.sys
# location of needed modules
MODFLAGS= $(MOD_FLAG)../iotk/src $(MOD_FLAG)../ELPA/src $(MOD_FLAG)../FFTXlib $(MOD_FLAG).
MODFLAGS= $(MOD_FLAG)../iotk/src $(MOD_FLAG)../ELPA/src $(MOD_FLAG)../FFTXlib $(MOD_FLAG)../LAXlib $(MOD_FLAG).
MODULES = \
atom.o \
@@ -22,8 +22,6 @@ constants.o \
constraints_module.o \
control_flags.o \
coulomb_vcut.o \
descriptors.o \
dspev_drv.o \
electrons_base.o \
environment.o \
error_handler.o \
@@ -65,7 +63,6 @@ plugin_flags.o \
plugin_arguments.o \
plugin_variables.o \
pseudo_types.o \
ptoolkit.o \
qexml.o \
qexml_xsd.o \
qmmm.o \
@@ -106,8 +103,7 @@ xc_rVV10.o \
xml_input.o \
xml_io_base.o \
wypos.o \
zdotc_wrapper.o \
zhpev_drv.o
zdotc_wrapper.o
TLDEPS=libfft

View File

@@ -1,180 +0,0 @@
!
! Copyright (C) 2002 FPMD group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
MODULE descriptors
!
IMPLICIT NONE
SAVE
INTEGER ldim_block, ldim_cyclic, ldim_block_cyclic, ldim_block_sca
INTEGER gind_block, gind_cyclic, gind_block_cyclic, gind_block_sca
EXTERNAL ldim_block, ldim_cyclic, ldim_block_cyclic, ldim_block_sca
EXTERNAL gind_block, gind_cyclic, gind_block_cyclic, gind_block_sca
! Descriptor for linear algebra data distribution (like in Cannon's algorithm)
!
! Remember that here we use square matrices, block-distributed on a square grid of processors
!
TYPE la_descriptor
INTEGER :: ir = 0 ! global index of the first row in the local block of the distributed matrix
INTEGER :: nr = 0 ! number of rows in the local block of the distributed matrix
INTEGER :: ic = 0 ! global index of the first column in the local block of the distributed matrix
INTEGER :: nc = 0 ! number of columns in the local block of the distributed matrix
INTEGER :: nrcx = 0 ! leading dimension of the distributed matrix ( not smaller than nr and nc )
INTEGER :: active_node = 0 ! if > 0 the proc holds a block of the lambda matrix
INTEGER :: n = 0 ! global dimension of the matrix
INTEGER :: nx = 0 ! global leading dimension ( >= n )
INTEGER :: npr = 0 ! number of row processors
INTEGER :: npc = 0 ! number of column processors
INTEGER :: myr = 0 ! processor row index
INTEGER :: myc = 0 ! processor column index
INTEGER :: comm = 0 ! communicator
INTEGER :: mype = 0 ! processor index ( from 0 to npr * npc - 1 )
INTEGER :: nrl = 0 ! number of local rows, when the matrix rows are cyclically distributed across procs
INTEGER :: nrlx = 0 ! leading dimension, when the matrix is distributed by rows
END TYPE
!
CONTAINS
!------------------------------------------------------------------------
!
SUBROUTINE descla_local_dims( i2g, nl, n, nx, np, me )
IMPLICIT NONE
INTEGER, INTENT(OUT) :: i2g ! global index of the first local element
INTEGER, INTENT(OUT) :: nl ! local number of elements
INTEGER, INTENT(IN) :: n ! number of actual element in the global array
INTEGER, INTENT(IN) :: nx ! dimension of the global array (nx>=n) to be distributed
INTEGER, INTENT(IN) :: np ! number of processors
INTEGER, INTENT(IN) :: me ! taskid for which i2g and nl are computed
!
! note that we can distribute a global array larger than the
! number of actual elements. This may be required for performance
! reasons, and to obtain an equal partition of matrices of
! different size, such as the spin-up and spin-down matrices
!
#if __SCALAPACK
nl = ldim_block_sca( nx, np, me )
i2g = gind_block_sca( 1, nx, np, me )
#else
nl = ldim_block( nx, np, me )
i2g = gind_block( 1, nx, np, me )
#endif
! This is to try to keep a matrix N * N in the same
! distribution as a matrix NX * NX, which is useful to have
! the spin-up matrix distributed in the same way
! as the spin-down matrix
!
IF( i2g + nl - 1 > n ) nl = n - i2g + 1
IF( nl < 0 ) nl = 0
RETURN
!
END SUBROUTINE descla_local_dims
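!
! For reference, a sketch ( an assumption, not the library source ) of
! what the block-distribution helpers compute for nx elements on np tasks:
!
!    r   = MOD( nx, np )
!    nl  = nx / np ; IF( me < r ) nl = nl + 1          ! ldim_block
!    i2g = me * ( nx / np ) + MIN( me, r ) + 1         ! gind_block( 1, ... )
!
! the *_sca variants follow the ScaLAPACK blocking instead, so that the
! descriptor stays compatible with ScaLAPACK-distributed matrices.
!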
!
!
SUBROUTINE descla_init( descla, n, nx, np, me, comm, includeme )
!
IMPLICIT NONE
TYPE(la_descriptor), INTENT(OUT) :: descla
INTEGER, INTENT(IN) :: n ! the size of this matrix
INTEGER, INTENT(IN) :: nx ! the max among different matrixes sharing
! this descriptor or the same data distribution
INTEGER, INTENT(IN) :: np(2), me(2), comm
INTEGER, INTENT(IN) :: includeme
INTEGER :: ir, nr, ic, nc, lnode, nrcx, nrl, nrlx
INTEGER :: ip, npp
IF( np(1) /= np(2) ) &
CALL errore( ' descla_init ', ' only square grid of proc are allowed ', 2 )
IF( n < 0 ) &
CALL errore( ' descla_init ', ' dummy argument n is negative ', 3 )
IF( nx < n ) &
CALL errore( ' descla_init ', ' dummy argument nx less than n ', 4 )
IF( np(1) < 1 ) &
CALL errore( ' descla_init ', ' dummy argument np less than 1 ', 5 )
! find the block maximum dimensions
#if __SCALAPACK
nrcx = ldim_block_sca( nx, np(1), 0 )
#else
nrcx = ldim_block( nx, np(1), 0 )
DO ip = 1, np(1) - 1
nrcx = MAX( nrcx, ldim_block( nx, np(1), ip ) )
END DO
#endif
!
! find local dimensions, if appropriate
!
IF( includeme == 1 ) THEN
!
CALL descla_local_dims( ir, nr, n, nx, np(1), me(1) )
CALL descla_local_dims( ic, nc, n, nx, np(2), me(2) )
!
lnode = 1
!
ELSE
!
nr = 0
nc = 0
!
ir = 0
ic = 0
!
lnode = -1
!
END IF
descla%ir = ir ! global index of the first row in the local block of lambda
descla%nr = nr ! number of rows in the local block of lambda
descla%ic = ic ! global index of the first column in the local block of lambda
descla%nc = nc ! number of columns in the local block of lambda
descla%nrcx = nrcx ! leading dimension of the distributed lambda matrix
descla%active_node = lnode
! if > 0 the proc holds a block of the lambda matrix
descla%n = n ! global dimension of the matrix
descla%nx = nx ! global leading dimension
descla%npr = np(1) ! number of row processors
descla%npc = np(2) ! number of column processors
descla%myr = me(1) ! processor row index
descla%myc = me(2) ! processor column index
descla%comm = comm ! communicator
descla%mype = descla%myc + descla%myr * descla%npr
! processor index ( from 0 to npr * npc - 1 )
npp = np(1) * np(2)
! Compute local dimension of the cyclically distributed matrix
!
IF( includeme == 1 ) THEN
nrl = ldim_cyclic( n, npp, descla%mype )
ELSE
nrl = 0
END IF
nrlx = n / npp + 1
descla%nrl = nrl ! number of local rows, when the matrix rows are cyclically distributed across procs
descla%nrlx = nrlx ! leading dimension
IF( nr < 0 .OR. nc < 0 ) &
CALL errore( ' descla_init ', ' wrong value for computed nr and nc ', 1 )
IF( nrcx < 1 ) &
CALL errore( ' descla_init ', ' wrong value for computed nrcx ', 2 )
IF( nrcx < nr ) &
CALL errore( ' descla_init ', ' nrcx < nr ', ( nr - nrcx ) )
IF( nrcx < nc ) &
CALL errore( ' descla_init ', ' nrcx < nc ', ( nc - nrcx ) )
IF( nrlx < nrl ) &
CALL errore( ' descla_init ', ' nrlx < nrl ', ( nrl - nrlx ) )
IF( nrl < 0 ) &
CALL errore( ' descla_init ', ' nrl < 0 ', ABS( nrl ) )
RETURN
END SUBROUTINE descla_init
END MODULE descriptors
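
For context, a minimal usage sketch of the module removed above; the 2x2 grid
coordinates and the dummy communicator handle are illustrative ( descla_init,
as shown, performs no MPI calls, so the sketch also runs serially ):

PROGRAM descla_example
   USE descriptors
   IMPLICIT NONE
   TYPE(la_descriptor) :: desc
   INTEGER :: np(2), me(2)
   np = (/ 2, 2 /)  ! 2x2 processor grid ( descla_init requires a square grid )
   me = (/ 0, 1 /)  ! this task sits at grid row 0, column 1
   ! distribute a 100x100 matrix padded to 104x104; 1 = this task participates
   CALL descla_init( desc, 100, 104, np, me, 0, 1 )
   PRINT *, 'local block ', desc%nr, ' x ', desc%nc, ' at ', desc%ir, desc%ic
END PROGRAM descla_example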

View File

@@ -1,741 +0,0 @@
!
! Copyright (C) 2001-2008 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!
MODULE dspev_module
IMPLICIT NONE
SAVE
PRIVATE
PUBLIC :: pdspev_drv, dspev_drv
#if defined __SCALAPACK
PUBLIC :: pdsyevd_drv
#endif
CONTAINS
SUBROUTINE ptredv( tv, a, lda, d, e, v, ldv, nrl, n, nproc, me, comm )
!
! Parallel version of the classic Householder tridiagonalization
! algorithm for symmetric matrices.
!
! AUTHOR : Carlo Cavazzoni - SISSA 1997
! comments and suggestions to : carlo.cavazzoni@cineca.it
!
! REFERENCES :
!
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
!
! PARALLEL NUMERICAL ALGORITHMS,
! T.L. FREEMAN AND C.PHILLIPS,
! PRENTICE HALL INTERNATIONAL (1992).
!
!
!
! INPUTS :
!
! TV if true, compute the eigenvectors "v"
!
! A(NRL,N) Local part of the global matrix A(N,N) to be reduced,
! only the upper triangle is needed.
! The rows of the matrix are distributed among processors
! with blocking factor 1.
! Example for NPROC = 4 :
! ROW | PE
! 1 | 0
! 2 | 1
! 3 | 2
! 4 | 3
! 5 | 0
! 6 | 1
! .. | ..
!
! LDA LEADING DIMENSION OF MATRIX A.
!
! LDV LEADING DIMENSION OF MATRIX V.
!
! NRL NUMBER OF ROWS BELONGING TO THE LOCAL PROCESSOR.
!
! N DIMENSION OF THE GLOBAL MATRIX.
!
! NPROC NUMBER OF PROCESSORS.
!
! ME INDEX OF THE LOCAL PROCESSOR (Starting from 0).
!
!
! OUTPUTS :
!
! V(NRL,N) Orthogonal transformation that tridiagonalizes A;
! this matrix is distributed among processors
! in the same way as A.
!
! D(N) Diagonal elements of the tridiagonal matrix;
! this vector is equal on all processors.
!
! E(N) Subdiagonal elements of the tridiagonal matrix;
! this vector is equal on all processors.
!
!
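! The update applied below is the standard Householder step from the
! references above: with reflector u and h = sigma - f*g,
!
!    p = A u / h ,   kappa = u^T p / ( 2 h ) ,   q = p - kappa u ,
!    A' = A - q u^T - u q^T
!
! which annihilates the off-tridiagonal part of one row/column per sweep.
!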
USE kinds, ONLY : DP
IMPLICIT NONE
LOGICAL, INTENT(IN) :: tv
INTEGER, intent(in) :: N, NRL, LDA, LDV
INTEGER, intent(in) :: NPROC, ME, comm
REAL(DP) :: A(LDA,N), D(N), E(N), V(LDV,N)
!
REAL(DP), EXTERNAL :: ddot
!
REAL(DP) :: g, scalef, sigma, kappa, f, h, tmp
REAL(DP), ALLOCATABLE :: u(:)
REAL(DP), ALLOCATABLE :: p(:)
REAL(DP), ALLOCATABLE :: vtmp(:)
REAL(DP) :: tu, tp, one_over_h
REAL(DP) :: one_over_scale
REAL(DP) :: redin(3), redout(3)
REAL(DP), ALLOCATABLE :: ul(:)
REAL(DP), ALLOCATABLE :: pl(:)
integer :: l, i, j, k, t, tl, ierr
integer :: kl, jl, ks, lloc
integer, ALLOCATABLE :: is(:)
integer, ALLOCATABLE :: ri(:)
! .......... FOR I=N STEP -1 UNTIL 1 DO -- ..........
IF( N == 0 ) THEN
RETURN
END IF
ALLOCATE( u( n+2 ), p( n+1 ), vtmp( n+2 ), ul( n ), pl( n ), is( n ), ri( n ) )
DO I = N, 1, -1
IS(I) = (I-1)/NPROC
RI(I) = MOD((I-1),NPROC) ! owner of I-th row
IF(ME .le. RI(I) ) then
IS(I) = IS(I) + 1
END IF
END DO
DO I = N, 2, -1
L = I - 1 ! first element
H = 0.0_DP
IF ( L > 1 ) THEN
SCALEF = 0.0_DP
DO K = 1, is(l)
SCALEF = SCALEF + DABS( A(K,I) )
END DO
#if defined __MPI
CALL reduce_base_real( 1, scalef, comm, -1 )
#endif
IF ( SCALEF .EQ. 0.0_DP ) THEN
!
IF (RI(L).EQ.ME) THEN
E(I) = A(is(L),I)
END IF
!
ELSE
! ...... CALCULATION OF SIGMA AND H
ONE_OVER_SCALE = 1.0_DP/SCALEF
SIGMA = 0.0_DP
DO k = 1,is(L)
A(k,I) = A(k,I) * ONE_OVER_SCALE
SIGMA = SIGMA + A(k,I)**2
END DO
IF( ri(l) .eq. me ) THEN
F = A( is(l), i )
ELSE
F = 0.0_DP
END IF
! CONSTRUCTION OF VECTOR U
vtmp( 1:l ) = 0.0_DP
k = ME + 1
DO kl = 1,is(l)
vtmp(k) = A(kl,I)
k = k + NPROC
END DO
DO kl = 1,is(l)
UL(kl) = A(kl,I)
END DO
#if defined __MPI
vtmp( l + 1 ) = sigma
vtmp( l + 2 ) = f
CALL reduce_base_real_to( L + 2, vtmp, u, comm, -1 )
sigma = u( l + 1 )
f = u( l + 2 )
#else
u(1:l) = vtmp(1:l)
#endif
G = -SIGN(SQRT(SIGMA),F)
H = SIGMA - F*G
ONE_OVER_H = 1.0_DP/H
E(I) = SCALEF*G
U(L) = F - G
IF( RI(L) == ME ) THEN
UL(is(l)) = F - G
A(is(l),I) = F - G
END IF
! CONSTRUCTION OF VECTOR P
DO J = 1,L
vtmp(j) = 0.0_DP
DO KL = 1, IS(J)
vtmp(J) = vtmp(J) + A(KL,J) * UL(KL)
END DO
IF( L > J .AND. ME == RI(J) ) then
DO K = J+1,L
vtmp(J) = vtmp(J) + A(IS(J),K) * U(K)
END DO
END IF
vtmp(J) = vtmp(J) * ONE_OVER_H
END DO
KAPPA = 0.5_DP * ONE_OVER_H * ddot( l, vtmp, 1, u, 1 )
#if defined __MPI
vtmp( l + 1 ) = kappa
CALL reduce_base_real_to( L + 1, vtmp, p, comm, -1 )
kappa = p( l + 1 )
#else
p(1:l) = vtmp(1:l)
#endif
CALL daxpy( l, -kappa, u, 1, p, 1 )
CALL DGER( is(l), l, -1.0_DP, ul, 1, p, 1, a, lda )
CALL DGER( is(l), l, -1.0_DP, p( me + 1 ), nproc, u, 1, a, lda )
END IF
ELSE
IF(RI(L).EQ.ME) THEN
G = A(is(l),I)
END IF
#if defined __MPI
CALL bcast_real( g, 1, ri( L ), comm )
#endif
E(I) = G
END IF
D(I) = H
END DO
E(1) = 0.0_DP
D(1) = 0.0_DP
IF( tv ) THEN
DO J = 1,N
V(1:nrl,J) = 0.0_DP
IF(RI(J).EQ.ME) THEN
V(IS(J),J) = 1.0_DP
END IF
END DO
DO I = 2,N
L = I - 1
LLOC = IS(L)
!
IF( D(I) .NE. 0.0_DP ) THEN
!
ONE_OVER_H = 1.0_DP/D(I)
!
IF( lloc > 0 ) THEN
CALL DGEMV( 't', lloc, l, 1.0d0, v(1,1), ldv, a(1,i), 1, 0.0d0, p(1), 1 )
ELSE
P(1:l) = 0.0d0
END IF
#if defined __MPI
CALL reduce_base_real_to( L, p, vtmp, comm, -1 )
#else
vtmp(1:l) = p(1:l)
#endif
IF( lloc > 0 ) THEN
CALL DGER( lloc, l, -ONE_OVER_H, a(1,i), 1, vtmp, 1, v, ldv )
END IF
END IF
END DO
END IF
DO I = 1,N
U(I) = 0.0_DP
IF(RI(I).eq.ME) then
U(I) = A(IS(I),I)
END IF
END DO
#if defined __MPI
CALL reduce_base_real_to( n, u, d, comm, -1 )
#else
D(1:N) = U(1:N)
#endif
DEALLOCATE( u, p, vtmp, ul, pl, is, ri )
RETURN
END SUBROUTINE ptredv
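!
! In the cyclic layout used above, global row i lives on task
! MOD( i-1, NPROC ) as local row ( i-1 ) / NPROC + 1 ; the RI and IS
! arrays cache the owner and the local row count up to i, respectively.
!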
!==----------------------------------------------==!
SUBROUTINE ptqliv( tv, d, e, n, z, ldz, nrl, mpime, comm )
!
! Modified QL algorithm for the CRAY T3E PARALLEL MACHINE;
! calculates the eigenvectors and eigenvalues of a matrix reduced to
! tridiagonal form by PTREDV.
!
! AUTHOR : Carlo Cavazzoni - SISSA 1997
! comments and suggestions to : carlo.cavazzoni@cineca.it
!
! REFERENCES :
!
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
!
! PARALLEL NUMERICAL ALGORITHMS,
! T.L. FREEMAN AND C.PHILLIPS,
! PRENTICE HALL INTERNATIONAL (1992).
!
! NOTE : the algorithm that finds the eigenvalues is not parallelized
! ( it scales as O(N^2) ); I preferred to parallelize only the
! updating of the eigenvectors, because it is the most costly
! part of the algorithm ( it scales as O(N^3) ).
! For large matrices practically all the time is spent in the updating,
! which in this routine scales linearly with the number of processors;
! in fact it involves no communication at all.
!
!
! INPUTS :
!
! TV if true, compute the eigenvectors "z"
!
! D(N) Diagonal elements of the tridiagonal matrix;
! this vector is equal on all processors.
!
! E(N) Subdiagonal elements of the tridiagonal matrix;
! this vector is equal on all processors.
!
! N DIMENSION OF THE GLOBAL MATRIX.
!
! NRL NUMBER OF ROWS OF Z BELONGING TO THE LOCAL PROCESSOR.
!
! LDZ LEADING DIMENSION OF MATRIX Z.
!
! Z(LDZ,N) Orthogonal transformation that tridiagonalizes the original
! matrix A.
! The rows of the matrix are distributed among processors
! with blocking factor 1.
! Example for NPROC = 4 :
! ROW | PE
! 1 | 0
! 2 | 1
! 3 | 2
! 4 | 3
! 5 | 0
! 6 | 1
! .. | ..
!
!
!
! OUTPUTS :
!
! Z(LDZ,N) EIGENVECTORS OF THE ORIGINAL MATRIX.
! The J-th column of Z contains the eigenvector associated
! with the J-th eigenvalue.
! The eigenvectors are scattered among processors ( 4-PE example )
! eigenvector | PE
! elements |
! V(1) | 0
! V(2) | 1
! V(3) | 2
! V(4) | 3
! V(5) | 0
! V(6) | 1
! .... ..
!
! D(N) Eigenvalues of the original matrix,
! this vector is equal on all processors.
!
!
!
!
USE kinds, ONLY : DP
IMPLICIT NONE
LOGICAL, INTENT(IN) :: tv
INTEGER, INTENT(IN) :: n, nrl, ldz, mpime, comm
REAL(DP) :: d(n), e(n)
REAL(DP) :: z(ldz,n)
INTEGER :: i, iter, mk, k, l, m, ierr
REAL(DP) :: b, dd, f, g, p, r, c, s
REAL(DP), ALLOCATABLE :: cv(:,:)
REAL(DP), ALLOCATABLE :: fv1(:)
REAL(DP), ALLOCATABLE :: fv2(:)
ALLOCATE( cv( 2,n ) )
ALLOCATE( fv1( nrl ) )
ALLOCATE( fv2( nrl ) )
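! cv holds the ( c, s ) Givens pairs of one QL sweep: they are computed
! on task 0 only and broadcast, so that all tasks apply identical rotations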
do l = 2,n
e(l-1) = e(l)
end do
do l=1,n
iter=0
1 do m=l,n-1
dd = abs(d(m))+abs(d(m+1))
if ( abs(e(m))+dd .eq. dd ) goto 2
end do
m=n
2 if ( m /= l ) then
if ( iter == 200 ) then
call errore(' tqli ',' too many iterations ', iter)
end if
iter=iter+1
!
! iteration is performed on one processor and results broadcast
! to all others to prevent potential problems if all processors
! do not behave in exactly the same way (even with the same data!)
!
if ( mpime == 0 ) then
g=(d(l+1)-d(l))/(2.0_DP*e(l))
r=pythag(g,1.0_DP)
g=d(m)-d(l)+e(l)/(g+sign(r,g))
s=1.0_DP
c=1.0_DP
p=0.0_DP
do i=m-1,l,-1
f=s*e(i)
b=c*e(i)
r=pythag(f,g)
e(i+1)=r
if ( r == 0.0_DP) then
d(i+1)=d(i+1)-p
e(m)=0.0_DP
goto 1
endif
c=g/r
g=d(i+1)-p
s=f/r
r=(d(i)-g)*s+2.0_DP*c*b
p=s*r
d(i+1)=g+p
g=c*r-b
!
cv(1,i-l+1) = c
cv(2,i-l+1) = s
!cv(1,i) = c
!cv(2,i) = s
end do
!
d(l)=d(l)-p
e(l)=g
e(m)=0.0_DP
end if
#if defined __MPI
CALL bcast_real( cv, 2*(m-l), 0, comm )
CALL bcast_real( d(l), m-l+1, 0, comm )
CALL bcast_real( e(l), m-l+1, 0, comm )
#endif
if( tv ) then
do i=m-1,l,-1
do k=1,nrl
fv2(k) =z(k,i+1)
end do
do k=1,nrl
fv1(k) =z(k,i)
end do
c = cv(1,i-l+1)
s = cv(2,i-l+1)
do k=1,nrl
z(k,i+1) =s*fv1(k) + c*fv2(k)
z(k,i) =c*fv1(k) - s*fv2(k)
end do
end do
end if
goto 1
endif
end do
DEALLOCATE( cv )
DEALLOCATE( fv1 )
DEALLOCATE( fv2 )
RETURN
END SUBROUTINE ptqliv
!==----------------------------------------------==!
SUBROUTINE peigsrtv(tv,d,v,ldv,n,nrl)
USE kinds, ONLY : DP
!
! This routine sorts the eigenvalues into ascending order and
! rearranges accordingly the eigenvectors generated by PTREDV and PTQLIV.
!
! AUTHOR : Carlo Cavazzoni - SISSA 1997
! comments and suggestions to : carlo.cavazzoni@cineca.it
!
IMPLICIT NONE
LOGICAL, INTENT(IN) :: tv
INTEGER, INTENT (IN) :: n,ldv,nrl
REAL(DP), INTENT(INOUT) :: d(n),v(ldv,n)
INTEGER :: i,j,k
REAL(DP):: p
do 13 i=1,n-1
k=i
p=d(i)
do j=i+1,n
if(d(j).le.p)then
k=j
p=d(j)
endif
end do
if(k.ne.i)then
d(k)=d(i)
d(i)=p
!
! Exchange local elements of eigenvectors.
!
if( tv ) then
do j=1,nrl
p=v(j,i)
v(j,i)=v(j,k)
v(j,k)=p
END DO
end if
endif
13 continue
return
END SUBROUTINE peigsrtv
!
!-------------------------------------------------------------------------
FUNCTION pythag(a,b)
USE kinds, ONLY : DP
IMPLICIT NONE
REAL(DP) :: a, b, pythag
REAL(DP) :: absa, absb
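! computes sqrt( a**2 + b**2 ) without destructive underflow or overflow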
absa=abs(a)
absb=abs(b)
if(absa.gt.absb)then
pythag=absa*sqrt(1.0_DP+(absb/absa)**2)
else
if(absb.eq.0.0_DP)then
pythag=0.0_DP
else
pythag=absb*sqrt(1.0_DP+(absa/absb)**2)
endif
endif
return
END FUNCTION pythag
!
!==----------------------------------------------==!
SUBROUTINE pdspev_drv( jobz, ap, lda, w, z, ldz, &
nrl, n, nproc, mpime, comm )
USE kinds, ONLY : DP
IMPLICIT NONE
CHARACTER, INTENT(IN) :: JOBZ
INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime
INTEGER, INTENT(IN) :: comm
REAL(DP) :: ap( lda, * ), w( * ), z( ldz, * )
REAL(DP), ALLOCATABLE :: sd( : )
LOGICAL :: tv
!
IF( n < 1 ) RETURN
!
tv = .false.
IF( jobz == 'V' .OR. jobz == 'v' ) tv = .true.
ALLOCATE ( sd ( n ) )
CALL ptredv( tv, ap, lda, w, sd, z, ldz, nrl, n, nproc, mpime, comm)
CALL ptqliv( tv, w, sd, n, z, ldz, nrl, mpime, comm)
DEALLOCATE ( sd )
CALL peigsrtv( tv, w, z, ldz, n, nrl)
RETURN
END SUBROUTINE pdspev_drv
!==----------------------------------------------==!
SUBROUTINE dspev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ )
USE kinds, ONLY : DP
IMPLICIT NONE
CHARACTER :: JOBZ, UPLO
INTEGER :: IOPT, INFO, LDZ, N
REAL(DP) :: AP( * ), W( * ), Z( LDZ, * )
REAL(DP), ALLOCATABLE :: WORK(:)
IF( n < 1 ) RETURN
ALLOCATE( work( 3*n ) )
#if defined __ESSL
IOPT = 0
IF((JOBZ .EQ. 'V') .OR. (JOBZ .EQ. 'v') ) iopt = iopt + 1
IF((UPLO .EQ. 'U') .OR. (UPLO .EQ. 'u') ) iopt = iopt + 20
CALL DSPEV(IOPT, ap, w, z, ldz, n, work, 3*n)
#else
CALL DSPEV(jobz, uplo, n, ap(1), w(1), z(1,1), ldz, work, INFO)
IF( info .NE. 0 ) THEN
CALL errore( ' dspev_drv ', ' diagonalization failed ',info )
END IF
#endif
DEALLOCATE( work )
RETURN
END SUBROUTINE dspev_drv
#if defined __SCALAPACK
SUBROUTINE pdsyevd_drv( tv, n, nb, s, lds, w, ortho_cntx )
USE kinds, ONLY : DP
USE mp_bands, ONLY: nproc_bgrp, me_bgrp, intra_bgrp_comm, root_bgrp
USE mp_diag, ONLY: ortho_comm
USE mp, ONLY: mp_comm_free
#if defined(__ELPA)
USE elpa1
#endif
IMPLICIT NONE
LOGICAL, INTENT(IN) :: tv
! if tv is true compute eigenvalues and eigenvectors (not used)
INTEGER, INTENT(IN) :: nb, n, ortho_cntx
! nb = block size, n = matrix size, ortho_cntx = BLACS context
INTEGER, INTENT(IN) :: lds
! lds = leading dim of s
REAL(DP) :: s(:,:), w(:)
! input: s = matrix to be diagonalized
! output: s = eigenvectors, w = eigenvalues
INTEGER :: desch( 10 )
REAL(DP) :: rtmp( 4 )
INTEGER :: itmp( 4 )
REAL(DP), ALLOCATABLE :: work(:)
REAL(DP), ALLOCATABLE :: vv(:,:)
INTEGER, ALLOCATABLE :: iwork(:)
INTEGER :: LWORK, LIWORK, info
CHARACTER :: jobv
INTEGER :: i
#if defined(__ELPA)
INTEGER :: nprow,npcol,my_prow, my_pcol,mpi_comm_rows, mpi_comm_cols
#endif
IF( SIZE( s, 1 ) /= lds ) &
CALL errore( ' pdsyevd_drv ', ' wrong matrix leading dimension ', 1 )
!
IF( tv ) THEN
ALLOCATE( vv( SIZE( s, 1 ), SIZE( s, 2 ) ) )
jobv = 'V'
ELSE
CALL errore('pdsyevd_drv','PDSYEVD does not compute eigenvalues only',1)
END IF
CALL descinit( desch, n, n, nb, nb, 0, 0, ortho_cntx, SIZE( s, 1 ) , info )
IF( info /= 0 ) CALL errore( ' pdsyevd_drv ', ' descinit ', ABS( info ) )
lwork = -1
liwork = 1
itmp = 0
rtmp = 0.0_DP
#if defined(__ELPA)
CALL BLACS_Gridinfo(ortho_cntx,nprow, npcol, my_prow,my_pcol)
CALL get_elpa_row_col_comms(ortho_comm, my_prow, my_pcol,mpi_comm_rows, mpi_comm_cols)
CALL solve_evp_real(n, n, s, lds, w, vv, lds ,nb ,mpi_comm_rows, mpi_comm_cols)
IF( tv ) s = vv
IF( ALLOCATED( vv ) ) DEALLOCATE( vv )
CALL mp_comm_free ( mpi_comm_rows )
CALL mp_comm_free ( mpi_comm_cols )
#else
CALL PDSYEVD( jobv, 'L', n, s, 1, 1, desch, w, vv, 1, 1, desch, rtmp, lwork, itmp, liwork, info )
IF( info /= 0 ) CALL errore( ' pdsyevd_drv ', ' PDSYEVD ', ABS( info ) )
lwork = MAX( 131072, 2*INT( rtmp(1) ) + 1 )
liwork = MAX( 8*n , itmp(1) + 1 )
ALLOCATE( work( lwork ) )
ALLOCATE( iwork( liwork ) )
CALL PDSYEVD( jobv, 'L', n, s, 1, 1, desch, w, vv, 1, 1, desch, work, lwork, iwork, liwork, info )
IF( info /= 0 ) CALL errore( ' pdsyevd_drv ', ' PDSYEVD ', ABS( info ) )
IF( tv ) s = vv
IF( ALLOCATED( vv ) ) DEALLOCATE( vv )
DEALLOCATE( work )
DEALLOCATE( iwork )
#endif
RETURN
END SUBROUTINE pdsyevd_drv
#endif
END MODULE dspev_module
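
For context, a minimal usage sketch of the serial driver removed above; the
2x2 matrix and its packed layout are illustrative ( for UPLO='U', DSPEV packed
storage holds the upper triangle column by column ):

PROGRAM dspev_example
   USE kinds, ONLY : DP
   USE dspev_module, ONLY : dspev_drv
   IMPLICIT NONE
   REAL(DP) :: ap(3), w(2), z(2,2)
   ! packed upper triangle of ( 2 1 ; 1 2 ) : ap = ( a11, a12, a22 )
   ap = (/ 2.0_DP, 1.0_DP, 2.0_DP /)
   CALL dspev_drv( 'V', 'U', 2, ap, w, z, 2 )
   PRINT *, 'eigenvalues ', w   ! expected 1 and 3
END PROGRAM dspev_example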

View File

@@ -55,10 +55,6 @@ constraints_module.o : ions_base.o
constraints_module.o : kind.o
control_flags.o : kind.o
control_flags.o : parameters.o
dspev_drv.o : kind.o
dspev_drv.o : mp.o
dspev_drv.o : mp_bands.o
dspev_drv.o : mp_diag.o
electrons_base.o : constants.o
electrons_base.o : io_global.o
electrons_base.o : kind.o
@@ -192,11 +188,6 @@ plugin_variables.o : kind.o
plugin_variables.o : parameters.o
pseudo_types.o : kind.o
pseudo_types.o : radial_grids.o
ptoolkit.o : descriptors.o
ptoolkit.o : dspev_drv.o
ptoolkit.o : kind.o
ptoolkit.o : parallel_include.o
ptoolkit.o : zhpev_drv.o
qexml.o : ../iotk/src/iotk_module.o
qexml.o : kind.o
qexml.o : wrappers.o
@@ -405,7 +396,3 @@ xml_io_base.o : mp_wave.o
xml_io_base.o : parser.o
xml_io_base.o : wrappers.o
zdotc_wrapper.o : kind.o
zhpev_drv.o : io_global.o
zhpev_drv.o : kind.o
zhpev_drv.o : mp.o
zhpev_drv.o : mp_diag.o

View File

@@ -189,6 +189,64 @@ END SUBROUTINE mp_synchronize
!
! ... "reduce"-like subroutines
!
#if defined (__USE_INPLACE_MPI)
!
!----------------------------------------------------------------------------
SUBROUTINE reduce_base_real( dim, ps, comm, root )
!----------------------------------------------------------------------------
!
! ... sums a distributed variable ps(dim) over the processors.
! ... This version uses MPI_IN_PLACE, so no auxiliary buffer is needed
!
USE kinds, ONLY : DP
USE parallel_include
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: dim ! size of the array
REAL(DP) :: ps(dim) ! array whose elements have to be reduced
INTEGER, INTENT(IN) :: comm ! communicator
INTEGER, INTENT(IN) :: root ! if root < 0 perform a reduction to all procs
! if root >= 0 perform a reduce only to root proc.
!
#if defined (__MPI)
!
INTEGER :: info, myid
!
#if defined __TRACE
write(*,*) 'reduce_base_real IN'
#endif
!
IF ( dim <= 0 ) GO TO 1 ! go to the end of the subroutine
!
! ... synchronize processes
!
#if defined __USE_BARRIER
CALL mp_synchronize( comm )
#endif
!
IF( root >= 0 ) THEN
! MPI_IN_PLACE may be passed as the send buffer only on the root task
! ( on the other tasks the receive buffer is not significant )
CALL mpi_comm_rank( comm, myid, info )
IF( myid == root ) THEN
CALL MPI_REDUCE( MPI_IN_PLACE, ps, dim, MPI_DOUBLE_PRECISION, MPI_SUM, root, comm, info )
ELSE
CALL MPI_REDUCE( ps, ps, dim, MPI_DOUBLE_PRECISION, MPI_SUM, root, comm, info )
END IF
IF( info /= 0 ) CALL errore( 'reduce_base_real', 'error in mpi_reduce 1', info )
ELSE
CALL MPI_ALLREDUCE( MPI_IN_PLACE, ps, dim, MPI_DOUBLE_PRECISION, MPI_SUM, comm, info )
IF( info /= 0 ) CALL errore( 'reduce_base_real', 'error in mpi_allreduce 1', info )
END IF
!
1 CONTINUE
!
#if defined __TRACE
write(*,*) 'reduce_base_real OUT'
#endif
!
#endif
!
RETURN
!
END SUBROUTINE reduce_base_real
!
#else
!
!----------------------------------------------------------------------------
SUBROUTINE reduce_base_real( dim, ps, comm, root )
!----------------------------------------------------------------------------
@@ -286,8 +344,63 @@ SUBROUTINE reduce_base_real( dim, ps, comm, root )
!
END SUBROUTINE reduce_base_real
!
#endif
!
!
#if defined (__USE_INPLACE_MPI)
!
!----------------------------------------------------------------------------
SUBROUTINE reduce_base_integer( dim, ps, comm, root )
!----------------------------------------------------------------------------
!
! ... sums a distributed variable ps(dim) over the processors.
! ... This version uses MPI_IN_PLACE, so no auxiliary buffer is needed
!
USE kinds, ONLY : DP
USE parallel_include
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: dim
INTEGER :: ps(dim)
INTEGER, INTENT(IN) :: comm ! communicator
INTEGER, INTENT(IN) :: root ! if root < 0 perform a reduction to all procs
! if root >= 0 perform a reduce only to root proc.
!
#if defined (__MPI)
!
INTEGER :: info, myid
!
#if defined __TRACE
write(*,*) 'reduce_base_integer IN'
#endif
!
! ... synchronize processes
!
#if defined __USE_BARRIER
CALL mp_synchronize( comm )
#endif
!
IF( root >= 0 ) THEN
! MPI_IN_PLACE may be passed as the send buffer only on the root task
! ( on the other tasks the receive buffer is not significant )
CALL mpi_comm_rank( comm, myid, info )
IF( myid == root ) THEN
CALL MPI_REDUCE( MPI_IN_PLACE, ps, dim, MPI_INTEGER, MPI_SUM, root, comm, info )
ELSE
CALL MPI_REDUCE( ps, ps, dim, MPI_INTEGER, MPI_SUM, root, comm, info )
END IF
IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_reduce 1', info )
ELSE
CALL MPI_ALLREDUCE( MPI_IN_PLACE, ps, dim, MPI_INTEGER, MPI_SUM, comm, info )
IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_allreduce 1', info )
END IF
!
#if defined __TRACE
write(*,*) 'reduce_base_integer OUT'
#endif
!
#endif
!
RETURN
!
END SUBROUTINE reduce_base_integer
!
#else
!
!----------------------------------------------------------------------------
SUBROUTINE reduce_base_integer( dim, ps, comm, root )
!----------------------------------------------------------------------------
@@ -383,7 +496,8 @@ SUBROUTINE reduce_base_integer( dim, ps, comm, root )
RETURN
!
END SUBROUTINE reduce_base_integer
!
#endif
!
! ... "reduce"-like subroutines
!
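
MPI_IN_PLACE tells MPI to take each task's contribution from, and return the
result in, the receive buffer itself, saving the copy through the scratch
buffer that the non-inplace version uses. A minimal self-contained sketch of
the pattern, independent of the wrappers above:

PROGRAM inplace_example
   USE mpi
   IMPLICIT NONE
   INTEGER :: ierr
   REAL(8) :: ps(4)
   CALL MPI_INIT( ierr )
   ps = 1.0d0
   ! every task contributes ps and receives the sum in ps itself
   CALL MPI_ALLREDUCE( MPI_IN_PLACE, ps, 4, MPI_DOUBLE_PRECISION, &
                       MPI_SUM, MPI_COMM_WORLD, ierr )
   CALL MPI_FINALIZE( ierr )
END PROGRAM inplace_example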

File diff suppressed because it is too large

File diff suppressed because it is too large