!
! Copyright (C) 2001-2009 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
MODULE zhpev_module
IMPLICIT NONE
SAVE
CONTAINS
!
!-------------------------------------------------------------------------
SUBROUTINE pzhptrd( n, nrl, ap, lda, d, e, tau, nproc, me, comm )
!-------------------------------------------------------------------------
!
! Parallel MPI version of the LAPACK routine ZHPTRD
!
! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA
! December 12, 1999
!
! REFERENCES :
!
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
!
! PARALLEL NUMERICAL ALGORITHMS,
! T.L. FREEMAN AND C.PHILLIPS,
! PRENTICE HALL INTERNATIONAL (1992).
!
! LAPACK routine (version 2.0) --
! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
! Courant Institute, Argonne National Lab, and Rice University
!
USE laxlib_parallel_include
IMPLICIT NONE
include 'laxlib_kinds.fh'
! .. Scalar Arguments ..
INTEGER LDA, N, NRL, NPROC, ME, comm
! ..
! .. Array Arguments ..
REAL(DP) D( * ), E( * )
COMPLEX(DP) AP(LDA, * ), TAU( * )
! ..
!
! Purpose
! =======
!
! PZHPTRD reduces a complex Hermitian distributed matrix AP to
! real symmetric tridiagonal form T by a unitary similarity
! transformation: Q**H * A * Q = T.
!
! Arguments
! =========
!
! N (input) INTEGER
! The order of the global matrix AP. N >= 0.
!
! NRL (input) INTEGER
! The number of local rows of the matrix AP. NRL >= 0.
!
! AP (input/output) COMPLEX(DP) array, dimension (LDA,N)
! On entry, the Hermitian matrix AP.
! The rows of the matrix are distributed among processors
! with blocking factor 1.
! Example for NPROC = 4 :
! ROW | PE
! 1 | 0
! 2 | 1
! 3 | 2
! 4 | 3
! 5 | 0
! 6 | 1
! .. | ..
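! ( i.e. global row J is owned by processor MOD(J-1,NPROC) and is
! stored there at local row (J-1)/NPROC + 1 )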
! On exit, the diagonal and first subdiagonal
! of A are overwritten by the corresponding elements of the
! tridiagonal matrix T, and the elements below the first
! subdiagonal, with the array TAU, represent the unitary
! matrix Q as a product of elementary reflectors;
!
! LDA (input) INTEGER
! Leading dimension of the local matrix AP, LDA >= NRL
!
! D (output) DOUBLE PRECISION array, dimension (N)
! The diagonal elements of the tridiagonal matrix T:
! D(i) = AP(i,i).
!
! E (output) DOUBLE PRECISION array, dimension (N-1)
! The off-diagonal elements of the tridiagonal matrix T:
! E(i) = A(i+1,i)
!
! TAU (output) COMPLEX(DP) array, dimension (N-1)
! The scalar factors of the elementary reflectors (see Further
! Details).
!
! NPROC (input) INTEGER
! Number of processors
!
! ME (input) INTEGER
! Index of the local processor ( 0, 1, 2, ..., NPROC-1 )
!
! Further Details
! ===============
!
! the matrix Q is represented as a product of elementary
! reflectors
!
! Q = H(1) H(2) . . . H(n-1).
!
! Each H(i) has the form
!
! H(i) = I - tau * v * v'
!
! where tau is a complex scalar, and v is a complex vector with
! v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP,
! overwriting A(i+2:n,i), and tau is stored in TAU(i).
!
! =====================================================================
!
! .. Parameters ..
COMPLEX(DP) ONE, ZERO, HALF
PARAMETER ( ONE = ( 1.0_DP, 0.0_DP ),ZERO = ( 0.0_DP, 0.0_DP ), &
& HALF = ( 0.5_DP, 0.0_DP ) )
REAL(DP) RONE, RZERO
PARAMETER ( RONE = 1.0_DP, RZERO = 0.0_DP )
INTEGER QI
INTEGER IL(N+1)
INTEGER OW(N+1)
COMPLEX(DP) CTMP
COMPLEX(DP) CTMPV(N+1)
COMPLEX(DP) TAUL(N+1)
COMPLEX(DP) APKI(N+1)
REAL(DP) TMP
REAL(DP) TMPV(N+1)
! ..
! .. Local Scalars ..
INTEGER J, I, I1, K, I2, NI1, JL
INTEGER KL, J1
COMPLEX(DP) ALPHA, TAUI
INTEGER KNT, IERR
REAL(DP) ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM
! ..
! .. External Subroutines ..
EXTERNAL zaxpy
EXTERNAL zdscal, zscal
! ..
! .. External Functions ..
! some compilers don't like complex functions
!COMPLEX(DP) zdotc
!EXTERNAL zdotc
!COMPLEX(DP) ZLADIV
!EXTERNAL ZLADIV
REAL(DP) DLAMCH, DLAPY3, DZNRM2
EXTERNAL DLAMCH, DLAPY3, DZNRM2
! ..
! .. Intrinsic Functions ..
INTRINSIC DABS, DBLE, AIMAG, SIGN
!
! .. Executable Statements ..
!
! Quick return if possible
!
IF(N.LE.0) THEN
RETURN
END IF
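!
! Cyclic-distribution bookkeeping: OW(I) is the processor owning
! global row I; on the owner IL(I) is its local row index, while on
! the other processors IL(I)+1 is the local index of the first owned
! row with global index larger than I
!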
DO I = 1,N+1
QI = (I-1)/NPROC
OW(I) = MOD((I-1),NPROC)
IF(ME .le. OW(I) ) then
IL(I) = QI + 1
ELSE
IL(I) = QI
END IF
END DO
!
! Reduce the lower triangle of A.
!
IF (OW(1).EQ.ME) THEN
AP( IL(1), 1 ) = DBLE( AP( IL(1), 1 ) )
END IF
DO I = 1, N - 1
!
! Generate elementary reflector H(i) = I - tau * v * v'
! to annihilate A(i+2:n,i)
!
IF (OW(I+1).EQ.ME) THEN
ALPHA = AP( IL(I+1), I )
END IF
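!
! broadcast ALPHA (one complex number, passed as two
! MPI_DOUBLE_PRECISION words) from the owner of row I+1
!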
#if defined __MPI
CALL MPI_BCAST( alpha, 2, MPI_DOUBLE_PRECISION, ow( i+1 ), comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_bcast 1', ierr )
#endif
IF( (N-I).LE.0 ) THEN
TAUI = RZERO
ELSE
IF(OW(I+2).EQ.ME) THEN
I2 = IL(I+2)
ELSE
I2 = IL(I+2) + 1 ! I+2
ENDIF
NI1 = NRL - I2 + 1 ! N-I-1
IF((N-I-1).GT.0) THEN
IF( NI1 .GT. 0 ) THEN
XNORM = DZNRM2( NI1, AP( I2, I ), 1 )
ELSE
XNORM = 0.0_DP
END IF
#if defined __MPI
XNORM = XNORM ** 2
CALL MPI_ALLREDUCE( MPI_IN_PLACE, xnorm, 1, MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_allreduce 1', ierr )
XNORM = SQRT( xnorm )
#endif
ELSE
XNORM = 0.0_DP
ENDIF
ALPHR = DBLE( ALPHA )
ALPHI = AIMAG( ALPHA )
IF( XNORM.EQ.RZERO .AND. ALPHI.EQ.RZERO ) THEN
TAUI = RZERO
ELSE
BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' )
RSAFMN = RONE / SAFMIN
IF( DABS( BETA ).LT.SAFMIN ) THEN
KNT = 0
10 CONTINUE
KNT = KNT + 1
IF(NI1.GT.0) THEN
CALL zdscal( NI1, RSAFMN, AP( I2, I ), 1 )
ENDIF
BETA = BETA*RSAFMN
ALPHI = ALPHI*RSAFMN
ALPHR = ALPHR*RSAFMN
IF( DABS( BETA ).LT.SAFMIN ) GO TO 10
IF((N-I-1).GT.0) THEN
XNORM = DZNRM2( NI1, AP( I2, I ), 1 )
#if defined __MPI
XNORM = XNORM ** 2
CALL MPI_ALLREDUCE( MPI_IN_PLACE, xnorm, 1, MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_allreduce 2', ierr )
XNORM = SQRT( XNORM )
#endif
ELSE
XNORM = 0.0_DP
ENDIF
ALPHA = CMPLX( ALPHR, ALPHI, KIND=DP )
BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
TAUI = CMPLX( (BETA-ALPHR)/BETA, -ALPHI/BETA, KIND=DP )
! next line yields problems on some compilers
! ALPHA = ZLADIV( ONE, ALPHA-BETA )
ALPHA = ONE / (ALPHA-BETA)
IF(NI1.GT.0) THEN
CALL zscal( NI1, ALPHA, AP( I2, I ), 1 )
ENDIF
ALPHA = BETA
DO J = 1, KNT
ALPHA = ALPHA*SAFMIN
END DO
ELSE
TAUI = CMPLX( (BETA-ALPHR)/BETA, -ALPHI/BETA, KIND=DP )
! next line yields problems on some compilers
! ALPHA = ZLADIV( ONE, ALPHA-BETA )
ALPHA = ONE / (ALPHA-BETA)
IF(NI1.GT.0) THEN
CALL zscal( NI1, ALPHA, AP( I2, I ), 1 )
ENDIF
ALPHA = BETA
END IF
END IF
ENDIF
!
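! at this point the imaginary part of ALPHA is zero, so the
! complex-to-real assignment below loses nothing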
E( I ) = ALPHA
!
IF( TAUI.NE.ZERO ) THEN
!
! Apply H(i) from both sides to A(i+1:n,i+1:n)
!
! ... AP( I+1, I ) = ONE
IF (OW(I+1).EQ.ME) THEN
AP( IL(I+1), I ) = ONE
END IF
!
! Compute y := tau * A * v storing y in TAU(i:n-1)
!
! ... broadcast A(K,I)
IF(OW(I+1).EQ.ME) THEN
I1 = IL(I+1)
ELSE
I1 = IL(I+1) + 1 ! I+2
ENDIF
#if defined __MPI
DO J = I+1, N
CTMPV(J) = ZERO
END DO
DO JL = I1, NRL
J = ME + (JL-1)*NPROC + 1
CTMPV(J) = AP(JL,I)
END DO
CALL MPI_ALLREDUCE( ctmpv( i + 1 ), apki( i + 1 ), 2*(n - i), MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_allreduce 3', ierr )
#else
DO J = I+1,N
APKI(J) = AP(J,I)
ENDDO
#endif
DO J = I+1, N+1
TAU(J-1) = ZERO
END DO
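!
! accumulate y(J) = TAUI * sum_K A(J,K)*v(K): first the locally
! stored lower-triangle terms with K <= J, then the K > J terms
! through the conjugate of the stored lower triangle (A is Hermitian)
!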
DO JL = I1, NRL
J = ME + (JL-1)*NPROC + 1
TAU(J-1) = ZERO
DO K = I+1, J
TAU(J-1) = TAU(J-1) + TAUI * AP(JL,K) * APKI(K)
END DO
END DO
DO J = I+1, N
IF(OW(J+1).EQ.ME) THEN
J1 = IL(J+1)
ELSE
J1 = IL(J+1) + 1 ! J+2
ENDIF
DO KL = J1, NRL
K = ME + (KL-1)*NPROC + 1
TAU(J-1) = TAU(J-1) + TAUI * CONJG(AP(KL,J)) * APKI(K)
END DO
END DO
#if defined __MPI
! ... parallel sum TAU
CALL MPI_ALLREDUCE( MPI_IN_PLACE, tau( i ), 2*(n - i + 1), MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_allreduce 4', ierr )
#endif
!
! Compute w := y - 1/2 * tau * (y'*v) * v
!
! ... ALPHA = -HALF*TAUI*zdotc(N-I,TAU(I),1,AP(I+1,I),1)
JL = 1
DO J = I, N
IF(OW(J+1).EQ.ME) THEN
TAUL(JL) = TAU(J)
JL = JL + 1
END IF
END DO
IF(OW(I+1).EQ.ME) THEN
I1 = IL(I+1)
ELSE
I1 = IL(I+1) + 1 ! I+1
ENDIF
NI1 = NRL - I1 + 1 ! N-I
IF ( NI1 > 0 ) THEN
! next line yields problems on some compilers
!ALPHA = -HALF*TAUI*zdotc(NI1,TAUL(1),1,AP(I1,I),1)
ALPHA = -HALF*TAUI*dot_product(TAUL(1:NI1),AP(I1:I1+NI1-1,I))
ELSE
ALPHA = 0.0_DP
END IF
#if defined __MPI
CALL MPI_ALLREDUCE( MPI_IN_PLACE, alpha, 2, MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_allreduce 5', ierr )
#endif
#if defined __MPI
IF ( NI1 > 0 ) CALL zaxpy(NI1,ALPHA,AP(I1,I),1,TAUL(1),1)
JL = 1
DO J = I, N
CTMPV(J) = ZERO
IF(OW(J+1).EQ.ME) THEN
CTMPV(J) = TAUL(JL)
JL = JL + 1
END IF
END DO
CALL MPI_ALLREDUCE( ctmpv( i ), tau( i ), 2*(n - i + 1), MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_allreduce 6', ierr )
#else
CALL zaxpy(N-I,ALPHA,AP(I+1,I),1,TAU(I),1)
#endif
!
! Apply the transformation as a rank-2 update:
! A := A - v * w' - w * v'
!
! ... broadcast A(K,I)
IF(OW(I+1).EQ.ME) THEN
I1 = IL(I+1)
ELSE
I1 = IL(I+1) + 1 ! I+2
ENDIF
#if defined __MPI
DO J = I+1, N
CTMPV(J) = ZERO
END DO
DO JL = I1, NRL
J = ME + (JL-1)*NPROC + 1
CTMPV(J) = AP(JL,I)
END DO
CALL MPI_ALLREDUCE( ctmpv( i + 1 ), apki( i + 1 ), 2*(n - i), MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_allreduce 7', ierr )
#else
DO J = I+1, N
APKI(J) = AP(J,I)
END DO
#endif
DO K = I+1,N
DO JL = I1,NRL
J = ME + (JL-1)*NPROC + 1
AP(JL,K) = AP(JL,K) - ONE * AP(JL,I) * CONJG(TAU(K-1)) - &
& CONJG(ONE) * TAU(J-1) * CONJG(APKI(K))
END DO
END DO
!
END IF
IF(OW(I+1).EQ.ME) THEN
AP(IL(I+1),I) = E( I )
END IF
IF(OW(I).EQ.ME) THEN
D( I ) = DBLE(AP( IL(I),I ))
END IF
#if defined __MPI
CALL MPI_BCAST( d(i), 1, MPI_DOUBLE_PRECISION, ow(i), comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_bcast 2', ierr )
#endif
TAU( I ) = TAUI
END DO
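!
! on exit from the loop above the do-index I has the value N
! (standard Fortran semantics), so OW(I) and IL(I) below refer
! to the last row
!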
IF(OW(I).EQ.ME) THEN
D( N ) = DBLE(AP( IL(I),I ))
END IF
#if defined __MPI
CALL MPI_BCAST( d(n), 1, MPI_DOUBLE_PRECISION, ow(i), comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzhptrd ', 'error in mpi_bcast 3', ierr )
#endif
!
RETURN
!
! End of PZHPTRD
!
END SUBROUTINE pzhptrd
!==----------------------------------------------==!
SUBROUTINE pzupgtr( n, nrl, ap, lda, tau, q, ldq, nproc, me, comm)
!
! Parallel MPI version of the LAPACK routine ZUPGTR
!
! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA
! December 12, 1999
!
! REFERENCES :
!
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
!
! PARALLEL NUMERICAL ALGORITHMS,
! T.L. FREEMAN AND C.PHILLIPS,
! PRENTICE HALL INTERNATIONAL (1992).
!
! LAPACK routine (version 2.0) --
! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
! Courant Institute, Argonne National Lab, and Rice University
USE laxlib_parallel_include
IMPLICIT NONE
include 'laxlib_kinds.fh'
!
! .. Scalar Arguments ..
INTEGER INFO, LDQ, N, LDA, NRL, NPROC, ME, comm
! ..
! .. Array Arguments ..
COMPLEX(DP) AP(LDA, * ), Q( LDQ, * ), TAU( * )
! ..
!
! Purpose
! =======
!
! PZUPGTR generates a complex unitary matrix Q which is defined as the
! product of n-1 elementary reflectors H(i) of order n, as returned by
! PZHPTRD :
!
! Q = H(1) H(2) . . . H(n-1).
!
! Arguments
! =========
!
! N (input) INTEGER
! The order of the global matrix AP. N >= 0.
!
! NRL (input) INTEGER
! The number of local rows of the matrix AP. NRL >= 0.
!
! AP (input) COMPLEX(DP) array, dimension (LDA,N)
! The vectors which define the elementary reflectors, as
! returned by PZHPTRD.
! The rows of the matrix are distributed among processors
! with blocking factor 1.
! Example for NPROC = 4 :
! ROW | PE
! 1 | 0
! 2 | 1
! 3 | 2
! 4 | 3
! 5 | 0
! 6 | 1
! .. | ..
!
! LDA (input) INTEGER
! Leading dimension of the local matrix AP, LDA >= NRL
!
! TAU (input) COMPLEX(DP) array, dimension (N-1)
! TAU(i) must contain the scalar factor of the elementary
! reflector H(i), as returned by PZHPTRD.
!
! Q (output) COMPLEX(DP) array, dimension (LDQ,N)
! The N-by-N unitary matrix Q.
! The rows of the matrix are distributed among processors
! in the same way as the matrix AP
!
! LDQ (input) INTEGER
! The leading dimension of the array Q. LDQ >= max(1,NRL).
!
! NPROC (input) INTEGER
! Number of processors
!
! ME (input) INTEGER
! Index of the local processor ( 0, 1, 2, ..., NPROC-1 )
!
! =====================================================================
!
! .. Parameters ..
COMPLEX(DP) ONE, ZERO
PARAMETER ( ONE = (1.0_DP,0.0_DP), ZERO = (0.0_DP,0.0_DP) )
! change the following parameters to tune the performance
!
INTEGER, PARAMETER :: opt_zgemv = 40
INTEGER, PARAMETER :: opt_zgerc = 40
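! updates involving more than opt_zgemv / opt_zgerc columns are done
! with BLAS calls (zgemv / zgerc); smaller updates use the explicit
! loops below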
INTEGER QI
INTEGER IL(N+1)
INTEGER OW(N+1)
COMPLEX(DP) CTMP
COMPLEX(DP) WORK(N+1)
! ..
! .. Local Scalars ..
INTEGER :: I, IINFO, J, K, JL, KL, J1, I1, I2, NI1, L, IERR
INTEGER :: ibeg, iend, nr
INTEGER, EXTERNAL :: ldim_cyclic, lind_cyclic
! ..
! .. Executable Statements ..
!
! Test the input arguments
!
! Quick return if possible
!
IF( N == 0 ) THEN
RETURN
END IF
nr = ldim_cyclic( n, nproc, me )
!
IF( nr /= nrl ) &
CALL lax_error__( " pzupgtr ", " inconsistent dimensions ", nrl )
!
ibeg = lind_cyclic( 1, n, nproc, me )
iend = lind_cyclic( nr, n, nproc, me )
!
DO I = 1,N+1
QI = (I-1)/NPROC
OW(I) = MOD((I-1),NPROC)
IF(ME .le. OW(I) ) then
IL(I) = QI + 1
ELSE
IL(I) = QI
END IF
END DO
!
! Unpack the vectors which define the elementary reflectors and
! set the first row and column of Q equal to those of the unit
! matrix
!
IF(OW(1).EQ.ME) THEN
Q( IL(1), 1 ) = ONE
DO KL = 2, NRL
Q( KL, 1 ) = ZERO
END DO
DO J = 2, N
Q( IL(1), J ) = ZERO
END DO
ELSE
DO KL = 1, NRL
Q( KL, 1 ) = ZERO
END DO
ENDIF
DO J = 2, N
IF(OW(J+1).EQ.ME) THEN
J1 = IL(J+1)
ELSE
J1 = IL(J+1) + 1
ENDIF
DO KL = J1, NRL
Q( KL, J ) = AP( KL, J-1 )
END DO
END DO
IF( N.GT.1 ) THEN
!
! Generate Q(2:n,2:n)
!
DO I = N-1, 1, -1
!
! Apply H(i) to A(i:m,i:n) from the left
!
IF( I.LT.(N-1) ) THEN
IF(OW(I+1).EQ.ME) THEN
Q( IL(I+1), I+1 ) = ONE
END IF
!
! Form H * C
!
IF( TAU(I).NE.ZERO ) THEN
!
! w := C' * v
!
IF(OW(I+1).EQ.ME) THEN
I1 = IL(I+1)
ELSE
I1 = IL(I+1) + 1
ENDIF
!
IF( N-1-I > OPT_ZGEMV ) THEN
IF( NRL-I1+1 > 0 ) THEN
CALL zgemv( 'C', NRL-I1+1, N-1-I, one, Q( I1, I+1+1 ), ldq, Q( I1, I+1 ), 1, zero, work, 1 )
ELSE
work( 1 : N-1-I ) = 0.0_DP
END IF
ELSE
DO J = 1, N-1-I
CTMP = ZERO
DO KL = I1, NRL
CTMP = CTMP + CONJG( Q( KL, J+I+1 ) ) * Q( KL,I+1 )
END DO
WORK(J) = CTMP
END DO
END IF
#if defined __MPI
CALL MPI_ALLREDUCE( MPI_IN_PLACE, work, 2*(n - 1 - i), MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzupgtr ', 'error in mpi_allreduce 1', ierr )
#endif
!
! C := C - v * w'
!
IF( N-1-I > opt_zgerc ) THEN
IF( NRL-I1+1 > 0 ) THEN
CALL zgerc( NRL-I1+1, N-1-I, -TAU(I), Q(I1, I+1), 1, work, 1, Q( I1, 1+I+1 ), ldq )
END IF
ELSE
DO J = 1, N-1-I
CTMP = -TAU(I) * CONJG( WORK( J ) )
DO KL = I1, NRL
Q( KL, J+I+1 ) = Q( KL, J+I+1 ) + CTMP * Q(KL, I+1)
END DO
END DO
END IF
END IF
END IF
IF( I.LT.(N-1) ) THEN
IF(OW(I+2).EQ.ME) THEN
I2 = IL(I+2) ! I+2
ELSE
I2 = IL(I+2) + 1 ! local ind. of the first element > I+2
ENDIF
NI1 = NRL - I2 + 1 ! N-I-1
IF ( NI1 > 0 ) CALL zscal( NI1, -TAU( I ), Q( I2, I+1 ), 1 )
END IF
IF(OW(I+1).EQ.ME) THEN
Q( IL(I+1), I+1 ) = ONE - TAU( I )
END IF
!
! Set A(1:i-1,i) to zero
!
DO L = 1, I - 1
IF(OW(L+1).EQ.ME) THEN
Q( IL(L+1), I+1 ) = ZERO
END IF
END DO
END DO
END IF
RETURN
!
! End of PZUPGTR
!
END SUBROUTINE pzupgtr
!==----------------------------------------------==!
SUBROUTINE pzsteqr( compz, n, nrl, d, e, z, ldz, nproc, me, comm )
!
! Parallel MPI version of the LAPACK routine ZSTEQR
!
! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA
! December 12, 1999
!
! REFERENCES :
!
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
!
! PARALLEL NUMERICAL ALGORITHMS,
! T.L. FREEMAN AND C.PHILLIPS,
! PRENTICE HALL INTERNATIONAL (1992).
!
! LAPACK routine (version 2.0) --
! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
! Courant Institute, Argonne National Lab, and Rice University
!
USE laxlib_parallel_include
IMPLICIT NONE
include 'laxlib_kinds.fh'
! .. Scalar Arguments ..
CHARACTER COMPZ
INTEGER LDZ, N, NRL, NPROC, ME, comm
! ..
! .. Array Arguments ..
REAL(DP) D( * ), E( * )
COMPLEX(DP) Z( LDZ, * )
! ..
!
! Purpose
! =======
!
! PZSTEQR computes all eigenvalues and, optionally, eigenvectors of a
! symmetric tridiagonal matrix using the implicit QL or QR method.
! The eigenvectors of a full or band complex Hermitian matrix can also
! be found if PZHPTRD has been used to reduce this
! matrix to tridiagonal form.
!
! Arguments
! =========
!
! COMPZ (input) CHARACTER*1
! = 'N': Compute eigenvalues only.
! = 'V': Compute eigenvalues and eigenvectors of the original
! Hermitian matrix. On entry, Z must contain the
! unitary matrix used to reduce the original matrix
! to tridiagonal form.
! = 'I': Compute eigenvalues and eigenvectors of the
! tridiagonal matrix. Z is initialized to the identity
! matrix.
!
! N (input) INTEGER
! The order of the global matrix. N >= 0.
!
! NRL (input) INTEGER
! The number of local rows of the matrix Z. NRL >= 0.
!
! D (input/output) DOUBLE PRECISION array, dimension (N)
! On entry, the diagonal elements of the tridiagonal matrix.
! On exit, if INFO = 0, the eigenvalues in ascending order.
!
! E (input/output) DOUBLE PRECISION array, dimension (N-1)
! On entry, the (n-1) subdiagonal elements of the tridiagonal
! matrix.
! On exit, E has been destroyed.
!
! Z (input/output) COMPLEX(DP) array, dimension (LDZ, N)
! On entry, if COMPZ = 'V', then Z contains the unitary
! matrix used in the reduction to tridiagonal form.
! On exit if COMPZ = 'V', Z contains the
! orthonormal eigenvectors of the original Hermitian matrix,
! and if COMPZ = 'I', Z contains the orthonormal eigenvectors
! of the symmetric tridiagonal matrix.
! If COMPZ = 'N', then Z is not referenced.
! The rows of the matrix are distributed among processors
! with blocking factor 1, i.e. for NPROC = 4 :
! ROW Index | Processor index owning the row
! 1 | 0
! 2 | 1
! 3 | 2
! 4 | 3
! 5 | 0
! 6 | 1
! .. | ..
!
! LDZ (input) INTEGER
! The leading dimension of the array Z. LDZ >= 1, and if
! eigenvectors are desired, then LDZ >= max(1,NRL).
!
! NPROC (input) INTEGER
! Number of processors
!
! ME (input) INTEGER
! Index of the local processor ( 0, 1, 2, ..., NPROC-1 )
!
! =====================================================================
!
! .. Parameters ..
REAL(DP) RZERO, RONE, TWO, THREE, CTEMP, STEMP
PARAMETER ( RZERO = 0.0_DP, RONE = 1.0_DP, TWO = 2.0_DP, &
& THREE = 3.0_DP )
COMPLEX(DP) ZERO, ONE,ZTEMP
PARAMETER ( ZERO = ( 0.0_DP, 0.0_DP ), ONE = ( 1.0_DP, 0.0_DP ) )
INTEGER MAXIT
PARAMETER ( MAXIT = 30 )
! ..
INTEGER :: QI, KL, INFO
INTEGER :: IL(N+1)
INTEGER :: OW(N+1)
REAL(DP) :: WORK(2*N)
REAL(DP) :: dvar(6)
! .. Local Scalars ..
INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, &
& LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, &
& NM1, NMAXIT, IERR
REAL(DP) ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, &
& S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST
! ..
! .. External Functions ..
LOGICAL LSAME
REAL(DP) DLAMCH, DLANST, DLAPY2
EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2
! ..
! .. External Subroutines ..
EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASRT, XERBLA
EXTERNAL ZLASET, ZLASR, ZSWAP
! ..
! .. Intrinsic Functions ..
INTRINSIC DABS, MAX, SIGN, SQRT
! ..
! .. Executable Statements ..
!
! Test the input parameters.
!
INFO = 0
!
IF( LSAME( COMPZ, 'N' ) ) THEN
ICOMPZ = 0
ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
ICOMPZ = 1
ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
ICOMPZ = 2
ELSE
ICOMPZ = -1
END IF
IF( ICOMPZ.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( (LDZ.LT.1) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX(1,NRL) ) ) THEN
INFO = -6
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'PZSTEQR', -INFO )
RETURN
END IF
!
! Quick return if possible
!
IF(N.LE.0) THEN
RETURN
END IF
!
DO I = 1,N+1
QI = (I-1)/NPROC
OW(I) = MOD((I-1),NPROC)
IF(ME .le. OW(I) ) then
IL(I) = QI + 1
ELSE
IL(I) = QI
END IF
END DO
IF( N.EQ.1 ) THEN
IF( ICOMPZ.EQ.2 .AND. OW(1).EQ.ME ) Z( IL(1), 1 ) = ONE
RETURN
END IF
!
! Determine the unit roundoff and over/underflow thresholds.
! We ensure that all procs have the same data!
!
EPS = DLAMCH( 'E' )
EPS2 = EPS**2
SAFMIN = DLAMCH( 'S' )
SAFMAX = RONE / SAFMIN
SSFMAX = SQRT( SAFMAX ) / THREE
SSFMIN = SQRT( SAFMIN ) / EPS2
!
dvar(1) = EPS
dvar(2) = EPS2
dvar(3) = SAFMIN
dvar(4) = SAFMAX
dvar(5) = SSFMAX
dvar(6) = SSFMIN
!
#if defined(__MPI)
CALL MPI_BCAST( dvar, 6, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 4', ierr )
#endif
!
EPS = dvar(1)
EPS2 = dvar(2)
SAFMIN = dvar(3)
SAFMAX = dvar(4)
SSFMAX = dvar(5)
SSFMIN = dvar(6)
!
! Compute the eigenvalues and eigenvectors of the tridiagonal
! matrix.
!
IF( ICOMPZ.EQ.2 ) THEN
CALL ZLASET( 'Full', NRL, N, ZERO, ZERO, Z, LDZ )
DO J = 1, N
IF(OW(J).EQ.ME) THEN
Z( IL(J), J ) = ONE
END IF
END DO
END IF
!
NMAXIT = N*MAXIT
JTOT = 0
!
! Determine where the matrix splits and choose QL or QR iteration
! for each block, according to whether top or bottom diagonal
! element is smaller.
!
L1 = 1
NM1 = N - 1
!
10 CONTINUE
IF( L1 .GT. N ) GO TO 160
IF( L1 .GT. 1 ) E( L1-1 ) = RZERO
IF( me == 0 ) THEN
IF( L1.LE.NM1 ) THEN
DO M = L1, NM1
TST = DABS( E( M ) )
IF( TST .EQ. RZERO ) GO TO 30
IF( TST .LE. ( SQRT(DABS(D(M)))*SQRT(DABS(D(M+1))) ) * EPS ) THEN
E( M ) = RZERO
GO TO 30
END IF
END DO
END IF
M = N
!
30 CONTINUE
END IF
#if defined(__MPI)
CALL MPI_BCAST( e(l1), nm1-l1+1, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 5', ierr )
CALL MPI_BCAST( m, 1, MPI_INTEGER, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 6', ierr )
#endif
L = L1
LSV = L
LEND = M
LENDSV = LEND
L1 = M + 1
IF( LEND.EQ.L ) GO TO 10
!
! Scale submatrix in rows and columns L to LEND
!
ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) )
ISCALE = 0
IF( ANORM.EQ.RZERO ) GO TO 10
IF( ANORM.GT.SSFMAX ) THEN
ISCALE = 1
CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, INFO )
CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, INFO )
ELSE IF( ANORM.LT.SSFMIN ) THEN
ISCALE = 2
CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, INFO )
CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, INFO )
END IF
!
! Choose between QL and QR iteration
!
IF( DABS( D( LEND ) ).LT.DABS( D( L ) ) ) THEN
LEND = LSV
L = LENDSV
END IF
!
IF( LEND.GT.L ) THEN
!
! QL Iteration
!
! Look for small subdiagonal element.
!
40 CONTINUE
IF( me == 0 ) THEN
IF( L.NE.LEND ) THEN
LENDM1 = LEND - 1
DO M = L, LENDM1
TST = DABS( E( M ) )**2
IF( TST.LE.( EPS2*DABS(D(M)) )*DABS(D(M+1))+ SAFMIN )GO TO 60
END DO
END IF
!
M = LEND
!
60 CONTINUE
END IF
#if defined(__MPI)
CALL MPI_BCAST( m, 1, MPI_INTEGER, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 7', ierr )
#endif
IF( M.LT.LEND ) E( M ) = RZERO
P = D( L )
IF( M.EQ.L ) THEN
!
! Eigenvalue found.
!
D( L ) = P
L = L + 1
IF( L.LE.LEND ) GO TO 40
GO TO 140
END IF
!
! If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
! to compute its eigensystem.
!
IF( M.EQ.L+1 ) THEN
IF( ICOMPZ.GT.0 ) THEN
CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S )
WORK( L ) = C
WORK( N-1+L ) = S
CTEMP = WORK( L )
STEMP = WORK( N-1+L )
IF( ( CTEMP.NE.RONE ) .OR. ( STEMP.NE.RZERO ) ) THEN
DO KL = 1, NRL
ZTEMP = Z( KL, 1+L )
Z( KL, 1+L ) = CTEMP*ZTEMP - STEMP*Z( KL, L )
Z( KL, L ) = STEMP*ZTEMP + CTEMP*Z( KL, L )
END DO
END IF
ELSE
CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 )
END IF
D( L ) = RT1
D( L+1 ) = RT2
E( L ) = RZERO
L = L + 2
IF( L.LE.LEND ) GO TO 40
GO TO 140
END IF
!
IF( JTOT.EQ.NMAXIT ) GO TO 140
JTOT = JTOT + 1
!
! Form shift.
!
!
! iteration is performed on one processor and results broadcast
! to all others to prevent potential problems if all processors
! do not behave in exactly the same way (even with the same data!)
!
if ( me == 0 ) then
G = ( D( L+1 )-P ) / ( TWO*E( L ) )
R = DLAPY2( G, RONE )
G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) )
!
S = RONE
C = RONE
P = RZERO
!
! Inner loop
!
MM1 = M - 1
DO I = MM1, L, -1
F = S*E( I )
B = C*E( I )
CALL DLARTG( G, F, C, S, R )
IF( I.NE.M-1 ) E( I+1 ) = R
G = D( I+1 ) - P
R = ( D( I )-G )*S + TWO*C*B
P = S*R
D( I+1 ) = G + P
G = C*R - B
!
! If eigenvectors are desired, then save rotations.
!
IF( ICOMPZ.GT.0 ) THEN
WORK( I ) = C
WORK( N-1+I ) = -S
END IF
END DO
D( L ) = D( L ) - P
E( L ) = G
END IF
#if defined __MPI
CALL MPI_BCAST( d( l ), m - l + 1, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 8', ierr )
CALL MPI_BCAST( e( l ), m - l + 1, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 9', ierr )
#endif
!
! If eigenvectors are desired, then apply saved rotations.
!
IF( ICOMPZ.GT.0 ) THEN
#if defined __MPI
CALL MPI_BCAST( work, 2*n, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 10', ierr )
#endif
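!
! apply the saved Givens rotations to the locally owned rows of Z
! (the serial LAPACK code would call ZLASR here)
!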
DO J = M - L + 1 - 1, 1, -1
CTEMP = WORK( L + J -1)
STEMP = WORK( N-1+L + J-1)
IF( ( CTEMP.NE.RONE ) .OR. ( STEMP.NE.RZERO ) ) THEN
DO KL = 1, NRL
ZTEMP = Z( KL, J+1+L-1 )
Z( KL, J+1+L-1 ) = CTEMP*ZTEMP - STEMP*Z( KL, J+L-1 )
Z( KL, J+L-1 ) = STEMP*ZTEMP + CTEMP*Z( KL, J+L-1 )
END DO
END IF
END DO
END IF
!
GO TO 40
!
ELSE
!
! QR Iteration
!
! Look for small superdiagonal element.
!
90 CONTINUE
IF( me == 0 ) THEN
IF( L.NE.LEND ) THEN
LENDP1 = LEND + 1
DO 100 M = L, LENDP1, -1
TST = DABS( E( M-1 ) )**2
IF( TST.LE.(EPS2*DABS(D(M)))*DABS(D(M-1))+ SAFMIN )GO TO 110
100 CONTINUE
END IF
!
M = LEND
!
110 CONTINUE
END IF
#if defined __MPI
CALL MPI_BCAST( m, 1, MPI_INTEGER, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 11', ierr )
#endif
IF( M.GT.LEND ) E( M-1 ) = RZERO
P = D( L )
IF( M.EQ.L ) THEN
!
! Eigenvalue found.
!
D( L ) = P
L = L - 1
IF( L.GE.LEND ) GO TO 90
GO TO 140
END IF
!
! If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
! to compute its eigensystem.
!
IF( M.EQ.L-1 ) THEN
IF( ICOMPZ.GT.0 ) THEN
CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S )
WORK( M ) = C
WORK( N-1+M ) = S
CTEMP = WORK( M )
STEMP = WORK( N-1+M )
IF( ( CTEMP.NE.RONE ) .OR. ( STEMP.NE.RZERO ) ) THEN
DO KL = 1, NRL
ZTEMP = Z( KL, L)
Z( KL, L ) = CTEMP*ZTEMP - STEMP*Z( KL, L-1 )
Z( KL, L-1 ) = STEMP*ZTEMP + CTEMP*Z( KL, L-1 )
END DO
END IF
ELSE
CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 )
END IF
D( L-1 ) = RT1
D( L ) = RT2
E( L-1 ) = RZERO
L = L - 2
IF( L.GE.LEND ) GO TO 90
GO TO 140
END IF
!
IF( JTOT.EQ.NMAXIT ) GO TO 140
JTOT = JTOT + 1
!
! Form shift.
!
!
! iteration is performed on one processor and results broadcast
! to all others to prevent potential problems if all processors
! do not behave in exactly the same way (even with the same data!)
!
if ( me == 0 ) then
G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) )
R = DLAPY2( G, RONE )
G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) )
!
S = RONE
C = RONE
P = RZERO
!
! Inner loop
!
LM1 = L - 1
DO I = M, LM1
F = S*E( I )
B = C*E( I )
CALL DLARTG( G, F, C, S, R )
IF( I.NE.M ) E( I-1 ) = R
G = D( I ) - P
R = ( D( I+1 )-G )*S + TWO*C*B
P = S*R
D( I ) = G + P
G = C*R - B
!
! If eigenvectors are desired, then save rotations.
!
IF( ICOMPZ.GT.0 ) THEN
WORK( I ) = C
WORK( N-1+I ) = S
END IF
END DO
D( L ) = D( L ) - P
E( LM1 ) = G
END IF
#if defined __MPI
CALL MPI_BCAST( d(m), l-m+1, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 12', ierr )
CALL MPI_BCAST( e(m), l-m+1, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 13', ierr )
#endif
!
! If eigenvectors are desired, then apply saved rotations.
!
IF( ICOMPZ.GT.0 ) THEN
#if defined __MPI
CALL MPI_BCAST( work, 2*n, MPI_DOUBLE_PRECISION, 0, comm, ierr )
IF( ierr /= 0 ) CALL lax_error__( ' pzsteqr ', 'error in mpi_bcast 14', ierr )
#endif
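!
! apply the saved Givens rotations to the locally owned rows of Z
! (the serial LAPACK code would call ZLASR here)
!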
DO J = 1, L - M
CTEMP = WORK( M+J-1 )
STEMP = WORK( N-1+M+J-1 )
IF( ( CTEMP.NE.RONE ) .OR. ( STEMP.NE.RZERO ) ) THEN
DO KL = 1, NRL
ZTEMP = Z( KL, J+M )
Z( KL, J+M ) = CTEMP*ZTEMP - STEMP*Z(KL, J+M-1)
Z( KL, J+M-1 ) = STEMP*ZTEMP + CTEMP*Z(KL, J+M-1)
END DO
END IF
END DO
END IF
!
GO TO 90
!
END IF
!
! Undo scaling if necessary
!
140 CONTINUE
IF( ISCALE.EQ.1 ) THEN
CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, &
& D( LSV ), N, INFO )
CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), &
& N, INFO )
ELSE IF( ISCALE.EQ.2 ) THEN
CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, &
& D( LSV ), N, INFO )
CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), &
& N, INFO )
END IF
!
! Check for no convergence to an eigenvalue after a total
! of N*MAXIT iterations.
!
IF( JTOT .EQ. NMAXIT ) THEN
DO 150 I = 1, N - 1
IF( E( I ) .NE. RZERO ) INFO = INFO + 1
150 CONTINUE
WRITE(6,*) 'WARNING pzsteqr, convergence not achieved INFO = ', INFO
RETURN
END IF
GO TO 10
!
! Order eigenvalues and eigenvectors.
!
160 CONTINUE
IF( ICOMPZ.EQ.0 ) THEN
!
! Use Quick Sort
!
CALL DLASRT( 'I', N, D, INFO )
!
ELSE
!
! Use Selection Sort to minimize swaps of eigenvectors
!
DO 180 II = 2, N
I = II - 1
K = I
P = D( I )
DO 170 J = II, N
IF( D( J ).LT.P ) THEN
K = J
P = D( J )
END IF
170 CONTINUE
IF( K.NE.I ) THEN
D( K ) = D( I )
D( I ) = P
CALL ZSWAP( NRL, Z( 1, I ), 1, Z( 1, K ), 1 )
END IF
180 CONTINUE
END IF
RETURN
!
! End of PZSTEQR
!
END SUBROUTINE pzsteqr
!==----------------------------------------------==!
#if defined __SCALAPACK
SUBROUTINE pzheevd_drv( tv, n, nb, h, w, ortho_cntx, ortho_comm )
#if defined(__ELPA_2015) || defined(__ELPA_2016)
USE elpa1
#elif defined(__ELPA)
use elpa
#endif
USE laxlib_parallel_include
IMPLICIT NONE
include 'laxlib_kinds.fh'
LOGICAL, INTENT(IN) :: tv
! if tv is true compute both eigenvalues and eigenvectors;
! eigenvalues-only (tv = .FALSE.) is not supported
INTEGER, INTENT(IN) :: nb, n, ortho_cntx, ortho_comm
! nb = block size, n = matrix size, ortho_cntx = BLACS context,
! ortho_comm = MPI communicator
COMPLEX(DP) :: h(:,:)
! input: h = matrix to be diagonalized
! output: h = eigenvectors
REAL(DP) :: w(:)
! output: w = eigenvalues
COMPLEX(DP) :: ztmp( 4 )
REAL(DP) :: rtmp( 4 )
INTEGER :: itmp( 4 ),ldw
COMPLEX(DP), ALLOCATABLE :: work(:)
COMPLEX(DP), ALLOCATABLE :: v(:,:)
REAL(DP), ALLOCATABLE :: rwork(:)
INTEGER, ALLOCATABLE :: iwork(:)
INTEGER :: LWORK, LRWORK, LIWORK
INTEGER :: desch( 10 ), info, ierr
CHARACTER :: jobv
#if defined(__ELPA) || defined(__ELPA_2015) || defined(__ELPA_2016)
INTEGER :: nprow,npcol,my_prow, my_pcol,mpi_comm_rows, mpi_comm_cols
LOGICAL :: success
#endif
#if defined(__ELPA)
class(elpa_t), pointer :: elpa_h
#endif
!
IF( tv ) THEN
ALLOCATE( v( SIZE( h, 1 ), SIZE( h, 2 ) ) )
jobv = 'V'
ELSE
CALL lax_error__('pzheevd_drv', 'pzheevd does not compute eigenvalues only',1)
END IF
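! initialize the ScaLAPACK descriptor of the block-cyclically
! distributed matrix h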
call descinit( desch, n, n, nb, nb, 0, 0, ortho_cntx, size(h,1), info )
#if defined(__ELPA) || defined(__ELPA_2015) || defined(__ELPA_2016)
CALL BLACS_Gridinfo(ortho_cntx,nprow, npcol, my_prow,my_pcol)
#if defined(__ELPA)
! => from elpa-2018.11.001 to 2019.xx.xx
if (elpa_init(20181101) /= ELPA_OK) then
print *, "ELPA API version in use not supported. Aborting ..."
stop
endif
elpa_h => elpa_allocate(ierr)
if (ierr /= ELPA_OK) then
print *,"Problem initializing ELPA. Aborting ..."
stop
endif
#if defined(__DEBUG)
call elpa_h%set("debug",1,ierr)
#endif
! set parameters describing the matrix and its MPI distribution
call elpa_h%set("na", n, ierr)
call elpa_h%set("nev", n, ierr)
call elpa_h%set("nblk", size(h,2), ierr)
call elpa_h%set("local_nrows", size(h,1),ierr)
call elpa_h%set("local_ncols", nb,ierr)
call elpa_h%set("mpi_comm_parent", ortho_comm, ierr)
call elpa_h%set("process_row", my_prow, ierr)
call elpa_h%set("process_col", my_pcol, ierr)
ierr = elpa_h%setup()
if (ierr .ne. ELPA_OK) then
print *,"Problem setting up ELPA options. Aborting ..."
stop
endif
call elpa_h%set("solver", ELPA_SOLVER_1STAGE, ierr)
call elpa_h%eigenvectors(h, w, v, ierr)
call elpa_deallocate(elpa_h, ierr)
call elpa_uninit(ierr)
#elif defined(__ELPA_2016)
! -> from ELPA 2016.11.001_pre through 2017.XX.XX to elpa-2018.05.001
ierr = elpa_get_communicators(ortho_comm, my_prow, my_pcol,mpi_comm_rows, mpi_comm_cols)
success = solve_evp_complex_1stage_double(n, n, h, size(h,1), w, v, size(h,1), size(h,2), nb, &
mpi_comm_rows, mpi_comm_cols, ortho_comm)
! -> ELPA 2016.05.003
!ierr = get_elpa_row_col_comms(ortho_comm, my_prow, my_pcol,mpi_comm_rows, mpi_comm_cols)
!success = solve_evp_complex(n, n, h, size(h,1), w, v, size(h,1), size(h,2), nb, &
! mpi_comm_rows, mpi_comm_cols)
#elif defined(__ELPA_2015)
ierr = get_elpa_row_col_comms(ortho_comm, my_prow, my_pcol,mpi_comm_rows, mpi_comm_cols)
ierr = solve_evp_complex(n, n, h, size(h,1), w, v, size(h,1), size(h,2), nb, &
mpi_comm_rows, mpi_comm_cols)
#endif
h = v
#if defined(__MPI) && (defined(__ELPA_2015) || defined(__ELPA_2016))
CALL mpi_comm_free( mpi_comm_rows, ierr )
CALL mpi_comm_free( mpi_comm_cols, ierr )
#endif
#else
!
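! workspace query: calling PZHEEVD with LWORK = LRWORK = LIWORK = -1
! only returns the optimal workspace sizes in the first element of
! each work array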
lwork = -1
lrwork = -1
liwork = -1
!
CALL PZHEEVD( 'V', 'L', n, h, 1, 1, desch, w, v, 1, 1, &
desch, ztmp, LWORK, rtmp, LRWORK, itmp, LIWORK, INFO )
IF( info /= 0 ) CALL lax_error__( ' pzheevd_drv ', ' PZHEEVD ', ABS( info ) )
lwork = INT( REAL(ztmp(1)) ) + 1
lrwork = INT( rtmp(1) ) + 1
liwork = itmp(1) + 1
ALLOCATE( work( lwork ) )
ALLOCATE( rwork( lrwork ) )
ALLOCATE( iwork( liwork ) )
CALL PZHEEVD( 'V', 'L', n, h, 1, 1, desch, w, v, 1, 1, &
desch, work, LWORK, rwork, LRWORK, iwork, LIWORK, INFO )
IF( info /= 0 ) CALL lax_error__( ' pzheevd_drv ', ' PZHEEVD ', ABS( info ) )
IF( tv ) h = v
#endif
IF ( ALLOCATED( work ) ) DEALLOCATE( work )
IF ( ALLOCATED( rwork ) ) DEALLOCATE( rwork )
IF ( ALLOCATED( iwork ) ) DEALLOCATE( iwork )
IF ( ALLOCATED( v ) ) DEALLOCATE( v )
RETURN
END SUBROUTINE pzheevd_drv
#endif
END MODULE zhpev_module
!==----------------------------------------------==!
SUBROUTINE zhpev_drv_x( JOBZ, UPLO, N, AP, W, Z, LDZ )
use zhpev_module
IMPLICIT NONE
include 'laxlib_kinds.fh'
CHARACTER :: JOBZ, UPLO
INTEGER :: IOPT, INFO, LDZ, N
COMPLEX(DP) :: AP( * ), Z( LDZ, * )
REAL(DP) :: W( * )
REAL(DP), ALLOCATABLE :: RWORK(:)
COMPLEX(DP), ALLOCATABLE :: ZWORK(:)
ALLOCATE( rwork( MAX(1, 3*n-2) ), zwork( MAX(1, 2*n-1)) )
CALL ZHPEV(jobz, uplo, n, ap, w, z, ldz, zwork, rwork, INFO)
DEALLOCATE( rwork, zwork )
IF( INFO .NE. 0 ) THEN
CALL lax_error__( ' zhpev_drv ', ' diagonalization failed ',INFO )
END IF
RETURN
END SUBROUTINE
!==----------------------------------------------==!
SUBROUTINE pzhpev_drv_x( jobz, ap, lda, w, z, ldz, nrl, n, nproc, mpime, comm )
use zhpev_module
IMPLICIT NONE
include 'laxlib_kinds.fh'
CHARACTER :: JOBZ
INTEGER, INTENT(IN) :: lda, ldz, nrl, n, nproc, mpime
INTEGER, INTENT(IN) :: comm
COMPLEX(DP) :: ap( lda, * ), z( ldz, * )
REAL(DP) :: w( * )
REAL(DP), ALLOCATABLE :: rwork( : )
COMPLEX(DP), ALLOCATABLE :: cwork( : )
!
ALLOCATE( rwork( n ) )
ALLOCATE( cwork( n ) )
!
CALL pzhptrd( n, nrl, ap, lda, w, rwork, cwork, nproc, mpime, comm)
IF( jobz == 'V' .OR. jobz == 'v' ) THEN
CALL pzupgtr( n, nrl, ap, lda, cwork, z, ldz, nproc, mpime, comm)
END IF
CALL pzsteqr( jobz, n, nrl, w, rwork, z, ldz, nproc, mpime, comm)
DEALLOCATE( cwork )
DEALLOCATE( rwork )
RETURN
END SUBROUTINE
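!==----------------------------------------------==!
!
! Usage sketch (illustrative only, not part of the library):
! diagonalize an n x n Hermitian matrix distributed over nproc MPI
! tasks with blocking factor 1; ap, z, w, lda, ldz, me and comm are
! assumed to be set up by the caller as documented above.
!
! nrl = n / nproc
! IF ( me < MOD( n, nproc ) ) nrl = nrl + 1
! ! local row j of ap holds global row me + (j-1)*nproc + 1
! CALL pzhpev_drv_x( 'V', ap, lda, w, z, ldz, nrl, n, nproc, me, comm )
! ! on exit w(1:n) holds the eigenvalues in ascending order and the
! ! local rows of z hold the corresponding rows of the eigenvectors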