2003-03-03 07:19:44 +08:00
|
|
|
!
|
2006-02-03 09:16:33 +08:00
|
|
|
! Copyright (C) 2001-2005 Quantum-ESPRESSO group
|
2003-03-03 07:19:44 +08:00
|
|
|
! This file is distributed under the terms of the
|
|
|
|
! GNU General Public License. See the file `License'
|
|
|
|
! in the root directory of the present distribution,
|
|
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
|
|
!
|
2004-10-26 17:32:48 +08:00
|
|
|
#include "f_defs.h"
|
2006-02-03 09:16:33 +08:00
|
|
|
!
|
2003-03-03 07:19:44 +08:00
|
|
|
!==----------------------------------------------==!
|
|
|
|
MODULE parallel_toolkit
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
USE kinds, ONLY : DP
|
|
|
|
USE io_global, ONLY : stdout
|
2004-06-17 17:29:03 +08:00
|
|
|
USE parallel_include
|
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
IMPLICIT NONE
|
|
|
|
SAVE
|
|
|
|
PRIVATE
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
PUBLIC :: pdspev_drv, dspev_drv, &
|
|
|
|
diagonalize, pzhpev_drv, zhpev_drv, cdiagonalize
|
|
|
|
PUBLIC :: rep_matmul_drv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
CONTAINS
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
SUBROUTINE ptredv( a, lda, d, e, v, ldv, nrl, n, nproc, me, comm )
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!
|
|
|
|
! Parallel version of the famous HOUSEHOLDER tridiagonalization
|
|
|
|
! Algorithm for simmetric matrix.
|
|
|
|
!
|
|
|
|
! AUTHOR : Carlo Cavazzoni - SISSA 1997
|
|
|
|
! comments and suggestions to : carlo.cavazzoni@cineca.it
|
|
|
|
!
|
|
|
|
! REFERENCES :
|
|
|
|
!
|
|
|
|
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
|
|
|
|
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
|
|
|
|
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
|
|
|
|
!
|
|
|
|
! PARALLEL NUMERICAL ALGORITHMS,
|
|
|
|
! T.L. FREEMAN AND C.PHILLIPS,
|
|
|
|
! PRENTICE HALL INTERNATIONAL (1992).
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! INPUTS :
|
|
|
|
!
|
|
|
|
! A(NRL,N) Local part of the global matrix A(N,N) to be reduced,
|
|
|
|
! only the upper triangle is needed.
|
|
|
|
! The rows of the matrix are distributed amoung processors
|
|
|
|
! with blocking factor 1.
|
|
|
|
! Example for NPROC = 4 :
|
|
|
|
! ROW | PE
|
|
|
|
! 1 | 0
|
|
|
|
! 2 | 1
|
|
|
|
! 3 | 2
|
|
|
|
! 4 | 3
|
|
|
|
! 5 | 0
|
|
|
|
! 6 | 1
|
|
|
|
! .. | ..
|
|
|
|
!
|
|
|
|
! LDA LEADING DIMENSION OF MATRIX A.
|
|
|
|
!
|
|
|
|
! LDV LEADING DIMENSION OF MATRIX V.
|
|
|
|
!
|
|
|
|
! NRL NUMBER OF ROWS BELONGING TO THE LOCAL PROCESSOR.
|
|
|
|
!
|
|
|
|
! N DIMENSION OF THE GLOBAL MATRIX.
|
|
|
|
!
|
|
|
|
! NPROC NUMBER OF PROCESSORS.
|
|
|
|
!
|
|
|
|
! ME INDEX OF THE LOCAL PROCESSOR (Starting from 0).
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! OUTPUTS :
|
|
|
|
!
|
|
|
|
! V(NRL,N) Ortogonal transormation that tridiagonalize A,
|
|
|
|
! this matrix is distributed omong processor
|
|
|
|
! in the same way as A.
|
|
|
|
!
|
|
|
|
! D(N) Diagonal elements of the tridiagonal matrix
|
|
|
|
! this vector is equal on all processors.
|
|
|
|
!
|
|
|
|
! E(N) Subdiagonal elements of the tridiagonal matrix
|
|
|
|
! this vector is equal on all processors.
|
|
|
|
!
|
|
|
|
!
|
|
|
|
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
|
|
|
|
INTEGER, PARAMETER :: PTRED_WORK_SIZE = 2333
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
include "mpp/shmem.fh"
|
|
|
|
INTEGER PSYNC_BC(SHMEM_BCAST_SYNC_SIZE)
|
|
|
|
INTEGER PSYNC_B(SHMEM_BARRIER_SYNC_SIZE)
|
|
|
|
INTEGER PSYNC_STA(SHMEM_REDUCE_SYNC_SIZE)
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) pWrk(MAX(PTRED_WORK_SIZE,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
|
2003-03-03 07:19:44 +08:00
|
|
|
DATA PSYNC_BC /SHMEM_BCAST_SYNC_SIZE*SHMEM_SYNC_VALUE/
|
|
|
|
DATA PSYNC_B /SHMEM_BARRIER_SYNC_SIZE*SHMEM_SYNC_VALUE/
|
|
|
|
DATA PSYNC_STA /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
|
|
|
|
SAVE PSYNC_B, PSYNC_STA, PSYNC_BC,pWrk
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
INTEGER :: N, NRL, LDA, LDV
|
2006-02-03 09:16:33 +08:00
|
|
|
INTEGER :: NPROC, ME, comm
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) :: A(LDA,N), D(N), E(N), V(LDV,N)
|
2003-03-03 07:19:44 +08:00
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) :: DDOT
|
2003-03-03 07:19:44 +08:00
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) :: g, scale, sigma, kappa, f, h, tmp
|
|
|
|
REAL(DP) :: u(PTRED_WORK_SIZE)
|
|
|
|
REAL(DP) :: p(PTRED_WORK_SIZE)
|
|
|
|
REAL(DP) :: vtmp(PTRED_WORK_SIZE)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) :: tu, tp, one_over_h
|
|
|
|
REAL(DP) :: one_over_scale
|
|
|
|
REAL(DP) :: ul(PTRED_WORK_SIZE)
|
|
|
|
REAL(DP) :: pl(PTRED_WORK_SIZE)
|
2005-01-07 20:57:41 +08:00
|
|
|
#if (defined __SHMEM && defined __ALTIX) || (defined __SHMEM && defined __ORIGIN)
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP), SAVE :: d_tmp(PTRED_WORK_SIZE)
|
2005-01-07 20:57:41 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
integer :: l, i, j, k, t, tl, ierr
|
|
|
|
integer :: kl, jl, ks, lloc
|
|
|
|
integer :: is(PTRED_WORK_SIZE)
|
|
|
|
integer :: ri(PTRED_WORK_SIZE)
|
|
|
|
|
|
|
|
save :: g,h,scale,sigma,kappa,f,u,p,tmp,vtmp
|
|
|
|
save :: tu,tp,one_over_h,one_over_scale,ul,pl
|
|
|
|
save :: l,i,j,k,t,tl,ierr,kl,jl,is,ri,ks,lloc
|
|
|
|
|
|
|
|
|
|
|
|
! .......... FOR I=N STEP -1 UNTIL 1 DO -- ..........
|
|
|
|
|
|
|
|
! ttot = 0
|
|
|
|
! mem1 = 0
|
|
|
|
|
|
|
|
IF( N == 0 ) THEN
|
|
|
|
RETURN
|
|
|
|
ELSE IF( (N > PTRED_WORK_SIZE) .OR. (N < 0) ) THEN
|
2003-11-04 18:26:03 +08:00
|
|
|
WRITE( stdout,*) ' *** ERROR IN PTREDV'
|
|
|
|
WRITE( stdout,*) ' N OUT OF RANGE : ',N
|
2003-03-03 07:19:44 +08:00
|
|
|
STOP
|
|
|
|
END IF
|
|
|
|
|
|
|
|
DO I = N, 1, -1
|
|
|
|
IS(I) = (I-1)/NPROC
|
|
|
|
RI(I) = MOD((I-1),NPROC)
|
|
|
|
IF(ME .le. RI(I) ) then
|
|
|
|
IS(I) = IS(I) + 1
|
|
|
|
END IF
|
2003-11-04 18:26:03 +08:00
|
|
|
! WRITE( stdout,100) 'ME,I,RI,IS',ME,I,RI(I),IS(I)
|
2003-03-03 07:19:44 +08:00
|
|
|
100 FORMAT(A,2X,5I4)
|
|
|
|
END DO
|
|
|
|
|
|
|
|
DO I = N, 2, -1
|
|
|
|
|
|
|
|
L = I - 1 ! primo elemeto
|
|
|
|
H = 0.0d0
|
|
|
|
|
|
|
|
IF ( L .GT. 1 ) THEN
|
|
|
|
|
|
|
|
SCALE = 0.0D0
|
|
|
|
DO K = 1, is(l)
|
|
|
|
SCALE = SCALE + DABS( A(K,I) )
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(SCALE, SCALE, 1, 0, 0, nproc, pWrk, pSync_sta)
|
|
|
|
# elif defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_ALLREDUCE(SCALE, TMP, 1, MPI_DOUBLE_PRECISION, MPI_SUM, comm, IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
SCALE = TMP
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
IF ( SCALE .EQ. 0.0D0 ) THEN
|
|
|
|
IF (RI(L).EQ.ME) THEN
|
|
|
|
E(I) = A(is(L),I)
|
|
|
|
END IF
|
|
|
|
ELSE
|
|
|
|
|
|
|
|
! ...... CALCOLO DI SIGMA E DI H
|
|
|
|
|
|
|
|
ONE_OVER_SCALE = 1.0d0/SCALE
|
|
|
|
SIGMA = 0.0D0
|
|
|
|
DO k = 1,is(L)
|
|
|
|
A(k,I) = A(k,I) * ONE_OVER_SCALE
|
|
|
|
SIGMA = SIGMA + A(k,I)**2
|
|
|
|
END DO
|
|
|
|
|
|
|
|
IF(RI(L) .eq. ME ) THEN
|
|
|
|
F = A(is(L),I)
|
|
|
|
END IF
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(SIGMA, SIGMA, 1, 0, 0, nproc, pWrk, pSync_sta)
|
|
|
|
call shmem_barrier_all
|
2005-01-07 20:57:41 +08:00
|
|
|
# if defined __ALTIX || defined __ORIGIN
|
|
|
|
CALL SHMEM_BROADCAST8(F,F,1,RI(L),0,0,nproc,pSync_bc)
|
|
|
|
# else
|
2003-03-03 07:19:44 +08:00
|
|
|
CALL SHMEM_BROADCAST(F,F,1,RI(L),0,0,nproc,pSync_bc)
|
2005-01-07 20:57:41 +08:00
|
|
|
# endif
|
2003-03-03 07:19:44 +08:00
|
|
|
# elif defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_ALLREDUCE(SIGMA, TMP, 1, MPI_DOUBLE_PRECISION, MPI_SUM, comm, IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
SIGMA = TMP
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_BCAST(F, 1, MPI_DOUBLE_PRECISION, RI(L), comm, IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
G = -SIGN(SQRT(SIGMA),F)
|
|
|
|
H = SIGMA - F*G
|
|
|
|
ONE_OVER_H = 1.0d0/H
|
|
|
|
E(I) = SCALE*G
|
|
|
|
|
|
|
|
! ...... COSTRUZIONE DEL VETTORE U
|
|
|
|
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 8
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
DO k = 1,L
|
|
|
|
vtmp(k) = 0.0d0
|
|
|
|
END DO
|
|
|
|
|
|
|
|
k = ME + 1
|
2004-02-05 21:18:10 +08:00
|
|
|
|
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 4
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
DO kl = 1,is(l)
|
|
|
|
vtmp(k) = A(kl,I)
|
|
|
|
UL(kl) = A(kl,I)
|
|
|
|
k = k + NPROC
|
|
|
|
END DO
|
|
|
|
IF(RI(L) .eq. ME ) THEN
|
|
|
|
vtmp(L) = F - G
|
|
|
|
UL(is(l)) = F - G
|
|
|
|
A(is(l),I) = F - G
|
|
|
|
END IF
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(VTMP, U, L, 0, 0, nproc, pWrk, pSync_sta)
|
|
|
|
# elif defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_ALLREDUCE(VTMP,U,L,MPI_DOUBLE_PRECISION,MPI_SUM,comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
|
|
|
u(1:l) = vtmp(1:l)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
! ...... COSTRUZIONE DEL VETTORE P
|
|
|
|
|
|
|
|
KAPPA = 0.0d0
|
2004-02-05 21:18:10 +08:00
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
DO J = 1,L
|
2004-02-05 21:18:10 +08:00
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
vtmp(j) = 0.0d0
|
2004-02-05 21:18:10 +08:00
|
|
|
|
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 8
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
DO KL = 1, IS(J)
|
|
|
|
vtmp(J) = vtmp(J) + A(KL,J) * UL(KL)
|
|
|
|
END DO
|
2004-02-05 21:18:10 +08:00
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
IF(L.GT.J .AND. ME.EQ.RI(J)) then
|
2004-02-05 21:18:10 +08:00
|
|
|
|
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 8
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
DO K = J+1,L
|
|
|
|
vtmp(J) = vtmp(J) + A(IS(J),K) * U(K)
|
|
|
|
END DO
|
|
|
|
END IF
|
2004-02-05 21:18:10 +08:00
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
vtmp(J) = vtmp(J) * ONE_OVER_H
|
|
|
|
KAPPA = KAPPA + vtmp(J) * U(J) * 0.5d0 * ONE_OVER_H
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(KAPPA, KAPPA, 1, 0, 0, nproc, pWrk, pSync_sta)
|
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(vtmp, P, L, 0, 0, nproc, pWrk, pSync_sta)
|
|
|
|
call shmem_barrier_all
|
|
|
|
# elif defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_ALLREDUCE(KAPPA,TMP,1,MPI_DOUBLE_PRECISION,MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
KAPPA = TMP
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_ALLREDUCE(vtmp,p,L,MPI_DOUBLE_PRECISION,MPI_SUM,comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
|
|
|
p(1:l) = vtmp(1:l)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
CALL DAXPY(l, -kappa, u, 1, p, 1)
|
|
|
|
|
|
|
|
k = me + 1
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 8
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
DO kl = 1,is(l)
|
|
|
|
PL(kl) = P(k)
|
|
|
|
k = k + NPROC
|
|
|
|
END DO
|
|
|
|
|
|
|
|
DO J = 1,L
|
|
|
|
tu= U(J)
|
|
|
|
tp= P(J)
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 12
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
DO KL = 1,is(l)
|
|
|
|
A(KL,j) = A(KL,j) - UL(KL) * tp
|
|
|
|
A(KL,j) = A(KL,j) - PL(KL) * tu
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
|
|
|
|
ELSE
|
|
|
|
|
|
|
|
IF(RI(L).EQ.ME) THEN
|
|
|
|
G = A(is(l),I)
|
|
|
|
END IF
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
2005-01-07 20:57:41 +08:00
|
|
|
# if defined __ALTIX || defined __ORIGIN
|
|
|
|
CALL SHMEM_BROADCAST8(G,G,1,RI(L),0,0,nproc,pSync_bc)
|
|
|
|
# else
|
2003-03-03 07:19:44 +08:00
|
|
|
CALL SHMEM_BROADCAST(G,G,1,RI(L),0,0,nproc,pSync_bc)
|
2005-01-07 20:57:41 +08:00
|
|
|
# endif
|
2003-03-03 07:19:44 +08:00
|
|
|
# elif defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_BCAST(G,1,MPI_DOUBLE_PRECISION,RI(L),comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
E(I) = G
|
|
|
|
|
|
|
|
END IF
|
|
|
|
|
|
|
|
D(I) = H
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
END DO
|
|
|
|
|
|
|
|
E(1) = 0.0d0
|
|
|
|
D(1) = 0.0d0
|
|
|
|
|
|
|
|
|
|
|
|
! t1 = mclock()
|
|
|
|
|
|
|
|
DO J = 1,N
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 12
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
DO I = 1,NRL
|
|
|
|
V(I,J) = 0.0d0
|
|
|
|
END DO
|
|
|
|
IF(RI(J).EQ.ME) THEN
|
|
|
|
V(IS(J),J) = 1.0d0
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
|
|
|
|
DO I = 2,N
|
|
|
|
L = I - 1
|
|
|
|
LLOC = IS(L)
|
|
|
|
IF(D(I).NE.0.0d0) THEN
|
|
|
|
ONE_OVER_H = 1.0d0/D(I)
|
|
|
|
DO J = 1, L
|
|
|
|
P(J) = DDOT(LLOC,V(1,J),1,A(1,I),1)
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(P, VTMP, L, 0, 0, nproc, pWrk, pSync_sta)
|
|
|
|
# elif defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_ALLREDUCE(P,VTMP,L,MPI_DOUBLE_PRECISION,MPI_SUM,comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
|
|
|
vtmp(1:l) = p(1:l)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
DO J = 1, L
|
|
|
|
call DAXPY(LLOC,-VTMP(J)*ONE_OVER_H,A(1,I),1,V(1,J),1)
|
|
|
|
END DO
|
|
|
|
|
|
|
|
END IF
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
|
|
|
call shmem_barrier_all
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
END DO
|
|
|
|
|
|
|
|
|
|
|
|
DO I = 1,N
|
|
|
|
U(I) = 0.0d0
|
|
|
|
IF(RI(I).eq.ME) then
|
|
|
|
U(I) = A(IS(I),I)
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __SHMEM
|
2005-01-07 20:57:41 +08:00
|
|
|
#if defined __ALTIX || defined __ORIGIN
|
|
|
|
d_tmp(1:n) = d(1:n)
|
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(U, d_tmp, N, 0, 0, nproc, pWrk, &
|
|
|
|
& pSync_sta)
|
|
|
|
#else
|
2003-03-03 07:19:44 +08:00
|
|
|
call shmem_barrier_all
|
|
|
|
CALL SHMEM_REAL8_SUM_TO_ALL(U, D, N, 0, 0, nproc, pWrk, pSync_sta)
|
2005-01-07 20:57:41 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
call shmem_barrier_all
|
|
|
|
# elif defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_ALLREDUCE(U,D,N,MPI_DOUBLE_PRECISION,MPI_SUM,comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
|
|
|
D(1:N) = U(1:N)
|
|
|
|
#endif
|
|
|
|
|
2006-01-10 22:04:13 +08:00
|
|
|
RETURN
|
|
|
|
END SUBROUTINE ptredv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-01-10 22:04:13 +08:00
|
|
|
|
|
|
|
SUBROUTINE ptqliv( d, e, n, z, ldz, nrl )
|
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
!
|
|
|
|
! Modified QL algorithm for CRAY T3E PARALLEL MACHINE
|
|
|
|
! calculate the eigenvectors and eigenvalues of a matrix reduced to
|
|
|
|
! tridiagonal form by PTREDV.
|
|
|
|
!
|
|
|
|
! AUTHOR : Carlo Cavazzoni - SISSA 1997
|
|
|
|
! comments and suggestions to : cava@sissa.it
|
|
|
|
!
|
|
|
|
! REFERENCES :
|
|
|
|
!
|
|
|
|
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
|
|
|
|
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
|
|
|
|
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
|
|
|
|
!
|
|
|
|
! PARALLEL NUMERICAL ALGORITHMS,
|
|
|
|
! T.L. FREEMAN AND C.PHILLIPS,
|
|
|
|
! PRENTICE HALL INTERNATIONAL (1992).
|
|
|
|
!
|
|
|
|
! NOTE : the algorithm tha finds the eigenvalues is not parallelized
|
|
|
|
! ( it scales as O(N^2) ), I prefere to parallelize only the
|
|
|
|
! updating of the eigenvectors because its the most costly
|
|
|
|
! part of the algorithm ( it scales as O(N^3) ).
|
|
|
|
! For large matrix in practice all the time is spent in the updating
|
|
|
|
! that in this routine scales linearly with the number of processors,
|
|
|
|
! in fact there is no communication at all.
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! INPUTS :
|
|
|
|
!
|
|
|
|
! D(N) Diagonal elements of the tridiagonal matrix
|
|
|
|
! this vector is equal on all processors.
|
|
|
|
!
|
|
|
|
! E(N) Subdiagonal elements of the tridiagonal matrix
|
|
|
|
! this vector is equal on all processors.
|
|
|
|
!
|
|
|
|
! N DIMENSION OF THE GLOBAL MATRIX.
|
|
|
|
!
|
|
|
|
! NRL NUMBER OF ROWS OF Z BELONGING TO THE LOCAL PROCESSOR.
|
|
|
|
!
|
|
|
|
! LDZ LEADING DIMENSION OF MATRIX Z.
|
|
|
|
!
|
|
|
|
! Z(LDZ,N) Ortogonal transormation that tridiagonalize the original
|
|
|
|
! matrix A.
|
|
|
|
! The rows of the matrix are distributed amoung processors
|
|
|
|
! with blocking factor 1.
|
|
|
|
! Example for NPROC = 4 :
|
|
|
|
! ROW | PE
|
|
|
|
! 1 | 0
|
|
|
|
! 2 | 1
|
|
|
|
! 3 | 2
|
|
|
|
! 4 | 3
|
|
|
|
! 5 | 0
|
|
|
|
! 6 | 1
|
|
|
|
! .. | ..
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! OUTPUTS :
|
|
|
|
!
|
|
|
|
! Z(LDZ,N) EIGENVECTORS OF THE ORIGINAL MATRIX.
|
|
|
|
! THE Jth COLUMN of Z contains the eigenvectors associated
|
|
|
|
! with the jth eigenvalue.
|
|
|
|
! The eigenvectors are scattered among processors (4PE examp. )
|
|
|
|
! eigenvector | PE
|
|
|
|
! elements |
|
|
|
|
! V(1) | 0
|
|
|
|
! V(2) | 1
|
|
|
|
! V(3) | 2
|
|
|
|
! V(4) | 3
|
|
|
|
! V(5) | 0
|
|
|
|
! V(6) | 1
|
|
|
|
! .... ..
|
|
|
|
!
|
|
|
|
! D(N) Eigenvalues of the original matrix,
|
|
|
|
! this vector is equal on all processors.
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
|
|
|
|
INTEGER n,nrl,ldz
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) d(n),e(n)
|
|
|
|
REAL(DP) z(ldz,n)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
INTEGER FV_WORK_SIZE
|
|
|
|
INTEGER CSV_WORK_SIZE
|
2004-10-16 01:25:56 +08:00
|
|
|
PARAMETER ( FV_WORK_SIZE = 2000 )
|
2003-03-03 07:19:44 +08:00
|
|
|
PARAMETER ( CSV_WORK_SIZE = 2000 )
|
|
|
|
|
|
|
|
INTEGER i,iter,mk,k,l,m
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) b,dd,f,g,p,r,c,s
|
|
|
|
REAL(DP) cv(CSV_WORK_SIZE)
|
|
|
|
REAL(DP) sv(CSV_WORK_SIZE)
|
|
|
|
REAL(DP) fv1(FV_WORK_SIZE)
|
|
|
|
REAL(DP) fv2(FV_WORK_SIZE)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
|
|
|
|
save cv,sv,fv1,fv2,b,dd,f,g,p,r,c,s
|
|
|
|
save i,iter,mk,k,l,m
|
|
|
|
|
|
|
|
|
|
|
|
if(n.gt.CSV_WORK_SIZE) then
|
|
|
|
print *,' ptqli CSV_WORK_SIZE too small '
|
|
|
|
stop
|
|
|
|
end if
|
|
|
|
if(nrl.gt.FV_WORK_SIZE) then
|
|
|
|
print *,' ptqli FV_WORK_SIZE too small '
|
|
|
|
stop
|
|
|
|
end if
|
|
|
|
|
|
|
|
do l = 2,n
|
|
|
|
e(l-1) = e(l)
|
|
|
|
end do
|
|
|
|
|
|
|
|
do 15 l=1,n
|
|
|
|
iter=0
|
|
|
|
1 do m=l,n-1
|
|
|
|
dd = abs(d(m))+abs(d(m+1))
|
|
|
|
if ( abs(e(m))+dd .eq. dd ) goto 2
|
|
|
|
end do
|
|
|
|
m=n
|
|
|
|
|
|
|
|
2 if(m.ne.l)then
|
2004-10-14 16:38:05 +08:00
|
|
|
if(iter.eq.200) then
|
2003-03-03 07:19:44 +08:00
|
|
|
call errore(' tqli ',' too many iterations ', iter)
|
|
|
|
end if
|
|
|
|
iter=iter+1
|
|
|
|
g=(d(l+1)-d(l))/(2.0d0*e(l))
|
|
|
|
r=pythag(g,1.0d0)
|
|
|
|
g=d(m)-d(l)+e(l)/(g+sign(r,g))
|
|
|
|
s=1.0d0
|
|
|
|
c=1.0d0
|
|
|
|
p=0.0d0
|
|
|
|
do i=m-1,l,-1
|
|
|
|
f=s*e(i)
|
|
|
|
b=c*e(i)
|
|
|
|
r=pythag(f,g)
|
|
|
|
e(i+1)=r
|
|
|
|
if(r.eq.0.0d0)then
|
|
|
|
d(i+1)=d(i+1)-p
|
|
|
|
e(m)=0.0d0
|
|
|
|
goto 1
|
|
|
|
endif
|
|
|
|
c=g/r
|
|
|
|
g=d(i+1)-p
|
|
|
|
s=f/r
|
|
|
|
r=(d(i)-g)*s+2.0d0*c*b
|
|
|
|
p=s*r
|
|
|
|
d(i+1)=g+p
|
|
|
|
g=c*r-b
|
|
|
|
|
|
|
|
! do k=1,nrl
|
|
|
|
! f=z(k,i+1)
|
|
|
|
! z(k,i+1)=s*z(k,i)+c*f
|
|
|
|
! z(k,i)=c*z(k,i)-s*f
|
|
|
|
! end do
|
|
|
|
|
|
|
|
cv(i) = c
|
|
|
|
sv(i) = s
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
do i=m-1,l,-1
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 12
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
do k=1,nrl
|
|
|
|
fv2(k) =z(k,i+1)
|
|
|
|
end do
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 12
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
do k=1,nrl
|
|
|
|
fv1(k) =z(k,i)
|
|
|
|
end do
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 12
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
do k=1,nrl
|
|
|
|
z(k,i+1) =sv(i)*fv1(k) + cv(i)*fv2(k)
|
|
|
|
z(k,i) =cv(i)*fv1(k) - sv(i)*fv2(k)
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
|
|
|
|
d(l)=d(l)-p
|
|
|
|
e(l)=g
|
|
|
|
e(m)=0.0d0
|
|
|
|
goto 1
|
|
|
|
|
|
|
|
endif
|
|
|
|
15 continue
|
|
|
|
|
|
|
|
return
|
2005-05-18 22:02:51 +08:00
|
|
|
END SUBROUTINE ptqliv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-01-10 22:04:13 +08:00
|
|
|
|
|
|
|
SUBROUTINE peigsrtv(d,v,ldv,n,nrl)
|
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
!
|
|
|
|
! This routine sort eigenvalues and eigenvectors
|
|
|
|
! generated by PTREDV and PTQLIV.
|
|
|
|
!
|
|
|
|
! AUTHOR : Carlo Cavazzoni - SISSA 1997
|
|
|
|
! comments and suggestions to : cava@sissa.it
|
|
|
|
!
|
|
|
|
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
INTEGER n,ldv,nrl
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) d(n),v(ldv,n)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
INTEGER i,j,k
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) p
|
2003-03-03 07:19:44 +08:00
|
|
|
save i,j,k
|
|
|
|
save p
|
2005-08-28 22:09:42 +08:00
|
|
|
! REAL(DP) fv(nrl)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
do 13 i=1,n-1
|
|
|
|
k=i
|
|
|
|
p=d(i)
|
|
|
|
do j=i+1,n
|
|
|
|
if(d(j).le.p)then
|
|
|
|
k=j
|
|
|
|
p=d(j)
|
|
|
|
endif
|
|
|
|
end do
|
|
|
|
if(k.ne.i)then
|
|
|
|
d(k)=d(i)
|
|
|
|
d(i)=p
|
|
|
|
!
|
|
|
|
! Exchange local elements of eigenvectors.
|
|
|
|
!
|
2004-02-05 21:18:10 +08:00
|
|
|
#if defined __T3E
|
2003-03-03 07:19:44 +08:00
|
|
|
!DIR$ UNROLL 12
|
2004-02-05 21:18:10 +08:00
|
|
|
#endif
|
2003-03-03 07:19:44 +08:00
|
|
|
do j=1,nrl
|
|
|
|
p=v(j,i)
|
|
|
|
v(j,i)=v(j,k)
|
|
|
|
v(j,k)=p
|
|
|
|
END DO
|
|
|
|
|
|
|
|
endif
|
|
|
|
13 continue
|
|
|
|
return
|
2005-05-18 22:02:51 +08:00
|
|
|
END SUBROUTINE peigsrtv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
FUNCTION pythag(a,b)
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) a,b,pythag
|
|
|
|
REAL(DP) absa,absb
|
2003-03-03 07:19:44 +08:00
|
|
|
absa=abs(a)
|
|
|
|
absb=abs(b)
|
|
|
|
if(absa.gt.absb)then
|
|
|
|
pythag=absa*sqrt(1.0d0+(absb/absa)**2)
|
|
|
|
else
|
|
|
|
if(absb.eq.0.0d0)then
|
|
|
|
pythag=0.0d0
|
|
|
|
else
|
|
|
|
pythag=absb*sqrt(1.0d0+(absa/absb)**2)
|
|
|
|
endif
|
|
|
|
endif
|
|
|
|
return
|
2005-05-18 01:07:57 +08:00
|
|
|
END FUNCTION pythag
|
2006-02-03 09:16:33 +08:00
|
|
|
!
|
|
|
|
!-------------------------------------------------------------------------
|
|
|
|
SUBROUTINE diagonalize( iopt, a, d, ev, n, nproc, mpime, comm_in )
|
|
|
|
!-------------------------------------------------------------------------
|
|
|
|
!
|
|
|
|
! This subroutine calculate eigenvalues and optionally eigenvectors
|
|
|
|
! of a real symmetric matrix "a" distributed or replicated across
|
|
|
|
! processors
|
|
|
|
!
|
|
|
|
! iopt input, choose the distributions of "a"
|
|
|
|
! iopt = 0 matrix "a" and "ev" are distributed across procs
|
|
|
|
! iopt = 1 matrix "a" and "ev" are replicated across procs
|
|
|
|
! a(:,:) input, matrix to be diagonalized, overwritten on output
|
|
|
|
! d(:) output, array of the eigenvalues
|
|
|
|
! ev(:,:) output, matrix of the eigenvectors
|
|
|
|
! n input, dimension of matrix a
|
|
|
|
! nproc input, number of processors
|
|
|
|
! mpime input, index of this processors (starting from 0!)
|
|
|
|
!
|
|
|
|
! When iopt = 0 the matrix "a" shoud have the following layout:
|
|
|
|
!
|
|
|
|
! A(NRL,N) Local part of the global matrix A(N,N) to be reduced,
|
|
|
|
! The rows of the matrix are cyclically distributed among
|
|
|
|
! processors with blocking factor 1.
|
|
|
|
! Example for NPROC = 4 :
|
|
|
|
! ROW | PE
|
|
|
|
! 1 | 0
|
|
|
|
! 2 | 1
|
|
|
|
! 3 | 2
|
|
|
|
! 4 | 3
|
|
|
|
! 5 | 0
|
|
|
|
! 6 | 1
|
|
|
|
! .. | ..
|
|
|
|
!
|
|
|
|
IMPLICIT NONE
|
|
|
|
!
|
|
|
|
INTEGER, INTENT(IN) :: iopt, n, nproc, mpime
|
|
|
|
REAL(DP), INTENT(OUT) :: d(n)
|
|
|
|
REAL(DP), INTENT(INOUT) :: a(n,n)
|
|
|
|
REAL(DP), INTENT(OUT) :: ev(n,n)
|
|
|
|
INTEGER, OPTIONAL, INTENT(IN) :: comm_in
|
|
|
|
!
|
|
|
|
REAL(DP), ALLOCATABLE :: aloc(:,:)
|
|
|
|
REAL(DP), ALLOCATABLE :: evloc(:,:)
|
|
|
|
REAL(DP), ALLOCATABLE :: e(:)
|
|
|
|
INTEGER :: nrl, i, j, jl, ierr, comm
|
|
|
|
!
|
|
|
|
!
|
|
|
|
IF ( PRESENT( comm_in ) ) THEN
|
|
|
|
!
|
|
|
|
comm = comm_in
|
|
|
|
!
|
2006-02-03 17:28:55 +08:00
|
|
|
#ifdef __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
comm = MPI_COMM_WORLD
|
|
|
|
!
|
2006-02-03 17:28:55 +08:00
|
|
|
#endif
|
2006-02-03 09:16:33 +08:00
|
|
|
END IF
|
|
|
|
!
|
|
|
|
nrl = n / nproc
|
|
|
|
!
|
|
|
|
IF ( mpime < MOD( n, nproc ) ) nrl = nrl + 1
|
|
|
|
!
|
|
|
|
ALLOCATE( evloc( nrl, n ) )
|
|
|
|
ALLOCATE( e( n ) )
|
|
|
|
!
|
|
|
|
IF ( iopt == 1 ) THEN
|
|
|
|
!
|
|
|
|
ALLOCATE( aloc( nrl, n ) )
|
|
|
|
!
|
|
|
|
DO i = 1, n
|
|
|
|
DO jl = 1, nrl
|
|
|
|
aloc( jl, i ) = a( (jl-1)*nproc + mpime + 1, i )
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
CALL ptredv( aloc, nrl, d, e, evloc, nrl, nrl, n, nproc, mpime, comm )
|
|
|
|
!
|
|
|
|
DEALLOCATE( aloc )
|
|
|
|
!
|
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
CALL ptredv( a, nrl, d, e, evloc, nrl, nrl, n, nproc, mpime, comm )
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
CALL ptqliv( d, e, n, evloc, nrl, nrl )
|
|
|
|
CALL peigsrtv( d, evloc, nrl, n, nrl )
|
|
|
|
!
|
|
|
|
IF ( iopt == 1 ) THEN
|
|
|
|
!
|
|
|
|
DO i = 1,n
|
|
|
|
DO j = 1,n
|
|
|
|
a(j,i) = 0.D0
|
|
|
|
END DO
|
|
|
|
DO jl = 1,nrl
|
|
|
|
a((jl-1)*nproc + mpime + 1,i) = evloc(jl,i)
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
#if defined (__PARA)
|
|
|
|
# if defined (__MPI)
|
|
|
|
CALL MPI_ALLREDUCE( a, ev, n*n, &
|
|
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
2006-02-03 09:16:33 +08:00
|
|
|
ev = a
|
2003-03-03 07:19:44 +08:00
|
|
|
#endif
|
2006-02-03 09:16:33 +08:00
|
|
|
!
|
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
DO i = 1, n
|
|
|
|
DO jl = 1, nrl
|
|
|
|
ev(jl,i) = evloc(jl,i)
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
DEALLOCATE( evloc )
|
|
|
|
DEALLOCATE( e )
|
|
|
|
!
|
|
|
|
RETURN
|
|
|
|
!
|
|
|
|
END SUBROUTINE diagonalize
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
SUBROUTINE pdspev_drv( jobz, ap, lda, w, z, ldz, &
|
|
|
|
nrl, n, nproc, mpime, comm_in )
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
CHARACTER :: JOBZ
|
|
|
|
INTEGER :: lda, ldz, nrl, n, nproc, mpime
|
|
|
|
INTEGER, OPTIONAL, INTENT(IN) :: comm_in
|
|
|
|
REAL(DP) :: ap( lda, * ), w( * ), z( ldz, * )
|
|
|
|
REAL(DP) :: sd( n )
|
|
|
|
INTEGER :: comm
|
|
|
|
!
|
|
|
|
IF ( PRESENT( comm_in ) ) THEN
|
|
|
|
comm = comm_in
|
2006-02-03 17:28:55 +08:00
|
|
|
#ifdef __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
ELSE
|
|
|
|
comm = MPI_COMM_WORLD
|
2006-02-03 17:28:55 +08:00
|
|
|
#endif
|
2006-02-03 09:16:33 +08:00
|
|
|
END IF
|
|
|
|
CALL ptredv(ap, lda, w, sd, z, ldz, nrl, n, nproc, mpime, comm)
|
|
|
|
CALL ptqliv(w, sd, n, z, ldz, nrl)
|
|
|
|
CALL peigsrtv(w, z, ldz, n, nrl)
|
|
|
|
RETURN
|
|
|
|
END SUBROUTINE pdspev_drv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
|
|
|
SUBROUTINE dspev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ )
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
CHARACTER :: JOBZ, UPLO
|
|
|
|
INTEGER :: IOPT, INFO, LDZ, N
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) :: AP( * ), W( * ), Z( LDZ, * )
|
|
|
|
REAL(DP), ALLOCATABLE :: WORK(:)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
ALLOCATE( work( 3*n ) )
|
|
|
|
|
|
|
|
#if defined __AIX
|
|
|
|
|
|
|
|
IOPT = 0
|
|
|
|
IF((JOBZ .EQ. 'V') .OR. (JOBZ .EQ. 'v') ) iopt = iopt + 1
|
|
|
|
IF((UPLO .EQ. 'U') .OR. (UPLO .EQ. 'u') ) iopt = iopt + 20
|
|
|
|
CALL DSPEV(IOPT, ap, w, z, ldz, n, work, 3*n)
|
|
|
|
|
2003-05-02 06:47:53 +08:00
|
|
|
#else
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
CALL DSPEV(jobz, uplo, n, ap(1), w(1), z(1,1), ldz, work, INFO)
|
|
|
|
IF( info .NE. 0 ) THEN
|
|
|
|
CALL errore( ' dspev_drv ', ' diagonalization failed ',info )
|
|
|
|
END IF
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
DEALLOCATE( work )
|
|
|
|
|
|
|
|
RETURN
|
2005-05-18 22:02:51 +08:00
|
|
|
END SUBROUTINE dspev_drv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
!-------------------------------------------------------------------------
|
|
|
|
SUBROUTINE cdiagonalize( iopt, a, d, ev, n, nproc, mpime, comm_in )
|
|
|
|
!-------------------------------------------------------------------------
|
|
|
|
!
|
|
|
|
! ... this routine calls the appropriate Lapack routine for diagonalizing
|
|
|
|
! ... a complex Hermitian matrix
|
|
|
|
!
|
|
|
|
IMPLICIT NONE
|
|
|
|
!
|
|
|
|
INTEGER, INTENT(IN) :: iopt, n, nproc, mpime
|
|
|
|
REAL(DP), INTENT(OUT) :: d(n)
|
|
|
|
COMPLEX(DP), INTENT(INOUT) :: a(n,n)
|
|
|
|
COMPLEX(DP), INTENT(OUT) :: ev(n,n)
|
|
|
|
INTEGER, OPTIONAL, INTENT(IN) :: comm_in
|
|
|
|
!
|
|
|
|
INTEGER :: i, j, k, nrl, comm, ierr
|
|
|
|
INTEGER :: info = 0
|
|
|
|
!
|
|
|
|
COMPLEX(DP), ALLOCATABLE :: aloc(:)
|
|
|
|
COMPLEX(DP), ALLOCATABLE :: ap(:,:)
|
|
|
|
COMPLEX(DP), ALLOCATABLE :: vp(:,:)
|
|
|
|
REAL(DP), ALLOCATABLE :: rwork(:)
|
|
|
|
COMPLEX(DP), ALLOCATABLE :: cwork(:)
|
|
|
|
!
|
|
|
|
!
|
|
|
|
IF ( PRESENT( comm_in ) ) THEN
|
|
|
|
!
|
|
|
|
comm = comm_in
|
|
|
|
!
|
2006-02-03 17:28:55 +08:00
|
|
|
#ifdef __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
comm = MPI_COMM_WORLD
|
|
|
|
!
|
2006-02-03 17:28:55 +08:00
|
|
|
#endif
|
2006-02-03 09:16:33 +08:00
|
|
|
END IF
|
|
|
|
!
|
|
|
|
IF ( ( nproc == 1 ) .OR. ( n < nproc ) ) THEN
|
|
|
|
!
|
|
|
|
! ... here use the standard scalar lapack drivers
|
|
|
|
!
|
|
|
|
ALLOCATE( aloc( n*(n+1)/2 ) )
|
|
|
|
ALLOCATE( rwork( 4*n ) )
|
|
|
|
ALLOCATE( cwork( 4*n ) )
|
|
|
|
!
|
|
|
|
! ... copy the lower-diagonal part of the matrix according to the
|
|
|
|
! ... Lapack packed storage scheme for Hermitian matrices
|
|
|
|
!
|
|
|
|
k = 0
|
|
|
|
DO j = 1, n
|
|
|
|
DO i = j, n
|
|
|
|
k = k + 1
|
2003-03-03 07:19:44 +08:00
|
|
|
aloc(k) = a(i,j)
|
2006-02-03 09:16:33 +08:00
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
! ... call the Lapack routine
|
|
|
|
!
|
|
|
|
#if defined (__AIX)
|
|
|
|
!
|
|
|
|
iopt = 1
|
|
|
|
!
|
|
|
|
CALL ZHPEV( iopt, aloc, d, ev, n, n, cwork, 4*n )
|
|
|
|
!
|
2003-05-02 06:47:53 +08:00
|
|
|
#else
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL ZHPEV( 'V', 'L', n, aloc, d, ev, n, cwork, rwork, info )
|
2003-03-03 07:19:44 +08:00
|
|
|
#endif
|
2006-02-03 09:16:33 +08:00
|
|
|
!
|
|
|
|
DEALLOCATE( cwork )
|
|
|
|
DEALLOCATE( rwork )
|
|
|
|
DEALLOCATE( aloc )
|
|
|
|
!
|
|
|
|
IF ( info /= 0 ) &
|
|
|
|
CALL errore( 'cdiagonalize', 'info', ABS( info ) )
|
|
|
|
!
|
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
nrl = n / nproc
|
|
|
|
!
|
|
|
|
IF ( mpime < MOD( n, nproc ) ) nrl = nrl + 1
|
|
|
|
!
|
|
|
|
ALLOCATE( vp( nrl, n ) )
|
|
|
|
ALLOCATE( rwork( n ) )
|
|
|
|
ALLOCATE( cwork( n ) )
|
|
|
|
!
|
|
|
|
IF ( iopt == 1 ) THEN
|
|
|
|
!
|
|
|
|
ALLOCATE( ap( nrl, n ) )
|
|
|
|
!
|
|
|
|
DO j = 1, n
|
2003-03-03 07:19:44 +08:00
|
|
|
DO i = 1, nrl
|
2006-02-03 09:16:33 +08:00
|
|
|
ap(i,j) = a(mpime+(i-1)*nproc+1,j)
|
2003-03-03 07:19:44 +08:00
|
|
|
END DO
|
2006-02-03 09:16:33 +08:00
|
|
|
END DO
|
|
|
|
!
|
|
|
|
CALL pzhptrd( n, nrl, ap, nrl, d, rwork, cwork, nproc, mpime, comm )
|
|
|
|
CALL pzupgtr( n, nrl, ap, nrl, cwork, vp, nrl, nproc, mpime, comm)
|
|
|
|
!
|
|
|
|
DEALLOCATE( ap )
|
|
|
|
!
|
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
CALL pzhptrd( n, nrl, a, nrl, d, rwork, cwork, nproc, mpime, comm )
|
|
|
|
CALL pzupgtr( n, nrl, a, nrl, cwork, vp, nrl, nproc, mpime, comm )
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
CALL pzsteqr( 'V', n, nrl, d, rwork, vp, nrl, nproc, mpime, comm )
|
|
|
|
!
|
|
|
|
IF ( iopt == 1 ) THEN
|
|
|
|
!
|
|
|
|
DO j = 1,n
|
2003-03-03 07:19:44 +08:00
|
|
|
DO i = 1,n
|
2006-02-03 09:16:33 +08:00
|
|
|
a(i,j) = 0.D0
|
2003-03-03 07:19:44 +08:00
|
|
|
END DO
|
|
|
|
DO i = 1, nrl
|
2006-02-03 09:16:33 +08:00
|
|
|
a((i-1)*nproc+mpime+1,j) = vp(i,j)
|
2003-03-03 07:19:44 +08:00
|
|
|
END DO
|
2006-02-03 09:16:33 +08:00
|
|
|
END DO
|
|
|
|
!
|
|
|
|
#if defined (__PARA)
|
|
|
|
# if defined (__MPI)
|
|
|
|
CALL MPI_ALLREDUCE( a, ev, 2*n*n, &
|
|
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
2006-02-03 09:16:33 +08:00
|
|
|
ev(:,:) = a(:,:)
|
2003-03-03 07:19:44 +08:00
|
|
|
#endif
|
2006-02-03 09:16:33 +08:00
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
DO j = 1,n
|
2003-03-03 07:19:44 +08:00
|
|
|
DO i = 1, nrl
|
2006-02-03 09:16:33 +08:00
|
|
|
ev(i,j) = vp(i,j)
|
2003-03-03 07:19:44 +08:00
|
|
|
END DO
|
2006-02-03 09:16:33 +08:00
|
|
|
END DO
|
|
|
|
!
|
2003-03-03 07:19:44 +08:00
|
|
|
END IF
|
2006-02-03 09:16:33 +08:00
|
|
|
!
|
|
|
|
DEALLOCATE( vp )
|
|
|
|
DEALLOCATE( rwork )
|
|
|
|
DEALLOCATE( cwork )
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
RETURN
|
|
|
|
!
|
|
|
|
END SUBROUTINE cdiagonalize
|
|
|
|
!
|
|
|
|
!-------------------------------------------------------------------------
|
|
|
|
SUBROUTINE pzhptrd( n, nrl, ap, lda, d, e, tau, nproc, me, comm )
|
|
|
|
!-------------------------------------------------------------------------
|
2006-01-10 22:04:13 +08:00
|
|
|
!
|
|
|
|
! Parallel MPI version of the LAPACK routine ZHPTRD
|
|
|
|
!
|
|
|
|
! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA
|
|
|
|
! Dicember 12, 1999
|
|
|
|
!
|
|
|
|
! REFERENCES :
|
|
|
|
!
|
|
|
|
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
|
|
|
|
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
|
|
|
|
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
|
|
|
|
!
|
|
|
|
! PARALLEL NUMERICAL ALGORITHMS,
|
|
|
|
! T.L. FREEMAN AND C.PHILLIPS,
|
|
|
|
! PRENTICE HALL INTERNATIONAL (1992).
|
|
|
|
!
|
|
|
|
! LAPACK routine (version 2.0) --
|
|
|
|
! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
|
|
|
! Courant Institute, Argonne National Lab, and Rice University
|
|
|
|
!
|
2003-03-03 07:19:44 +08:00
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
IMPLICIT NONE
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
! .. __SCALAR Arguments ..
|
2006-02-03 09:16:33 +08:00
|
|
|
INTEGER LDA, N, NRL, NPROC, ME, comm
|
2003-03-03 07:19:44 +08:00
|
|
|
! ..
|
|
|
|
! .. Array Arguments ..
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) D( * ), E( * )
|
|
|
|
COMPLEX(DP) AP(LDA, * ), TAU( * )
|
2003-03-03 07:19:44 +08:00
|
|
|
! ..
|
|
|
|
!
|
|
|
|
! Purpose
|
|
|
|
! =======
|
|
|
|
!
|
|
|
|
! PZHPTRD reduces a complex Hermitian distributed matrix AP to
|
|
|
|
! real symmetric tridiagonal form T by a unitary similarity
|
|
|
|
! transformation: Q**H * A * Q = T.
|
|
|
|
!
|
|
|
|
! Arguments
|
|
|
|
! =========
|
|
|
|
!
|
|
|
|
! N (input) INTEGER
|
|
|
|
! The order of the mglobal atrix AP. N >= 0.
|
|
|
|
!
|
|
|
|
! NRL (input) INTEGER
|
|
|
|
! The number of local rows of the matrix AP. NRL >= 0.
|
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
! AP (input/output) COMPLEX(DP) array, dimension (LDA,N)
|
2003-03-03 07:19:44 +08:00
|
|
|
! On entry, the Hermitian matrix AP.
|
|
|
|
! The rows of the matrix are distributed among processors
|
|
|
|
! with blocking factor 1.
|
|
|
|
! Example for NPROC = 4 :
|
|
|
|
! ROW | PE
|
|
|
|
! 1 | 0
|
|
|
|
! 2 | 1
|
|
|
|
! 3 | 2
|
|
|
|
! 4 | 3
|
|
|
|
! 5 | 0
|
|
|
|
! 6 | 1
|
|
|
|
! .. | ..
|
|
|
|
|
|
|
|
! On exit, the diagonal and first subdiagonal
|
|
|
|
! of A are overwritten by the corresponding elements of the
|
|
|
|
! tridiagonal matrix T, and the elements below the first
|
|
|
|
! subdiagonal, with the array TAU, represent the unitary
|
|
|
|
! matrix Q as a product of elementary reflectors;
|
|
|
|
!
|
|
|
|
! LDA (input) INTEGER
|
|
|
|
! Leading dimension of the local matrix AP, LDA > NRL
|
|
|
|
!
|
|
|
|
! D (output) DOUBLE PRECISION array, dimension (N)
|
|
|
|
! The diagonal elements of the tridiagonal matrix T:
|
|
|
|
! D(i) = AP(i,i).
|
|
|
|
!
|
|
|
|
! E (output) DOUBLE PRECISION array, dimension (N-1)
|
|
|
|
! The off-diagonal elements of the tridiagonal matrix T:
|
|
|
|
! E(i) = A(i+1,i)
|
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
! TAU (output) COMPLEX(DP) array, dimension (N-1)
|
2003-03-03 07:19:44 +08:00
|
|
|
! The __SCALAR factors of the elementary reflectors (see Further
|
|
|
|
! Details).
|
|
|
|
!
|
|
|
|
! NPROC (input) INTEGER
|
|
|
|
! Number of processors
|
|
|
|
!
|
|
|
|
! ME (input) INTEGER
|
|
|
|
! Index of the local processor ( 0, 1, 2, ..., NPROC-1 )
|
|
|
|
|
|
|
|
!
|
|
|
|
! Further Details
|
|
|
|
! ===============
|
|
|
|
!
|
|
|
|
! the matrix Q is represented as a product of elementary
|
|
|
|
! reflectors
|
|
|
|
!
|
|
|
|
! Q = H(1) H(2) . . . H(n-1).
|
|
|
|
!
|
|
|
|
! Each H(i) has the form
|
|
|
|
!
|
|
|
|
! H(i) = I - tau * v * v'
|
|
|
|
!
|
|
|
|
! where tau is a complex __SCALAR, and v is a complex vector with
|
|
|
|
! v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP,
|
|
|
|
! overwriting A(i+2:n,i), and tau is stored in TAU(i).
|
|
|
|
!
|
|
|
|
! =====================================================================
|
|
|
|
!
|
|
|
|
! .. Parameters ..
|
|
|
|
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) ONE, ZERO, HALF
|
2003-03-03 07:19:44 +08:00
|
|
|
PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ),ZERO = ( 0.0D+0, 0.0D+0 ), &
|
|
|
|
& HALF = ( 0.5D+0, 0.0D+0 ) )
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) RONE, RZERO
|
2003-03-03 07:19:44 +08:00
|
|
|
PARAMETER ( RONE = 1.0D+0, RZERO = 0.0D+0 )
|
|
|
|
|
|
|
|
|
|
|
|
INTEGER QI
|
|
|
|
INTEGER IL(N+1)
|
|
|
|
INTEGER OW(N+1)
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) CTMP
|
|
|
|
COMPLEX(DP) CTMPV(N+1)
|
|
|
|
COMPLEX(DP) TAUL(N+1)
|
|
|
|
COMPLEX(DP) APKI(N+1)
|
|
|
|
REAL(DP) TMP
|
|
|
|
REAL(DP) TMPV(N+1)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
! ..
|
|
|
|
! .. Local __SCALARs ..
|
|
|
|
INTEGER J, I, I1, K, I2, NI1, JL
|
|
|
|
INTEGER KL, J1
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) ALPHA, TAUI
|
2003-03-03 07:19:44 +08:00
|
|
|
INTEGER KNT, IERR
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM
|
2003-03-03 07:19:44 +08:00
|
|
|
! ..
|
|
|
|
! .. External Subroutines ..
|
|
|
|
EXTERNAL ZAXPY
|
|
|
|
EXTERNAL ZDSCAL, ZSCAL
|
|
|
|
! ..
|
|
|
|
! .. External Functions ..
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) ZDOTC
|
2003-03-03 07:19:44 +08:00
|
|
|
EXTERNAL ZDOTC
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) DLAMCH, DLAPY3, DZNRM2
|
|
|
|
COMPLEX(DP) ZLADIV
|
2003-03-03 07:19:44 +08:00
|
|
|
EXTERNAL DLAMCH, DLAPY3, DZNRM2, ZLADIV
|
|
|
|
! ..
|
|
|
|
! .. Intrinsic Functions ..
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
INTRINSIC DABS, DBLE, AIMAG, SIGN
|
|
|
|
! cmplx removed because preprocessed
|
2003-03-03 07:19:44 +08:00
|
|
|
!
|
|
|
|
! .. Executable Statements ..
|
|
|
|
!
|
|
|
|
! Quick return if possible
|
|
|
|
!
|
|
|
|
IF(N.LE.0) THEN
|
|
|
|
RETURN
|
|
|
|
END IF
|
|
|
|
|
|
|
|
|
|
|
|
DO I = 1,N+1
|
|
|
|
QI = (I-1)/NPROC
|
|
|
|
OW(I) = MOD((I-1),NPROC)
|
|
|
|
IF(ME .le. OW(I) ) then
|
|
|
|
IL(I) = QI + 1
|
|
|
|
ELSE
|
|
|
|
IL(I) = QI
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
! Reduce the lower triangle of A.
|
|
|
|
!
|
|
|
|
IF (OW(1).EQ.ME) THEN
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
AP( IL(1), 1 ) = DBLE( AP( IL(1), 1 ) )
|
2003-03-03 07:19:44 +08:00
|
|
|
END IF
|
|
|
|
|
|
|
|
DO I = 1, N - 1
|
|
|
|
!
|
|
|
|
! Generate elementary reflector H(i) = I - tau * v * v'
|
|
|
|
! to annihilate A(i+2:n,i)
|
|
|
|
!
|
|
|
|
IF (OW(I+1).EQ.ME) THEN
|
|
|
|
ALPHA = AP( IL(I+1), I )
|
|
|
|
END IF
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_BCAST(ALPHA,1,MPI_DOUBLE_COMPLEX,OW(I+1),comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
IF( (N-I).LE.0 ) THEN
|
|
|
|
TAUI = RZERO
|
|
|
|
ELSE
|
|
|
|
IF(OW(I+2).EQ.ME) THEN
|
|
|
|
I2 = IL(I+2)
|
|
|
|
ELSE
|
|
|
|
I2 = IL(I+2) + 1 ! I+2
|
|
|
|
ENDIF
|
|
|
|
NI1 = NRL - I2 + 1 ! N-I-1
|
|
|
|
|
|
|
|
IF((N-I-1).GT.0) THEN
|
|
|
|
XNORM = DZNRM2( NI1, AP( I2, I ), 1 )
|
|
|
|
#if defined __PARA
|
|
|
|
XNORM = XNORM ** 2
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(XNORM,TMP,1,MPI_DOUBLE_PRECISION, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
XNORM = SQRT(TMP)
|
|
|
|
#endif
|
|
|
|
ELSE
|
|
|
|
XNORM = 0.0D0
|
|
|
|
ENDIF
|
|
|
|
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
ALPHR = DBLE( ALPHA )
|
2003-03-03 07:19:44 +08:00
|
|
|
ALPHI = AIMAG( ALPHA )
|
|
|
|
IF( XNORM.EQ.RZERO .AND. ALPHI.EQ.RZERO ) THEN
|
|
|
|
TAUI = RZERO
|
|
|
|
ELSE
|
|
|
|
BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
|
|
|
|
SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' )
|
|
|
|
RSAFMN = RONE / SAFMIN
|
|
|
|
IF( DABS( BETA ).LT.SAFMIN ) THEN
|
|
|
|
KNT = 0
|
|
|
|
10 CONTINUE
|
|
|
|
KNT = KNT + 1
|
|
|
|
|
|
|
|
IF(NI1.GT.0) THEN
|
|
|
|
CALL ZDSCAL( NI1, RSAFMN, AP( I2, I ), 1 )
|
|
|
|
ENDIF
|
|
|
|
|
|
|
|
BETA = BETA*RSAFMN
|
|
|
|
ALPHI = ALPHI*RSAFMN
|
|
|
|
ALPHR = ALPHR*RSAFMN
|
|
|
|
IF( DABS( BETA ).LT.SAFMIN ) GO TO 10
|
|
|
|
|
|
|
|
IF((N-I-1).GT.0) THEN
|
|
|
|
XNORM = DZNRM2( NI1, AP( I2, I ), 1 )
|
|
|
|
#if defined __PARA
|
|
|
|
XNORM = XNORM ** 2
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(XNORM,TMP,1,MPI_DOUBLE_PRECISION, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
XNORM = SQRT(TMP)
|
|
|
|
#endif
|
|
|
|
ELSE
|
|
|
|
XNORM = 0.0D0
|
|
|
|
ENDIF
|
|
|
|
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
ALPHA = CMPLX( ALPHR, ALPHI )
|
2003-03-03 07:19:44 +08:00
|
|
|
BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
TAUI = CMPLX( (BETA-ALPHR)/BETA, -ALPHI/BETA )
|
|
|
|
ALPHA = ZLADIV( ONE, ALPHA-BETA )
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
IF(NI1.GT.0) THEN
|
|
|
|
CALL ZSCAL( NI1, ALPHA, AP( I2, I ), 1 )
|
|
|
|
ENDIF
|
|
|
|
|
|
|
|
ALPHA = BETA
|
|
|
|
DO J = 1, KNT
|
|
|
|
ALPHA = ALPHA*SAFMIN
|
|
|
|
END DO
|
|
|
|
|
|
|
|
ELSE
|
|
|
|
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
TAUI = CMPLX( (BETA-ALPHR)/BETA, -ALPHI/BETA )
|
|
|
|
ALPHA = ZLADIV( ONE, ALPHA-BETA )
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
IF(NI1.GT.0) THEN
|
|
|
|
CALL ZSCAL( NI1, ALPHA, AP( I2, I ), 1 )
|
|
|
|
ENDIF
|
|
|
|
|
|
|
|
ALPHA = BETA
|
|
|
|
END IF
|
|
|
|
END IF
|
|
|
|
ENDIF
|
|
|
|
!
|
|
|
|
E( I ) = ALPHA
|
|
|
|
!
|
|
|
|
IF( TAUI.NE.ZERO ) THEN
|
|
|
|
!
|
|
|
|
! Apply H(i) from both sides to A(i+1:n,i+1:n)
|
|
|
|
!
|
|
|
|
! ... AP( I+1, I ) = ONE
|
|
|
|
IF (OW(I+1).EQ.ME) THEN
|
|
|
|
AP( IL(I+1), I ) = ONE
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Compute y := tau * A * v storing y in TAU(i:n-1)
|
|
|
|
!
|
|
|
|
|
|
|
|
! ... broadcast A(K,I)
|
|
|
|
IF(OW(I+1).EQ.ME) THEN
|
|
|
|
I1 = IL(I+1)
|
|
|
|
ELSE
|
|
|
|
I1 = IL(I+1) + 1 ! I+2
|
|
|
|
ENDIF
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
DO J = I+1, N
|
|
|
|
CTMPV(J) = ZERO
|
|
|
|
END DO
|
|
|
|
DO JL = I1, NRL
|
|
|
|
J = ME + (JL-1)*NPROC + 1
|
|
|
|
CTMPV(J) = AP(JL,I)
|
|
|
|
END DO
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(CTMPV(I+1),APKI(I+1),N-I, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_DOUBLE_COMPLEX, MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
|
|
|
DO J = I+1,N
|
|
|
|
APKI(J) = AP(J,I)
|
|
|
|
ENDDO
|
|
|
|
#endif
|
|
|
|
DO J = I+1, N+1
|
|
|
|
TAU(J-1) = ZERO
|
|
|
|
END DO
|
|
|
|
DO JL = I1, NRL
|
|
|
|
J = ME + (JL-1)*NPROC + 1
|
|
|
|
TAU(J-1) = ZERO
|
|
|
|
DO K = I+1, J
|
|
|
|
TAU(J-1) = TAU(J-1) + TAUI * AP(JL,K) * APKI(K)
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
DO J = I+1, N
|
|
|
|
IF(OW(J+1).EQ.ME) THEN
|
|
|
|
J1 = IL(J+1)
|
|
|
|
ELSE
|
|
|
|
J1 = IL(J+1) + 1 ! I+2
|
|
|
|
ENDIF
|
|
|
|
DO KL = J1, NRL
|
|
|
|
K = ME + (KL-1)*NPROC + 1
|
|
|
|
TAU(J-1) = TAU(J-1) + TAUI * CONJG(AP(KL,J)) * APKI(K)
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
! ... parallel sum TAU
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(TAU(I),CTMPV(I),N-I+1, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_DOUBLE_COMPLEX,MPI_SUM,comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
DO J = I, N
|
|
|
|
TAU(J) = CTMPV(J)
|
|
|
|
END DO
|
|
|
|
#endif
|
|
|
|
!
|
|
|
|
! Compute w := y - 1/2 * tau * (y'*v) * v
|
|
|
|
!
|
|
|
|
! ... ALPHA = -HALF*TAUI*ZDOTC(N-I,TAU(I),1,AP(I+1,I),1)
|
|
|
|
|
|
|
|
JL = 1
|
|
|
|
DO J = I, N
|
|
|
|
IF(OW(J+1).EQ.ME) THEN
|
|
|
|
TAUL(JL) = TAU(J)
|
|
|
|
JL = JL + 1
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
IF(OW(I+1).EQ.ME) THEN
|
|
|
|
I1 = IL(I+1)
|
|
|
|
ELSE
|
|
|
|
I1 = IL(I+1) + 1 ! I+1
|
|
|
|
ENDIF
|
|
|
|
NI1 = NRL - I1 + 1 ! N-I
|
|
|
|
ALPHA = -HALF*TAUI*ZDOTC(NI1,TAUL(1),1,AP(I1,I),1)
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(ALPHA,CTMP,1, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_DOUBLE_COMPLEX, MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
ALPHA = CTMP
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
CALL ZAXPY(NI1,ALPHA,AP(I1,I),1,TAUL(1),1)
|
|
|
|
JL = 1
|
|
|
|
DO J = I, N
|
|
|
|
CTMPV(J) = ZERO
|
|
|
|
IF(OW(J+1).EQ.ME) THEN
|
|
|
|
CTMPV(J) = TAUL(JL)
|
|
|
|
JL = JL + 1
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(CTMPV(I),TAU(I),N-I+1, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_DOUBLE_COMPLEX, MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
|
|
|
CALL ZAXPY(N-I,ALPHA,AP(I+1,I),1,TAU(I),1)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
!
|
|
|
|
! Apply the transformation as a rank-2 update:
|
|
|
|
! A := A - v * w' - w * v'
|
|
|
|
!
|
|
|
|
! ... broadcast A(K,I)
|
|
|
|
IF(OW(I+1).EQ.ME) THEN
|
|
|
|
I1 = IL(I+1)
|
|
|
|
ELSE
|
|
|
|
I1 = IL(I+1) + 1 ! I+2
|
|
|
|
ENDIF
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
DO J = I+1, N
|
|
|
|
CTMPV(J) = ZERO
|
|
|
|
END DO
|
|
|
|
DO JL = I1, NRL
|
|
|
|
J = ME + (JL-1)*NPROC + 1
|
|
|
|
CTMPV(J) = AP(JL,I)
|
|
|
|
END DO
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(CTMPV(I+1),APKI(I+1),N-I, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_DOUBLE_COMPLEX, MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#else
|
|
|
|
DO J = I+1, N
|
|
|
|
APKI(J) = AP(J,I)
|
|
|
|
END DO
|
|
|
|
#endif
|
|
|
|
|
|
|
|
DO K = I+1,N
|
|
|
|
DO JL = I1,NRL
|
|
|
|
J = ME + (JL-1)*NPROC + 1
|
|
|
|
AP(JL,K) = AP(JL,K) - ONE * AP(JL,I) * CONJG(TAU(K-1)) - &
|
|
|
|
& CONJG(ONE) * TAU(J-1) * CONJG(APKI(K))
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
IF(OW(I+1).EQ.ME) THEN
|
|
|
|
AP(IL(I+1),I) = E( I )
|
|
|
|
END IF
|
|
|
|
IF(OW(I).EQ.ME) THEN
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
D( I ) = DBLE(AP( IL(I),I ))
|
2003-03-03 07:19:44 +08:00
|
|
|
END IF
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_BCAST(D(I),1,MPI_DOUBLE_PRECISION,OW(I), comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
TAU( I ) = TAUI
|
|
|
|
END DO
|
|
|
|
IF(OW(I).EQ.ME) THEN
|
General cleanup of intrinsic functions:
conversion to real => DBLE
(including real part of a complex number)
conversion to complex => CMPLX
complex conjugate => CONJG
imaginary part => AIMAG
All functions are uppercase.
CMPLX is preprocessed by f_defs.h and performs an explicit cast:
#define CMPLX(a,b) cmplx(a,b,kind=DP)
This implies that 1) f_defs.h must be included whenever a CMPLX is present,
2) CMPLX should stay in a single line, 3) DP must be defined.
All occurrences of real, float, dreal, dfloat, dconjg, dimag, dcmplx
removed - please do not reintroduce any of them.
Tested only with ifc7 and g95 - beware unintended side effects
Maybe not the best solution (explicit casts everywhere would be better)
but it can be easily changed with a script if the need arises.
The following code might be used to test for possible trouble:
program test_intrinsic
implicit none
integer, parameter :: dp = selected_real_kind(14,200)
real (kind=dp) :: a = 0.123456789012345_dp
real (kind=dp) :: b = 0.987654321098765_dp
complex (kind=dp) :: c = ( 0.123456789012345_dp, 0.987654321098765_dp)
print *, ' A = ', a
print *, ' DBLE(A)= ', DBLE(a)
print *, ' C = ', c
print *, 'CONJG(C)= ', CONJG(c)
print *, 'DBLE(c),AIMAG(C) = ', DBLE(c), AIMAG(c)
print *, 'CMPLX(A,B,kind=dp)= ', CMPLX( a, b, kind=dp)
end program test_intrinsic
Note that CMPLX and REAL without a cast yield single precision numbers on
ifc7 and g95 !!!
git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@2133 c92efa57-630b-4861-b058-cf58834340f0
2005-08-27 01:44:42 +08:00
|
|
|
D( N ) = DBLE(AP( IL(I),I ))
|
2003-03-03 07:19:44 +08:00
|
|
|
END IF
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
CALL MPI_BCAST(D(N),1,MPI_DOUBLE_PRECISION,OW(I), comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
!
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
!
|
|
|
|
! End of ZHPTRD
|
|
|
|
!
|
2005-05-18 22:02:51 +08:00
|
|
|
END SUBROUTINE pzhptrd
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
SUBROUTINE pzupgtr( n, nrl, ap, lda, tau, q, ldq, nproc, me, comm)
|
2006-01-10 22:04:13 +08:00
|
|
|
|
2003-03-03 07:19:44 +08:00
|
|
|
!
|
|
|
|
! Parallel MPI version of the LAPACK routine ZUPGTR
|
|
|
|
!
|
|
|
|
! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA
|
|
|
|
! Dicember 12, 1999
|
|
|
|
!
|
|
|
|
! REFERENCES :
|
|
|
|
!
|
|
|
|
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
|
|
|
|
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
|
|
|
|
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
|
|
|
|
!
|
|
|
|
! PARALLEL NUMERICAL ALGORITHMS,
|
|
|
|
! T.L. FREEMAN AND C.PHILLIPS,
|
|
|
|
! PRENTICE HALL INTERNATIONAL (1992).
|
|
|
|
!
|
|
|
|
! LAPACK routine (version 2.0) --
|
|
|
|
! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
|
|
|
! Courant Institute, Argonne National Lab, and Rice University
|
|
|
|
|
|
|
|
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
|
|
|
|
!
|
|
|
|
! .. __SCALAR Arguments ..
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
INTEGER INFO, LDQ, N, LDA, NRL, NPROC, ME, comm
|
2003-03-03 07:19:44 +08:00
|
|
|
! ..
|
|
|
|
! .. Array Arguments ..
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) AP(LDA, * ), Q( LDQ, * ), TAU( * )
|
2003-03-03 07:19:44 +08:00
|
|
|
! ..
|
|
|
|
!
|
|
|
|
! Purpose
|
|
|
|
! =======
|
|
|
|
!
|
|
|
|
! PZUPGTR generates a complex unitary matrix Q which is defined as the
|
|
|
|
! product of n-1 elementary reflectors H(i) of order n, as returned by
|
|
|
|
! PZHPTRD :
|
|
|
|
!
|
|
|
|
! Q = H(1) H(2) . . . H(n-1).
|
|
|
|
!
|
|
|
|
! Arguments
|
|
|
|
! =========
|
|
|
|
!
|
|
|
|
! N (input) INTEGER
|
|
|
|
! The order of the mglobal atrix AP. N >= 0.
|
|
|
|
!
|
|
|
|
! NRL (input) INTEGER
|
|
|
|
! The number of local rows of the matrix AP. NRL >= 0.
|
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
! AP (input) COMPLEX(DP) array, dimension (LDA,N)
|
2003-03-03 07:19:44 +08:00
|
|
|
! The vectors which define the elementary reflectors, as
|
|
|
|
! returned by PZHPTRD.
|
|
|
|
! The rows of the matrix are distributed among processors
|
|
|
|
! with blocking factor 1.
|
|
|
|
! Example for NPROC = 4 :
|
|
|
|
! ROW | PE
|
|
|
|
! 1 | 0
|
|
|
|
! 2 | 1
|
|
|
|
! 3 | 2
|
|
|
|
! 4 | 3
|
|
|
|
! 5 | 0
|
|
|
|
! 6 | 1
|
|
|
|
! .. | ..
|
|
|
|
!
|
|
|
|
! LDA (input) INTEGER
|
|
|
|
! Leading dimension of the local matrix AP, LDA > NRL
|
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
! TAU (input) COMPLEX(DP) array, dimension (N-1)
|
2003-03-03 07:19:44 +08:00
|
|
|
! TAU(i) must contain the __SCALAR factor of the elementary
|
|
|
|
! reflector H(i), as returned by PZHPTRD.
|
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
! Q (output) COMPLEX(DP) array, dimension (LDQ,N)
|
2003-03-03 07:19:44 +08:00
|
|
|
! The N-by-N unitary matrix Q.
|
|
|
|
! The rows of the matrix are distributed among processors
|
|
|
|
! in the same way of the matrix AP
|
|
|
|
!
|
|
|
|
! LDQ (input) INTEGER
|
|
|
|
! The leading dimension of the array Q. LDQ >= max(1,NRL).
|
|
|
|
!
|
|
|
|
! NPROC (input) INTEGER
|
|
|
|
! Number of processors
|
|
|
|
!
|
|
|
|
! ME (input) INTEGER
|
|
|
|
! Index of the local processor ( 0, 1, 2, ..., NPROC-1 )
|
|
|
|
!
|
|
|
|
! =====================================================================
|
|
|
|
!
|
|
|
|
! .. Parameters ..
|
|
|
|
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) ONE, ZERO
|
2003-03-03 07:19:44 +08:00
|
|
|
PARAMETER ( ONE = (1.0D+0,0.0D+0), ZERO = (0.0D+0,0.0D+0) )
|
|
|
|
|
|
|
|
INTEGER QI
|
|
|
|
INTEGER IL(N+1)
|
|
|
|
INTEGER OW(N+1)
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) CTMP
|
|
|
|
COMPLEX(DP) CTMPV(N+1)
|
|
|
|
COMPLEX(DP) WORK(N+1)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
! ..
|
|
|
|
! .. Local __SCALARs ..
|
|
|
|
INTEGER I, IINFO, J, K, JL, KL, J1, I1, I2, NI1, L, IERR
|
|
|
|
! ..
|
|
|
|
|
|
|
|
! .. Executable Statements ..
|
|
|
|
!
|
|
|
|
! Test the input arguments
|
|
|
|
!
|
|
|
|
! Quick return if possible
|
|
|
|
!
|
|
|
|
IF( N.EQ.0 ) THEN
|
|
|
|
RETURN
|
|
|
|
END IF
|
|
|
|
|
|
|
|
!
|
|
|
|
DO I = 1,N+1
|
|
|
|
QI = (I-1)/NPROC
|
|
|
|
OW(I) = MOD((I-1),NPROC)
|
|
|
|
IF(ME .le. OW(I) ) then
|
|
|
|
IL(I) = QI + 1
|
|
|
|
ELSE
|
|
|
|
IL(I) = QI
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
! Unpack the vectors which define the elementary reflectors and
|
|
|
|
! set the first row and column of Q equal to those of the unit
|
|
|
|
! matrix
|
|
|
|
!
|
|
|
|
IF(OW(1).EQ.ME) THEN
|
|
|
|
Q( IL(1), 1 ) = ONE
|
|
|
|
DO KL = 2, NRL
|
|
|
|
Q( KL, 1 ) = ZERO
|
|
|
|
END DO
|
|
|
|
DO J = 2, N
|
|
|
|
Q( IL(1), J ) = ZERO
|
|
|
|
END DO
|
|
|
|
ELSE
|
|
|
|
DO KL = 1, NRL
|
|
|
|
Q( KL, 1 ) = ZERO
|
|
|
|
END DO
|
|
|
|
ENDIF
|
|
|
|
|
|
|
|
DO J = 2, N
|
|
|
|
IF(OW(J+1).EQ.ME) THEN
|
|
|
|
J1 = IL(J+1)
|
|
|
|
ELSE
|
|
|
|
J1 = IL(J+1) + 1
|
|
|
|
ENDIF
|
|
|
|
DO KL = J1, NRL
|
|
|
|
Q( KL, J ) = AP( KL, J-1 )
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
|
|
|
|
IF( N.GT.1 ) THEN
|
|
|
|
!
|
|
|
|
! Generate Q(2:n,2:n)
|
|
|
|
!
|
|
|
|
DO I = N-1, 1, -1
|
|
|
|
!
|
|
|
|
! Apply H(i) to A(i:m,i:n) from the left
|
|
|
|
!
|
|
|
|
IF( I.LT.(N-1) ) THEN
|
|
|
|
|
|
|
|
IF(OW(I+1).EQ.ME) THEN
|
|
|
|
Q( IL(I+1), I+1 ) = ONE
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Form H * C
|
|
|
|
!
|
|
|
|
IF( TAU(I).NE.ZERO ) THEN
|
|
|
|
!
|
|
|
|
! w := C' * v
|
|
|
|
!
|
|
|
|
IF(OW(I+1).EQ.ME) THEN
|
|
|
|
I1 = IL(I+1)
|
|
|
|
ELSE
|
|
|
|
I1 = IL(I+1) + 1
|
|
|
|
ENDIF
|
|
|
|
DO J = 1, N-1-I
|
|
|
|
CTMP = ZERO
|
|
|
|
DO KL = I1, NRL
|
|
|
|
CTMP = CTMP + CONJG( Q( KL, J+I+1 ) ) * Q( KL,I+1 )
|
|
|
|
END DO
|
|
|
|
WORK(J) = CTMP
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#if defined __PARA
|
|
|
|
# if defined __MPI
|
|
|
|
CALL MPI_ALLREDUCE(WORK,CTMPV,N-1-I,MPI_DOUBLE_COMPLEX, &
|
2006-02-03 09:16:33 +08:00
|
|
|
& MPI_SUM, comm,IERR)
|
2003-03-03 07:19:44 +08:00
|
|
|
# endif
|
|
|
|
DO J = 1,N-1-I
|
|
|
|
WORK(J) = CTMPV(J)
|
|
|
|
END DO
|
|
|
|
#endif
|
|
|
|
!
|
|
|
|
! C := C - v * w'
|
|
|
|
!
|
|
|
|
DO J = 1, N-1-I
|
|
|
|
CTMP = -TAU(I) * CONJG( WORK( J ) )
|
|
|
|
DO KL = I1, NRL
|
|
|
|
Q( KL, J+I+1 ) = Q( KL, J+I+1 ) + CTMP * Q(KL, I+1)
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
END IF
|
|
|
|
|
|
|
|
IF( I.LT.(N-1) ) THEN
|
|
|
|
IF(OW(I+2).EQ.ME) THEN
|
|
|
|
I2 = IL(I+2) ! I+2
|
|
|
|
ELSE
|
|
|
|
I2 = IL(I+2) + 1 ! local ind. of the first element > I+2
|
|
|
|
ENDIF
|
|
|
|
NI1 = NRL - I2 + 1 ! N-I-1
|
|
|
|
CALL ZSCAL( NI1, -TAU( I ), Q( I2, I+1 ), 1 )
|
|
|
|
END IF
|
|
|
|
|
|
|
|
IF(OW(I+1).EQ.ME) THEN
|
|
|
|
Q( IL(I+1), I+1 ) = ONE - TAU( I )
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Set A(1:i-1,i) to zero
|
|
|
|
!
|
|
|
|
DO L = 1, I - 1
|
|
|
|
IF(OW(L+1).EQ.ME) THEN
|
|
|
|
Q( IL(L+1), I+1 ) = ZERO
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
!
|
|
|
|
! End of ZUPGTR
|
|
|
|
!
|
2005-05-18 22:02:51 +08:00
|
|
|
END SUBROUTINE pzupgtr
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
SUBROUTINE pzsteqr( compz, n, nrl, d, e, z, ldz, nproc, me, comm )
|
2003-03-03 07:19:44 +08:00
|
|
|
!
|
|
|
|
! Parallel MPI version of the LAPACK routine ZHPTRD
|
|
|
|
!
|
|
|
|
! Carlo Cavazzoni (carlo.cavazzoni@cineca.it) -- CINECA
|
|
|
|
! Dicember 12, 1999
|
|
|
|
!
|
|
|
|
! REFERENCES :
|
|
|
|
!
|
|
|
|
! NUMERICAL RECIPES, THE ART OF SCIENTIFIC COMPUTING.
|
|
|
|
! W.H. PRESS, B.P. FLANNERY, S.A. TEUKOLSKY, AND W.T. VETTERLING,
|
|
|
|
! CAMBRIDGE UNIVERSITY PRESS, CAMBRIDGE.
|
|
|
|
!
|
|
|
|
! PARALLEL NUMERICAL ALGORITHMS,
|
|
|
|
! T.L. FREEMAN AND C.PHILLIPS,
|
|
|
|
! PRENTICE HALL INTERNATIONAL (1992).
|
|
|
|
!
|
|
|
|
! LAPACK routine (version 2.0) --
|
|
|
|
! Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
|
|
|
! Courant Institute, Argonne National Lab, and Rice University
|
|
|
|
!
|
|
|
|
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
|
|
|
|
! .. __SCALAR Arguments ..
|
|
|
|
CHARACTER COMPZ
|
2006-02-03 09:16:33 +08:00
|
|
|
INTEGER LDZ, N, NRL, NPROC, ME, comm
|
2003-03-03 07:19:44 +08:00
|
|
|
! ..
|
|
|
|
! .. Array Arguments ..
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) D( * ), E( * )
|
|
|
|
COMPLEX(DP) Z( LDZ, * )
|
2003-03-03 07:19:44 +08:00
|
|
|
! ..
|
|
|
|
!
|
|
|
|
! Purpose
|
|
|
|
! =======
|
|
|
|
!
|
|
|
|
! PZSTEQR computes all eigenvalues and, optionally, eigenvectors of a
|
|
|
|
! symmetric tridiagonal matrix using the implicit QL or QR method.
|
|
|
|
! The eigenvectors of a full or band complex Hermitian matrix can also
|
|
|
|
! be found if PZHPTRD has been used to reduce this
|
|
|
|
! matrix to tridiagonal form.
|
|
|
|
!
|
|
|
|
! Arguments
|
|
|
|
! =========
|
|
|
|
!
|
|
|
|
! COMPZ (input) CHARACTER*1
|
|
|
|
! = 'N': Compute eigenvalues only.
|
|
|
|
! = 'V': Compute eigenvalues and eigenvectors of the original
|
|
|
|
! Hermitian matrix. On entry, Z must contain the
|
|
|
|
! unitary matrix used to reduce the original matrix
|
|
|
|
! to tridiagonal form.
|
|
|
|
! = 'I': Compute eigenvalues and eigenvectors of the
|
|
|
|
! tridiagonal matrix. Z is initialized to the identity
|
|
|
|
! matrix.
|
|
|
|
!
|
|
|
|
! N (input) INTEGER
|
|
|
|
! The order of the mglobal atrix AP. N >= 0.
|
|
|
|
!
|
|
|
|
! NRL (input) INTEGER
|
|
|
|
! The number of local rows of the matrix AP. NRL >= 0.
|
|
|
|
!
|
|
|
|
! D (input/output) DOUBLE PRECISION array, dimension (N)
|
|
|
|
! On entry, the diagonal elements of the tridiagonal matrix.
|
|
|
|
! On exit, if INFO = 0, the eigenvalues in ascending order.
|
|
|
|
!
|
|
|
|
! E (input/output) DOUBLE PRECISION array, dimension (N-1)
|
|
|
|
! On entry, the (n-1) subdiagonal elements of the tridiagonal
|
|
|
|
! matrix.
|
|
|
|
! On exit, E has been destroyed.
|
|
|
|
!
|
2005-08-28 22:09:42 +08:00
|
|
|
! Z (input/output) COMPLEX(DP) array, dimension (LDZ, N)
|
2003-03-03 07:19:44 +08:00
|
|
|
! On entry, if COMPZ = 'V', then Z contains the unitary
|
|
|
|
! matrix used in the reduction to tridiagonal form.
|
|
|
|
! On exit if COMPZ = 'V', Z contains the
|
|
|
|
! orthonormal eigenvectors of the original Hermitian matrix,
|
|
|
|
! and if COMPZ = 'I', Z contains the orthonormal eigenvectors
|
|
|
|
! of the symmetric tridiagonal matrix.
|
|
|
|
! If COMPZ = 'N', then Z is not referenced.
|
|
|
|
! The rows of the matrix are distributed among processors
|
|
|
|
! with blocking factor 1, i.e. for NPROC = 4 :
|
|
|
|
! ROW Index | Processor index owning the row
|
|
|
|
! 1 | 0
|
|
|
|
! 2 | 1
|
|
|
|
! 3 | 2
|
|
|
|
! 4 | 3
|
|
|
|
! 5 | 0
|
|
|
|
! 6 | 1
|
|
|
|
! .. | ..
|
|
|
|
!
|
|
|
|
! LDZ (input) INTEGER
|
|
|
|
! The leading dimension of the array Z. LDZ >= 1, and if
|
|
|
|
! eigenvectors are desired, then LDZ >= max(1,NRL).
|
|
|
|
!
|
|
|
|
! NPROC (input) INTEGER
|
|
|
|
! Number of processors
|
|
|
|
!
|
|
|
|
! ME (input) INTEGER
|
|
|
|
! Index of the local processor ( 0, 1, 2, ..., NPROC-1 )
|
|
|
|
!
|
|
|
|
! =====================================================================
|
|
|
|
!
|
|
|
|
! .. Parameters ..
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) ZERO, ONE, TWO, THREE, CTEMP, STEMP
|
2003-03-03 07:19:44 +08:00
|
|
|
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, &
|
|
|
|
& THREE = 3.0D0 )
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) CZERO, CONE,ZTEMP
|
2003-03-03 07:19:44 +08:00
|
|
|
PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), CONE = ( 1.0D0, 0.0D0 ) )
|
|
|
|
INTEGER MAXIT
|
|
|
|
PARAMETER ( MAXIT = 30 )
|
|
|
|
! ..
|
|
|
|
|
|
|
|
INTEGER QI, KL, INFO
|
|
|
|
INTEGER IL(N+1)
|
|
|
|
INTEGER OW(N+1)
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) WORK(2*N)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
! .. Local __SCALARs ..
|
|
|
|
INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, &
|
|
|
|
& LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, &
|
|
|
|
& NM1, NMAXIT
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, &
|
2003-03-03 07:19:44 +08:00
|
|
|
& S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST
|
|
|
|
! ..
|
|
|
|
! .. External Functions ..
|
|
|
|
LOGICAL LSAME
|
2005-08-28 22:09:42 +08:00
|
|
|
REAL(DP) DLAMCH, DLANST, DLAPY2
|
2003-03-03 07:19:44 +08:00
|
|
|
EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2
|
|
|
|
! ..
|
|
|
|
! .. External Subroutines ..
|
|
|
|
EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASRT, XERBLA, &
|
|
|
|
& ZLASET, ZLASR, ZSWAP
|
|
|
|
! ..
|
|
|
|
! .. Intrinsic Functions ..
|
|
|
|
INTRINSIC DABS, MAX, SIGN, SQRT
|
|
|
|
! ..
|
|
|
|
! .. Executable Statements ..
|
|
|
|
!
|
|
|
|
! Test the input parameters.
|
|
|
|
!
|
|
|
|
INFO = 0
|
|
|
|
!
|
|
|
|
IF( LSAME( COMPZ, 'N' ) ) THEN
|
|
|
|
ICOMPZ = 0
|
|
|
|
ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
|
|
|
|
ICOMPZ = 1
|
|
|
|
ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
|
|
|
|
ICOMPZ = 2
|
|
|
|
ELSE
|
|
|
|
ICOMPZ = -1
|
|
|
|
END IF
|
|
|
|
IF( ICOMPZ.LT.0 ) THEN
|
|
|
|
INFO = -1
|
|
|
|
ELSE IF( N.LT.0 ) THEN
|
|
|
|
INFO = -2
|
|
|
|
ELSE IF( (LDZ.LT.1) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX(1,NRL) ) ) THEN
|
|
|
|
INFO = -6
|
|
|
|
END IF
|
|
|
|
IF( INFO.NE.0 ) THEN
|
|
|
|
CALL XERBLA( 'ZSTEQR', -INFO )
|
|
|
|
RETURN
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Quick return if possible
|
|
|
|
!
|
|
|
|
IF(N.LE.0) THEN
|
|
|
|
RETURN
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
DO I = 1,N+1
|
|
|
|
QI = (I-1)/NPROC
|
|
|
|
OW(I) = MOD((I-1),NPROC)
|
|
|
|
IF(ME .le. OW(I) ) then
|
|
|
|
IL(I) = QI + 1
|
|
|
|
ELSE
|
|
|
|
IL(I) = QI
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
|
|
|
|
IF( N.EQ.1 ) THEN
|
|
|
|
IF( ICOMPZ.EQ.2 .AND. OW(1).EQ.ME ) Z( IL(1), 1 ) = CONE
|
|
|
|
RETURN
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Determine the unit roundoff and over/underflow thresholds.
|
|
|
|
!
|
|
|
|
EPS = DLAMCH( 'E' )
|
|
|
|
EPS2 = EPS**2
|
|
|
|
SAFMIN = DLAMCH( 'S' )
|
|
|
|
SAFMAX = ONE / SAFMIN
|
|
|
|
SSFMAX = SQRT( SAFMAX ) / THREE
|
|
|
|
SSFMIN = SQRT( SAFMIN ) / EPS2
|
|
|
|
!
|
|
|
|
! Compute the eigenvalues and eigenvectors of the tridiagonal
|
|
|
|
! matrix.
|
|
|
|
!
|
|
|
|
IF( ICOMPZ.EQ.2 ) THEN
|
|
|
|
CALL ZLASET( 'Full', NRL, N, CZERO, CZERO, Z, LDZ )
|
|
|
|
DO J = 1, N
|
|
|
|
IF(OW(J).EQ.ME) THEN
|
|
|
|
Z( IL(J), J ) = CONE
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
NMAXIT = N*MAXIT
|
|
|
|
JTOT = 0
|
|
|
|
!
|
|
|
|
! Determine where the matrix splits and choose QL or QR iteration
|
|
|
|
! for each block, according to whether top or bottom diagonal
|
|
|
|
! element is smaller.
|
|
|
|
!
|
|
|
|
L1 = 1
|
|
|
|
NM1 = N - 1
|
|
|
|
!
|
|
|
|
10 CONTINUE
|
|
|
|
IF( L1.GT.N ) GO TO 160
|
|
|
|
IF( L1.GT.1 ) E( L1-1 ) = ZERO
|
|
|
|
IF( L1.LE.NM1 ) THEN
|
|
|
|
DO 20 M = L1, NM1
|
|
|
|
TST = DABS( E( M ) )
|
|
|
|
IF( TST.EQ.ZERO ) GO TO 30
|
|
|
|
IF( TST.LE.( SQRT(DABS(D(M)))*SQRT(DABS(D(M+1))))*EPS ) THEN
|
|
|
|
E( M ) = ZERO
|
|
|
|
GO TO 30
|
|
|
|
END IF
|
|
|
|
20 CONTINUE
|
|
|
|
END IF
|
|
|
|
M = N
|
|
|
|
!
|
|
|
|
30 CONTINUE
|
|
|
|
L = L1
|
|
|
|
LSV = L
|
|
|
|
LEND = M
|
|
|
|
LENDSV = LEND
|
|
|
|
L1 = M + 1
|
|
|
|
IF( LEND.EQ.L ) GO TO 10
|
|
|
|
!
|
|
|
|
! Scale submatrix in rows and columns L to LEND
|
|
|
|
!
|
|
|
|
ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) )
|
|
|
|
ISCALE = 0
|
|
|
|
IF( ANORM.EQ.ZERO ) GO TO 10
|
|
|
|
IF( ANORM.GT.SSFMAX ) THEN
|
|
|
|
ISCALE = 1
|
|
|
|
CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, INFO )
|
|
|
|
CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, INFO )
|
|
|
|
ELSE IF( ANORM.LT.SSFMIN ) THEN
|
|
|
|
ISCALE = 2
|
|
|
|
CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, INFO )
|
|
|
|
CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, INFO )
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Choose between QL and QR iteration
|
|
|
|
!
|
|
|
|
IF( DABS( D( LEND ) ).LT.DABS( D( L ) ) ) THEN
|
|
|
|
LEND = LSV
|
|
|
|
L = LENDSV
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
IF( LEND.GT.L ) THEN
|
|
|
|
!
|
|
|
|
! QL Iteration
|
|
|
|
!
|
|
|
|
! Look for small subdiagonal element.
|
|
|
|
!
|
|
|
|
40 CONTINUE
|
|
|
|
IF( L.NE.LEND ) THEN
|
|
|
|
LENDM1 = LEND - 1
|
|
|
|
DO 50 M = L, LENDM1
|
|
|
|
TST = DABS( E( M ) )**2
|
|
|
|
IF( TST.LE.( EPS2*DABS(D(M)) )*DABS(D(M+1))+ SAFMIN )GO TO 60
|
|
|
|
50 CONTINUE
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
M = LEND
|
|
|
|
!
|
|
|
|
60 CONTINUE
|
|
|
|
IF( M.LT.LEND ) E( M ) = ZERO
|
|
|
|
P = D( L )
|
|
|
|
IF( M.EQ.L ) GO TO 80
|
|
|
|
!
|
|
|
|
! If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
|
|
|
|
! to compute its eigensystem.
|
|
|
|
!
|
|
|
|
IF( M.EQ.L+1 ) THEN
|
|
|
|
IF( ICOMPZ.GT.0 ) THEN
|
|
|
|
CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S )
|
|
|
|
WORK( L ) = C
|
|
|
|
WORK( N-1+L ) = S
|
|
|
|
CTEMP = WORK( L )
|
|
|
|
STEMP = WORK( N-1+L )
|
|
|
|
IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
|
|
|
|
DO KL = 1, NRL
|
|
|
|
ZTEMP = Z( KL, 1+L )
|
|
|
|
Z( KL, 1+L ) = CTEMP*ZTEMP - STEMP*Z( KL, L )
|
|
|
|
Z( KL, L ) = STEMP*ZTEMP + CTEMP*Z( KL, L )
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
ELSE
|
|
|
|
CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 )
|
|
|
|
END IF
|
|
|
|
D( L ) = RT1
|
|
|
|
D( L+1 ) = RT2
|
|
|
|
E( L ) = ZERO
|
|
|
|
L = L + 2
|
|
|
|
IF( L.LE.LEND ) GO TO 40
|
|
|
|
GO TO 140
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
IF( JTOT.EQ.NMAXIT ) GO TO 140
|
|
|
|
JTOT = JTOT + 1
|
|
|
|
!
|
|
|
|
! Form shift.
|
|
|
|
!
|
|
|
|
G = ( D( L+1 )-P ) / ( TWO*E( L ) )
|
|
|
|
R = DLAPY2( G, ONE )
|
|
|
|
G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) )
|
|
|
|
!
|
|
|
|
S = ONE
|
|
|
|
C = ONE
|
|
|
|
P = ZERO
|
|
|
|
!
|
|
|
|
! Inner loop
|
|
|
|
!
|
|
|
|
MM1 = M - 1
|
|
|
|
DO 70 I = MM1, L, -1
|
|
|
|
F = S*E( I )
|
|
|
|
B = C*E( I )
|
|
|
|
CALL DLARTG( G, F, C, S, R )
|
|
|
|
IF( I.NE.M-1 ) E( I+1 ) = R
|
|
|
|
G = D( I+1 ) - P
|
|
|
|
R = ( D( I )-G )*S + TWO*C*B
|
|
|
|
P = S*R
|
|
|
|
D( I+1 ) = G + P
|
|
|
|
G = C*R - B
|
|
|
|
!
|
|
|
|
! If eigenvectors are desired, then save rotations.
|
|
|
|
!
|
|
|
|
IF( ICOMPZ.GT.0 ) THEN
|
|
|
|
WORK( I ) = C
|
|
|
|
WORK( N-1+I ) = -S
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
70 CONTINUE
|
|
|
|
!
|
|
|
|
! If eigenvectors are desired, then apply saved rotations.
|
|
|
|
!
|
|
|
|
IF( ICOMPZ.GT.0 ) THEN
|
|
|
|
DO J = M - L + 1 - 1, 1, -1
|
|
|
|
CTEMP = WORK( L + J -1)
|
|
|
|
STEMP = WORK( N-1+L + J-1)
|
|
|
|
IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
|
|
|
|
DO KL = 1, NRL
|
|
|
|
ZTEMP = Z( KL, J+1+L-1 )
|
|
|
|
Z( KL, J+1+L-1 ) = CTEMP*ZTEMP - STEMP*Z( KL, J+L-1 )
|
|
|
|
Z( KL, J+L-1 ) = STEMP*ZTEMP + CTEMP*Z( KL, J+L-1 )
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
D( L ) = D( L ) - P
|
|
|
|
E( L ) = G
|
|
|
|
GO TO 40
|
|
|
|
!
|
|
|
|
! Eigenvalue found.
|
|
|
|
!
|
|
|
|
80 CONTINUE
|
|
|
|
D( L ) = P
|
|
|
|
!
|
|
|
|
L = L + 1
|
|
|
|
IF( L.LE.LEND ) GO TO 40
|
|
|
|
GO TO 140
|
|
|
|
!
|
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
! QR Iteration
|
|
|
|
!
|
|
|
|
! Look for small superdiagonal element.
|
|
|
|
!
|
|
|
|
90 CONTINUE
|
|
|
|
IF( L.NE.LEND ) THEN
|
|
|
|
LENDP1 = LEND + 1
|
|
|
|
DO 100 M = L, LENDP1, -1
|
|
|
|
TST = DABS( E( M-1 ) )**2
|
|
|
|
IF( TST.LE.(EPS2*DABS(D(M)))*DABS(D(M-1))+ SAFMIN )GO TO 110
|
|
|
|
100 CONTINUE
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
M = LEND
|
|
|
|
!
|
|
|
|
110 CONTINUE
|
|
|
|
IF( M.GT.LEND ) E( M-1 ) = ZERO
|
|
|
|
P = D( L )
|
|
|
|
IF( M.EQ.L ) GO TO 130
|
|
|
|
!
|
|
|
|
! If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
|
|
|
|
! to compute its eigensystem.
|
|
|
|
!
|
|
|
|
IF( M.EQ.L-1 ) THEN
|
|
|
|
IF( ICOMPZ.GT.0 ) THEN
|
|
|
|
CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S )
|
|
|
|
WORK( M ) = C
|
|
|
|
WORK( N-1+M ) = S
|
|
|
|
CTEMP = WORK( M )
|
|
|
|
STEMP = WORK( N-1+M )
|
|
|
|
IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
|
|
|
|
DO KL = 1, NRL
|
|
|
|
ZTEMP = Z( KL, L)
|
|
|
|
Z( KL, L ) = CTEMP*ZTEMP - STEMP*Z( KL, L-1 )
|
|
|
|
Z( KL, L-1 ) = STEMP*ZTEMP + CTEMP*Z( KL, L-1 )
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
ELSE
|
|
|
|
CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 )
|
|
|
|
END IF
|
|
|
|
D( L-1 ) = RT1
|
|
|
|
D( L ) = RT2
|
|
|
|
E( L-1 ) = ZERO
|
|
|
|
L = L - 2
|
|
|
|
IF( L.GE.LEND ) GO TO 90
|
|
|
|
GO TO 140
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
IF( JTOT.EQ.NMAXIT ) GO TO 140
|
|
|
|
JTOT = JTOT + 1
|
|
|
|
!
|
|
|
|
! Form shift.
|
|
|
|
!
|
|
|
|
G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) )
|
|
|
|
R = DLAPY2( G, ONE )
|
|
|
|
G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) )
|
|
|
|
!
|
|
|
|
S = ONE
|
|
|
|
C = ONE
|
|
|
|
P = ZERO
|
|
|
|
!
|
|
|
|
! Inner loop
|
|
|
|
!
|
|
|
|
LM1 = L - 1
|
|
|
|
DO 120 I = M, LM1
|
|
|
|
F = S*E( I )
|
|
|
|
B = C*E( I )
|
|
|
|
CALL DLARTG( G, F, C, S, R )
|
|
|
|
IF( I.NE.M ) E( I-1 ) = R
|
|
|
|
G = D( I ) - P
|
|
|
|
R = ( D( I+1 )-G )*S + TWO*C*B
|
|
|
|
P = S*R
|
|
|
|
D( I ) = G + P
|
|
|
|
G = C*R - B
|
|
|
|
!
|
|
|
|
! If eigenvectors are desired, then save rotations.
|
|
|
|
!
|
|
|
|
IF( ICOMPZ.GT.0 ) THEN
|
|
|
|
WORK( I ) = C
|
|
|
|
WORK( N-1+I ) = S
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
120 CONTINUE
|
|
|
|
!
|
|
|
|
! If eigenvectors are desired, then apply saved rotations.
|
|
|
|
!
|
|
|
|
IF( ICOMPZ.GT.0 ) THEN
|
|
|
|
DO J = 1, L - M
|
|
|
|
CTEMP = WORK( M+J-1 )
|
|
|
|
STEMP = WORK( N-1+M+J-1 )
|
|
|
|
IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
|
|
|
|
DO KL = 1, NRL
|
|
|
|
ZTEMP = Z( KL, J+M )
|
|
|
|
Z( KL, J+M ) = CTEMP*ZTEMP - STEMP*Z(KL, J+M-1)
|
|
|
|
Z( KL, J+M-1 ) = STEMP*ZTEMP + CTEMP*Z(KL, J+M-1)
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
D( L ) = D( L ) - P
|
|
|
|
E( LM1 ) = G
|
|
|
|
GO TO 90
|
|
|
|
!
|
|
|
|
! Eigenvalue found.
|
|
|
|
!
|
|
|
|
130 CONTINUE
|
|
|
|
D( L ) = P
|
|
|
|
!
|
|
|
|
L = L - 1
|
|
|
|
IF( L.GE.LEND ) GO TO 90
|
|
|
|
GO TO 140
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Undo scaling if necessary
|
|
|
|
!
|
|
|
|
140 CONTINUE
|
|
|
|
IF( ISCALE.EQ.1 ) THEN
|
|
|
|
CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, &
|
|
|
|
& D( LSV ), N, INFO )
|
|
|
|
CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), &
|
|
|
|
& N, INFO )
|
|
|
|
ELSE IF( ISCALE.EQ.2 ) THEN
|
|
|
|
CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, &
|
|
|
|
& D( LSV ), N, INFO )
|
|
|
|
CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), &
|
|
|
|
& N, INFO )
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Check for no convergence to an eigenvalue after a total
|
|
|
|
! of N*MAXIT iterations.
|
|
|
|
!
|
|
|
|
IF( JTOT.EQ.NMAXIT ) THEN
|
|
|
|
DO 150 I = 1, N - 1
|
|
|
|
IF( E( I ).NE.ZERO ) INFO = INFO + 1
|
|
|
|
150 CONTINUE
|
|
|
|
RETURN
|
|
|
|
END IF
|
|
|
|
GO TO 10
|
|
|
|
!
|
|
|
|
! Order eigenvalues and eigenvectors.
|
|
|
|
!
|
|
|
|
160 CONTINUE
|
|
|
|
IF( ICOMPZ.EQ.0 ) THEN
|
|
|
|
!
|
|
|
|
! Use Quick Sort
|
|
|
|
!
|
|
|
|
CALL DLASRT( 'I', N, D, INFO )
|
|
|
|
!
|
|
|
|
ELSE
|
|
|
|
!
|
|
|
|
! Use Selection Sort to minimize swaps of eigenvectors
|
|
|
|
!
|
|
|
|
DO 180 II = 2, N
|
|
|
|
I = II - 1
|
|
|
|
K = I
|
|
|
|
P = D( I )
|
|
|
|
DO 170 J = II, N
|
|
|
|
IF( D( J ).LT.P ) THEN
|
|
|
|
K = J
|
|
|
|
P = D( J )
|
|
|
|
END IF
|
|
|
|
170 CONTINUE
|
|
|
|
IF( K.NE.I ) THEN
|
|
|
|
D( K ) = D( I )
|
|
|
|
D( I ) = P
|
|
|
|
CALL ZSWAP( NRL, Z( 1, I ), 1, Z( 1, K ), 1 )
|
|
|
|
END IF
|
|
|
|
180 CONTINUE
|
|
|
|
END IF
|
|
|
|
RETURN
|
|
|
|
!
|
|
|
|
! End of ZSTEQR
|
|
|
|
!
|
2005-05-18 22:02:51 +08:00
|
|
|
END SUBROUTINE pzsteqr
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-01-10 22:04:13 +08:00
|
|
|
SUBROUTINE zhpev_drv( JOBZ, UPLO, N, AP, W, Z, LDZ )
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
|
|
|
|
CHARACTER :: JOBZ, UPLO
|
|
|
|
INTEGER :: IOPT, INFO, LDZ, N
|
2005-08-28 22:09:42 +08:00
|
|
|
COMPLEX(DP) :: AP( * ), Z( LDZ, * )
|
|
|
|
REAL(DP) :: W( * )
|
|
|
|
REAL(DP), ALLOCATABLE :: RWORK(:)
|
|
|
|
COMPLEX(DP), ALLOCATABLE :: ZWORK(:)
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
#if defined __AIX
|
|
|
|
|
|
|
|
IOPT = 0
|
|
|
|
IF((JOBZ .EQ. 'V') .OR. (JOBZ .EQ. 'v') ) iopt = iopt + 1
|
|
|
|
IF((UPLO .EQ. 'U') .OR. (UPLO .EQ. 'u') ) iopt = iopt + 20
|
|
|
|
ALLOCATE( rwork( 4*n ) )
|
|
|
|
CALL ZHPEV(IOPT, ap, w, z, ldz, n, rwork, 4*n)
|
|
|
|
DEALLOCATE( rwork )
|
|
|
|
|
2003-05-02 06:47:53 +08:00
|
|
|
#else
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
ALLOCATE( rwork( MAX(1, 3*n-2) ), zwork( MAX(1, 2*n-1)) )
|
|
|
|
CALL ZHPEV(jobz, uplo, n, ap, w, z, ldz, zwork, rwork, INFO)
|
|
|
|
DEALLOCATE( rwork, zwork )
|
|
|
|
IF( info .NE. 0 ) THEN
|
|
|
|
CALL errore( ' dspev_drv ', ' diagonalization failed ',info )
|
|
|
|
END IF
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
RETURN
|
2006-01-10 22:04:13 +08:00
|
|
|
END SUBROUTINE zhpev_drv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
2006-02-03 09:16:33 +08:00
|
|
|
SUBROUTINE pzhpev_drv( jobz, ap, lda, w, z, ldz, &
|
|
|
|
nrl, n, nproc, mpime, comm_in )
|
|
|
|
USE kinds
|
|
|
|
IMPLICIT NONE
|
|
|
|
CHARACTER :: JOBZ
|
|
|
|
INTEGER :: lda, ldz, nrl, n, nproc, mpime
|
|
|
|
INTEGER, OPTIONAL, INTENT(IN) :: comm_in
|
|
|
|
COMPLEX(DP) :: ap( lda, * ), z( ldz, * )
|
|
|
|
REAL(DP) :: w( * )
|
|
|
|
REAL(DP) :: rwork( n )
|
|
|
|
COMPLEX(DP) :: cwork( n )
|
|
|
|
INTEGER :: comm
|
|
|
|
!
|
|
|
|
IF ( PRESENT( comm_in ) ) THEN
|
|
|
|
comm = comm_in
|
2006-02-03 17:28:55 +08:00
|
|
|
#ifdef __MPI
|
2006-02-03 09:16:33 +08:00
|
|
|
ELSE
|
|
|
|
comm = MPI_COMM_WORLD
|
2006-02-03 17:28:55 +08:00
|
|
|
#endif
|
2006-02-03 09:16:33 +08:00
|
|
|
END IF
|
|
|
|
CALL pzhptrd( n, nrl, ap, lda, w, rwork, cwork, nproc, mpime, comm)
|
|
|
|
IF( jobz == 'V' .OR. jobz == 'v' ) THEN
|
|
|
|
CALL pzupgtr( n, nrl, ap, lda, cwork, z, ldz, nproc, mpime, comm)
|
|
|
|
END IF
|
|
|
|
CALL pzsteqr( jobz, n, nrl, w, rwork, z, ldz, nproc, mpime, comm)
|
|
|
|
RETURN
|
2006-01-10 22:04:13 +08:00
|
|
|
END SUBROUTINE pzhpev_drv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
!
|
|
|
|
! My parallel blas
|
|
|
|
!
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
|
|
|
|
|
|
|
|
SUBROUTINE mattr_drv( m, k, a, lda, b, ldb, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
! Compute B as the transpose of matrix A
|
|
|
|
! A and B are distributed on a 2D cartesian processor
|
|
|
|
! grid in a block cyclic way (as in scalapack),
|
|
|
|
! using a block size of NB
|
|
|
|
!
|
|
|
|
! B := A'
|
|
|
|
!
|
|
|
|
! A is a K by M matrix
|
|
|
|
! B is an M by K matrix
|
|
|
|
!
|
|
|
|
implicit none
|
|
|
|
!
|
|
|
|
INTEGER, INTENT(IN) :: m, k
|
|
|
|
INTEGER, INTENT(IN) :: lda, ldb
|
|
|
|
REAL*8 :: a(lda,*), b(ldb,*)
|
|
|
|
INTEGER, INTENT(IN) :: nb, dims(2), coor(2), comm
|
|
|
|
!
|
|
|
|
#if defined __MPI
|
|
|
|
|
|
|
|
include 'mpif.h'
|
|
|
|
|
|
|
|
!
|
|
|
|
integer ierr
|
|
|
|
integer ndims, rowid, colid
|
|
|
|
integer coosrc(2), coodst(2), ipsrc, ipdst, mpime
|
|
|
|
integer ihsnd, ihrcv
|
|
|
|
logical periods(2)
|
|
|
|
!
|
|
|
|
integer :: iu
|
|
|
|
integer :: i, j, nk, nm
|
|
|
|
integer :: ii, jj
|
|
|
|
integer :: isrc, jsrc
|
|
|
|
integer :: idst, jdst
|
|
|
|
integer :: itag
|
|
|
|
integer :: nmb, nkb
|
|
|
|
integer :: istatus( MPI_STATUS_SIZE )
|
|
|
|
real*8, allocatable :: abuf(:,:)
|
|
|
|
!
|
|
|
|
integer :: numroc
|
|
|
|
integer :: indxg2l
|
|
|
|
external :: numroc, indxg2l
|
|
|
|
|
|
|
|
!
|
|
|
|
CALL GRID2D_RANK( dims(1), dims(2), coor(1), coor(2), mpime )
|
|
|
|
!
|
|
|
|
iu = 200 + mpime
|
|
|
|
!
|
|
|
|
! Compute the global number of blocks for matrix dimension
|
|
|
|
!
|
|
|
|
nmb = ( m + nb - 1 ) / nb
|
|
|
|
nkb = ( k + nb - 1 ) / nb
|
|
|
|
!
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),' m,k,nmb,nkb,nb = ',m,k,nmb,nkb,nb
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),' ---------------- '
|
|
|
|
!
|
|
|
|
ALLOCATE( abuf( nb, nb ) )
|
|
|
|
!
|
|
|
|
DO i = 1, nmb
|
|
|
|
DO j = 1, nkb
|
|
|
|
!
|
|
|
|
itag = j + nkb * (i-1)
|
|
|
|
!
|
|
|
|
coosrc(1) = MOD( (j-1), dims(1) )
|
|
|
|
coosrc(2) = MOD( (i-1), dims(2) )
|
|
|
|
!
|
|
|
|
coodst(1) = MOD( (i-1), dims(1) )
|
|
|
|
coodst(2) = MOD( (j-1), dims(2) )
|
|
|
|
!
|
|
|
|
CALL GRID2D_RANK( dims(1), dims(2), coosrc(1), coosrc(2), ipsrc )
|
|
|
|
CALL GRID2D_RANK( dims(1), dims(2), coodst(1), coodst(2), ipdst )
|
|
|
|
!
|
|
|
|
jsrc = INDXG2L( 1 + (j-1)*nb, nb, coor(1), 0, dims(1) )
|
|
|
|
isrc = INDXG2L( 1 + (i-1)*nb, nb, coor(2), 0, dims(2) )
|
|
|
|
!
|
|
|
|
jdst = INDXG2L( 1 + (j-1)*nb, nb, coor(2), 0, dims(2) )
|
|
|
|
idst = INDXG2L( 1 + (i-1)*nb, nb, coor(1), 0, dims(1) )
|
|
|
|
!
|
|
|
|
nk = MIN( nb, k - (j-1)*nb ) ! number of element in the block
|
|
|
|
nm = MIN( nb, m - (i-1)*nb ) ! number of element in the block
|
|
|
|
!
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' cccsrc,ipsrc = ', coosrc(1), coosrc(2), ipsrc
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' cccdst,ipdst = ', coodst(1), coodst(2), ipdst
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' i,j,nm,nk = ', i,j,nm,nk
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' isrc,jsrc = ', isrc,jsrc
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' idst,jdst = ', idst,jdst
|
|
|
|
!
|
|
|
|
IF( ipsrc == ipdst ) THEN
|
|
|
|
IF( ipsrc == mpime ) THEN
|
|
|
|
DO ii = 1, nm
|
|
|
|
DO jj = 1, nk
|
|
|
|
b( idst + ii - 1, jdst + jj - 1 ) = a( jsrc + jj - 1, isrc + ii - 1 )
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
ELSE
|
|
|
|
IF( ipsrc == mpime ) THEN
|
|
|
|
DO ii = 1, nm
|
|
|
|
DO jj = 1, nk
|
|
|
|
abuf( ii, jj ) = a( jsrc + jj - 1, isrc + ii - 1 )
|
|
|
|
!
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' ii,jj,abuf_snd = ', ii,jj,abuf( ii, jj )
|
|
|
|
!
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
CALL MPI_ISEND( abuf, nb*nb, MPI_DOUBLE_PRECISION, ipdst, itag, comm, ihsnd, ierr )
|
|
|
|
CALL mpi_wait(ihsnd, istatus, ierr)
|
|
|
|
ELSE IF( ipdst == mpime ) THEN
|
|
|
|
CALL MPI_IRECV( abuf, nb*nb, MPI_DOUBLE_PRECISION, ipsrc, itag, comm, ihrcv, ierr )
|
|
|
|
CALL mpi_wait(ihrcv, istatus, ierr)
|
|
|
|
DO jj = 1, nk
|
|
|
|
DO ii = 1, nm
|
|
|
|
!
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' ii,jj,abuf_rcv = ', ii,jj,abuf( ii, jj )
|
|
|
|
!
|
|
|
|
b( idst + ii - 1, jdst + jj - 1 ) = abuf( ii, jj )
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! WRITE(iu,*) 'TR',coor(1),coor(2),itag,' ---------------- '
|
|
|
|
!
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
INTEGER :: i, j
|
|
|
|
|
|
|
|
DO j = 1, k
|
|
|
|
DO i = 1, m
|
|
|
|
B( i, j ) = A( j, i )
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
END SUBROUTINE mattr_drv
|
|
|
|
|
|
|
|
|
|
|
|
! ---------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
SUBROUTINE matsplit_drv( m, k, ar, ldar, a, lda, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
implicit none
|
|
|
|
!
|
|
|
|
INTEGER, INTENT(IN) :: m, k
|
|
|
|
INTEGER, INTENT(IN) :: ldar
|
|
|
|
REAL*8 :: ar(ldar,*) ! matrix to be splitted, replicated on all proc
|
|
|
|
INTEGER, INTENT(IN) :: lda
|
|
|
|
REAL*8 :: a(lda,*)
|
|
|
|
INTEGER, INTENT(IN) :: nb, coor(2), dims(2), comm
|
|
|
|
!
|
|
|
|
INTEGER :: i, j, nra, nca, ii, jj
|
|
|
|
!
|
|
|
|
INTEGER :: numroc, INDXL2G
|
|
|
|
EXTERNAL :: numroc, INDXL2G
|
|
|
|
|
|
|
|
nra = NUMROC( m, nb, coor(1), 0, dims(1) ) ! total number of local row for matrix A, C
|
|
|
|
nca = NUMROC( k, nb, coor(2), 0, dims(2) ) ! total number of local columns of A
|
|
|
|
|
|
|
|
do j = 1, nca
|
|
|
|
jj = INDXL2G( j, NB, coor(2), 0, dims(2) )
|
|
|
|
do i = 1, nra
|
|
|
|
ii = INDXL2G( i, NB, coor(1), 0, dims(1) )
|
|
|
|
a( i, j ) = ar( ii, jj )
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
END SUBROUTINE matsplit_drv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! ---------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
SUBROUTINE matmerge_drv( m, k, a, lda, ar, ldar, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
implicit none
|
|
|
|
!
|
|
|
|
INTEGER, INTENT(IN) :: m, k
|
|
|
|
INTEGER, INTENT(IN) :: ldar
|
|
|
|
REAL*8 :: ar(ldar,*) ! matrix to be merged, replicated on all proc
|
|
|
|
INTEGER, INTENT(IN) :: lda
|
|
|
|
REAL*8 :: a(lda,*)
|
|
|
|
INTEGER, INTENT(IN) :: nb, coor(2), dims(2), comm
|
|
|
|
!
|
|
|
|
INTEGER :: i, j, ii, jj, ierr
|
|
|
|
|
|
|
|
#if defined __MPI
|
|
|
|
|
|
|
|
include 'mpif.h'
|
|
|
|
!
|
|
|
|
|
|
|
|
INTEGER :: jsrc, isrc, ipsrc, coosrc(2)
|
|
|
|
INTEGER :: nmb, nkb, nk, nm, mpime
|
|
|
|
|
|
|
|
REAL*8, ALLOCATABLE :: buf(:,:)
|
|
|
|
!
|
|
|
|
INTEGER :: INDXG2L
|
|
|
|
EXTERNAL :: INDXG2L
|
|
|
|
!
|
|
|
|
CALL GRID2D_RANK( dims(1), dims(2), coor(1), coor(2), mpime )
|
|
|
|
|
|
|
|
nmb = ( m + nb - 1 ) / nb
|
|
|
|
nkb = ( k + nb - 1 ) / nb
|
|
|
|
|
|
|
|
ALLOCATE( buf( nb, nb ) )
|
|
|
|
|
|
|
|
DO j = 1, nkb
|
|
|
|
DO i = 1, nmb
|
|
|
|
!
|
|
|
|
coosrc(1) = MOD( (i-1), dims(1) )
|
|
|
|
coosrc(2) = MOD( (j-1), dims(2) )
|
|
|
|
!
|
|
|
|
CALL GRID2D_RANK( dims(1), dims(2), coosrc(1), coosrc(2), ipsrc )
|
|
|
|
!
|
|
|
|
isrc = INDXG2L( 1 + (i-1)*nb, nb, coor(1), 0, dims(1) )
|
|
|
|
jsrc = INDXG2L( 1 + (j-1)*nb, nb, coor(2), 0, dims(2) )
|
|
|
|
!
|
|
|
|
nm = MIN( nb, m - (i-1)*nb ) ! number of element in the block
|
|
|
|
nk = MIN( nb, k - (j-1)*nb ) ! number of element in the block
|
|
|
|
|
|
|
|
IF( ipsrc == mpime ) THEN
|
|
|
|
DO jj = 1, nk
|
|
|
|
DO ii = 1, nm
|
|
|
|
buf( ii, jj ) = a( isrc + ii - 1, jsrc + jj - 1 )
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
ENDIF
|
|
|
|
!
|
|
|
|
CALL MPI_BCAST( buf, nb*nb, MPI_DOUBLE_PRECISION, ipsrc, comm, ierr )
|
|
|
|
!
|
|
|
|
do jj = 1, nk
|
|
|
|
do ii = 1, nm
|
|
|
|
ar( ii + (i-1)*nb, jj + (j-1)*nb ) = buf( ii, jj )
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
!
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
!
|
|
|
|
DEALLOCATE( buf )
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
DO j = 1, k
|
|
|
|
DO i = 1, m
|
|
|
|
ar( i, j ) = a( i, j )
|
|
|
|
END DO
|
|
|
|
END DO
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
END SUBROUTINE matmerge_drv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! ---------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
SUBROUTINE matscal_drv( m, n, beta, c, ldc, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
implicit none
|
|
|
|
!
|
|
|
|
INTEGER, INTENT(IN) :: m, n
|
|
|
|
REAL*8, INTENT(IN) :: beta
|
|
|
|
INTEGER, INTENT(IN) :: ldc
|
|
|
|
REAL*8 :: c(ldc,*)
|
|
|
|
INTEGER, INTENT(IN) :: nb, coor(2), dims(2), comm
|
|
|
|
!
|
|
|
|
INTEGER :: i, j, nr, nc, ierr
|
|
|
|
!
|
|
|
|
INTEGER :: numroc
|
|
|
|
EXTERNAL :: numroc
|
|
|
|
|
|
|
|
nr = NUMROC( m, nb, coor(1), 0, dims(1) ) ! local row of C
|
|
|
|
nc = NUMROC( n, nb, coor(2), 0, dims(2) ) ! local colum of C
|
|
|
|
|
|
|
|
IF( beta == 0.0d0 ) THEN
|
|
|
|
do j = 1, nc
|
|
|
|
do i = 1, nr
|
|
|
|
c(i,j) = 0.0d0
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
ELSE
|
|
|
|
do j = 1, nc
|
|
|
|
do i = 1, nr
|
|
|
|
c(i,j) = beta * c(i,j)
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
END IF
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
END SUBROUTINE
|
|
|
|
|
|
|
|
|
|
|
|
! ---------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
SUBROUTINE matmul_drv( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
implicit none
|
|
|
|
!
|
|
|
|
CHARACTER(LEN=1), INTENT(IN) :: transa, transb
|
|
|
|
INTEGER, INTENT(IN) :: m, n, k
|
|
|
|
REAL*8, INTENT(IN) :: alpha, beta
|
|
|
|
INTEGER, INTENT(IN) :: lda, ldb, ldc
|
|
|
|
REAL*8 :: a(lda,*), b(ldb,*), c(ldc,*)
|
|
|
|
INTEGER, INTENT(IN) :: nb, dims(2), coor(2), comm
|
|
|
|
!
|
|
|
|
! DGEMM PERFORMS ONE OF THE MATRIX-MATRIX OPERATIONS
|
|
|
|
!
|
|
|
|
! C := ALPHA*OP( A )*OP( B ) + BETA*C,
|
|
|
|
!
|
|
|
|
! WHERE OP( X ) IS ONE OF
|
|
|
|
!
|
|
|
|
! OP( X ) = X OR OP( X ) = X',
|
|
|
|
!
|
|
|
|
! ALPHA AND BETA ARE SCALARS, AND A, B AND C ARE MATRICES, WITH OP( A )
|
|
|
|
! AN M BY K MATRIX, OP( B ) A K BY N MATRIX AND C AN M BY N MATRIX.
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
|
|
|
|
#if defined __MPI
|
|
|
|
|
|
|
|
include 'mpif.h'
|
|
|
|
!
|
|
|
|
integer ierr
|
|
|
|
integer ndims, rowid, colid
|
|
|
|
integer comm_row, comm_col
|
|
|
|
!
|
|
|
|
integer :: ib, jb, kb, ibl, kbl, jbl
|
|
|
|
integer :: i, j, kk, ni, nj, nk, nm, il, jl
|
|
|
|
integer :: nnb, nmb, nkb
|
|
|
|
integer :: nr, nra, nca, nc, nrb, ncb, ii, jj
|
|
|
|
integer :: nrt, ncat, nct, nrbt
|
|
|
|
real*8, allocatable :: abuf(:,:), bbuf(:,:)
|
|
|
|
real*8, allocatable :: at(:,:)
|
|
|
|
real*8, allocatable :: bt(:,:)
|
|
|
|
!
|
|
|
|
integer :: numroc
|
|
|
|
integer :: indxg2l
|
|
|
|
external :: numroc, indxg2l
|
|
|
|
!
|
|
|
|
IF( dims(1) * dims(2) == 1 ) THEN
|
|
|
|
|
|
|
|
! if there is only one proc no need of using parallel alg.
|
|
|
|
|
|
|
|
call dgemm( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
|
|
|
|
CALL MPI_COMM_SPLIT( COMM, coor(2), coor(1), COMM_COL, IERR )
|
|
|
|
CALL MPI_COMM_RANK( COMM_COL, rowid, IERR )
|
|
|
|
!
|
|
|
|
CALL MPI_COMM_SPLIT( COMM, coor(1), coor(2), COMM_ROW, IERR )
|
|
|
|
CALL MPI_COMM_RANK( COMM_ROW, colid, IERR )
|
|
|
|
!
|
|
|
|
! Compute the global number of blocks for matrix dimension
|
|
|
|
!
|
|
|
|
nmb = ( m + nb - 1 ) / nb
|
|
|
|
!
|
|
|
|
nnb = ( n + nb - 1 ) / nb
|
|
|
|
!
|
|
|
|
nkb = ( k + nb - 1 ) / nb
|
|
|
|
!
|
|
|
|
! Compute the total number of local row for matrix A, C
|
|
|
|
!
|
|
|
|
nr = NUMROC( m, nb, coor(1), 0, dims(1) ) ! local row of C
|
|
|
|
!
|
|
|
|
nra = NUMROC( m, nb, coor(1), 0, dims(1) ) ! local row of OP( A )
|
|
|
|
nca = NUMROC( k, nb, coor(2), 0, dims(2) ) ! local columns of OP( A )
|
|
|
|
!
|
|
|
|
nrb = NUMROC( k, nb, coor(1), 0, dims(1) ) ! local row of OP( B )
|
|
|
|
ncb = NUMROC( n, nb, coor(2), 0, dims(2) ) ! local colum of OP( B )
|
|
|
|
!
|
|
|
|
nc = NUMROC( n, nb, coor(2), 0, dims(2) ) ! local colum of C
|
|
|
|
!
|
|
|
|
IF( transa == 'T' .OR. transa == 't' ) THEN
|
|
|
|
!
|
|
|
|
ALLOCATE( at( nra, nca ) )
|
|
|
|
!
|
|
|
|
CALL mattr_drv( m, k, a, lda, at, nra, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
IF( transb == 'T' .OR. transb == 't' ) THEN
|
|
|
|
!
|
|
|
|
ALLOCATE( bt( nrb, ncb ) )
|
|
|
|
!
|
|
|
|
CALL mattr_drv( k, n, b, ldb, bt, nrb, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
END IF
|
|
|
|
!
|
|
|
|
! Scale matrix C
|
|
|
|
!
|
|
|
|
CALL matscal_drv( m, n, beta, c, ldc, nb, dims, coor, comm )
|
|
|
|
!
|
|
|
|
! loop over the rows/columns blocks of matrix OP(A)/OP(B)
|
|
|
|
!
|
|
|
|
do kb = 1, nkb
|
|
|
|
!
|
|
|
|
kk = ( kb - 1 ) * nb + 1 ! first element of the block (global index)
|
|
|
|
nk = MIN( nb, k - kk + 1 ) ! number of element in the block
|
|
|
|
|
|
|
|
colid = MOD( (kb-1), dims(2) ) ! processor owning the block
|
|
|
|
rowid = MOD( (kb-1), dims(1) )
|
|
|
|
|
|
|
|
allocate( abuf( nr, nk ) )
|
|
|
|
|
|
|
|
if( colid == coor(2) ) then
|
|
|
|
nrt = 0
|
|
|
|
ibl = 0
|
|
|
|
kbl = INDXG2L( 1 + (kb-1)*nb, nb, coor(2), 0, dims(2) )
|
|
|
|
do ib = 1 + coor(1), nmb, dims(1)
|
|
|
|
i = ( ib - 1 ) * nb + 1
|
|
|
|
ni = MIN( nb, m - i + 1 )
|
|
|
|
IF( transa == 'T' .OR. transa == 't' ) THEN
|
|
|
|
do jj = 1, nk
|
|
|
|
do ii = 1, ni
|
|
|
|
abuf( ii + nrt, jj ) = at( ii + ibl*nb, jj + kbl - 1 )
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
ELSE
|
|
|
|
do jj = 1, nk
|
|
|
|
do ii = 1, ni
|
|
|
|
abuf( ii + nrt, jj ) = a( ii + ibl*nb, jj + kbl - 1 )
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
END IF
|
|
|
|
nrt = nrt + ni
|
|
|
|
ibl = ibl + 1
|
|
|
|
end do
|
|
|
|
end if
|
|
|
|
CALL MPI_BCAST( abuf(1,1), nr*nk, MPI_DOUBLE_PRECISION, colid, COMM_ROW, IERR )
|
|
|
|
|
|
|
|
allocate( bbuf( nk, nc ) )
|
|
|
|
|
|
|
|
if( rowid == coor(1) ) then
|
|
|
|
nct = 0
|
|
|
|
jbl = 0
|
|
|
|
kbl = INDXG2L( 1 + (kb-1)*nb, nb, coor(1), 0, dims(1) )
|
|
|
|
do jb = 1 + coor(2), nnb, dims(2)
|
|
|
|
j = ( jb - 1 ) * nb + 1
|
|
|
|
nj = MIN( nb, n - j + 1 )
|
|
|
|
IF( transb == 'T' .OR. transb == 't' ) THEN
|
|
|
|
do jj = 1, nj
|
|
|
|
do ii = 1, nk
|
|
|
|
bbuf( ii, jj + nct ) = bt( ii + kbl - 1, jj + jbl*nb )
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
ELSE
|
|
|
|
do jj = 1, nj
|
|
|
|
do ii = 1, nk
|
|
|
|
bbuf( ii, jj + nct ) = b( ii + kbl - 1, jj + jbl*nb )
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
END IF
|
|
|
|
nct = nct + nj
|
|
|
|
jbl = jbl + 1
|
|
|
|
end do
|
|
|
|
end if
|
|
|
|
|
|
|
|
CALL MPI_BCAST( bbuf(1,1), nk*nc, MPI_DOUBLE_PRECISION, rowid, COMM_COL, IERR )
|
|
|
|
|
|
|
|
ii = 1
|
|
|
|
do ib = 1 + coor(1), nmb, dims(1)
|
|
|
|
i = ( ib - 1 ) * nb + 1
|
|
|
|
il = INDXG2L( i, nb, coor(1), 0, dims(1) )
|
|
|
|
ni = MIN( nb, m - i + 1 )
|
|
|
|
jj = 1
|
|
|
|
do jb = 1 + coor(2), nnb, dims(2)
|
|
|
|
j = ( jb - 1 ) * nb + 1
|
|
|
|
jl = INDXG2L( j, nb, coor(2), 0, dims(2) )
|
|
|
|
nj = MIN( nb, n - j + 1 )
|
|
|
|
call dgemm( 'n', 'n', ni, nj, nk, alpha, abuf( ii, 1 ), nra, bbuf( 1, jj ), nk, 1.0d0, c( il, jl ), ldc )
|
|
|
|
jj = jj + nj
|
|
|
|
end do
|
|
|
|
ii = ii + ni
|
|
|
|
end do
|
|
|
|
|
|
|
|
deallocate( abuf )
|
|
|
|
deallocate( bbuf )
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
IF( ALLOCATED( at ) ) DEALLOCATE( at )
|
|
|
|
IF( ALLOCATED( bt ) ) DEALLOCATE( bt )
|
|
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
! if we are not compiling with __MPI this is equivalent to a blas call
|
|
|
|
|
|
|
|
call dgemm( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
END SUBROUTINE
|
|
|
|
|
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
!
|
|
|
|
! Copyright (C) 2005 Carlo Cavazzoni
|
|
|
|
! This file is distributed under the terms of the
|
|
|
|
! GNU General Public License. See the file `License'
|
|
|
|
! in the root directory of the present distribution,
|
|
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
|
|
!
|
|
|
|
|
|
|
|
|
|
|
|
SUBROUTINE rep_matmul_drv( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC, comm )
|
|
|
|
!
|
|
|
|
! Parallel matrix multiplication with replicated matrix
|
|
|
|
!
|
|
|
|
implicit none
|
|
|
|
!
|
|
|
|
CHARACTER(LEN=1), INTENT(IN) :: transa, transb
|
|
|
|
INTEGER, INTENT(IN) :: m, n, k
|
|
|
|
REAL*8, INTENT(IN) :: alpha, beta
|
|
|
|
INTEGER, INTENT(IN) :: lda, ldb, ldc
|
|
|
|
REAL*8 :: a(lda,*), b(ldb,*), c(ldc,*)
|
|
|
|
INTEGER, INTENT(IN) :: comm
|
|
|
|
!
|
|
|
|
! DGEMM PERFORMS ONE OF THE MATRIX-MATRIX OPERATIONS
|
|
|
|
!
|
|
|
|
! C := ALPHA*OP( A )*OP( B ) + BETA*C,
|
|
|
|
!
|
|
|
|
! WHERE OP( X ) IS ONE OF
|
|
|
|
!
|
|
|
|
! OP( X ) = X OR OP( X ) = X',
|
|
|
|
!
|
|
|
|
! ALPHA AND BETA ARE SCALARS, AND A, B AND C ARE MATRICES, WITH OP( A )
|
|
|
|
! AN M BY K MATRIX, OP( B ) A K BY N MATRIX AND C AN M BY N MATRIX.
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
|
|
|
|
#if defined __MPI
|
|
|
|
|
|
|
|
include 'mpif.h'
|
|
|
|
!
|
|
|
|
|
|
|
|
INTEGER :: ME, I, II, J, JJ, IP, SOUR, DEST, INFO, IERR, ioff, ldx
|
|
|
|
INTEGER :: NB, IB_S, NB_SOUR, IB_SOUR, IBUF
|
|
|
|
INTEGER :: nproc, mpime, q, r
|
|
|
|
|
|
|
|
REAL*8, ALLOCATABLE :: auxa( : )
|
|
|
|
REAL*8, ALLOCATABLE :: auxc( : )
|
|
|
|
|
|
|
|
!
|
|
|
|
! ... BODY
|
|
|
|
!
|
|
|
|
|
|
|
|
CALL MPI_COMM_SIZE(comm, NPROC, IERR)
|
|
|
|
CALL MPI_COMM_RANK(comm, MPIME, IERR)
|
|
|
|
|
|
|
|
IF ( NPROC == 1 ) THEN
|
|
|
|
|
|
|
|
! if there is only one proc no need of using parallel alg.
|
|
|
|
|
|
|
|
CALL DGEMM(TRANSA, TRANSB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc)
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
END IF
|
|
|
|
|
|
|
|
ME = MPIME + 1
|
|
|
|
Q = INT( m / NPROC )
|
|
|
|
R = MOD( m , NPROC )
|
|
|
|
|
|
|
|
! ... Find out the number of elements in the local block
|
|
|
|
! along "M" first dimension os matrix A
|
|
|
|
|
|
|
|
NB = Q
|
|
|
|
IF( ME <= R ) NB = NB + 1
|
|
|
|
|
|
|
|
! ... Find out the global index of the local first row
|
|
|
|
|
|
|
|
IF( ME <= R ) THEN
|
|
|
|
ib_s = (Q+1)*(ME-1) + 1
|
|
|
|
ELSE
|
|
|
|
ib_s = Q*(ME-1) + R + 1
|
|
|
|
END IF
|
|
|
|
|
|
|
|
ldx = m / nproc + 1
|
|
|
|
|
|
|
|
ALLOCATE( auxa( MAX( n, m ) * ldx ) )
|
|
|
|
ALLOCATE( auxc( MAX( n, m ) * ldx ) )
|
|
|
|
|
|
|
|
IF( TRANSA == 'N' .OR. TRANSA == 'n' ) THEN
|
|
|
|
ibuf = 0
|
|
|
|
ioff = ib_s - 1
|
|
|
|
DO J = 1, k
|
|
|
|
DO I = 1, NB
|
|
|
|
auxa( ibuf + I ) = A( I + ioff, J )
|
|
|
|
END DO
|
|
|
|
ibuf = ibuf + ldx
|
|
|
|
END DO
|
|
|
|
ELSE
|
|
|
|
ibuf = 0
|
|
|
|
ioff = ib_s - 1
|
|
|
|
DO J = 1, k
|
|
|
|
DO I = 1, NB
|
|
|
|
auxa( ibuf + I ) = A( J, I + ioff )
|
|
|
|
END DO
|
|
|
|
ibuf = ibuf + ldx
|
|
|
|
END DO
|
|
|
|
!ioff = ib_s - 1
|
|
|
|
!call mytranspose( A( 1, ioff + 1 ), lda, auxa(1), ldx, m, nb)
|
|
|
|
END IF
|
|
|
|
|
|
|
|
IF( beta /= 0.0d0 ) THEN
|
|
|
|
ibuf = 0
|
|
|
|
ioff = ib_s - 1
|
|
|
|
DO J = 1, n
|
|
|
|
DO I = 1, NB
|
|
|
|
auxc( ibuf + I ) = C( I + ioff, J )
|
|
|
|
END DO
|
|
|
|
ibuf = ibuf + ldx
|
|
|
|
END DO
|
|
|
|
END IF
|
|
|
|
|
|
|
|
CALL DGEMM( 'N', transb, nb, n, k, alpha, auxa(1), ldx, B, ldb, beta, auxc(1), ldx )
|
|
|
|
|
|
|
|
! ... Here processors exchange blocks
|
|
|
|
|
|
|
|
DO IP = 0, NPROC-1
|
|
|
|
|
|
|
|
! ... Find out the number of elements in the block of processor SOUR
|
|
|
|
|
|
|
|
NB_SOUR = q
|
|
|
|
IF( (IP+1) .LE. r ) NB_SOUR = NB_SOUR+1
|
|
|
|
|
|
|
|
! ... Find out the global index of the first row owned by SOUR
|
|
|
|
|
|
|
|
IF( (IP+1) .LE. r ) THEN
|
|
|
|
ib_sour = (Q+1)*IP + 1
|
|
|
|
ELSE
|
|
|
|
ib_sour = Q*IP + R + 1
|
|
|
|
END IF
|
|
|
|
|
|
|
|
IF( mpime == ip ) auxa(1:n*ldx) = auxc(1:n*ldx)
|
|
|
|
|
|
|
|
CALL MPI_BCAST( auxa(1), ldx*n, mpi_double_precision, ip, comm, IERR)
|
|
|
|
|
|
|
|
IBUF = 0
|
|
|
|
ioff = IB_SOUR - 1
|
|
|
|
DO J = 1, N
|
|
|
|
DO I = 1, NB_SOUR
|
|
|
|
C( I + ioff, J ) = AUXA( IBUF + I )
|
|
|
|
END DO
|
|
|
|
IBUF = IBUF + ldx
|
|
|
|
END DO
|
|
|
|
|
|
|
|
END DO
|
|
|
|
|
|
|
|
DEALLOCATE( auxa, auxc )
|
|
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
! if we are not compiling with __MPI this is equivalent to a blas call
|
|
|
|
|
|
|
|
CALL DGEMM(TRANSA, TRANSB, m, N, k, alpha, A, lda, B, ldb, beta, C, ldc)
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
RETURN
|
2003-03-03 07:19:44 +08:00
|
|
|
|
2006-01-10 22:04:13 +08:00
|
|
|
END SUBROUTINE rep_matmul_drv
|
2003-03-03 07:19:44 +08:00
|
|
|
|
|
|
|
!==----------------------------------------------==!
|
|
|
|
END MODULE parallel_toolkit
|
|
|
|
!==----------------------------------------------==!
|