quantum-espresso/flib/matmul.f90

904 lines
24 KiB
Fortran

!
! Copyright (C) 2002 FPMD group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#if defined __TEST_MYMATMUL
PROGRAM matmul
IMPLICIT NONE
REAL*8, ALLOCATABLE :: ax(:,:), bx(:,:), cx(:,:), dx(:,:), ex(:,:), fx(:,:)
REAL*8, ALLOCATABLE :: a(:,:), b(:,:), c(:,:), d(:,:), e(:,:), f(:,:)
INTEGER :: n, nloc, mpime, nproc, root, group
INTEGER :: i, j, k, iloc, jloc
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
CALL PARALLEL_STARTUP(NPROC,MPIME,ROOT,GROUP)
n = 5
nloc = ldim_cyclic(n, nproc, mpime)
ALLOCATE( a(n,n), b(n,n), c(n,n), d(n,n), e(n,n), f(n,n) )
ALLOCATE( ax(nloc,n), bx(nloc,n), cx(nloc,n), dx(n,nloc), ex(nloc,n), fx(n,nloc) )
CALL RANDOM_NUMBER( a )
CALL RANDOM_NUMBER( b )
do j = 1, n
do i = 1, n
IF( mpime == owner_cyclic(i, n, nproc) ) THEN
iloc = lind_cyclic(i, n, nproc, mpime)
ax( iloc, j ) = a( i, j )
bx( iloc, j ) = b( i, j )
END IF
IF( mpime == owner_cyclic(j, n, nproc) ) THEN
jloc = lind_cyclic(j, n, nproc, mpime)
dx( i, jloc ) = a( i, j )
fx( i, jloc ) = b( i, j )
END IF
end do
end do
do j = 1, n
do i = 1, n
c(i,j) = 0.0d0
do k = 1, n
!c(i,j) = c(i,j) + a(i,k) * b(k,j)
!c(i,j) = c(i,j) + a(i,k) * b(j,k)
!c(i,j) = c(i,j) + a(k,i) * b(k,j)
c(i,j) = c(i,j) + a(k,i) * b(j,k)
end do
end do
end do
IF( mpime == root ) THEN
do i = 1, n
write(6,100) (c(i,j),j=1,n), (a(i,j),j=1,n), (b(i,j),j=1,n)
end do
100 FORMAT( 5F7.4, ' |', 5F7.4, ' |', 5F7.4 )
END IF
!CALL mymatmul(ax, nloc, 'T', 'R', bx, nloc, 'T', 'R', cx, nloc, 'R', n, mpime, nproc)
!CALL mymatmul(ax, nloc, 'N', 'R', bx, nloc, 'T', 'R', cx, nloc, 'R', n, mpime, nproc)
!CALL mymatmul(ax, nloc, 'T', 'R', bx, nloc, 'N', 'R', cx, nloc, 'R', n, mpime, nproc)
CALL mymatmul(ax, nloc, 'T', 'R', bx, nloc, 'T', 'R', cx, nloc, 'R', n, mpime, nproc)
! CALL mymatmul(ax, nloc, 'N', 'R', bx, nloc, 'N', 'R', cx, nloc, 'R', n, mpime, nproc)
! CALL mymatmul(ax, nloc, 'N', 'R', fx, n, 'N', 'C', cx, nloc, 'R', n, mpime, nproc)
! CALL mymatmul(dx, n, 'N', 'C', bx, nloc, 'N', 'R', cx, nloc, 'R', n, mpime, nproc)
! CALL mymatmul(dx, n, 'N', 'C', fx, n, 'N', 'C', cx, nloc, 'R', n, mpime, nproc)
CALL mytrasp_dist(cx, nloc, 'R', dx, n, 'C', n, mpime, nproc)
CALL mytrasp_dati(dx, n, 'C', ex, nloc, 'R', n, mpime, nproc)
CALL mytrasp_dati(dx, n, 'C', fx, n, 'C', n, mpime, nproc)
do j = 1, n
do i = 1, n
c(i,j) = 0.0d0
b(i,j) = 0.0d0
a(i,j) = 0.0d0
d(i,j) = 0.0d0
e(i,j) = 0.0d0
f(i,j) = 0.0d0
IF( mpime == owner_cyclic(i, n, nproc) ) THEN
iloc = lind_cyclic(i, n, nproc, mpime)
c(i, j) = cx( iloc, j )
a(i, j) = ax( iloc, j )
b(i, j) = bx( iloc, j )
e(i, j) = ex( iloc, j )
END IF
IF( mpime == owner_cyclic(j, n, nproc) ) THEN
jloc = lind_cyclic(j, n, nproc, mpime)
d(i, j) = dx( i, jloc )
f(i, j) = fx( i, jloc )
END IF
END DO
END DO
CALL PARALLEL_SUM_REAL( c, SIZE(c) )
CALL PARALLEL_SUM_REAL( a, SIZE(a) )
CALL PARALLEL_SUM_REAL( b, SIZE(b) )
CALL PARALLEL_SUM_REAL( d, SIZE(d) )
CALL PARALLEL_SUM_REAL( e, SIZE(e) )
CALL PARALLEL_SUM_REAL( f, SIZE(f) )
IF( mpime == root ) THEN
write(6,*) &
'--------------------------------------------------------------------------------------------------'
do i = 1, n
write(6,110) (c(i,j),j=1,n), (a(i,j),j=1,n), (b(i,j),j=1,n)
end do
110 FORMAT( 5F7.4, ' |', 5F7.4, ' |', 5F7.4 )
END IF
IF( mpime == root ) THEN
write(6,*) &
'--------------------------------------------------------------------------------------------------'
do i = 1, n
write(6,120) (d(i,j),j=1,n), (e(i,j),j=1,n), (f(i,j),j=1,n)
end do
120 FORMAT( 5F7.4, ' |', 5F7.4, ' |', 5F7.4 )
END IF
DEALLOCATE( a, b, c, d, e, f )
DEALLOCATE( ax, bx, cx, dx, ex, fx )
CALL PARALLEL_HANGUP
END PROGRAM matmul
#endif
SUBROUTINE mymatmul(ax, lda, tac, dista, bx, ldb, tbc, distb, cx, ldc, distc, n, mpime, nproc)
IMPLICIT NONE
INTEGER :: lda, ldc, ldb, n, mpime, nproc
CHARACTER(LEN=1) :: tac, tbc
CHARACTER(LEN=1) :: dista, distb, distc
REAL*8 :: ax(lda,*), bx(ldb,*), cx(ldc,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
REAL*8, ALLOCATABLE :: as(:,:), ar(:,:)
REAL*8, ALLOCATABLE :: cs(:,:), cr(:,:), cc(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
LOGICAL :: ta, tb
CHARACTER(LEN=1) :: da, db, dc
ta = .FALSE.
tb = .FALSE.
da = 'R'
db = 'R'
dc = 'R'
IF( tac == 't' .OR. tac == 'T') THEN
ta = .TRUE.
END IF
IF( tbc == 't' .OR. tbc == 'T') THEN
tb = .TRUE.
END IF
IF( dista == 'c' .OR. dista == 'C' ) THEN
da = 'C'
END IF
IF( distb == 'c' .OR. distb == 'C' ) THEN
db = 'C'
END IF
IF( distc == 'c' .OR. distc == 'C' ) THEN
dc = 'C'
END IF
nloc = ldim_cyclic(n, nproc, mpime)
IF( .NOT. ta .AND. .NOT. tb ) THEN
IF( dc == 'R' .AND. da == 'R' .AND. db == 'R' ) THEN
CALL mymatmul_rrrnn(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
ELSE IF( dc == 'R' .AND. da == 'R' .AND. db == 'C' ) THEN
CALL mymatmul_rrcnn(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
ELSE IF( dc == 'R' .AND. da == 'C' .AND. db == 'R' ) THEN
ALLOCATE( ar(nloc, n) )
CALL mytrasp_dist(ax, lda, 'C', ar, nloc, 'R', n, mpime, nproc)
CALL mymatmul_rrrnn(ar, nloc, bx, ldb, cx, ldc, n, mpime, nproc)
DEALLOCATE( ar )
ELSE IF( dc == 'R' .AND. da == 'C' .AND. db == 'C' ) THEN
ALLOCATE( ar(nloc, n) )
CALL mytrasp_dist(ax, lda, 'C', ar, nloc, 'R', n, mpime, nproc)
CALL mymatmul_rrcnn(ar, nloc, bx, ldb, cx, ldc, n, mpime, nproc)
DEALLOCATE( ar )
ELSE IF( dc == 'C' .AND. da == 'R' .AND. db == 'R' ) THEN
ALLOCATE( cc(nloc, n) )
CALL mymatmul_rrrnn(ax, lda, bx, ldb, cc, nloc, n, mpime, nproc)
CALL mytrasp_dist(cc, nloc, 'R', cx, ldc, 'C', n, mpime, nproc)
DEALLOCATE( cc )
ELSE IF( dc == 'C' .AND. da == 'R' .AND. db == 'C' ) THEN
ALLOCATE( cc(nloc, n) )
CALL mymatmul_rrcnn(ax, lda, bx, ldb, cc, nloc, n, mpime, nproc)
CALL mytrasp_dist(cc, nloc, 'R', cx, ldc, 'C', n, mpime, nproc)
DEALLOCATE( cc )
ELSE IF( dc == 'C' .AND. da == 'C' .AND. db == 'R' ) THEN
ALLOCATE( ar(nloc, n) )
ALLOCATE( cc(nloc, n) )
CALL mytrasp_dist(ax, lda, 'C', ar, nloc, 'R', n, mpime, nproc)
CALL mymatmul_rrrnn(ar, nloc, bx, ldb, cc, nloc, n, mpime, nproc)
CALL mytrasp_dist(cc, nloc, 'R', cx, ldc, 'C', n, mpime, nproc)
DEALLOCATE( ar )
DEALLOCATE( cc )
ELSE IF( dc == 'C' .AND. da == 'C' .AND. db == 'C' ) THEN
ALLOCATE( ar(nloc, n) )
ALLOCATE( cc(nloc, n) )
CALL mytrasp_dist(ax, lda, 'C', ar, nloc, 'R', n, mpime, nproc)
CALL mymatmul_rrcnn(ar, nloc, bx, ldb, cc, nloc, n, mpime, nproc)
CALL mytrasp_dist(cc, nloc, 'R', cx, ldc, 'C', n, mpime, nproc)
DEALLOCATE( ar )
DEALLOCATE( cc )
ELSE
WRITE(6,*) ' ** ERROR (1) in mymatmul ** '
STOP
END IF
ELSE IF( ta .AND. (.NOT. tb) ) THEN
IF( dc == 'R' .AND. da == 'R' .AND. db == 'R' ) THEN
CALL mymatmul_rrrtn(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
ELSE IF( dc == 'C' .AND. da == 'R' .AND. db == 'R' ) THEN
ALLOCATE( cc(nloc, n) )
CALL mymatmul_rrrtn(ax, lda, bx, ldb, cc, nloc, n, mpime, nproc)
CALL mytrasp_dist(cc, nloc, 'R', cx, ldc, 'C', n, mpime, nproc)
DEALLOCATE( cc )
ELSE
WRITE(6,*) ' ** ERROR (2) in mymatmul ** '
STOP
END IF
ELSE IF( (.NOT. ta) .AND. tb ) THEN
IF( dc == 'R' .AND. da == 'R' .AND. db == 'R' ) THEN
CALL mymatmul_rrrnt(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
ELSE IF( dc == 'C' .AND. da == 'R' .AND. db == 'R' ) THEN
ALLOCATE( cc(nloc, n) )
CALL mymatmul_rrrnt(ax, lda, bx, ldb, cc, nloc, n, mpime, nproc)
CALL mytrasp_dist(cc, nloc, 'R', cx, ldc, 'C', n, mpime, nproc)
DEALLOCATE( cc )
ELSE
WRITE(6,*) ' ** ERROR (3) in mymatmul ** '
STOP
END IF
ELSE IF( ta .AND. tb ) THEN
IF( dc == 'R' .AND. da == 'R' .AND. db == 'R' ) THEN
CALL mymatmul_rrrtt(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
ELSE IF( dc == 'C' .AND. da == 'R' .AND. db == 'R' ) THEN
ALLOCATE( cc(nloc, n) )
CALL mymatmul_rrrtt(ax, lda, bx, ldb, cc, nloc, n, mpime, nproc)
CALL mytrasp_dist(cc, nloc, 'R', cx, ldc, 'C', n, mpime, nproc)
DEALLOCATE( cc )
ELSE
WRITE(6,*) ' ** ERROR (4) in mymatmul ** '
STOP
END IF
ELSE
WRITE(6,*) ' ** ERROR (5) in mymatmul ** '
STOP
END IF
RETURN
END SUBROUTINE mymatmul
SUBROUTINE mymatmul_rrrnn(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
IMPLICIT NONE
#if defined __MPI
INCLUDE 'mpif.h'
INTEGER ISTATUS(MPI_STATUS_SIZE), ierr
#endif
INTEGER :: lda, ldc, ldb, n, mpime, nproc
REAL*8 :: ax(lda,*), bx(ldb,*), cx(ldc,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
nloc = ldim_cyclic(n, nproc, mpime)
DO j = 1, n
DO i = 1, nloc
cx(i,j) = 0.0d0
END DO
END DO
#if defined __MPI
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
ALLOCATE( br( nloc_src, n ) )
IF( nloc /= ldb ) THEN
ALLOCATE( bs( nloc, n ) )
DO j = 1, n
DO i = 1, nloc
bs(i,j) = bx(i,j)
END DO
END DO
CALL MPI_SENDRECV(bs, SIZE(bs), MPI_DOUBLE_PRECISION, &
IDEST, ip, br, SIZE(br), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
DEALLOCATE( bs )
ELSE
CALL MPI_SENDRECV(bx, nloc*n, MPI_DOUBLE_PRECISION, &
IDEST, ip, br, SIZE(br), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
END IF
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
DO j = 1, n
kk = isour+1
DO k = 1, nloc_src
DO i = 1, nloc
cx(i,j) = cx(i,j) + ax(i,kk) * br(k,j)
END DO
kk = kk + nproc
END DO
END DO
DEALLOCATE( br )
END DO
#else
DO j = 1, n
DO k = 1, n
DO i = 1, n
cx(i,j) = cx(i,j) + ax(i,k) * bx(k,j)
END DO
END DO
END DO
#endif
RETURN
END SUBROUTINE mymatmul_rrrnn
SUBROUTINE mymatmul_rrcnn(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
IMPLICIT NONE
#if defined __MPI
INCLUDE 'mpif.h'
INTEGER ISTATUS(MPI_STATUS_SIZE), ierr
#endif
INTEGER :: lda, ldc, ldb, n, mpime, nproc
REAL*8 :: ax(lda,*), bx(ldb,*), cx(ldc,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
nloc = ldim_cyclic(n, nproc, mpime)
DO j = 1, n
DO i = 1, nloc
cx(i,j) = 0.0d0
END DO
END DO
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
ALLOCATE( bs( n, nloc) )
ALLOCATE( br( n, nloc_src ) )
DO j = 1, nloc
DO i = 1, n
bs(i,j) = bx(i,j)
END DO
END DO
!CALL sendrecv_real(bs, SIZE(bs), idest, br, SIZE(br), isour, ip)
#if defined __MPI
CALL MPI_SENDRECV(bs, SIZE(bs), MPI_DOUBLE_PRECISION, &
IDEST, ip, br, SIZE(br), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
#else
br = bs
#endif
jj = isour+1
DO j = 1, nloc_src
DO k = 1, n
DO i = 1, nloc
cx(i,jj) = cx(i,jj) + ax(i,k) * br(k,j)
END DO
END DO
jj = jj + nproc
END DO
DEALLOCATE( bs )
DEALLOCATE( br )
END DO
RETURN
END SUBROUTINE mymatmul_rrcnn
SUBROUTINE mymatmul_rrrtn(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
IMPLICIT NONE
#if defined __MPI
INCLUDE 'mpif.h'
INTEGER ISTATUS(MPI_STATUS_SIZE), ierr
#endif
INTEGER :: lda, ldc, ldb, n, mpime, nproc
REAL*8 :: ax(lda,*), bx(ldb,*), cx(ldc,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
REAL*8, ALLOCATABLE :: as(:,:), ar(:,:)
REAL*8, ALLOCATABLE :: cs(:,:), cr(:,:), cc(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
nloc = ldim_cyclic(n, nproc, mpime)
#if defined __MPI
DO ip = 0, nproc - 1
nloc_ip = ldim_cyclic(n, nproc, ip)
ALLOCATE( cs( nloc_ip, n ) )
DO j = 1, n
ii = ip+1
DO i = 1, nloc_ip
cs(i,j) = 0.0d0
DO k = 1, nloc
cs(i,j) = cs(i,j) + ax(k, ii) * bx(k, j)
END DO
ii = ii + nproc
END DO
END DO
IF( ldc /= nloc_ip ) THEN
ALLOCATE( cr( nloc_ip, n ) )
CALL MPI_REDUCE(cs, cr, SIZE(cs), MPI_DOUBLE_PRECISION, MPI_SUM, ip, MPI_COMM_WORLD, ierr)
IF( mpime == ip ) THEN
DO j = 1, n
DO i = 1, nloc_ip
cx(i,j) = cr(i,j)
END DO
END DO
END IF
DEALLOCATE( cr )
ELSE
CALL MPI_REDUCE(cs, cx, n*nloc_ip, MPI_DOUBLE_PRECISION, MPI_SUM, ip, MPI_COMM_WORLD, ierr)
END IF
DEALLOCATE( cs )
END DO
#else
DO j = 1, n
DO i = 1, n
cx(i,j) = 0.0d0
DO k = 1, n
cx(i,j) = cx(i,j) + ax(k, i) * bx(k, j)
END DO
END DO
END DO
#endif
RETURN
END SUBROUTINE mymatmul_rrrtn
SUBROUTINE mymatmul_rrrnt(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
IMPLICIT NONE
#if defined __MPI
INCLUDE 'mpif.h'
INTEGER ISTATUS(MPI_STATUS_SIZE), ierr
#endif
INTEGER :: lda, ldc, ldb, n, mpime, nproc
REAL*8 :: ax(lda,*), bx(ldb,*), cx(ldc,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
REAL*8, ALLOCATABLE :: as(:,:), ar(:,:)
REAL*8, ALLOCATABLE :: cs(:,:), cr(:,:), cc(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
nloc = ldim_cyclic(n, nproc, mpime)
DO j = 1, n
DO i = 1, nloc
cx(i,j) = 0.0d0
END DO
END DO
#if defined __MPI
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
ALLOCATE( br( nloc_src, n ) )
IF( nloc /= ldb ) THEN
ALLOCATE( bs( nloc, n ) )
DO j = 1, n
DO i = 1, nloc
bs(i,j) = bx(i,j)
END DO
END DO
CALL MPI_SENDRECV(bs, n*nloc, MPI_DOUBLE_PRECISION, &
IDEST, ip, br, SIZE(br), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
DEALLOCATE( bs )
ELSE
CALL MPI_SENDRECV(bx, n*nloc, MPI_DOUBLE_PRECISION, &
IDEST, ip, br, SIZE(br), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
ENDIF
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
jj = isour+1
DO j = 1, nloc_src
kk = isour+1
DO k = 1, n
DO i = 1, nloc
cx(i,jj) = cx(i,jj) + ax(i,k) * br(j,k)
END DO
END DO
jj = jj + nproc
END DO
DEALLOCATE( br )
END DO
#else
DO j = 1, n
DO k = 1, n
DO i = 1, n
cx(i,j) = cx(i,j) + ax(i,k) * bx(j,k)
END DO
END DO
END DO
#endif
RETURN
END SUBROUTINE mymatmul_rrrnt
SUBROUTINE mymatmul_rrrtt(ax, lda, bx, ldb, cx, ldc, n, mpime, nproc)
IMPLICIT NONE
#if defined __MPI
INCLUDE 'mpif.h'
INTEGER ISTATUS(MPI_STATUS_SIZE), ierr
#endif
INTEGER :: lda, ldc, ldb, n, mpime, nproc
REAL*8 :: ax(lda,*), bx(ldb,*), cx(ldc,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
REAL*8, ALLOCATABLE :: as(:,:), ar(:,:)
REAL*8, ALLOCATABLE :: cs(:,:), cr(:,:), cc(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
nloc = ldim_cyclic(n, nproc, mpime)
ALLOCATE( cc( nloc, n ) )
DO j = 1, n
DO i = 1, nloc
cc(i,j) = 0.0d0
END DO
END DO
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
ALLOCATE( as( nloc, n ) )
ALLOCATE( ar( nloc_src, n ) )
DO j = 1, n
DO i = 1, nloc
as(i,j) = ax(i,j)
END DO
END DO
!CALL sendrecv_real(as, SIZE(as), idest, ar, SIZE(ar), isour, ip)
#if defined __MPI
CALL MPI_SENDRECV(as, SIZE(as), MPI_DOUBLE_PRECISION, &
IDEST, ip, ar, SIZE(ar), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
#else
ar = as
#endif
DO j = 1, n
kk = isour+1
DO k = 1, nloc_src
DO i = 1, nloc
cc(i,j) = cc(i,j) + bx(i,kk) * ar(k,j)
END DO
kk = kk + nproc
END DO
END DO
DEALLOCATE( as )
DEALLOCATE( ar )
END DO
CALL mytrasp_dati(cc, nloc, 'R', cx, nloc, 'R', n, mpime, nproc)
DEALLOCATE( cc )
RETURN
END SUBROUTINE mymatmul_rrrtt
SUBROUTINE mytrasp_dati(ax, lda, dista, bx, ldb, distb, n, mpime, nproc)
IMPLICIT NONE
#if defined __MPI
INCLUDE 'mpif.h'
INTEGER ISTATUS(MPI_STATUS_SIZE), ierr
#endif
INTEGER :: lda, ldb, n, mpime, nproc
CHARACTER(LEN=1) :: dista, distb
REAL*8 :: ax(lda,*), bx(ldb,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
REAL*8, ALLOCATABLE :: as(:,:), ar(:,:)
REAL*8, ALLOCATABLE :: cs(:,:), cr(:,:), cc(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
CHARACTER(LEN=1) :: da, db
da = 'R'
db = 'R'
IF( dista == 'c' .OR. dista == 'C' ) THEN
da = 'C'
END IF
IF( distb == 'c' .OR. distb == 'C' ) THEN
db = 'C'
END IF
nloc = ldim_cyclic(n, nproc, mpime)
IF( da == 'R' .AND. db == 'R' ) THEN
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
nloc_dst = ldim_cyclic(n, nproc, idest)
ALLOCATE( cs( nloc, nloc_dst ) )
ALLOCATE( cr( nloc_src, nloc ) )
jj = idest+1
DO j = 1, nloc_dst
DO i = 1, nloc
cs(i,j) = ax(i,jj)
END DO
jj = jj + nproc
END DO
! CALL sendrecv_real(cs, SIZE(cs), idest, cr, SIZE(cr), isour, ip)
#if defined __MPI
CALL MPI_SENDRECV(cs, SIZE(cs), MPI_DOUBLE_PRECISION, &
IDEST, ip, cr, SIZE(cr), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
#else
cr = cs
#endif
jj = isour+1
DO j = 1, nloc_src
DO i = 1, nloc
bx(i,jj) = cr(j,i)
END DO
jj = jj + nproc
END DO
DEALLOCATE( cs )
DEALLOCATE( cr )
END DO
ELSE IF( da == 'R' .AND. db == 'C' ) THEN
DO j = 1, n
DO i = 1, nloc
bx(j,i) = ax(i,j)
END DO
END DO
ELSE IF( da == 'C' .AND. db == 'R' ) THEN
DO j = 1, n
DO i = 1, nloc
bx(i,j) = ax(j,i)
END DO
END DO
ELSE IF( da == 'C' .AND. db == 'C' ) THEN
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
nloc_dst = ldim_cyclic(n, nproc, idest)
ALLOCATE( cs( nloc_dst, nloc ) )
ALLOCATE( cr( nloc, nloc_src ) )
DO i = 1, nloc
jj = idest+1
DO j = 1, nloc_dst
cs(j,i) = ax(jj,i)
jj = jj + nproc
END DO
END DO
!CALL sendrecv_real(cs, SIZE(cs), idest, cr, SIZE(cr), isour, ip)
#if defined __MPI
CALL MPI_SENDRECV(cs, SIZE(cs), MPI_DOUBLE_PRECISION, &
IDEST, ip, cr, SIZE(cr), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
#else
cr = cs
#endif
DO i = 1, nloc
jj = isour+1
DO j = 1, nloc_src
bx(jj,i) = cr(i,j)
jj = jj + nproc
END DO
END DO
DEALLOCATE( cs )
DEALLOCATE( cr )
END DO
ELSE
! ERROR
END IF
RETURN
END SUBROUTINE mytrasp_dati
SUBROUTINE mytrasp_dist(ax, lda, dista, bx, ldb, distb, n, mpime, nproc)
IMPLICIT NONE
#if defined __MPI
INCLUDE 'mpif.h'
INTEGER ISTATUS(MPI_STATUS_SIZE), ierr
#endif
INTEGER :: lda, ldb, n, mpime, nproc
CHARACTER(LEN=1) :: dista, distb
REAL*8 :: ax(lda,*), bx(ldb,*)
REAL*8, ALLOCATABLE :: bs(:,:), br(:,:)
REAL*8, ALLOCATABLE :: as(:,:), ar(:,:)
REAL*8, ALLOCATABLE :: cs(:,:), cr(:,:), cc(:,:)
INTEGER :: ldim_cyclic, owner_cyclic, lind_cyclic
INTEGER :: ip, i, ii, j, jj, k, kk, nloc_ip, nloc
INTEGER :: nloc_src, nloc_dst, idest, isour
CHARACTER(LEN=1) :: da, db
da = 'R'
db = 'R'
IF( dista == 'c' .OR. dista == 'C' ) THEN
da = 'C'
END IF
IF( distb == 'c' .OR. distb == 'C' ) THEN
db = 'C'
END IF
nloc = ldim_cyclic(n, nproc, mpime)
IF( da == 'R' .AND. db == 'R' ) THEN
DO j = 1, n
DO i = 1, nloc
bx(i,j) = ax(i,j)
END DO
END DO
ELSE IF( da == 'R' .AND. db == 'C' ) THEN
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
nloc_dst = ldim_cyclic(n, nproc, idest)
ALLOCATE( cs( nloc, nloc_dst ) )
ALLOCATE( cr( nloc_src, nloc ) )
jj = idest+1
DO j = 1, nloc_dst
DO i = 1, nloc
cs(i,j) = ax(i,jj)
END DO
jj = jj + nproc
END DO
!CALL sendrecv_real(cs, SIZE(cs), idest, cr, SIZE(cr), isour, ip)
#if defined __MPI
CALL MPI_SENDRECV(cs, SIZE(cs), MPI_DOUBLE_PRECISION, &
IDEST, ip, cr, SIZE(cr), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
#else
cr = cs
#endif
DO j = 1, nloc
ii = isour+1
DO i = 1, nloc_src
bx(ii,j) = cr(i,j)
ii = ii + nproc
END DO
END DO
DEALLOCATE( cs )
DEALLOCATE( cr )
END DO
ELSE IF( da == 'C' .AND. db == 'R' ) THEN
DO ip = 0, nproc - 1
isour = MOD(mpime-ip+nproc, nproc)
idest = MOD(mpime+ip , nproc)
nloc_src = ldim_cyclic(n, nproc, isour)
nloc_dst = ldim_cyclic(n, nproc, idest)
ALLOCATE( cs( nloc_dst, nloc ) )
ALLOCATE( cr( nloc, nloc_src ) )
DO i = 1, nloc
jj = idest+1
DO j = 1, nloc_dst
cs(j, i) = ax(jj, i)
jj = jj + nproc
END DO
END DO
!CALL sendrecv_real(cs, SIZE(cs), idest, cr, SIZE(cr), isour, ip)
#if defined __MPI
CALL MPI_SENDRECV(cs, SIZE(cs), MPI_DOUBLE_PRECISION, &
IDEST, ip, cr, SIZE(cr), MPI_DOUBLE_PRECISION, &
ISOUR, ip, MPI_COMM_WORLD, ISTATUS, ierr)
IF(ierr .NE. 0) THEN
WRITE(6,*) ' ** ERROR in sendrecv ** '
STOP
END IF
#else
cr = cs
#endif
ii = isour+1
DO i = 1, nloc_src
DO j = 1, nloc
bx(j,ii) = cr(j,i)
END DO
ii = ii + nproc
END DO
DEALLOCATE( cs )
DEALLOCATE( cr )
END DO
ELSE IF( da == 'C' .AND. db == 'C' ) THEN
DO j = 1, nloc
DO i = 1, n
bx(i,j) = ax(i,j)
END DO
END DO
ELSE
! ERROR
END IF
RETURN
END SUBROUTINE mytrasp_dist