mirror of https://gitlab.com/QEF/q-e.git
148 lines
3.6 KiB
Fortran
148 lines
3.6 KiB
Fortran
!
|
|
! Copyright (C) 2001 PWSCF group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
! or http://www.gnu.org/copyleft/gpl.txt .
|
|
!
|
|
#ifdef __BENCHLIB
|
|
|
|
subroutine c_gemm (forma, formb, l, n, m, alpha, a, lda, b, ldb, &
|
|
beta, c, ldc)
|
|
! driver for CGEMM routine
|
|
! it conjugate matrixes before calling the SGEMM subroutine in the T/N o
|
|
! or T T mode in order to exploit cache and stream buffer mechanics
|
|
! the calling parameter are the same as in the actual sgemm routines
|
|
! to use it, just replace ZGEMM with c_gemm in the original code...
|
|
implicit none
|
|
character (len=1) :: forma, formb
|
|
real (8) :: alpha, beta
|
|
complex (8) :: a, b, c
|
|
complex (8) , allocatable::auxa (:, :)
|
|
complex (8) , allocatable::auxb (:, :)
|
|
complex (8) , allocatable::auxc (:, :)
|
|
integer :: i, j
|
|
integer :: lda, ldb, ldc, l, m, n
|
|
|
|
dimension a (lda, * ), b (ldb, * ), c (ldc, * )
|
|
integer :: irtc
|
|
integer :: mm, nn, kk
|
|
integer :: s2, s3, itype
|
|
|
|
integer :: info (2)
|
|
mm = l
|
|
nn = n
|
|
kk = m
|
|
! defines the algorithm to transpose matrix
|
|
|
|
itype = 2
|
|
if (forma.eq.'c'.or.forma.eq.'C') then
|
|
if (formb.eq.'c'.or.formb.eq.'C') then
|
|
allocate (auxc (mm, nn) )
|
|
s2 = irtc ()
|
|
call cgemm ('n', 'n', mm, nn, kk, alpha, b, ldb, a, lda, &
|
|
beta, auxc, mm)
|
|
s3 = irtc ()
|
|
call c_transpose (auxc, mm, c, ldc, nn, mm, itype, info (2) )
|
|
deallocate (auxc)
|
|
else
|
|
allocate (auxa (mm, kk) )
|
|
call c_transpose (a, lda, auxa, mm, mm, kk, itype, info (2) )
|
|
s2 = irtc ()
|
|
call cgemm ('n', 'n', mm, nn, kk, alpha, auxa, mm, b, ldb, &
|
|
beta, c, ldc)
|
|
s3 = irtc ()
|
|
deallocate (auxa)
|
|
endif
|
|
elseif (formb.eq.'c'.or.formb.eq.'C') then
|
|
allocate (auxb (kk, nn) )
|
|
call c_transpose (b, ldb, auxb, kk, kk, nn, itype, info (2) )
|
|
s2 = irtc ()
|
|
call cgemm ('n', 'n', mm, nn, kk, alpha, a, lda, auxb, kk, &
|
|
beta, c, ldc)
|
|
s3 = irtc ()
|
|
deallocate (auxb)
|
|
else
|
|
s2 = irtc ()
|
|
call cgemm (forma, formb, mm, nn, kk, alpha, a, lda, b, ldb, &
|
|
beta, c, ldc)
|
|
s3 = irtc ()
|
|
info (2) = 0
|
|
|
|
endif
|
|
return
|
|
|
|
end subroutine c_gemm
|
|
|
|
subroutine c_transpose (a, lda, b, ldb, n, m, itype, info)
|
|
|
|
implicit none
|
|
|
|
include "mpp/shmem.fh"
|
|
integer :: lda, ldb, n, m, itype, info
|
|
|
|
|
|
complex (8) :: a (lda, * ), b (ldb, * )
|
|
! driver for matrix trasposition
|
|
!
|
|
! a (input ) m x n matrix to be transposed
|
|
! b (output) n x m transposed matrix
|
|
!
|
|
! itype (input ) type of transposition method
|
|
! info (output) routine execution time ( microseconds )
|
|
integer :: irtc, my_pe, lputp
|
|
integer :: i, j, jn, mype
|
|
|
|
integer :: s2, s3
|
|
|
|
if (itype.eq.0) then
|
|
s2 = irtc ()
|
|
do j = 1, n, 480
|
|
jn = min (n - j + 1, 480)
|
|
do i = 1, m
|
|
call lgetv (a (i, j), lda, jn)
|
|
call lputv (b (j, i), 1, jn)
|
|
enddo
|
|
enddo
|
|
10 if (lputp () .ne.0) goto 10
|
|
|
|
s3 = irtc ()
|
|
|
|
|
|
elseif (itype.eq.1) then
|
|
! traspose with shmem
|
|
mype = my_pe ()
|
|
s2 = irtc ()
|
|
do j = 1, m
|
|
call shmem_iget (b (1, j), a (j, 1), 1, lda, n, mype)
|
|
enddo
|
|
|
|
s3 = irtc ()
|
|
|
|
elseif (itype.eq.2) then
|
|
s2 = irtc ()
|
|
do j = 1, m
|
|
do i = 1, n
|
|
b (i, j) = conjg (a (j, i) )
|
|
enddo
|
|
enddo
|
|
|
|
s3 = irtc ()
|
|
|
|
else
|
|
WRITE( stdout, * ) '*** c_transpose : parameter itype out of range'
|
|
|
|
stop
|
|
|
|
endif
|
|
|
|
info = int (dble (s3 - s2) * 3.333d-3)
|
|
return
|
|
|
|
end subroutine c_transpose
|
|
#else
|
|
subroutine cgemmdummy
|
|
return
|
|
end subroutine cgemmdummy
|
|
#endif
|