opt[streams,1z]: use zaxpy to run the a2a scaling loop on a stream; test passes

This commit is contained in:
Laura Bellentani 2023-08-14 12:41:01 +03:00 committed by fabrizio22
parent 8806407cd6
commit 03e5038b5f
3 changed files with 15 additions and 11 deletions

View File

@ -386,10 +386,10 @@ END MODULE
TYPE(C_PTR), INTENT(IN), OPTIONAL :: stream
LOGICAL, INTENT(IN), OPTIONAL :: in_place
COMPLEX (DP) :: c(:), cout(:), ctmp(ldz*nsl), couttmp(ldz*nsl)
COMPLEX (DP) :: c(:), cout(:), ctmp(ldz*nsl), couttmp(ldz*nsl), itscale
REAL (DP) :: tscale
INTEGER :: i, err, idir, ip, void
INTEGER :: i, err, idir, ip, void, incy
INTEGER, SAVE :: zdims( 3, ndims ) = -1
INTEGER, SAVE :: icurrent = 1
LOGICAL :: found
@ -461,6 +461,7 @@ END MODULE
CALL hipCheck(hipDeviceSynchronize())
tscale = 1.0_DP / nz
IF (.NOT.PRESENT(stream)) THEN
IF (is_inplace) THEN
!$omp target teams distribute parallel do simd
DO i=1, ldz * nsl
@ -472,6 +473,15 @@ END MODULE
cout( i ) = cout( i ) * tscale
END DO
ENDIF
ELSE
incy=1
itscale=CMPLX(tscale-1.0_DP)
IF (is_inplace) THEN
CALL a2azaxpy(ldz*nsl,itscale,c,1,c,incy)
ELSE
CALL a2azaxpy(ldz*nsl,itscale,cout,1,cout,incy)
ENDIF
ENDIF
ELSE IF (isign > 0) THEN

View File

@ -359,19 +359,13 @@ SUBROUTINE MYZGEMV2(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
END SUBROUTINE MYZGEMV2
SUBROUTINE A2AZAXPY(N,ALPHA,X,INCX,Y,INCY)
#if defined(__OPENMP_GPU)
#if defined(__ROCBLAS)
use rocblas_utils
#endif
#endif
INTEGER, INTENT(IN) :: N,INCX
INTEGER, INTENT(INOUT) :: INCY
DOUBLE COMPLEX, INTENT(IN) :: ALPHA
DOUBLE COMPLEX, INTENT(IN) :: X(*)
DOUBLE COMPLEX, INTENT(OUT) :: Y(*)
#if defined(__ROCBLAS)
DOUBLE COMPLEX, INTENT(INOUT) :: Y(*)
CALL rocblas_a2a_zaxpy(N,ALPHA,X,INCX,Y,INCY)
#endif
END SUBROUTINE A2AZAXPY
! In principle this can go away .......

View File

@ -345,7 +345,7 @@ MODULE rocblas_utils
IMPLICIT NONE
TYPE(C_PTR), VALUE :: handle
INTEGER(rocblas_int), VALUE :: n
COMPLEX(c_double) :: alpha
COMPLEX(c_double_complex) :: alpha
TYPE(C_PTR), VALUE :: x
INTEGER(rocblas_int), VALUE :: incx
TYPE(C_PTR), VALUE :: y
@ -675,7 +675,7 @@ MODULE rocblas_utils
INTEGER, INTENT(INOUT) :: incy
COMPLEX(DP), INTENT(IN) :: alpha
COMPLEX(DP), INTENT(IN) :: x(n)
COMPLEX(DP), INTENT(OUT) :: y(n)
COMPLEX(DP), INTENT(INOUT) :: y(n)
INTEGER :: rn, rincx, rincy
INTEGER :: stat
rn = int(n, kind(rocblas_int))