End CUF support in becmod module (remove calbec_cuf)

This commit is contained in:
Ivan Carnimeo 2024-08-14 10:21:43 +02:00
parent d862590701
commit 4c8471a03e
1 changed files with 0 additions and 662 deletions

View File

@ -57,26 +57,6 @@ MODULE becmod
!
END INTERFACE
!
#if defined(__CUDA)
INTERFACE calbec_cuf
!civn: this is a TEMPORARY interface used only in
! PW/src/h_psi_gpu.f90
! PW/src/s_1psi_gpu.f90
! PW/src/vhpsi_gpu.f90
! PW/src/oscdft_functions_gpu.f90
! to avoid too many data movements with evc_d.
! TO BE REMOVED AS SOON evc_d will be removed!
MODULE PROCEDURE calbec_k_cuf, calbec_gamma_cuf, calbec_gamma_nocomm_cuf, calbec_nc_cuf, calbec_bec_type_cuf,&
! usage: call calbec( offload_type, beta, psi_d, betapsi )
! - beta and betapsi are OpenACC, psi_d is CUDA Fortran
! - this allows to call calbec passing evc_d
calbec_k_cuf2, calbec_gamma_cuf2, calbec_gamma_nocomm_cuf2, calbec_nc_cuf2, calbec_bec_type_cuf2
! usage: call calbec( offload_type, beta_d, psi, betapsi )
! - psi and betapsi are OpenACC, beta_d is CUDA Fortran
! - this allows to call calbec passing beta_d
END INTERFACE
#endif
!
INTERFACE becscal
!
MODULE PROCEDURE becscal_nck, becscal_gamma
@ -167,154 +147,6 @@ CONTAINS
!
END SUBROUTINE calbec_bec_type_acc
!
#if defined(__CUDA)
!-----------------------------------------------------------------------
SUBROUTINE calbec_bec_type_cuf ( offload, npw, beta, psi_d, betapsi, nbnd )
!-----------------------------------------------------------------------
!
! beta and betapsi, are assumed OpenACC data on GPU
! psi_d if CUF
!
USE mp_bands, ONLY: intra_bgrp_comm
USE mp, ONLY: mp_get_comm_null
!
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), INTENT (in) :: beta(:,:)
COMPLEX (DP), DEVICE, INTENT (in) :: psi_d(:,:)
TYPE (bec_type), INTENT (inout) :: betapsi ! NB: must be INOUT otherwise
! the allocatd array is lost
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
!
INTEGER :: local_nbnd
INTEGER, EXTERNAL :: ldim_block, gind_block
INTEGER :: m_loc, m_begin, ip
REAL(DP), ALLOCATABLE :: dtmp(:,:)
!$acc declare device_resident(dtmp)
!
IF ( present (nbnd) ) THEN
local_nbnd = nbnd
ELSE
local_nbnd = size ( psi_d, 2)
ENDIF
IF ( gamma_only ) THEN
!
IF( betapsi%comm == mp_get_comm_null() ) THEN
!
CALL calbec_gamma_cuf ( offload_acc, npw, beta, psi_d, betapsi%r, local_nbnd, intra_bgrp_comm )
!
ELSE
!
ALLOCATE( dtmp( SIZE( betapsi%r, 1 ), SIZE( betapsi%r, 2 ) ) )
!
DO ip = 0, betapsi%nproc - 1
m_loc = ldim_block( betapsi%nbnd , betapsi%nproc, ip )
m_begin = gind_block( 1, betapsi%nbnd, betapsi%nproc, ip )
IF( ( m_begin + m_loc - 1 ) > local_nbnd ) m_loc = local_nbnd - m_begin + 1
IF( m_loc > 0 ) THEN
CALL calbec_gamma_cuf ( offload_acc, npw, beta, psi_d(:,m_begin:m_begin+m_loc-1), dtmp, m_loc, betapsi%comm )
IF( ip == betapsi%mype ) THEN
!$acc kernels
betapsi%r(:,1:m_loc) = dtmp(:,1:m_loc)
!$acc end kernels
END IF
END IF
END DO
DEALLOCATE( dtmp )
!
END IF
!
ELSEIF ( noncolin) THEN
!
CALL calbec_nc_cuf ( offload_acc, npw, beta, psi_d, betapsi%nc, local_nbnd )
!
ELSE
!
CALL calbec_k_cuf ( offload_acc, npw, beta, psi_d, betapsi%k, local_nbnd )
!
ENDIF
!
RETURN
!
END SUBROUTINE calbec_bec_type_cuf
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_bec_type_cuf2 ( offload, npw, beta_d, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
!
! psi and betapsi, are assumed OpenACC data on GPU
! beta if CUF
!
USE mp_bands, ONLY: intra_bgrp_comm
USE mp, ONLY: mp_get_comm_null
!
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), DEVICE, INTENT (in) :: beta_d(:,:)
COMPLEX (DP), INTENT (in) :: psi(:,:)
TYPE (bec_type), INTENT (inout) :: betapsi ! NB: must be INOUT otherwise
! the allocatd array is lost
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
!
INTEGER :: local_nbnd
INTEGER, EXTERNAL :: ldim_block, gind_block
INTEGER :: m_loc, m_begin, ip
REAL(DP), ALLOCATABLE :: dtmp(:,:)
!$acc declare device_resident(dtmp)
!
IF ( present (nbnd) ) THEN
local_nbnd = nbnd
ELSE
local_nbnd = size ( psi, 2)
ENDIF
IF ( gamma_only ) THEN
!
IF( betapsi%comm == mp_get_comm_null() ) THEN
!
CALL calbec_gamma_cuf2 ( offload_acc, npw, beta_d, psi, betapsi%r, local_nbnd, intra_bgrp_comm )
!
ELSE
!
ALLOCATE( dtmp( SIZE( betapsi%r, 1 ), SIZE( betapsi%r, 2 ) ) )
!
DO ip = 0, betapsi%nproc - 1
m_loc = ldim_block( betapsi%nbnd , betapsi%nproc, ip )
m_begin = gind_block( 1, betapsi%nbnd, betapsi%nproc, ip )
IF( ( m_begin + m_loc - 1 ) > local_nbnd ) m_loc = local_nbnd - m_begin + 1
IF( m_loc > 0 ) THEN
CALL calbec_gamma_cuf2 ( offload_acc, npw, beta_d, psi(:,m_begin:m_begin+m_loc-1), dtmp, m_loc, betapsi%comm )
IF( ip == betapsi%mype ) THEN
!$acc kernels
betapsi%r(:,1:m_loc) = dtmp(:,1:m_loc)
!$acc end kernels
END IF
END IF
END DO
DEALLOCATE( dtmp )
!
END IF
!
ELSEIF ( noncolin) THEN
!
CALL calbec_nc_cuf2 ( offload_acc, npw, beta_d, psi, betapsi%nc, local_nbnd )
!
ELSE
!
CALL calbec_k_cuf2 ( offload_acc, npw, beta_d, psi, betapsi%k, local_nbnd )
!
ENDIF
!
RETURN
!
END SUBROUTINE calbec_bec_type_cuf2
#endif
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_bec_type_cpu ( offload, npw, beta, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
@ -427,58 +259,6 @@ CONTAINS
!
END SUBROUTINE calbec_gamma_nocomm_acc
!
#if defined(__CUDA)
!-----------------------------------------------------------------------
SUBROUTINE calbec_gamma_nocomm_cuf ( offload, npw, beta, psi_d, betapsi, nbnd )
!-----------------------------------------------------------------------
!
! beta, psi, betapsi, are assumed OpenACC data on GPU
!
USE mp_bands, ONLY: intra_bgrp_comm
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), INTENT (in) :: beta(:,:)
COMPLEX (DP), DEVICE, INTENT (in) :: psi_d(:,:)
REAL (DP), INTENT (out) :: betapsi(:,:)
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
INTEGER :: m
IF ( present (nbnd) ) THEN
m = nbnd
ELSE
m = size ( psi_d, 2)
ENDIF
CALL calbec_gamma_cuf ( offload_acc, npw, beta, psi_d, betapsi, m, intra_bgrp_comm )
RETURN
!
END SUBROUTINE calbec_gamma_nocomm_cuf
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_gamma_nocomm_cuf2 ( offload, npw, beta_d, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
!
! beta, psi, betapsi, are assumed OpenACC data on GPU
!
USE mp_bands, ONLY: intra_bgrp_comm
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), DEVICE, INTENT (in) :: beta_d(:,:)
COMPLEX (DP), INTENT (in) :: psi(:,:)
REAL (DP), INTENT (out) :: betapsi(:,:)
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
INTEGER :: m
IF ( present (nbnd) ) THEN
m = nbnd
ELSE
m = size ( psi, 2)
ENDIF
CALL calbec_gamma_cuf2 ( offload_acc, npw, beta_d, psi, betapsi, m, intra_bgrp_comm )
RETURN
!
END SUBROUTINE calbec_gamma_nocomm_cuf2
#endif
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_gamma_nocomm_cpu ( offload, npw, beta, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
@ -600,172 +380,6 @@ CONTAINS
!
END SUBROUTINE calbec_gamma_acc
!
#if defined(__CUDA)
!-----------------------------------------------------------------------
SUBROUTINE calbec_gamma_cuf ( offload, npw, beta, psi_d, betapsi, nbnd, comm )
!-----------------------------------------------------------------------
!! matrix times matrix with summation index (k=1,npw) running on
!! half of the G-vectors or PWs - assuming k=0 is the G=0 component:
!
!! $$ betapsi(i,j) = 2Re(\sum_k beta^*(i,k)psi(k,j)) + beta^*(i,0)psi(0,j) $$
!
! beta, psi, betapsi, are assumed OpenACC data on GPU
!
USE mp, ONLY : mp_sum, mp_size
!
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), INTENT (in) :: beta(:,:)
COMPLEX (DP), DEVICE, INTENT (in) :: psi_d(:,:)
REAL (DP), INTENT (out) :: betapsi(:,:)
INTEGER, INTENT (in) :: npw
INTEGER, INTENT (in) :: nbnd
INTEGER, INTENT (in) :: comm
!
INTEGER :: nkb, npwx, m
!
m = nbnd
!
nkb = size (beta, 2)
IF ( nkb == 0 ) RETURN
!
CALL start_clock( 'calbec' )
IF ( npw == 0 ) THEN
!$acc kernels
betapsi(:,:)=0.0_DP
!$acc end kernels
END IF
npwx= size (beta, 1)
IF ( npwx /= size (psi_d, 1) ) CALL errore ('calbec', 'size mismatch', 1)
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
#if defined(DEBUG)
WRITE (*,*) 'calbec gamma'
WRITE (*,*) nkb, size (betapsi,1) , m , size (betapsi, 2)
#endif
IF ( nkb /= size (betapsi,1) .or. m > size (betapsi, 2) ) &
CALL errore ('calbec', 'size mismatch', 3)
!
IF ( m == 1 ) THEN
!
!$acc host_data use_device(beta,betapsi)
CALL MYDGEMV( 'C', 2*npw, nkb, 2.0_DP, beta, 2*npwx, psi_d, 1, 0.0_DP, &
betapsi, 1 )
!$acc end host_data
IF ( gstart == 2 ) THEN
!$acc kernels deviceptr(psi_d)
betapsi(:,1) = betapsi(:,1) - beta(1,:)*psi_d(1,1)
!$acc end kernels
END IF
!
ELSE
!
!$acc host_data use_device(beta,betapsi)
CALL MYDGEMM( 'C', 'N', nkb, m, 2*npw, 2.0_DP, beta, 2*npwx, psi_d, &
2*npwx, 0.0_DP, betapsi, nkb )
!$acc end host_data
IF ( gstart == 2 ) THEN
!$acc host_data use_device(beta,betapsi)
CALL MYDGER( nkb, m, -1.0_DP, beta, 2*npwx, psi_d, 2*npwx, betapsi, nkb )
!$acc end host_data
END IF
!
ENDIF
!
IF (mp_size(comm) > 1) THEN
!$acc host_data use_device(betapsi)
CALL mp_sum( betapsi( :, 1:m ), comm )
!$acc end host_data
END IF
!
CALL stop_clock( 'calbec' )
!
RETURN
!
END SUBROUTINE calbec_gamma_cuf
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_gamma_cuf2 ( offload, npw, beta_d, psi, betapsi, nbnd, comm )
!-----------------------------------------------------------------------
!! matrix times matrix with summation index (k=1,npw) running on
!! half of the G-vectors or PWs - assuming k=0 is the G=0 component:
!
!! $$ betapsi(i,j) = 2Re(\sum_k beta^*(i,k)psi(k,j)) + beta^*(i,0)psi(0,j) $$
!
! beta, psi, betapsi, are assumed OpenACC data on GPU
!
USE mp, ONLY : mp_sum, mp_size
!
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), DEVICE, INTENT (in) :: beta_d(:,:)
COMPLEX (DP), INTENT (in) :: psi(:,:)
REAL (DP), INTENT (out) :: betapsi(:,:)
INTEGER, INTENT (in) :: npw
INTEGER, INTENT (in) :: nbnd
INTEGER, INTENT (in) :: comm
!
INTEGER :: nkb, npwx, m
!
m = nbnd
!
nkb = size (beta_d, 2)
IF ( nkb == 0 ) RETURN
!
CALL start_clock( 'calbec' )
IF ( npw == 0 ) THEN
!$acc kernels
betapsi(:,:)=0.0_DP
!$acc end kernels
END IF
npwx= size (beta_d, 1)
IF ( npwx /= size (psi, 1) ) CALL errore ('calbec', 'size mismatch', 1)
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
#if defined(DEBUG)
WRITE (*,*) 'calbec gamma'
WRITE (*,*) nkb, size (betapsi,1) , m , size (betapsi, 2)
#endif
IF ( nkb /= size (betapsi,1) .or. m > size (betapsi, 2) ) &
CALL errore ('calbec', 'size mismatch', 3)
!
IF ( m == 1 ) THEN
!
!$acc host_data use_device(psi,betapsi)
CALL MYDGEMV( 'C', 2*npw, nkb, 2.0_DP, beta_d, 2*npwx, psi, 1, 0.0_DP, &
betapsi, 1 )
!$acc end host_data
IF ( gstart == 2 ) THEN
!$acc kernels deviceptr(beta_d)
betapsi(:,1) = betapsi(:,1) - beta_d(1,:)*psi(1,1)
!$acc end kernels
END IF
!
ELSE
!
!$acc host_data use_device(psi,betapsi)
CALL MYDGEMM( 'C', 'N', nkb, m, 2*npw, 2.0_DP, beta_d, 2*npwx, psi, &
2*npwx, 0.0_DP, betapsi, nkb )
!$acc end host_data
IF ( gstart == 2 ) THEN
!$acc host_data use_device(psi,betapsi)
CALL MYDGER( nkb, m, -1.0_DP, beta_d, 2*npwx, psi, 2*npwx, betapsi, nkb )
!$acc end host_data
END IF
!
ENDIF
!
IF (mp_size(comm) > 1) THEN
!$acc host_data use_device(betapsi)
CALL mp_sum( betapsi( :, 1:m ), comm )
!$acc end host_data
END IF
!
CALL stop_clock( 'calbec' )
!
RETURN
!
END SUBROUTINE calbec_gamma_cuf2
#endif
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_gamma_cpu ( offload, npw, beta, psi, betapsi, nbnd, comm )
!-----------------------------------------------------------------------
@ -911,152 +525,6 @@ CONTAINS
!
END SUBROUTINE calbec_k_acc
!
#if defined(__CUDA)
!-----------------------------------------------------------------------
SUBROUTINE calbec_k_cuf ( offload, npw, beta, psi_d, betapsi, nbnd )
!-----------------------------------------------------------------------
!! Matrix times matrix with summation index (k=1,npw) running on
!! G-vectors or PWs:
!! $$ betapsi(i,j) = \sum_k beta^*(i,k) psi(k,j)$$
!
USE mp_bands, ONLY : intra_bgrp_comm
USE mp, ONLY : mp_sum, mp_size
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), INTENT (in) :: beta(:,:)
COMPLEX (DP), DEVICE, INTENT (in) :: psi_d(:,:)
COMPLEX (DP), INTENT (out) :: betapsi(:,:)
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
!
INTEGER :: nkb, npwx, m
!
nkb = size (beta, 2)
IF ( nkb == 0 ) RETURN
!
CALL start_clock( 'calbec' )
IF ( npw == 0 ) THEN
!$acc kernels
betapsi(:,:)=(0.0_DP,0.0_DP)
!$acc end kernels
END IF
npwx= size (beta, 1)
IF ( npwx /= size (psi_d, 1) ) CALL errore ('calbec', 'size mismatch', 1)
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
IF ( present (nbnd) ) THEN
m = nbnd
ELSE
m = size ( psi_d, 2)
ENDIF
#if defined(DEBUG)
WRITE (*,*) 'calbec k'
WRITE (*,*) nkb, size (betapsi,1) , m , size (betapsi, 2)
#endif
IF ( nkb /= size (betapsi,1) .or. m > size (betapsi, 2) ) &
CALL errore ('calbec', 'size mismatch', 3)
!
IF ( m == 1 ) THEN
!
!$acc host_data use_device(beta, betapsi)
CALL MYZGEMV( 'C', npw, nkb, (1.0_DP,0.0_DP), beta, npwx, psi_d, 1, &
(0.0_DP, 0.0_DP), betapsi, 1 )
!$acc end host_data
!
ELSE
!
!$acc host_data use_device(beta, betapsi)
CALL MYZGEMM( 'C', 'N', nkb, m, npw, (1.0_DP,0.0_DP), &
beta, npwx, psi_d, npwx, (0.0_DP,0.0_DP), betapsi, nkb )
!$acc end host_data
!
ENDIF
!
IF (mp_size(intra_bgrp_comm) > 1) THEN
!$acc host_data use_device(betapsi)
CALL mp_sum( betapsi( :, 1:m ), intra_bgrp_comm )
!$acc end host_data
END IF
!
CALL stop_clock( 'calbec' )
!
RETURN
!
END SUBROUTINE calbec_k_cuf
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_k_cuf2 ( offload, npw, beta_d, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
!! Matrix times matrix with summation index (k=1,npw) running on
!! G-vectors or PWs:
!! $$ betapsi(i,j) = \sum_k beta^*(i,k) psi(k,j)$$
!
USE mp_bands, ONLY : intra_bgrp_comm
USE mp, ONLY : mp_sum, mp_size
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), DEVICE, INTENT (in) :: beta_d(:,:)
COMPLEX (DP), INTENT (in) :: psi(:,:)
COMPLEX (DP), INTENT (out) :: betapsi(:,:)
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
!
INTEGER :: nkb, npwx, m
!
nkb = size (beta_d, 2)
IF ( nkb == 0 ) RETURN
!
CALL start_clock( 'calbec' )
IF ( npw == 0 ) THEN
!$acc kernels
betapsi(:,:)=(0.0_DP,0.0_DP)
!$acc end kernels
END IF
npwx= size (beta_d, 1)
IF ( npwx /= size (psi, 1) ) CALL errore ('calbec', 'size mismatch', 1)
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
IF ( present (nbnd) ) THEN
m = nbnd
ELSE
m = size ( psi, 2)
ENDIF
#if defined(DEBUG)
WRITE (*,*) 'calbec k'
WRITE (*,*) nkb, size (betapsi,1) , m , size (betapsi, 2)
#endif
IF ( nkb /= size (betapsi,1) .or. m > size (betapsi, 2) ) &
CALL errore ('calbec', 'size mismatch', 3)
!
IF ( m == 1 ) THEN
!
!$acc host_data use_device(psi, betapsi)
CALL MYZGEMV( 'C', npw, nkb, (1.0_DP,0.0_DP), beta_d, npwx, psi, 1, &
(0.0_DP, 0.0_DP), betapsi, 1 )
!$acc end host_data
!
ELSE
!
!$acc host_data use_device(psi, betapsi)
CALL MYZGEMM( 'C', 'N', nkb, m, npw, (1.0_DP,0.0_DP), &
beta_d, npwx, psi, npwx, (0.0_DP,0.0_DP), betapsi, nkb )
!$acc end host_data
!
ENDIF
!
IF (mp_size(intra_bgrp_comm) > 1) THEN
!$acc host_data use_device(betapsi)
CALL mp_sum( betapsi( :, 1:m ), intra_bgrp_comm )
!$acc end host_data
END IF
!
CALL stop_clock( 'calbec' )
!
RETURN
!
END SUBROUTINE calbec_k_cuf2
#endif
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_k_cpu ( offload, npw, beta, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
@ -1201,136 +669,6 @@ CONTAINS
!
END SUBROUTINE calbec_nc_acc
!
#if defined(__CUDA)
!-----------------------------------------------------------------------
SUBROUTINE calbec_nc_cuf ( offload, npw, beta, psi_d, betapsi, nbnd )
!-----------------------------------------------------------------------
!! Matrix times matrix with summation index (k below) running on
!! G-vectors or PWs corresponding to two different polarizations:
!
!! * \(betapsi(i,1,j) = \sum_k=1,npw beta^*(i,k) psi(k,j)\)
!! * \(betapsi(i,2,j) = \sum_k=1,npw beta^*(i,k) psi(k+npwx,j)\)
!
USE mp_bands, ONLY : intra_bgrp_comm
USE mp, ONLY : mp_sum, mp_size
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), INTENT (in) :: beta(:,:)
COMPLEX (DP), DEVICE, INTENT (in) :: psi_d(:,:)
COMPLEX (DP), INTENT (out) :: betapsi(:,:,:)
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
!
INTEGER :: nkb, npwx, npol, m
!
nkb = size (beta, 2)
IF ( nkb == 0 ) RETURN
!
CALL start_clock ('calbec')
IF ( npw == 0 ) THEN
!$acc kernels
betapsi(:,:,:)=(0.0_DP,0.0_DP)
!$acc end kernels
END IF
npwx= size (beta, 1)
IF ( 2*npwx /= size (psi_d, 1) ) CALL errore ('calbec', 'size mismatch', 1)
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
IF ( present (nbnd) ) THEN
m = nbnd
ELSE
m = size ( psi_d, 2)
ENDIF
npol= size (betapsi, 2)
#if defined(DEBUG)
WRITE (*,*) 'calbec nc'
WRITE (*,*) nkb, size (betapsi,1) , m , size (betapsi, 3)
#endif
IF ( nkb /= size (betapsi,1) .or. m > size (betapsi, 3) ) &
CALL errore ('calbec', 'size mismatch', 3)
!
!$acc host_data use_device(beta, betapsi)
CALL MYZGEMM ('C', 'N', nkb, m*npol, npw, (1.0_DP, 0.0_DP), beta, &
npwx, psi_d, npwx, (0.0_DP, 0.0_DP), betapsi, nkb)
!$acc end host_data
!
IF (mp_size(intra_bgrp_comm) > 1) THEN
!$acc host_data use_device(betapsi)
CALL mp_sum( betapsi( :, :, 1:m ), intra_bgrp_comm )
!$acc end host_data
END IF
!
CALL stop_clock( 'calbec' )
!
RETURN
!
END SUBROUTINE calbec_nc_cuf
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_nc_cuf2 ( offload, npw, beta_d, psi, betapsi, nbnd )
!-----------------------------------------------------------------------
!! Matrix times matrix with summation index (k below) running on
!! G-vectors or PWs corresponding to two different polarizations:
!
!! * \(betapsi(i,1,j) = \sum_k=1,npw beta^*(i,k) psi(k,j)\)
!! * \(betapsi(i,2,j) = \sum_k=1,npw beta^*(i,k) psi(k+npwx,j)\)
!
USE mp_bands, ONLY : intra_bgrp_comm
USE mp, ONLY : mp_sum, mp_size
IMPLICIT NONE
TYPE(offload_kind_acc), INTENT(IN) :: offload
COMPLEX (DP), DEVICE, INTENT (in) :: beta_d(:,:)
COMPLEX (DP), INTENT (in) :: psi(:,:)
COMPLEX (DP), INTENT (out) :: betapsi(:,:,:)
INTEGER, INTENT (in) :: npw
INTEGER, OPTIONAL :: nbnd
!
INTEGER :: nkb, npwx, npol, m
!
nkb = size (beta_d, 2)
IF ( nkb == 0 ) RETURN
!
CALL start_clock ('calbec')
IF ( npw == 0 ) THEN
!$acc kernels
betapsi(:,:,:)=(0.0_DP,0.0_DP)
!$acc end kernels
END IF
npwx= size (beta_d, 1)
IF ( 2*npwx /= size (psi, 1) ) CALL errore ('calbec', 'size mismatch', 1)
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
IF ( present (nbnd) ) THEN
m = nbnd
ELSE
m = size ( psi, 2)
ENDIF
npol= size (betapsi, 2)
#if defined(DEBUG)
WRITE (*,*) 'calbec nc'
WRITE (*,*) nkb, size (betapsi,1) , m , size (betapsi, 3)
#endif
IF ( nkb /= size (betapsi,1) .or. m > size (betapsi, 3) ) &
CALL errore ('calbec', 'size mismatch', 3)
!
!$acc host_data use_device(psi, betapsi)
CALL MYZGEMM ('C', 'N', nkb, m*npol, npw, (1.0_DP, 0.0_DP), beta_d, &
npwx, psi, npwx, (0.0_DP, 0.0_DP), betapsi, nkb)
!$acc end host_data
!
IF (mp_size(intra_bgrp_comm) > 1) THEN
!$acc host_data use_device(betapsi)
CALL mp_sum( betapsi( :, :, 1:m ), intra_bgrp_comm )
!$acc end host_data
END IF
!
CALL stop_clock( 'calbec' )
!
RETURN
!
END SUBROUTINE calbec_nc_cuf2
#endif
!
!-----------------------------------------------------------------------
SUBROUTINE calbec_nc_cpu ( offload, npw, beta, psi, betapsi, nbnd )
!-----------------------------------------------------------------------