mirror of https://gitlab.com/QEF/q-e.git
Moved becp to standard duplicated modules. Initial porting of becmod subroutines, but still a WIP! Other minor changes.
This commit is contained in:
parent
4f14b53cba
commit
ffaf3612e4
|
@ -0,0 +1,317 @@
|
|||
!
|
||||
! Copyright (C) 2002-2011 Quantum ESPRESSO group
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
#define DIMS2D(my_array) lbound(my_array,1):ubound(my_array,1),lbound(my_array,2):ubound(my_array,2)
|
||||
!=----------------------------------------------------------------------------=!
|
||||
MODULE becmod_gpum
|
||||
!=----------------------------------------------------------------------------=!
|
||||
USE kinds, ONLY : DP
|
||||
|
||||
IMPLICIT NONE
|
||||
SAVE
|
||||
!
|
||||
TYPE bec_type_d
|
||||
#if defined(__CUDA)
|
||||
REAL(DP), ALLOCATABLE, DEVICE :: r_d(:, :)
|
||||
#else
|
||||
REAL(DP), ALLOCATABLE :: r_d(:, :)
|
||||
#endif
|
||||
#if defined(__CUDA)
|
||||
COMPLEX(DP), ALLOCATABLE, DEVICE :: k_d(:, :)
|
||||
#else
|
||||
COMPLEX(DP), ALLOCATABLE :: k_d(:, :)
|
||||
#endif
|
||||
#if defined(__CUDA)
|
||||
COMPLEX(DP), ALLOCATABLE, DEVICE :: nc_d(:, :, :)
|
||||
#else
|
||||
COMPLEX(DP), ALLOCATABLE :: nc_d(:, :, :)
|
||||
#endif
|
||||
INTEGER :: comm
|
||||
INTEGER :: nbnd
|
||||
INTEGER :: nproc
|
||||
INTEGER :: mype
|
||||
INTEGER :: nbnd_loc
|
||||
INTEGER :: ibnd_begin
|
||||
END TYPE bec_type_d
|
||||
!
|
||||
TYPE (bec_type_d), TARGET :: becp_d ! <beta|psi>
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
|
||||
LOGICAL :: becp_r_ood = .false. ! used to flag out of date variables
|
||||
LOGICAL :: becp_d_r_d_ood = .false. ! used to flag out of date variables
|
||||
LOGICAL :: becp_k_ood = .false. ! used to flag out of date variables
|
||||
LOGICAL :: becp_d_k_d_ood = .false. ! used to flag out of date variables
|
||||
LOGICAL :: becp_nc_ood = .false. ! used to flag out of date variables
|
||||
LOGICAL :: becp_d_nc_d_ood = .false. ! used to flag out of date variables
|
||||
!
|
||||
#endif
|
||||
CONTAINS
|
||||
!
|
||||
SUBROUTINE using_becp_r(intento)
|
||||
!
|
||||
! intento is used to specify what the variable will be used for :
|
||||
! 0 -> in , the variable needs to be synchronized but won't be changed
|
||||
! 1 -> inout , the variable needs to be synchronized AND will be changed
|
||||
! 2 -> out , NO NEED to synchronize the variable, everything will be overwritten
|
||||
!
|
||||
USE becmod, ONLY : becp
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
INTEGER :: intento_
|
||||
intento_ = intento
|
||||
#if defined(__CUDA)
|
||||
!
|
||||
IF (becp_r_ood) THEN
|
||||
IF (.not. allocated(becp_d%r_d)) THEN
|
||||
CALL errore('using_r_d', 'PANIC: sync of becp%r from becp_d%r_d with unallocated array. Bye!!', 1)
|
||||
stop
|
||||
END IF
|
||||
IF (.not. allocated(becp%r)) THEN
|
||||
IF (intento_ /= 2) THEN
|
||||
print *, "WARNING: sync of becp%r with unallocated array and intento /= 2? Changed to 2!"
|
||||
intento_ = 2
|
||||
END IF
|
||||
END IF
|
||||
IF (intento_ < 2) THEN
|
||||
print *, "Really copied becp%r D->H"
|
||||
becp%r = becp_d%r_d
|
||||
! update auxiliary variables
|
||||
becp%comm = becp_d%comm
|
||||
becp%nbnd = becp_d%nbnd
|
||||
becp%nproc = becp_d%nproc
|
||||
becp%mype = becp_d%mype
|
||||
becp%nbnd_loc = becp_d%nbnd_loc
|
||||
becp%ibnd_begin = becp_d%ibnd_begin
|
||||
!
|
||||
END IF
|
||||
becp_r_ood = .false.
|
||||
ENDIF
|
||||
IF (intento_ > 0) becp_d_r_d_ood = .true.
|
||||
#endif
|
||||
END SUBROUTINE using_becp_r
|
||||
!
|
||||
SUBROUTINE using_becp_r_d(intento)
|
||||
!
|
||||
USE becmod, ONLY : becp
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
#if defined(__CUDA)
|
||||
!
|
||||
IF (.not. allocated(becp%r)) THEN
|
||||
IF (intento /= 2) print *, "WARNING: sync of becp%r_d with unallocated array and intento /= 2?"
|
||||
IF (allocated(becp_d%r_d)) DEALLOCATE(becp_d%r_d)
|
||||
becp_d_r_d_ood = .false.
|
||||
RETURN
|
||||
END IF
|
||||
! here we know that r is allocated, check if size is 0
|
||||
IF ( SIZE(becp%r) == 0 ) THEN
|
||||
print *, "Refusing to allocate 0 dimensional array becp_d%r_d. If used, code will crash."
|
||||
RETURN
|
||||
END IF
|
||||
!
|
||||
IF (becp_d_r_d_ood) THEN
|
||||
IF ( allocated(becp_d%r_d) .and. (SIZE(becp_d%r_d)/=SIZE(becp%r))) deallocate(becp_d%r_d)
|
||||
IF (.not. allocated(becp_d%r_d)) ALLOCATE(becp_d%r_d, MOLD=becp%r) ! this copy may be avoided
|
||||
IF (intento < 2) THEN
|
||||
print *, "Really copied becp%r H->D"
|
||||
becp_d%r_d = becp%r
|
||||
! update auxiliary variables
|
||||
becp_d%comm = becp%comm
|
||||
becp_d%nbnd = becp%nbnd
|
||||
becp_d%nproc = becp%nproc
|
||||
becp_d%mype = becp%mype
|
||||
becp_d%nbnd_loc = becp%nbnd_loc
|
||||
becp_d%ibnd_begin = becp%ibnd_begin
|
||||
!
|
||||
END IF
|
||||
becp_d_r_d_ood = .false.
|
||||
ENDIF
|
||||
IF (intento > 0) becp_r_ood = .true.
|
||||
#else
|
||||
CALL errore('using_becp_d%r_d', 'Trying to use device data without device compilated code!', 1)
|
||||
#endif
|
||||
END SUBROUTINE using_becp_r_d
|
||||
!
|
||||
SUBROUTINE using_becp_k(intento)
|
||||
!
|
||||
! intento is used to specify what the variable will be used for :
|
||||
! 0 -> in , the variable needs to be synchronized but won't be changed
|
||||
! 1 -> inout , the variable needs to be synchronized AND will be changed
|
||||
! 2 -> out , NO NEED to synchronize the variable, everything will be overwritten
|
||||
!
|
||||
USE becmod, ONLY : becp
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
INTEGER :: intento_
|
||||
intento_ = intento
|
||||
#if defined(__CUDA)
|
||||
!
|
||||
IF (becp_k_ood) THEN
|
||||
IF (.not. allocated(becp_d%k_d)) THEN
|
||||
CALL errore('using_k_d', 'PANIC: sync of becp%k from becp_d%k_d with unallocated array. Bye!!', 1)
|
||||
stop
|
||||
END IF
|
||||
IF (.not. allocated(becp%k)) THEN
|
||||
IF (intento_ /= 2) THEN
|
||||
print *, "WARNING: sync of becp%k with unallocated array and intento /= 2? Changed to 2!"
|
||||
intento_ = 2
|
||||
END IF
|
||||
END IF
|
||||
IF (intento_ < 2) THEN
|
||||
print *, "Really copied becp%k D->H"
|
||||
becp%k = becp_d%k_d
|
||||
! update auxiliary variables
|
||||
becp%comm = becp_d%comm
|
||||
becp%nbnd = becp_d%nbnd
|
||||
becp%nproc = becp_d%nproc
|
||||
becp%mype = becp_d%mype
|
||||
becp%nbnd_loc = becp_d%nbnd_loc
|
||||
becp%ibnd_begin = becp_d%ibnd_begin
|
||||
!
|
||||
END IF
|
||||
becp_k_ood = .false.
|
||||
ENDIF
|
||||
IF (intento_ > 0) becp_d_k_d_ood = .true.
|
||||
#endif
|
||||
END SUBROUTINE using_becp_k
|
||||
!
|
||||
SUBROUTINE using_becp_k_d(intento)
|
||||
!
|
||||
USE becmod, ONLY : becp
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
#if defined(__CUDA)
|
||||
!
|
||||
IF (.not. allocated(becp%k)) THEN
|
||||
IF (intento /= 2) print *, "WARNING: sync of becp%k_d with unallocated array and intento /= 2?"
|
||||
IF (allocated(becp_d%k_d)) DEALLOCATE(becp_d%k_d)
|
||||
becp_d_k_d_ood = .false.
|
||||
RETURN
|
||||
END IF
|
||||
! here we know that k is allocated, check if size is 0
|
||||
IF ( SIZE(becp%k) == 0 ) THEN
|
||||
print *, "Refusing to allocate 0 dimensional array becp_d%k_d. If used, code will crash."
|
||||
RETURN
|
||||
END IF
|
||||
!
|
||||
IF (becp_d_k_d_ood) THEN
|
||||
IF ( allocated(becp_d%k_d) .and. (SIZE(becp_d%k_d)/=SIZE(becp%k))) deallocate(becp_d%k_d)
|
||||
IF (.not. allocated(becp_d%k_d)) ALLOCATE(becp_d%k_d, MOLD=becp%k) ! this copy may be avoided
|
||||
IF (intento < 2) THEN
|
||||
print *, "Really copied becp%k H->D"
|
||||
becp_d%k_d = becp%k
|
||||
! update auxiliary variables
|
||||
becp_d%comm = becp%comm
|
||||
becp_d%nbnd = becp%nbnd
|
||||
becp_d%nproc = becp%nproc
|
||||
becp_d%mype = becp%mype
|
||||
becp_d%nbnd_loc = becp%nbnd_loc
|
||||
becp_d%ibnd_begin = becp%ibnd_begin
|
||||
!
|
||||
END IF
|
||||
becp_d_k_d_ood = .false.
|
||||
ENDIF
|
||||
IF (intento > 0) becp_k_ood = .true.
|
||||
#else
|
||||
CALL errore('using_becp_d%k_d', 'Trying to use device data without device compilated code!', 1)
|
||||
#endif
|
||||
END SUBROUTINE using_becp_k_d
|
||||
!
|
||||
SUBROUTINE using_becp_nc(intento)
|
||||
!
|
||||
! intento is used to specify what the variable will be used for :
|
||||
! 0 -> in , the variable needs to be synchronized but won't be changed
|
||||
! 1 -> inout , the variable needs to be synchronized AND will be changed
|
||||
! 2 -> out , NO NEED to synchronize the variable, everything will be overwritten
|
||||
!
|
||||
USE becmod, ONLY : becp
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
INTEGER :: intento_
|
||||
intento_ = intento
|
||||
#if defined(__CUDA)
|
||||
!
|
||||
IF (becp_nc_ood) THEN
|
||||
IF (.not. allocated(becp_d%nc_d)) THEN
|
||||
CALL errore('using_nc_d', 'PANIC: sync of becp%nc from becp_d%nc_d with unallocated array. Bye!!', 1)
|
||||
stop
|
||||
END IF
|
||||
IF (.not. allocated(becp%nc)) THEN
|
||||
IF (intento_ /= 2) THEN
|
||||
print *, "WARNING: sync of becp%nc with unallocated array and intento /= 2? Changed to 2!"
|
||||
intento_ = 2
|
||||
END IF
|
||||
END IF
|
||||
IF (intento_ < 2) THEN
|
||||
print *, "Really copied becp%nc D->H"
|
||||
becp%nc = becp_d%nc_d
|
||||
! update auxiliary variables
|
||||
becp%comm = becp_d%comm
|
||||
becp%nbnd = becp_d%nbnd
|
||||
becp%nproc = becp_d%nproc
|
||||
becp%mype = becp_d%mype
|
||||
becp%nbnd_loc = becp_d%nbnd_loc
|
||||
becp%ibnd_begin = becp_d%ibnd_begin
|
||||
!
|
||||
END IF
|
||||
becp_nc_ood = .false.
|
||||
ENDIF
|
||||
IF (intento_ > 0) becp_d_nc_d_ood = .true.
|
||||
#endif
|
||||
END SUBROUTINE using_becp_nc
|
||||
!
|
||||
SUBROUTINE using_becp_nc_d(intento)
|
||||
!
|
||||
USE becmod, ONLY : becp
|
||||
implicit none
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
#if defined(__CUDA)
|
||||
!
|
||||
IF (.not. allocated(becp%nc)) THEN
|
||||
IF (intento /= 2) print *, "WARNING: sync of becp%nc_d with unallocated array and intento /= 2?"
|
||||
IF (allocated(becp_d%nc_d)) DEALLOCATE(becp_d%nc_d)
|
||||
becp_d_nc_d_ood = .false.
|
||||
RETURN
|
||||
END IF
|
||||
! here we know that nc is allocated, check if size is 0
|
||||
IF ( SIZE(becp%nc) == 0 ) THEN
|
||||
print *, "Refusing to allocate 0 dimensional array becp_d%nc_d. If used, code will crash."
|
||||
RETURN
|
||||
END IF
|
||||
!
|
||||
IF (becp_d_nc_d_ood) THEN
|
||||
IF ( allocated(becp_d%nc_d) .and. (SIZE(becp_d%nc_d)/=SIZE(becp%nc))) deallocate(becp_d%nc_d)
|
||||
IF (.not. allocated(becp_d%nc_d)) ALLOCATE(becp_d%nc_d, MOLD=becp%nc) ! this copy may be avoided
|
||||
IF (intento < 2) THEN
|
||||
print *, "Really copied becp%nc H->D"
|
||||
becp_d%nc_d = becp%nc
|
||||
! update auxiliary variables
|
||||
becp_d%comm = becp%comm
|
||||
becp_d%nbnd = becp%nbnd
|
||||
becp_d%nproc = becp%nproc
|
||||
becp_d%mype = becp%mype
|
||||
becp_d%nbnd_loc = becp%nbnd_loc
|
||||
becp_d%ibnd_begin = becp%ibnd_begin
|
||||
!
|
||||
END IF
|
||||
becp_d_nc_d_ood = .false.
|
||||
ENDIF
|
||||
IF (intento > 0) becp_nc_ood = .true.
|
||||
#else
|
||||
CALL errore('using_becp_d%nc_d', 'Trying to use device data without device compilated code!', 1)
|
||||
#endif
|
||||
END SUBROUTINE using_becp_nc_d
|
||||
!
|
||||
SUBROUTINE deallocate_becmod_gpu
|
||||
IF( ALLOCATED( becp_d%r_d ) ) DEALLOCATE( becp_d%r_d )
|
||||
IF( ALLOCATED( becp_d%k_d ) ) DEALLOCATE( becp_d%k_d )
|
||||
IF( ALLOCATED( becp_d%nc_d ) ) DEALLOCATE( becp_d%nc_d )
|
||||
END SUBROUTINE deallocate_becmod_gpu
|
||||
!=----------------------------------------------------------------------------=!
|
||||
END MODULE becmod_gpum
|
||||
!=----------------------------------------------------------------------------=!
|
|
@ -0,0 +1,517 @@
|
|||
!
|
||||
! Copyright (C) 2001-2007 PWSCF group
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
MODULE becmod_subs_gpum
|
||||
|
||||
! NOTA BENE : THE SUBROUTINES IN THIS FILE HAVE NOT BEEN TESTED YET!!!!!!!
|
||||
|
||||
!
|
||||
! ... *bec* contain <beta|psi> - used in h_psi, s_psi, many other places
|
||||
! ... calbec( npw, beta, psi, betapsi [, nbnd ] ) is an interface calculating
|
||||
! ... betapsi(i,j) = <beta(i)|psi(j)> (the sum is over npw components)
|
||||
! ... or betapsi(i,s,j)= <beta(i)|psi(s,j)> (s=polarization index)
|
||||
!
|
||||
USE kinds, ONLY : DP
|
||||
USE control_flags, ONLY : gamma_only, smallmem
|
||||
USE gvect, ONLY : gstart
|
||||
USE noncollin_module, ONLY : noncolin, npol
|
||||
USE becmod_gpum, ONLY : bec_type_d
|
||||
!
|
||||
SAVE
|
||||
!
|
||||
PRIVATE
|
||||
!
|
||||
INTERFACE calbec_gpu
|
||||
!
|
||||
MODULE PROCEDURE calbec_k_gpu, calbec_gamma_gpu, calbec_gamma_nocomm_gpu, calbec_nc_gpu, calbec_bec_type_gpu
|
||||
!
|
||||
END INTERFACE
|
||||
|
||||
INTERFACE becscal_gpu
|
||||
!
|
||||
MODULE PROCEDURE becscal_nck_gpu, becscal_gamma_gpu
|
||||
!
|
||||
END INTERFACE
|
||||
!
|
||||
PUBLIC :: allocate_bec_type_gpu, deallocate_bec_type_gpu, calbec_gpu, &
|
||||
beccopy_gpu, becscal_gpu, is_allocated_bec_type_gpu, &
|
||||
using_becp_auto, using_becp_d_auto
|
||||
!
|
||||
CONTAINS
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE calbec_bec_type_gpu ( npw, beta_d, psi_d, betapsi_d, nbnd )
|
||||
!-----------------------------------------------------------------------
|
||||
!_
|
||||
USE mp_bands, ONLY: intra_bgrp_comm
|
||||
USE mp, ONLY: mp_get_comm_null
|
||||
!
|
||||
IMPLICIT NONE
|
||||
COMPLEX (DP), INTENT (in) :: beta_d(:,:), psi_d(:,:)
|
||||
TYPE (bec_type_d), TARGET, INTENT (inout) :: betapsi_d ! NB: must be INOUT otherwise
|
||||
! the allocatd array is lost
|
||||
INTEGER, INTENT (in) :: npw
|
||||
INTEGER, OPTIONAL :: nbnd
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: beta_d, psi_d
|
||||
#endif
|
||||
!
|
||||
INTEGER :: local_nbnd
|
||||
INTEGER, EXTERNAL :: ldim_block, gind_block
|
||||
INTEGER :: m_loc, m_begin, ip
|
||||
REAL(DP), ALLOCATABLE :: dtmp_d(:,:) ! replace this with buffers !
|
||||
INTEGER :: i, j, npwx
|
||||
REAL(DP), POINTER :: betapsi_d_r_d(:,:)
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: dtmp_d, betapsi_d_r_d
|
||||
#endif
|
||||
!
|
||||
IF ( present (nbnd) ) THEN
|
||||
local_nbnd = nbnd
|
||||
ELSE
|
||||
local_nbnd = size ( psi_d, 2)
|
||||
ENDIF
|
||||
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
IF( betapsi_d%comm == mp_get_comm_null() ) THEN
|
||||
!
|
||||
CALL calbec_gamma_gpu ( npw, beta_d, psi_d, betapsi_d%r_d, local_nbnd, intra_bgrp_comm )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
ALLOCATE( dtmp_d( SIZE( betapsi_d%r_d, 1 ), SIZE( betapsi_d%r_d, 2 ) ) )
|
||||
!
|
||||
DO ip = 0, betapsi_d%nproc - 1
|
||||
m_loc = ldim_block( betapsi_d%nbnd , betapsi_d%nproc, ip )
|
||||
m_begin = gind_block( 1, betapsi_d%nbnd, betapsi_d%nproc, ip )
|
||||
IF( ( m_begin + m_loc - 1 ) > local_nbnd ) m_loc = local_nbnd - m_begin + 1
|
||||
IF( m_loc > 0 ) THEN
|
||||
CALL calbec_gamma_gpu ( npw, beta_d, psi_d(:,m_begin:m_begin+m_loc-1), dtmp_d, m_loc, betapsi_d%comm )
|
||||
IF( ip == betapsi_d%mype ) THEN
|
||||
npwx = SIZE( betapsi_d%r_d, 1 )
|
||||
betapsi_d_r_d => betapsi_d%r_d
|
||||
!$cuf kernel do(2) <<<*,*>>>
|
||||
DO j=1,m_loc
|
||||
DO i=1, npwx
|
||||
betapsi_d_r_d(i,j) = dtmp_d(i,j)
|
||||
END DO
|
||||
END DO
|
||||
END IF
|
||||
END IF
|
||||
END DO
|
||||
|
||||
DEALLOCATE( dtmp_d )
|
||||
!
|
||||
END IF
|
||||
!
|
||||
ELSEIF ( noncolin) THEN
|
||||
!
|
||||
CALL calbec_nc_gpu ( npw, beta_d, psi_d, betapsi_d%nc_d, local_nbnd )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL calbec_k_gpu ( npw, beta_d, psi_d, betapsi_d%k_d, local_nbnd )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE calbec_bec_type_gpu
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE calbec_gamma_nocomm_gpu ( npw, beta_d, psi_d, betapsi_d, nbnd )
|
||||
!-----------------------------------------------------------------------
|
||||
USE mp_bands, ONLY: intra_bgrp_comm
|
||||
IMPLICIT NONE
|
||||
COMPLEX (DP), INTENT (in) :: beta_d(:,:), psi_d(:,:)
|
||||
REAL (DP), INTENT (out) :: betapsi_d(:,:)
|
||||
INTEGER, INTENT (in) :: npw
|
||||
INTEGER, OPTIONAL :: nbnd
|
||||
INTEGER :: m
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: beta_d, psi_d, betapsi_d
|
||||
#endif
|
||||
IF ( present (nbnd) ) THEN
|
||||
m = nbnd
|
||||
ELSE
|
||||
m = size ( psi_d, 2)
|
||||
ENDIF
|
||||
CALL calbec_gamma_gpu ( npw, beta_d, psi_d, betapsi_d, m, intra_bgrp_comm )
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE calbec_gamma_nocomm_gpu
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE calbec_gamma_gpu ( npw, beta_d, psi_d, betapsi_d, nbnd, comm )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! ... matrix times matrix with summation index (k=1,npw) running on
|
||||
! ... half of the G-vectors or PWs - assuming k=0 is the G=0 component:
|
||||
! ... betapsi(i,j) = 2Re(\sum_k beta^*(i,k)psi(k,j)) + beta^*(i,0)psi(0,j)
|
||||
!
|
||||
USE mp, ONLY : mp_sum
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE cublas
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
COMPLEX (DP), INTENT (in) :: beta_d(:,:), psi_d(:,:)
|
||||
REAL (DP), INTENT (out) :: betapsi_d(:,:)
|
||||
INTEGER, INTENT (in) :: npw
|
||||
INTEGER, INTENT (in) :: nbnd
|
||||
INTEGER, INTENT (in) :: comm
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: beta_d, psi_d, betapsi_d
|
||||
#endif
|
||||
INTEGER :: nkb, npwx, m
|
||||
INTEGER :: i,j
|
||||
!
|
||||
m = nbnd
|
||||
!
|
||||
nkb = size (beta_d, 2)
|
||||
IF ( nkb == 0 ) RETURN
|
||||
!
|
||||
CALL start_clock( 'calbec' )
|
||||
IF ( npw == 0 ) betapsi_d(:,:)=0.0_DP
|
||||
npwx= size (beta_d, 1)
|
||||
IF ( npwx /= size (psi_d, 1) ) CALL errore ('calbec', 'size mismatch', 1)
|
||||
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
|
||||
#if defined(DEBUG)
|
||||
WRITE (*,*) 'calbec gamma'
|
||||
WRITE (*,*) nkb, size (betapsi_d,1) , m , size (betapsi_d, 2)
|
||||
#endif
|
||||
IF ( nkb /= size (betapsi_d,1) .or. m > size (betapsi_d, 2) ) &
|
||||
CALL errore ('calbec', 'size mismatch', 3)
|
||||
!
|
||||
IF ( m == 1 ) THEN
|
||||
!
|
||||
CALL cudaDGEMV( 'C', 2*npw, nkb, 2.0_DP, beta_d, 2*npwx, psi_d, 1, 0.0_DP, &
|
||||
betapsi_d, 1 )
|
||||
IF ( gstart == 2 ) THEN
|
||||
!betapsi_d(:,1) = betapsi_d(:,1) - beta_d(1,:)*psi_d(1,1)
|
||||
!$cuf kernel do(1) <<<*,*>>>
|
||||
DO i=1, nkb
|
||||
betapsi_d(i,1) = betapsi_d(i,1) - DBLE(beta_d(1,i)*psi_d(1,1))
|
||||
END DO
|
||||
END IF
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL DGEMM( 'C', 'N', nkb, m, 2*npw, 2.0_DP, beta_d, 2*npwx, psi_d, &
|
||||
2*npwx, 0.0_DP, betapsi_d, nkb )
|
||||
IF ( gstart == 2 ) &
|
||||
CALL cudaDGER( nkb, m, -1.0_DP, beta_d, 2*npwx, psi_d, 2*npwx, betapsi_d, nkb )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
CALL mp_sum( betapsi_d( :, 1:m ), comm )
|
||||
!
|
||||
CALL stop_clock( 'calbec' )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE calbec_gamma_gpu
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE calbec_k_gpu ( npw, beta_d, psi_d, betapsi_d, nbnd )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! ... matrix times matrix with summation index (k=1,npw) running on
|
||||
! ... G-vectors or PWs : betapsi(i,j) = \sum_k beta^*(i,k) psi(k,j)
|
||||
!
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE mp, ONLY : mp_sum
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE cublas
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
COMPLEX (DP), INTENT (in) :: beta_d(:,:), psi_d(:,:)
|
||||
COMPLEX (DP), INTENT (out) :: betapsi_d(:,:)
|
||||
INTEGER, INTENT (in) :: npw
|
||||
INTEGER, OPTIONAL :: nbnd
|
||||
!
|
||||
INTEGER :: nkb, npwx, m
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
attributes(device) :: beta_d, psi_d, betapsi_d
|
||||
#endif
|
||||
nkb = size (beta_d, 2)
|
||||
IF ( nkb == 0 ) RETURN
|
||||
!
|
||||
CALL start_clock( 'calbec' )
|
||||
IF ( npw == 0 ) betapsi_d(:,:)=(0.0_DP,0.0_DP)
|
||||
npwx= size (beta_d, 1)
|
||||
IF ( npwx /= size (psi_d, 1) ) CALL errore ('calbec', 'size mismatch', 1)
|
||||
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
|
||||
IF ( present (nbnd) ) THEN
|
||||
m = nbnd
|
||||
ELSE
|
||||
m = size ( psi_d, 2)
|
||||
ENDIF
|
||||
#if defined(DEBUG)
|
||||
WRITE (*,*) 'calbec k'
|
||||
WRITE (*,*) nkb, size (betapsi_d,1) , m , size (betapsi_d, 2)
|
||||
#endif
|
||||
IF ( nkb /= size (betapsi_d,1) .or. m > size (betapsi_d, 2) ) &
|
||||
CALL errore ('calbec', 'size mismatch', 3)
|
||||
!
|
||||
IF ( m == 1 ) THEN
|
||||
!
|
||||
CALL ZGEMV( 'C', npw, nkb, (1.0_DP,0.0_DP), beta_d, npwx, psi_d, 1, &
|
||||
(0.0_DP, 0.0_DP), betapsi_d, 1 )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL ZGEMM( 'C', 'N', nkb, m, npw, (1.0_DP,0.0_DP), &
|
||||
beta_d, npwx, psi_d, npwx, (0.0_DP,0.0_DP), betapsi_d, nkb )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
CALL mp_sum( betapsi_d( :, 1:m ), intra_bgrp_comm )
|
||||
!
|
||||
CALL stop_clock( 'calbec' )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE calbec_k_gpu
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE calbec_nc_gpu ( npw, beta_d, psi_d, betapsi_d, nbnd )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! ... matrix times matrix with summation index (k below) running on
|
||||
! ... G-vectors or PWs corresponding to two different polarizations:
|
||||
! ... betapsi(i,1,j) = \sum_k=1,npw beta^*(i,k) psi(k,j)
|
||||
! ... betapsi(i,2,j) = \sum_k=1,npw beta^*(i,k) psi(k+npwx,j)
|
||||
!
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE mp, ONLY : mp_sum
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE cublas
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
COMPLEX (DP), INTENT (in) :: beta_d(:,:), psi_d(:,:)
|
||||
COMPLEX (DP), INTENT (out) :: betapsi_d(:,:,:)
|
||||
INTEGER, INTENT (in) :: npw
|
||||
INTEGER, OPTIONAL :: nbnd
|
||||
!
|
||||
INTEGER :: nkb, npwx, npol, m
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
attributes(device) :: beta_d, psi_d, betapsi_d
|
||||
#endif
|
||||
nkb = size (beta_d, 2)
|
||||
IF ( nkb == 0 ) RETURN
|
||||
!
|
||||
CALL start_clock ('calbec')
|
||||
IF ( npw == 0 ) betapsi_d(:,:,:)=(0.0_DP,0.0_DP)
|
||||
npwx= size (beta_d, 1)
|
||||
IF ( 2*npwx /= size (psi_d, 1) ) CALL errore ('calbec', 'size mismatch', 1)
|
||||
IF ( npwx < npw ) CALL errore ('calbec', 'size mismatch', 2)
|
||||
IF ( present (nbnd) ) THEN
|
||||
m = nbnd
|
||||
ELSE
|
||||
m = size ( psi_d, 2)
|
||||
ENDIF
|
||||
npol= size (betapsi_d, 2)
|
||||
#if defined(DEBUG)
|
||||
WRITE (*,*) 'calbec nc'
|
||||
WRITE (*,*) nkb, size (betapsi_d,1) , m , size (betapsi_d, 3)
|
||||
#endif
|
||||
IF ( nkb /= size (betapsi_d,1) .or. m > size (betapsi_d, 3) ) &
|
||||
CALL errore ('calbec', 'size mismatch', 3)
|
||||
!
|
||||
CALL ZGEMM ('C', 'N', nkb, m*npol, npw, (1.0_DP, 0.0_DP), beta_d, &
|
||||
npwx, psi_d, npwx, (0.0_DP, 0.0_DP), betapsi_d, nkb)
|
||||
!
|
||||
CALL mp_sum( betapsi_d( :, :, 1:m ), intra_bgrp_comm )
|
||||
!
|
||||
CALL stop_clock( 'calbec' )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE calbec_nc_gpu
|
||||
!
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
FUNCTION is_allocated_bec_type_gpu (bec_d) RESULT (isalloc)
|
||||
!-----------------------------------------------------------------------
|
||||
IMPLICIT NONE
|
||||
TYPE (bec_type_d) :: bec_d
|
||||
LOGICAL :: isalloc
|
||||
isalloc = (allocated(bec_d%r_d) .or. allocated(bec_d%nc_d) .or. allocated(bec_d%k_d))
|
||||
RETURN
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
END FUNCTION is_allocated_bec_type_gpu
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE allocate_bec_type_gpu ( )
|
||||
!-----------------------------------------------------------------------
|
||||
USE becmod_gpum, ONLY : using_becp_r, using_becp_r_d
|
||||
USE becmod_gpum, ONLY : using_becp_k, using_becp_k_d
|
||||
USE becmod_gpum, ONLY : using_becp_nc, using_becp_nc_d
|
||||
IMPLICIT NONE
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
CALL using_becp_r(2); CALL using_becp_r_d(0)
|
||||
!
|
||||
ELSEIF ( noncolin) THEN
|
||||
!
|
||||
CALL using_becp_nc(2); CALL using_becp_nc_d(0)
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL using_becp_k(2); CALL using_becp_k_d(0)
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE allocate_bec_type_gpu
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE deallocate_bec_type_gpu ( )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE becmod_gpum, ONLY : using_becp_r, using_becp_r_d
|
||||
USE becmod_gpum, ONLY : using_becp_k, using_becp_k_d
|
||||
USE becmod_gpum, ONLY : using_becp_nc, using_becp_nc_d
|
||||
IMPLICIT NONE
|
||||
!
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
CALL using_becp_r(2); CALL using_becp_r_d(0)
|
||||
!
|
||||
ELSEIF ( noncolin) THEN
|
||||
!
|
||||
CALL using_becp_nc(2); CALL using_becp_nc_d(0)
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL using_becp_k(2); CALL using_becp_k_d(0)
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE deallocate_bec_type_gpu
|
||||
|
||||
SUBROUTINE beccopy_gpu(bec, bec1, nkb, nbnd)
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE cublas
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
TYPE(bec_type_d), INTENT(in) :: bec
|
||||
TYPE(bec_type_d) :: bec1
|
||||
INTEGER, INTENT(in) :: nkb, nbnd
|
||||
|
||||
IF (gamma_only) THEN
|
||||
CALL dcopy(nkb*nbnd, bec%r_d, 1, bec1%r_d, 1)
|
||||
ELSEIF (noncolin) THEN
|
||||
CALL zcopy(nkb*npol*nbnd, bec%nc_d, 1, bec1%nc_d, 1)
|
||||
ELSE
|
||||
CALL zcopy(nkb*nbnd, bec%k_d, 1, bec1%k_d, 1)
|
||||
ENDIF
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE beccopy_gpu
|
||||
|
||||
SUBROUTINE becscal_nck_gpu(alpha, bec_d, nkb, nbnd)
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE cublas
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
TYPE(bec_type_d), INTENT(INOUT) :: bec_d
|
||||
COMPLEX(DP), INTENT(IN) :: alpha
|
||||
INTEGER, INTENT(IN) :: nkb, nbnd
|
||||
IF (gamma_only) THEN
|
||||
CALL errore('becscal_nck','called in the wrong case',1)
|
||||
ELSEIF (noncolin) THEN
|
||||
CALL zscal(nkb*npol*nbnd, alpha, bec_d%nc_d, 1)
|
||||
ELSE
|
||||
CALL zscal(nkb*nbnd, alpha, bec_d%k_d, 1)
|
||||
ENDIF
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE becscal_nck_gpu
|
||||
|
||||
SUBROUTINE becscal_gamma_gpu(alpha, bec_d, nkb, nbnd)
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE cublas
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
TYPE(bec_type_d), INTENT(INOUT) :: bec_d
|
||||
REAL(DP), INTENT(IN) :: alpha
|
||||
INTEGER, INTENT(IN) :: nkb, nbnd
|
||||
|
||||
IF (gamma_only) THEN
|
||||
CALL dscal(nkb*nbnd, alpha, bec_d%r_d, 1)
|
||||
ELSE
|
||||
CALL errore('becscal_gamma','called in the wrong case',1)
|
||||
ENDIF
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE becscal_gamma_gpu
|
||||
!
|
||||
SUBROUTINE using_becp_auto(intento)
|
||||
USE becmod_gpum, ONLY : using_becp_r
|
||||
USE becmod_gpum, ONLY : using_becp_k
|
||||
USE becmod_gpum, ONLY : using_becp_nc
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
!
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
CALL using_becp_r(intento)
|
||||
!
|
||||
ELSEIF ( noncolin) THEN
|
||||
!
|
||||
CALL using_becp_nc(intento)
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL using_becp_k(intento)
|
||||
!
|
||||
ENDIF
|
||||
END SUBROUTINE using_becp_auto
|
||||
!
|
||||
SUBROUTINE using_becp_d_auto(intento)
|
||||
USE becmod_gpum, ONLY : using_becp_r_d
|
||||
USE becmod_gpum, ONLY : using_becp_k_d
|
||||
USE becmod_gpum, ONLY : using_becp_nc_d
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: intento
|
||||
!
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
CALL using_becp_r_d(intento)
|
||||
!
|
||||
ELSEIF ( noncolin) THEN
|
||||
!
|
||||
CALL using_becp_nc_d(intento)
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL using_becp_k_d(intento)
|
||||
!
|
||||
ENDIF
|
||||
END SUBROUTINE using_becp_d_auto
|
||||
|
||||
|
||||
END MODULE becmod_subs_gpum
|
|
@ -0,0 +1,39 @@
|
|||
!----------------------------------------------
|
||||
! ... this file contains a number of subroutines optionally interfaced
|
||||
! ... to cublas
|
||||
!----------------------------------------------
|
||||
|
||||
SUBROUTINE cudaDGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
#if defined(__CUDA)
|
||||
use cudafor
|
||||
use cublas
|
||||
#endif
|
||||
implicit none
|
||||
DOUBLE PRECISION :: ALPHA,BETA
|
||||
INTEGER :: INCX,INCY,LDA,M,N
|
||||
CHARACTER :: TRANS
|
||||
DOUBLE PRECISION :: A(LDA,*),X(*),Y(*)
|
||||
#if defined(__CUDA)
|
||||
attributes(device) :: A, X, Y
|
||||
#endif
|
||||
!
|
||||
call DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
!
|
||||
END SUBROUTINE cudaDGEMV
|
||||
|
||||
SUBROUTINE cudaDGER ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
|
||||
#if defined(__CUDA)
|
||||
use cudafor
|
||||
use cublas
|
||||
#endif
|
||||
! .. Scalar Arguments ..
|
||||
DOUBLE PRECISION :: ALPHA
|
||||
INTEGER :: INCX, INCY, LDA, M, N
|
||||
! .. Array Arguments ..
|
||||
DOUBLE PRECISION :: A( LDA, * ), X( * ), Y( * )
|
||||
#if defined(__CUDA)
|
||||
attributes(device) :: A, X, Y
|
||||
#endif
|
||||
CALL DGER ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
|
||||
|
||||
END SUBROUTINE cudaDGER
|
|
@ -3,7 +3,12 @@ MODULES += \
|
|||
qe_buffers.o \
|
||||
wavefunctions_gpu.o \
|
||||
recvec_gpu.o \
|
||||
uspp_gpu.o
|
||||
uspp_gpu.o \
|
||||
becmod_gpu.o \
|
||||
becmod_subs_gpu.o \
|
||||
cuda_subroutines.o
|
||||
|
||||
recvec_gpu.o : recvec.o
|
||||
uspp_gpu.o : uspp.o
|
||||
becmod_gpu.o : becmod.o
|
||||
becmod_subs_gpu.o : becmod_gpu.o
|
||||
|
|
|
@ -31,6 +31,7 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
|
|||
USE becmod, ONLY: bec_type, becp
|
||||
!
|
||||
USE uspp_gpum, ONLY : using_vkb, using_indv_ijkb0, using_deeq, using_deeq_nc
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -46,6 +47,7 @@ SUBROUTINE add_vuspsi( lda, n, m, hpsi )
|
|||
!
|
||||
!
|
||||
CALL start_clock( 'add_vuspsi' )
|
||||
CALL using_becp_auto(0)
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
|
|
|
@ -193,6 +193,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
USE wvfct_gpum, ONLY : et_d, using_et, using_et_d, &
|
||||
g2kin_d, using_g2kin, using_g2kin_d
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -244,6 +245,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
! ... allocate space for <beta_i|psi_j> - used in h_psi and s_psi
|
||||
!
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp, intra_bgrp_comm )
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
npw = ngk(ik)
|
||||
IF ( gamma_only ) THEN
|
||||
|
@ -259,6 +261,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
! ... deallocate work space
|
||||
!
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
DEALLOCATE( s_diag )
|
||||
DEALLOCATE( h_diag )
|
||||
call using_h_diag(2); call using_s_diag(2)
|
||||
|
|
|
@ -35,6 +35,7 @@ SUBROUTINE compute_becsum ( iflag )
|
|||
!
|
||||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -55,6 +56,7 @@ SUBROUTINE compute_becsum ( iflag )
|
|||
!
|
||||
becsum(:,:,:) = 0.D0
|
||||
CALL allocate_bec_type (nkb,nbnd, becp,intra_bgrp_comm)
|
||||
CALL using_becp_auto(2)
|
||||
call divide (inter_bgrp_comm, nbnd, ibnd_start, ibnd_end )
|
||||
this_bgrp_nbnd = ibnd_end - ibnd_start + 1
|
||||
!
|
||||
|
@ -93,6 +95,7 @@ SUBROUTINE compute_becsum ( iflag )
|
|||
ENDIF
|
||||
!
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
CALL stop_clock( 'compute_becsum' )
|
||||
!
|
||||
|
|
|
@ -42,6 +42,7 @@ SUBROUTINE force_hub(forceh)
|
|||
|
||||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE uspp_gpum, ONLY : using_vkb, using_indv_ijkb0
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
|
||||
IMPLICIT NONE
|
||||
REAL (DP) :: forceh(3,nat) ! output: the Hubbard forces
|
||||
|
@ -66,6 +67,8 @@ SUBROUTINE force_hub(forceh)
|
|||
call allocate_bec_type ( nkb, nbnd, becp)
|
||||
call allocate_bec_type ( nwfcU, nbnd, proj )
|
||||
!
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
! poor-man parallelization over bands
|
||||
! - if nproc_pool=1 : nb_s=1, nb_e=nbnd, mykey=0
|
||||
! - if nproc_pool<=nbnd:each processor calculates band nb_s to nb_e; mykey=0
|
||||
|
@ -90,7 +93,7 @@ SUBROUTINE force_hub(forceh)
|
|||
CALL get_buffer (evc, nwordwfc, iunwfc, ik)
|
||||
IF (nks > 1) CALL using_evc(1)
|
||||
|
||||
CALL using_vkb(1)
|
||||
CALL using_vkb(1); CALL using_becp_auto(2)
|
||||
CALL init_us_2 (npw,igk_k(1,ik),xk(1,ik),vkb)
|
||||
CALL calbec( npw, vkb, evc, becp )
|
||||
CALL s_psi (npwx, npw, nbnd, evc, spsi )
|
||||
|
@ -145,6 +148,7 @@ SUBROUTINE force_hub(forceh)
|
|||
DEALLOCATE( wfcatom )
|
||||
DEALLOCATE( spsi )
|
||||
DEALLOCATE( dns )
|
||||
CALL using_becp_auto(2)
|
||||
|
||||
IF (nspin == 1) forceh(:,:) = 2.d0 * forceh(:,:)
|
||||
!
|
||||
|
|
|
@ -38,6 +38,7 @@ SUBROUTINE force_us( forcenl )
|
|||
USE wvfct_gpum, ONLY : using_et
|
||||
USE uspp_gpum, ONLY : using_vkb, using_indv_ijkb0, using_qq_at, &
|
||||
using_deeq
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -52,6 +53,7 @@ SUBROUTINE force_us( forcenl )
|
|||
forcenl(:,:) = 0.D0
|
||||
!
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp, intra_bgrp_comm )
|
||||
CALL using_becp_auto(2)
|
||||
CALL allocate_bec_type ( nkb, nbnd, dbecp, intra_bgrp_comm )
|
||||
ALLOCATE( vkb1( npwx, nkb ) )
|
||||
IF (noncolin) then
|
||||
|
@ -77,7 +79,7 @@ SUBROUTINE force_us( forcenl )
|
|||
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
|
||||
END IF
|
||||
!
|
||||
CALL using_vkb(0)
|
||||
CALL using_vkb(0); CALL using_becp_auto(2)
|
||||
CALL calbec ( npw, vkb, evc, becp )
|
||||
!
|
||||
DO ipol = 1, 3
|
||||
|
@ -105,6 +107,7 @@ SUBROUTINE force_us( forcenl )
|
|||
!
|
||||
! ... if sums over bands are parallelized over the band group
|
||||
!
|
||||
CALL using_becp_auto(1)
|
||||
IF( becp%comm /= mp_get_comm_null() ) CALL mp_sum( forcenl, becp%comm )
|
||||
!
|
||||
IF (noncolin) THEN
|
||||
|
@ -115,6 +118,7 @@ SUBROUTINE force_us( forcenl )
|
|||
DEALLOCATE( vkb1 )
|
||||
CALL deallocate_bec_type ( dbecp )
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
! ... collect contributions across pools from all k-points
|
||||
!
|
||||
|
|
|
@ -103,6 +103,7 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
!
|
||||
USE wvfct_gpum, ONLY : using_g2kin
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -126,6 +127,7 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
IF ( gamma_only ) THEN
|
||||
!
|
||||
IF ( real_space .and. nkb > 0 ) then
|
||||
CALL using_becp_auto(1)
|
||||
!
|
||||
! ... real-space algorithm
|
||||
! ... fixme: real_space without beta functions does not make sense
|
||||
|
@ -167,6 +169,7 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
! ... real-space algorithm
|
||||
! ... fixme: real_space without beta functions does not make sense
|
||||
!
|
||||
CALL using_becp_auto(1)
|
||||
IF ( dffts%has_task_groups ) then
|
||||
incr = fftx_ntgrp(dffts)
|
||||
ELSE
|
||||
|
@ -199,6 +202,8 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
! ... (not in the real-space case: it is done together with V_loc)
|
||||
!
|
||||
IF ( nkb > 0 .AND. .NOT. real_space) THEN
|
||||
!
|
||||
CALL using_becp_auto(1)
|
||||
!
|
||||
CALL start_clock( 'h_psi:calbec' )
|
||||
CALL calbec ( n, vkb, psi, becp, m )
|
||||
|
@ -242,6 +247,7 @@ SUBROUTINE h_psi_( lda, n, m, psi, hpsi )
|
|||
CALL vexxace_k(lda,m,psi,ee,hpsi)
|
||||
END IF
|
||||
ELSE
|
||||
CALL using_becp_auto(0)
|
||||
CALL vexx( lda, n, m, psi, hpsi, becp )
|
||||
END IF
|
||||
END IF
|
||||
|
|
|
@ -113,6 +113,7 @@ SUBROUTINE h_psi__gpu( lda, n, m, psi_d, hpsi_d )
|
|||
!
|
||||
USE wvfct_gpum, ONLY : g2kin_d, using_g2kin_d
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -132,6 +133,7 @@ SUBROUTINE h_psi__gpu( lda, n, m, psi_d, hpsi_d )
|
|||
CALL using_g2kin_d(0)
|
||||
CALL using_vrs_d(0)
|
||||
CALL using_vkb(0)
|
||||
CALL using_becp_auto(0)
|
||||
!
|
||||
hpsi_d (:, 1:m) = (0.0_dp, 0.0_dp)
|
||||
|
||||
|
|
|
@ -218,6 +218,7 @@ SUBROUTINE new_ns(ns)
|
|||
USE uspp_param, ONLY : nhm, nh
|
||||
!
|
||||
USE uspp_gpum, ONLY : using_vkb, using_indv_ijkb0
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
REAL(DP), INTENT(IN) :: q(nwfcU,nhm,nat)
|
||||
|
@ -232,6 +233,7 @@ SUBROUTINE new_ns(ns)
|
|||
! compute <beta|psi>
|
||||
!
|
||||
CALL allocate_bec_type (nkb, nbnd, becp)
|
||||
CALL using_becp_auto(2)
|
||||
CALL using_vkb(1)
|
||||
CALL init_us_2 (npw,igk_k(1,ik),xk(1,ik),vkb)
|
||||
CALL using_evc(0)
|
||||
|
@ -243,6 +245,7 @@ SUBROUTINE new_ns(ns)
|
|||
p%k(:,:) = (0.0_DP,0.0_DP)
|
||||
ENDIF
|
||||
!
|
||||
CALL using_becp_auto(0)
|
||||
DO nt = 1, ntyp
|
||||
!
|
||||
DO na = 1, nat
|
||||
|
@ -279,6 +282,7 @@ SUBROUTINE new_ns(ns)
|
|||
END DO
|
||||
!
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE compute_pproj
|
||||
|
|
|
@ -55,6 +55,7 @@ SUBROUTINE orbm_kubo()
|
|||
!
|
||||
USE scf_gpum, ONLY : using_vrs
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
|
||||
! --- Avoid implicit definitions ---
|
||||
IMPLICIT NONE
|
||||
|
@ -123,6 +124,7 @@ SUBROUTINE orbm_kubo()
|
|||
CALL using_vrs(1)
|
||||
CALL set_vrs( vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, nspin, doublegrid )
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp )
|
||||
CALL using_becp_auto(2)
|
||||
! Initializations
|
||||
|
||||
! Define small number
|
||||
|
@ -543,6 +545,7 @@ SUBROUTINE orbm_kubo()
|
|||
|
||||
! Deallocate arrays
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
DEALLOCATE(temp)
|
||||
DEALLOCATE(evc_k)
|
||||
DEALLOCATE(evc_kp)
|
||||
|
|
|
@ -30,7 +30,8 @@ SUBROUTINE orthoUwfc
|
|||
USE control_flags, ONLY : gamma_only
|
||||
USE noncollin_module, ONLY : noncolin, npol
|
||||
!
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -80,6 +81,7 @@ SUBROUTINE orthoUwfc
|
|||
|
||||
! Allocate the array becp = <beta|wfcatom>
|
||||
CALL allocate_bec_type (nkb,natomwfc, becp)
|
||||
CALL using_becp_auto(2)
|
||||
|
||||
DO ik = 1, nks
|
||||
|
||||
|
@ -107,6 +109,7 @@ SUBROUTINE orthoUwfc
|
|||
ENDDO
|
||||
DEALLOCATE (wfcatom, swfcatom)
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
RETURN
|
||||
|
||||
|
|
|
@ -332,6 +332,10 @@ CONTAINS
|
|||
USE becmod, ONLY: becp, calbec, allocate_bec_type, deallocate_bec_type
|
||||
USE exx, ONLY : exxenergy2, fock2
|
||||
USE funct, ONLY : dft_is_hybrid
|
||||
!
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
||||
COMPLEX(DP), ALLOCATABLE :: aux(:)
|
||||
INTEGER :: npw, ibnd, j, ig, ik,ikk, ispin, na, nt, ijkb0, ikb,jkb, ih,jh
|
||||
|
@ -340,6 +344,7 @@ CONTAINS
|
|||
|
||||
ALLOCATE (aux(dfftp%nnr))
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp )
|
||||
CALL using_becp_auto(2)
|
||||
|
||||
ek = 0.d0
|
||||
eloc= 0.d0
|
||||
|
@ -479,6 +484,7 @@ CONTAINS
|
|||
END IF
|
||||
!
|
||||
CALL deallocate_bec_type (becp)
|
||||
CALL using_becp_auto(2)
|
||||
DEALLOCATE (aux)
|
||||
|
||||
WRITE (stdout,*)
|
||||
|
|
|
@ -1592,6 +1592,8 @@ MODULE realus
|
|||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE mp, ONLY : mp_sum
|
||||
!
|
||||
USE becmod_gpum, ONLY : using_becp_k
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(in) :: ibnd, last
|
||||
|
@ -1606,6 +1608,7 @@ MODULE realus
|
|||
!
|
||||
!
|
||||
CALL start_clock( 'calbec_rs' )
|
||||
CALL using_becp_k(1) ! intento=2?
|
||||
!
|
||||
IF( dffts%has_task_groups ) CALL errore( 'calbec_rs_k', 'task_groups not implemented', 1 )
|
||||
|
||||
|
@ -1673,6 +1676,7 @@ MODULE realus
|
|||
USE fft_base, ONLY : dffts
|
||||
!
|
||||
USE uspp_gpum, ONLY : using_qq_at
|
||||
USE becmod_gpum, ONLY : using_becp_r
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1690,6 +1694,7 @@ MODULE realus
|
|||
|
||||
! Sync
|
||||
CALL using_qq_at(0)
|
||||
CALL using_becp_r(0)
|
||||
!
|
||||
fac = sqrt(omega)
|
||||
!
|
||||
|
@ -1760,6 +1765,7 @@ MODULE realus
|
|||
USE fft_base, ONLY : dffts
|
||||
!
|
||||
USE uspp_gpum, ONLY : using_qq_at
|
||||
USE becmod_gpum, ONLY : using_becp_k
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1778,6 +1784,7 @@ MODULE realus
|
|||
|
||||
! Sync
|
||||
CALL using_qq_at(0)
|
||||
CALL using_becp_k(0)
|
||||
|
||||
call set_xkphase(current_k)
|
||||
|
||||
|
@ -1853,6 +1860,7 @@ MODULE realus
|
|||
USE fft_base, ONLY : dffts
|
||||
!
|
||||
USE uspp_gpum, ONLY : using_deeq
|
||||
USE becmod_gpum, ONLY : using_becp_r
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1873,6 +1881,7 @@ MODULE realus
|
|||
ELSE !non task groups part starts here
|
||||
|
||||
CALL using_deeq(0)
|
||||
CALL using_becp_r(0)
|
||||
!
|
||||
fac = sqrt(omega)
|
||||
!
|
||||
|
@ -1958,6 +1967,7 @@ MODULE realus
|
|||
USE fft_base, ONLY : dffts
|
||||
!
|
||||
USE uspp_gpum, ONLY : using_deeq
|
||||
USE becmod_gpum, ONLY : using_becp_k
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -1975,6 +1985,7 @@ MODULE realus
|
|||
IF( dffts%has_task_groups ) CALL errore( 'add_vuspsir_k', 'task_groups not implemented', 1 )
|
||||
|
||||
CALL using_deeq(0)
|
||||
CALL using_becp_k(0)
|
||||
|
||||
call set_xkphase(current_k)
|
||||
!
|
||||
|
|
|
@ -169,6 +169,7 @@ SUBROUTINE s_psi_( lda, n, m, psi, spsi )
|
|||
! ... gamma version
|
||||
!
|
||||
USE mp, ONLY: mp_get_comm_null, mp_circular_shift_left
|
||||
USE becmod_gpum, ONLY : using_becp_r
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -185,6 +186,7 @@ SUBROUTINE s_psi_( lda, n, m, psi, spsi )
|
|||
!
|
||||
CALL using_indv_ijkb0(0)
|
||||
CALL using_qq_at(0)
|
||||
CALL using_becp_r(0)
|
||||
!
|
||||
IF( becp%comm == mp_get_comm_null() ) THEN
|
||||
nproc = 1
|
||||
|
@ -282,6 +284,8 @@ SUBROUTINE s_psi_( lda, n, m, psi, spsi )
|
|||
!
|
||||
! ... k-points version
|
||||
!
|
||||
USE becmod_gpum, ONLY : using_becp_k
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
! ... local variables
|
||||
|
@ -293,6 +297,7 @@ SUBROUTINE s_psi_( lda, n, m, psi, spsi )
|
|||
!
|
||||
CALL using_indv_ijkb0(0)
|
||||
CALL using_qq_at(0)
|
||||
CALL using_becp_k(0)
|
||||
!
|
||||
ALLOCATE( ps( nkb, m ), STAT=ierr )
|
||||
IF( ierr /= 0 ) &
|
||||
|
@ -344,6 +349,8 @@ SUBROUTINE s_psi_( lda, n, m, psi, spsi )
|
|||
!
|
||||
! ... k-points noncolinear/spinorbit version
|
||||
!
|
||||
USE becmod_gpum, ONLY : using_becp_nc
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
! here the local variables
|
||||
|
@ -356,6 +363,7 @@ SUBROUTINE s_psi_( lda, n, m, psi, spsi )
|
|||
CALL using_indv_ijkb0(0)
|
||||
IF ( .NOT. lspinorb ) CALL using_qq_at(0)
|
||||
IF (lspinorb) CALL using_qq_so(0)
|
||||
CALL using_becp_nc(0)
|
||||
!
|
||||
ALLOCATE (ps(nkb,npol,m),STAT=ierr)
|
||||
IF( ierr /= 0 ) &
|
||||
|
|
|
@ -104,6 +104,7 @@ SUBROUTINE s_psi__gpu( lda, n, m, psi_d, spsi_d )
|
|||
invfft_orbital_k, fwfft_orbital_k, calbec_rs_k, s_psir_k
|
||||
!
|
||||
USE uspp_gpum, ONLY : vkb_d, using_vkb_d
|
||||
USE becmod_gpum, ONLY : using_becp_r, using_becp_k, using_becp_nc
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -208,6 +209,7 @@ SUBROUTINE s_psi__gpu( lda, n, m, psi_d, spsi_d )
|
|||
! the product vkb and psi
|
||||
!
|
||||
CALL using_vkb_d(0)
|
||||
CALL using_becp_r(0)
|
||||
!
|
||||
IF( becp%comm == mp_get_comm_null() ) THEN
|
||||
nproc = 1
|
||||
|
@ -260,7 +262,7 @@ SUBROUTINE s_psi__gpu( lda, n, m, psi_d, spsi_d )
|
|||
!
|
||||
IF( becp%comm == mp_get_comm_null() ) THEN
|
||||
IF ( m == 1 ) THEN
|
||||
CALL myDGEMV( 'N', 2 * n, nkb, 1.D0, vkb_d, &
|
||||
CALL cudaDGEMV( 'N', 2 * n, nkb, 1.D0, vkb_d, &
|
||||
2 * lda, ps_d, 1, 1.D0, spsi_d, 1 )
|
||||
ELSE
|
||||
CALL cublasDGEMM( 'N', 'N', 2 * n, m, nkb, 1.D0, vkb_d, &
|
||||
|
@ -328,6 +330,7 @@ SUBROUTINE s_psi__gpu( lda, n, m, psi_d, spsi_d )
|
|||
|
||||
! sync vkb if needed
|
||||
CALL using_vkb_d(0)
|
||||
CALL using_becp_k(0)
|
||||
!
|
||||
ps(:,:) = ( 0.D0, 0.D0 )
|
||||
!
|
||||
|
@ -394,6 +397,7 @@ SUBROUTINE s_psi__gpu( lda, n, m, psi_d, spsi_d )
|
|||
|
||||
! sync vkb if needed
|
||||
CALL using_vkb_d(0)
|
||||
CALL using_becp_nc(0)
|
||||
|
||||
ps(:,:,:) = (0.D0,0.D0)
|
||||
!
|
||||
|
@ -448,18 +452,7 @@ SUBROUTINE s_psi__gpu( lda, n, m, psi_d, spsi_d )
|
|||
END SUBROUTINE s_psi__gpu
|
||||
!@nje
|
||||
|
||||
SUBROUTINE myDGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
use cudafor
|
||||
use cublas
|
||||
implicit none
|
||||
DOUBLE PRECISION ALPHA,BETA
|
||||
INTEGER INCX,INCY,LDA,M,N
|
||||
CHARACTER TRANS
|
||||
DOUBLE PRECISION, DEVICE :: A(LDA,*),X(*),Y(*)
|
||||
!
|
||||
call DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
!
|
||||
END SUBROUTINE myDGEMV
|
||||
|
||||
|
||||
SUBROUTINE s_psi_gpu_compatibility( lda, n, m, psi_d, spsi_d )
|
||||
USE kinds, ONLY : DP
|
||||
|
|
|
@ -140,6 +140,7 @@ SUBROUTINE dndepsilon ( ipol, jpol, ldim, dns )
|
|||
me_pool, nproc_pool
|
||||
USE mp, ONLY : mp_sum
|
||||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -175,6 +176,8 @@ SUBROUTINE dndepsilon ( ipol, jpol, ldim, dns )
|
|||
call allocate_bec_type ( nkb,nbnd, becp )
|
||||
ALLOCATE ( dns_(ldim,ldim,nspin,nat) )
|
||||
!
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
! D_Sl for l=1 and l=2 are already initialized, for l=0 D_S0 is 1
|
||||
!
|
||||
! Offset of atomic wavefunctions initialized in setup and stored in offsetU
|
||||
|
@ -280,6 +283,8 @@ SUBROUTINE dndepsilon ( ipol, jpol, ldim, dns )
|
|||
DEALLOCATE ( spsi )
|
||||
DEALLOCATE ( wfcatom )
|
||||
|
||||
CALL using_becp_auto(2)
|
||||
|
||||
RETURN
|
||||
END SUBROUTINE dndepsilon
|
||||
!
|
||||
|
@ -490,6 +495,7 @@ SUBROUTINE dprojdepsilon_gamma ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj
|
|||
USE mp, ONLY : mp_sum
|
||||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE uspp_gpum, ONLY : using_vkb, using_indv_ijkb0, using_qq_at
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
|
|
@ -34,6 +34,7 @@ SUBROUTINE stres_us( ik, gk, sigmanlc )
|
|||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE wvfct_gpum, ONLY : using_et
|
||||
USE uspp_gpum, ONLY : using_vkb, using_deeq
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -56,7 +57,8 @@ SUBROUTINE stres_us( ik, gk, sigmanlc )
|
|||
IF ( nks > 1 ) CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
|
||||
!
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp, intra_bgrp_comm )
|
||||
CALL using_vkb(0)
|
||||
|
||||
CALL using_vkb(0); CALL using_becp_auto(2)
|
||||
CALL calbec( npw, vkb, evc, becp )
|
||||
!
|
||||
ALLOCATE( qm1( npwx ) )
|
||||
|
@ -81,6 +83,7 @@ SUBROUTINE stres_us( ik, gk, sigmanlc )
|
|||
!
|
||||
DEALLOCATE( qm1 )
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -47,6 +47,7 @@ SUBROUTINE sum_band()
|
|||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE wvfct_gpum, ONLY : using_et
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -110,6 +111,7 @@ SUBROUTINE sum_band()
|
|||
! ... Allocate (and later deallocate) arrays needed in specific cases
|
||||
!
|
||||
IF ( okvan ) CALL allocate_bec_type (nkb,nbnd, becp,intra_bgrp_comm)
|
||||
IF ( okvan ) CALL using_becp_auto(2)
|
||||
IF (dft_is_meta() .OR. lxdm) ALLOCATE (kplusg(npwx))
|
||||
!
|
||||
! ... specialized routines are called to sum at Gamma or for each k point
|
||||
|
@ -132,6 +134,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
IF (dft_is_meta() .OR. lxdm) DEALLOCATE (kplusg)
|
||||
IF ( okvan ) CALL deallocate_bec_type ( becp )
|
||||
IF ( okvan ) CALL using_becp_auto(2)
|
||||
!
|
||||
! ... sum charge density over pools (distributed k-points) and bands
|
||||
!
|
||||
|
@ -878,6 +881,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
|
|||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE wvfct_gpum, ONLY : using_et
|
||||
USE uspp_gpum, ONLY : using_vkb, using_indv_ijkb0
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd
|
||||
|
@ -893,6 +897,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
|
|||
CALL using_et(0)
|
||||
CALL using_vkb(0)
|
||||
CALL using_indv_ijkb0(0)
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
npw = ngk(ik)
|
||||
IF ( .NOT. real_space ) THEN
|
||||
|
|
|
@ -640,6 +640,7 @@ SUBROUTINE extrapolate_wfcs( wfc_extr )
|
|||
!
|
||||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE uspp_gpum, ONLY : using_vkb
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -745,6 +746,7 @@ SUBROUTINE extrapolate_wfcs( wfc_extr )
|
|||
!
|
||||
IF ( nkb > 0 ) CALL using_vkb(1)
|
||||
IF ( nkb > 0 ) CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
|
||||
CALL using_becp_auto(2)
|
||||
CALL calbec( npw, vkb, evc, becp )
|
||||
!
|
||||
CALL s_psi ( npwx, npw, nbnd, evc, aux )
|
||||
|
@ -841,6 +843,7 @@ SUBROUTINE extrapolate_wfcs( wfc_extr )
|
|||
DEALLOCATE( u_m, w_m, ew, aux, evcold, sp_m )
|
||||
DEALLOCATE( work, rwork )
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
CLOSE( UNIT = iunoldwfc, STATUS = 'KEEP' )
|
||||
IF ( wfc_extr > 2 .OR. wfc_order > 2 ) &
|
||||
|
|
|
@ -31,10 +31,13 @@ SUBROUTINE usnldiag_gpu (npw, h_diag_d, s_diag_d)
|
|||
!
|
||||
INTEGER, INTENT(in) :: npw
|
||||
! number of plane waves
|
||||
REAL(dp), DEVICE, INTENT(inout) :: h_diag_d (npwx,npol)
|
||||
REAL(dp), INTENT(inout) :: h_diag_d (npwx,npol)
|
||||
! the diagonal part of the hamiltonian
|
||||
REAL(dp), DEVICE, INTENT(out) :: s_diag_d (npwx,npol)
|
||||
REAL(dp), INTENT(out) :: s_diag_d (npwx,npol)
|
||||
! the diagonal part of the S matrix
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: h_diag_d, s_diag_d
|
||||
#endif
|
||||
!
|
||||
INTEGER :: ig, ipol
|
||||
!
|
||||
|
|
|
@ -238,6 +238,7 @@ SUBROUTINE init_wfc ( ik )
|
|||
!
|
||||
USE wavefunctions_module_gpum, ONLY : using_evc
|
||||
USE wvfct_gpum, ONLY : using_et
|
||||
USE becmod_subs_gpum, ONLY : using_becp_auto
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -345,6 +346,7 @@ SUBROUTINE init_wfc ( ik )
|
|||
! ... Allocate space for <beta|psi>
|
||||
!
|
||||
CALL allocate_bec_type ( nkb, n_starting_wfc, becp, intra_bgrp_comm )
|
||||
CALL using_becp_auto (2)
|
||||
!
|
||||
! ... the following trick is for electric fields with Berry's phase:
|
||||
! ... by setting lelfield = .false. one prevents the calculation of
|
||||
|
@ -370,6 +372,7 @@ SUBROUTINE init_wfc ( ik )
|
|||
et(1:nbnd,ik) = etatom(1:nbnd)
|
||||
!
|
||||
CALL deallocate_bec_type ( becp )
|
||||
CALL using_becp_auto (2)
|
||||
DEALLOCATE( etatom )
|
||||
DEALLOCATE( wfcatom )
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue