Fix ph_base/ tests at Gamma (still broken with kpoints)

This commit is contained in:
Ivan Carnimeo 2024-04-18 09:48:51 +02:00
parent 84ea8ff29a
commit ecf7e0c845
10 changed files with 37 additions and 73 deletions

View File

@ -36,7 +36,6 @@ SUBROUTINE c_bands_nscf_ph( )
USE noncollin_module, ONLY : noncolin, npol, domag
USE io_files, ONLY : tmp_dir, prefix
USE uspp_init, ONLY : init_us_2
USE wavefunctions_gpum, ONLY : using_evc, using_evc_d
!
IMPLICIT NONE
!
@ -57,10 +56,10 @@ SUBROUTINE c_bands_nscf_ph( )
!
! ... If restarting, calculated wavefunctions have to be read from file
!
CALL using_evc(2)
DO ik = 1, ik_
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
END DO
!$acc update device(evc)
!
IF ( isolve == 0 ) THEN
WRITE( stdout, '(5X,"Davidson diagonalization with overlap")' )
@ -103,8 +102,8 @@ SUBROUTINE c_bands_nscf_ph( )
!
IF ( TRIM(starting_wfc) == 'file' ) THEN
!
CALL using_evc(2)
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
!$acc update device(evc)
!
ELSE
!
@ -114,28 +113,30 @@ SUBROUTINE c_bands_nscf_ph( )
!
! ... diagonalization of bands for k-point ik
!
#if defined(__CUDA)
call using_evc_d(0)
#endif
Call check_wfc( '@1', 'DH' )
call diag_bands ( 1, ik, avg_iter )
!$acc update self(evc)
Call check_wfc( '@2', 'DH' )
!
! In the noncolinear magnetic case we have k, k+q, -k -k-q and
! to the last two wavefunctions we must apply t_rev.
! When lgamma is true we have only k and -k
!
IF (noncolin.AND.domag) THEN
call using_evc(0)
IF (lgamma.AND. MOD(ik,2)==0) THEN
CALL apply_trev(evc, ik, ik-1)
!$acc update device(evc)
ELSEIF (.NOT.lgamma.AND.(MOD(ik,4)==3.OR.MOD(ik,4)==0)) THEN
CALL apply_trev(evc, ik, ik-2)
!$acc update device(evc)
ENDIF
ENDIF
!
! ... save wave-functions (unless disabled in input)
!
call using_evc(0)
Call check_wfc( '@3', 'DH' )
IF ( io_level > -1 ) CALL save_buffer ( evc, nwordwfc, iunwfc, ik )
Call check_wfc( '@4', 'DH' )
!
! ... beware: with pools, if the number of k-points on different
! ... pools differs, make sure that all processors are still in
@ -168,6 +169,7 @@ SUBROUTINE c_bands_nscf_ph( )
WRITE( stdout, '(/,5X,"ethr = ",1PE9.2,", avg # of iterations =",0PF5.1)' ) &
ethr, avg_iter
!
Call check_wfc( '@5', 'DH' )
CALL stop_clock( 'c_bands' )
!
RETURN

View File

@ -27,9 +27,6 @@ subroutine incdrhoscf (drhoscf, weight, ik, dbecsum, dpsi)
USE mp_bands, ONLY : me_bgrp, inter_bgrp_comm, ntask_groups
USE mp, ONLY : mp_sum
USE fft_helper_subroutines
#if defined(__CUDA)
USE wavefunctions_gpum, ONLY : evc_d
#endif
IMPLICIT NONE
!
@ -63,7 +60,7 @@ subroutine incdrhoscf (drhoscf, weight, ik, dbecsum, dpsi)
INTEGER, POINTER, DEVICE :: nl_d(:)
!
nl_d => dffts%nl_d
evc_d = evc
!$acc update device(evc)
#else
INTEGER, ALLOCATABLE :: nl_d(:)
!
@ -101,7 +98,7 @@ subroutine incdrhoscf (drhoscf, weight, ik, dbecsum, dpsi)
! dpsi contains the perturbed wavefunctions of this k point
! evc contains the unperturbed wavefunctions of this k point
!
!$acc data copyin(dpsi(1:npwx,1:nbnd)) copy(drhoscf(1:v_siz)) create(psi(1:v_siz),dpsic(1:v_siz)) present(igk_k) deviceptr(evc_d, nl_d)
!$acc data copyin(dpsi(1:npwx,1:nbnd)) copy(drhoscf(1:v_siz)) create(psi(1:v_siz),dpsic(1:v_siz)) present(igk_k) deviceptr(nl_d)
do ibnd = 1, nbnd_occ(ikk), incr
!
IF ( dffts%has_task_groups ) THEN
@ -159,11 +156,7 @@ subroutine incdrhoscf (drhoscf, weight, ik, dbecsum, dpsi)
!$acc parallel loop
do ig = 1, npw
itmp = nl_d (igk_k(ig,ikk) )
#if defined(__CUDA)
psi (itmp ) = evc_d (ig, ibnd)
#else
psi (itmp ) = evc (ig, ibnd)
#endif
enddo
!$acc parallel loop
do ig = 1, npwq

View File

@ -31,9 +31,6 @@ subroutine incdrhoscf_nc (drhoscf, weight, ik, dbecsum, dpsi, rsign)
USE mp_bands, ONLY : me_bgrp, inter_bgrp_comm, ntask_groups
USE mp, ONLY : mp_sum
USE fft_helper_subroutines
#if defined(__CUDA)
USE wavefunctions_gpum, ONLY : evc_d
#endif
IMPLICIT NONE
!
@ -70,7 +67,7 @@ subroutine incdrhoscf_nc (drhoscf, weight, ik, dbecsum, dpsi, rsign)
INTEGER, POINTER, DEVICE :: nl_d(:)
!
nl_d => dffts%nl_d
evc_d = evc
!$acc update device(evc)
#else
INTEGER, ALLOCATABLE :: nl_d(:)
!
@ -109,7 +106,7 @@ subroutine incdrhoscf_nc (drhoscf, weight, ik, dbecsum, dpsi, rsign)
! dpsi contains the perturbed wavefunctions of this k point
! evc contains the unperturbed wavefunctions of this k point
!
!$acc data copyin(dpsi(1:npwx*npol,1:nbnd)) copy(drhoscf(1:v_sizp,1:nspin_mag)) create(psi(1:v_siz,1:npol),dpsic(1:v_siz,1:npol)) present(igk_k) deviceptr(evc_d, nl_d)
!$acc data copyin(dpsi(1:npwx*npol,1:nbnd)) copy(drhoscf(1:v_sizp,1:nspin_mag)) create(psi(1:v_siz,1:npol),dpsic(1:v_siz,1:npol)) present(igk_k) deviceptr(nl_d)
do ibnd = 1, nbnd_occ(ikk), incr
IF (dffts%has_task_groups) THEN
@ -185,13 +182,8 @@ subroutine incdrhoscf_nc (drhoscf, weight, ik, dbecsum, dpsi, rsign)
!$acc parallel loop
do ig = 1, npw
itmp = nl_d ( igk_k(ig,ikk) )
#if defined(__CUDA)
psi (itmp, 1) = evc_d (ig, ibnd)
psi (itmp, 2) = evc_d (ig+npwx, ibnd)
#else
psi (itmp, 1) = evc (ig, ibnd)
psi (itmp, 2) = evc (ig+npwx, ibnd)
#endif
enddo
!$acc parallel loop
do ig = 1, npwq

View File

@ -31,9 +31,6 @@ subroutine localdos (ldos, ldoss, becsum1, dos_ef)
USE wvfct, ONLY : nbnd, npwx, et
USE becmod, ONLY : calbec, bec_type, allocate_bec_type_acc, deallocate_bec_type_acc
USE wavefunctions, ONLY : evc, psic, psic_nc
#if defined(__CUDA)
USE wavefunctions_gpum, ONLY : evc_d
#endif
USE uspp, ONLY : okvan, nkb, vkb
USE uspp_param, ONLY : upf, nh, nhm
USE qpoint, ONLY : nksq, ikks
@ -80,7 +77,7 @@ subroutine localdos (ldos, ldoss, becsum1, dos_ef)
INTEGER, POINTER, DEVICE :: nl_d(:)
!
nl_d => dffts%nl_d
evc_d = evc
!$acc update device(evc)
#else
INTEGER, ALLOCATABLE :: nl_d(:)
!
@ -114,9 +111,7 @@ subroutine localdos (ldos, ldoss, becsum1, dos_ef)
!
if (nksq > 1) then
call get_buffer (evc, lrwfc, iuwfc, ikks(ik))
#if defined(__CUDA)
evc_d = evc
#endif
!$acc update device(evc)
endif
call init_us_2 (npw, igk_k(1,ikks(ik)), xk (1, ikks(ik)), vkb, .true.)
!
@ -142,13 +137,8 @@ subroutine localdos (ldos, ldoss, becsum1, dos_ef)
!$acc end kernels
!$acc parallel loop present(igk_k, psic_nc)
do ig = 1, npw
#if defined(__CUDA)
psic_nc (nl_d (igk_k(ig,ikks(ik))), 1 ) = evc_d (ig, ibnd)
psic_nc (nl_d (igk_k(ig,ikks(ik))), 2 ) = evc_d (ig+npwx, ibnd)
#else
psic_nc (nl_d (igk_k(ig,ikks(ik))), 1 ) = evc (ig, ibnd)
psic_nc (nl_d (igk_k(ig,ikks(ik))), 2 ) = evc (ig+npwx, ibnd)
#endif
enddo
!$acc end parallel loop
!$acc host_data use_device(psic_nc)
@ -187,11 +177,7 @@ subroutine localdos (ldos, ldoss, becsum1, dos_ef)
!$acc end kernels
!$acc parallel loop present(psic)
do ig = 1, npw
#if defined(__CUDA)
psic (nl_d (igk_k(ig,ikks(ik)) ) ) = evc_d (ig, ibnd)
#else
psic (nl_d (igk_k(ig,ikks(ik)) ) ) = evc (ig, ibnd)
#endif
enddo
!$acc end parallel loop
!$acc host_data use_device(psic)

View File

@ -69,7 +69,7 @@ SUBROUTINE orthogonalize(dvpsi, evq, ikk, ikq, dpsi, npwq, dpsi_computed)
!
ALLOCATE(ps(nbnd,nbnd))
!
!$acc data copyin(evq) present_or_copy(dvpsi) copy(dpsi) create(ps(1:nbnd, 1:nbnd), ps_r(1:nbnd, 1:nbnd))
!$acc data copy(dvpsi,dpsi) create(ps(1:nbnd, 1:nbnd), ps_r(1:nbnd, 1:nbnd))
IF (gamma_only) THEN
!$acc kernels
ps_r(:,:) = 0.0d0

View File

@ -171,8 +171,10 @@ SUBROUTINE sternheimer_kernel(first_iter, time_reversed, npert, lrdvpsi, iudvpsi
IF (nksq > 1 .OR. (noncolin .AND. domag)) THEN
IF (lgamma) THEN
CALL get_buffer(evc, lrwfc, iuwfc, ikmk)
!$acc update device(evc)
ELSE
CALL get_buffer(evc, lrwfc, iuwfc, ikmk)
!$acc update device(evc)
CALL get_buffer(evq, lrwfc, iuwfc, ikmkmq)
ENDIF
ENDIF

View File

@ -48,6 +48,9 @@ SUBROUTINE do_phonon(auxdyn)
USE buffers, ONLY : close_buffer
USE control_flags, ONLY : use_gpu
USE environment, ONLY : print_cuda_info
!civn
use wavefunctions, only : evc
!
IMPLICIT NONE
!
@ -58,6 +61,9 @@ SUBROUTINE do_phonon(auxdyn)
!
qind = 0
!
call check_wfc( '@@0HH', 'HH' )
!$acc update device(evc)
call check_wfc( '@@0DH', 'DH' )
DO iq = 1, nqs
!
CALL prepare_q(auxdyn, do_band, do_iq, setup_pw, iq)
@ -81,6 +87,7 @@ SUBROUTINE do_phonon(auxdyn)
IF (reduce_io .AND. (qind == 1)) THEN
CALL close_buffer( iunwfc, 'DELETE' )
ENDIF
write(*,*) '@chk calling run_nscf'
CALL run_nscf(do_band, iq)
ELSE
CALL print_cuda_info(check_use_gpu=.true.)
@ -94,14 +101,20 @@ SUBROUTINE do_phonon(auxdyn)
GOTO 100
ENDIF
!
call check_wfc( '@@1HH', 'HH' )
call check_wfc( '@@1DH', 'DH' )
! Initialize the quantities which do not depend on
! the linear response of the system
!
CALL initialize_ph()
call check_wfc( '@@2', 'DH' )
!
! electric field perturbation
!
call check_wfc( '@@3HH', 'HH' )
call check_wfc( '@@3DH', 'DH' )
IF (epsil) CALL phescf()
call check_wfc( '@@4', 'DH' )
!
! IF only_init is .true. the code computes only the
! initialization parts.

View File

@ -35,7 +35,6 @@ subroutine dvqpsi_us (ik, uact, addnlcc, becp1, alphap)
use uspp_param,ONLY : upf
USE wvfct, ONLY : nbnd, npwx
USE wavefunctions, ONLY: evc
USE wavefunctions_gpum, ONLY: evc_d
USE nlcc_ph, ONLY : drc
USE uspp, ONLY : nlcc_any
USE eqv, ONLY : dvpsi, dmuxc, vlocq
@ -123,7 +122,7 @@ subroutine dvqpsi_us (ik, uact, addnlcc, becp1, alphap)
npwq= ngk(ikq)
nnr = dffts%nnr
!
!$acc data create(aux1(1:nnr),aux2(1:nnr)) copyout(dvpsi) copyin(vlocq,drc,dmuxc) present( igk_k ) deviceptr(evc_d, nl_d, nlp_d)
!$acc data create(aux1(1:nnr),aux2(1:nnr)) copyout(dvpsi) copyin(vlocq,drc,dmuxc) present( igk_k ) deviceptr(nl_d, nlp_d)
!$acc kernels present(dvpsi,aux1)
dvpsi(:,:) = (0.d0, 0.d0)
aux1(:) = (0.d0, 0.d0)
@ -255,9 +254,8 @@ subroutine dvqpsi_us (ik, uact, addnlcc, becp1, alphap)
!
! Now we compute dV_loc/dtau in real space
!
#if defined(__CUDA)
evc_d = evc
#endif
!$acc update device(evc)
!
!$acc host_data use_device(aux1)
CALL invfft ('Rho', aux1, dffts)
!$acc end host_data
@ -270,21 +268,13 @@ subroutine dvqpsi_us (ik, uact, addnlcc, becp1, alphap)
!$acc parallel loop present(aux2, igk_k)
do ig = 1, npw
itmp = nl_d (igk_k (ig,ikk) )
#if defined(__CUDA)
aux2 ( itmp ) = evc_d (ig, ibnd)
#else
aux2 ( itmp ) = evc (ig, ibnd)
#endif
enddo
else
!$acc parallel loop present(aux2, igk_k)
do ig = 1, npw
itmp = nl_d (igk_k (ig,ikk) )
#if defined(__CUDA)
aux2 ( itmp ) = evc_d (ig+npwx, ibnd)
#else
aux2 ( itmp ) = evc (ig+npwx, ibnd)
#endif
enddo
end if
!

View File

@ -43,9 +43,6 @@ SUBROUTINE phq_init()
USE wvfct, ONLY : npwx, nbnd
USE gvecw, ONLY : gcutw
USE wavefunctions, ONLY : evc
#if defined(__CUDA)
USE wavefunctions_gpum, ONLY : evc_d
#endif
USE noncollin_module, ONLY : noncolin, domag, npol, lspinorb
USE uspp, ONLY : okvan, vkb, nlcc_any, nkb
USE phus, ONLY : alphap
@ -194,10 +191,8 @@ SUBROUTINE phq_init()
! ... the code
!
#if defined(__CUDA)
evc_d = evc
!$acc data present_or_copyin(evc)
!$acc update device(evc)
Call calbec( offload_type, npw, vkb, evc, bectmp )
!$acc end data
Call becupdate( offload_type, becp1, ik, nksq, bectmp )
#else
Call calbec( offload_type, npw, vkb, evc, becp1(ik) )
@ -221,13 +216,8 @@ SUBROUTINE phq_init()
DO ibnd = 1, nbnd
DO ig = 1, npw
itmp = igk_k(ig,ikk)
#if defined(__CUDA)
aux1(ig,ibnd) = evc_d(ig,ibnd) * tpiba * ( 0.D0, 1.D0 ) * &
( xk(ipol,ikk) + g(ipol,itmp) )
#else
aux1(ig,ibnd) = evc(ig,ibnd) * tpiba * ( 0.D0, 1.D0 ) * &
( xk(ipol,ikk) + g(ipol,itmp) )
#endif
END DO
END DO
IF (noncolin) THEN
@ -235,13 +225,8 @@ SUBROUTINE phq_init()
DO ibnd = 1, nbnd
DO ig = 1, npw
itmp = igk_k(ig,ikk)
#if defined(__CUDA)
aux1(ig+npwx,ibnd)=evc_d(ig+npwx,ibnd)*tpiba*(0.D0,1.D0)*&
( xk(ipol,ikk) + g(ipol,itmp) )
#else
aux1(ig+npwx,ibnd)=evc(ig+npwx,ibnd)*tpiba*(0.D0,1.D0)*&
( xk(ipol,ikk) + g(ipol,itmp) )
#endif
END DO
END DO
END IF

View File

@ -163,6 +163,7 @@ subroutine solve_e
!
IF (nksq > 1) THEN
CALL get_buffer(evc, lrwfc, iuwfc, ikk)
!$acc update device(evc)
ENDIF
!
CALL init_us_2(npw, igk_k(1, ikk), xk(1, ikk), vkb, .true.)