porting lr_calc_dens and 'interaction' for NC cases

This commit is contained in:
Oscar Baseggio 2023-05-11 15:30:51 +02:00 committed by Oscar Baseggio
parent 0b060a0215
commit 9c285b3908
3 changed files with 122 additions and 95 deletions

View File

@ -2206,6 +2206,9 @@ MODULE realus
!
!The new task group version based on vloc_psi
!print *, "->Real space"
!
!$acc data present_or_copyin(orbital) present_or_copyout(psic)
!
CALL start_clock( 'invfft_orbital' )
!
IF( dffts%has_task_groups ) THEN
@ -2227,11 +2230,11 @@ MODULE realus
CALL wave_g2r( orbital(1:ngk(1),ibnd:ebnd), psic, dffts )
!
!-------------TEMPORARY---------------------
ngk1 = SIZE(psic)
#if defined(_OPENACC)
is_present = acc_is_present(psic,ngk1)
!$acc update self(psic) if (is_present)
#endif
! ngk1 = SIZE(psic)
!#if defined(_OPENACC)
! is_present = acc_is_present(psic,ngk1)
! !$acc update self(psic) if (is_present)
!#endif
!-------------------------------------------
!
IF (PRESENT(conserved)) THEN
@ -2245,6 +2248,8 @@ MODULE realus
!
CALL stop_clock( 'invfft_orbital' )
!
!$acc end data
!
END SUBROUTINE invfft_orbital_gamma
!
!--------------------------------------------------------------------------
@ -2298,6 +2303,8 @@ MODULE realus
!print *, "->Fourier space"
CALL start_clock( 'fwfft_orbital' )
!
!$acc data present_or_copyin(psic) present_or_copy(orbital)
!
add_to_orbital_=.FALSE. ; IF( PRESENT(add_to_orbital)) add_to_orbital_ = add_to_orbital
!
! ... New task_groups versions
@ -2353,29 +2360,31 @@ MODULE realus
!
!-------------TEMPORARY---------------------
ngk1 = SIZE(psic)
#if defined(_OPENACC)
is_present = acc_is_present(psic,ngk1)
!$acc update self(psic) if (is_present)
#endif
!#if defined(_OPENACC)
! is_present = acc_is_present(psic,ngk1)
! !$acc update self(psic) if (is_present)
!#endif
!-------------------------------------------
!
fac = 1.d0
IF ( ibnd<last ) fac = 0.5d0
!
IF ( add_to_orbital_ ) THEN
!$omp parallel do
!$acc parallel loop
! !$omp parallel do
DO j = 1, ngk(1)
orbital(j,ibnd) = orbital(j,ibnd) + fac*psio(j,1)
IF (ibnd<last) orbital(j,ibnd+1) = orbital(j,ibnd+1) + fac*psio(j,2)
ENDDO
!$omp end parallel do
! !$omp end parallel do
ELSE
!$omp parallel do
!$acc parallel loop
! !$omp parallel do
DO j = 1, ngk(1)
orbital(j,ibnd) = fac*psio(j,1)
IF (ibnd<last) orbital(j,ibnd+1) = fac*psio(j,2)
ENDDO
!$omp end parallel do
! !$omp end parallel do
ENDIF
!
DEALLOCATE( psio )
@ -2388,6 +2397,8 @@ MODULE realus
!
ENDIF
!
!$acc end data
!
CALL stop_clock( 'fwfft_orbital' )
!
END SUBROUTINE fwfft_orbital_gamma

View File

@ -335,7 +335,7 @@ CONTAINS
!
REAL(DP), ALLOCATABLE :: becp2(:,:)
REAL(DP), ALLOCATABLE :: tg_dvrss(:)
INTEGER :: v_siz, incr, ioff
INTEGER :: v_siz, incr, ioff, ir, nnr_siz
INTEGER :: ibnd_start_gamma, ibnd_end_gamma
!
incr = 2
@ -352,7 +352,13 @@ CONTAINS
!
IF ( interaction ) THEN
!
CALL start_clock('interaction')
nnr_siz= dffts%nnr
!$acc data copyin(revc0(1:nnr_siz,1:nbnd,1), dvrss(1:nnr_siz)) copyout(psic(1:nnr_siz),evc1_new(1:npwx*npol,1:nbnd,1:nks))
!
CALL start_clock_gpu('interaction')
!
! nnr_siz= dffts%nnr
! !$acc data copyin(revc0(1:nnr_siz,1:nbnd,1), dvrss(1:nnr_siz)) copyout(psic(1:nnr_siz))
!
IF (nkb > 0 .and. okvan) THEN
! calculation of becp2
@ -393,23 +399,25 @@ CONTAINS
ENDDO
!end: calculation of becp2
ENDIF
IF ( dffts%has_task_groups ) THEN
!
v_siz = dffts%nnr_tg
!
incr = 2 * fftx_ntgrp(dffts)
!
ALLOCATE( tg_dvrss(1:v_siz) )
tg_dvrss=0.0d0
!
CALL tg_gather(dffts, dvrss, tg_dvrss)
!
ENDIF
!
IF ( dffts%has_task_groups ) THEN
!
v_siz = dffts%nnr_tg
!
incr = 2 * fftx_ntgrp(dffts)
!
ALLOCATE( tg_dvrss(1:v_siz) )
tg_dvrss=0.0d0
!
CALL tg_gather(dffts, dvrss, tg_dvrss)
!
ENDIF
!
! evc1_new is used as a container for the interaction
!
!$acc kernels
evc1_new(:,:,:) = (0.0d0,0.0d0)
!$acc end kernels
!
ibnd_start_gamma = ibnd_start
IF (MOD(ibnd_start, 2)==0) ibnd_start_gamma = ibnd_start + 1
@ -433,7 +441,9 @@ CONTAINS
!
ELSE
!
DO ir = 1,dffts%nnr
!DO ir = 1,dffts%nnr
!$acc parallel loop
DO ir = 1, nnr_siz
!
psic(ir) = revc0(ir,ibnd,1)*CMPLX(dvrss(ir),0.0d0,DP)
!
@ -446,65 +456,65 @@ CONTAINS
ENDIF
!
IF (real_space .and. okvan .and. nkb > 0) THEN
!THE REAL SPACE PART (modified from s_psi)
!fac = sqrt(omega)
!
ijkb0 = 0
iqs = 0
jqs = 0
!
DO nt = 1, ntyp
!
DO ia = 1, nat
!THE REAL SPACE PART (modified from s_psi)
!fac = sqrt(omega)
!
ijkb0 = 0
iqs = 0
jqs = 0
!
DO nt = 1, ntyp
!
DO ia = 1, nat
!
IF ( ityp(ia) == nt ) THEN
!
IF ( ityp(ia) == nt ) THEN
!
mbia = maxbox_beta(ia)
ALLOCATE( w1(nh(nt)), w2(nh(nt)) )
w1 = 0.D0
w2 = 0.D0
!
DO ih = 1, nh(nt)
!
DO jh = 1, nh(nt)
!
jkb = ijkb0 + jh
w1(ih) = w1(ih) + becp2(jkb, ibnd)
IF ( ibnd+1 <= nbnd ) w2(ih) = w2(ih) + &
mbia = maxbox_beta(ia)
ALLOCATE( w1(nh(nt)), w2(nh(nt)) )
w1 = 0.D0
w2 = 0.D0
!
DO ih = 1, nh(nt)
!
DO jh = 1, nh(nt)
!
jkb = ijkb0 + jh
w1(ih) = w1(ih) + becp2(jkb, ibnd)
IF ( ibnd+1 <= nbnd ) w2(ih) = w2(ih) + &
&becp2(jkb, ibnd+1)
!
ENDDO
!
ENDDO
!
!w1 = w1 * fac
!w2 = w2 * fac
ijkb0 = ijkb0 + nh(nt)
!
DO ih = 1, nh(nt)
!
DO ir = 1, mbia
!
iqs = jqs + ir
psic( box_beta(box0(ia)+ir) ) = &
!
ENDDO
!
ENDDO
!
!w1 = w1 * fac
!w2 = w2 * fac
ijkb0 = ijkb0 + nh(nt)
!
DO ih = 1, nh(nt)
!
DO ir = 1, mbia
!
iqs = jqs + ir
psic( box_beta(box0(ia)+ir) ) = &
&psic( box_beta(box0(ia)+ir) ) + &
&betasave(box0(ia)+ir,ih)*&
&CMPLX( w1(ih), w2(ih), KIND=dp )
!
ENDDO
!
jqs = iqs
!
ENDDO
!
!
ENDDO
!
jqs = iqs
!
ENDDO
!
DEALLOCATE( w1, w2 )
!
ENDIF
!
ENDDO
!
ENDDO
!
ENDIF
!
ENDDO
!
ENDDO
!
ENDIF
!
! Back to reciprocal space
@ -514,7 +524,9 @@ CONTAINS
ENDDO
!
#if defined(__MPI)
!$acc host_data use_device(evc1_new)
CALL mp_sum( evc1_new(:,:,1), inter_bgrp_comm )
!$acc end host_data
#endif
IF (dffts%has_task_groups) DEALLOCATE (tg_dvrss)
!
@ -525,7 +537,9 @@ CONTAINS
!
ENDIF
!
CALL stop_clock('interaction')
!!! !$acc end data
CALL stop_clock_gpu('interaction')
!$acc end data
!
ENDIF
!

View File

@ -76,7 +76,7 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
INTEGER :: ir, ik, ibnd, jbnd, ig, ijkb0, np, na
INTEGER :: ijh,ih,jh,ikb,jkb,is
INTEGER :: i, j, k, l
INTEGER :: v_siz, nnr_siz
INTEGER :: v_siz, nnr_siz, irho
REAL(kind=dp) :: w1, w2, scal, rho_sum
! These are temporary buffers for the response
REAL(kind=dp), ALLOCATABLE :: rho_sum_resp_x(:), rho_sum_resp_y(:),&
@ -95,7 +95,7 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
v_siz = dfftp%nnr
nnr_siz = dffts%nnr
!
!$acc data create(psic(1:v_siz)) copyin(evc1(1:npwx*npol,1:nbnd,1:nks),revc0(1:nnr_siz,1:nbnd,1:nksq)) copyout( rho_1(1:v_siz,1:nspin_mag))
!$acc data create(psic(1:v_siz)) copyin(evc1(1:npwx*npol,1:nbnd,1:nks)) copyin(revc0(1:nnr_siz,1:nbnd,1:nksq)) copyout( rho_1(1:v_siz,1:nspin_mag))
!
!$acc kernels
psic(:) = (0.0d0, 0.0d0)
@ -181,13 +181,11 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
!
! The psic workspace can present a memory bottleneck
!
!$acc end data
!
DEALLOCATE ( psic )
!
#if defined(__MPI)
IF(gamma_only) THEN
!$acc host_data use_device(rho_1)
CALL mp_sum(rho_1, inter_pool_comm)
!$acc end host_data
ELSE
CALL mp_sum(rho_1c, inter_pool_comm)
ENDIF
@ -200,7 +198,11 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
DO is = 1, nspin_mag
!
rho_sum = 0.0d0
rho_sum = SUM(rho_1(:,is))
! rho_sum = SUM(rho_1(:,is))
!$acc parallel loop private(rho_sum) copy(rho_sum)
do irho = 1, v_siz
rho_sum = rho_sum + rho_1(i,is)
enddo
!
#if defined(__MPI)
CALL mp_sum(rho_sum, intra_bgrp_comm )
@ -234,6 +236,10 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
!
ENDIF
!
!$acc end data
!
DEALLOCATE ( psic )
!
IF (charge_response == 2 .AND. LR_iteration /=0) THEN
!
ALLOCATE( rho_sum_resp_x( dfftp%nr1 ) )
@ -378,7 +384,6 @@ CONTAINS
USE mp, ONLY : mp_sum
USE realus, ONLY : tg_psic
USE fft_base, ONLY : dffts
USE fft_wave, ONLY : wave_g2r
IMPLICIT NONE
!
@ -413,10 +418,7 @@ CONTAINS
ebnd = ibnd
IF ( ibnd < nbnd ) ebnd = ebnd + 1
!
CALL wave_g2r( evc1(1:ngk(1),ibnd:ebnd,1), psic, dffts )
! CALL invfft_orbital_gamma(evc1(:,:,1),ibnd,nbnd)
!
! !$acc update device(psic)
CALL invfft_orbital_gamma(evc1(:,:,1),ibnd,nbnd)
!
IF (dffts%has_task_groups) THEN
!