mirror of https://gitlab.com/QEF/q-e.git
porting lr_calc_dens and 'interaction' for NC cases
This commit is contained in:
parent
0b060a0215
commit
9c285b3908
|
@ -2206,6 +2206,9 @@ MODULE realus
|
|||
!
|
||||
!The new task group version based on vloc_psi
|
||||
!print *, "->Real space"
|
||||
!
|
||||
!$acc data present_or_copyin(orbital) present_or_copyout(psic)
|
||||
!
|
||||
CALL start_clock( 'invfft_orbital' )
|
||||
!
|
||||
IF( dffts%has_task_groups ) THEN
|
||||
|
@ -2227,11 +2230,11 @@ MODULE realus
|
|||
CALL wave_g2r( orbital(1:ngk(1),ibnd:ebnd), psic, dffts )
|
||||
!
|
||||
!-------------TEMPORARY---------------------
|
||||
ngk1 = SIZE(psic)
|
||||
#if defined(_OPENACC)
|
||||
is_present = acc_is_present(psic,ngk1)
|
||||
!$acc update self(psic) if (is_present)
|
||||
#endif
|
||||
! ngk1 = SIZE(psic)
|
||||
!#if defined(_OPENACC)
|
||||
! is_present = acc_is_present(psic,ngk1)
|
||||
! !$acc update self(psic) if (is_present)
|
||||
!#endif
|
||||
!-------------------------------------------
|
||||
!
|
||||
IF (PRESENT(conserved)) THEN
|
||||
|
@ -2245,6 +2248,8 @@ MODULE realus
|
|||
!
|
||||
CALL stop_clock( 'invfft_orbital' )
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
END SUBROUTINE invfft_orbital_gamma
|
||||
!
|
||||
!--------------------------------------------------------------------------
|
||||
|
@ -2298,6 +2303,8 @@ MODULE realus
|
|||
!print *, "->Fourier space"
|
||||
CALL start_clock( 'fwfft_orbital' )
|
||||
!
|
||||
!$acc data present_or_copyin(psic) present_or_copy(orbital)
|
||||
!
|
||||
add_to_orbital_=.FALSE. ; IF( PRESENT(add_to_orbital)) add_to_orbital_ = add_to_orbital
|
||||
!
|
||||
! ... New task_groups versions
|
||||
|
@ -2353,29 +2360,31 @@ MODULE realus
|
|||
!
|
||||
!-------------TEMPORARY---------------------
|
||||
ngk1 = SIZE(psic)
|
||||
#if defined(_OPENACC)
|
||||
is_present = acc_is_present(psic,ngk1)
|
||||
!$acc update self(psic) if (is_present)
|
||||
#endif
|
||||
!#if defined(_OPENACC)
|
||||
! is_present = acc_is_present(psic,ngk1)
|
||||
! !$acc update self(psic) if (is_present)
|
||||
!#endif
|
||||
!-------------------------------------------
|
||||
!
|
||||
fac = 1.d0
|
||||
IF ( ibnd<last ) fac = 0.5d0
|
||||
!
|
||||
IF ( add_to_orbital_ ) THEN
|
||||
!$omp parallel do
|
||||
!$acc parallel loop
|
||||
! !$omp parallel do
|
||||
DO j = 1, ngk(1)
|
||||
orbital(j,ibnd) = orbital(j,ibnd) + fac*psio(j,1)
|
||||
IF (ibnd<last) orbital(j,ibnd+1) = orbital(j,ibnd+1) + fac*psio(j,2)
|
||||
ENDDO
|
||||
!$omp end parallel do
|
||||
! !$omp end parallel do
|
||||
ELSE
|
||||
!$omp parallel do
|
||||
!$acc parallel loop
|
||||
! !$omp parallel do
|
||||
DO j = 1, ngk(1)
|
||||
orbital(j,ibnd) = fac*psio(j,1)
|
||||
IF (ibnd<last) orbital(j,ibnd+1) = fac*psio(j,2)
|
||||
ENDDO
|
||||
!$omp end parallel do
|
||||
! !$omp end parallel do
|
||||
ENDIF
|
||||
!
|
||||
DEALLOCATE( psio )
|
||||
|
@ -2388,6 +2397,8 @@ MODULE realus
|
|||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
CALL stop_clock( 'fwfft_orbital' )
|
||||
!
|
||||
END SUBROUTINE fwfft_orbital_gamma
|
||||
|
|
|
@ -335,7 +335,7 @@ CONTAINS
|
|||
!
|
||||
REAL(DP), ALLOCATABLE :: becp2(:,:)
|
||||
REAL(DP), ALLOCATABLE :: tg_dvrss(:)
|
||||
INTEGER :: v_siz, incr, ioff
|
||||
INTEGER :: v_siz, incr, ioff, ir, nnr_siz
|
||||
INTEGER :: ibnd_start_gamma, ibnd_end_gamma
|
||||
!
|
||||
incr = 2
|
||||
|
@ -352,7 +352,13 @@ CONTAINS
|
|||
!
|
||||
IF ( interaction ) THEN
|
||||
!
|
||||
CALL start_clock('interaction')
|
||||
nnr_siz= dffts%nnr
|
||||
!$acc data copyin(revc0(1:nnr_siz,1:nbnd,1), dvrss(1:nnr_siz)) copyout(psic(1:nnr_siz),evc1_new(1:npwx*npol,1:nbnd,1:nks))
|
||||
!
|
||||
CALL start_clock_gpu('interaction')
|
||||
!
|
||||
! nnr_siz= dffts%nnr
|
||||
! !$acc data copyin(revc0(1:nnr_siz,1:nbnd,1), dvrss(1:nnr_siz)) copyout(psic(1:nnr_siz))
|
||||
!
|
||||
IF (nkb > 0 .and. okvan) THEN
|
||||
! calculation of becp2
|
||||
|
@ -393,23 +399,25 @@ CONTAINS
|
|||
ENDDO
|
||||
!end: calculation of becp2
|
||||
ENDIF
|
||||
|
||||
IF ( dffts%has_task_groups ) THEN
|
||||
!
|
||||
v_siz = dffts%nnr_tg
|
||||
!
|
||||
incr = 2 * fftx_ntgrp(dffts)
|
||||
!
|
||||
ALLOCATE( tg_dvrss(1:v_siz) )
|
||||
tg_dvrss=0.0d0
|
||||
!
|
||||
CALL tg_gather(dffts, dvrss, tg_dvrss)
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
IF ( dffts%has_task_groups ) THEN
|
||||
!
|
||||
v_siz = dffts%nnr_tg
|
||||
!
|
||||
incr = 2 * fftx_ntgrp(dffts)
|
||||
!
|
||||
ALLOCATE( tg_dvrss(1:v_siz) )
|
||||
tg_dvrss=0.0d0
|
||||
!
|
||||
CALL tg_gather(dffts, dvrss, tg_dvrss)
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
! evc1_new is used as a container for the interaction
|
||||
!
|
||||
!$acc kernels
|
||||
evc1_new(:,:,:) = (0.0d0,0.0d0)
|
||||
!$acc end kernels
|
||||
!
|
||||
ibnd_start_gamma = ibnd_start
|
||||
IF (MOD(ibnd_start, 2)==0) ibnd_start_gamma = ibnd_start + 1
|
||||
|
@ -433,7 +441,9 @@ CONTAINS
|
|||
!
|
||||
ELSE
|
||||
!
|
||||
DO ir = 1,dffts%nnr
|
||||
!DO ir = 1,dffts%nnr
|
||||
!$acc parallel loop
|
||||
DO ir = 1, nnr_siz
|
||||
!
|
||||
psic(ir) = revc0(ir,ibnd,1)*CMPLX(dvrss(ir),0.0d0,DP)
|
||||
!
|
||||
|
@ -446,65 +456,65 @@ CONTAINS
|
|||
ENDIF
|
||||
!
|
||||
IF (real_space .and. okvan .and. nkb > 0) THEN
|
||||
!THE REAL SPACE PART (modified from s_psi)
|
||||
!fac = sqrt(omega)
|
||||
!
|
||||
ijkb0 = 0
|
||||
iqs = 0
|
||||
jqs = 0
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
!
|
||||
DO ia = 1, nat
|
||||
!THE REAL SPACE PART (modified from s_psi)
|
||||
!fac = sqrt(omega)
|
||||
!
|
||||
ijkb0 = 0
|
||||
iqs = 0
|
||||
jqs = 0
|
||||
!
|
||||
DO nt = 1, ntyp
|
||||
!
|
||||
DO ia = 1, nat
|
||||
!
|
||||
IF ( ityp(ia) == nt ) THEN
|
||||
!
|
||||
IF ( ityp(ia) == nt ) THEN
|
||||
!
|
||||
mbia = maxbox_beta(ia)
|
||||
ALLOCATE( w1(nh(nt)), w2(nh(nt)) )
|
||||
w1 = 0.D0
|
||||
w2 = 0.D0
|
||||
!
|
||||
DO ih = 1, nh(nt)
|
||||
!
|
||||
DO jh = 1, nh(nt)
|
||||
!
|
||||
jkb = ijkb0 + jh
|
||||
w1(ih) = w1(ih) + becp2(jkb, ibnd)
|
||||
IF ( ibnd+1 <= nbnd ) w2(ih) = w2(ih) + &
|
||||
mbia = maxbox_beta(ia)
|
||||
ALLOCATE( w1(nh(nt)), w2(nh(nt)) )
|
||||
w1 = 0.D0
|
||||
w2 = 0.D0
|
||||
!
|
||||
DO ih = 1, nh(nt)
|
||||
!
|
||||
DO jh = 1, nh(nt)
|
||||
!
|
||||
jkb = ijkb0 + jh
|
||||
w1(ih) = w1(ih) + becp2(jkb, ibnd)
|
||||
IF ( ibnd+1 <= nbnd ) w2(ih) = w2(ih) + &
|
||||
&becp2(jkb, ibnd+1)
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
!w1 = w1 * fac
|
||||
!w2 = w2 * fac
|
||||
ijkb0 = ijkb0 + nh(nt)
|
||||
!
|
||||
DO ih = 1, nh(nt)
|
||||
!
|
||||
DO ir = 1, mbia
|
||||
!
|
||||
iqs = jqs + ir
|
||||
psic( box_beta(box0(ia)+ir) ) = &
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
!w1 = w1 * fac
|
||||
!w2 = w2 * fac
|
||||
ijkb0 = ijkb0 + nh(nt)
|
||||
!
|
||||
DO ih = 1, nh(nt)
|
||||
!
|
||||
DO ir = 1, mbia
|
||||
!
|
||||
iqs = jqs + ir
|
||||
psic( box_beta(box0(ia)+ir) ) = &
|
||||
&psic( box_beta(box0(ia)+ir) ) + &
|
||||
&betasave(box0(ia)+ir,ih)*&
|
||||
&CMPLX( w1(ih), w2(ih), KIND=dp )
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
jqs = iqs
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
jqs = iqs
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( w1, w2 )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
! Back to reciprocal space
|
||||
|
@ -514,7 +524,9 @@ CONTAINS
|
|||
ENDDO
|
||||
!
|
||||
#if defined(__MPI)
|
||||
!$acc host_data use_device(evc1_new)
|
||||
CALL mp_sum( evc1_new(:,:,1), inter_bgrp_comm )
|
||||
!$acc end host_data
|
||||
#endif
|
||||
IF (dffts%has_task_groups) DEALLOCATE (tg_dvrss)
|
||||
!
|
||||
|
@ -525,7 +537,9 @@ CONTAINS
|
|||
!
|
||||
ENDIF
|
||||
!
|
||||
CALL stop_clock('interaction')
|
||||
!!! !$acc end data
|
||||
CALL stop_clock_gpu('interaction')
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
|
|
|
@ -76,7 +76,7 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
|
|||
INTEGER :: ir, ik, ibnd, jbnd, ig, ijkb0, np, na
|
||||
INTEGER :: ijh,ih,jh,ikb,jkb,is
|
||||
INTEGER :: i, j, k, l
|
||||
INTEGER :: v_siz, nnr_siz
|
||||
INTEGER :: v_siz, nnr_siz, irho
|
||||
REAL(kind=dp) :: w1, w2, scal, rho_sum
|
||||
! These are temporary buffers for the response
|
||||
REAL(kind=dp), ALLOCATABLE :: rho_sum_resp_x(:), rho_sum_resp_y(:),&
|
||||
|
@ -95,7 +95,7 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
|
|||
v_siz = dfftp%nnr
|
||||
nnr_siz = dffts%nnr
|
||||
!
|
||||
!$acc data create(psic(1:v_siz)) copyin(evc1(1:npwx*npol,1:nbnd,1:nks),revc0(1:nnr_siz,1:nbnd,1:nksq)) copyout( rho_1(1:v_siz,1:nspin_mag))
|
||||
!$acc data create(psic(1:v_siz)) copyin(evc1(1:npwx*npol,1:nbnd,1:nks)) copyin(revc0(1:nnr_siz,1:nbnd,1:nksq)) copyout( rho_1(1:v_siz,1:nspin_mag))
|
||||
!
|
||||
!$acc kernels
|
||||
psic(:) = (0.0d0, 0.0d0)
|
||||
|
@ -181,13 +181,11 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
|
|||
!
|
||||
! The psic workspace can present a memory bottleneck
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
DEALLOCATE ( psic )
|
||||
!
|
||||
#if defined(__MPI)
|
||||
IF(gamma_only) THEN
|
||||
!$acc host_data use_device(rho_1)
|
||||
CALL mp_sum(rho_1, inter_pool_comm)
|
||||
!$acc end host_data
|
||||
ELSE
|
||||
CALL mp_sum(rho_1c, inter_pool_comm)
|
||||
ENDIF
|
||||
|
@ -200,7 +198,11 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
|
|||
DO is = 1, nspin_mag
|
||||
!
|
||||
rho_sum = 0.0d0
|
||||
rho_sum = SUM(rho_1(:,is))
|
||||
! rho_sum = SUM(rho_1(:,is))
|
||||
!$acc parallel loop private(rho_sum) copy(rho_sum)
|
||||
do irho = 1, v_siz
|
||||
rho_sum = rho_sum + rho_1(i,is)
|
||||
enddo
|
||||
!
|
||||
#if defined(__MPI)
|
||||
CALL mp_sum(rho_sum, intra_bgrp_comm )
|
||||
|
@ -234,6 +236,10 @@ SUBROUTINE lr_calc_dens( evc1, response_calc )
|
|||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
DEALLOCATE ( psic )
|
||||
!
|
||||
IF (charge_response == 2 .AND. LR_iteration /=0) THEN
|
||||
!
|
||||
ALLOCATE( rho_sum_resp_x( dfftp%nr1 ) )
|
||||
|
@ -378,7 +384,6 @@ CONTAINS
|
|||
USE mp, ONLY : mp_sum
|
||||
USE realus, ONLY : tg_psic
|
||||
USE fft_base, ONLY : dffts
|
||||
USE fft_wave, ONLY : wave_g2r
|
||||
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -413,10 +418,7 @@ CONTAINS
|
|||
ebnd = ibnd
|
||||
IF ( ibnd < nbnd ) ebnd = ebnd + 1
|
||||
!
|
||||
CALL wave_g2r( evc1(1:ngk(1),ibnd:ebnd,1), psic, dffts )
|
||||
! CALL invfft_orbital_gamma(evc1(:,:,1),ibnd,nbnd)
|
||||
!
|
||||
! !$acc update device(psic)
|
||||
CALL invfft_orbital_gamma(evc1(:,:,1),ibnd,nbnd)
|
||||
!
|
||||
IF (dffts%has_task_groups) THEN
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue