mirror of https://gitlab.com/QEF/q-e.git
stress_acc - gg array on acc
This commit is contained in:
parent
78bbaea11f
commit
3c3e92e7a2
|
@ -120,7 +120,7 @@
|
|||
ALLOCATE( igtongl_d(ngm) )
|
||||
ALLOCATE( gl_d(ngm) )
|
||||
ENDIF
|
||||
!$acc enter data create( mill(1:3, 1:ngm), g(1:3, 1:ngm) )
|
||||
!$acc enter data create( mill(1:3,1:ngm), g(1:3,1:ngm), gg(1:ngm) )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
@ -142,7 +142,10 @@
|
|||
END IF
|
||||
!
|
||||
!
|
||||
IF( ALLOCATED( gg ) ) DEALLOCATE( gg )
|
||||
IF( ALLOCATED( gg ) ) THEN
|
||||
!$acc exit data delete(gg)
|
||||
DEALLOCATE( gg )
|
||||
END IF
|
||||
IF( ALLOCATED( g ) ) THEN
|
||||
!$acc exit data delete(g)
|
||||
DEALLOCATE( g )
|
||||
|
|
|
@ -795,7 +795,7 @@ SUBROUTINE cutoff_stres_sigmaewa( alpha, sdewald, sigmaewa )
|
|||
!
|
||||
sdewald = 0._DP
|
||||
!
|
||||
!$acc parallel loop copyin(g,gg,cutoff_2D,tau,zv,ityp) &
|
||||
!$acc parallel loop copyin(cutoff_2D,tau,zv,ityp) &
|
||||
!$acc& reduction(+:sigma11,sigma21,sigma22,sigma31,sigma32, &
|
||||
!$acc& sigma33)
|
||||
DO ng = gstart, ngm
|
||||
|
|
|
@ -98,7 +98,7 @@ SUBROUTINE init_run()
|
|||
gg_d = gg
|
||||
END IF
|
||||
#endif
|
||||
!$acc update device(mill, g)
|
||||
!$acc update device(mill, g, gg)
|
||||
!
|
||||
IF (do_comp_esm) CALL esm_init(.NOT. lrism)
|
||||
!
|
||||
|
|
|
@ -230,10 +230,10 @@ SUBROUTINE post_xml_init ( )
|
|||
g_d = g
|
||||
gg_d = gg
|
||||
#endif
|
||||
!$acc update device(mill, g)
|
||||
!$acc update device(mill, g, gg)
|
||||
!
|
||||
CALL ggens( dffts, gamma_only, at, g, gg, mill, gcutms, ngms )
|
||||
CALL gshells ( lmovecell )
|
||||
CALL gshells ( lmovecell )
|
||||
!
|
||||
IF (do_comp_esm) CALL esm_init()
|
||||
IF (do_cutoff_2D) CALL cutoff_fact()
|
||||
|
|
|
@ -87,7 +87,7 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
COMPLEX(DP) :: rhostar
|
||||
REAL(DP) :: sigma11, sigma21, sigma22, sigma31, sigma32, sigma33
|
||||
!
|
||||
!$acc data present_or_copyin( g, gg )
|
||||
!$acc data present( g, gg )
|
||||
!
|
||||
tpiba2 = (tpi / alat)**2
|
||||
sigmaewa(:,:) = 0.d0
|
||||
|
@ -109,23 +109,22 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
!
|
||||
IF (upperbound > 1d-7) GOTO 12
|
||||
!
|
||||
! G-space sum here
|
||||
!
|
||||
! Determine if this processor contains G=0 and set the constant term
|
||||
!
|
||||
! ... Determine if this processor contains G=0 and set the constant term
|
||||
! sdewald is the diagonal term
|
||||
IF (gstart == 2) THEN
|
||||
sdewald = tpi * e2 / 4.d0 / alpha * (charge / omega)**2
|
||||
ELSE
|
||||
sdewald = 0.d0
|
||||
ENDIF
|
||||
!
|
||||
! sdewald is the diagonal term
|
||||
IF (gamma_only) THEN
|
||||
fact = 2.d0
|
||||
ELSE
|
||||
fact = 1.d0
|
||||
ENDIF
|
||||
!
|
||||
! ... G-space sum here below
|
||||
!
|
||||
IF (do_cutoff_2D) THEN
|
||||
!
|
||||
CALL cutoff_stres_sigmaewa( alpha, sdewald, sigmaewa )
|
||||
|
@ -189,14 +188,14 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
sigmaewa(l,l) = sigmaewa(l,l) + sdewald
|
||||
ENDDO
|
||||
!
|
||||
! R-space sum here (see ewald.f90 for details on parallelization)
|
||||
! ... R-space sum here (see ewald.f90 for details on parallelization)
|
||||
!
|
||||
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
|
||||
!
|
||||
IF ( mykey == 0 ) THEN
|
||||
rmax = 4.0d0 / SQRT(alpha) / alat
|
||||
!
|
||||
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
|
||||
! ... with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-12)
|
||||
!
|
||||
!$omp parallel do default(none) shared(na_s, na_e, nat, tau, rmax, at, bg, alat, ityp, alpha, omega, zv)&
|
||||
!$omp &private(nb, dtau, r, r2, nrm, nr, rr, fac, l, m)&
|
||||
|
@ -205,7 +204,7 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
DO nb = 1, nat
|
||||
dtau(:) = tau(:,na) - tau(:,nb)
|
||||
!
|
||||
! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
|
||||
! ... generates nearest-neighbors shells r(i)=R(i)-dtau(i)
|
||||
!
|
||||
CALL rgen( dtau, rmax, mxr, at, bg, r, r2, nrm )
|
||||
!
|
||||
|
|
|
@ -70,6 +70,10 @@ SUBROUTINE stress( sigma )
|
|||
!
|
||||
CALL start_clock( 'stress' )
|
||||
!
|
||||
! --------------- ... provisional ... ---------------
|
||||
!$acc update device( g, gg )
|
||||
! -------------------------------------------
|
||||
!
|
||||
! contribution from local potential
|
||||
!
|
||||
IF (.NOT. use_gpu) CALL stres_loc( sigmaloc )
|
||||
|
@ -88,13 +92,13 @@ SUBROUTINE stress( sigma )
|
|||
ELSE
|
||||
IF (.NOT. use_gpu) CALL stres_har( sigmahar )
|
||||
IF ( use_gpu) CALL stres_har_gpu( sigmahar )
|
||||
END IF
|
||||
ENDIF
|
||||
!
|
||||
! xc contribution (diagonal)
|
||||
!
|
||||
sigmaxc(:,:) = 0.d0
|
||||
DO l = 1, 3
|
||||
sigmaxc (l, l) = - (etxc - vtxc) / omega
|
||||
sigmaxc(l,l) = - (etxc - vtxc) / omega
|
||||
ENDDO
|
||||
!
|
||||
! xc contribution: add gradient corrections (non diagonal)
|
||||
|
|
Loading…
Reference in New Issue