stress_acc - Ewald

This commit is contained in:
fabrizio22 2022-07-25 17:03:46 +02:00
parent 9059a2aed6
commit 78bbaea11f
6 changed files with 86 additions and 397 deletions

View File

@ -274,7 +274,6 @@ set(src_pw
src/stres_mgga_gpu.f90
src/atomic_wfc_gpu.f90
src/dvloc_of_g_gpu.f90
src/stres_ewa_gpu.f90
src/stres_knl_gpu.f90
src/compute_deff_gpu.f90
src/add_vhub_to_deeq_gpu.f90

View File

@ -759,87 +759,7 @@ SUBROUTINE cutoff_stres_sigmaewa( alpha, sdewald, sigmaewa )
USE kinds
USE ions_base, ONLY : nat, zv, tau, ityp
USE constants, ONLY : e2, eps8
USE gvect, ONLY : ngm, g, gg, gstart
USE cell_base, ONLY : tpiba2, alat, omega, tpiba
USE io_global, ONLY : stdout
!
IMPLICIT NONE
!
REAL(DP), INTENT(IN) :: alpha
!! tuning param for LR/SR separation
REAL(DP), INTENT(INOUT) :: sigmaewa(3,3)
!! ewald contribution to stress
REAL(DP), INTENT(INOUT) :: sdewald
!! constant and diagonal terms
!
! ... local variables
!
INTEGER :: ng, na, l, m
REAL(DP) :: Gp, G2lzo2Gp, beta, sewald, g2, g2a, arg, fact
COMPLEX(DP) :: rhostar
!
! g(1) is a problem if it's G=0, because we divide by G^2.
! So start at gstart.
! fact=1.0d0, gamma_only not implemented
! G=0 componenent of the long-range part of the local part of the
! pseudopotminus the Hartree potential is set to 0.
! in other words, sdewald=0.
! sdewald is the last term in equation B1 of PRB 32 3792.
! See also similar comment for ewaldg in cutoff_ewald routine
!
sdewald = 0._DP
DO ng = gstart, ngm
Gp = SQRT( g(1,ng)**2 + g(2,ng)**2 )*tpiba
IF (Gp < eps8) THEN
G2lzo2Gp = 0._DP
beta = 0._DP
ELSE
G2lzo2Gp = gg(ng)*tpiba2*lz/2._DP/Gp
beta = G2lzo2Gp*(1._DP-cutoff_2D(ng))/cutoff_2D(ng)
ENDIF
g2 = gg(ng) * tpiba2
g2a = g2 / 4._DP / alpha
rhostar = (0._DP,0._DP)
DO na = 1, nat
arg = (g(1,ng) * tau(1,na) + g(2,ng) * tau(2,na) + &
g(3,ng) * tau(3,na) ) * tpi
rhostar = rhostar + zv (ityp(na) ) * CMPLX(COS(arg), SIN(arg), KIND=DP)
ENDDO
rhostar = rhostar / omega
sewald = tpi * e2 * EXP(-g2a) / g2* cutoff_2D(ng) * ABS(rhostar)**2
! ... sewald is an other diagonal term that is similar to the diagonal terms
! in the other stress contributions. It basically gives a term prop to
! the ewald energy
sdewald = sdewald-sewald
DO l = 1, 3
IF (l == 3) THEN
fact = (g2a + 1.0d0)
ELSE
fact = (1.0d0+g2a-beta)
ENDIF
!
DO m = 1, l
sigmaewa(l,m) = sigmaewa(l,m) + sewald * tpiba2 * 2.d0 * &
g(l,ng) * g(m,ng) / g2 * fact
ENDDO
ENDDO
!
ENDDO
!
RETURN
!
END SUBROUTINE cutoff_stres_sigmaewa
!
!----------------------------------------------------------------------
SUBROUTINE cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
!----------------------------------------------------------------------
!! This subroutine cuts off the Ewald part of the stress.
!! See Eq. (64) in PRB 96 075448
!
USE kinds
USE ions_base, ONLY : nat, zv, tau, ityp
USE constants, ONLY : e2, eps8
USE gvect, ONLY : ngm, gstart, g_d, gg_d
USE gvect, ONLY : ngm, gstart, g, gg
USE cell_base, ONLY : tpiba2, alat, omega, tpiba
USE io_global, ONLY : stdout
!
@ -858,72 +778,62 @@ SUBROUTINE cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
REAL(DP) :: Gp, G2lzo2Gp, beta, sewald, g2, g2a, arg, fact
REAL(DP) :: sigma11, sigma21, sigma22, sigma31, sigma32, sigma33
COMPLEX(DP) :: rhostar
!
INTEGER , ALLOCATABLE :: ityp_d(:)
REAL(DP), ALLOCATABLE :: cutoff2D_d(:), tau_d(:,:), zv_d(:)
!
#if defined(__CUDA)
attributes(DEVICE) :: cutoff2D_d, tau_d, zv_d, ityp_d
#endif
!
ntyp = SIZE(zv)
ALLOCATE( cutoff2D_d(ngm), tau_d(3,nat), zv_d(ntyp) )
ALLOCATE( ityp_d(nat) )
cutoff2D_d = cutoff_2D
tau_d = tau
zv_d = zv
ityp_d = ityp
! g(1) is a problem if it's G=0, because we divide by G^2.
! So start at gstart.
! fact=1.0d0, gamma_only not implemented
! G=0 componenent of the long-range part of the local part of the
! pseudopotminus the Hartree potential is set to 0.
! in other words, sdewald=0.
! sdewald is the last term in equation B1 of PRB 32 3792.
! See also similar comment for ewaldg in cutoff_ewald routine
!
! ... g(1) is a problem if it's G=0, because we divide by G^2.
! So start at gstart.
! fact=1.0d0, gamma_only not implemented
! G=0 componenent of the long-range part of the local part of the
! pseudopotminus the Hartree potential is set to 0.
! in other words, sdewald=0.
! sdewald is the last term in equation B1 of PRB 32 3792.
! See also similar comment for ewaldg in cutoff_ewald routine
!
sigma11 = 0._DP ; sigma21 = 0._DP ; sigma22 = 0._DP
sigma31 = 0._DP ; sigma32 = 0._DP ; sigma33 = 0._DP
!
sdewald = 0._DP
!
!$cuf kernel do (1) <<<*,*>>>
!$acc parallel loop copyin(g,gg,cutoff_2D,tau,zv,ityp) &
!$acc& reduction(+:sigma11,sigma21,sigma22,sigma31,sigma32, &
!$acc& sigma33)
DO ng = gstart, ngm
Gp = SQRT( g_d(1,ng)**2 + g_d(2,ng)**2 )*tpiba
Gp = SQRT( g(1,ng)**2 + g(2,ng)**2 )*tpiba
IF (Gp < eps8) THEN
G2lzo2Gp = 0._DP
beta = 0._DP
ELSE
G2lzo2Gp = gg_d(ng)*tpiba2*lz/2._DP/Gp
beta = G2lzo2Gp*(1._DP-cutoff2D_d(ng))/cutoff2D_d(ng)
G2lzo2Gp = gg(ng)*tpiba2*lz/2._DP/Gp
beta = G2lzo2Gp*(1._DP-cutoff_2D(ng))/cutoff_2D(ng)
ENDIF
g2 = gg_d(ng) * tpiba2
g2 = gg(ng) * tpiba2
g2a = g2 / 4._DP / alpha
rhostar = (0._DP,0._DP)
DO na = 1, nat
arg = (g_d(1,ng) * tau_d(1,na) + g_d(2,ng) * tau_d(2,na) + &
g_d(3,ng) * tau_d(3,na) ) * tpi
rhostar = rhostar + CMPLX(zv_d(ityp_d(na))) * CMPLX(COS(arg),SIN(arg),KIND=DP)
arg = (g(1,ng) * tau(1,na) + g(2,ng) * tau(2,na) + &
g(3,ng) * tau(3,na) ) * tpi
rhostar = rhostar + CMPLX(zv(ityp(na))) * CMPLX(COS(arg),SIN(arg),KIND=DP)
ENDDO
rhostar = rhostar / CMPLX(omega)
sewald = tpi * e2 * EXP(-g2a) / g2* cutoff2D_d(ng) * ABS(rhostar)**2
sewald = tpi * e2 * EXP(-g2a) / g2* cutoff_2D(ng) * ABS(rhostar)**2
! ... sewald is an other diagonal term that is similar to the diagonal terms
! in the other stress contributions. It basically gives a term prop to
! the ewald energy
! in the other stress contributions. It basically gives a term prop to
! the ewald energy
!
sdewald = sdewald - sewald
sigma11 = sigma11 + sewald * tpiba2 * 2._DP * &
g_d(1,ng) * g_d(1,ng) / g2 * (1._DP+g2a-beta)
g(1,ng) * g(1,ng) / g2 * (1._DP+g2a-beta)
sigma21 = sigma21 + sewald * tpiba2 * 2._DP * &
g_d(2,ng) * g_d(1,ng) / g2 * (1._DP+g2a-beta)
g(2,ng) * g(1,ng) / g2 * (1._DP+g2a-beta)
sigma22 = sigma22 + sewald * tpiba2 * 2._DP * &
g_d(2,ng) * g_d(2,ng) / g2 * (1._DP+g2a-beta)
g(2,ng) * g(2,ng) / g2 * (1._DP+g2a-beta)
sigma31 = sigma31 + sewald * tpiba2 * 2._DP * &
g_d(3,ng) * g_d(1,ng) / g2 * (g2a+1._DP)
g(3,ng) * g(1,ng) / g2 * (g2a+1._DP)
sigma32 = sigma32 + sewald * tpiba2 * 2._DP * &
g_d(3,ng) * g_d(2,ng) / g2 * (g2a+1._DP)
g(3,ng) * g(2,ng) / g2 * (g2a+1._DP)
sigma33 = sigma33 + sewald * tpiba2 * 2._DP * &
g_d(3,ng) * g_d(3,ng) / g2 * (g2a+1._DP)
g(3,ng) * g(3,ng) / g2 * (g2a+1._DP)
!
ENDDO
!
@ -934,11 +844,8 @@ SUBROUTINE cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
sigmaewa(3,2) = sigmaewa(3,2) + sigma32
sigmaewa(3,3) = sigmaewa(3,3) + sigma33
!
DEALLOCATE( cutoff2D_d, tau_d, zv_d )
DEALLOCATE( ityp_d )
!
RETURN
!
END SUBROUTINE cutoff_stres_sigmaewa_gpu
END SUBROUTINE cutoff_stres_sigmaewa
!
END MODULE Coul_cut_2D

View File

@ -296,7 +296,6 @@ PWLIBS += \
stres_mgga_gpu.o \
stres_cc_gpu.o \
deriv_drhoc_gpu.o \
stres_ewa_gpu.o \
stres_knl_gpu.o \
stres_us_gpu.o \
compute_deff_gpu.o \

View File

@ -53,7 +53,7 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
REAL(DP), INTENT(IN) :: gcutm
!! input: cut-off of g vectors
REAL(DP), INTENT(OUT) :: sigmaewa(3,3)
! output: the ewald stress
!! output: the ewald stress
!
! ... local variables
!
@ -85,6 +85,9 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
! diagonal term
! nondiagonal term
COMPLEX(DP) :: rhostar
REAL(DP) :: sigma11, sigma21, sigma22, sigma31, sigma32, sigma33
!
!$acc data present_or_copyin( g, gg )
!
tpiba2 = (tpi / alat)**2
sigmaewa(:,:) = 0.d0
@ -94,15 +97,15 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
charge = charge + zv(ityp(na))
ENDDO
!
! choose alpha in order to have convergence in the sum over G
! upperbound is a safe upper bound for the error ON THE ENERGY
! ... choose alpha in order to have convergence in the sum over G
! upperbound is a safe upper bound for the error ON THE ENERGY
!
alpha = 2.9d0
12 alpha = alpha - 0.1d0
!
IF (alpha==0.0) CALL errore( 'stres_ew', 'optimal alpha not found', 1 )
upperbound = e2 * charge**2 * SQRT(2 * alpha / tpi) * &
erfc ( SQRT(tpiba2 * gcutm / 4.0d0 / alpha) )
ERFC( SQRT(tpiba2 * gcutm / 4.0d0 / alpha) )
!
IF (upperbound > 1d-7) GOTO 12
!
@ -118,40 +121,70 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
!
! sdewald is the diagonal term
IF (gamma_only) THEN
fact = 2.d0
fact = 2.d0
ELSE
fact = 1.d0
ENDIF
!
IF (do_cutoff_2D) THEN
!
CALL cutoff_stres_sigmaewa( alpha, sdewald, sigmaewa )
!
ELSE
!$omp parallel do default(none) shared(gstart, ngm, g, gg, tpiba2, alpha, tau, zv, ityp, nat, omega, fact)&
!$omp &private(g2, g2a, rhostar, na, arg, l, m, sewald)&
!$omp &reduction(+:sigmaewa,sdewald)
!
sigma11 = 0._DP ; sigma21 = 0._DP ; sigma22 = 0._DP
sigma31 = 0._DP ; sigma32 = 0._DP ; sigma33 = 0._DP
!
#if !defined(_OPENACC)
!$omp parallel do default(none) shared(gstart, ngm, g, gg, tpiba2, alpha, tau,&
!$omp nat, zv, ityp, omega, fact) private(g2,g2a, rhostar, na, arg,&
!$omp sewald) reduction(+:sdewald,sigma11,sigma21,sigma22,sigma31,&
!$omp sigma32,sigma33)
#else
!$acc parallel loop copyin(tau,zv,ityp) reduction(+:sigma11,sigma21,sigma22,&
!$acc sigma31,sigma32,sigma33) reduction(-:sdewald)
#endif
DO ng = gstart, ngm
g2 = gg (ng) * tpiba2
g2a = g2 / 4.d0 / alpha
rhostar = (0.d0, 0.d0)
g2 = gg(ng) * tpiba2
g2a = g2 / 4._DP / alpha
rhostar = (0._DP,0._DP)
DO na = 1, nat
arg = (g(1,ng) * tau(1,na) + g(2,ng) * tau(2,na) + &
g(3,ng) * tau(3,na) ) * tpi
rhostar = rhostar + zv(ityp(na)) * CMPLX(COS(arg), SIN(arg), KIND=DP)
rhostar = rhostar + CMPLX(zv(ityp(na))) * CMPLX(COS(arg), SIN(arg), KIND=DP)
ENDDO
rhostar = rhostar / omega
rhostar = rhostar / CMPLX(omega)
sewald = fact * tpi * e2 * EXP(-g2a) / g2 * ABS(rhostar)**2
sdewald = sdewald - sewald
DO l = 1, 3
DO m = 1, l
sigmaewa(l,m) = sigmaewa(l,m) + sewald * tpiba2 * 2.d0 * &
g(l,ng) * g(m,ng) / g2 * (g2a + 1)
ENDDO
ENDDO
!
sigma11 = sigma11 + sewald * tpiba2 * 2._DP * &
g(1,ng) * g(1,ng) / g2 * (g2a + 1)
sigma21 = sigma21 + sewald * tpiba2 * 2._DP * &
g(2,ng) * g(1,ng) / g2 * (g2a + 1)
sigma22 = sigma22 + sewald * tpiba2 * 2._DP * &
g(2,ng) * g(2,ng) / g2 * (g2a + 1)
sigma31 = sigma31 + sewald * tpiba2 * 2._DP * &
g(3,ng) * g(1,ng) / g2 * (g2a + 1)
sigma32 = sigma32 + sewald * tpiba2 * 2._DP * &
g(3,ng) * g(2,ng) / g2 * (g2a + 1)
sigma33 = sigma33 + sewald * tpiba2 * 2._DP * &
g(3,ng) * g(3,ng) / g2 * (g2a + 1)
ENDDO
#if !defined(_OPENACC)
!$omp end parallel do
#endif
!
sigmaewa(1,1) = sigmaewa(1,1) + sigma11
sigmaewa(2,1) = sigmaewa(2,1) + sigma21
sigmaewa(2,2) = sigmaewa(2,2) + sigma22
sigmaewa(3,1) = sigmaewa(3,1) + sigma31
sigmaewa(3,2) = sigmaewa(3,2) + sigma32
sigmaewa(3,3) = sigmaewa(3,3) + sigma33
!
ENDIF
!
!$acc end data
!
DO l = 1, 3
sigmaewa(l,l) = sigmaewa(l,l) + sdewald
ENDDO

View File

@ -1,246 +0,0 @@
!
! Copyright (C) 2001-2009 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!
!-----------------------------------------------------------------------
SUBROUTINE stres_ewa_gpu( alat, nat, ntyp, ityp, zv, at, bg, tau, &
omega, g_d, gg_d, ngm, gstart, gamma_only, &
gcutm, sigmaewa )
!---------------------------------------------------------------------
!! Ewald contribution. Both real- and reciprocal-space terms are
!! present.
!
USE kinds
USE constants, ONLY : tpi, e2, eps6
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
USE mp, ONLY : mp_sum
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_stres_sigmaewa_gpu
!
IMPLICIT NONE
!
INTEGER :: nat
!! input: number of atoms in the unit cell
INTEGER :: ntyp
!! input: number of different types of atoms
INTEGER :: ityp(nat)
!! input: the type of each atom
INTEGER :: ngm
!! input: number of plane waves for G sum
INTEGER :: gstart
!! input: first nonzero g vector
LOGICAL, INTENT(IN) :: gamma_only
!! gamma point only
REAL(DP), INTENT(IN) :: tau(3,nat)
!! input: the positions of the atoms in the cell
REAL(DP), INTENT(IN) :: g_d(3,ngm)
!! input: the coordinates of G vectors
REAL(DP), INTENT(IN) :: gg_d(ngm)
!! input: the square moduli of G vectors
REAL(DP), INTENT(IN) :: zv(ntyp)
!! input: the charge of each type of atoms
REAL(DP), INTENT(IN) :: at(3,3)
!! input: the direct lattice vectors
REAL(DP), INTENT(IN) :: bg(3,3)
!! input: the reciprocal lattice vectors
REAL(DP), INTENT(IN) :: omega
!! input: the volume of the unit cell
REAL(DP), INTENT(IN) :: alat
!! input: measure of length
REAL(DP), INTENT(IN) :: gcutm
!! input: cut-off of g vectors
REAL(DP), INTENT(OUT) :: sigmaewa(3,3)
! output: the ewald stress
!
! ... local variables
!
INTEGER, PARAMETER :: mxr = 50
! the maximum number of R vectors included in r sum
INTEGER :: ng, nr, na, nb, l, m, nrm
! counter over reciprocal G vectors
! counter over direct vectors
! counter on atoms
! counter on atoms
! counter on atoms
! number of R vectors included in r sum
INTEGER :: na_s, na_e, mykey
!
REAL(DP) :: charge, arg, tpiba2, dtau(3), alpha, r(3,mxr), &
r2(mxr), rmax, rr, upperbound, fact, fac, g2, g2a, &
sdewald, sewald
! total ionic charge in the cell
! the argument of the phase
! length in reciprocal space
! the difference tau_s - tau_s'
! alpha term in ewald sum
! input of the rgen routine ( not used here )
! the square modulus of R_j-tau_s-tau_s'
! the maximum radius to consider real space sum
! buffer variable
! used to optimize alpha
! auxiliary variables
! diagonal term
! nondiagonal term
!
INTEGER :: ierr(2)
REAL(DP) :: sigma11, sigma21, sigma22, sigma31, sigma32, sigma33
COMPLEX(DP) :: rhostar
!
INTEGER, ALLOCATABLE :: ityp_d(:)
REAL(DP), ALLOCATABLE :: zv_d(:), tau_d(:,:)
!
#if defined(__CUDA)
attributes(DEVICE) :: g_d, gg_d, zv_d, ityp_d, tau_d
#endif
!
tpiba2 = (tpi / alat)**2
sigmaewa(:,:) = 0._DP
charge = 0._DP
!
ALLOCATE( zv_d(ntyp), tau_d(3,nat) )
zv_d = zv
tau_d = tau
ALLOCATE( ityp_d(nat) )
ityp_d = ityp
!
DO na = 1, nat
charge = charge + zv(ityp(na))
ENDDO
!
! choose alpha in order to have convergence in the sum over G
! upperbound is a safe upper bound for the error ON THE ENERGY
!
alpha = 2.9_DP
12 alpha = alpha - 0.1_DP
!
IF (alpha==0.0) CALL errore( 'stres_ew', 'optimal alpha not found', 1 )
upperbound = e2 * charge**2 * SQRT(2 * alpha / tpi) * &
erfc ( SQRT(tpiba2 * gcutm / 4._DP / alpha) )
!
IF (upperbound > 1d-7) GOTO 12
!
! G-space sum here
!
! Determine if this processor contains G=0 and set the constant term
!
IF (gstart == 2) THEN
sdewald = tpi * e2 / 4._DP / alpha * (charge / omega)**2
ELSE
sdewald = 0._DP
ENDIF
!
! sdewald is the diagonal term
IF ( gamma_only ) THEN
fact = 2._DP
ELSE
fact = 1._DP
ENDIF
!
IF ( do_cutoff_2D ) THEN
!
CALL cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
!
ELSE
!
sigma11 = 0._DP ; sigma21 = 0._DP ; sigma22 = 0._DP
sigma31 = 0._DP ; sigma32 = 0._DP ; sigma33 = 0._DP
!
!$cuf kernel do (1) <<<*,*>>>
DO ng = gstart, ngm
g2 = gg_d(ng) * tpiba2
g2a = g2 / 4._DP / alpha
rhostar = (0._DP,0._DP)
DO na = 1, nat
arg = (g_d(1,ng) * tau_d(1,na) + g_d(2,ng) * tau_d(2,na) + &
g_d(3,ng) * tau_d(3,na) ) * tpi
rhostar = rhostar + CMPLX(zv_d(ityp_d(na))) * CMPLX(COS(arg), SIN(arg), KIND=DP)
ENDDO
rhostar = rhostar / CMPLX(omega)
sewald = fact * tpi * e2 * EXP(-g2a) / g2 * ABS(rhostar)**2
sdewald = sdewald - sewald
!
sigma11 = sigma11 + sewald * tpiba2 * 2._DP * &
g_d(1,ng) * g_d(1,ng) / g2 * (g2a + 1)
sigma21 = sigma21 + sewald * tpiba2 * 2._DP * &
g_d(2,ng) * g_d(1,ng) / g2 * (g2a + 1)
sigma22 = sigma22 + sewald * tpiba2 * 2._DP * &
g_d(2,ng) * g_d(2,ng) / g2 * (g2a + 1)
sigma31 = sigma31 + sewald * tpiba2 * 2._DP * &
g_d(3,ng) * g_d(1,ng) / g2 * (g2a + 1)
sigma32 = sigma32 + sewald * tpiba2 * 2._DP * &
g_d(3,ng) * g_d(2,ng) / g2 * (g2a + 1)
sigma33 = sigma33 + sewald * tpiba2 * 2._DP * &
g_d(3,ng) * g_d(3,ng) / g2 * (g2a + 1)
!
ENDDO
!
sigmaewa(1,1) = sigmaewa(1,1) + sigma11
sigmaewa(2,1) = sigmaewa(2,1) + sigma21
sigmaewa(2,2) = sigmaewa(2,2) + sigma22
sigmaewa(3,1) = sigmaewa(3,1) + sigma31
sigmaewa(3,2) = sigmaewa(3,2) + sigma32
sigmaewa(3,3) = sigmaewa(3,3) + sigma33
!
ENDIF
!
DO l = 1, 3
sigmaewa(l,l) = sigmaewa(l,l) + sdewald
ENDDO
!
! R-space sum here (see ewald.f90 for details on parallelization)
!
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
!
IF ( mykey == 0 ) THEN
rmax = 4.0d0 / SQRT(alpha) / alat
!
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
!
DO na = na_s, na_e
DO nb = 1, nat
dtau(:) = tau(:,na) - tau(:,nb)
!
! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
!
CALL rgen( dtau, rmax, mxr, at, bg, r, r2, nrm )
!
DO nr = 1, nrm
rr = SQRT(r2 (nr) ) * alat
fac = - e2 / 2.0_DP/ omega * alat**2 * zv(ityp(na)) * &
zv(ityp(nb)) / rr**3 * (erfc(SQRT(alpha) * rr) + &
rr * SQRT(8.0_DP * alpha / tpi) * EXP( - alpha * rr**2) )
DO l = 1, 3
DO m = 1, l
sigmaewa(l,m) = sigmaewa(l,m) + fac * r(l,nr) * r(m,nr)
ENDDO
ENDDO
ENDDO
!
ENDDO
ENDDO
ENDIF
!
DO l = 1, 3
DO m = 1, l - 1
sigmaewa(m,l) = sigmaewa(l,m)
ENDDO
ENDDO
!
DO l = 1, 3
DO m = 1, 3
sigmaewa(l,m) = - sigmaewa(l,m)
ENDDO
ENDDO
!
DEALLOCATE( zv_d, tau_d )
DEALLOCATE( ityp_d )
!
CALL mp_sum( sigmaewa, intra_bgrp_comm )
!
RETURN
!
END SUBROUTINE stres_ewa_gpu

View File

@ -122,13 +122,10 @@ SUBROUTINE stress( sigma )
IF ( do_comp_esm .AND. ( esm_bc /= 'pbc' ) ) THEN ! for ESM stress
CALL esm_stres_ewa( sigmaewa )
ELSE
IF (.NOT. use_gpu) CALL stres_ewa( alat, nat, ntyp, ityp, zv, at, &
bg, tau, omega, g, gg, ngm, gstart, &
gamma_only, gcutm, sigmaewa )
IF ( use_gpu) CALL stres_ewa_gpu( alat, nat, ntyp, ityp, zv, at, bg,&
tau, omega, g_d,gg_d, ngm, gstart,&
gamma_only, gcutm, sigmaewa )
END IF
CALL stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, &
tau, omega, g, gg, ngm, gstart, &
gamma_only, gcutm, sigmaewa )
ENDIF
!
! semi-empirical dispersion contribution: Grimme-D2 and D3
!