mirror of https://gitlab.com/QEF/q-e.git
stress_acc - Ewald
This commit is contained in:
parent
9059a2aed6
commit
78bbaea11f
|
@ -274,7 +274,6 @@ set(src_pw
|
|||
src/stres_mgga_gpu.f90
|
||||
src/atomic_wfc_gpu.f90
|
||||
src/dvloc_of_g_gpu.f90
|
||||
src/stres_ewa_gpu.f90
|
||||
src/stres_knl_gpu.f90
|
||||
src/compute_deff_gpu.f90
|
||||
src/add_vhub_to_deeq_gpu.f90
|
||||
|
|
|
@ -759,87 +759,7 @@ SUBROUTINE cutoff_stres_sigmaewa( alpha, sdewald, sigmaewa )
|
|||
USE kinds
|
||||
USE ions_base, ONLY : nat, zv, tau, ityp
|
||||
USE constants, ONLY : e2, eps8
|
||||
USE gvect, ONLY : ngm, g, gg, gstart
|
||||
USE cell_base, ONLY : tpiba2, alat, omega, tpiba
|
||||
USE io_global, ONLY : stdout
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
REAL(DP), INTENT(IN) :: alpha
|
||||
!! tuning param for LR/SR separation
|
||||
REAL(DP), INTENT(INOUT) :: sigmaewa(3,3)
|
||||
!! ewald contribution to stress
|
||||
REAL(DP), INTENT(INOUT) :: sdewald
|
||||
!! constant and diagonal terms
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: ng, na, l, m
|
||||
REAL(DP) :: Gp, G2lzo2Gp, beta, sewald, g2, g2a, arg, fact
|
||||
COMPLEX(DP) :: rhostar
|
||||
!
|
||||
! g(1) is a problem if it's G=0, because we divide by G^2.
|
||||
! So start at gstart.
|
||||
! fact=1.0d0, gamma_only not implemented
|
||||
! G=0 componenent of the long-range part of the local part of the
|
||||
! pseudopotminus the Hartree potential is set to 0.
|
||||
! in other words, sdewald=0.
|
||||
! sdewald is the last term in equation B1 of PRB 32 3792.
|
||||
! See also similar comment for ewaldg in cutoff_ewald routine
|
||||
!
|
||||
sdewald = 0._DP
|
||||
DO ng = gstart, ngm
|
||||
Gp = SQRT( g(1,ng)**2 + g(2,ng)**2 )*tpiba
|
||||
IF (Gp < eps8) THEN
|
||||
G2lzo2Gp = 0._DP
|
||||
beta = 0._DP
|
||||
ELSE
|
||||
G2lzo2Gp = gg(ng)*tpiba2*lz/2._DP/Gp
|
||||
beta = G2lzo2Gp*(1._DP-cutoff_2D(ng))/cutoff_2D(ng)
|
||||
ENDIF
|
||||
g2 = gg(ng) * tpiba2
|
||||
g2a = g2 / 4._DP / alpha
|
||||
rhostar = (0._DP,0._DP)
|
||||
DO na = 1, nat
|
||||
arg = (g(1,ng) * tau(1,na) + g(2,ng) * tau(2,na) + &
|
||||
g(3,ng) * tau(3,na) ) * tpi
|
||||
rhostar = rhostar + zv (ityp(na) ) * CMPLX(COS(arg), SIN(arg), KIND=DP)
|
||||
ENDDO
|
||||
rhostar = rhostar / omega
|
||||
sewald = tpi * e2 * EXP(-g2a) / g2* cutoff_2D(ng) * ABS(rhostar)**2
|
||||
! ... sewald is an other diagonal term that is similar to the diagonal terms
|
||||
! in the other stress contributions. It basically gives a term prop to
|
||||
! the ewald energy
|
||||
sdewald = sdewald-sewald
|
||||
DO l = 1, 3
|
||||
IF (l == 3) THEN
|
||||
fact = (g2a + 1.0d0)
|
||||
ELSE
|
||||
fact = (1.0d0+g2a-beta)
|
||||
ENDIF
|
||||
!
|
||||
DO m = 1, l
|
||||
sigmaewa(l,m) = sigmaewa(l,m) + sewald * tpiba2 * 2.d0 * &
|
||||
g(l,ng) * g(m,ng) / g2 * fact
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE cutoff_stres_sigmaewa
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
SUBROUTINE cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
|
||||
!----------------------------------------------------------------------
|
||||
!! This subroutine cuts off the Ewald part of the stress.
|
||||
!! See Eq. (64) in PRB 96 075448
|
||||
!
|
||||
USE kinds
|
||||
USE ions_base, ONLY : nat, zv, tau, ityp
|
||||
USE constants, ONLY : e2, eps8
|
||||
USE gvect, ONLY : ngm, gstart, g_d, gg_d
|
||||
USE gvect, ONLY : ngm, gstart, g, gg
|
||||
USE cell_base, ONLY : tpiba2, alat, omega, tpiba
|
||||
USE io_global, ONLY : stdout
|
||||
!
|
||||
|
@ -858,72 +778,62 @@ SUBROUTINE cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
|
|||
REAL(DP) :: Gp, G2lzo2Gp, beta, sewald, g2, g2a, arg, fact
|
||||
REAL(DP) :: sigma11, sigma21, sigma22, sigma31, sigma32, sigma33
|
||||
COMPLEX(DP) :: rhostar
|
||||
!
|
||||
INTEGER , ALLOCATABLE :: ityp_d(:)
|
||||
REAL(DP), ALLOCATABLE :: cutoff2D_d(:), tau_d(:,:), zv_d(:)
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: cutoff2D_d, tau_d, zv_d, ityp_d
|
||||
#endif
|
||||
!
|
||||
ntyp = SIZE(zv)
|
||||
ALLOCATE( cutoff2D_d(ngm), tau_d(3,nat), zv_d(ntyp) )
|
||||
ALLOCATE( ityp_d(nat) )
|
||||
cutoff2D_d = cutoff_2D
|
||||
tau_d = tau
|
||||
zv_d = zv
|
||||
ityp_d = ityp
|
||||
! g(1) is a problem if it's G=0, because we divide by G^2.
|
||||
! So start at gstart.
|
||||
! fact=1.0d0, gamma_only not implemented
|
||||
! G=0 componenent of the long-range part of the local part of the
|
||||
! pseudopotminus the Hartree potential is set to 0.
|
||||
! in other words, sdewald=0.
|
||||
! sdewald is the last term in equation B1 of PRB 32 3792.
|
||||
! See also similar comment for ewaldg in cutoff_ewald routine
|
||||
!
|
||||
! ... g(1) is a problem if it's G=0, because we divide by G^2.
|
||||
! So start at gstart.
|
||||
! fact=1.0d0, gamma_only not implemented
|
||||
! G=0 componenent of the long-range part of the local part of the
|
||||
! pseudopotminus the Hartree potential is set to 0.
|
||||
! in other words, sdewald=0.
|
||||
! sdewald is the last term in equation B1 of PRB 32 3792.
|
||||
! See also similar comment for ewaldg in cutoff_ewald routine
|
||||
!
|
||||
sigma11 = 0._DP ; sigma21 = 0._DP ; sigma22 = 0._DP
|
||||
sigma31 = 0._DP ; sigma32 = 0._DP ; sigma33 = 0._DP
|
||||
!
|
||||
sdewald = 0._DP
|
||||
!
|
||||
!$cuf kernel do (1) <<<*,*>>>
|
||||
!$acc parallel loop copyin(g,gg,cutoff_2D,tau,zv,ityp) &
|
||||
!$acc& reduction(+:sigma11,sigma21,sigma22,sigma31,sigma32, &
|
||||
!$acc& sigma33)
|
||||
DO ng = gstart, ngm
|
||||
Gp = SQRT( g_d(1,ng)**2 + g_d(2,ng)**2 )*tpiba
|
||||
Gp = SQRT( g(1,ng)**2 + g(2,ng)**2 )*tpiba
|
||||
IF (Gp < eps8) THEN
|
||||
G2lzo2Gp = 0._DP
|
||||
beta = 0._DP
|
||||
ELSE
|
||||
G2lzo2Gp = gg_d(ng)*tpiba2*lz/2._DP/Gp
|
||||
beta = G2lzo2Gp*(1._DP-cutoff2D_d(ng))/cutoff2D_d(ng)
|
||||
G2lzo2Gp = gg(ng)*tpiba2*lz/2._DP/Gp
|
||||
beta = G2lzo2Gp*(1._DP-cutoff_2D(ng))/cutoff_2D(ng)
|
||||
ENDIF
|
||||
g2 = gg_d(ng) * tpiba2
|
||||
g2 = gg(ng) * tpiba2
|
||||
g2a = g2 / 4._DP / alpha
|
||||
rhostar = (0._DP,0._DP)
|
||||
DO na = 1, nat
|
||||
arg = (g_d(1,ng) * tau_d(1,na) + g_d(2,ng) * tau_d(2,na) + &
|
||||
g_d(3,ng) * tau_d(3,na) ) * tpi
|
||||
rhostar = rhostar + CMPLX(zv_d(ityp_d(na))) * CMPLX(COS(arg),SIN(arg),KIND=DP)
|
||||
arg = (g(1,ng) * tau(1,na) + g(2,ng) * tau(2,na) + &
|
||||
g(3,ng) * tau(3,na) ) * tpi
|
||||
rhostar = rhostar + CMPLX(zv(ityp(na))) * CMPLX(COS(arg),SIN(arg),KIND=DP)
|
||||
ENDDO
|
||||
rhostar = rhostar / CMPLX(omega)
|
||||
sewald = tpi * e2 * EXP(-g2a) / g2* cutoff2D_d(ng) * ABS(rhostar)**2
|
||||
sewald = tpi * e2 * EXP(-g2a) / g2* cutoff_2D(ng) * ABS(rhostar)**2
|
||||
! ... sewald is an other diagonal term that is similar to the diagonal terms
|
||||
! in the other stress contributions. It basically gives a term prop to
|
||||
! the ewald energy
|
||||
! in the other stress contributions. It basically gives a term prop to
|
||||
! the ewald energy
|
||||
!
|
||||
sdewald = sdewald - sewald
|
||||
sigma11 = sigma11 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(1,ng) * g_d(1,ng) / g2 * (1._DP+g2a-beta)
|
||||
g(1,ng) * g(1,ng) / g2 * (1._DP+g2a-beta)
|
||||
sigma21 = sigma21 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(2,ng) * g_d(1,ng) / g2 * (1._DP+g2a-beta)
|
||||
g(2,ng) * g(1,ng) / g2 * (1._DP+g2a-beta)
|
||||
sigma22 = sigma22 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(2,ng) * g_d(2,ng) / g2 * (1._DP+g2a-beta)
|
||||
g(2,ng) * g(2,ng) / g2 * (1._DP+g2a-beta)
|
||||
sigma31 = sigma31 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(3,ng) * g_d(1,ng) / g2 * (g2a+1._DP)
|
||||
g(3,ng) * g(1,ng) / g2 * (g2a+1._DP)
|
||||
sigma32 = sigma32 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(3,ng) * g_d(2,ng) / g2 * (g2a+1._DP)
|
||||
g(3,ng) * g(2,ng) / g2 * (g2a+1._DP)
|
||||
sigma33 = sigma33 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(3,ng) * g_d(3,ng) / g2 * (g2a+1._DP)
|
||||
g(3,ng) * g(3,ng) / g2 * (g2a+1._DP)
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
|
@ -934,11 +844,8 @@ SUBROUTINE cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
|
|||
sigmaewa(3,2) = sigmaewa(3,2) + sigma32
|
||||
sigmaewa(3,3) = sigmaewa(3,3) + sigma33
|
||||
!
|
||||
DEALLOCATE( cutoff2D_d, tau_d, zv_d )
|
||||
DEALLOCATE( ityp_d )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE cutoff_stres_sigmaewa_gpu
|
||||
END SUBROUTINE cutoff_stres_sigmaewa
|
||||
!
|
||||
END MODULE Coul_cut_2D
|
||||
|
|
|
@ -296,7 +296,6 @@ PWLIBS += \
|
|||
stres_mgga_gpu.o \
|
||||
stres_cc_gpu.o \
|
||||
deriv_drhoc_gpu.o \
|
||||
stres_ewa_gpu.o \
|
||||
stres_knl_gpu.o \
|
||||
stres_us_gpu.o \
|
||||
compute_deff_gpu.o \
|
||||
|
|
|
@ -53,7 +53,7 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
REAL(DP), INTENT(IN) :: gcutm
|
||||
!! input: cut-off of g vectors
|
||||
REAL(DP), INTENT(OUT) :: sigmaewa(3,3)
|
||||
! output: the ewald stress
|
||||
!! output: the ewald stress
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
|
@ -85,6 +85,9 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
! diagonal term
|
||||
! nondiagonal term
|
||||
COMPLEX(DP) :: rhostar
|
||||
REAL(DP) :: sigma11, sigma21, sigma22, sigma31, sigma32, sigma33
|
||||
!
|
||||
!$acc data present_or_copyin( g, gg )
|
||||
!
|
||||
tpiba2 = (tpi / alat)**2
|
||||
sigmaewa(:,:) = 0.d0
|
||||
|
@ -94,15 +97,15 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
charge = charge + zv(ityp(na))
|
||||
ENDDO
|
||||
!
|
||||
! choose alpha in order to have convergence in the sum over G
|
||||
! upperbound is a safe upper bound for the error ON THE ENERGY
|
||||
! ... choose alpha in order to have convergence in the sum over G
|
||||
! upperbound is a safe upper bound for the error ON THE ENERGY
|
||||
!
|
||||
alpha = 2.9d0
|
||||
12 alpha = alpha - 0.1d0
|
||||
!
|
||||
IF (alpha==0.0) CALL errore( 'stres_ew', 'optimal alpha not found', 1 )
|
||||
upperbound = e2 * charge**2 * SQRT(2 * alpha / tpi) * &
|
||||
erfc ( SQRT(tpiba2 * gcutm / 4.0d0 / alpha) )
|
||||
ERFC( SQRT(tpiba2 * gcutm / 4.0d0 / alpha) )
|
||||
!
|
||||
IF (upperbound > 1d-7) GOTO 12
|
||||
!
|
||||
|
@ -118,40 +121,70 @@ SUBROUTINE stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
|||
!
|
||||
! sdewald is the diagonal term
|
||||
IF (gamma_only) THEN
|
||||
fact = 2.d0
|
||||
fact = 2.d0
|
||||
ELSE
|
||||
fact = 1.d0
|
||||
ENDIF
|
||||
!
|
||||
IF (do_cutoff_2D) THEN
|
||||
!
|
||||
CALL cutoff_stres_sigmaewa( alpha, sdewald, sigmaewa )
|
||||
!
|
||||
ELSE
|
||||
!$omp parallel do default(none) shared(gstart, ngm, g, gg, tpiba2, alpha, tau, zv, ityp, nat, omega, fact)&
|
||||
!$omp &private(g2, g2a, rhostar, na, arg, l, m, sewald)&
|
||||
!$omp &reduction(+:sigmaewa,sdewald)
|
||||
!
|
||||
sigma11 = 0._DP ; sigma21 = 0._DP ; sigma22 = 0._DP
|
||||
sigma31 = 0._DP ; sigma32 = 0._DP ; sigma33 = 0._DP
|
||||
!
|
||||
#if !defined(_OPENACC)
|
||||
!$omp parallel do default(none) shared(gstart, ngm, g, gg, tpiba2, alpha, tau,&
|
||||
!$omp nat, zv, ityp, omega, fact) private(g2,g2a, rhostar, na, arg,&
|
||||
!$omp sewald) reduction(+:sdewald,sigma11,sigma21,sigma22,sigma31,&
|
||||
!$omp sigma32,sigma33)
|
||||
#else
|
||||
!$acc parallel loop copyin(tau,zv,ityp) reduction(+:sigma11,sigma21,sigma22,&
|
||||
!$acc sigma31,sigma32,sigma33) reduction(-:sdewald)
|
||||
#endif
|
||||
DO ng = gstart, ngm
|
||||
g2 = gg (ng) * tpiba2
|
||||
g2a = g2 / 4.d0 / alpha
|
||||
rhostar = (0.d0, 0.d0)
|
||||
g2 = gg(ng) * tpiba2
|
||||
g2a = g2 / 4._DP / alpha
|
||||
rhostar = (0._DP,0._DP)
|
||||
DO na = 1, nat
|
||||
arg = (g(1,ng) * tau(1,na) + g(2,ng) * tau(2,na) + &
|
||||
g(3,ng) * tau(3,na) ) * tpi
|
||||
rhostar = rhostar + zv(ityp(na)) * CMPLX(COS(arg), SIN(arg), KIND=DP)
|
||||
rhostar = rhostar + CMPLX(zv(ityp(na))) * CMPLX(COS(arg), SIN(arg), KIND=DP)
|
||||
ENDDO
|
||||
rhostar = rhostar / omega
|
||||
rhostar = rhostar / CMPLX(omega)
|
||||
sewald = fact * tpi * e2 * EXP(-g2a) / g2 * ABS(rhostar)**2
|
||||
sdewald = sdewald - sewald
|
||||
DO l = 1, 3
|
||||
DO m = 1, l
|
||||
sigmaewa(l,m) = sigmaewa(l,m) + sewald * tpiba2 * 2.d0 * &
|
||||
g(l,ng) * g(m,ng) / g2 * (g2a + 1)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
sigma11 = sigma11 + sewald * tpiba2 * 2._DP * &
|
||||
g(1,ng) * g(1,ng) / g2 * (g2a + 1)
|
||||
sigma21 = sigma21 + sewald * tpiba2 * 2._DP * &
|
||||
g(2,ng) * g(1,ng) / g2 * (g2a + 1)
|
||||
sigma22 = sigma22 + sewald * tpiba2 * 2._DP * &
|
||||
g(2,ng) * g(2,ng) / g2 * (g2a + 1)
|
||||
sigma31 = sigma31 + sewald * tpiba2 * 2._DP * &
|
||||
g(3,ng) * g(1,ng) / g2 * (g2a + 1)
|
||||
sigma32 = sigma32 + sewald * tpiba2 * 2._DP * &
|
||||
g(3,ng) * g(2,ng) / g2 * (g2a + 1)
|
||||
sigma33 = sigma33 + sewald * tpiba2 * 2._DP * &
|
||||
g(3,ng) * g(3,ng) / g2 * (g2a + 1)
|
||||
ENDDO
|
||||
#if !defined(_OPENACC)
|
||||
!$omp end parallel do
|
||||
#endif
|
||||
!
|
||||
sigmaewa(1,1) = sigmaewa(1,1) + sigma11
|
||||
sigmaewa(2,1) = sigmaewa(2,1) + sigma21
|
||||
sigmaewa(2,2) = sigmaewa(2,2) + sigma22
|
||||
sigmaewa(3,1) = sigmaewa(3,1) + sigma31
|
||||
sigmaewa(3,2) = sigmaewa(3,2) + sigma32
|
||||
sigmaewa(3,3) = sigmaewa(3,3) + sigma33
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
DO l = 1, 3
|
||||
sigmaewa(l,l) = sigmaewa(l,l) + sdewald
|
||||
ENDDO
|
||||
|
|
|
@ -1,246 +0,0 @@
|
|||
!
|
||||
! Copyright (C) 2001-2009 Quantum ESPRESSO group
|
||||
! This file is distributed under the terms of the
|
||||
! GNU General Public License. See the file `License'
|
||||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE stres_ewa_gpu( alat, nat, ntyp, ityp, zv, at, bg, tau, &
|
||||
omega, g_d, gg_d, ngm, gstart, gamma_only, &
|
||||
gcutm, sigmaewa )
|
||||
!---------------------------------------------------------------------
|
||||
!! Ewald contribution. Both real- and reciprocal-space terms are
|
||||
!! present.
|
||||
!
|
||||
USE kinds
|
||||
USE constants, ONLY : tpi, e2, eps6
|
||||
USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp
|
||||
USE mp, ONLY : mp_sum
|
||||
USE Coul_cut_2D, ONLY : do_cutoff_2D, cutoff_stres_sigmaewa_gpu
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER :: nat
|
||||
!! input: number of atoms in the unit cell
|
||||
INTEGER :: ntyp
|
||||
!! input: number of different types of atoms
|
||||
INTEGER :: ityp(nat)
|
||||
!! input: the type of each atom
|
||||
INTEGER :: ngm
|
||||
!! input: number of plane waves for G sum
|
||||
INTEGER :: gstart
|
||||
!! input: first nonzero g vector
|
||||
LOGICAL, INTENT(IN) :: gamma_only
|
||||
!! gamma point only
|
||||
REAL(DP), INTENT(IN) :: tau(3,nat)
|
||||
!! input: the positions of the atoms in the cell
|
||||
REAL(DP), INTENT(IN) :: g_d(3,ngm)
|
||||
!! input: the coordinates of G vectors
|
||||
REAL(DP), INTENT(IN) :: gg_d(ngm)
|
||||
!! input: the square moduli of G vectors
|
||||
REAL(DP), INTENT(IN) :: zv(ntyp)
|
||||
!! input: the charge of each type of atoms
|
||||
REAL(DP), INTENT(IN) :: at(3,3)
|
||||
!! input: the direct lattice vectors
|
||||
REAL(DP), INTENT(IN) :: bg(3,3)
|
||||
!! input: the reciprocal lattice vectors
|
||||
REAL(DP), INTENT(IN) :: omega
|
||||
!! input: the volume of the unit cell
|
||||
REAL(DP), INTENT(IN) :: alat
|
||||
!! input: measure of length
|
||||
REAL(DP), INTENT(IN) :: gcutm
|
||||
!! input: cut-off of g vectors
|
||||
REAL(DP), INTENT(OUT) :: sigmaewa(3,3)
|
||||
! output: the ewald stress
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER, PARAMETER :: mxr = 50
|
||||
! the maximum number of R vectors included in r sum
|
||||
INTEGER :: ng, nr, na, nb, l, m, nrm
|
||||
! counter over reciprocal G vectors
|
||||
! counter over direct vectors
|
||||
! counter on atoms
|
||||
! counter on atoms
|
||||
! counter on atoms
|
||||
! number of R vectors included in r sum
|
||||
INTEGER :: na_s, na_e, mykey
|
||||
!
|
||||
REAL(DP) :: charge, arg, tpiba2, dtau(3), alpha, r(3,mxr), &
|
||||
r2(mxr), rmax, rr, upperbound, fact, fac, g2, g2a, &
|
||||
sdewald, sewald
|
||||
! total ionic charge in the cell
|
||||
! the argument of the phase
|
||||
! length in reciprocal space
|
||||
! the difference tau_s - tau_s'
|
||||
! alpha term in ewald sum
|
||||
! input of the rgen routine ( not used here )
|
||||
! the square modulus of R_j-tau_s-tau_s'
|
||||
! the maximum radius to consider real space sum
|
||||
! buffer variable
|
||||
! used to optimize alpha
|
||||
! auxiliary variables
|
||||
! diagonal term
|
||||
! nondiagonal term
|
||||
!
|
||||
INTEGER :: ierr(2)
|
||||
REAL(DP) :: sigma11, sigma21, sigma22, sigma31, sigma32, sigma33
|
||||
COMPLEX(DP) :: rhostar
|
||||
!
|
||||
INTEGER, ALLOCATABLE :: ityp_d(:)
|
||||
REAL(DP), ALLOCATABLE :: zv_d(:), tau_d(:,:)
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
attributes(DEVICE) :: g_d, gg_d, zv_d, ityp_d, tau_d
|
||||
#endif
|
||||
!
|
||||
tpiba2 = (tpi / alat)**2
|
||||
sigmaewa(:,:) = 0._DP
|
||||
charge = 0._DP
|
||||
!
|
||||
ALLOCATE( zv_d(ntyp), tau_d(3,nat) )
|
||||
zv_d = zv
|
||||
tau_d = tau
|
||||
ALLOCATE( ityp_d(nat) )
|
||||
ityp_d = ityp
|
||||
!
|
||||
DO na = 1, nat
|
||||
charge = charge + zv(ityp(na))
|
||||
ENDDO
|
||||
!
|
||||
! choose alpha in order to have convergence in the sum over G
|
||||
! upperbound is a safe upper bound for the error ON THE ENERGY
|
||||
!
|
||||
alpha = 2.9_DP
|
||||
12 alpha = alpha - 0.1_DP
|
||||
!
|
||||
IF (alpha==0.0) CALL errore( 'stres_ew', 'optimal alpha not found', 1 )
|
||||
upperbound = e2 * charge**2 * SQRT(2 * alpha / tpi) * &
|
||||
erfc ( SQRT(tpiba2 * gcutm / 4._DP / alpha) )
|
||||
!
|
||||
IF (upperbound > 1d-7) GOTO 12
|
||||
!
|
||||
! G-space sum here
|
||||
!
|
||||
! Determine if this processor contains G=0 and set the constant term
|
||||
!
|
||||
IF (gstart == 2) THEN
|
||||
sdewald = tpi * e2 / 4._DP / alpha * (charge / omega)**2
|
||||
ELSE
|
||||
sdewald = 0._DP
|
||||
ENDIF
|
||||
!
|
||||
! sdewald is the diagonal term
|
||||
IF ( gamma_only ) THEN
|
||||
fact = 2._DP
|
||||
ELSE
|
||||
fact = 1._DP
|
||||
ENDIF
|
||||
!
|
||||
IF ( do_cutoff_2D ) THEN
|
||||
!
|
||||
CALL cutoff_stres_sigmaewa_gpu( alpha, sdewald, sigmaewa )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
sigma11 = 0._DP ; sigma21 = 0._DP ; sigma22 = 0._DP
|
||||
sigma31 = 0._DP ; sigma32 = 0._DP ; sigma33 = 0._DP
|
||||
!
|
||||
!$cuf kernel do (1) <<<*,*>>>
|
||||
DO ng = gstart, ngm
|
||||
g2 = gg_d(ng) * tpiba2
|
||||
g2a = g2 / 4._DP / alpha
|
||||
rhostar = (0._DP,0._DP)
|
||||
DO na = 1, nat
|
||||
arg = (g_d(1,ng) * tau_d(1,na) + g_d(2,ng) * tau_d(2,na) + &
|
||||
g_d(3,ng) * tau_d(3,na) ) * tpi
|
||||
rhostar = rhostar + CMPLX(zv_d(ityp_d(na))) * CMPLX(COS(arg), SIN(arg), KIND=DP)
|
||||
ENDDO
|
||||
rhostar = rhostar / CMPLX(omega)
|
||||
sewald = fact * tpi * e2 * EXP(-g2a) / g2 * ABS(rhostar)**2
|
||||
sdewald = sdewald - sewald
|
||||
!
|
||||
sigma11 = sigma11 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(1,ng) * g_d(1,ng) / g2 * (g2a + 1)
|
||||
sigma21 = sigma21 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(2,ng) * g_d(1,ng) / g2 * (g2a + 1)
|
||||
sigma22 = sigma22 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(2,ng) * g_d(2,ng) / g2 * (g2a + 1)
|
||||
sigma31 = sigma31 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(3,ng) * g_d(1,ng) / g2 * (g2a + 1)
|
||||
sigma32 = sigma32 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(3,ng) * g_d(2,ng) / g2 * (g2a + 1)
|
||||
sigma33 = sigma33 + sewald * tpiba2 * 2._DP * &
|
||||
g_d(3,ng) * g_d(3,ng) / g2 * (g2a + 1)
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
sigmaewa(1,1) = sigmaewa(1,1) + sigma11
|
||||
sigmaewa(2,1) = sigmaewa(2,1) + sigma21
|
||||
sigmaewa(2,2) = sigmaewa(2,2) + sigma22
|
||||
sigmaewa(3,1) = sigmaewa(3,1) + sigma31
|
||||
sigmaewa(3,2) = sigmaewa(3,2) + sigma32
|
||||
sigmaewa(3,3) = sigmaewa(3,3) + sigma33
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
DO l = 1, 3
|
||||
sigmaewa(l,l) = sigmaewa(l,l) + sdewald
|
||||
ENDDO
|
||||
!
|
||||
! R-space sum here (see ewald.f90 for details on parallelization)
|
||||
!
|
||||
CALL block_distribute( nat, me_bgrp, nproc_bgrp, na_s, na_e, mykey )
|
||||
!
|
||||
IF ( mykey == 0 ) THEN
|
||||
rmax = 4.0d0 / SQRT(alpha) / alat
|
||||
!
|
||||
! with this choice terms up to ZiZj*erfc(5) are counted (erfc(5)=2x10^-1
|
||||
!
|
||||
DO na = na_s, na_e
|
||||
DO nb = 1, nat
|
||||
dtau(:) = tau(:,na) - tau(:,nb)
|
||||
!
|
||||
! generates nearest-neighbors shells r(i)=R(i)-dtau(i)
|
||||
!
|
||||
CALL rgen( dtau, rmax, mxr, at, bg, r, r2, nrm )
|
||||
!
|
||||
DO nr = 1, nrm
|
||||
rr = SQRT(r2 (nr) ) * alat
|
||||
fac = - e2 / 2.0_DP/ omega * alat**2 * zv(ityp(na)) * &
|
||||
zv(ityp(nb)) / rr**3 * (erfc(SQRT(alpha) * rr) + &
|
||||
rr * SQRT(8.0_DP * alpha / tpi) * EXP( - alpha * rr**2) )
|
||||
DO l = 1, 3
|
||||
DO m = 1, l
|
||||
sigmaewa(l,m) = sigmaewa(l,m) + fac * r(l,nr) * r(m,nr)
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
DO l = 1, 3
|
||||
DO m = 1, l - 1
|
||||
sigmaewa(m,l) = sigmaewa(l,m)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
DO l = 1, 3
|
||||
DO m = 1, 3
|
||||
sigmaewa(l,m) = - sigmaewa(l,m)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( zv_d, tau_d )
|
||||
DEALLOCATE( ityp_d )
|
||||
!
|
||||
CALL mp_sum( sigmaewa, intra_bgrp_comm )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE stres_ewa_gpu
|
||||
|
|
@ -122,13 +122,10 @@ SUBROUTINE stress( sigma )
|
|||
IF ( do_comp_esm .AND. ( esm_bc /= 'pbc' ) ) THEN ! for ESM stress
|
||||
CALL esm_stres_ewa( sigmaewa )
|
||||
ELSE
|
||||
IF (.NOT. use_gpu) CALL stres_ewa( alat, nat, ntyp, ityp, zv, at, &
|
||||
bg, tau, omega, g, gg, ngm, gstart, &
|
||||
gamma_only, gcutm, sigmaewa )
|
||||
IF ( use_gpu) CALL stres_ewa_gpu( alat, nat, ntyp, ityp, zv, at, bg,&
|
||||
tau, omega, g_d,gg_d, ngm, gstart,&
|
||||
gamma_only, gcutm, sigmaewa )
|
||||
END IF
|
||||
CALL stres_ewa( alat, nat, ntyp, ityp, zv, at, bg, &
|
||||
tau, omega, g, gg, ngm, gstart, &
|
||||
gamma_only, gcutm, sigmaewa )
|
||||
ENDIF
|
||||
!
|
||||
! semi-empirical dispersion contribution: Grimme-D2 and D3
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue