mirror of https://gitlab.com/QEF/q-e.git
Vxc_acc - mgga xc on openacc
This commit is contained in:
parent
ff28e52e9f
commit
17d5d34017
|
@ -11,7 +11,7 @@ SUBROUTINE tpssmeta(nnr, nspin,grho,rho,kedtau,etxc)
|
|||
! ===================
|
||||
!--------------------------------------------------------------------
|
||||
use kinds, only: dp
|
||||
use xc_lib, only: xclib_set_threshold
|
||||
use xc_lib, only: xclib_set_threshold, xc_metagcx
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
|
|
@ -12,7 +12,7 @@ SUBROUTINE stres_gradcorr( rho, rhog, rho_core, rhog_core, kedtau, nspin, &
|
|||
!----------------------------------------------------------------------------
|
||||
!
|
||||
USE kinds, ONLY: DP
|
||||
USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx
|
||||
USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx, xc_metagcx
|
||||
USE noncollin_module, ONLY: domag
|
||||
USE mp_bands, ONLY: intra_bgrp_comm
|
||||
USE mp, ONLY: mp_sum
|
||||
|
|
|
@ -159,6 +159,7 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
|
|||
USE cell_base, ONLY : omega
|
||||
USE funct, ONLY : dft_is_nonlocc, nlc
|
||||
USE scf, ONLY : scf_type, rhoz_or_updw
|
||||
USE xc_lib, ONLY : xc_metagcx
|
||||
USE mp, ONLY : mp_sum
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
!
|
||||
|
|
|
@ -94,7 +94,6 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
|
|||
!$omp v1c_out, v2c_out, sx_out, sc_out )
|
||||
!$omp do
|
||||
#endif
|
||||
|
||||
DO ir = 1, length
|
||||
!
|
||||
grho = grho_in(ir)
|
||||
|
|
|
@ -17,7 +17,6 @@ MODULE qe_drivers_mgga
|
|||
USE kind_l, ONLY: DP
|
||||
USE dft_setting_params, ONLY: imeta, imetac, rho_threshold_mgga, &
|
||||
grho2_threshold_mgga, tau_threshold_mgga
|
||||
USE metagga
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -37,31 +36,33 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
|
|||
!! Available cases: M06L and TPSS. Other mGGA functionals can be used
|
||||
!! through Libxc.
|
||||
!
|
||||
USE metagga
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! Number of k-points
|
||||
REAL(DP), DIMENSION(length) :: rho
|
||||
REAL(DP), INTENT(IN) :: rho(length)
|
||||
!! Charge density
|
||||
REAL(DP), DIMENSION(length) :: grho2
|
||||
REAL(DP), INTENT(IN) :: grho2(length)
|
||||
!! Square modulus of the density gradient
|
||||
REAL(DP), DIMENSION(length) :: tau
|
||||
REAL(DP), INTENT(IN) :: tau(length)
|
||||
!! Laplacian of the density
|
||||
REAL(DP), DIMENSION(length) :: ex
|
||||
REAL(DP), INTENT(OUT) :: ex(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), DIMENSION(length) :: ec
|
||||
REAL(DP), INTENT(OUT) :: ec(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), DIMENSION(length) :: v1x
|
||||
REAL(DP), INTENT(OUT) :: v1x(length)
|
||||
!! \( D\ E_x\ /\ D\ \text{rho} \)
|
||||
REAL(DP), DIMENSION(length) :: v2x
|
||||
REAL(DP), INTENT(OUT) :: v2x(length)
|
||||
!! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), DIMENSION(length) :: v3x
|
||||
REAL(DP), INTENT(OUT) :: v3x(length)
|
||||
!! \( D\ E_x\ /\ D\ \text{tau} \)
|
||||
REAL(DP), DIMENSION(length) :: v1c
|
||||
REAL(DP), INTENT(OUT) :: v1c(length)
|
||||
!! \( D\ E_c\ /\ D\ \text{rho} \)
|
||||
REAL(DP), DIMENSION(length) :: v2c
|
||||
REAL(DP), INTENT(OUT) :: v2c(1,length,1)
|
||||
!! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), DIMENSION(length) :: v3c
|
||||
REAL(DP), INTENT(OUT) :: v3c(length)
|
||||
!! \( D\ E_c\ /\ D\ \text{tau} \)
|
||||
!
|
||||
! ... local variables
|
||||
|
@ -69,15 +70,23 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
|
|||
INTEGER :: k
|
||||
REAL(DP) :: arho
|
||||
!
|
||||
v1x=0.d0 ; v2x=0.d0 ; v3x=0.d0 ; ex=0.d0
|
||||
v1c=0.d0 ; v2c=0.d0 ; v3c=0.d0 ; ec=0.d0
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho(length), grho2(length), tau(length), ex(length), ec(length), &
|
||||
!$acc& v1x(length), v2x(length), v3x(length), v1c(length), v2c(1,length,1), &
|
||||
!$acc& v3c(length) )
|
||||
!
|
||||
!$acc parallel loop
|
||||
#endif
|
||||
DO k = 1, length
|
||||
!
|
||||
arho = ABS(rho(k))
|
||||
!
|
||||
IF ( (arho<=rho_threshold_mgga).OR.(grho2(k)<=grho2_threshold_mgga).OR. &
|
||||
(ABS(tau(k))<=rho_threshold_mgga) ) CYCLE
|
||||
(ABS(tau(k))<=rho_threshold_mgga) ) THEN
|
||||
v1x(k)=0.d0 ; v2x(k)=0.d0 ; v3x(k)=0.d0 ; ex(k)=0.d0
|
||||
v1c(k)=0.d0 ; v2c(1,k,1)=0.d0 ; v3c(k)=0.d0 ; ec(k)=0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
!
|
||||
! ...libxc-like threshold management
|
||||
!grho2(k) = MIN( grho2(k), (8.d0*rho(k)*tau(k))**2 )
|
||||
|
@ -86,21 +95,25 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
|
|||
CASE( 1 )
|
||||
!
|
||||
CALL tpsscxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), &
|
||||
v3x(k), v1c(k), v2c(k), v3c(k) )
|
||||
v3x(k), v1c(k), v2c(1,k,1), v3c(k) )
|
||||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
CALL m06lxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), &
|
||||
v3x(k), v1c(k), v2c(k), v3c(k) )
|
||||
v3x(k), v1c(k), v2c(1,k,1), v3c(k) )
|
||||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta )
|
||||
!CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta )
|
||||
!
|
||||
END SELECT
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc end data
|
||||
#endif
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE tau_xc
|
||||
|
@ -114,65 +127,74 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
|
|||
!! Available cases: M06L and TPSS. Other mGGA functionals can be used
|
||||
!! through Libxc.
|
||||
!
|
||||
USE metagga
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! Number of k-points
|
||||
REAL(DP), INTENT(IN), DIMENSION(length,2) :: rho
|
||||
REAL(DP), INTENT(IN) :: rho(length,2)
|
||||
!! Charge density
|
||||
REAL(DP), INTENT(IN), DIMENSION(3,length,2) :: grho
|
||||
REAL(DP), INTENT(IN) :: grho(3,length,2)
|
||||
!! The density gradient
|
||||
REAL(DP), INTENT(IN), DIMENSION(length,2) :: tau
|
||||
REAL(DP), INTENT(IN) :: tau(length,2)
|
||||
!! Laplacian of the density
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: ex
|
||||
REAL(DP), INTENT(OUT) :: ex(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: ec
|
||||
REAL(DP), INTENT(OUT) :: ec(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1x
|
||||
REAL(DP), INTENT(OUT) :: v1x(length,2)
|
||||
!! \( D\ E_x\ /\ D\ \text{rho} \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v2x
|
||||
REAL(DP), INTENT(OUT) :: v2x(length,2)
|
||||
!! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3x
|
||||
REAL(DP), INTENT(OUT) :: v3x(length,2)
|
||||
!! \( D\ E_x\ /\ D\ \text{tau} \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1c
|
||||
REAL(DP), INTENT(OUT) :: v1c(length,2)
|
||||
!! \( D\ E_c\ /\ D\ \text{rho} \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(3,length,2) :: v2c
|
||||
REAL(DP), INTENT(OUT) :: v2c(3,length,2)
|
||||
!! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3c
|
||||
REAL(DP), INTENT(OUT) :: v3c(length,2)
|
||||
!! \( D\ E_c\ /\ D\ \text{tau} \)
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: k
|
||||
REAL(DP) :: rh, zeta, atau, grho2(2), ggrho2
|
||||
REAL(DP) :: rh, zeta, atau, grho2up, grho2dw, ggrho2
|
||||
REAL(DP) :: v2cup, v2cdw
|
||||
!
|
||||
ex=0.0_DP ; v1x=0.0_DP ; v2x=0.0_DP ; v3x=0.0_DP
|
||||
ec=0.0_DP ; v1c=0.0_DP ; v2c=0.0_DP ; v3c=0.0_DP
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho(length,2), grho(3,length,2), tau(length,2), ex(length), &
|
||||
!$acc& ec(length), v1x(length,2), v2x(length,2), v3x(length,2), &
|
||||
!$acc& v1c(length,2), v2c(3,length,2), v3c(length,2) )
|
||||
!
|
||||
!$acc parallel loop
|
||||
#endif
|
||||
DO k = 1, length
|
||||
!
|
||||
rh = rho(k,1) + rho(k,2)
|
||||
atau = tau(k,1) + tau(k,2) ! KE-density in Hartree
|
||||
! ...libxc-like threshold management
|
||||
!grho2(1) = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 )
|
||||
!grho2(2) = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 )
|
||||
grho2(1) = SUM(grho(:,k,1)**2)
|
||||
grho2(2) = SUM(grho(:,k,2)**2)
|
||||
ggrho2 = ( grho2(1) + grho2(2) ) * 4.0_DP
|
||||
!grho2up = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 )
|
||||
!grho2dw = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 )
|
||||
grho2up = SUM(grho(:,k,1)**2)
|
||||
grho2dw = SUM(grho(:,k,2)**2)
|
||||
ggrho2 = ( grho2up + grho2dw ) * 4.0_DP
|
||||
!
|
||||
IF ( (rh <= rho_threshold_mgga).OR.(ggrho2 <= grho2_threshold_mgga).OR.&
|
||||
(ABS(atau) <= tau_threshold_mgga) ) CYCLE
|
||||
(ABS(atau) <= tau_threshold_mgga) ) THEN
|
||||
v1x(k,:)=0.d0 ; v2x(k,:)=0.d0 ; v3x(k,:)=0.d0 ; ex(k)=0.d0
|
||||
v1c(k,:)=0.d0 ; v2c(:,k,:)=0.d0 ; v3c(k,:)=0.d0 ; ec(k)=0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
!
|
||||
SELECT CASE( imeta )
|
||||
CASE( 1 )
|
||||
!
|
||||
CALL tpsscx_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), &
|
||||
CALL tpsscx_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), &
|
||||
tau(k,2), ex(k), v1x(k,1), v1x(k,2), v2x(k,1), &
|
||||
v2x(k,2), v3x(k,1), v3x(k,2) )
|
||||
!
|
||||
zeta = (rho(k,1) - rho(k,2)) / rh
|
||||
zeta = MAX( MIN( 0.99999999_DP, zeta ), -0.99999999_DP )
|
||||
zeta = MAX( MIN( 0.99999999_DP, (rho(k,1)-rho(k,2))/rh ), -0.99999999_DP )
|
||||
!
|
||||
CALL tpsscc_spin( rh, zeta, grho(:,k,1), grho(:,k,2), atau, ec(k), &
|
||||
v1c(k,1), v1c(k,2), v2c(:,k,1), v2c(:,k,2), &
|
||||
|
@ -180,7 +202,7 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
|
|||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
CALL m06lxc_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), &
|
||||
CALL m06lxc_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), &
|
||||
tau(k,2), ex(k), ec(k), v1x(k,1), v1x(k,2), &
|
||||
v2x(k,1), v2x(k,2), v3x(k,1), v3x(k,2), v1c(k,1), &
|
||||
v1c(k,2), v2cup, v2cdw, v3c(k,1), v3c(k,2) )
|
||||
|
@ -190,12 +212,16 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
|
|||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta )
|
||||
!CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta )
|
||||
!
|
||||
END SELECT
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc end data
|
||||
#endif
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE tau_xc_spin
|
||||
|
|
|
@ -6,22 +6,23 @@
|
|||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!------------------------------------------------------------------------
|
||||
MODULE metagga !<GPU:metagga=>metagga_gpu>
|
||||
MODULE metagga
|
||||
!-------------------------------------------------------------------------
|
||||
!! MetaGGA functionals. Available functionals:
|
||||
!
|
||||
!! * TPSS (Tao, Perdew, Staroverov & Scuseria)
|
||||
!! * M06L
|
||||
!
|
||||
USE exch_lda, ONLY: slater !<GPU:slater=>slater_d,exch_lda=>exch_lda_gpu>
|
||||
USE corr_lda, ONLY: pw, pw_spin !<GPU:pw=>pw_d,pw_spin=>pw_spin_d,corr_lda=>corr_lda_gpu>
|
||||
USE corr_gga, ONLY: pbec, pbec_spin !<GPU:pbec=>pbec_d,pbec_spin=>pbec_spin_d,corr_gga=>corr_gga_gpu>
|
||||
USE exch_lda, ONLY: slater
|
||||
USE corr_lda, ONLY: pw, pw_spin
|
||||
USE corr_gga, ONLY: pbec, pbec_spin
|
||||
!
|
||||
CONTAINS
|
||||
! TPSS
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c ) !<GPU:DEVICE>
|
||||
SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!! TPSS metaGGA corrections for exchange and correlation - Hartree a.u.
|
||||
!! Definition: \(E_x = \int E_x(\text{rho},\text{grho}) dr\)
|
||||
|
@ -70,9 +71,9 @@ SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c )
|
|||
ENDIF
|
||||
!
|
||||
! exchange
|
||||
CALL metax( rho, grho, tau, sx, v1x, v2x, v3x ) !<GPU:metax=>metax_d>
|
||||
CALL metax( rho, grho, tau, sx, v1x, v2x, v3x )
|
||||
! correlation
|
||||
CALL metac( rho, grho, tau, sc, v1c, v2c, v3c ) !<GPU:metac=>metac_d>
|
||||
CALL metac( rho, grho, tau, sc, v1c, v2c, v3c )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
@ -80,7 +81,8 @@ END SUBROUTINE tpsscxc
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
||||
SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x )
|
||||
!$acc routine seq
|
||||
!--------------------------------------------------------------------
|
||||
!! TPSS meta-GGA exchange potential and energy.
|
||||
!
|
||||
|
@ -133,8 +135,8 @@ SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:
|
|||
ENDIF
|
||||
!
|
||||
rs = pi34/rho**third
|
||||
CALL slater( rs, ex_unif, vx_unif ) !<GPU:slater=>slater_d>
|
||||
CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) !<GPU:metaFX=>metaFX_d>
|
||||
CALL slater( rs, ex_unif, vx_unif )
|
||||
CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x )
|
||||
!
|
||||
ex = rho*ex_unif
|
||||
v1x = vx_unif*fx + ex*f1x
|
||||
|
@ -148,7 +150,8 @@ END SUBROUTINE metax
|
|||
!
|
||||
!
|
||||
!------------------------------------------------------------------
|
||||
SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:DEVICE>
|
||||
SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c )
|
||||
!$acc routine seq
|
||||
!--------------------------------------------------------------
|
||||
!! TPSS meta-GGA correlation energy and potentials.
|
||||
!
|
||||
|
@ -208,11 +211,11 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:
|
|||
IF (rhoup > small) THEN
|
||||
!
|
||||
rs = (pi34/rhoup)**third
|
||||
CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) )
|
||||
!
|
||||
IF (ABS(grhoup) > small) THEN
|
||||
! zeta=1.0_DP-small to avoid pow_e of 0 in pbec_spin
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, &
|
||||
ec_sum, v1c_sum(1), v1c_sum(2), v2c_sum )
|
||||
ELSE
|
||||
ec_sum = 0.0_DP
|
||||
|
@ -229,13 +232,13 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:
|
|||
ENDIF
|
||||
!
|
||||
rs = (pi34/rho)**third
|
||||
CALL pw( rs, 1, ec_unif, vc_unif ) !<GPU:pw=>pw_d>
|
||||
CALL pw( rs, 1, ec_unif, vc_unif )
|
||||
!
|
||||
! ... PBE correlation energy and potential:
|
||||
! ec_pbe=rho*H, not rho*(epsion_c_uinf + H)
|
||||
! v1c_pbe=D (rho*H) /D rho
|
||||
! v2c_pbe= for rho, 2 for
|
||||
CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe ) !<GPU:pbec=>pbec_d>
|
||||
CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe )
|
||||
!
|
||||
ec_pbe = ec_pbe/rho+ec_unif
|
||||
v1c_pbe = (v1c_pbe+vc_unif-ec_pbe)/rho
|
||||
|
@ -275,7 +278,8 @@ END SUBROUTINE metac
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) !<GPU:DEVICE>
|
||||
SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x )
|
||||
!$acc routine seq
|
||||
!-------------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -397,8 +401,9 @@ END SUBROUTINE metaFX
|
|||
!
|
||||
!
|
||||
!---------------------------------------------------------------------------
|
||||
SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, & !<GPU:DEVICE>
|
||||
SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
|
||||
v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!! TPSS metaGGA for exchange - Hartree a.u.
|
||||
!
|
||||
|
@ -442,7 +447,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
|
|||
rho = rhoup + rhodw
|
||||
IF (rhoup>small .AND. SQRT(ABS(grhoup2))>small &
|
||||
.AND. ABS(tauup) > small) THEN
|
||||
CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, & !<GPU:metax=>metax_d>
|
||||
CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, &
|
||||
2.0_DP*tauup, sxup, v1xup, v2xup, v3xup )
|
||||
ELSE
|
||||
sxup = 0.0_DP
|
||||
|
@ -453,7 +458,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
|
|||
!
|
||||
IF (rhodw > small .AND. SQRT(ABS(grhodw2)) > small &
|
||||
.AND. ABS(taudw) > small) THEN
|
||||
CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, & !<GPU:metax=>metax_d>
|
||||
CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, &
|
||||
2.0_DP*taudw, sxdw, v1xdw, v2xdw, v3xdw )
|
||||
ELSE
|
||||
sxdw = 0.0_DP
|
||||
|
@ -472,8 +477,9 @@ END SUBROUTINE tpsscx_spin
|
|||
!
|
||||
!
|
||||
!---------------------------------------------------------------------------
|
||||
SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
||||
SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, &
|
||||
atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw )
|
||||
!$acc routine seq
|
||||
!--------------------------------------------------------------------------
|
||||
!! TPSS metaGGA for correlations - Hartree a.u.
|
||||
!
|
||||
|
@ -536,7 +542,7 @@ SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
!
|
||||
v3c = 0.0_DP
|
||||
ELSE
|
||||
CALL metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:metac_spin=>metac_spin_d>
|
||||
CALL metac_spin( rho, zeta, grhoup, grhodw, &
|
||||
atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3c )
|
||||
ENDIF
|
||||
!
|
||||
|
@ -549,8 +555,9 @@ END SUBROUTINE tpsscc_spin
|
|||
!
|
||||
!
|
||||
!---------------------------------------------------------------
|
||||
SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
||||
SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, &
|
||||
tau, sc, v1up, v1dw, v2up, v2dw, v3 )
|
||||
!$acc routine seq
|
||||
!---------------------------------------------------------------
|
||||
USE kind_l, ONLY : DP
|
||||
!
|
||||
|
@ -634,10 +641,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
!
|
||||
v2_tmp = 0.0_DP
|
||||
rs = (pi34/rho)**third
|
||||
CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) )
|
||||
!
|
||||
IF ((ABS(grho) > small) .AND. (zeta <= 1.0_DP)) THEN
|
||||
CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp ) !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp )
|
||||
v1up_pbe=v1_pbe(1)
|
||||
v1dw_pbe=v1_pbe(2)
|
||||
ELSE
|
||||
|
@ -668,10 +675,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
v2_tmp = 0.0_DP
|
||||
!
|
||||
rs = (pi34/rhoup)**third
|
||||
CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) )
|
||||
!
|
||||
IF (SQRT(grhoup2) > small) THEN
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, &
|
||||
ecup_0, v1_0v(1), v1_0v(2), v2_tmp )
|
||||
v1up_0 = v1_0v(1)
|
||||
v1dw_0 = v1_0v(2)
|
||||
|
@ -709,11 +716,11 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
v2_tmp = 0.0_DP
|
||||
!
|
||||
rs = (pi34/rhodw)**third
|
||||
CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) )
|
||||
!
|
||||
IF (SQRT(grhodw2) > small) THEN
|
||||
!
|
||||
CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, &
|
||||
ecdw_0, v1_0v(1), v1_0v(2), v2_tmp )
|
||||
!
|
||||
v1up_0 = v1_0v(1)
|
||||
|
@ -877,7 +884,8 @@ END SUBROUTINE metac_spin
|
|||
! ec, v1c, v2c, v3c as above for correlation
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) !<GPU:DEVICE>
|
||||
SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -923,14 +931,14 @@ SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
|||
taub = taua ! Tau is defined as summ_i( |nabla phi_i|**2 )
|
||||
! in the following M06L routines
|
||||
!
|
||||
CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x ) !<GPU:m06lx=>m06lx_d>
|
||||
CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x )
|
||||
!
|
||||
ex = two * ex ! Add the two components up + dw
|
||||
!
|
||||
v2x = 0.5_dp * v2x
|
||||
!v3x = 2.0_dp * v3x !**mismatch with Libxc by a factor of 2
|
||||
!
|
||||
CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, & !<GPU:m06lc=>m06lc_d>
|
||||
CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, &
|
||||
v1cb, v2cb, v3cb )
|
||||
!
|
||||
v2c = 0.5_dp * v2c
|
||||
|
@ -940,9 +948,10 @@ END SUBROUTINE m06lxc
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, & !<GPU:DEVICE>
|
||||
SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, &
|
||||
ex, ec, v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw, &
|
||||
v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -959,14 +968,14 @@ SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, &
|
|||
taua = tauup * two ! Tau is defined as summ_i( |nabla phi_i|**2 )
|
||||
taub = taudw * two ! in the rest of the routine
|
||||
!
|
||||
CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup ) !<GPU:m06lx=>m06lx_d>
|
||||
CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw ) !<GPU:m06lx=>m06lx_d>
|
||||
CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup )
|
||||
CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw )
|
||||
!
|
||||
ex = exup + exdw
|
||||
!v3xup = 2.0_dp * v3xup !**mismatch with Libxc by a factor of 2
|
||||
!v3xdw = 2.0_dp * v3xdw !**
|
||||
!
|
||||
CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, & !<GPU:m06lc=>m06lc_d>
|
||||
CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, &
|
||||
ec, v1cup, v2cup, v3cup, v1cdw, v2cdw, v3cdw )
|
||||
!v3cup = 2.0_dp * v3cup !**
|
||||
!v3cdw = 2.0_dp * v3cdw !**
|
||||
|
@ -977,7 +986,8 @@ END SUBROUTINE m06lxc_spin
|
|||
!=============================== M06L exchange ==========================
|
||||
!
|
||||
!-------------------------------------------------------------------------------
|
||||
SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
||||
SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x )
|
||||
!$acc routine seq
|
||||
!---------------------------------------------------------------------------
|
||||
!! M06L exchange.
|
||||
!
|
||||
|
@ -1082,7 +1092,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
|||
gh = one + alpha * (xs2 + zs)
|
||||
!
|
||||
IF (gh >= small) THEN
|
||||
CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs ) !<GPU:gvt4=>gvt4_d>
|
||||
CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs )
|
||||
ELSE
|
||||
hg = zero
|
||||
dhg_dxs2 = zero
|
||||
|
@ -1121,7 +1131,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
|||
dfws_drho = dfws*dws_dts*dts_drho
|
||||
dfws_dtau = dfws*dws_dts*dts_dtau
|
||||
!
|
||||
CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe ) !<GPU:pbex_m06l=>pbex_m06l_d>
|
||||
CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe )
|
||||
!
|
||||
v1x_unif = f43 * CX * rho13
|
||||
!
|
||||
|
@ -1143,7 +1153,8 @@ END SUBROUTINE m06lx
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------
|
||||
SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x ) !<GPU:DEVICE>
|
||||
SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x )
|
||||
!$acc routine seq
|
||||
!---------------------------------------------------------------
|
||||
!! PBE exchange (without Slater exchange):
|
||||
!! J.P.Perdew, K.Burke, M.Ernzerhof, PRL 77, 3865 (1996).
|
||||
|
@ -1211,8 +1222,9 @@ END SUBROUTINE pbex_m06l
|
|||
!=============================== M06L correlation ==========================
|
||||
!
|
||||
!---------------------------------------------------------------------------------
|
||||
SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & !<GPU:DEVICE>
|
||||
SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
||||
v3c_up, v1c_dw, v2c_dw, v3c_dw )
|
||||
!$acc routine seq
|
||||
!-------------------------------------------------------------------------------
|
||||
!! M06L correlation.
|
||||
!
|
||||
|
@ -1366,12 +1378,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
|||
!
|
||||
rs = rsa
|
||||
zeta = one
|
||||
CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) )
|
||||
vc_pw_a = vc_v(1)
|
||||
vv = vc_v(2)
|
||||
!
|
||||
CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa ) !<GPU:gvt4=>gvt4_d>
|
||||
CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a ) !<GPU:gfunc=>gfunc_d>
|
||||
CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa )
|
||||
CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a )
|
||||
!
|
||||
Ec_UEG_aa = rhoa*ec_pw_a
|
||||
num = (dgsa_dxs2a + dhga_dxs2a)*Dsa + (gsa + hga)*dDsa_dxs2a
|
||||
|
@ -1426,12 +1438,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
|||
!
|
||||
zeta = one
|
||||
rs = rsb
|
||||
CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) )
|
||||
vc_pw_b = vc_v(1)
|
||||
vv = vc_v(2)
|
||||
!
|
||||
CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb ) !<GPU:gvt4=>gvt4_d>
|
||||
CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b ) !<GPU:gfunc=>gfunc_d>
|
||||
CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb )
|
||||
CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b )
|
||||
!
|
||||
Ec_UEG_bb = rhob*ec_pw_b
|
||||
num = (dgsb_dxs2b + dhgb_dxs2b)*Dsb + (gsb + hgb)*dDsb_dxs2b
|
||||
|
@ -1468,13 +1480,13 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
|||
zeta = (rhoa - rhob)/rho
|
||||
rs = (pi34/rho)**f13
|
||||
!
|
||||
CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, & !<GPU:gvt4=>gvt4_d>
|
||||
CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, &
|
||||
dhgab_dxs2ab, dhgab_dzsab )
|
||||
!
|
||||
CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) )
|
||||
vc_pw_up=vc_v(1) ; vc_pw_dw=vc_v(2)
|
||||
!
|
||||
CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab ) !<GPU:gfunc=>gfunc_d>
|
||||
CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab )
|
||||
!
|
||||
decab_drhoa = vc_pw_up - vc_pw_a
|
||||
decab_drhob = vc_pw_dw - vc_pw_b
|
||||
|
@ -1515,7 +1527,8 @@ END SUBROUTINE m06lc
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------
|
||||
SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) !<GPU:DEVICE>
|
||||
SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -1545,7 +1558,8 @@ SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) !<GPU:DEVICE>
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz ) !<GPU:DEVICE>
|
||||
SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz )
|
||||
!$acc routine seq
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
|
|
@ -48,7 +48,6 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
!! whether you wish to run on gpu in case use_gpu is true
|
||||
!
|
||||
LOGICAL :: run_on_gpu
|
||||
REAL(DP), ALLOCATABLE :: v2c_dummy(:)
|
||||
!
|
||||
run_on_gpu = .FALSE.
|
||||
IF ( PRESENT(run_on_gpu_) ) run_on_gpu = run_on_gpu_
|
||||
|
@ -126,8 +125,8 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
|
|||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: is
|
||||
REAL(DP), ALLOCATABLE :: grho2(:,:)
|
||||
INTEGER :: is, k, ipol, pol_unpol
|
||||
REAL(DP), ALLOCATABLE :: grho2(:) ,v2cc(:)
|
||||
REAL(DP), PARAMETER :: small = 1.E-10_DP
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
|
@ -137,7 +136,6 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
|
|||
REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:), vc_tau(:)
|
||||
REAL(DP), ALLOCATABLE :: lapl_rho(:), vlapl_rho(:) ! not used in QE
|
||||
!
|
||||
INTEGER :: k, ipol, pol_unpol
|
||||
LOGICAL :: POLARIZED
|
||||
REAL(DP) :: rh, ggrho2, atau
|
||||
#if (XC_MAJOR_VERSION > 4)
|
||||
|
@ -198,19 +196,30 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
|
|||
!
|
||||
IF ( .NOT.is_libxc(5) .AND. imetac==0 ) THEN
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!
|
||||
DO is = 1, ns
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
ENDDO
|
||||
!
|
||||
IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, &
|
||||
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1),&
|
||||
v3c(:,1) )
|
||||
IF (ns == 2) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x,&
|
||||
v1c, v2c, v3c )
|
||||
!
|
||||
DEALLOCATE( grho2 )
|
||||
IF (ns == 1) THEN
|
||||
!
|
||||
ALLOCATE( grho2(length) )
|
||||
grho2(:) = grho(1,:,1)**2 + grho(2,:,1)**2 + grho(3,:,1)**2
|
||||
!
|
||||
!$acc data copyin( rho, grho2, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc host_data use_device( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
CALL tau_xc( length, rho(:,1), grho2, tau(:,1), ex, ec, &
|
||||
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
ELSEIF (ns == 2) THEN
|
||||
!
|
||||
!$acc data copyin( rho, grho, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc host_data use_device( rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, &
|
||||
v1c, v2c, v3c )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
|
@ -312,27 +321,32 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
|
|||
DEALLOCATE( vc_rho , vc_sigma, vc_tau, vlapl_rho )
|
||||
!
|
||||
#else
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!
|
||||
DO is = 1, ns
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
ENDDO
|
||||
!
|
||||
IF (ns == 1) THEN
|
||||
!
|
||||
CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, v1x(:,1), &
|
||||
v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1), v3c(:,1) )
|
||||
ALLOCATE( grho2(length) )
|
||||
grho2(:) = grho(1,:,1)**2 + grho(2,:,1)**2 + grho(3,:,1)**2
|
||||
!
|
||||
!$acc data copyin( rho, grho2, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc host_data use_device( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
CALL tau_xc( length, rho(:,1), grho2, tau(:,1), ex, ec, v1x(:,1), &
|
||||
v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
ELSEIF (ns == 2) THEN
|
||||
!
|
||||
!$acc data copyin( rho, grho, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc host_data use_device( rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, &
|
||||
v2c, v3c )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
#endif
|
||||
!
|
||||
RETURN
|
||||
|
@ -391,7 +405,7 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: is
|
||||
INTEGER :: k, is, ipol, pol_unpol
|
||||
REAL(DP), ALLOCATABLE :: grho2(:,:)
|
||||
REAL(DP), PARAMETER :: small = 1.E-10_DP
|
||||
!
|
||||
|
@ -402,19 +416,24 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:), vc_tau(:)
|
||||
REAL(DP), ALLOCATABLE :: lapl_rho(:), vlapl_rho(:) ! not used in QE
|
||||
!
|
||||
INTEGER :: k, ipol, pol_unpol
|
||||
LOGICAL :: POLARIZED
|
||||
REAL(DP) :: rh, ggrho2, atau
|
||||
#if (XC_MAJOR_VERSION > 4)
|
||||
INTEGER(8) :: lengthxc
|
||||
#else
|
||||
INTEGER :: lengthxc
|
||||
#endif
|
||||
#endif
|
||||
!
|
||||
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), tau(length,ns), ex(length), &
|
||||
!$acc& ec(length), v1x(length,ns), v2x(length,ns), v3x(length,ns), &
|
||||
!$acc& v1c(length,ns), v2c(np,length,ns), v3c(length,ns) )
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
lengthxc = length
|
||||
!
|
||||
ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP ; v3x = 0.0_DP
|
||||
ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP ; v3c = 0.0_DP
|
||||
!ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP ; v3x = 0.0_DP
|
||||
!ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP ; v3c = 0.0_DP
|
||||
!
|
||||
POLARIZED = .FALSE.
|
||||
IF (ns == 2) THEN
|
||||
|
@ -425,6 +444,8 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
!
|
||||
ALLOCATE( rho_lxc(length*ns), sigma(length*np), tau_lxc(length*ns) )
|
||||
ALLOCATE( lapl_rho(length*ns) )
|
||||
!$acc data copyout( rho_lxc, sigma, tau_lxc, lapl_rho )
|
||||
!$acc host_data use_device( rho_lxc, sigma, tau_lxc, lapl_rho )
|
||||
!
|
||||
ALLOCATE( ex_lxc(length) , ec_lxc(length) )
|
||||
ALLOCATE( vx_rho(length*ns) , vx_sigma(length*np), vx_tau(length*ns) )
|
||||
|
@ -434,16 +455,18 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
!
|
||||
IF ( ns == 1 ) THEN
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_lxc(k) = ABS( rho(k,1) )
|
||||
sigma(k) = MAX( grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2, &
|
||||
grho2_threshold_mgga )
|
||||
tau_lxc(k) = MAX( tau(k,1), tau_threshold_mgga )
|
||||
vlapl_rho(k) = 0._DP
|
||||
lapl_rho(k) = 0.d0
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_lxc(2*k-1) = ABS( rho(k,1) )
|
||||
rho_lxc(2*k) = ABS( rho(k,2) )
|
||||
|
@ -455,12 +478,14 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
grho2_threshold_mgga )
|
||||
tau_lxc(2*k-1) = MAX( tau(k,1), small )
|
||||
tau_lxc(2*k) = MAX( tau(k,2), small )
|
||||
vlapl_rho(2*k-1) = 0._DP
|
||||
vlapl_rho(2*k) = 0._DP
|
||||
lapl_rho(2*k-1) = 0.d0
|
||||
lapl_rho(2*k) = 0.d0
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
vlapl_rho = 0._DP
|
||||
!
|
||||
IF ( .NOT.is_libxc(5) .AND. imetac==0 ) THEN
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
|
@ -469,8 +494,8 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
ENDDO
|
||||
!
|
||||
IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, &
|
||||
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1),&
|
||||
IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, &
|
||||
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c, &
|
||||
v3c(:,1) )
|
||||
IF (ns == 2) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x,&
|
||||
v1c, v2c, v3c )
|
||||
|
@ -479,6 +504,9 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
! ... META EXCHANGE
|
||||
!
|
||||
IF ( is_libxc(5) ) THEN
|
||||
|
@ -523,17 +551,6 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
! **Now this is done directly inside Libxc for SCAN0**
|
||||
! ... only for HK/MCA: SCAN0 (used in CPV)
|
||||
!IF ( scan_exx ) THEN
|
||||
! IF (exx_started) THEN
|
||||
! ex = (1.0_DP - exx_fraction) * ex
|
||||
! v1x = (1.0_DP - exx_fraction) * v1x
|
||||
! v2x = (1.0_DP - exx_fraction) * v2x
|
||||
! v3x = (1.0_DP - exx_fraction) * v3x
|
||||
! ENDIF
|
||||
!ENDIF
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
! ... META CORRELATION
|
||||
|
@ -590,15 +607,20 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
#else
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!$acc data create( grho2(length,ns) )
|
||||
!$acc host_data use_device( grho2 )
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, ns
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
DO k = 1, length
|
||||
grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
IF (ns == 1) THEN
|
||||
!
|
||||
CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, v1x(:,1), &
|
||||
v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1), v3c(:,1) )
|
||||
v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
|
||||
!
|
||||
ELSEIF (ns == 2) THEN
|
||||
!
|
||||
|
@ -607,9 +629,13 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
|
|||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
#endif
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -78,7 +78,8 @@ subroutine vxcgc( ndm, mesh, nspin, r, r2, rho, rhoc, vgc, egc, &
|
|||
!
|
||||
use kinds, only : DP
|
||||
use constants, only : fpi, e2
|
||||
use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx
|
||||
use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx, &
|
||||
xc_metagcx
|
||||
implicit none
|
||||
integer, intent(in) :: ndm,mesh,nspin,iflag
|
||||
real(DP), intent(in) :: r(mesh), r2(mesh), rho(ndm,2), rhoc(ndm)
|
||||
|
|
Loading…
Reference in New Issue