Vxc_acc - mgga xc on openacc

This commit is contained in:
fabrizio22 2021-11-14 15:41:41 +01:00
parent ff28e52e9f
commit 17d5d34017
8 changed files with 214 additions and 147 deletions

View File

@ -11,7 +11,7 @@ SUBROUTINE tpssmeta(nnr, nspin,grho,rho,kedtau,etxc)
! ===================
!--------------------------------------------------------------------
use kinds, only: dp
use xc_lib, only: xclib_set_threshold
use xc_lib, only: xclib_set_threshold, xc_metagcx
!
IMPLICIT NONE
!

View File

@ -12,7 +12,7 @@ SUBROUTINE stres_gradcorr( rho, rhog, rho_core, rhog_core, kedtau, nspin, &
!----------------------------------------------------------------------------
!
USE kinds, ONLY: DP
USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx
USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx, xc_metagcx
USE noncollin_module, ONLY: domag
USE mp_bands, ONLY: intra_bgrp_comm
USE mp, ONLY: mp_sum

View File

@ -159,6 +159,7 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
USE cell_base, ONLY : omega
USE funct, ONLY : dft_is_nonlocc, nlc
USE scf, ONLY : scf_type, rhoz_or_updw
USE xc_lib, ONLY : xc_metagcx
USE mp, ONLY : mp_sum
USE mp_bands, ONLY : intra_bgrp_comm
!

View File

@ -94,7 +94,6 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
!$omp v1c_out, v2c_out, sx_out, sc_out )
!$omp do
#endif
DO ir = 1, length
!
grho = grho_in(ir)

View File

@ -17,7 +17,6 @@ MODULE qe_drivers_mgga
USE kind_l, ONLY: DP
USE dft_setting_params, ONLY: imeta, imetac, rho_threshold_mgga, &
grho2_threshold_mgga, tau_threshold_mgga
USE metagga
!
IMPLICIT NONE
!
@ -37,31 +36,33 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
!! Available cases: M06L and TPSS. Other mGGA functionals can be used
!! through Libxc.
!
USE metagga
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: length
!! Number of k-points
REAL(DP), DIMENSION(length) :: rho
REAL(DP), INTENT(IN) :: rho(length)
!! Charge density
REAL(DP), DIMENSION(length) :: grho2
REAL(DP), INTENT(IN) :: grho2(length)
!! Square modulus of the density gradient
REAL(DP), DIMENSION(length) :: tau
REAL(DP), INTENT(IN) :: tau(length)
!! Laplacian of the density
REAL(DP), DIMENSION(length) :: ex
REAL(DP), INTENT(OUT) :: ex(length)
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
REAL(DP), DIMENSION(length) :: ec
REAL(DP), INTENT(OUT) :: ec(length)
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
REAL(DP), DIMENSION(length) :: v1x
REAL(DP), INTENT(OUT) :: v1x(length)
!! \( D\ E_x\ /\ D\ \text{rho} \)
REAL(DP), DIMENSION(length) :: v2x
REAL(DP), INTENT(OUT) :: v2x(length)
!! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
REAL(DP), DIMENSION(length) :: v3x
REAL(DP), INTENT(OUT) :: v3x(length)
!! \( D\ E_x\ /\ D\ \text{tau} \)
REAL(DP), DIMENSION(length) :: v1c
REAL(DP), INTENT(OUT) :: v1c(length)
!! \( D\ E_c\ /\ D\ \text{rho} \)
REAL(DP), DIMENSION(length) :: v2c
REAL(DP), INTENT(OUT) :: v2c(1,length,1)
!! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
REAL(DP), DIMENSION(length) :: v3c
REAL(DP), INTENT(OUT) :: v3c(length)
!! \( D\ E_c\ /\ D\ \text{tau} \)
!
! ... local variables
@ -69,15 +70,23 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
INTEGER :: k
REAL(DP) :: arho
!
v1x=0.d0 ; v2x=0.d0 ; v3x=0.d0 ; ex=0.d0
v1c=0.d0 ; v2c=0.d0 ; v3c=0.d0 ; ec=0.d0
#if defined(_OPENACC)
!$acc data deviceptr( rho(length), grho2(length), tau(length), ex(length), ec(length), &
!$acc& v1x(length), v2x(length), v3x(length), v1c(length), v2c(1,length,1), &
!$acc& v3c(length) )
!
!$acc parallel loop
#endif
DO k = 1, length
!
arho = ABS(rho(k))
!
IF ( (arho<=rho_threshold_mgga).OR.(grho2(k)<=grho2_threshold_mgga).OR. &
(ABS(tau(k))<=rho_threshold_mgga) ) CYCLE
(ABS(tau(k))<=rho_threshold_mgga) ) THEN
v1x(k)=0.d0 ; v2x(k)=0.d0 ; v3x(k)=0.d0 ; ex(k)=0.d0
v1c(k)=0.d0 ; v2c(1,k,1)=0.d0 ; v3c(k)=0.d0 ; ec(k)=0.d0
CYCLE
ENDIF
!
! ...libxc-like threshold management
!grho2(k) = MIN( grho2(k), (8.d0*rho(k)*tau(k))**2 )
@ -86,21 +95,25 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
CASE( 1 )
!
CALL tpsscxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), &
v3x(k), v1c(k), v2c(k), v3c(k) )
v3x(k), v1c(k), v2c(1,k,1), v3c(k) )
!
CASE( 2 )
!
CALL m06lxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), &
v3x(k), v1c(k), v2c(k), v3c(k) )
v3x(k), v1c(k), v2c(1,k,1), v3c(k) )
!
CASE DEFAULT
!
CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta )
!CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta )
!
END SELECT
!
ENDDO
!
#if defined(_OPENACC)
!$acc end data
#endif
!
RETURN
!
END SUBROUTINE tau_xc
@ -114,65 +127,74 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
!! Available cases: M06L and TPSS. Other mGGA functionals can be used
!! through Libxc.
!
USE metagga
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: length
!! Number of k-points
REAL(DP), INTENT(IN), DIMENSION(length,2) :: rho
REAL(DP), INTENT(IN) :: rho(length,2)
!! Charge density
REAL(DP), INTENT(IN), DIMENSION(3,length,2) :: grho
REAL(DP), INTENT(IN) :: grho(3,length,2)
!! The density gradient
REAL(DP), INTENT(IN), DIMENSION(length,2) :: tau
REAL(DP), INTENT(IN) :: tau(length,2)
!! Laplacian of the density
REAL(DP), INTENT(OUT), DIMENSION(length) :: ex
REAL(DP), INTENT(OUT) :: ex(length)
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
REAL(DP), INTENT(OUT), DIMENSION(length) :: ec
REAL(DP), INTENT(OUT) :: ec(length)
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1x
REAL(DP), INTENT(OUT) :: v1x(length,2)
!! \( D\ E_x\ /\ D\ \text{rho} \)
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v2x
REAL(DP), INTENT(OUT) :: v2x(length,2)
!! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3x
REAL(DP), INTENT(OUT) :: v3x(length,2)
!! \( D\ E_x\ /\ D\ \text{tau} \)
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1c
REAL(DP), INTENT(OUT) :: v1c(length,2)
!! \( D\ E_c\ /\ D\ \text{rho} \)
REAL(DP), INTENT(OUT), DIMENSION(3,length,2) :: v2c
REAL(DP), INTENT(OUT) :: v2c(3,length,2)
!! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3c
REAL(DP), INTENT(OUT) :: v3c(length,2)
!! \( D\ E_c\ /\ D\ \text{tau} \)
!
! ... local variables
!
INTEGER :: k
REAL(DP) :: rh, zeta, atau, grho2(2), ggrho2
REAL(DP) :: rh, zeta, atau, grho2up, grho2dw, ggrho2
REAL(DP) :: v2cup, v2cdw
!
ex=0.0_DP ; v1x=0.0_DP ; v2x=0.0_DP ; v3x=0.0_DP
ec=0.0_DP ; v1c=0.0_DP ; v2c=0.0_DP ; v3c=0.0_DP
#if defined(_OPENACC)
!$acc data deviceptr( rho(length,2), grho(3,length,2), tau(length,2), ex(length), &
!$acc& ec(length), v1x(length,2), v2x(length,2), v3x(length,2), &
!$acc& v1c(length,2), v2c(3,length,2), v3c(length,2) )
!
!$acc parallel loop
#endif
DO k = 1, length
!
rh = rho(k,1) + rho(k,2)
atau = tau(k,1) + tau(k,2) ! KE-density in Hartree
! ...libxc-like threshold management
!grho2(1) = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 )
!grho2(2) = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 )
grho2(1) = SUM(grho(:,k,1)**2)
grho2(2) = SUM(grho(:,k,2)**2)
ggrho2 = ( grho2(1) + grho2(2) ) * 4.0_DP
!grho2up = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 )
!grho2dw = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 )
grho2up = SUM(grho(:,k,1)**2)
grho2dw = SUM(grho(:,k,2)**2)
ggrho2 = ( grho2up + grho2dw ) * 4.0_DP
!
IF ( (rh <= rho_threshold_mgga).OR.(ggrho2 <= grho2_threshold_mgga).OR.&
(ABS(atau) <= tau_threshold_mgga) ) CYCLE
(ABS(atau) <= tau_threshold_mgga) ) THEN
v1x(k,:)=0.d0 ; v2x(k,:)=0.d0 ; v3x(k,:)=0.d0 ; ex(k)=0.d0
v1c(k,:)=0.d0 ; v2c(:,k,:)=0.d0 ; v3c(k,:)=0.d0 ; ec(k)=0.d0
CYCLE
ENDIF
!
SELECT CASE( imeta )
CASE( 1 )
!
CALL tpsscx_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), &
CALL tpsscx_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), &
tau(k,2), ex(k), v1x(k,1), v1x(k,2), v2x(k,1), &
v2x(k,2), v3x(k,1), v3x(k,2) )
!
zeta = (rho(k,1) - rho(k,2)) / rh
zeta = MAX( MIN( 0.99999999_DP, zeta ), -0.99999999_DP )
zeta = MAX( MIN( 0.99999999_DP, (rho(k,1)-rho(k,2))/rh ), -0.99999999_DP )
!
CALL tpsscc_spin( rh, zeta, grho(:,k,1), grho(:,k,2), atau, ec(k), &
v1c(k,1), v1c(k,2), v2c(:,k,1), v2c(:,k,2), &
@ -180,7 +202,7 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
!
CASE( 2 )
!
CALL m06lxc_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), &
CALL m06lxc_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), &
tau(k,2), ex(k), ec(k), v1x(k,1), v1x(k,2), &
v2x(k,1), v2x(k,2), v3x(k,1), v3x(k,2), v1c(k,1), &
v1c(k,2), v2cup, v2cdw, v3c(k,1), v3c(k,2) )
@ -190,12 +212,16 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
!
CASE DEFAULT
!
CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta )
!CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta )
!
END SELECT
!
ENDDO
!
#if defined(_OPENACC)
!$acc end data
#endif
!
RETURN
!
END SUBROUTINE tau_xc_spin

View File

@ -6,22 +6,23 @@
! or http://www.gnu.org/copyleft/gpl.txt .
!
!------------------------------------------------------------------------
MODULE metagga !<GPU:metagga=>metagga_gpu>
MODULE metagga
!-------------------------------------------------------------------------
!! MetaGGA functionals. Available functionals:
!
!! * TPSS (Tao, Perdew, Staroverov & Scuseria)
!! * M06L
!
USE exch_lda, ONLY: slater !<GPU:slater=>slater_d,exch_lda=>exch_lda_gpu>
USE corr_lda, ONLY: pw, pw_spin !<GPU:pw=>pw_d,pw_spin=>pw_spin_d,corr_lda=>corr_lda_gpu>
USE corr_gga, ONLY: pbec, pbec_spin !<GPU:pbec=>pbec_d,pbec_spin=>pbec_spin_d,corr_gga=>corr_gga_gpu>
USE exch_lda, ONLY: slater
USE corr_lda, ONLY: pw, pw_spin
USE corr_gga, ONLY: pbec, pbec_spin
!
CONTAINS
! TPSS
!
!-------------------------------------------------------------------------
SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c ) !<GPU:DEVICE>
SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c )
!$acc routine seq
!-----------------------------------------------------------------------
!! TPSS metaGGA corrections for exchange and correlation - Hartree a.u.
!! Definition: \(E_x = \int E_x(\text{rho},\text{grho}) dr\)
@ -70,9 +71,9 @@ SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c )
ENDIF
!
! exchange
CALL metax( rho, grho, tau, sx, v1x, v2x, v3x ) !<GPU:metax=>metax_d>
CALL metax( rho, grho, tau, sx, v1x, v2x, v3x )
! correlation
CALL metac( rho, grho, tau, sc, v1c, v2c, v3c ) !<GPU:metac=>metac_d>
CALL metac( rho, grho, tau, sc, v1c, v2c, v3c )
!
RETURN
!
@ -80,7 +81,8 @@ END SUBROUTINE tpsscxc
!
!
!-------------------------------------------------------------------------
SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x )
!$acc routine seq
!--------------------------------------------------------------------
!! TPSS meta-GGA exchange potential and energy.
!
@ -133,8 +135,8 @@ SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:
ENDIF
!
rs = pi34/rho**third
CALL slater( rs, ex_unif, vx_unif ) !<GPU:slater=>slater_d>
CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) !<GPU:metaFX=>metaFX_d>
CALL slater( rs, ex_unif, vx_unif )
CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x )
!
ex = rho*ex_unif
v1x = vx_unif*fx + ex*f1x
@ -148,7 +150,8 @@ END SUBROUTINE metax
!
!
!------------------------------------------------------------------
SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:DEVICE>
SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c )
!$acc routine seq
!--------------------------------------------------------------
!! TPSS meta-GGA correlation energy and potentials.
!
@ -208,11 +211,11 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:
IF (rhoup > small) THEN
!
rs = (pi34/rhoup)**third
CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) )
!
IF (ABS(grhoup) > small) THEN
! zeta=1.0_DP-small to avoid pow_e of 0 in pbec_spin
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, &
ec_sum, v1c_sum(1), v1c_sum(2), v2c_sum )
ELSE
ec_sum = 0.0_DP
@ -229,13 +232,13 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:
ENDIF
!
rs = (pi34/rho)**third
CALL pw( rs, 1, ec_unif, vc_unif ) !<GPU:pw=>pw_d>
CALL pw( rs, 1, ec_unif, vc_unif )
!
! ... PBE correlation energy and potential:
! ec_pbe=rho*H, not rho*(epsion_c_uinf + H)
! v1c_pbe=D (rho*H) /D rho
! v2c_pbe= for rho, 2 for
CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe ) !<GPU:pbec=>pbec_d>
CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe )
!
ec_pbe = ec_pbe/rho+ec_unif
v1c_pbe = (v1c_pbe+vc_unif-ec_pbe)/rho
@ -275,7 +278,8 @@ END SUBROUTINE metac
!
!
!-------------------------------------------------------------------------
SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) !<GPU:DEVICE>
SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x )
!$acc routine seq
!-------------------------------------------------------------------------
!
USE kind_l, ONLY : DP
@ -397,8 +401,9 @@ END SUBROUTINE metaFX
!
!
!---------------------------------------------------------------------------
SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, & !<GPU:DEVICE>
SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw )
!$acc routine seq
!-----------------------------------------------------------------------
!! TPSS metaGGA for exchange - Hartree a.u.
!
@ -442,7 +447,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
rho = rhoup + rhodw
IF (rhoup>small .AND. SQRT(ABS(grhoup2))>small &
.AND. ABS(tauup) > small) THEN
CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, & !<GPU:metax=>metax_d>
CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, &
2.0_DP*tauup, sxup, v1xup, v2xup, v3xup )
ELSE
sxup = 0.0_DP
@ -453,7 +458,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
!
IF (rhodw > small .AND. SQRT(ABS(grhodw2)) > small &
.AND. ABS(taudw) > small) THEN
CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, & !<GPU:metax=>metax_d>
CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, &
2.0_DP*taudw, sxdw, v1xdw, v2xdw, v3xdw )
ELSE
sxdw = 0.0_DP
@ -472,8 +477,9 @@ END SUBROUTINE tpsscx_spin
!
!
!---------------------------------------------------------------------------
SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, &
atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw )
!$acc routine seq
!--------------------------------------------------------------------------
!! TPSS metaGGA for correlations - Hartree a.u.
!
@ -536,7 +542,7 @@ SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
!
v3c = 0.0_DP
ELSE
CALL metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:metac_spin=>metac_spin_d>
CALL metac_spin( rho, zeta, grhoup, grhodw, &
atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3c )
ENDIF
!
@ -549,8 +555,9 @@ END SUBROUTINE tpsscc_spin
!
!
!---------------------------------------------------------------
SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, &
tau, sc, v1up, v1dw, v2up, v2dw, v3 )
!$acc routine seq
!---------------------------------------------------------------
USE kind_l, ONLY : DP
!
@ -634,10 +641,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
!
v2_tmp = 0.0_DP
rs = (pi34/rho)**third
CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) )
!
IF ((ABS(grho) > small) .AND. (zeta <= 1.0_DP)) THEN
CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp ) !<GPU:pbec_spin=>pbec_spin_d>
CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp )
v1up_pbe=v1_pbe(1)
v1dw_pbe=v1_pbe(2)
ELSE
@ -668,10 +675,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
v2_tmp = 0.0_DP
!
rs = (pi34/rhoup)**third
CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) )
!
IF (SQRT(grhoup2) > small) THEN
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, &
ecup_0, v1_0v(1), v1_0v(2), v2_tmp )
v1up_0 = v1_0v(1)
v1dw_0 = v1_0v(2)
@ -709,11 +716,11 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
v2_tmp = 0.0_DP
!
rs = (pi34/rhodw)**third
CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) )
!
IF (SQRT(grhodw2) > small) THEN
!
CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, &
ecdw_0, v1_0v(1), v1_0v(2), v2_tmp )
!
v1up_0 = v1_0v(1)
@ -877,7 +884,8 @@ END SUBROUTINE metac_spin
! ec, v1c, v2c, v3c as above for correlation
!
!-------------------------------------------------------------------------
SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) !<GPU:DEVICE>
SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
!$acc routine seq
!-----------------------------------------------------------------------
!
USE kind_l, ONLY : DP
@ -923,14 +931,14 @@ SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
taub = taua ! Tau is defined as summ_i( |nabla phi_i|**2 )
! in the following M06L routines
!
CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x ) !<GPU:m06lx=>m06lx_d>
CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x )
!
ex = two * ex ! Add the two components up + dw
!
v2x = 0.5_dp * v2x
!v3x = 2.0_dp * v3x !**mismatch with Libxc by a factor of 2
!
CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, & !<GPU:m06lc=>m06lc_d>
CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, &
v1cb, v2cb, v3cb )
!
v2c = 0.5_dp * v2c
@ -940,9 +948,10 @@ END SUBROUTINE m06lxc
!
!
!-------------------------------------------------------------------------
SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, & !<GPU:DEVICE>
SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, &
ex, ec, v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw, &
v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw )
!$acc routine seq
!-----------------------------------------------------------------------
!
USE kind_l, ONLY : DP
@ -959,14 +968,14 @@ SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, &
taua = tauup * two ! Tau is defined as summ_i( |nabla phi_i|**2 )
taub = taudw * two ! in the rest of the routine
!
CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup ) !<GPU:m06lx=>m06lx_d>
CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw ) !<GPU:m06lx=>m06lx_d>
CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup )
CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw )
!
ex = exup + exdw
!v3xup = 2.0_dp * v3xup !**mismatch with Libxc by a factor of 2
!v3xdw = 2.0_dp * v3xdw !**
!
CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, & !<GPU:m06lc=>m06lc_d>
CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, &
ec, v1cup, v2cup, v3cup, v1cdw, v2cdw, v3cdw )
!v3cup = 2.0_dp * v3cup !**
!v3cdw = 2.0_dp * v3cdw !**
@ -977,7 +986,8 @@ END SUBROUTINE m06lxc_spin
!=============================== M06L exchange ==========================
!
!-------------------------------------------------------------------------------
SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x )
!$acc routine seq
!---------------------------------------------------------------------------
!! M06L exchange.
!
@ -1082,7 +1092,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
gh = one + alpha * (xs2 + zs)
!
IF (gh >= small) THEN
CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs ) !<GPU:gvt4=>gvt4_d>
CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs )
ELSE
hg = zero
dhg_dxs2 = zero
@ -1121,7 +1131,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
dfws_drho = dfws*dws_dts*dts_drho
dfws_dtau = dfws*dws_dts*dts_dtau
!
CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe ) !<GPU:pbex_m06l=>pbex_m06l_d>
CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe )
!
v1x_unif = f43 * CX * rho13
!
@ -1143,7 +1153,8 @@ END SUBROUTINE m06lx
!
!
!-------------------------------------------------------------------
SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x )
!$acc routine seq
!---------------------------------------------------------------
!! PBE exchange (without Slater exchange):
!! J.P.Perdew, K.Burke, M.Ernzerhof, PRL 77, 3865 (1996).
@ -1211,8 +1222,9 @@ END SUBROUTINE pbex_m06l
!=============================== M06L correlation ==========================
!
!---------------------------------------------------------------------------------
SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & !<GPU:DEVICE>
SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
v3c_up, v1c_dw, v2c_dw, v3c_dw )
!$acc routine seq
!-------------------------------------------------------------------------------
!! M06L correlation.
!
@ -1366,12 +1378,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
!
rs = rsa
zeta = one
CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) )
vc_pw_a = vc_v(1)
vv = vc_v(2)
!
CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa ) !<GPU:gvt4=>gvt4_d>
CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a ) !<GPU:gfunc=>gfunc_d>
CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa )
CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a )
!
Ec_UEG_aa = rhoa*ec_pw_a
num = (dgsa_dxs2a + dhga_dxs2a)*Dsa + (gsa + hga)*dDsa_dxs2a
@ -1426,12 +1438,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
!
zeta = one
rs = rsb
CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) )
vc_pw_b = vc_v(1)
vv = vc_v(2)
!
CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb ) !<GPU:gvt4=>gvt4_d>
CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b ) !<GPU:gfunc=>gfunc_d>
CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb )
CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b )
!
Ec_UEG_bb = rhob*ec_pw_b
num = (dgsb_dxs2b + dhgb_dxs2b)*Dsb + (gsb + hgb)*dDsb_dxs2b
@ -1468,13 +1480,13 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
zeta = (rhoa - rhob)/rho
rs = (pi34/rho)**f13
!
CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, & !<GPU:gvt4=>gvt4_d>
CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, &
dhgab_dxs2ab, dhgab_dzsab )
!
CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) )
vc_pw_up=vc_v(1) ; vc_pw_dw=vc_v(2)
!
CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab ) !<GPU:gfunc=>gfunc_d>
CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab )
!
decab_drhoa = vc_pw_up - vc_pw_a
decab_drhob = vc_pw_dw - vc_pw_b
@ -1515,7 +1527,8 @@ END SUBROUTINE m06lc
!
!
!-------------------------------------------------------------------
SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) !<GPU:DEVICE>
SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx )
!$acc routine seq
!-----------------------------------------------------------------
!
USE kind_l, ONLY : DP
@ -1545,7 +1558,8 @@ SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) !<GPU:DEVICE>
!
!
!-------------------------------------------------------------------------
SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz ) !<GPU:DEVICE>
SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz )
!$acc routine seq
!----------------------------------------------------------------------
!
USE kind_l, ONLY : DP

View File

@ -48,7 +48,6 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
!! whether you wish to run on gpu in case use_gpu is true
!
LOGICAL :: run_on_gpu
REAL(DP), ALLOCATABLE :: v2c_dummy(:)
!
run_on_gpu = .FALSE.
IF ( PRESENT(run_on_gpu_) ) run_on_gpu = run_on_gpu_
@ -126,8 +125,8 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
!
! ... local variables
!
INTEGER :: is
REAL(DP), ALLOCATABLE :: grho2(:,:)
INTEGER :: is, k, ipol, pol_unpol
REAL(DP), ALLOCATABLE :: grho2(:) ,v2cc(:)
REAL(DP), PARAMETER :: small = 1.E-10_DP
!
#if defined(__LIBXC)
@ -137,7 +136,6 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:), vc_tau(:)
REAL(DP), ALLOCATABLE :: lapl_rho(:), vlapl_rho(:) ! not used in QE
!
INTEGER :: k, ipol, pol_unpol
LOGICAL :: POLARIZED
REAL(DP) :: rh, ggrho2, atau
#if (XC_MAJOR_VERSION > 4)
@ -198,19 +196,30 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
!
IF ( .NOT.is_libxc(5) .AND. imetac==0 ) THEN
!
ALLOCATE( grho2(length,ns) )
!
DO is = 1, ns
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
ENDDO
!
IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, &
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1),&
v3c(:,1) )
IF (ns == 2) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x,&
v1c, v2c, v3c )
!
DEALLOCATE( grho2 )
IF (ns == 1) THEN
!
ALLOCATE( grho2(length) )
grho2(:) = grho(1,:,1)**2 + grho(2,:,1)**2 + grho(3,:,1)**2
!
!$acc data copyin( rho, grho2, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
!$acc host_data use_device( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
CALL tau_xc( length, rho(:,1), grho2, tau(:,1), ex, ec, &
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
!$acc end host_data
!$acc end data
!
DEALLOCATE( grho2 )
!
ELSEIF (ns == 2) THEN
!
!$acc data copyin( rho, grho, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
!$acc host_data use_device( rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, &
v1c, v2c, v3c )
!$acc end host_data
!$acc end data
!
ENDIF
!
ENDIF
!
@ -312,27 +321,32 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v
DEALLOCATE( vc_rho , vc_sigma, vc_tau, vlapl_rho )
!
#else
!
ALLOCATE( grho2(length,ns) )
!
DO is = 1, ns
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
ENDDO
!
IF (ns == 1) THEN
!
CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, v1x(:,1), &
v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1), v3c(:,1) )
ALLOCATE( grho2(length) )
grho2(:) = grho(1,:,1)**2 + grho(2,:,1)**2 + grho(3,:,1)**2
!
!$acc data copyin( rho, grho2, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
!$acc host_data use_device( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
CALL tau_xc( length, rho(:,1), grho2, tau(:,1), ex, ec, v1x(:,1), &
v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
!$acc end host_data
!$acc end data
!
DEALLOCATE( grho2 )
!
ELSEIF (ns == 2) THEN
!
!$acc data copyin( rho, grho, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
!$acc host_data use_device( rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, &
v2c, v3c )
!$acc end host_data
!$acc end data
!
ENDIF
!
DEALLOCATE( grho2 )
!
#endif
!
RETURN
@ -391,7 +405,7 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
!
! ... local variables
!
INTEGER :: is
INTEGER :: k, is, ipol, pol_unpol
REAL(DP), ALLOCATABLE :: grho2(:,:)
REAL(DP), PARAMETER :: small = 1.E-10_DP
!
@ -402,19 +416,24 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:), vc_tau(:)
REAL(DP), ALLOCATABLE :: lapl_rho(:), vlapl_rho(:) ! not used in QE
!
INTEGER :: k, ipol, pol_unpol
LOGICAL :: POLARIZED
REAL(DP) :: rh, ggrho2, atau
#if (XC_MAJOR_VERSION > 4)
INTEGER(8) :: lengthxc
#else
INTEGER :: lengthxc
#endif
#endif
!
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), tau(length,ns), ex(length), &
!$acc& ec(length), v1x(length,ns), v2x(length,ns), v3x(length,ns), &
!$acc& v1c(length,ns), v2c(np,length,ns), v3c(length,ns) )
!
#if defined(__LIBXC)
lengthxc = length
!
ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP ; v3x = 0.0_DP
ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP ; v3c = 0.0_DP
!ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP ; v3x = 0.0_DP
!ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP ; v3c = 0.0_DP
!
POLARIZED = .FALSE.
IF (ns == 2) THEN
@ -425,6 +444,8 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
!
ALLOCATE( rho_lxc(length*ns), sigma(length*np), tau_lxc(length*ns) )
ALLOCATE( lapl_rho(length*ns) )
!$acc data copyout( rho_lxc, sigma, tau_lxc, lapl_rho )
!$acc host_data use_device( rho_lxc, sigma, tau_lxc, lapl_rho )
!
ALLOCATE( ex_lxc(length) , ec_lxc(length) )
ALLOCATE( vx_rho(length*ns) , vx_sigma(length*np), vx_tau(length*ns) )
@ -434,16 +455,18 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
!
IF ( ns == 1 ) THEN
!
!$acc parallel loop
DO k = 1, length
rho_lxc(k) = ABS( rho(k,1) )
sigma(k) = MAX( grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2, &
grho2_threshold_mgga )
tau_lxc(k) = MAX( tau(k,1), tau_threshold_mgga )
vlapl_rho(k) = 0._DP
lapl_rho(k) = 0.d0
ENDDO
!
ELSE
!
!$acc parallel loop
DO k = 1, length
rho_lxc(2*k-1) = ABS( rho(k,1) )
rho_lxc(2*k) = ABS( rho(k,2) )
@ -455,12 +478,14 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
grho2_threshold_mgga )
tau_lxc(2*k-1) = MAX( tau(k,1), small )
tau_lxc(2*k) = MAX( tau(k,2), small )
vlapl_rho(2*k-1) = 0._DP
vlapl_rho(2*k) = 0._DP
lapl_rho(2*k-1) = 0.d0
lapl_rho(2*k) = 0.d0
ENDDO
!
ENDIF
!
vlapl_rho = 0._DP
!
IF ( .NOT.is_libxc(5) .AND. imetac==0 ) THEN
!
ALLOCATE( grho2(length,ns) )
@ -469,8 +494,8 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
ENDDO
!
IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, &
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1),&
IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, &
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c, &
v3c(:,1) )
IF (ns == 2) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x,&
v1c, v2c, v3c )
@ -479,6 +504,9 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
!
ENDIF
!
!$acc end host_data
!$acc end data
!
! ... META EXCHANGE
!
IF ( is_libxc(5) ) THEN
@ -523,17 +551,6 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
ENDDO
ENDIF
!
! **Now this is done directly inside Libxc for SCAN0**
! ... only for HK/MCA: SCAN0 (used in CPV)
!IF ( scan_exx ) THEN
! IF (exx_started) THEN
! ex = (1.0_DP - exx_fraction) * ex
! v1x = (1.0_DP - exx_fraction) * v1x
! v2x = (1.0_DP - exx_fraction) * v2x
! v3x = (1.0_DP - exx_fraction) * v3x
! ENDIF
!ENDIF
!
ENDIF
!
! ... META CORRELATION
@ -590,15 +607,20 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
#else
!
ALLOCATE( grho2(length,ns) )
!$acc data create( grho2(length,ns) )
!$acc host_data use_device( grho2 )
!
!$acc parallel loop collapse(2)
DO is = 1, ns
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
DO k = 1, length
grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2
ENDDO
ENDDO
!
IF (ns == 1) THEN
!
CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, v1x(:,1), &
v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1), v3c(:,1) )
v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
!
ELSEIF (ns == 2) THEN
!
@ -607,9 +629,13 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x
!
ENDIF
!
!$acc end host_data
!$acc end data
DEALLOCATE( grho2 )
!
#endif
!
!$acc end data
!
RETURN
!

View File

@ -78,7 +78,8 @@ subroutine vxcgc( ndm, mesh, nspin, r, r2, rho, rhoc, vgc, egc, &
!
use kinds, only : DP
use constants, only : fpi, e2
use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx
use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx, &
xc_metagcx
implicit none
integer, intent(in) :: ndm,mesh,nspin,iflag
real(DP), intent(in) :: r(mesh), r2(mesh), rho(ndm,2), rhoc(ndm)