From 17d5d3401741230a56ef3652b075e7fdbd4e64be Mon Sep 17 00:00:00 2001 From: fabrizio22 Date: Sun, 14 Nov 2021 15:41:41 +0100 Subject: [PATCH] Vxc_acc - mgga xc on openacc --- CPV/src/metaxc.f90 | 2 +- PW/src/stres_gradcorr.f90 | 2 +- PW/src/v_of_rho.f90 | 1 + XClib/qe_drivers_gga.f90 | 1 - XClib/qe_drivers_mgga.f90 | 112 +++++++++++++++++++++------------- XClib/qe_funct_mgga.f90 | 116 +++++++++++++++++++---------------- XClib/xc_wrapper_mgga.f90 | 124 +++++++++++++++++++++++--------------- atomic/src/vxcgc.f90 | 3 +- 8 files changed, 214 insertions(+), 147 deletions(-) diff --git a/CPV/src/metaxc.f90 b/CPV/src/metaxc.f90 index bec71f946..3f7746177 100644 --- a/CPV/src/metaxc.f90 +++ b/CPV/src/metaxc.f90 @@ -11,7 +11,7 @@ SUBROUTINE tpssmeta(nnr, nspin,grho,rho,kedtau,etxc) ! =================== !-------------------------------------------------------------------- use kinds, only: dp - use xc_lib, only: xclib_set_threshold + use xc_lib, only: xclib_set_threshold, xc_metagcx ! IMPLICIT NONE ! diff --git a/PW/src/stres_gradcorr.f90 b/PW/src/stres_gradcorr.f90 index 163b52953..76f0ab00a 100644 --- a/PW/src/stres_gradcorr.f90 +++ b/PW/src/stres_gradcorr.f90 @@ -12,7 +12,7 @@ SUBROUTINE stres_gradcorr( rho, rhog, rho_core, rhog_core, kedtau, nspin, & !---------------------------------------------------------------------------- ! USE kinds, ONLY: DP - USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx + USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx, xc_metagcx USE noncollin_module, ONLY: domag USE mp_bands, ONLY: intra_bgrp_comm USE mp, ONLY: mp_sum diff --git a/PW/src/v_of_rho.f90 b/PW/src/v_of_rho.f90 index cfebe3d97..e90ad55fa 100644 --- a/PW/src/v_of_rho.f90 +++ b/PW/src/v_of_rho.f90 @@ -159,6 +159,7 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur ) USE cell_base, ONLY : omega USE funct, ONLY : dft_is_nonlocc, nlc USE scf, ONLY : scf_type, rhoz_or_updw + USE xc_lib, ONLY : xc_metagcx USE mp, ONLY : mp_sum USE mp_bands, ONLY : intra_bgrp_comm ! diff --git a/XClib/qe_drivers_gga.f90 b/XClib/qe_drivers_gga.f90 index 86cb807ee..28913c469 100644 --- a/XClib/qe_drivers_gga.f90 +++ b/XClib/qe_drivers_gga.f90 @@ -94,7 +94,6 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, & !$omp v1c_out, v2c_out, sx_out, sc_out ) !$omp do #endif - DO ir = 1, length ! grho = grho_in(ir) diff --git a/XClib/qe_drivers_mgga.f90 b/XClib/qe_drivers_mgga.f90 index cb63e559c..6f7c955a6 100644 --- a/XClib/qe_drivers_mgga.f90 +++ b/XClib/qe_drivers_mgga.f90 @@ -17,7 +17,6 @@ MODULE qe_drivers_mgga USE kind_l, ONLY: DP USE dft_setting_params, ONLY: imeta, imetac, rho_threshold_mgga, & grho2_threshold_mgga, tau_threshold_mgga - USE metagga ! IMPLICIT NONE ! @@ -37,31 +36,33 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c !! Available cases: M06L and TPSS. Other mGGA functionals can be used !! through Libxc. ! + USE metagga + ! IMPLICIT NONE ! INTEGER, INTENT(IN) :: length !! Number of k-points - REAL(DP), DIMENSION(length) :: rho + REAL(DP), INTENT(IN) :: rho(length) !! Charge density - REAL(DP), DIMENSION(length) :: grho2 + REAL(DP), INTENT(IN) :: grho2(length) !! Square modulus of the density gradient - REAL(DP), DIMENSION(length) :: tau + REAL(DP), INTENT(IN) :: tau(length) !! Laplacian of the density - REAL(DP), DIMENSION(length) :: ex + REAL(DP), INTENT(OUT) :: ex(length) !! \(E_x = \int e_x(\text{rho},\text{grho}) dr \) - REAL(DP), DIMENSION(length) :: ec + REAL(DP), INTENT(OUT) :: ec(length) !! \(E_x = \int e_x(\text{rho},\text{grho}) dr \) - REAL(DP), DIMENSION(length) :: v1x + REAL(DP), INTENT(OUT) :: v1x(length) !! \( D\ E_x\ /\ D\ \text{rho} \) - REAL(DP), DIMENSION(length) :: v2x + REAL(DP), INTENT(OUT) :: v2x(length) !! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \) - REAL(DP), DIMENSION(length) :: v3x + REAL(DP), INTENT(OUT) :: v3x(length) !! \( D\ E_x\ /\ D\ \text{tau} \) - REAL(DP), DIMENSION(length) :: v1c + REAL(DP), INTENT(OUT) :: v1c(length) !! \( D\ E_c\ /\ D\ \text{rho} \) - REAL(DP), DIMENSION(length) :: v2c + REAL(DP), INTENT(OUT) :: v2c(1,length,1) !! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \) - REAL(DP), DIMENSION(length) :: v3c + REAL(DP), INTENT(OUT) :: v3c(length) !! \( D\ E_c\ /\ D\ \text{tau} \) ! ! ... local variables @@ -69,15 +70,23 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c INTEGER :: k REAL(DP) :: arho ! - v1x=0.d0 ; v2x=0.d0 ; v3x=0.d0 ; ex=0.d0 - v1c=0.d0 ; v2c=0.d0 ; v3c=0.d0 ; ec=0.d0 +#if defined(_OPENACC) + !$acc data deviceptr( rho(length), grho2(length), tau(length), ex(length), ec(length), & + !$acc& v1x(length), v2x(length), v3x(length), v1c(length), v2c(1,length,1), & + !$acc& v3c(length) ) ! + !$acc parallel loop +#endif DO k = 1, length ! arho = ABS(rho(k)) ! IF ( (arho<=rho_threshold_mgga).OR.(grho2(k)<=grho2_threshold_mgga).OR. & - (ABS(tau(k))<=rho_threshold_mgga) ) CYCLE + (ABS(tau(k))<=rho_threshold_mgga) ) THEN + v1x(k)=0.d0 ; v2x(k)=0.d0 ; v3x(k)=0.d0 ; ex(k)=0.d0 + v1c(k)=0.d0 ; v2c(1,k,1)=0.d0 ; v3c(k)=0.d0 ; ec(k)=0.d0 + CYCLE + ENDIF ! ! ...libxc-like threshold management !grho2(k) = MIN( grho2(k), (8.d0*rho(k)*tau(k))**2 ) @@ -86,21 +95,25 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c CASE( 1 ) ! CALL tpsscxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), & - v3x(k), v1c(k), v2c(k), v3c(k) ) + v3x(k), v1c(k), v2c(1,k,1), v3c(k) ) ! CASE( 2 ) ! CALL m06lxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), & - v3x(k), v1c(k), v2c(k), v3c(k) ) + v3x(k), v1c(k), v2c(1,k,1), v3c(k) ) ! CASE DEFAULT ! - CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta ) + !CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta ) ! END SELECT ! ENDDO ! +#if defined(_OPENACC) + !$acc end data +#endif + ! RETURN ! END SUBROUTINE tau_xc @@ -114,65 +127,74 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, !! Available cases: M06L and TPSS. Other mGGA functionals can be used !! through Libxc. ! + USE metagga + ! IMPLICIT NONE ! INTEGER, INTENT(IN) :: length !! Number of k-points - REAL(DP), INTENT(IN), DIMENSION(length,2) :: rho + REAL(DP), INTENT(IN) :: rho(length,2) !! Charge density - REAL(DP), INTENT(IN), DIMENSION(3,length,2) :: grho + REAL(DP), INTENT(IN) :: grho(3,length,2) !! The density gradient - REAL(DP), INTENT(IN), DIMENSION(length,2) :: tau + REAL(DP), INTENT(IN) :: tau(length,2) !! Laplacian of the density - REAL(DP), INTENT(OUT), DIMENSION(length) :: ex + REAL(DP), INTENT(OUT) :: ex(length) !! \(E_x = \int e_x(\text{rho},\text{grho}) dr \) - REAL(DP), INTENT(OUT), DIMENSION(length) :: ec + REAL(DP), INTENT(OUT) :: ec(length) !! \(E_x = \int e_x(\text{rho},\text{grho}) dr \) - REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1x + REAL(DP), INTENT(OUT) :: v1x(length,2) !! \( D\ E_x\ /\ D\ \text{rho} \) - REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v2x + REAL(DP), INTENT(OUT) :: v2x(length,2) !! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \) - REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3x + REAL(DP), INTENT(OUT) :: v3x(length,2) !! \( D\ E_x\ /\ D\ \text{tau} \) - REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1c + REAL(DP), INTENT(OUT) :: v1c(length,2) !! \( D\ E_c\ /\ D\ \text{rho} \) - REAL(DP), INTENT(OUT), DIMENSION(3,length,2) :: v2c + REAL(DP), INTENT(OUT) :: v2c(3,length,2) !! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \) - REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3c + REAL(DP), INTENT(OUT) :: v3c(length,2) !! \( D\ E_c\ /\ D\ \text{tau} \) ! ! ... local variables ! INTEGER :: k - REAL(DP) :: rh, zeta, atau, grho2(2), ggrho2 + REAL(DP) :: rh, zeta, atau, grho2up, grho2dw, ggrho2 REAL(DP) :: v2cup, v2cdw ! - ex=0.0_DP ; v1x=0.0_DP ; v2x=0.0_DP ; v3x=0.0_DP - ec=0.0_DP ; v1c=0.0_DP ; v2c=0.0_DP ; v3c=0.0_DP +#if defined(_OPENACC) + !$acc data deviceptr( rho(length,2), grho(3,length,2), tau(length,2), ex(length), & + !$acc& ec(length), v1x(length,2), v2x(length,2), v3x(length,2), & + !$acc& v1c(length,2), v2c(3,length,2), v3c(length,2) ) ! + !$acc parallel loop +#endif DO k = 1, length ! rh = rho(k,1) + rho(k,2) atau = tau(k,1) + tau(k,2) ! KE-density in Hartree ! ...libxc-like threshold management - !grho2(1) = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 ) - !grho2(2) = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 ) - grho2(1) = SUM(grho(:,k,1)**2) - grho2(2) = SUM(grho(:,k,2)**2) - ggrho2 = ( grho2(1) + grho2(2) ) * 4.0_DP + !grho2up = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 ) + !grho2dw = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 ) + grho2up = SUM(grho(:,k,1)**2) + grho2dw = SUM(grho(:,k,2)**2) + ggrho2 = ( grho2up + grho2dw ) * 4.0_DP ! IF ( (rh <= rho_threshold_mgga).OR.(ggrho2 <= grho2_threshold_mgga).OR.& - (ABS(atau) <= tau_threshold_mgga) ) CYCLE + (ABS(atau) <= tau_threshold_mgga) ) THEN + v1x(k,:)=0.d0 ; v2x(k,:)=0.d0 ; v3x(k,:)=0.d0 ; ex(k)=0.d0 + v1c(k,:)=0.d0 ; v2c(:,k,:)=0.d0 ; v3c(k,:)=0.d0 ; ec(k)=0.d0 + CYCLE + ENDIF ! SELECT CASE( imeta ) CASE( 1 ) ! - CALL tpsscx_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), & + CALL tpsscx_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), & tau(k,2), ex(k), v1x(k,1), v1x(k,2), v2x(k,1), & v2x(k,2), v3x(k,1), v3x(k,2) ) ! - zeta = (rho(k,1) - rho(k,2)) / rh - zeta = MAX( MIN( 0.99999999_DP, zeta ), -0.99999999_DP ) + zeta = MAX( MIN( 0.99999999_DP, (rho(k,1)-rho(k,2))/rh ), -0.99999999_DP ) ! CALL tpsscc_spin( rh, zeta, grho(:,k,1), grho(:,k,2), atau, ec(k), & v1c(k,1), v1c(k,2), v2c(:,k,1), v2c(:,k,2), & @@ -180,7 +202,7 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, ! CASE( 2 ) ! - CALL m06lxc_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), & + CALL m06lxc_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), & tau(k,2), ex(k), ec(k), v1x(k,1), v1x(k,2), & v2x(k,1), v2x(k,2), v3x(k,1), v3x(k,2), v1c(k,1), & v1c(k,2), v2cup, v2cdw, v3c(k,1), v3c(k,2) ) @@ -190,12 +212,16 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, ! CASE DEFAULT ! - CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta ) + !CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta ) ! END SELECT ! ENDDO ! +#if defined(_OPENACC) + !$acc end data +#endif + ! RETURN ! END SUBROUTINE tau_xc_spin diff --git a/XClib/qe_funct_mgga.f90 b/XClib/qe_funct_mgga.f90 index 79bf5d20e..d13e96481 100644 --- a/XClib/qe_funct_mgga.f90 +++ b/XClib/qe_funct_mgga.f90 @@ -6,22 +6,23 @@ ! or http://www.gnu.org/copyleft/gpl.txt . ! !------------------------------------------------------------------------ -MODULE metagga !metagga_gpu> +MODULE metagga !------------------------------------------------------------------------- !! MetaGGA functionals. Available functionals: ! !! * TPSS (Tao, Perdew, Staroverov & Scuseria) !! * M06L ! -USE exch_lda, ONLY: slater !slater_d,exch_lda=>exch_lda_gpu> -USE corr_lda, ONLY: pw, pw_spin !pw_d,pw_spin=>pw_spin_d,corr_lda=>corr_lda_gpu> -USE corr_gga, ONLY: pbec, pbec_spin !pbec_d,pbec_spin=>pbec_spin_d,corr_gga=>corr_gga_gpu> +USE exch_lda, ONLY: slater +USE corr_lda, ONLY: pw, pw_spin +USE corr_gga, ONLY: pbec, pbec_spin ! CONTAINS ! TPSS ! !------------------------------------------------------------------------- -SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c ) ! +SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c ) +!$acc routine seq !----------------------------------------------------------------------- !! TPSS metaGGA corrections for exchange and correlation - Hartree a.u. !! Definition: \(E_x = \int E_x(\text{rho},\text{grho}) dr\) @@ -70,9 +71,9 @@ SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c ) ENDIF ! ! exchange - CALL metax( rho, grho, tau, sx, v1x, v2x, v3x ) !metax_d> + CALL metax( rho, grho, tau, sx, v1x, v2x, v3x ) ! correlation - CALL metac( rho, grho, tau, sc, v1c, v2c, v3c ) !metac_d> + CALL metac( rho, grho, tau, sc, v1c, v2c, v3c ) ! RETURN ! @@ -80,7 +81,8 @@ END SUBROUTINE tpsscxc ! ! !------------------------------------------------------------------------- -SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) ! +SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) +!$acc routine seq !-------------------------------------------------------------------- !! TPSS meta-GGA exchange potential and energy. ! @@ -133,8 +135,8 @@ SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) !slater_d> - CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) !metaFX_d> + CALL slater( rs, ex_unif, vx_unif ) + CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) ! ex = rho*ex_unif v1x = vx_unif*fx + ex*f1x @@ -148,7 +150,8 @@ END SUBROUTINE metax ! ! !------------------------------------------------------------------ -SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) ! +SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) +!$acc routine seq !-------------------------------------------------------------- !! TPSS meta-GGA correlation energy and potentials. ! @@ -208,11 +211,11 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) ! small) THEN ! rs = (pi34/rhoup)**third - CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) ) !pw_spin_d> + CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) ) ! IF (ABS(grhoup) > small) THEN ! zeta=1.0_DP-small to avoid pow_e of 0 in pbec_spin - CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, & !pbec_spin_d> + CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, & ec_sum, v1c_sum(1), v1c_sum(2), v2c_sum ) ELSE ec_sum = 0.0_DP @@ -229,13 +232,13 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !pw_d> + CALL pw( rs, 1, ec_unif, vc_unif ) ! ! ... PBE correlation energy and potential: ! ec_pbe=rho*H, not rho*(epsion_c_uinf + H) ! v1c_pbe=D (rho*H) /D rho ! v2c_pbe= for rho, 2 for - CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe ) !pbec_d> + CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe ) ! ec_pbe = ec_pbe/rho+ec_unif v1c_pbe = (v1c_pbe+vc_unif-ec_pbe)/rho @@ -275,7 +278,8 @@ END SUBROUTINE metac ! ! !------------------------------------------------------------------------- -SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) ! +SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) +!$acc routine seq !------------------------------------------------------------------------- ! USE kind_l, ONLY : DP @@ -397,8 +401,9 @@ END SUBROUTINE metaFX ! ! !--------------------------------------------------------------------------- -SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, & ! +SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, & v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw ) +!$acc routine seq !----------------------------------------------------------------------- !! TPSS metaGGA for exchange - Hartree a.u. ! @@ -442,7 +447,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, & rho = rhoup + rhodw IF (rhoup>small .AND. SQRT(ABS(grhoup2))>small & .AND. ABS(tauup) > small) THEN - CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, & !metax_d> + CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, & 2.0_DP*tauup, sxup, v1xup, v2xup, v3xup ) ELSE sxup = 0.0_DP @@ -453,7 +458,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, & ! IF (rhodw > small .AND. SQRT(ABS(grhodw2)) > small & .AND. ABS(taudw) > small) THEN - CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, & !metax_d> + CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, & 2.0_DP*taudw, sxdw, v1xdw, v2xdw, v3xdw ) ELSE sxdw = 0.0_DP @@ -472,8 +477,9 @@ END SUBROUTINE tpsscx_spin ! ! !--------------------------------------------------------------------------- -SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & ! +SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw ) +!$acc routine seq !-------------------------------------------------------------------------- !! TPSS metaGGA for correlations - Hartree a.u. ! @@ -536,7 +542,7 @@ SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & ! ! v3c = 0.0_DP ELSE - CALL metac_spin( rho, zeta, grhoup, grhodw, & !metac_spin_d> + CALL metac_spin( rho, zeta, grhoup, grhodw, & atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3c ) ENDIF ! @@ -549,8 +555,9 @@ END SUBROUTINE tpsscc_spin ! ! !--------------------------------------------------------------- -SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & ! +SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & tau, sc, v1up, v1dw, v2up, v2dw, v3 ) +!$acc routine seq !--------------------------------------------------------------- USE kind_l, ONLY : DP ! @@ -634,10 +641,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & ! ! v2_tmp = 0.0_DP rs = (pi34/rho)**third - CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) ) !pw_spin_d> + CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) ) ! IF ((ABS(grho) > small) .AND. (zeta <= 1.0_DP)) THEN - CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp ) !pbec_spin_d> + CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp ) v1up_pbe=v1_pbe(1) v1dw_pbe=v1_pbe(2) ELSE @@ -668,10 +675,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & ! v2_tmp = 0.0_DP ! rs = (pi34/rhoup)**third - CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) ) !pw_spin_d> + CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) ) ! IF (SQRT(grhoup2) > small) THEN - CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, & !pbec_spin_d> + CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, & ecup_0, v1_0v(1), v1_0v(2), v2_tmp ) v1up_0 = v1_0v(1) v1dw_0 = v1_0v(2) @@ -709,11 +716,11 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & ! v2_tmp = 0.0_DP ! rs = (pi34/rhodw)**third - CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) ) !pw_spin_d> + CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) ) ! IF (SQRT(grhodw2) > small) THEN ! - CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, & !pbec_spin_d> + CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, & ecdw_0, v1_0v(1), v1_0v(2), v2_tmp ) ! v1up_0 = v1_0v(1) @@ -877,7 +884,8 @@ END SUBROUTINE metac_spin ! ec, v1c, v2c, v3c as above for correlation ! !------------------------------------------------------------------------- -SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) ! +SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) +!$acc routine seq !----------------------------------------------------------------------- ! USE kind_l, ONLY : DP @@ -923,14 +931,14 @@ SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) taub = taua ! Tau is defined as summ_i( |nabla phi_i|**2 ) ! in the following M06L routines ! - CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x ) !m06lx_d> + CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x ) ! ex = two * ex ! Add the two components up + dw ! v2x = 0.5_dp * v2x !v3x = 2.0_dp * v3x !**mismatch with Libxc by a factor of 2 ! - CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, & !m06lc_d> + CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, & v1cb, v2cb, v3cb ) ! v2c = 0.5_dp * v2c @@ -940,9 +948,10 @@ END SUBROUTINE m06lxc ! ! !------------------------------------------------------------------------- -SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, & ! +SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, & ex, ec, v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw, & v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw ) +!$acc routine seq !----------------------------------------------------------------------- ! USE kind_l, ONLY : DP @@ -959,14 +968,14 @@ SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, & taua = tauup * two ! Tau is defined as summ_i( |nabla phi_i|**2 ) taub = taudw * two ! in the rest of the routine ! - CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup ) !m06lx_d> - CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw ) !m06lx_d> + CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup ) + CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw ) ! ex = exup + exdw !v3xup = 2.0_dp * v3xup !**mismatch with Libxc by a factor of 2 !v3xdw = 2.0_dp * v3xdw !** ! - CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, & !m06lc_d> + CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, & ec, v1cup, v2cup, v3cup, v1cdw, v2cdw, v3cdw ) !v3cup = 2.0_dp * v3cup !** !v3cdw = 2.0_dp * v3cdw !** @@ -977,7 +986,8 @@ END SUBROUTINE m06lxc_spin !=============================== M06L exchange ========================== ! !------------------------------------------------------------------------------- -SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) ! +SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) +!$acc routine seq !--------------------------------------------------------------------------- !! M06L exchange. ! @@ -1082,7 +1092,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) ! gh = one + alpha * (xs2 + zs) ! IF (gh >= small) THEN - CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs ) !gvt4_d> + CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs ) ELSE hg = zero dhg_dxs2 = zero @@ -1121,7 +1131,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) ! dfws_drho = dfws*dws_dts*dts_drho dfws_dtau = dfws*dws_dts*dts_dtau ! - CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe ) !pbex_m06l_d> + CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe ) ! v1x_unif = f43 * CX * rho13 ! @@ -1143,7 +1153,8 @@ END SUBROUTINE m06lx ! ! !------------------------------------------------------------------- -SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x ) ! +SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x ) +!$acc routine seq !--------------------------------------------------------------- !! PBE exchange (without Slater exchange): !! J.P.Perdew, K.Burke, M.Ernzerhof, PRL 77, 3865 (1996). @@ -1211,8 +1222,9 @@ END SUBROUTINE pbex_m06l !=============================== M06L correlation ========================== ! !--------------------------------------------------------------------------------- -SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & ! +SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & v3c_up, v1c_dw, v2c_dw, v3c_dw ) +!$acc routine seq !------------------------------------------------------------------------------- !! M06L correlation. ! @@ -1366,12 +1378,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & ! rs = rsa zeta = one - CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) ) !pw_spin_d> + CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) ) vc_pw_a = vc_v(1) vv = vc_v(2) ! - CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa ) !gvt4_d> - CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a ) !gfunc_d> + CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa ) + CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a ) ! Ec_UEG_aa = rhoa*ec_pw_a num = (dgsa_dxs2a + dhga_dxs2a)*Dsa + (gsa + hga)*dDsa_dxs2a @@ -1426,12 +1438,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & ! zeta = one rs = rsb - CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) ) !pw_spin_d> + CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) ) vc_pw_b = vc_v(1) vv = vc_v(2) ! - CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb ) !gvt4_d> - CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b ) !gfunc_d> + CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb ) + CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b ) ! Ec_UEG_bb = rhob*ec_pw_b num = (dgsb_dxs2b + dhgb_dxs2b)*Dsb + (gsb + hgb)*dDsb_dxs2b @@ -1468,13 +1480,13 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & zeta = (rhoa - rhob)/rho rs = (pi34/rho)**f13 ! - CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, & !gvt4_d> + CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, & dhgab_dxs2ab, dhgab_dzsab ) ! - CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) ) !pw_spin_d> + CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) ) vc_pw_up=vc_v(1) ; vc_pw_dw=vc_v(2) ! - CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab ) !gfunc_d> + CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab ) ! decab_drhoa = vc_pw_up - vc_pw_a decab_drhob = vc_pw_dw - vc_pw_b @@ -1515,7 +1527,8 @@ END SUBROUTINE m06lc ! ! !------------------------------------------------------------------- -SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) ! +SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) +!$acc routine seq !----------------------------------------------------------------- ! USE kind_l, ONLY : DP @@ -1545,7 +1558,8 @@ SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) ! ! ! !------------------------------------------------------------------------- -SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz ) ! +SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz ) +!$acc routine seq !---------------------------------------------------------------------- ! USE kind_l, ONLY : DP diff --git a/XClib/xc_wrapper_mgga.f90 b/XClib/xc_wrapper_mgga.f90 index 047850041..337164919 100644 --- a/XClib/xc_wrapper_mgga.f90 +++ b/XClib/xc_wrapper_mgga.f90 @@ -48,7 +48,6 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1 !! whether you wish to run on gpu in case use_gpu is true ! LOGICAL :: run_on_gpu - REAL(DP), ALLOCATABLE :: v2c_dummy(:) ! run_on_gpu = .FALSE. IF ( PRESENT(run_on_gpu_) ) run_on_gpu = run_on_gpu_ @@ -126,8 +125,8 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v ! ! ... local variables ! - INTEGER :: is - REAL(DP), ALLOCATABLE :: grho2(:,:) + INTEGER :: is, k, ipol, pol_unpol + REAL(DP), ALLOCATABLE :: grho2(:) ,v2cc(:) REAL(DP), PARAMETER :: small = 1.E-10_DP ! #if defined(__LIBXC) @@ -137,7 +136,6 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:), vc_tau(:) REAL(DP), ALLOCATABLE :: lapl_rho(:), vlapl_rho(:) ! not used in QE ! - INTEGER :: k, ipol, pol_unpol LOGICAL :: POLARIZED REAL(DP) :: rh, ggrho2, atau #if (XC_MAJOR_VERSION > 4) @@ -198,19 +196,30 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v ! IF ( .NOT.is_libxc(5) .AND. imetac==0 ) THEN ! - ALLOCATE( grho2(length,ns) ) - ! - DO is = 1, ns - grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2 - ENDDO - ! - IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, & - v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1),& - v3c(:,1) ) - IF (ns == 2) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x,& - v1c, v2c, v3c ) - ! - DEALLOCATE( grho2 ) + IF (ns == 1) THEN + ! + ALLOCATE( grho2(length) ) + grho2(:) = grho(1,:,1)**2 + grho(2,:,1)**2 + grho(3,:,1)**2 + ! + !$acc data copyin( rho, grho2, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) + !$acc host_data use_device( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) + CALL tau_xc( length, rho(:,1), grho2, tau(:,1), ex, ec, & + v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) ) + !$acc end host_data + !$acc end data + ! + DEALLOCATE( grho2 ) + ! + ELSEIF (ns == 2) THEN + ! + !$acc data copyin( rho, grho, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) + !$acc host_data use_device( rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) + CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, & + v1c, v2c, v3c ) + !$acc end host_data + !$acc end data + ! + ENDIF ! ENDIF ! @@ -312,27 +321,32 @@ SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v DEALLOCATE( vc_rho , vc_sigma, vc_tau, vlapl_rho ) ! #else - ! - ALLOCATE( grho2(length,ns) ) - ! - DO is = 1, ns - grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2 - ENDDO ! IF (ns == 1) THEN ! - CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, v1x(:,1), & - v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1), v3c(:,1) ) + ALLOCATE( grho2(length) ) + grho2(:) = grho(1,:,1)**2 + grho(2,:,1)**2 + grho(3,:,1)**2 + ! + !$acc data copyin( rho, grho2, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) + !$acc host_data use_device( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) + CALL tau_xc( length, rho(:,1), grho2, tau(:,1), ex, ec, v1x(:,1), & + v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) ) + !$acc end host_data + !$acc end data + ! + DEALLOCATE( grho2 ) ! ELSEIF (ns == 2) THEN ! + !$acc data copyin( rho, grho, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) + !$acc host_data use_device( rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, & v2c, v3c ) + !$acc end host_data + !$acc end data ! ENDIF ! - DEALLOCATE( grho2 ) - ! #endif ! RETURN @@ -391,7 +405,7 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x ! ! ... local variables ! - INTEGER :: is + INTEGER :: k, is, ipol, pol_unpol REAL(DP), ALLOCATABLE :: grho2(:,:) REAL(DP), PARAMETER :: small = 1.E-10_DP ! @@ -402,19 +416,24 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:), vc_tau(:) REAL(DP), ALLOCATABLE :: lapl_rho(:), vlapl_rho(:) ! not used in QE ! - INTEGER :: k, ipol, pol_unpol LOGICAL :: POLARIZED REAL(DP) :: rh, ggrho2, atau #if (XC_MAJOR_VERSION > 4) INTEGER(8) :: lengthxc #else INTEGER :: lengthxc +#endif #endif ! + !$acc data deviceptr( rho(length,ns), grho(3,length,ns), tau(length,ns), ex(length), & + !$acc& ec(length), v1x(length,ns), v2x(length,ns), v3x(length,ns), & + !$acc& v1c(length,ns), v2c(np,length,ns), v3c(length,ns) ) + ! +#if defined(__LIBXC) lengthxc = length ! - ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP ; v3x = 0.0_DP - ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP ; v3c = 0.0_DP + !ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP ; v3x = 0.0_DP + !ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP ; v3c = 0.0_DP ! POLARIZED = .FALSE. IF (ns == 2) THEN @@ -425,6 +444,8 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x ! ALLOCATE( rho_lxc(length*ns), sigma(length*np), tau_lxc(length*ns) ) ALLOCATE( lapl_rho(length*ns) ) + !$acc data copyout( rho_lxc, sigma, tau_lxc, lapl_rho ) + !$acc host_data use_device( rho_lxc, sigma, tau_lxc, lapl_rho ) ! ALLOCATE( ex_lxc(length) , ec_lxc(length) ) ALLOCATE( vx_rho(length*ns) , vx_sigma(length*np), vx_tau(length*ns) ) @@ -434,16 +455,18 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x ! IF ( ns == 1 ) THEN ! + !$acc parallel loop DO k = 1, length rho_lxc(k) = ABS( rho(k,1) ) sigma(k) = MAX( grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2, & grho2_threshold_mgga ) tau_lxc(k) = MAX( tau(k,1), tau_threshold_mgga ) - vlapl_rho(k) = 0._DP + lapl_rho(k) = 0.d0 ENDDO ! ELSE ! + !$acc parallel loop DO k = 1, length rho_lxc(2*k-1) = ABS( rho(k,1) ) rho_lxc(2*k) = ABS( rho(k,2) ) @@ -455,12 +478,14 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x grho2_threshold_mgga ) tau_lxc(2*k-1) = MAX( tau(k,1), small ) tau_lxc(2*k) = MAX( tau(k,2), small ) - vlapl_rho(2*k-1) = 0._DP - vlapl_rho(2*k) = 0._DP + lapl_rho(2*k-1) = 0.d0 + lapl_rho(2*k) = 0.d0 ENDDO ! ENDIF ! + vlapl_rho = 0._DP + ! IF ( .NOT.is_libxc(5) .AND. imetac==0 ) THEN ! ALLOCATE( grho2(length,ns) ) @@ -469,8 +494,8 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2 ENDDO ! - IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, & - v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1),& + IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, & + v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c, & v3c(:,1) ) IF (ns == 2) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x,& v1c, v2c, v3c ) @@ -479,6 +504,9 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x ! ENDIF ! + !$acc end host_data + !$acc end data + ! ! ... META EXCHANGE ! IF ( is_libxc(5) ) THEN @@ -523,17 +551,6 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x ENDDO ENDIF ! - ! **Now this is done directly inside Libxc for SCAN0** - ! ... only for HK/MCA: SCAN0 (used in CPV) - !IF ( scan_exx ) THEN - ! IF (exx_started) THEN - ! ex = (1.0_DP - exx_fraction) * ex - ! v1x = (1.0_DP - exx_fraction) * v1x - ! v2x = (1.0_DP - exx_fraction) * v2x - ! v3x = (1.0_DP - exx_fraction) * v3x - ! ENDIF - !ENDIF - ! ENDIF ! ! ... META CORRELATION @@ -590,15 +607,20 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x #else ! ALLOCATE( grho2(length,ns) ) + !$acc data create( grho2(length,ns) ) + !$acc host_data use_device( grho2 ) ! + !$acc parallel loop collapse(2) DO is = 1, ns - grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2 + DO k = 1, length + grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2 + ENDDO ENDDO ! IF (ns == 1) THEN ! CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, v1x(:,1), & - v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1), v3c(:,1) ) + v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) ) ! ELSEIF (ns == 2) THEN ! @@ -607,9 +629,13 @@ SUBROUTINE xc_metagcx_gpu( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x ! ENDIF ! + !$acc end host_data + !$acc end data DEALLOCATE( grho2 ) ! #endif + ! + !$acc end data ! RETURN ! diff --git a/atomic/src/vxcgc.f90 b/atomic/src/vxcgc.f90 index ab61041b1..1ccd55e35 100644 --- a/atomic/src/vxcgc.f90 +++ b/atomic/src/vxcgc.f90 @@ -78,7 +78,8 @@ subroutine vxcgc( ndm, mesh, nspin, r, r2, rho, rhoc, vgc, egc, & ! use kinds, only : DP use constants, only : fpi, e2 - use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx + use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx, & + xc_metagcx implicit none integer, intent(in) :: ndm,mesh,nspin,iflag real(DP), intent(in) :: r(mesh), r2(mesh), rho(ndm,2), rhoc(ndm)