diff --git a/XClib/qe_drivers_lda_lsda.f90 b/XClib/qe_drivers_lda_lsda.f90 index 03eabed4c..6a5baab98 100644 --- a/XClib/qe_drivers_lda_lsda.f90 +++ b/XClib/qe_drivers_lda_lsda.f90 @@ -64,11 +64,13 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out ) REAL(DP), INTENT(OUT), DIMENSION(length) :: ex_out !! \(\epsilon_x(rho)\) ( NOT \(E_x(\text{rho})\) ) REAL(DP), INTENT(OUT), DIMENSION(length) :: vx_out - !! \(dE_x(\text{rho})/d\text{rho}\) ( NOT \(d\epsilon_x(\text{rho})/d\text{rho}\) ) + !! \(dE_x(\text{rho})/d\text{rho}\) ( NOT + !! \(d\epsilon_x(\text{rho})/d\text{rho}\) ) REAL(DP), INTENT(OUT), DIMENSION(length) :: ec_out !! \(\epsilon_c(rho)\) ( NOT \(E_c(\text{rho})\) ) REAL(DP), INTENT(OUT), DIMENSION(length) :: vc_out - !! \(dE_c(\text{rho})/d\text{rho}\) ( NOT \(d\epsilon_c(\text{rho})/d\text{rho}\) ) + !! \(dE_c(\text{rho})/d\text{rho}\) ( NOT + !! \(d\epsilon_c(\text{rho})/d\text{rho}\) ) ! ! ... local variables ! @@ -79,20 +81,26 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out ) REAL(DP), PARAMETER :: third = 1.0_DP/3.0_DP, & pi34 = 0.6203504908994_DP, e2 = 2.0_DP ! pi34 = (3/4pi)^(1/3) - ! -#if defined(_OPENMP) +#if defined(__OPENMP) INTEGER :: ntids INTEGER, EXTERNAL :: omp_get_num_threads ! ntids = omp_get_num_threads() #endif ! + ! +#if defined(_OPENACC) +!$acc data copyin(rho_in), copyout(ex_out, vx_out, ec_out, vc_out) +!$acc parallel loop +#endif +#if defined(__OPENMP) && !defined(_OPENACC) !$omp parallel if(ntids==1) default(none) & !$omp private( rho, rs, ex, ec, ec_, vx, vc, vc_ ) & !$omp shared( rho_in, length, iexch, icorr, ex_out, ec_out, vx_out, vc_out, & !$omp finite_size_cell_volume, exx_fraction, exx_started, & !$omp rho_threshold_lda ) !$omp do +#endif DO ir = 1, length ! rho = ABS(rho_in(ir)) @@ -247,8 +255,13 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out ) vx_out(ir) = vx ; vc_out(ir) = vc ! ENDDO +#if defined(_OPENACC) +!$acc end data +#endif +#if defined(__OPENMP) && !defined(_OPENACC) !$omp end do !$omp end parallel +#endif ! ! RETURN diff --git a/XClib/qe_funct_corr_lda_lsda.f90 b/XClib/qe_funct_corr_lda_lsda.f90 index a29daabdc..96e8ce267 100644 --- a/XClib/qe_funct_corr_lda_lsda.f90 +++ b/XClib/qe_funct_corr_lda_lsda.f90 @@ -6,14 +6,15 @@ ! or http://www.gnu.org/copyleft/gpl.txt . ! !------------------------------------------------------------------------- -MODULE corr_lda !corr_lda_gpu> +MODULE corr_lda !------------------------------------------------------------------------- !! LDA correlation functionals ! CONTAINS ! !------------------------------------------------------------------------- -SUBROUTINE pz( rs, iflag, ec, vc ) ! +SUBROUTINE pz( rs, iflag, ec, vc ) +!$acc routine (pz) seq !----------------------------------------------------------------------- !! LDA parametrization from Monte Carlo DATA: ! @@ -24,7 +25,7 @@ SUBROUTINE pz( rs, iflag, ec, vc ) ! ! IMPLICIT NONE ! - INTEGER, INTENT(IN) :: iflag ! + INTEGER, INTENT(IN) :: iflag !! see routine comments REAL(DP), INTENT(IN) :: rs !! Wigner-Seitz radius @@ -72,7 +73,8 @@ END SUBROUTINE pz ! ! !----------------------------------------------------------------------- -SUBROUTINE pzKZK( rs, ec, vc, vol ) ! +SUBROUTINE pzKZK( rs, ec, vc, vol ) +!$acc routine (pzKZK) seq !----------------------------------------------------------------------- !! LDA parametrization from Monte Carlo DATA: ! @@ -89,7 +91,7 @@ SUBROUTINE pzKZK( rs, ec, vc, vol ) ! !! correlation energy REAL(DP), INTENT(OUT) :: vc !! correlation potential - REAL(DP) :: vol ! + REAL(DP) :: vol !! volume element ! ! ... local variables @@ -187,7 +189,8 @@ END SUBROUTINE pzKZK ! ! !----------------------------------------------------------------------- -SUBROUTINE vwn( rs, ec, vc ) ! +SUBROUTINE vwn( rs, ec, vc ) +!$acc routine (vwn) seq !----------------------------------------------------------------------- !! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980). ! @@ -233,7 +236,8 @@ END SUBROUTINE vwn ! ! !----------------------------------------------------------------------- -SUBROUTINE vwn1_rpa( rs, ec, vc ) ! +SUBROUTINE vwn1_rpa( rs, ec, vc ) +!$acc routine (vwn1_rpa) seq !----------------------------------------------------------------------- !! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980). ! @@ -278,7 +282,8 @@ END SUBROUTINE vwn1_rpa ! ! !----------------------------------------------------------------------- -SUBROUTINE lyp( rs, ec, vc ) ! +SUBROUTINE lyp( rs, ec, vc ) +!$acc routine (lyp) seq !----------------------------------------------------------------------- !! C. Lee, W. Yang, and R.G. Parr, PRB 37, 785 (1988). !! LDA part only. @@ -315,7 +320,8 @@ END SUBROUTINE lyp ! ! !----------------------------------------------------------------------- -SUBROUTINE pw( rs, iflag, ec, vc ) ! +SUBROUTINE pw( rs, iflag, ec, vc ) +!$acc routine (pw) seq !----------------------------------------------------------------------- !! * iflag=1: J.P. Perdew and Y. Wang, PRB 45, 13244 (1992) !! * iflag=2: G. Ortiz and P. Ballone, PRB 50, 1391 (1994) @@ -326,7 +332,7 @@ SUBROUTINE pw( rs, iflag, ec, vc ) ! ! REAL(DP), INTENT(IN) :: rs !! Wigner-Seitz radius - INTEGER, INTENT(IN) :: iflag ! + INTEGER, INTENT(IN) :: iflag !! see routine comments REAL(DP), INTENT(OUT) :: ec !! correlation energy @@ -385,7 +391,8 @@ END SUBROUTINE pw ! ! !----------------------------------------------------------------------- -SUBROUTINE wignerc( rs, ec, vc ) ! +SUBROUTINE wignerc( rs, ec, vc ) +!$acc routine (wignerc) seq !----------------------------------------------------------------------- !! Wigner correlation. ! @@ -418,7 +425,8 @@ END SUBROUTINE wignerc ! ! !----------------------------------------------------------------------- -SUBROUTINE hl( rs, ec, vc ) ! +SUBROUTINE hl( rs, ec, vc ) +!$acc routine (hl) seq !----------------------------------------------------------------------- !! L. Hedin and B.I. Lundqvist, J. Phys. C 4, 2064 (1971). ! @@ -450,7 +458,8 @@ END SUBROUTINE hl ! ! !----------------------------------------------------------------------- -SUBROUTINE gl( rs, ec, vc ) ! +SUBROUTINE gl( rs, ec, vc ) +!$acc routine (gl) seq !--------------------------------------------------------------------- !! O. Gunnarsson and B. I. Lundqvist, PRB 13, 4274 (1976). ! @@ -484,7 +493,8 @@ END SUBROUTINE gl ! ... LSDA ! !----------------------------------------------------------------------- -SUBROUTINE pz_polarized( rs, ec, vc ) ! +SUBROUTINE pz_polarized( rs, ec, vc ) +!$acc routine (pz_polarized) seq !----------------------------------------------------------------------- !! J.P. Perdew and A. Zunger, PRB 23, 5048 (1981). !! spin-polarized energy and potential. @@ -539,7 +549,8 @@ END SUBROUTINE pz_polarized ! ! !----------------------------------------------------------------------- -SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw ) ! +SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw ) +!$acc routine (pz_spin) seq !----------------------------------------------------------------------- !! Perdew and Zunger, PRB 23, 5048 (1981). Spin polarized case. ! @@ -563,10 +574,10 @@ SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw ) !pz_d> + CALL pz( rs, 1, ecu, vcu ) ! ! polarization contribution - CALL pz_polarized( rs, ecp, vcp ) !pz_polarized_d> + CALL pz_polarized( rs, ecp, vcp ) ! fz = ( (1.0d0 + zeta)**p43 + (1.d0 - zeta)**p43 - 2.d0) / & (2.d0**p43 - 2.d0) @@ -582,7 +593,8 @@ SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw ) ! +SUBROUTINE vwn_spin( rs, zeta, ec, vc_up, vc_dw ) +!$acc routine (vwn_spin) seq !------------------------------------------------------------------------------ !! S.H. Vosko, L. Wilk, and M. Nusair. Spin polarized case. ! @@ -639,9 +651,9 @@ SUBROUTINE vwn_spin( rs, zeta, ec, vc_up, vc_dw ) !padefit_ParSet1_d> - CALL padefit_ParSet1( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic !padefit_ParSet1_d> - CALL padefit_ParSet1( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness" !padefit_ParSet1_d> + CALL padefit_ParSet1( sqrtrs, 1, ecP, vcP ) ! ecF = e_c Paramagnetic + CALL padefit_ParSet1( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic + CALL padefit_ParSet1( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness" ! ac = ac * iddfz0 dac = dac * iddfz0 @@ -659,7 +671,8 @@ END SUBROUTINE vwn_spin ! ! !---- -SUBROUTINE padefit_ParSet1( x, i, fit, dfit ) ! +SUBROUTINE padefit_ParSet1( x, i, fit, dfit ) +!$acc routine (padefit_ParSet1) seq !---- !! It implements formula [4.4] in: !! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980) @@ -709,7 +722,8 @@ SUBROUTINE padefit_ParSet1( x, i, fit, dfit ) ! +SUBROUTINE padefit_ParSet2( x, i, fit, dfit ) +!$acc routine (padefit_ParSet2) seq !---- !! It implements formula [4.4] in: !! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980) @@ -762,7 +776,8 @@ END SUBROUTINE ! ! !----------------------------------------------------------------------------------------- -SUBROUTINE vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw ) ! +SUBROUTINE vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw ) +!$acc routine (vwn1_rpa_spin) seq !--------------------------------------------------------------------------------------- ! USE kind_l, ONLY: DP @@ -817,9 +832,9 @@ SUBROUTINE vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw ) !padefit_ParSet2_d> - CALL padefit_ParSet2( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic !padefit_ParSet2_d> - CALL padefit_ParSet2( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness" !padefit_ParSet2_d> + CALL padefit_ParSet2( sqrtrs, 1, ecP, vcP ) ! ecF = e_c Paramagnetic + CALL padefit_ParSet2( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic + CALL padefit_ParSet2( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness" ! ac = ac * iddfz0 dac = dac * iddfz0 @@ -837,7 +852,8 @@ END SUBROUTINE ! ! !----------------------------------------------------------------------- -SUBROUTINE pw_spin( rs, zeta, ec, vc_up, vc_dw ) ! +SUBROUTINE pw_spin( rs, zeta, ec, vc_up, vc_dw ) +!$acc routine (pw_spin) seq !----------------------------------------------------------------------- !! J.P. Perdew and Y. Wang, PRB 45, 13244 (1992). ! @@ -946,7 +962,8 @@ END SUBROUTINE pw_spin ! ! !----------------------------------------------------------------------------- -SUBROUTINE lsd_lyp( rho, zeta, elyp, vlyp_up, vlyp_dw ) ! +SUBROUTINE lsd_lyp( rho, zeta, elyp, vlyp_up, vlyp_dw ) +!$acc routine (lsd_lyp) seq !==--------------------------------------------------------------== !== C. LEE, W. YANG, AND R.G. PARR, PRB 37, 785 (1988) == !== THIS IS ONLY THE LDA PART == diff --git a/XClib/qe_funct_exch_lda_lsda.f90 b/XClib/qe_funct_exch_lda_lsda.f90 index e5c093009..124cfeb72 100644 --- a/XClib/qe_funct_exch_lda_lsda.f90 +++ b/XClib/qe_funct_exch_lda_lsda.f90 @@ -6,14 +6,15 @@ ! or http://www.gnu.org/copyleft/gpl.txt . ! !---------------------------------------------------------------------- -MODULE exch_lda !exch_lda_gpu> +MODULE exch_lda !---------------------------------------------------------------------- !! LDA exchange functionals. ! CONTAINS ! !----------------------------------------------------------------------- -SUBROUTINE slater( rs, ex, vx ) ! +SUBROUTINE slater( rs, ex, vx ) +!$acc routine (slater) seq !--------------------------------------------------------------------- !! Slater exchange with alpha=2/3 ! @@ -41,7 +42,8 @@ END SUBROUTINE slater ! ! !----------------------------------------------------------------------- -SUBROUTINE slater1( rs, ex, vx ) ! +SUBROUTINE slater1( rs, ex, vx ) +!$acc routine (slater1) seq !--------------------------------------------------------------------- !! Slater exchange with alpha=1, corresponding to -1.374/r_s Ry. !! Used to recover old results. @@ -70,7 +72,8 @@ END SUBROUTINE slater1 ! ! !----------------------------------------------------------------------- -SUBROUTINE slater_rxc( rs, ex, vx ) ! +SUBROUTINE slater_rxc( rs, ex, vx ) +!$acc routine (slater_rxc) seq !--------------------------------------------------------------------- !! Slater exchange with alpha=2/3 and Relativistic exchange. ! @@ -123,7 +126,8 @@ END SUBROUTINE slater_rxc ! ! !----------------------------------------------------------------------- -SUBROUTINE slaterKZK( rs, ex, vx, vol ) ! +SUBROUTINE slaterKZK( rs, ex, vx, vol ) +!$acc routine (slaterKZK) seq !--------------------------------------------------------------------- !! Slater exchange with alpha=2/3, Kwee, Zhang and Krakauer KE !! correction. @@ -138,7 +142,7 @@ SUBROUTINE slaterKZK( rs, ex, vx, vol ) ! !! Exchange energy (per unit volume) REAL(DP), INTENT(OUT) :: vx !! Exchange potential - REAL(DP) :: vol ! + REAL(DP) :: vol !! Finite size volume element ! ! ... local variables @@ -177,7 +181,8 @@ END SUBROUTINE slaterKZK ! ... LSDA ! !----------------------------------------------------------------------- -SUBROUTINE slater_spin( rho, zeta, ex, vx_up, vx_dw ) ! +SUBROUTINE slater_spin( rho, zeta, ex, vx_up, vx_dw ) +!$acc routine (slater_spin) seq !----------------------------------------------------------------------- !! Slater exchange with alpha=2/3, spin-polarized case. ! @@ -218,7 +223,8 @@ END SUBROUTINE slater_spin ! ! !----------------------------------------------------------------------- -SUBROUTINE slater_rxc_spin( rho, z, ex, vx_up, vx_dw ) ! +SUBROUTINE slater_rxc_spin( rho, z, ex, vx_up, vx_dw ) +!$acc routine (slater_rxc_spin) seq !----------------------------------------------------------------------- !! Slater exchange with alpha=2/3, relativistic exchange case. !! Spin-polarized case. @@ -284,7 +290,8 @@ END SUBROUTINE slater_rxc_spin ! ! !----------------------------------------------------------------------- -SUBROUTINE slater1_spin( rho, zeta, ex, vx_up, vx_dw ) ! +SUBROUTINE slater1_spin( rho, zeta, ex, vx_up, vx_dw ) +!$acc routine (slater1_spin) seq !----------------------------------------------------------------------- !! Slater exchange with alpha=2/3, spin-polarized case !