Merge branch 'XClib_on_openacc' into 'develop'

XClib on openacc

See merge request QEF/q-e!1487
This commit is contained in:
giannozz 2021-07-12 18:32:08 +00:00
commit ad570e32e7
6 changed files with 572 additions and 351 deletions

View File

@ -80,6 +80,15 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
#endif
!
!
#if defined(_OPENACC)
!
IF (igcx==43 .OR. igcc==14) CALL xclib_error( 'gcxc', 'BEEF not available with&
& OpenACC enabled', 1 )
!
!$acc data copyin(rho_in,grho_in), copyout(sx_out,sc_out,v1x_out,v2x_out,v1c_out,v2c_out)
!$acc parallel loop
#endif
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp parallel if(ntids==1) default(none) &
!$omp private( rho, grho, sx, sx_, sxsr, v1x, v1x_, v1xsr, &
!$omp v2x, v2x_, v2xsr, sc, v1c, v2c ) &
@ -88,6 +97,7 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
!$omp screening_parameter, exx_fraction, igcc, v1x_out, v2x_out, &
!$omp v1c_out, v2c_out, sx_out, sc_out )
!$omp do
#endif
DO ir = 1, length
!
grho = grho_in(ir)
@ -291,10 +301,12 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
v2x = (1.0_DP - exx_fraction) * v2x
ENDIF
!
CASE( 43 ) ! 'beefx'
#if !defined(_OPENACC)
CASE( 43 ) ! 'beefx' --- BEEF unavailable with OpenACC-- Will be enabled soon
! last parameter = 0 means do not add LDA (=Slater) exchange
! (espresso) will add it itself
CALL beefx(rho, grho, sx, v1x, v2x, 0)
#endif
!
CASE( 44 ) ! 'RPBE'
!
@ -367,10 +379,12 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
v2c = 0.871_DP * v2c
ENDIF
!
CASE( 14 ) ! 'BEEF'
#if !defined(_OPENACC)
CASE( 14 ) ! BEEF unavailable with OpenACC-- Will be enabled soon
! last parameter 0 means: do not add lda contributions
! espresso will do that itself
call beeflocalcorr(rho, grho, sc, v1c, v2c, 0)
#endif
!
CASE DEFAULT
!
@ -384,9 +398,14 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
v1x_out(ir) = v1x ; v1c_out(ir) = v1c
v2x_out(ir) = v2x ; v2c_out(ir) = v2c
!
ENDDO
ENDDO
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp end do
!$omp end parallel
#endif
#if defined(_OPENACC)
!$acc end data
#endif
!
!
RETURN
@ -412,7 +431,7 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
!! Up and down charge density
REAL(DP), INTENT(IN), DIMENSION(length,2) :: grho2_in
!! Up and down gradient of the charge
REAL(DP), INTENT(OUT), DIMENSION(length) :: sx_tot
REAL(DP), INTENT(OUT), DIMENSION(length) :: sx_tot
!! Energy exchange GGA
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1x_out
!! Exchange potential (density part)
@ -421,17 +440,18 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
!
! ... local variables
!
INTEGER :: ir, is, iflag
REAL(DP) :: rho(2), grho2(2)
REAL(DP) :: v1x(2), v2x(2)
REAL(DP) :: sx(2), rnull(2)
REAL(DP) :: sxsr(2)
REAL(DP) :: v1xsr(2), v2xsr(2)
INTEGER :: ir, is, iflag
REAL(DP) :: rho_up, rho_dw, grho2_up, grho2_dw
REAL(DP) :: v1x_up, v1x_dw, v2x_up, v2x_dw
REAL(DP) :: sx_up, sx_dw, rnull_up, rnull_dw
REAL(DP) :: sxsr_up, sxsr_dw
REAL(DP) :: v1xsr_up, v1xsr_dw, v2xsr_up, v2xsr_dw
!
REAL(DP), PARAMETER :: small=1.D-10
REAL(DP), PARAMETER :: rho_trash=0.5_DP, grho2_trash=0.2_DP
! temporary values assigned to rho and grho when they
! are too small in order to avoid numerical problems.
!
#if defined(_OPENMP)
INTEGER :: ntids
INTEGER, EXTERNAL :: omp_get_num_threads
@ -441,60 +461,82 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
!
sx_tot = 0.0_DP
!
#if defined(_OPENACC)
!
IF (igcx==43) CALL xclib_error( 'gcx_spin', 'BEEF not available with&
& OpenACC enabled', 1 )
!
!$acc data copyin(rho_in, grho2_in), copyout(sx_tot, v1x_out, v2x_out)
!$acc parallel loop
#endif
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp parallel if(ntids==1) default(none) &
!$omp private( rho, grho2, rnull, sx, sxsr, v1x, v1xsr, &
!$omp v2x, v2xsr, iflag ) &
!$omp shared(rho_in, length, grho2_in, sx_tot, v1x_out, v2x_out,&
!$omp igcx, exx_started, exx_fraction, screening_parameter, gau_parameter)
!$omp private( rho_up, rho_dw, grho2_up, grho2_dw, rnull_up, rnull_dw, &
!$omp sx_up, sx_dw, sxsr_up, sxsr_dw, v1xsr_up, v1xsr_dw, &
!$omp v1x_up, v1x_dw, v2x_up, v2x_dw, v2xsr_up, v2xsr_dw, &
!$omp iflag ) &
!$omp shared( rho_in, length, grho2_in, sx_tot, v1x_out, v2x_out, &
!$omp igcx, exx_started, exx_fraction, screening_parameter,&
!$omp gau_parameter )
!$omp do
#endif
!
DO ir = 1, length
!
rho(:) = rho_in(ir,:)
grho2(:) = grho2_in(ir,:)
rnull(:) = 1.0_DP
rho_up = rho_in(ir,1)
rho_dw = rho_in(ir,2)
grho2_up = grho2_in(ir,1)
grho2_dw = grho2_in(ir,2)
rnull_up = 1.0_DP
rnull_dw = 1.0_DP
!
IF ( rho(1)+rho(2) <= small ) THEN
IF ( rho_up+rho_dw <= small ) THEN
sx_tot(ir) = 0.0_DP
v1x_out(ir,:) = 0.0_DP ; v2x_out(ir,:) = 0.0_DP
v1x_out(ir,1) = 0.0_DP
v2x_out(ir,1) = 0.0_DP
v1x_out(ir,2) = 0.0_DP
v2x_out(ir,2) = 0.0_DP
CYCLE
ELSE
DO is = 1, 2
IF ( rho(is)<=small .OR. SQRT(ABS(grho2(is)))<=small ) THEN
rho(is) = rho_trash
grho2(is) = grho2_trash
rnull(is) = 0.0_DP
ENDIF
ENDDO
IF ( rho_up<=small .OR. SQRT(ABS(grho2_up))<=small ) THEN
rho_up = rho_trash
grho2_up = grho2_trash
rnull_up = 0.0_DP
ENDIF
IF ( rho_dw<=small .OR. SQRT(ABS(grho2_dw))<=small ) THEN
rho_dw = rho_trash
grho2_dw = grho2_trash
rnull_dw = 0.0_DP
ENDIF
ENDIF
!
!
! ... exchange
!
SELECT CASE( igcx )
CASE( 0 )
!
sx_tot(ir) = 0.0_DP
v1x = 0.0_DP
v2x = 0.0_DP
v1x_up = 0.0_DP ; v1x_dw = 0.0_DP
v2x_up = 0.0_DP ; v2x_dw = 0.0_DP
!
CASE( 1 )
!
CALL becke88_spin( rho(1), rho(2), grho2(1), grho2(2), sx(1), sx(2), &
v1x(1), v1x(2), v2x(1), v2x(2) )
CALL becke88_spin( rho_up, rho_dw, grho2_up, grho2_dw, sx_up, sx_dw, &
v1x_up, v1x_dw, v2x_up, v2x_dw )
!
sx_tot(ir) = sx(1)*rnull(1) + sx(2)*rnull(2)
sx_tot(ir) = sx_up*rnull_up + sx_dw*rnull_dw
!
CASE( 2 )
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL ggax( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL ggax( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL ggax( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL ggax( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 3, 4, 8, 10, 12, 20, 23, 24, 25, 44, 45 )
! igcx=3: PBE, igcx=4: revised PBE, igcx=8: PBE0, igcx=10: PBEsol
@ -510,272 +552,313 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
IF ( igcx==44 ) iflag = 8
IF ( igcx==45 ) iflag = 9
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL pbex( rho(1), grho2(1), iflag, sx(1), v1x(1), v2x(1) )
CALL pbex( rho(2), grho2(2), iflag, sx(2), v1x(2), v2x(2) )
CALL pbex( rho_up, grho2_up, iflag, sx_up, v1x_up, v2x_up )
CALL pbex( rho_dw, grho2_dw, iflag, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
IF ( igcx == 8 .AND. exx_started ) THEN
!
sx_tot(ir) = (1.0_DP - exx_fraction) * sx_tot(ir)
v1x = (1.0_DP - exx_fraction) * v1x
v2x = (1.0_DP - exx_fraction) * v2x
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v1x_dw = (1.0_DP - exx_fraction) * v1x_dw
v2x_up = (1.0_DP - exx_fraction) * v2x_up
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
!
ELSEIF ( igcx == 12 .AND. exx_started ) THEN
!
CALL pbexsr( rho(1), grho2(1), sxsr(1), v1xsr(1), &
v2xsr(1), screening_parameter )
CALL pbexsr( rho(2), grho2(2), sxsr(2), v1xsr(2), &
v2xsr(2), screening_parameter )
CALL pbexsr( rho_up, grho2_up, sxsr_up, v1xsr_up, &
v2xsr_up, screening_parameter )
CALL pbexsr( rho_dw, grho2_dw, sxsr_dw, v1xsr_dw, &
v2xsr_dw, screening_parameter )
!
sx_tot(ir) = sx_tot(ir) - exx_fraction*0.5_DP*( sxsr(1)*rnull(1) + &
sxsr(2)*rnull(2) )
v1x = v1x - exx_fraction * v1xsr
v2x = v2x - exx_fraction * v2xsr * 2.0_DP
sx_tot(ir) = sx_tot(ir) - exx_fraction*0.5_DP*( sxsr_up*rnull_up + &
sxsr_dw*rnull_dw )
v1x_up = v1x_up - exx_fraction * v1xsr_up
v1x_dw = v1x_dw - exx_fraction * v1xsr_dw
v2x_up = v2x_up - exx_fraction * v2xsr_up * 2.0_DP
v2x_dw = v2x_dw - exx_fraction * v2xsr_dw * 2.0_DP
!
ELSEIF ( igcx == 20 .AND. exx_started ) THEN
! gau-pbe
!CALL pbexgau_lsd( rho, grho2, sxsr, v1xsr, v2xsr, gau_parameter )
CALL pbexgau( rho(1),grho2(1), sxsr(1), v1xsr(1),v2xsr(1), gau_parameter )
CALL pbexgau( rho(2),grho2(2), sxsr(2), v1xsr(2),v2xsr(2), gau_parameter )
CALL pbexgau( rho_up,grho2_up, sxsr_up, v1xsr_up,v2xsr_up, gau_parameter )
CALL pbexgau( rho_dw,grho2_dw, sxsr_dw, v1xsr_dw,v2xsr_dw, gau_parameter )
!
sx_tot(ir) = sx_tot(ir) - exx_fraction*0.5_DP * ( sxsr(1)*rnull(1) + &
sxsr(2)*rnull(2) )
v1x = v1x - exx_fraction * v1xsr
v2x = v2x - exx_fraction * v2xsr * 2.0_DP
sx_tot(ir) = sx_tot(ir) - exx_fraction*0.5_DP * ( sxsr_up*rnull_up + &
sxsr_dw*rnull_dw )
v1x_up = v1x_up - exx_fraction * v1xsr_up
v1x_dw = v1x_dw - exx_fraction * v1xsr_dw
v2x_up = v2x_up - exx_fraction * v2xsr_up * 2.0_DP
v2x_dw = v2x_dw - exx_fraction * v2xsr_dw * 2.0_DP
!
ENDIF
!
CASE( 9 ) ! B3LYP
!
CALL becke88_spin( rho(1), rho(2), grho2(1), grho2(2), sx(1), sx(2), &
v1x(1), v1x(2), v2x(1), v2x(2) )
CALL becke88_spin( rho_up, rho_dw, grho2_up, grho2_dw, sx_up, sx_dw, &
v1x_up, v1x_dw, v2x_up, v2x_dw )
!
sx_tot(ir) = sx(1)*rnull(1) + sx(2)*rnull(2)
sx_tot(ir) = sx_up*rnull_up + sx_dw*rnull_dw
!
IF ( exx_started ) THEN
sx_tot(ir) = 0.72_DP * sx_tot(ir)
v1x = 0.72_DP * v1x
v2x = 0.72_DP * v2x
v1x_up = 0.72_DP * v1x_up ; v1x_dw = 0.72_DP * v1x_dw
v2x_up = 0.72_DP * v2x_up ; v2x_dw = 0.72_DP * v2x_dw
ENDIF
!
CASE( 11 ) ! 'Wu-Cohen'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL wcx( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL wcx( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL wcx( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL wcx( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 13 ) ! 'revised PW86 for vdw-df2'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL rPW86( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL rPW86( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL rPW86( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL rPW86( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 16 ) ! 'c09x for vdw-df-c09.'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL c09x( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL c09x( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL c09x( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL c09x( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 21 ) ! 'PW86'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL pw86( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL pw86( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL pw86( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL pw86( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 22 ) ! 'B86B'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL becke86b( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL becke86b( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL becke86b( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL becke86b( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 26, 46 ) ! 'B86R for rev-vdW-DF2'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
IF ( igcx==26 ) iflag = 3 ! B86R for rev-vdW-DF2
IF ( igcx==46 ) iflag = 4 ! W32X for vdW-DF3-opt2
CALL b86b( rho(1), grho2(1), iflag, sx(1), v1x(1), v2x(1) )
CALL b86b( rho(2), grho2(2), iflag, sx(2), v1x(2), v2x(2) )
CALL b86b( rho_up, grho2_up, iflag, sx_up, v1x_up, v2x_up )
CALL b86b( rho_dw, grho2_dw, iflag, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 27 ) ! 'cx13 for vdw-df-cx'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL cx13( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL cx13( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL cx13( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL cx13( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
CASE( 28 ) ! X3LYP
!
CALL becke88_spin( rho(1), rho(2), grho2(1), grho2(2), sx(1), sx(2), &
v1x(1), v1x(2), v2x(1), v2x(2) )
CALL becke88_spin( rho_up, rho_dw, grho2_up, grho2_dw, sx_up, sx_dw, &
v1x_up, v1x_dw, v2x_up, v2x_dw )
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up
rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up
grho2_dw = 4.0_DP * grho2_dw
!
CALL pbex( rho(1), grho2(1), 1, sxsr(1), v1xsr(1), v2xsr(1) )
CALL pbex( rho(2), grho2(2), 1, sxsr(2), v1xsr(2), v2xsr(2) )
CALL pbex( rho_up, grho2_up, 1, sxsr_up, v1xsr_up, v2xsr_up )
CALL pbex( rho_dw, grho2_dw, 1, sxsr_dw, v1xsr_dw, v2xsr_dw )
!
sx_tot(ir) = 0.5_DP*( sxsr(1)*rnull(1) + sxsr(2)*rnull(2) )*0.235_DP + &
( sx(1)*rnull(1) + sx(2)*rnull(2) )*0.765_DP
v1x = v1xsr * 0.235_DP + v1x * 0.765_DP
v2x = v2xsr * 0.235_DP * 2.0_DP + v2x * 0.765_DP
sx_tot(ir) = 0.5_DP*( sxsr_up*rnull_up + sxsr_dw*rnull_dw )*0.235_DP + &
( sx_up*rnull_up + sx_dw*rnull_dw )*0.765_DP
v1x_up = v1xsr_up * 0.235_DP + v1x_up * 0.765_DP
v1x_dw = v1xsr_dw * 0.235_DP + v1x_dw * 0.765_DP
v2x_up = v2xsr_up * 0.235_DP * 2.0_DP + v2x_up * 0.765_DP
v2x_dw = v2xsr_dw * 0.235_DP * 2.0_DP + v2x_dw * 0.765_DP
!
IF ( exx_started ) THEN
sx_tot(ir) = 0.709_DP * sx_tot(ir)
v1x = 0.709_DP * v1x
v2x = 0.709_DP * v2x
v1x_up = 0.709_DP * v1x_up
v1x_dw = 0.709_DP * v1x_dw
v2x_up = 0.709_DP * v2x_up
v2x_dw = 0.709_DP * v2x_dw
ENDIF
!
CASE( 29, 31 ) ! 'cx0 for vdw-df-cx0' or `cx0p for vdW-DF-cx0p'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL cx13( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL cx13( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL cx13( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL cx13( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
IF ( exx_started ) THEN
sx_tot(ir) = (1.0_DP - exx_fraction) * sx_tot(ir)
v1x = (1.0_DP - exx_fraction) * v1x
v2x = (1.0_DP - exx_fraction) * v2x
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
ENDIF
!
CASE( 30 ) ! 'R860' = 'rPW86-0' for vdw-df2-0'
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL rPW86( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL rPW86( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL rPW86( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL rPW86( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
IF ( exx_started ) THEN
sx_tot(ir) = (1.0_DP - exx_fraction) * sx_tot(ir)
v1x = (1.0_DP - exx_fraction) * v1x
v2x = (1.0_DP - exx_fraction) * v2x
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v1x_dw = (1.0_DP - exx_fraction) * v1x_dw
v2x_up = (1.0_DP - exx_fraction) * v2x_up
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
ENDIF
!
CASE( 38 ) ! 'br0 for vdw-df2-BR0' etc
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL b86b( rho(1), grho2(1), 3, sx(1), v1x(1), v2x(1) )
CALL b86b( rho(2), grho2(2), 3, sx(2), v1x(2), v2x(2) )
CALL b86b( rho_up, grho2_up, 3, sx_up, v1x_up, v2x_up )
CALL b86b( rho_dw, grho2_dw, 3, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
IF ( exx_started ) THEN
sx_tot(ir) = (1.0_DP - exx_fraction) * sx_tot(ir)
v1x = (1.0_DP - exx_fraction) * v1x
v2x = (1.0_DP - exx_fraction) * v2x
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v1x_dw = (1.0_DP - exx_fraction) * v1x_dw
v2x_up = (1.0_DP - exx_fraction) * v2x_up
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
ENDIF
!
CASE( 40 ) ! 'c090 for vdw-df-c090' etc
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL c09x( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL c09x( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL c09x( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL c09x( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot(ir) = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
IF ( exx_started ) THEN
sx_tot(ir) = (1.0_DP - exx_fraction) * sx_tot(ir)
v1x = (1.0_DP - exx_fraction) * v1x
v2x = (1.0_DP - exx_fraction) * v2x
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v1x_dw = (1.0_DP - exx_fraction) * v1x_dw
v2x_up = (1.0_DP - exx_fraction) * v2x_up
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
ENDIF
!
CASE( 41 ) ! B86X for B86BPBEX hybrid
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL becke86b( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL becke86b( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL becke86b( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL becke86b( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
IF ( exx_started ) THEN
sx_tot = (1.0_DP - exx_fraction) * sx_tot
v1x = (1.0_DP - exx_fraction) * v1x
v2x = (1.0_DP - exx_fraction) * v2x
sx_tot(ir) = (1.0_DP - exx_fraction) * sx_tot(ir)
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v1x_dw = (1.0_DP - exx_fraction) * v1x_dw
v2x_up = (1.0_DP - exx_fraction) * v2x_up
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
ENDIF
!
CASE( 42 ) ! B88X for BHANDHLYP
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL becke88( rho(1), grho2(1), sx(1), v1x(1), v2x(1) )
CALL becke88( rho(2), grho2(2), sx(2), v1x(2), v2x(2) )
CALL becke88( rho_up, grho2_up, sx_up, v1x_up, v2x_up )
CALL becke88( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw )
!
sx_tot = 0.5_DP * ( sx(1)*rnull(1) + sx(2)*rnull(2) )
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * ( sx_up*rnull_up + sx_dw*rnull_dw )
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
!
IF ( exx_started ) THEN
sx_tot = (1.0_DP - exx_fraction) * sx_tot
v1x = (1.0_DP - exx_fraction) * v1x
v2x = (1.0_DP - exx_fraction) * v2x
sx_tot(ir) = (1.0_DP - exx_fraction) * sx_tot(ir)
v1x_up = (1.0_DP - exx_fraction) * v1x_up
v1x_dw = (1.0_DP - exx_fraction) * v1x_dw
v2x_up = (1.0_DP - exx_fraction) * v2x_up
v2x_dw = (1.0_DP - exx_fraction) * v2x_dw
ENDIF
!
CASE( 43 ) ! 'beefx'
#if !defined(_OPENACC)
CASE( 43 ) ! 'beefx' --- BEEF unavailable with OpenACC-- Will be enabled soon
!
rho = 2.0_DP * rho
grho2 = 4.0_DP * grho2
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
!
CALL beefx(rho(1), grho2(1), sx(1), v1x(1), v2x(1), 0)
CALL beefx(rho(2), grho2(2), sx(2), v1x(2), v2x(2), 0)
CALL beefx(rho_up, grho2_up, sx_up, v1x_up, v2x_up, 0)
CALL beefx(rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw, 0)
!
sx_tot(ir) = 0.5_DP * (sx(1)*rnull(1) + sx(2)*rnull(2))
v2x = 2.0_DP * v2x
sx_tot(ir) = 0.5_DP * (sx_up*rnull_up + sx_dw*rnull_dw)
v2x_up = 2.0_DP * v2x_up
v2x_dw = 2.0_DP * v2x_dw
#endif
!
! case igcx == 5 (HCTH) and 6 (OPTX) not implemented
! case igcx == 7 (meta-GGA) must be treated in a separate call to another
@ -783,18 +866,25 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
!
CASE DEFAULT
!
sx = 0.0_DP
v1x = 0.0_DP
v2x = 0.0_DP
sx_tot(ir) = 0.0_DP
v1x_up = 0.0_DP ; v1x_dw = 0.0_DP
v2x_up = 0.0_DP ; v2x_dw = 0.0_DP
!
END SELECT
!
v1x_out(ir,:) = v1x(:) * rnull(:)
v2x_out(ir,:) = v2x(:) * rnull(:)
v1x_out(ir,1) = v1x_up * rnull_up
v1x_out(ir,2) = v1x_dw * rnull_dw
v2x_out(ir,1) = v2x_up * rnull_up
v2x_out(ir,2) = v2x_dw * rnull_dw
!
ENDDO
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp end do
!$omp end parallel
#endif
#if defined(_OPENACC)
!$acc end data
#endif
!
!
RETURN
@ -832,7 +922,7 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
!
INTEGER :: ir
REAL(DP) :: rho, zeta, grho
REAL(DP) :: sc, v1c(2), v2c
REAL(DP) :: sc, v1c_up, v1c_dw, v2c
!REAL(DP), PARAMETER :: small=1.E-10_DP !, epsr=1.E-6_DP
!
#if defined(_OPENMP)
@ -842,12 +932,21 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
ntids = omp_get_num_threads()
#endif
!
#if defined(_OPENACC)
IF (igcc==14) CALL xclib_error( 'gcc_spin', 'BEEF not available with&
& OpenACC enabled', 1 )
!
!$acc data copyin(rho_in, grho_in), copyout(sc_out, v1c_out, v2c_out), copy(zeta_io)
!$acc parallel loop
#endif
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp parallel if(ntids==1) default(none) &
!$omp private( rho, zeta, grho, sc, v1c, v2c ) &
!$omp private( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c ) &
!$omp shared( igcc, sc_out, v1c_out, v2c_out, &
!$omp rho_threshold_gga, zeta_io, length, &
!$omp grho_in, rho_in )
!$omp do
#endif
DO ir = 1, length
!
rho = rho_in(ir)
@ -859,7 +958,8 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
IF ( ABS(zeta)>1.0_DP .OR. rho<=rho_threshold_gga .OR. &
SQRT(ABS(grho))<=rho_threshold_gga ) THEN
sc_out(ir) = 0.0_DP
v1c_out(ir,:) = 0.0_DP ; v2c_out(ir) = 0.0_DP
v1c_out(ir,1) = 0.0_DP ; v2c_out(ir) = 0.0_DP
v1c_out(ir,2) = 0.0_DP
CYCLE
ENDIF
!
@ -867,44 +967,54 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
CASE( 0 )
!
sc = 0.0_DP
v1c = 0.0_DP
v1c_up = 0.0_DP
v1c_dw = 0.0_DP
v2c = 0.0_DP
!
CASE( 1 )
!
CALL perdew86_spin( rho, zeta, grho, sc, v1c(1), v1c(2), v2c )
CALL perdew86_spin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c )
!
CASE( 2 )
!
CALL ggac_spin( rho, zeta, grho, sc, v1c(1), v1c(2), v2c )
CALL ggac_spin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c )
!
CASE( 4 )
!
CALL pbec_spin( rho, zeta, grho, 1, sc, v1c(1), v1c(2), v2c )
CALL pbec_spin( rho, zeta, grho, 1, sc, v1c_up, v1c_dw, v2c )
!
CASE( 8 )
!
CALL pbec_spin( rho, zeta, grho, 2, sc, v1c(1), v1c(2), v2c )
CALL pbec_spin( rho, zeta, grho, 2, sc, v1c_up, v1c_dw, v2c )
!
CASE( 14 )
#if !defined(_OPENACC)
CASE( 14 ) !*****BEEF unavailable with OpenACC-- Will be enabled soon
!
call beeflocalcorrspin(rho, zeta, grho, sc, v1c(1), v1c(2), v2c, 0)
call beeflocalcorrspin(rho, zeta, grho, sc, v1c_up, v1c_dw, v2c, 0)
#endif
!
CASE DEFAULT
!
sc = 0.0_DP
v1c = 0.0_DP
v1c_up = 0.0_DP
v1c_dw = 0.0_DP
v2c = 0.0_DP
!
END SELECT
!
sc_out(ir) = sc
v1c_out(ir,:) = v1c(:)
v1c_out(ir,1) = v1c_up
v1c_out(ir,2) = v1c_dw
v2c_out(ir) = v2c
!
ENDDO
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp end do
!$omp end parallel
#endif
#if defined(_OPENACC)
!$acc end data
#endif
!
RETURN
!
@ -949,11 +1059,13 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
! ... local variables
!
INTEGER :: ir
REAL(DP) :: rho(2), grho(2)
REAL(DP) :: rho_up, rho_dw, grho_up, grho_dw
REAL(DP) :: grho_ud
#if defined(_OPENMP)
INTEGER :: ntids
INTEGER, EXTERNAL :: omp_get_num_threads
!
ntids = omp_get_num_threads()
#endif
!
sc = 0.0_DP
@ -961,30 +1073,35 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
v2c = 0.0_DP
v2c_ud = 0.0_DP
!
#if defined(_OPENMP)
ntids = omp_get_num_threads()
#if defined(_OPENACC)
!$acc data copyin(rho_in, grho_in, grho_ud_in), copyout(sc, v1c, v2c, v2c_ud)
!$acc parallel loop
#endif
!
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp parallel if(ntids==1) default(none) &
!$omp private( rho, grho, grho_ud ) &
!$omp private( rho_up, rho_dw, grho_up, grho_dw, grho_ud ) &
!$omp shared( length, rho_in, grho_in, grho_ud_in, &
!$omp rho_threshold_gga, sc, exx_started, &
!$omp igcc, v1c, v2c, v2c_ud)
!$omp do
#endif
DO ir = 1, length
!
rho(:) = rho_in(ir,:)
grho(:) = grho_in(ir,:)
rho_up = rho_in(ir,1)
rho_dw = rho_in(ir,2)
grho_up = grho_in(ir,1)
grho_dw = grho_in(ir,2)
grho_ud = grho_ud_in(ir)
!
IF ( rho(1)+rho(2) < rho_threshold_gga ) THEN
IF ( rho_up+rho_dw < rho_threshold_gga ) THEN
sc(ir) = 0.0_DP
v1c(ir,:) = 0.0_DP
v2c(ir,:) = 0.0_DP ; v2c_ud(ir) = 0.0_DP
v1c(ir,1) = 0.0_DP ; v1c(ir,2) = 0.0_DP
v2c(ir,1) = 0.0_DP ; v2c_ud(ir) = 0.0_DP
v2c(ir,2) = 0.0_DP
CYCLE
ENDIF
!
CALL lsd_glyp( rho(1), rho(2), grho(1), grho(2), grho_ud, &
CALL lsd_glyp( rho_up, rho_dw, grho_up, grho_dw, grho_ud, &
sc(ir), v1c(ir,1), v1c(ir,2), v2c(ir,1), &
v2c(ir,2), v2c_ud(ir) )
!
@ -997,8 +1114,10 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
!
IF ( exx_started ) THEN
sc(ir) = 0.81_DP * sc(ir)
v1c(ir,:) = 0.81_DP * v1c(ir,:)
v2c(ir,:) = 0.81_DP * v2c(ir,:)
v1c(ir,1) = 0.81_DP * v1c(ir,1)
v1c(ir,2) = 0.81_DP * v1c(ir,2)
v2c(ir,1) = 0.81_DP * v2c(ir,1)
v2c(ir,2) = 0.81_DP * v2c(ir,2)
v2c_ud(ir) = 0.81_DP * v2c_ud(ir)
ENDIF
!
@ -1006,20 +1125,27 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
!
IF ( exx_started ) THEN
sc(ir) = 0.871_DP * sc(ir)
v1c(ir,:) = 0.871_DP * v1c(ir,:)
v2c(ir,:) = 0.871_DP * v2c(ir,:)
v1c(ir,1) = 0.871_DP * v1c(ir,1)
v1c(ir,2) = 0.871_DP * v1c(ir,2)
v2c(ir,1) = 0.871_DP * v2c(ir,1)
v2c(ir,2) = 0.871_DP * v2c(ir,2)
v2c_ud(ir) = 0.871_DP * v2c_ud(ir)
ENDIF
!
CASE DEFAULT
!
CALL xclib_error(" gcc_spin_more "," gradient correction not implemented ",1)
!CALL xclib_error(" gcc_spin_more "," gradient correction not implemented ",1) !***acc test
!
END SELECT
!
ENDDO
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp end do
!$omp end parallel
#endif
#if defined(_OPENACC)
!$acc end data
#endif
!
RETURN
!

View File

@ -64,11 +64,13 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out )
REAL(DP), INTENT(OUT), DIMENSION(length) :: ex_out
!! \(\epsilon_x(rho)\) ( NOT \(E_x(\text{rho})\) )
REAL(DP), INTENT(OUT), DIMENSION(length) :: vx_out
!! \(dE_x(\text{rho})/d\text{rho}\) ( NOT \(d\epsilon_x(\text{rho})/d\text{rho}\) )
!! \(dE_x(\text{rho})/d\text{rho}\) ( NOT
!! \(d\epsilon_x(\text{rho})/d\text{rho}\) )
REAL(DP), INTENT(OUT), DIMENSION(length) :: ec_out
!! \(\epsilon_c(rho)\) ( NOT \(E_c(\text{rho})\) )
REAL(DP), INTENT(OUT), DIMENSION(length) :: vc_out
!! \(dE_c(\text{rho})/d\text{rho}\) ( NOT \(d\epsilon_c(\text{rho})/d\text{rho}\) )
!! \(dE_c(\text{rho})/d\text{rho}\) ( NOT
!! \(d\epsilon_c(\text{rho})/d\text{rho}\) )
!
! ... local variables
!
@ -79,7 +81,6 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out )
REAL(DP), PARAMETER :: third = 1.0_DP/3.0_DP, &
pi34 = 0.6203504908994_DP, e2 = 2.0_DP
! pi34 = (3/4pi)^(1/3)
!
#if defined(_OPENMP)
INTEGER :: ntids
INTEGER, EXTERNAL :: omp_get_num_threads
@ -87,12 +88,19 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out )
ntids = omp_get_num_threads()
#endif
!
!
#if defined(_OPENACC)
!$acc data copyin(rho_in), copyout(ex_out, vx_out, ec_out, vc_out)
!$acc parallel loop
#endif
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp parallel if(ntids==1) default(none) &
!$omp private( rho, rs, ex, ec, ec_, vx, vc, vc_ ) &
!$omp shared( rho_in, length, iexch, icorr, ex_out, ec_out, vx_out, vc_out, &
!$omp finite_size_cell_volume, exx_fraction, exx_started, &
!$omp rho_threshold_lda )
!$omp do
#endif
DO ir = 1, length
!
rho = ABS(rho_in(ir))
@ -247,8 +255,13 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out )
vx_out(ir) = vx ; vc_out(ir) = vc
!
ENDDO
#if defined(_OPENACC)
!$acc end data
#endif
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp end do
!$omp end parallel
#endif
!
!
RETURN
@ -289,7 +302,8 @@ SUBROUTINE xc_lsda( length, rho_in, zeta_in, ex_out, ec_out, vx_out, vc_out )
INTEGER :: ir
REAL(DP) :: rho, rs, zeta
REAL(DP) :: ex, ec, ec_
REAL(DP) :: vx(2), vc(2), vc_(2)
REAL(DP) :: vx_up, vc_up, vc_up_
REAL(DP) :: vx_dw, vc_dw, vc_dw_
!
REAL(DP), PARAMETER :: third = 1.0_DP/3.0_DP, &
pi34 = 0.6203504908994_DP
@ -302,12 +316,19 @@ SUBROUTINE xc_lsda( length, rho_in, zeta_in, ex_out, ec_out, vx_out, vc_out )
ntids = omp_get_num_threads()
#endif
!
#if defined(_OPENACC)
!$acc data copyin(rho_in, zeta_in), copyout(ex_out, vx_out, ec_out, vc_out)
!$acc parallel loop
#endif
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp parallel if(ntids==1) default(none) &
!$omp private( rho, rs, zeta, ex, ec, ec_, vx, vc, vc_ ) &
!$omp private( rho, rs, zeta, ex, ec, ec_, vx_up, vx_dw, vc_up, &
!$omp vc_dw, vc_up_, vc_dw_ ) &
!$omp shared( length, iexch, icorr, exx_fraction, &
!$omp vx_out, vc_out, ex_out, ec_out, &
!$omp zeta_in, exx_started, rho_in, rho_threshold_lda)
!$omp zeta_in, exx_started, rho_in, rho_threshold_lda )
!$omp do
#endif
DO ir = 1, length
!
zeta = zeta_in(ir)
@ -318,8 +339,8 @@ SUBROUTINE xc_lsda( length, rho_in, zeta_in, ex_out, ec_out, vx_out, vc_out )
IF ( rho > rho_threshold_lda ) THEN
rs = pi34 / rho**third
ELSE
ex_out(ir) = 0.0_DP ; vx_out(ir,:) = 0.0_DP
ec_out(ir) = 0.0_DP ; vc_out(ir,:) = 0.0_DP
ex_out(ir) = 0.0_DP ; vx_out(ir,1) = 0.0_DP ; vx_out(ir,2) = 0.0_DP
ec_out(ir) = 0.0_DP ; vc_out(ir,1) = 0.0_DP ; vc_out(ir,2) = 0.0_DP
CYCLE
ENDIF
!
@ -329,53 +350,57 @@ SUBROUTINE xc_lsda( length, rho_in, zeta_in, ex_out, ec_out, vx_out, vc_out )
SELECT CASE( iexch )
CASE( 1 ) ! 'sla'
!
CALL slater_spin( rho, zeta, ex, vx(1), vx(2) )
CALL slater_spin( rho, zeta, ex, vx_up, vx_dw )
!
CASE( 2 ) ! 'sl1'
!
CALL slater1_spin( rho, zeta, ex, vx(1), vx(2) )
CALL slater1_spin( rho, zeta, ex, vx_up, vx_dw )
!
CASE( 3 ) ! 'rxc'
!
CALL slater_rxc_spin( rho, zeta, ex, vx(1), vx(2) )
CALL slater_rxc_spin( rho, zeta, ex, vx_up, vx_dw )
!
CASE( 4, 5 ) ! 'oep','hf'
!
IF ( exx_started ) THEN
ex = 0.0_DP
vx = 0.0_DP
vx_up = 0.0_DP ; vx_dw = 0.0_DP
ELSE
CALL slater_spin( rho, zeta, ex, vx(1), vx(2) )
CALL slater_spin( rho, zeta, ex, vx_up, vx_dw )
ENDIF
!
CASE( 6 ) ! 'pb0x'
!
CALL slater_spin( rho, zeta, ex, vx(1), vx(2) )
CALL slater_spin( rho, zeta, ex, vx_up, vx_dw )
IF ( exx_started ) THEN
ex = (1.0_DP - exx_fraction) * ex
vx = (1.0_DP - exx_fraction) * vx
vx_up = (1.0_DP - exx_fraction) * vx_up
vx_dw = (1.0_DP - exx_fraction) * vx_dw
ENDIF
!
CASE( 7 ) ! 'B3LYP'
!
CALL slater_spin( rho, zeta, ex, vx(1), vx(2) )
CALL slater_spin( rho, zeta, ex, vx_up, vx_dw )
IF ( exx_started ) THEN
ex = (1.0_DP - exx_fraction) * ex
vx = (1.0_DP - exx_fraction) * vx
vx_up = (1.0_DP - exx_fraction) * vx_up
vx_dw = (1.0_DP - exx_fraction) * vx_dw
ENDIF
!
CASE( 9 ) ! 'X3LYP'
!
CALL slater_spin( rho, zeta, ex, vx(1), vx(2) )
CALL slater_spin( rho, zeta, ex, vx_up, vx_dw )
IF ( exx_started ) THEN
ex = (1.0_DP - exx_fraction) * ex
vx = (1.0_DP - exx_fraction) * vx
vx_up = (1.0_DP - exx_fraction) * vx_up
vx_dw = (1.0_DP - exx_fraction) * vx_dw
ENDIF
!
CASE DEFAULT
!
ex = 0.0_DP
vx = 0.0_DP
vx_up = 0.0_DP
vx_dw = 0.0_DP
!
END SELECT
!
@ -386,67 +411,79 @@ SUBROUTINE xc_lsda( length, rho_in, zeta_in, ex_out, ec_out, vx_out, vc_out )
CASE( 0 )
!
ec = 0.0_DP
vc = 0.0_DP
vc_up = 0.0_DP ; vc_dw = 0.0_DP
!
CASE( 1 )
!
CALL pz_spin( rs, zeta, ec, vc(1), vc(2) )
CALL pz_spin( rs, zeta, ec, vc_up, vc_dw )
!
CASE( 2 )
!
CALL vwn_spin( rs, zeta, ec, vc(1), vc(2) )
CALL vwn_spin( rs, zeta, ec, vc_up, vc_dw )
!
CASE( 3 )
!
CALL lsd_lyp( rho, zeta, ec, vc(1), vc(2) ) ! from CP/FPMD (more_functionals)
CALL lsd_lyp( rho, zeta, ec, vc_up, vc_dw ) ! from CP/FPMD
!
CASE( 4 )
!
CALL pw_spin( rs, zeta, ec, vc(1), vc(2) )
CALL pw_spin( rs, zeta, ec, vc_up, vc_dw )
!
CASE( 12 ) ! 'B3LYP'
!
CALL vwn_spin( rs, zeta, ec, vc(1), vc(2) )
CALL vwn_spin( rs, zeta, ec, vc_up, vc_dw )
ec = 0.19_DP * ec
vc = 0.19_DP * vc
vc_up = 0.19_DP * vc_up
vc_dw = 0.19_DP * vc_dw
!
CALL lsd_lyp( rho, zeta, ec_, vc_(1), vc_(2) ) ! from CP/FPMD (more_functionals)
CALL lsd_lyp( rho, zeta, ec_, vc_up_, vc_dw_ ) ! from CP/FPMD
ec = ec + 0.81_DP * ec_
vc = vc + 0.81_DP * vc_
vc_up = vc_up + 0.81_DP * vc_up_
vc_dw = vc_dw + 0.81_DP * vc_dw_
!
CASE( 13 ) ! 'B3LYP-V1R'
!
CALL vwn1_rpa_spin( rs, zeta, ec, vc(1), vc(2) )
CALL vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw )
ec = 0.19_DP * ec
vc = 0.19_DP * vc
vc_up = 0.19_DP * vc_up
vc_dw = 0.19_DP * vc_dw
!
CALL lsd_lyp( rho, zeta, ec_, vc_(1), vc_(2) ) ! from CP/FPMD (more_functionals)
CALL lsd_lyp( rho, zeta, ec_, vc_up_, vc_dw_ ) ! from CP/FPMD
ec = ec + 0.81_DP * ec_
vc = vc + 0.81_DP * vc_
vc_up = vc_up + 0.81_DP * vc_up_
vc_dw = vc_dw + 0.81_DP * vc_dw_
!
CASE( 14 ) ! 'X3LYP
!
CALL vwn1_rpa_spin( rs, zeta, ec, vc(1), vc(2) )
CALL vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw )
ec = 0.129_DP * ec
vc = 0.129_DP * vc
vc_up = 0.129_DP * vc_up
vc_dw = 0.129_DP * vc_dw
!
CALL lsd_lyp( rho, zeta, ec_, vc_(1), vc_(2) ) ! from CP/FPMD (more_functionals)
CALL lsd_lyp( rho, zeta, ec_, vc_up_, vc_dw_ ) ! from CP/FPMD
ec = ec + 0.871_DP * ec_
vc = vc + 0.871_DP * vc_
vc_up = vc_up + 0.871_DP * vc_up_
vc_dw = vc_dw + 0.871_DP * vc_dw_
!
CASE DEFAULT
!
ec = 0.0_DP !rrr
vc = 0.0_DP
ec = 0.0_DP
vc_up = 0.0_DP
vc_dw = 0.0_DP
!
END SELECT
!
ex_out(ir) = ex ; vx_out(ir,:) = vx(:)
ec_out(ir) = ec ; vc_out(ir,:) = vc(:)
ex_out(ir) = ex ; vx_out(ir,1) = vx_up ; vx_out(ir,2) = vx_dw
ec_out(ir) = ec ; vc_out(ir,1) = vc_up ; vc_out(ir,2) = vc_dw
!
ENDDO
#if defined(_OPENMP) && !defined(_OPENACC)
!$omp end do
!$omp end parallel
#endif
#if defined(_OPENACC)
!$acc end data
#endif
!
!
RETURN

View File

@ -1,14 +1,20 @@
!
MODULE corr_gga !<GPU:corr_gga=>corr_gga_gpu>
! Copyright (C) 2020 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
MODULE corr_gga
!
USE corr_lda, ONLY: pw, pw_spin !<GPU:pw_spin=>pw_spin_d,pw=>pw_d,corr_lda=>corr_lda_gpu>
USE corr_lda, ONLY: pw, pw_spin
!
CONTAINS
!
!
!-----------------------------------------------------------------------
SUBROUTINE perdew86( rho, grho, sc, v1c, v2c ) !<GPU:DEVICE>
SUBROUTINE perdew86( rho, grho, sc, v1c, v2c )
!$acc routine (perdew86) seq
!-----------------------------------------------------------------------
!! Perdew gradient correction on correlation: PRB 33, 8822 (1986).
!
@ -59,7 +65,8 @@ END SUBROUTINE perdew86
!
!
!-----------------------------------------------------------------------
SUBROUTINE ggac( rho, grho, sc, v1c, v2c ) !<GPU:DEVICE>
SUBROUTINE ggac( rho, grho, sc, v1c, v2c )
!$acc routine (ggac) seq
!-----------------------------------------------------------------------
!! Perdew-Wang GGA (PW91) correlation part
!
@ -89,7 +96,7 @@ SUBROUTINE ggac( rho, grho, sc, v1c, v2c ) !<GPU:DEVICE>
!
rs = pi34 / rho**third
!
CALL pw( rs, 1, ec, vc ) !<GPU:pw=>pw_d>
CALL pw( rs, 1, ec, vc )
!
rs1 = rs
rs2 = rs1 * rs1
@ -137,7 +144,8 @@ END SUBROUTINE ggac
!
!
!-----------------------------------------------------------------------
SUBROUTINE glyp( rho, grho, sc, v1c, v2c ) !<GPU:DEVICE>
SUBROUTINE glyp( rho, grho, sc, v1c, v2c )
!$acc routine (glyp) seq
!-----------------------------------------------------------------------
!! Lee Yang Parr: gradient correction part.
!
@ -178,7 +186,8 @@ END SUBROUTINE glyp
!
!
!---------------------------------------------------------------
SUBROUTINE pbec( rho, grho, iflag, sc, v1c, v2c ) !<GPU:DEVICE>
SUBROUTINE pbec( rho, grho, iflag, sc, v1c, v2c )
!$acc routine (pbec) seq
!---------------------------------------------------------------
!! PBE correlation (without LDA part)
!
@ -190,7 +199,7 @@ SUBROUTINE pbec( rho, grho, iflag, sc, v1c, v2c ) !<GPU:DEVIC
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: iflag !<GPU:VALUE>
INTEGER, INTENT(IN) :: iflag
REAL(DP), INTENT(IN) :: rho, grho
! input: charge and squared gradient
REAL(DP), INTENT(OUT) :: sc, v1c, v2c
@ -210,7 +219,7 @@ SUBROUTINE pbec( rho, grho, iflag, sc, v1c, v2c ) !<GPU:DEVIC
!
rs = pi34 / rho**third
!
CALL pw( rs, 1, ec, vc ) !<GPU:pw=>pw_d>
CALL pw( rs, 1, ec, vc )
!
kf = xkf / rs
ks = xks * SQRT(kf)
@ -232,7 +241,7 @@ SUBROUTINE pbec( rho, grho, iflag, sc, v1c, v2c ) !<GPU:DEVIC
v2c = ddh0
! q2D
IF (iflag == 3) THEN
CALL cpbe2d( rho, grho, sc2D, v1c2D, v2c2D ) !<GPU:cpbe2d=>cpbe2d_d>
CALL cpbe2d( rho, grho, sc2D, v1c2D, v2c2D )
sc = sc + sc2D
v1c = v1c + v1c2D
v2c = v2c + v2c2D
@ -246,7 +255,8 @@ END SUBROUTINE pbec
! ===========> SPIN <===========
!
!-----------------------------------------------------------------------
SUBROUTINE perdew86_spin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c ) !<GPU:DEVICE>
SUBROUTINE perdew86_spin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c )
!$acc routine (perdew86_spin) seq
!---------------------------------------------------------------------
!! Perdew gradient correction on correlation: PRB 33, 8822 (1986)
!! spin-polarized case.
@ -318,7 +328,8 @@ END SUBROUTINE perdew86_spin
!
!
!-----------------------------------------------------------------------
SUBROUTINE ggac_spin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c ) !<GPU:DEVICE>
SUBROUTINE ggac_spin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c )
!$acc routine (ggac_spin) seq
!---------------------------------------------------------------------
!! Perdew-Wang GGA (PW91) correlation part - spin-polarized
!
@ -359,7 +370,7 @@ SUBROUTINE ggac_spin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c )
!
rs = pi34 / rho**third
!
CALL pw_spin( rs, zeta, ec, vc_up, vc_dn ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, zeta, ec, vc_up, vc_dn )
!
rs2 = rs * rs
rs3 = rs * rs2
@ -423,7 +434,8 @@ END SUBROUTINE ggac_spin
!
!
!-------------------------------------------------------------------
SUBROUTINE pbec_spin( rho, zeta, grho, iflag, sc, v1c_up, v1c_dw, v2c ) !<GPU:DEVICE>
SUBROUTINE pbec_spin( rho, zeta, grho, iflag, sc, v1c_up, v1c_dw, v2c )
!$acc routine (pbec_spin) seq
!-----------------------------------------------------------------
!! PBE correlation (without LDA part) - spin-polarized.
!
@ -434,7 +446,7 @@ SUBROUTINE pbec_spin( rho, zeta, grho, iflag, sc, v1c_up, v1c_dw, v2c )
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: iflag !<GPU:VALUE>
INTEGER, INTENT(IN) :: iflag
!! see main comments
REAL(DP), INTENT(IN) :: rho
!! the total charge density
@ -467,7 +479,7 @@ SUBROUTINE pbec_spin( rho, zeta, grho, iflag, sc, v1c_up, v1c_dw, v2c )
!
rs = pi34 / rho**third
!
CALL pw_spin( rs, zeta, ec, vc_up, vc_dn ) !<GPU:pw_spin=>pw_spin_d>
CALL pw_spin( rs, zeta, ec, vc_up, vc_dn )
!
kf = xkf / rs
ks = xks * SQRT(kf)
@ -520,7 +532,8 @@ END SUBROUTINE pbec_spin
!
!
!------------------------------------------------------------------------
SUBROUTINE lsd_glyp( rho_in_up, rho_in_dw, grho_up, grho_dw, grho_ud, sc, v1c_up, v1c_dw, v2c_up, v2c_dw, v2c_ud ) !<GPU:DEVICE>
SUBROUTINE lsd_glyp( rho_in_up, rho_in_dw, grho_up, grho_dw, grho_ud, sc, v1c_up, v1c_dw, v2c_up, v2c_dw, v2c_ud )
!$acc routine (lsd_glyp) seq
!----------------------------------------------------------------------
!! Lee, Yang, Parr: gradient correction part.
!
@ -597,7 +610,8 @@ END SUBROUTINE lsd_glyp
!
!
!---------------------------------------------------------------
SUBROUTINE cpbe2d( rho, grho, sc, v1c, v2c ) !<GPU:DEVICE>
SUBROUTINE cpbe2d( rho, grho, sc, v1c, v2c )
!$acc routine (cpbe2d) seq
!---------------------------------------------------------------
!! 2D correction (last term of Eq. 5, PRL 108, 126402 (2012))
!

View File

@ -6,14 +6,15 @@
! or http://www.gnu.org/copyleft/gpl.txt .
!
!-------------------------------------------------------------------------
MODULE corr_lda !<GPU:corr_lda=>corr_lda_gpu>
MODULE corr_lda
!-------------------------------------------------------------------------
!! LDA correlation functionals
!
CONTAINS
!
!-------------------------------------------------------------------------
SUBROUTINE pz( rs, iflag, ec, vc ) !<GPU:DEVICE>
SUBROUTINE pz( rs, iflag, ec, vc )
!$acc routine (pz) seq
!-----------------------------------------------------------------------
!! LDA parametrization from Monte Carlo DATA:
!
@ -24,7 +25,7 @@ SUBROUTINE pz( rs, iflag, ec, vc ) !<GPU:DEVICE>
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: iflag !<GPU:VALUE>
INTEGER, INTENT(IN) :: iflag
!! see routine comments
REAL(DP), INTENT(IN) :: rs
!! Wigner-Seitz radius
@ -72,7 +73,8 @@ END SUBROUTINE pz
!
!
!-----------------------------------------------------------------------
SUBROUTINE pzKZK( rs, ec, vc, vol ) !<GPU:DEVICE>
SUBROUTINE pzKZK( rs, ec, vc, vol )
!$acc routine (pzKZK) seq
!-----------------------------------------------------------------------
!! LDA parametrization from Monte Carlo DATA:
!
@ -89,7 +91,7 @@ SUBROUTINE pzKZK( rs, ec, vc, vol ) !<GPU:DEVICE>
!! correlation energy
REAL(DP), INTENT(OUT) :: vc
!! correlation potential
REAL(DP) :: vol !<GPU:VALUE>
REAL(DP) :: vol
!! volume element
!
! ... local variables
@ -187,7 +189,8 @@ END SUBROUTINE pzKZK
!
!
!-----------------------------------------------------------------------
SUBROUTINE vwn( rs, ec, vc ) !<GPU:DEVICE>
SUBROUTINE vwn( rs, ec, vc )
!$acc routine (vwn) seq
!-----------------------------------------------------------------------
!! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980).
!
@ -233,7 +236,8 @@ END SUBROUTINE vwn
!
!
!-----------------------------------------------------------------------
SUBROUTINE vwn1_rpa( rs, ec, vc ) !<GPU:DEVICE>
SUBROUTINE vwn1_rpa( rs, ec, vc )
!$acc routine (vwn1_rpa) seq
!-----------------------------------------------------------------------
!! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980).
!
@ -278,7 +282,8 @@ END SUBROUTINE vwn1_rpa
!
!
!-----------------------------------------------------------------------
SUBROUTINE lyp( rs, ec, vc ) !<GPU:DEVICE>
SUBROUTINE lyp( rs, ec, vc )
!$acc routine (lyp) seq
!-----------------------------------------------------------------------
!! C. Lee, W. Yang, and R.G. Parr, PRB 37, 785 (1988).
!! LDA part only.
@ -315,7 +320,8 @@ END SUBROUTINE lyp
!
!
!-----------------------------------------------------------------------
SUBROUTINE pw( rs, iflag, ec, vc ) !<GPU:DEVICE>
SUBROUTINE pw( rs, iflag, ec, vc )
!$acc routine (pw) seq
!-----------------------------------------------------------------------
!! * iflag=1: J.P. Perdew and Y. Wang, PRB 45, 13244 (1992)
!! * iflag=2: G. Ortiz and P. Ballone, PRB 50, 1391 (1994)
@ -326,7 +332,7 @@ SUBROUTINE pw( rs, iflag, ec, vc ) !<GPU:DEVICE>
!
REAL(DP), INTENT(IN) :: rs
!! Wigner-Seitz radius
INTEGER, INTENT(IN) :: iflag !<GPU:VALUE>
INTEGER, INTENT(IN) :: iflag
!! see routine comments
REAL(DP), INTENT(OUT) :: ec
!! correlation energy
@ -385,7 +391,8 @@ END SUBROUTINE pw
!
!
!-----------------------------------------------------------------------
SUBROUTINE wignerc( rs, ec, vc ) !<GPU:DEVICE>
SUBROUTINE wignerc( rs, ec, vc )
!$acc routine (wignerc) seq
!-----------------------------------------------------------------------
!! Wigner correlation.
!
@ -418,7 +425,8 @@ END SUBROUTINE wignerc
!
!
!-----------------------------------------------------------------------
SUBROUTINE hl( rs, ec, vc ) !<GPU:DEVICE>
SUBROUTINE hl( rs, ec, vc )
!$acc routine (hl) seq
!-----------------------------------------------------------------------
!! L. Hedin and B.I. Lundqvist, J. Phys. C 4, 2064 (1971).
!
@ -450,7 +458,8 @@ END SUBROUTINE hl
!
!
!-----------------------------------------------------------------------
SUBROUTINE gl( rs, ec, vc ) !<GPU:DEVICE>
SUBROUTINE gl( rs, ec, vc )
!$acc routine (gl) seq
!---------------------------------------------------------------------
!! O. Gunnarsson and B. I. Lundqvist, PRB 13, 4274 (1976).
!
@ -484,7 +493,8 @@ END SUBROUTINE gl
! ... LSDA
!
!-----------------------------------------------------------------------
SUBROUTINE pz_polarized( rs, ec, vc ) !<GPU:DEVICE>
SUBROUTINE pz_polarized( rs, ec, vc )
!$acc routine (pz_polarized) seq
!-----------------------------------------------------------------------
!! J.P. Perdew and A. Zunger, PRB 23, 5048 (1981).
!! spin-polarized energy and potential.
@ -539,7 +549,8 @@ END SUBROUTINE pz_polarized
!
!
!-----------------------------------------------------------------------
SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:DEVICE>
SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw )
!$acc routine (pz_spin) seq
!-----------------------------------------------------------------------
!! Perdew and Zunger, PRB 23, 5048 (1981). Spin polarized case.
!
@ -563,10 +574,10 @@ SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:DEVICE
REAL(DP), PARAMETER :: p43=4.0d0/3.d0, third=1.d0/3.d0
!
! unpolarized part (Perdew-Zunger formula)
CALL pz( rs, 1, ecu, vcu ) !<GPU:pz=>pz_d>
CALL pz( rs, 1, ecu, vcu )
!
! polarization contribution
CALL pz_polarized( rs, ecp, vcp ) !<GPU:pz_polarized=>pz_polarized_d>
CALL pz_polarized( rs, ecp, vcp )
!
fz = ( (1.0d0 + zeta)**p43 + (1.d0 - zeta)**p43 - 2.d0) / &
(2.d0**p43 - 2.d0)
@ -582,7 +593,8 @@ SUBROUTINE pz_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:DEVICE
END SUBROUTINE pz_spin
!
!-------------------------------------------------------------------------------
SUBROUTINE vwn_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:DEVICE>
SUBROUTINE vwn_spin( rs, zeta, ec, vc_up, vc_dw )
!$acc routine (vwn_spin) seq
!------------------------------------------------------------------------------
!! S.H. Vosko, L. Wilk, and M. Nusair. Spin polarized case.
!
@ -639,9 +651,9 @@ SUBROUTINE vwn_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:DEVIC
fz = cfz1 * (trup13*trup + trdw13*trdw - 2.0_DP) ! f(zeta)
dfz = cfz2 * (trup13 - trdw13) ! df / dzeta
!
CALL padefit_ParSet1( sqrtrs, 1, ecP, vcP ) ! ecF = e_c Paramagnetic !<GPU:padefit_ParSet1=>padefit_ParSet1_d>
CALL padefit_ParSet1( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic !<GPU:padefit_ParSet1=>padefit_ParSet1_d>
CALL padefit_ParSet1( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness" !<GPU:padefit_ParSet1=>padefit_ParSet1_d>
CALL padefit_ParSet1( sqrtrs, 1, ecP, vcP ) ! ecF = e_c Paramagnetic
CALL padefit_ParSet1( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic
CALL padefit_ParSet1( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness"
!
ac = ac * iddfz0
dac = dac * iddfz0
@ -659,7 +671,8 @@ END SUBROUTINE vwn_spin
!
!
!----
SUBROUTINE padefit_ParSet1( x, i, fit, dfit ) !<GPU:DEVICE>
SUBROUTINE padefit_ParSet1( x, i, fit, dfit )
!$acc routine (padefit_ParSet1) seq
!----
!! It implements formula [4.4] in:
!! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980)
@ -709,7 +722,8 @@ SUBROUTINE padefit_ParSet1( x, i, fit, dfit ) !<GPU:DE
bx0fx0(i) * ( 2.0_DP/xx0 - txbfx - 4.0_DP*(b(i)+2.0_DP*x0(i))*itxbQ ) )
!
END SUBROUTINE
SUBROUTINE padefit_ParSet2( x, i, fit, dfit ) !<GPU:DEVICE>
SUBROUTINE padefit_ParSet2( x, i, fit, dfit )
!$acc routine (padefit_ParSet2) seq
!----
!! It implements formula [4.4] in:
!! S.H. Vosko, L. Wilk, and M. Nusair, Can. J. Phys. 58, 1200 (1980)
@ -762,7 +776,8 @@ END SUBROUTINE
!
!
!-----------------------------------------------------------------------------------------
SUBROUTINE vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:DEVICE>
SUBROUTINE vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw )
!$acc routine (vwn1_rpa_spin) seq
!---------------------------------------------------------------------------------------
!
USE kind_l, ONLY: DP
@ -817,9 +832,9 @@ SUBROUTINE vwn1_rpa_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:
fz = cfz1 * (trup13*trup + trdw13*trdw - 2.0_DP) ! f(zeta)
dfz = cfz2 * (trup13 - trdw13) ! df / dzeta
!
CALL padefit_ParSet2( sqrtrs, 1, ecP, vcP ) ! ecF = e_c Paramagnetic !<GPU:padefit_ParSet2=>padefit_ParSet2_d>
CALL padefit_ParSet2( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic !<GPU:padefit_ParSet2=>padefit_ParSet2_d>
CALL padefit_ParSet2( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness" !<GPU:padefit_ParSet2=>padefit_ParSet2_d>
CALL padefit_ParSet2( sqrtrs, 1, ecP, vcP ) ! ecF = e_c Paramagnetic
CALL padefit_ParSet2( sqrtrs, 2, ecF, vcF ) ! ecP = e_c Ferromagnetic
CALL padefit_ParSet2( sqrtrs, 3, ac, dac ) ! ac = "spin stiffness"
!
ac = ac * iddfz0
dac = dac * iddfz0
@ -837,7 +852,8 @@ END SUBROUTINE
!
!
!-----------------------------------------------------------------------
SUBROUTINE pw_spin( rs, zeta, ec, vc_up, vc_dw ) !<GPU:DEVICE>
SUBROUTINE pw_spin( rs, zeta, ec, vc_up, vc_dw )
!$acc routine (pw_spin) seq
!-----------------------------------------------------------------------
!! J.P. Perdew and Y. Wang, PRB 45, 13244 (1992).
!
@ -946,7 +962,8 @@ END SUBROUTINE pw_spin
!
!
!-----------------------------------------------------------------------------
SUBROUTINE lsd_lyp( rho, zeta, elyp, vlyp_up, vlyp_dw ) !<GPU:DEVICE>
SUBROUTINE lsd_lyp( rho, zeta, elyp, vlyp_up, vlyp_dw )
!$acc routine (lsd_lyp) seq
!==--------------------------------------------------------------==
!== C. LEE, W. YANG, AND R.G. PARR, PRB 37, 785 (1988) ==
!== THIS IS ONLY THE LDA PART ==

View File

@ -6,14 +6,15 @@
! or http://www.gnu.org/copyleft/gpl.txt .
!
!------------------------------------------------------------------------
MODULE exch_gga !<GPU:exch_gga=>exch_gga_gpu>
MODULE exch_gga
!------------------------------------------------------------------------
!! GGA exchange functionals
!
CONTAINS
!
!-----------------------------------------------------------------------
SUBROUTINE becke88( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE becke88( rho, grho, sx, v1x, v2x )
!$acc routine (becke88) seq
!-----------------------------------------------------------------------
!! Becke exchange: A.D. Becke, PRA 38, 3098 (1988)
!! only gradient-corrected part, no Slater term included
@ -56,7 +57,8 @@ END SUBROUTINE becke88
!
!
!-----------------------------------------------------------------------
SUBROUTINE ggax( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE ggax( rho, grho, sx, v1x, v2x )
!$acc routine (ggax) seq
!-----------------------------------------------------------------------
!! Perdew-Wang GGA (PW91), exchange part:
!! J.P. Perdew et al.,PRB 46, 6671 (1992)
@ -104,7 +106,8 @@ END SUBROUTINE ggax
!
!
!---------------------------------------------------------------
SUBROUTINE pbex( rho, grho, iflag, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE pbex( rho, grho, iflag, sx, v1x, v2x )
!$acc routine (pbex) seq
!---------------------------------------------------------------
!! PBE exchange (without Slater exchange):
!! iflag=1 J.P.Perdew, K.Burke, M.Ernzerhof, PRL 77, 3865 (1996)
@ -121,7 +124,7 @@ SUBROUTINE pbex( rho, grho, iflag, sx, v1x, v2x ) !<GPU:DEVIC
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: iflag !<GPU:VALUE>
INTEGER, INTENT(IN) :: iflag
REAL(DP), INTENT(IN) :: rho, grho
! input: charge and squared gradient
REAL(DP), INTENT(OUT) :: sx, v1x, v2x
@ -326,7 +329,8 @@ END SUBROUTINE pbex
!
!
!----------------------------------------------------------------------------
SUBROUTINE hcth( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE hcth( rho, grho, sx, v1x, v2x )
!$acc routine (hcth) seq
!--------------------------------------------------------------------------
!! HCTH/120, JCP 109, p. 6264 (1998)
!! Parameters set-up after N.L. Doltsisnis & M. Sprik (1999)
@ -401,10 +405,10 @@ SUBROUTINE hcth( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
rab = r3q2*ra
dra_drho = -0.260530881d0/rho_o34
drab_drho = r3q2*dra_drho
CALL pwcorr( ra, cg1, g, dg ) !<GPU:pwcorr=>pwcorr_d>
CALL pwcorr( ra, cg1, g, dg )
era1 = g
dera1_dra = dg
CALL pwcorr( rab, cg0, g, dg ) !<GPU:pwcorr=>pwcorr_d>
CALL pwcorr( rab, cg0, g, dg )
erab0 = g
derab0_drab = dg
ex = -0.75d0*r3pi*rho_o34
@ -452,7 +456,8 @@ SUBROUTINE hcth( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
END SUBROUTINE hcth
!
!-------------------------------------------------------
SUBROUTINE pwcorr( r, c, g, dg ) !<GPU:DEVICE>
SUBROUTINE pwcorr( r, c, g, dg )
!$acc routine (pwcorr) seq
!-----------------------------------------------------
!
USE kind_l, ONLY: DP
@ -481,7 +486,8 @@ END SUBROUTINE hcth
!
!
!-----------------------------------------------------------------------------
SUBROUTINE optx( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE optx( rho, grho, sx, v1x, v2x )
!$acc routine (optx) seq
!---------------------------------------------------------------------------
!! OPTX, Handy et al. JCP 116, p. 5411 (2002) and refs. therein
!! Present release: Mauro Boero, Tsukuba, 10/9/2002
@ -527,7 +533,8 @@ END SUBROUTINE optx
!
!
!---------------------------------------------------------------
SUBROUTINE wcx( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE wcx( rho, grho, sx, v1x, v2x )
!$acc routine (wcx) seq
!---------------------------------------------------------------
!! Wu-Cohen exchange (without Slater exchange):
!! Z. Wu and R. E. Cohen, PRB 73, 235116 (2006)
@ -599,14 +606,15 @@ END SUBROUTINE wcx
!
!
!-----------------------------------------------------------------------
SUBROUTINE pbexsr( rho, grho, sxsr, v1xsr, v2xsr, omega ) !<GPU:DEVICE>
SUBROUTINE pbexsr( rho, grho, sxsr, v1xsr, v2xsr, omega )
!$acc routine (pbexsr) seq
!---------------------------------------------------------------------
! INCLUDE 'cnst.inc'
USE kind_l, ONLY: DP
!
IMPLICIT NONE
!
REAL(DP), INTENT(IN) :: omega !<GPU:VALUE>
REAL(DP), INTENT(IN) :: omega
REAL(DP), INTENT(IN) :: rho, grho
REAL(DP), INTENT(OUT) :: sxsr, v1xsr, v2xsr
!
@ -635,7 +643,7 @@ SUBROUTINE pbexsr( rho, grho, sxsr, v1xsr, v2xsr, omega ) !<G
s = 8.572844D0 - 18.796223D0/s2
ENDIF
!
CALL wpbe_analy_erfc_approx_grad( rho, s, omega, fx, d1x, d2x ) !<GPU:wpbe_analy_erfc_approx_grad=>wpbe_analy_erfc_approx_grad_d>
CALL wpbe_analy_erfc_approx_grad( rho, s, omega, fx, d1x, d2x )
!
sxsr = ex*fx ! - ex
dsdn = -4.D0/3.D0*s/rho
@ -653,7 +661,8 @@ END SUBROUTINE pbexsr
!
!
!-----------------------------------------------------------------------
SUBROUTINE rPW86( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE rPW86( rho, grho, sx, v1x, v2x )
!$acc routine (rPW86) seq
!---------------------------------------------------------------------
!! PRB 33, 8800 (1986) and J. Chem. Theory comp. 5, 2754 (2009).
!
@ -697,7 +706,8 @@ END SUBROUTINE rPW86
!
!
!-----------------------------------------------------------------
SUBROUTINE c09x( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE c09x( rho, grho, sx, v1x, v2x )
!$acc routine (c09x) seq
!---------------------------------------------------------------
!! Cooper '09 exchange for vdW-DF (without Slater exchange):
!! V. R. Cooper, Phys. Rev. B 81, 161104(R) (2010)
@ -768,7 +778,8 @@ END SUBROUTINE c09x
!
!
!---------------------------------------------------------------
SUBROUTINE sogga( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE sogga( rho, grho, sx, v1x, v2x )
!$acc routine (sogga) seq
!-------------------------------------------------------------
!! SOGGA exchange
!
@ -827,14 +838,15 @@ END SUBROUTINE sogga
!
!
!-------------------------------------------------------------------------
SUBROUTINE pbexgau( rho, grho, sxsr, v1xsr, v2xsr, alpha_gau ) !<GPU:DEVICE>
SUBROUTINE pbexgau( rho, grho, sxsr, v1xsr, v2xsr, alpha_gau )
!$acc routine (pbexgau) seq
!-----------------------------------------------------------------------
!
USE kind_l, ONLY: DP
!
IMPLICIT NONE
!
REAL(DP), INTENT(IN) :: alpha_gau !<GPU:VALUE>
REAL(DP), INTENT(IN) :: alpha_gau
REAL(DP), INTENT(IN) :: rho, grho
REAL(DP), INTENT(OUT) :: sxsr, v1xsr, v2xsr
!
@ -858,7 +870,7 @@ SUBROUTINE pbexgau( rho, grho, sxsr, v1xsr, v2xsr, alpha_gau )
IF (s > 10.D0) THEN
s = 10.D0
ENDIF
CALL pbe_gauscheme( rho, s, alpha_gau, fx, d1x, d2x ) !<GPU:pbe_gauscheme=>pbe_gauscheme_d>
CALL pbe_gauscheme( rho, s, alpha_gau, fx, d1x, d2x )
sxsr = ex*fx ! - EX
dsdn = -4.D0/3.D0*s/rho
v1xsr = vx*fx + (dsdn*d2x+d1x)*ex ! - VX
@ -874,7 +886,8 @@ SUBROUTINE pbexgau( rho, grho, sxsr, v1xsr, v2xsr, alpha_gau )
END SUBROUTINE pbexgau
!
!-----------------------------------------------------------------------
SUBROUTINE pbe_gauscheme( rho, s, alpha_gau, Fx, dFxdr, dFxds ) !<GPU:DEVICE>
SUBROUTINE pbe_gauscheme( rho, s, alpha_gau, Fx, dFxdr, dFxds )
!$acc routine (pbe_gauscheme) seq
!--------------------------------------------------------------------
!
IMPLICIT NONE
@ -917,7 +930,7 @@ SUBROUTINE pbe_gauscheme( rho, s, alpha_gau, Fx, dFxdr, dFxds )
!
! cx = exp(-One/Four/bx/bx) - One
IF (ABS(One/bx/bx) < 1.0D-4) THEN
cx = TayExp(-One/bx/bx) !<GPU:TayExp=>TayExp_d>
cx = TayExp(-One/bx/bx)
ELSE
cx = EXP(-One/bx/bx) - One
ENDIF
@ -963,12 +976,13 @@ END SUBROUTINE pbe_gauscheme
!
!
!-------------------------------------------------
FUNCTION TayExp(X) !<GPU:DEVICE>
FUNCTION TayExp(X)
!$acc routine (TayExp) seq
!-------------------------------------------
USE kind_l, ONLY: DP
IMPLICIT NONE
REAL(DP), INTENT(IN) :: X
REAL(DP) :: TAYEXP !<GPU:TAYEXP=>TAYEXP_d>
REAL(DP) :: TAYEXP
INTEGER :: NTERM,I
REAL(DP) :: SUMVAL,IVAL,COEF
PARAMETER (NTERM=16)
@ -981,7 +995,7 @@ FUNCTION TayExp(X) !<GPU:DEVICE>
IVAL = IVAL * (X / COEF)
SUMVAL = SUMVAL + IVAL
10 CONTINUE
TAYEXP = SUMVAL !<GPU:TAYEXP=>TAYEXP_d>
TAYEXP = SUMVAL
!
RETURN
!
@ -990,7 +1004,8 @@ END FUNCTION TayExp
!
!
!-------------------------------------------------------------------------
SUBROUTINE PW86( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE PW86( rho, grho, sx, v1x, v2x )
!$acc routine (PW86) seq
!-----------------------------------------------------------------------
!! Perdew-Wang 1986 exchange gradient correction: PRB 33, 8800 (1986)
!
@ -1034,7 +1049,8 @@ END SUBROUTINE PW86
!
!
!-----------------------------------------------------------------------
SUBROUTINE becke86b( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE becke86b( rho, grho, sx, v1x, v2x )
!$acc routine (becke86b) seq
!-----------------------------------------------------------------------
!! Becke 1986 gradient correction to exchange
!! A.D. Becke, J. Chem. Phys. 85 (1986) 7184
@ -1074,7 +1090,8 @@ END SUBROUTINE becke86b
!
!
!---------------------------------------------------------------
SUBROUTINE b86b( rho, grho, iflag, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE b86b( rho, grho, iflag, sx, v1x, v2x )
!$acc routine (b86b) seq
!-------------------------------------------------------------
!! Becke exchange (without Slater exchange):
!! iflag=1: A. D. Becke, J. Chem. Phys. 85, 7184 (1986) (B86b)
@ -1088,7 +1105,7 @@ SUBROUTINE b86b( rho, grho, iflag, sx, v1x, v2x ) !<GPU:DEVIC
USE kind_l, ONLY : DP
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: iflag !<GPU:VALUE>
INTEGER, INTENT(IN) :: iflag
REAL(DP), INTENT(IN) :: rho, grho
REAL(DP), INTENT(OUT) :: sx, v1x, v2x
!
@ -1144,7 +1161,8 @@ END SUBROUTINE b86b
!
!
!-----------------------------------------------------------------------
SUBROUTINE cx13( rho, grho, sx, v1x, v2x ) !<GPU:DEVICE>
SUBROUTINE cx13( rho, grho, sx, v1x, v2x )
!$acc routine (cx13) seq
!-----------------------------------------------------------------------
!! The new exchange partner for a vdW-DF1-cx suggested
!! by K. Berland and P. Hyldgaard, see PRB 89, 035412 (2014),
@ -1201,7 +1219,8 @@ END SUBROUTINE cx13
! ===========> SPIN <===========
!
!-----------------------------------------------------------------------
SUBROUTINE becke88_spin( rho_up, rho_dw, grho_up, grho_dw, sx_up, sx_dw, v1x_up, v1x_dw, v2x_up, v2x_dw ) !<GPU:DEVICE>
SUBROUTINE becke88_spin( rho_up, rho_dw, grho_up, grho_dw, sx_up, sx_dw, v1x_up, v1x_dw, v2x_up, v2x_dw )
!$acc routine (becke88_spin) seq
!-----------------------------------------------------------------------
!! Becke exchange: A.D. Becke, PRA 38, 3098 (1988) - Spin polarized case
!
@ -1261,7 +1280,8 @@ END SUBROUTINE becke88_spin
!
!
!-----------------------------------------------------------------------------
SUBROUTINE wpbe_analy_erfc_approx_grad( rho, s, omega, Fx_wpbe, d1rfx, d1sfx ) !<GPU:DEVICE>
SUBROUTINE wpbe_analy_erfc_approx_grad( rho, s, omega, Fx_wpbe, d1rfx, d1sfx )
!$acc routine (wpbe_analy_erfc_approx_grad) seq
!-----------------------------------------------------------------------
!
! wPBE Enhancement Factor (erfc approx.,analytical, gradients)
@ -1653,8 +1673,7 @@ SUBROUTINE wpbe_analy_erfc_approx_grad( rho, s, omega, Fx_wpbe, d1rfx, d1sfx )
!
piexperf = pi*EXP(HsbwA94)*ERFC(HsbwA9412)
! expei = Exp(HsbwA94)*Ei(-HsbwA94)
expei = EXP(HsbwA94)*(-expint(1,HsbwA94)) !<GPU:expint=>expint_d>
expei = EXP(HsbwA94)*(-expint(1,HsbwA94))
ELSE
!
! print *,rho,s," LARGE HsbwA94"
@ -1860,7 +1879,8 @@ SUBROUTINE wpbe_analy_erfc_approx_grad( rho, s, omega, Fx_wpbe, d1rfx, d1sfx )
END SUBROUTINE wpbe_analy_erfc_approx_grad
!
!------------------------------------------------------------------
FUNCTION EXPINT(n, x) !<GPU:DEVICE>
FUNCTION EXPINT(n, x)
!$acc routine (expint) seq
!-----------------------------------------------------------------------
! Evaluates the exponential integral E_n(x)
! Parameters: maxit is the maximum allowed number of iterations,
@ -1872,7 +1892,7 @@ FUNCTION EXPINT(n, x) !<GPU:DEVICE>
IMPLICIT NONE
INTEGER, INTENT(IN) :: n
REAL(DP), INTENT(IN) :: x
REAL(DP) :: expint !<GPU:expint=>expint_d>
REAL(DP) :: expint
INTEGER, parameter :: maxit=200
REAL(DP), parameter :: eps=1E-12, big=huge(x)*eps
REAL(DP), parameter :: euler = 0.577215664901532860606512d0
@ -1887,12 +1907,12 @@ FUNCTION EXPINT(n, x) !<GPU:DEVICE>
END IF
IF (n == 0) THEN
expint = exp(-x)/x !<GPU:expint=>expint_d>
expint = exp(-x)/x
RETURN
END IF
nm1 = n-1
IF (x == 0.0d0) THEN
expint = 1.0d0/nm1 !<GPU:expint=>expint_d>
expint = 1.0d0/nm1
ELSE IF (x > 1.0d0) THEN
b = x+n
c = big
@ -1908,12 +1928,12 @@ FUNCTION EXPINT(n, x) !<GPU:DEVICE>
IF (ABS(del-1.0d0) <= EPS) EXIT
END DO
IF (i > maxit) STOP !CALL xclib_error('expint','continued fraction failed',1)
expint = h*EXP(-x) !<GPU:expint=>expint_d>
expint = h*EXP(-x)
ELSE
IF (nm1 /= 0) THEN
expint = 1.0d0/nm1 !<GPU:expint=>expint_d>
expint = 1.0d0/nm1
ELSE
expint = -LOG(x)-euler !<GPU:expint=>expint_d>
expint = -LOG(x)-euler
END IF
fact = 1.0d0
do i=1,maxit
@ -1930,8 +1950,8 @@ FUNCTION EXPINT(n, x) !<GPU:DEVICE>
del = fact*(-LOG(x)-euler+iarsum)
! del = fact*(-LOG(x)-euler+sum(1.0d0/arth(1,1,nm1)))
END IF
expint = expint + del !<GPU:expint=>expint_d>
IF (ABS(del) < ABS(expint)*eps) EXIT !<GPU:expint=>expint_d>
expint = expint + del
IF (ABS(del) < ABS(expint)*eps) EXIT
END DO
IF (i > maxit) STOP !CALL xclib_error('expint','series failed',1)
END IF

View File

@ -6,14 +6,15 @@
! or http://www.gnu.org/copyleft/gpl.txt .
!
!----------------------------------------------------------------------
MODULE exch_lda !<GPU:exch_lda=>exch_lda_gpu>
MODULE exch_lda
!----------------------------------------------------------------------
!! LDA exchange functionals.
!
CONTAINS
!
!-----------------------------------------------------------------------
SUBROUTINE slater( rs, ex, vx ) !<GPU:DEVICE>
SUBROUTINE slater( rs, ex, vx )
!$acc routine (slater) seq
!---------------------------------------------------------------------
!! Slater exchange with alpha=2/3
!
@ -41,7 +42,8 @@ END SUBROUTINE slater
!
!
!-----------------------------------------------------------------------
SUBROUTINE slater1( rs, ex, vx ) !<GPU:DEVICE>
SUBROUTINE slater1( rs, ex, vx )
!$acc routine (slater1) seq
!---------------------------------------------------------------------
!! Slater exchange with alpha=1, corresponding to -1.374/r_s Ry.
!! Used to recover old results.
@ -70,7 +72,8 @@ END SUBROUTINE slater1
!
!
!-----------------------------------------------------------------------
SUBROUTINE slater_rxc( rs, ex, vx ) !<GPU:DEVICE>
SUBROUTINE slater_rxc( rs, ex, vx )
!$acc routine (slater_rxc) seq
!---------------------------------------------------------------------
!! Slater exchange with alpha=2/3 and Relativistic exchange.
!
@ -123,7 +126,8 @@ END SUBROUTINE slater_rxc
!
!
!-----------------------------------------------------------------------
SUBROUTINE slaterKZK( rs, ex, vx, vol ) !<GPU:DEVICE>
SUBROUTINE slaterKZK( rs, ex, vx, vol )
!$acc routine (slaterKZK) seq
!---------------------------------------------------------------------
!! Slater exchange with alpha=2/3, Kwee, Zhang and Krakauer KE
!! correction.
@ -138,7 +142,7 @@ SUBROUTINE slaterKZK( rs, ex, vx, vol ) !<GPU:DEVICE>
!! Exchange energy (per unit volume)
REAL(DP), INTENT(OUT) :: vx
!! Exchange potential
REAL(DP) :: vol !<GPU:VALUE>
REAL(DP) :: vol
!! Finite size volume element
!
! ... local variables
@ -177,7 +181,8 @@ END SUBROUTINE slaterKZK
! ... LSDA
!
!-----------------------------------------------------------------------
SUBROUTINE slater_spin( rho, zeta, ex, vx_up, vx_dw ) !<GPU:DEVICE>
SUBROUTINE slater_spin( rho, zeta, ex, vx_up, vx_dw )
!$acc routine (slater_spin) seq
!-----------------------------------------------------------------------
!! Slater exchange with alpha=2/3, spin-polarized case.
!
@ -218,7 +223,8 @@ END SUBROUTINE slater_spin
!
!
!-----------------------------------------------------------------------
SUBROUTINE slater_rxc_spin( rho, z, ex, vx_up, vx_dw ) !<GPU:DEVICE>
SUBROUTINE slater_rxc_spin( rho, z, ex, vx_up, vx_dw )
!$acc routine (slater_rxc_spin) seq
!-----------------------------------------------------------------------
!! Slater exchange with alpha=2/3, relativistic exchange case.
!! Spin-polarized case.
@ -284,7 +290,8 @@ END SUBROUTINE slater_rxc_spin
!
!
!-----------------------------------------------------------------------
SUBROUTINE slater1_spin( rho, zeta, ex, vx_up, vx_dw ) !<GPU:DEVICE>
SUBROUTINE slater1_spin( rho, zeta, ex, vx_up, vx_dw )
!$acc routine (slater1_spin) seq
!-----------------------------------------------------------------------
!! Slater exchange with alpha=2/3, spin-polarized case
!