mirror of https://gitlab.com/QEF/q-e.git
Merge branch 'Vxc_on_gpu2' into 'develop'
Vxc on gpu See merge request QEF/q-e!1630
This commit is contained in:
commit
5f9ebda198
|
@ -306,11 +306,18 @@ endif(QE_ENABLE_OPENMP)
|
|||
# OpenACC
|
||||
# The following targets will be defined:
|
||||
add_library(qe_openacc_fortran INTERFACE)
|
||||
qe_install_targets(qe_openacc_fortran)
|
||||
add_library(qe_openacc_c INTERFACE)
|
||||
qe_install_targets(qe_openacc_fortran qe_openacc_c)
|
||||
###########################################################
|
||||
if(QE_ENABLE_OPENACC)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.16.0)
|
||||
message(FATAL_ERROR "CMake versions >= 3.16 required for QE_ENABLE_OPENACC=ON!")
|
||||
endif()
|
||||
find_package(OpenACC REQUIRED Fortran C)
|
||||
target_link_libraries(qe_openacc_fortran
|
||||
INTERFACE OpenACC::OpenACC_Fortran)
|
||||
target_link_libraries(qe_openacc_c
|
||||
INTERFACE OpenACC::OpenACC_C)
|
||||
endif(QE_ENABLE_OPENACC)
|
||||
|
||||
###########################################################
|
||||
|
|
|
@ -373,7 +373,7 @@
|
|||
subroutine exch_corr_cp(nnr,nspin,grhor,rhor,etxc)
|
||||
use kinds, only: DP
|
||||
use xc_lib, only: xclib_dft_is, xclib_dft_is_libxc, xclib_get_id, &
|
||||
xclib_set_threshold, xc_gcx
|
||||
xclib_set_threshold, xc, xc_gcx
|
||||
implicit none
|
||||
integer, intent(in) :: nnr
|
||||
integer, intent(in) :: nspin
|
||||
|
|
|
@ -11,7 +11,7 @@ SUBROUTINE tpssmeta(nnr, nspin,grho,rho,kedtau,etxc)
|
|||
! ===================
|
||||
!--------------------------------------------------------------------
|
||||
use kinds, only: dp
|
||||
use xc_lib, only: xclib_set_threshold
|
||||
use xc_lib, only: xclib_set_threshold, xc_metagcx
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
|
|
@ -23,7 +23,7 @@ MODULE fft_helper_subroutines
|
|||
#endif
|
||||
END INTERFACE
|
||||
PRIVATE
|
||||
PUBLIC :: fftx_threed2oned, fftx_oned2threed
|
||||
PUBLIC :: fftx_threed2oned, fftx_threed2oned_gpu, fftx_oned2threed
|
||||
PUBLIC :: tg_reduce_rho
|
||||
PUBLIC :: tg_get_nnr, tg_get_recip_inc, fftx_ntgrp, fftx_tgpe, &
|
||||
tg_get_group_nr3
|
||||
|
@ -469,6 +469,8 @@ CONTAINS
|
|||
END SUBROUTINE
|
||||
|
||||
SUBROUTINE fftx_threed2oned( desc, vin, vout1, vout2 )
|
||||
!! Copy charge density from 3D array to 1D array in Fourier
|
||||
!! space.
|
||||
USE fft_param
|
||||
USE fft_types, ONLY : fft_type_descriptor
|
||||
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
||||
|
@ -490,7 +492,40 @@ CONTAINS
|
|||
END DO
|
||||
END IF
|
||||
END SUBROUTINE
|
||||
|
||||
|
||||
SUBROUTINE fftx_threed2oned_gpu( desc, vin_d, vout1_d, vout2_d )
|
||||
!! GPU version of \(\texttt{fftx_threed2oned}\).
|
||||
USE fft_param
|
||||
USE fft_types, ONLY : fft_type_descriptor
|
||||
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
||||
complex(DP), INTENT(OUT) :: vout1_d(:)
|
||||
complex(DP), OPTIONAL, INTENT(OUT) :: vout2_d(:)
|
||||
complex(DP), INTENT(IN) :: vin_d(:)
|
||||
INTEGER, POINTER :: nl_d(:), nlm_d(:)
|
||||
COMPLEX(DP) :: fp, fm
|
||||
INTEGER :: ig
|
||||
#if defined(__CUDA) && defined(_OPENACC)
|
||||
attributes(DEVICE) :: nl_d, nlm_d
|
||||
!$acc data deviceptr( vout1_d(:), vout2_d(:), vin_d(:) )
|
||||
nl_d => desc%nl_d
|
||||
nlm_d => desc%nlm_d
|
||||
IF( PRESENT( vout2_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ig=1,desc%ngm
|
||||
fp=vin_d(nl_d(ig))+vin_d(nlm_d(ig))
|
||||
fm=vin_d(nl_d(ig))-vin_d(nlm_d(ig))
|
||||
vout1_d(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
|
||||
vout2_d(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ig=1,desc%ngm
|
||||
vout1_d(ig) = vin_d(nl_d(ig))
|
||||
END DO
|
||||
END IF
|
||||
!$acc end data
|
||||
#endif
|
||||
END SUBROUTINE
|
||||
|
||||
SUBROUTINE fftx_psi2c_gamma( desc, vin, vout1, vout2 )
|
||||
USE fft_param
|
||||
|
|
|
@ -16,7 +16,7 @@ MODULE fft_rho
|
|||
!
|
||||
IMPLICIT NONE
|
||||
PRIVATE
|
||||
PUBLIC :: rho_r2g, rho_g2r
|
||||
PUBLIC :: rho_r2g, rho_g2r, rho_r2g_gpu
|
||||
!
|
||||
INTERFACE rho_g2r
|
||||
MODULE PROCEDURE rho_g2r_1, rho_g2r_2, rho_g2r_sum_components
|
||||
|
@ -24,7 +24,10 @@ MODULE fft_rho
|
|||
!
|
||||
CONTAINS
|
||||
!
|
||||
!-------------------------------------------------------------
|
||||
SUBROUTINE rho_r2g ( desc, rhor, rhog, v )
|
||||
!------------------------------------------------------
|
||||
!! Bring charge density rho from real to G- space
|
||||
USE fft_types, ONLY: fft_type_descriptor
|
||||
USE fft_helper_subroutines, ONLY: fftx_threed2oned
|
||||
!
|
||||
|
@ -93,6 +96,97 @@ CONTAINS
|
|||
|
||||
END SUBROUTINE rho_r2g
|
||||
!
|
||||
!-----------------------------------------------------------------
|
||||
SUBROUTINE rho_r2g_gpu( desc, rhor_d, rhog_d, v_d )
|
||||
!---------------------------------------------------------------
|
||||
!! Bring charge density rho from real to G- space - GPU version.
|
||||
!
|
||||
USE fft_types, ONLY: fft_type_descriptor
|
||||
USE fft_helper_subroutines, ONLY: fftx_threed2oned_gpu
|
||||
!
|
||||
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
||||
REAL(dp), INTENT(in) :: rhor_d(:,:)
|
||||
!! rho in real space
|
||||
COMPLEX(dp), INTENT(OUT):: rhog_d(:,:)
|
||||
!! rho in G-space
|
||||
REAL(dp), OPTIONAL, INTENT(in) :: v_d(:)
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: ir, ig, iss, isup, isdw
|
||||
INTEGER :: nspin
|
||||
COMPLEX(dp):: fp, fm
|
||||
COMPLEX(dp), ALLOCATABLE :: psi_d(:)
|
||||
!
|
||||
!$acc data deviceptr( rhor_d(:,:), rhog_d(:,:), v_d(:) )
|
||||
!
|
||||
nspin= SIZE(rhor_d, 2)
|
||||
!
|
||||
ALLOCATE( psi_d(desc%nnr) )
|
||||
!$acc data create( psi_d(desc%nnr) )
|
||||
!$acc host_data use_device( psi_d )
|
||||
IF( nspin == 1 ) THEN
|
||||
iss=1
|
||||
IF( PRESENT( v_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss)+v_d(ir),0.0_dp,kind=dp)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss),0.0_dp,kind=dp)
|
||||
END DO
|
||||
END IF
|
||||
CALL fwfft('Rho', psi_d, desc )
|
||||
CALL fftx_threed2oned_gpu( desc, psi_d, rhog_d(:,iss) )
|
||||
ELSE
|
||||
IF ( gamma_only ) THEN
|
||||
! nspin/2 = 1 for LSDA, = 2 for noncolinear
|
||||
DO iss=1,nspin/2
|
||||
isup=1+(iss-1)*nspin/2 ! 1 for LSDA, 1 and 3 for noncolinear
|
||||
isdw=2+(iss-1)*nspin/2 ! 2 for LSDA, 2 and 4 for noncolinear
|
||||
IF( PRESENT( v_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,isup)+v_d(ir),rhor_d(ir,isdw)+v_d(ir),kind=dp)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,isup),rhor_d(ir,isdw),kind=dp)
|
||||
END DO
|
||||
END IF
|
||||
CALL fwfft('Rho', psi_d, desc )
|
||||
CALL fftx_threed2oned_gpu( desc, psi_d, rhog_d(:,isup), rhog_d(:,isdw) )
|
||||
END DO
|
||||
ELSE
|
||||
DO iss=1,nspin
|
||||
IF( PRESENT( v_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss)+v_d(ir),0.0_dp,kind=dp)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss),0.0_dp,kind=dp)
|
||||
END DO
|
||||
END IF
|
||||
CALL fwfft('Rho', psi_d, desc )
|
||||
CALL fftx_threed2oned_gpu( desc, psi_d, rhog_d(:,iss) )
|
||||
END DO
|
||||
END IF
|
||||
ENDIF
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( psi_d )
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
END SUBROUTINE rho_r2g_gpu
|
||||
!
|
||||
!
|
||||
SUBROUTINE rho_g2r_1 ( desc, rhog, rhor )
|
||||
USE fft_types, ONLY: fft_type_descriptor
|
||||
USE fft_helper_subroutines, ONLY: fftx_threed2oned, fftx_oned2threed
|
||||
|
|
|
@ -264,7 +264,152 @@ SUBROUTINE fft_gradient_g2r( dfft, a, g, ga )
|
|||
RETURN
|
||||
!
|
||||
END SUBROUTINE fft_gradient_g2r
|
||||
|
||||
!
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE fft_gradient_g2r_gpu( dfft, a_d, g_d, ga_d )
|
||||
!----------------------------------------------------------------------------
|
||||
!! GPU double of \(\texttt{fft_gradient_g2r}\).
|
||||
!
|
||||
USE cell_base, ONLY : tpiba
|
||||
USE kinds, ONLY : DP
|
||||
USE fft_interfaces, ONLY : invfft
|
||||
USE fft_types, ONLY : fft_type_descriptor
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
TYPE(fft_type_descriptor),INTENT(IN) :: dfft
|
||||
!! FFT descriptor
|
||||
COMPLEX(DP), INTENT(IN) :: a_d(dfft%ngm)
|
||||
!! a(G), a complex function in G-space
|
||||
REAL(DP), INTENT(IN) :: g_d(3,dfft%ngm)
|
||||
!! G-vectors, in \( 2\pi/a \) units
|
||||
REAL(DP), INTENT(OUT) :: ga_d(3,dfft%nnr)
|
||||
!! The gradient of \({\bf a}\), real, on the real-space FFT grid
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: ipol, n, ip
|
||||
INTEGER, POINTER :: nl_d(:), nlm_d(:)
|
||||
COMPLEX(DP), ALLOCATABLE :: gaux_d(:)
|
||||
!
|
||||
#if defined(__CUDA) && defined(_OPENACC)
|
||||
attributes(DEVICE) :: g_d, nl_d, nlm_d
|
||||
!
|
||||
!$acc data deviceptr( a_d(dfft%ngm), ga_d(3,dfft%nnr) )
|
||||
!
|
||||
nl_d => dfft%nl_d
|
||||
nlm_d => dfft%nlm_d
|
||||
!
|
||||
ALLOCATE( gaux_d(dfft%nnr) )
|
||||
!$acc data create( gaux_d )
|
||||
!$acc host_data use_device( gaux_d )
|
||||
!
|
||||
IF ( dfft%lgamma ) THEN
|
||||
!
|
||||
! ... Gamma tricks: perform 2 FFT's in a single shot
|
||||
! x and y
|
||||
ipol = 1
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
DO ip = 1, 3
|
||||
ga_d(ip,n) = 0.D0
|
||||
ENDDO
|
||||
gaux_d(n) = (0.0_dp,0.0_dp)
|
||||
ENDDO
|
||||
!
|
||||
! ... multiply a(G) by iG to get the gradient in real space
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%ngm
|
||||
gaux_d(nl_d(n) ) = CMPLX( 0.0_dp, g_d(ipol,n),kind=DP)* a_d(n) - &
|
||||
CMPLX(g_d(ipol+1,n),kind=DP) * a_d(n)
|
||||
gaux_d(nlm_d(n)) = CMPLX( 0.0_dp,-g_d(ipol,n),kind=DP)*CONJG(a_d(n)) + &
|
||||
CMPLX(g_d(ipol+1,n),kind=DP) * CONJG(a_d(n))
|
||||
ENDDO
|
||||
!
|
||||
! ... bring back to R-space, (\grad_ipol a)(r) ...
|
||||
!
|
||||
CALL invfft( 'Rho', gaux_d, dfft )
|
||||
!
|
||||
! ... bring back to R-space, (\grad_ipol a)(r)
|
||||
! ... add the factor 2\pi/a missing in the definition of q+G
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
ga_d(ipol, n) = REAL( gaux_d(n), kind=DP ) * tpiba
|
||||
ga_d(ipol+1, n) = AIMAG( gaux_d(n) ) * tpiba
|
||||
ENDDO
|
||||
! ... for z
|
||||
ipol = 3
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
gaux_d(n) = (0.0_dp,0.0_dp)
|
||||
ENDDO
|
||||
!
|
||||
! ... multiply a(G) by iG to get the gradient in real space
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%ngm
|
||||
gaux_d(nl_d(n)) = CMPLX(g_d(ipol,n),kind=DP) * CMPLX( -AIMAG(a_d(n)), REAL(a_d(n)),kind=DP)
|
||||
gaux_d(nlm_d(n)) = CONJG( gaux_d(nl_d(n)) )
|
||||
ENDDO
|
||||
!
|
||||
! ... bring back to R-space, (\grad_ipol a)(r) ...
|
||||
!
|
||||
CALL invfft( 'Rho', gaux_d, dfft )
|
||||
!
|
||||
! ...and add the factor 2\pi/a missing in the definition of G
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
ga_d(ipol,n) = tpiba * REAL( gaux_d(n), kind=DP )
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
DO ipol = 1, 3
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
ga_d(ipol,n) = 0.D0
|
||||
gaux_d(n) = (0.0_dp,0.0_dp)
|
||||
ENDDO
|
||||
!
|
||||
! ... multiply a(G) by iG to get the gradient in real space
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%ngm
|
||||
gaux_d(nl_d(n)) = CMPLX(g_d(ipol,n), kind=DP) * CMPLX( -AIMAG(a_d(n)), REAL(a_d(n)), kind=DP)
|
||||
ENDDO
|
||||
!
|
||||
! ... bring back to R-space, (\grad_ipol a)(r) ...
|
||||
!
|
||||
CALL invfft( 'Rho', gaux_d, dfft )
|
||||
!
|
||||
! ...and add the factor 2\pi/a missing in the definition of G
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
ga_d(ipol,n) = tpiba * REAL( gaux_d(n), kind=DP )
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( gaux_d )
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
#endif
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE fft_gradient_g2r_gpu
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE fft_graddot( dfft, a, g, da )
|
||||
!----------------------------------------------------------------------------
|
||||
|
@ -373,7 +518,152 @@ SUBROUTINE fft_graddot( dfft, a, g, da )
|
|||
RETURN
|
||||
!
|
||||
END SUBROUTINE fft_graddot
|
||||
|
||||
!
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE fft_graddot_gpu( dfft, a_d, g_d, da_d )
|
||||
!---------------------------------------------------------------------------
|
||||
!! Calculates \( da = \sum_i \nabla_i a_i \) in R-space.
|
||||
!
|
||||
USE cell_base, ONLY : tpiba
|
||||
USE kinds, ONLY : DP
|
||||
USE fft_interfaces, ONLY : fwfft, invfft
|
||||
USE fft_types, ONLY : fft_type_descriptor
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
TYPE(fft_type_descriptor),INTENT(IN) :: dfft
|
||||
!! FFT descriptor
|
||||
REAL(DP), INTENT(IN) :: a_d(3,dfft%nnr)
|
||||
!! A real function on the real-space FFT grid
|
||||
REAL(DP), INTENT(IN) :: g_d(3,dfft%ngm)
|
||||
!! G-vectors, in \( 2\pi/a \) units
|
||||
REAL(DP), INTENT(OUT) :: da_d(dfft%nnr)
|
||||
!! \( \sum_i \nabla_i a_i \), real, on the real-space FFT grid
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: n, ipol
|
||||
INTEGER, POINTER :: nl_d(:), nlm_d(:)
|
||||
COMPLEX(DP), ALLOCATABLE :: aux_d(:), gaux_d(:)
|
||||
COMPLEX(DP) :: fp, fm, aux1, aux2
|
||||
!
|
||||
#if defined(__CUDA) && defined(_OPENACC)
|
||||
attributes(DEVICE) :: g_d, nl_d, nlm_d
|
||||
!
|
||||
!$acc data deviceptr( a_d(3,dfft%ngm), da_d(dfft%nnr) )
|
||||
!
|
||||
nl_d => dfft%nl_d
|
||||
nlm_d => dfft%nlm_d
|
||||
!
|
||||
ALLOCATE( aux_d(dfft%nnr), gaux_d(dfft%nnr) )
|
||||
!$acc data create( aux_d, gaux_d )
|
||||
!$acc host_data use_device( aux_d, gaux_d )
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
gaux_d(n) = (0.0_dp,0.0_dp)
|
||||
ENDDO
|
||||
!
|
||||
IF ( dfft%lgamma ) THEN
|
||||
!
|
||||
! Gamma tricks: perform 2 FFT's in a single shot
|
||||
! x and y
|
||||
ipol = 1
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
aux_d(n) = CMPLX( a_d(ipol,n), a_d(ipol+1,n), kind=DP)
|
||||
ENDDO
|
||||
!
|
||||
! ... bring a(ipol,r) to G-space, a(G) ...
|
||||
!
|
||||
CALL fwfft( 'Rho', aux_d, dfft )
|
||||
!
|
||||
! ... multiply by iG to get the gradient in G-space
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%ngm
|
||||
fp = (aux_d(nl_d(n)) + aux_d(nlm_d(n)))*0.5_dp
|
||||
fm = (aux_d(nl_d(n)) - aux_d(nlm_d(n)))*0.5_dp
|
||||
aux1 = CMPLX( REAL(fp), AIMAG(fm), kind=DP)
|
||||
aux2 = CMPLX(AIMAG(fp), -REAL(fm), kind=DP)
|
||||
gaux_d(nl_d(n)) = &
|
||||
CMPLX(0.0_dp, g_d(ipol ,n),kind=DP) * aux1 + &
|
||||
CMPLX(0.0_dp, g_d(ipol+1,n),kind=DP) * aux2
|
||||
ENDDO
|
||||
! ... for z
|
||||
ipol = 3
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
aux_d(n) = CMPLX( a_d(ipol,n), 0.0_dp, kind=DP)
|
||||
ENDDO
|
||||
!
|
||||
! ... bring a(ipol,r) to G-space, a(G) ...
|
||||
!
|
||||
CALL fwfft( 'Rho', aux_d, dfft )
|
||||
!
|
||||
! ... multiply by iG to get the gradient in G-space
|
||||
! ... fill both gaux(G) and gaux(-G) = gaux*(G)
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%ngm
|
||||
gaux_d(nl_d(n)) = gaux_d(nl_d(n)) + CMPLX(g_d(ipol,n),kind=DP) * &
|
||||
CMPLX( -AIMAG( aux_d(nl_d(n)) ), &
|
||||
REAL( aux_d(nl_d(n)) ), kind=DP)
|
||||
gaux_d(nlm_d(n)) = CONJG( gaux_d(nl_d(n)) )
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
DO ipol = 1, 3
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
aux_d(n) = CMPLX( a_d(ipol,n), 0.0_dp, kind=DP)
|
||||
ENDDO
|
||||
!
|
||||
! ... bring a(ipol,r) to G-space, a(G) ...
|
||||
!
|
||||
CALL fwfft( 'Rho', aux_d, dfft )
|
||||
!
|
||||
! ... multiply by iG to get the gradient in G-space
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%ngm
|
||||
gaux_d(nl_d(n)) = gaux_d(nl_d(n)) + CMPLX(g_d(ipol,n),kind=DP) * &
|
||||
CMPLX( -AIMAG( aux_d(nl_d(n)) ), &
|
||||
REAL( aux_d(nl_d(n)),kind=DP ), kind=DP)
|
||||
ENDDO
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
! ... bring back to R-space, (\grad_ipol a)(r) ...
|
||||
!
|
||||
CALL invfft( 'Rho', gaux_d, dfft )
|
||||
!
|
||||
! ... add the factor 2\pi/a missing in the definition of G and sum
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO n = 1, dfft%nnr
|
||||
da_d(n) = tpiba * REAL( gaux_d(n), kind=DP )
|
||||
ENDDO
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( aux_d, gaux_d )
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
#endif
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE fft_graddot_gpu
|
||||
!
|
||||
!
|
||||
!--------------------------------------------------------------------
|
||||
SUBROUTINE fft_qgraddot ( dfft, a, xq, g, da)
|
||||
!--------------------------------------------------------------------
|
||||
|
|
|
@ -55,7 +55,7 @@ PROGRAM do_ppacf
|
|||
USE funct, ONLY : dft_is_nonlocc, nlc, enforce_input_dft
|
||||
USE xc_lib, ONLY : xclib_get_id, xclib_set_auxiliary_flags, &
|
||||
xclib_set_exx_fraction, xclib_dft_is_libxc, &
|
||||
xclib_set_threshold, xc_gcx
|
||||
xclib_set_threshold, xc, xc_gcx
|
||||
USE wvfct, ONLY : npw, npwx
|
||||
USE environment, ONLY : environment_start, environment_end
|
||||
USE vdW_DF, ONLY : Nqs, vdW_DF_potential, vdW_DF_energy, vdW_DF_analysis
|
||||
|
|
|
@ -53,3 +53,60 @@
|
|||
END IF
|
||||
RETURN
|
||||
END SUBROUTINE compute_rho
|
||||
!
|
||||
!
|
||||
!------------------------------------------------------------------------------
|
||||
SUBROUTINE compute_rho_gpu( rho_d, rhoout_d, segni_d, nrxx )
|
||||
!--------------------------------------------------------------------------
|
||||
!! GPU double of \(\texttt{compute_rho}\).
|
||||
!
|
||||
USE kinds, ONLY : DP
|
||||
USE noncollin_module, ONLY : lsign, ux
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER :: nrxx
|
||||
!! input: the dimension of the mesh
|
||||
REAL(DP), INTENT(IN) :: rho_d(nrxx,4)
|
||||
!! the four components of the charge
|
||||
REAL(DP), INTENT(OUT) :: rhoout_d(nrxx,2)
|
||||
!! the spin up and spin down charge
|
||||
REAL(DP), INTENT(OUT) :: segni_d(nrxx)
|
||||
!! the orientation when needed
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
REAL(DP) :: ux1, ux2, ux3, amag
|
||||
INTEGER :: ir ! counter on mesh points
|
||||
!
|
||||
!$acc data deviceptr( rho_d(nrxx,4), rhoout_d(nrxx,2), segni_d(nrxx) )
|
||||
!
|
||||
IF (lsign) THEN
|
||||
!
|
||||
ux1 = ux(1) ; ux2 = ux(2) ; ux3 = ux(3)
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, nrxx
|
||||
segni_d(ir) = SIGN(1.0_DP,rho_d(ir,2)*ux1+rho_d(ir,3)*ux2+rho_d(ir,4)*ux3)
|
||||
amag = SQRT(rho_d(ir,2)**2+rho_d(ir,3)**2+rho_d(ir,4)**2)
|
||||
rhoout_d(ir,1) = 0.5d0*(rho_d(ir,1)+segni_d(ir)*amag)
|
||||
rhoout_d(ir,2) = 0.5d0*(rho_d(ir,1)-segni_d(ir)*amag)
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, nrxx
|
||||
segni_d(ir) = 1.0_DP
|
||||
amag = SQRT(rho_d(ir,2)**2+rho_d(ir,3)**2+rho_d(ir,4)**2)
|
||||
rhoout_d(ir,1) = 0.5d0*(rho_d(ir,1) + amag)
|
||||
rhoout_d(ir,2) = 0.5d0*(rho_d(ir,1) - amag)
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE compute_rho_gpu
|
||||
|
|
|
@ -13,14 +13,15 @@ SUBROUTINE gradcorr( rho, rhog, rho_core, rhog_core, etxc, vtxc, v )
|
|||
!
|
||||
USE constants, ONLY : e2
|
||||
USE kinds, ONLY : DP
|
||||
USE gvect, ONLY : ngm, g
|
||||
USE gvect, ONLY : ngm, g, g_d
|
||||
USE lsda_mod, ONLY : nspin
|
||||
USE cell_base, ONLY : omega
|
||||
USE xc_lib, ONLY : igcc_is_lyp, xclib_dft_is, xc_gcx
|
||||
USE noncollin_module, ONLY : domag
|
||||
USE fft_base, ONLY : dfftp
|
||||
USE fft_interfaces, ONLY : fwfft
|
||||
USE fft_rho, ONLY: rho_r2g
|
||||
USE fft_rho, ONLY : rho_r2g, rho_r2g_gpu
|
||||
USE control_flags, ONLY : use_gpu
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -39,15 +40,15 @@ SUBROUTINE gradcorr( rho, rhog, rho_core, rhog_core, etxc, vtxc, v )
|
|||
REAL(DP), ALLOCATABLE :: v1c(:,:), v2c(:,:), v2c_ud(:)
|
||||
REAL(DP) :: sx(dfftp%nnr), sc(dfftp%nnr)
|
||||
!
|
||||
REAL(DP) :: sgn(2), etxcgc, vtxcgc, fac, amag
|
||||
!
|
||||
REAL(DP) :: sgn_is, etxcgc, vtxcgc, fac, amag
|
||||
REAL(DP) :: grup, grdw
|
||||
!
|
||||
REAL(DP), PARAMETER :: epsr = 1.D-6, epsg = 1.D-10
|
||||
!
|
||||
!
|
||||
IF ( .NOT. xclib_dft_is('gradient') ) RETURN
|
||||
!
|
||||
!$acc data deviceptr( rho(dfftp%nnr,nspin), rho_core(dfftp%nnr), &
|
||||
!$acc& rhog(ngm,nspin), rhog_core(ngm), v(dfftp%nnr,nspin) )
|
||||
!
|
||||
etxcgc = 0.0_DP
|
||||
vtxcgc = 0.0_DP
|
||||
!
|
||||
|
@ -55,77 +56,111 @@ SUBROUTINE gradcorr( rho, rhog, rho_core, rhog_core, etxc, vtxc, v )
|
|||
IF ( nspin==4 ) nspin0 = 1
|
||||
IF ( nspin==4 .AND. domag ) nspin0 = 2
|
||||
fac = 1.0_DP / DBLE( nspin0 )
|
||||
sgn(1) = 1._DP ; sgn(2) = -1._DP
|
||||
!
|
||||
ALLOCATE( h(3,dfftp%nnr,nspin0) )
|
||||
ALLOCATE( grho(3,dfftp%nnr,nspin0) )
|
||||
ALLOCATE( rhoaux(dfftp%nnr,nspin0) )
|
||||
!
|
||||
ALLOCATE( v1x(dfftp%nnr,nspin0), v2x(dfftp%nnr,nspin0) )
|
||||
ALLOCATE( v1c(dfftp%nnr,nspin0), v2c(dfftp%nnr,nspin0) )
|
||||
!$acc data create( rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c, h )
|
||||
!
|
||||
IF ( nspin==4 .AND. domag ) THEN
|
||||
ALLOCATE( vgg(dfftp%nnr,nspin0) )
|
||||
ALLOCATE( vsave(dfftp%nnr,nspin) )
|
||||
ALLOCATE( segni(dfftp%nnr) )
|
||||
vsave=v
|
||||
v=0._DP
|
||||
ENDIF
|
||||
!
|
||||
ALLOCATE( rhogaux( ngm, nspin0 ) )
|
||||
ALLOCATE( rhogaux(ngm,nspin0) )
|
||||
!$acc data create( rhogaux )
|
||||
!
|
||||
! ... calculate the gradient of rho + rho_core in real space
|
||||
!
|
||||
IF ( nspin == 4 .AND. domag ) THEN
|
||||
!
|
||||
CALL compute_rho( rho, rhoaux, segni, dfftp%nnr )
|
||||
!
|
||||
! ... bring starting rhoaux to G-space
|
||||
!
|
||||
CALL rho_r2g ( dfftp, rhoaux(:,1:nspin0), rhogaux(:,1:nspin0) )
|
||||
!
|
||||
ALLOCATE( vsave(dfftp%nnr,nspin) )
|
||||
ALLOCATE( segni(dfftp%nnr) )
|
||||
!$acc data copyout( segni, vsave )
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, nspin
|
||||
DO ir = 1, dfftp%nnr
|
||||
vsave(ir,is) = v(ir,is)
|
||||
v(ir,is) = 0._DP
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
! ... bring starting rhoaux to G-space
|
||||
IF ( use_gpu ) THEN
|
||||
!$acc host_data use_device( rhoaux, rhogaux, segni )
|
||||
CALL compute_rho_gpu( rho, rhoaux, segni, dfftp%nnr )
|
||||
CALL rho_r2g_gpu( dfftp, rhoaux(:,1:nspin0), rhogaux(:,1:nspin0) )
|
||||
!$acc end host_data
|
||||
ELSE
|
||||
CALL compute_rho( rho, rhoaux, segni, dfftp%nnr )
|
||||
CALL rho_r2g( dfftp, rhoaux(:,1:nspin0), rhogaux(:,1:nspin0) )
|
||||
ENDIF
|
||||
!$acc end data
|
||||
ELSE
|
||||
!
|
||||
! ... for convenience rhoaux and rhogaux are in (up,down) format, when LSDA
|
||||
!
|
||||
DO is = 1, nspin0
|
||||
rhoaux(:,is) = ( rho(:,1) + sgn(is) * rho(:,nspin0) ) * 0.5_DP
|
||||
rhogaux(:,is) = ( rhog(:,1) + sgn(is) * rhog(:,nspin0) ) * 0.5_DP
|
||||
ENDDO
|
||||
!
|
||||
! ... for convenience rhoaux and rhogaux are in (up,down) format, when LSDA
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, nspin0
|
||||
DO ir = 1, dfftp%nnr
|
||||
sgn_is = DBLE(3-2*is)
|
||||
rhoaux(ir,is) = ( rho(ir,1) + sgn_is * rho(ir,nspin0) ) * 0.5_DP
|
||||
ENDDO
|
||||
ENDDO
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, nspin0
|
||||
DO ir = 1, ngm
|
||||
sgn_is = DBLE(3-2*is)
|
||||
rhogaux(ir,is) = ( rhog(ir,1) + CMPLX(sgn_is) * rhog(ir,nspin0) ) &
|
||||
* (0.5_DP,0._DP)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, nspin0
|
||||
!
|
||||
rhoaux(:,is) = fac * rho_core(:) + rhoaux(:,is)
|
||||
rhogaux(:,is) = fac * rhog_core(:) + rhogaux(:,is)
|
||||
!
|
||||
CALL fft_gradient_g2r( dfftp, rhogaux(1,is), g, grho(1,1,is) )
|
||||
!
|
||||
DO ir = 1, dfftp%nnr
|
||||
rhoaux(ir,is) = fac * rho_core(ir) + rhoaux(ir,is)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, nspin0
|
||||
DO ir = 1, ngm
|
||||
rhogaux(ir,is) = CMPLX(fac,kind=DP) * rhog_core(ir) + rhogaux(ir,is)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( rhogaux )
|
||||
DO is = 1, nspin0
|
||||
IF ( use_gpu ) THEN
|
||||
!$acc host_data use_device( rhogaux, grho )
|
||||
CALL fft_gradient_g2r_gpu( dfftp, rhogaux(:,is), g_d, grho(:,:,is) )
|
||||
!$acc end host_data
|
||||
ELSE
|
||||
CALL fft_gradient_g2r( dfftp, rhogaux(:,is), g, grho(:,:,is) )
|
||||
ENDIF
|
||||
ENDDO
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( rhogaux )
|
||||
!
|
||||
IF ( nspin0 == 1 ) THEN
|
||||
!
|
||||
! ... This is the spin-unpolarised case
|
||||
!
|
||||
CALL xc_gcx( dfftp%nnr, nspin0, rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c )
|
||||
!$acc host_data use_device( rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c )
|
||||
CALL xc_gcx( dfftp%nnr, nspin0, rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c, &
|
||||
gpu_args_=.TRUE. )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop reduction(+:etxcgc) reduction(+:vtxcgc)
|
||||
DO k = 1, dfftp%nnr
|
||||
!
|
||||
! ... first term of the gradient correction : D(rho*Exc)/D(rho)
|
||||
v(k,1) = v(k,1) + e2 * ( v1x(k,1) + v1c(k,1) )
|
||||
!
|
||||
! ... h contains: D(rho*Exc) / D(|grad rho|) * (grad rho) / |grad rho|
|
||||
h(:,k,1) = e2 * ( v2x(k,1) + v2c(k,1) ) * grho(:,k,1)
|
||||
DO ipol = 1, 3
|
||||
h(ipol,k,1) = e2 * ( v2x(k,1) + v2c(k,1) ) * grho(ipol,k,1)
|
||||
ENDDO
|
||||
!
|
||||
vtxcgc = vtxcgc + e2 * ( v1x(k,1) + v1c(k,1) ) * &
|
||||
(rhoaux(k,1) - rho_core(k) )
|
||||
!
|
||||
( rhoaux(k,1) - rho_core(k) )
|
||||
etxcgc = etxcgc + e2 * ( sx(k) + sc(k) )
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
|
@ -133,71 +168,84 @@ SUBROUTINE gradcorr( rho, rhog, rho_core, rhog_core, etxc, vtxc, v )
|
|||
! ... spin-polarised case
|
||||
!
|
||||
ALLOCATE( v2c_ud(dfftp%nnr) )
|
||||
!
|
||||
!
|
||||
CALL xc_gcx( dfftp%nnr, nspin0, rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
!$acc data create( v2c_ud )
|
||||
!
|
||||
!$acc host_data use_device( rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
CALL xc_gcx( dfftp%nnr, nspin0, rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c, &
|
||||
v2c_ud, gpu_args_=.TRUE. )
|
||||
!$acc end host_data
|
||||
!
|
||||
! ... h contains D(rho*Exc)/D(|grad rho|) * (grad rho) / |grad rho|
|
||||
!
|
||||
!$acc parallel loop reduction(+:etxcgc) reduction(+:vtxcgc)
|
||||
DO k = 1, dfftp%nnr
|
||||
v(k,1:nspin0) = v(k,1:nspin0) + e2*( v1x(k,1:nspin0) + v1c(k,1:nspin0))
|
||||
!
|
||||
DO is = 1, nspin0
|
||||
v(k,is) = v(k,is) + e2*(v1x(k,is) + v1c(k,is))
|
||||
ENDDO
|
||||
!
|
||||
DO ipol = 1, 3
|
||||
!
|
||||
grup = grho(ipol,k,1)
|
||||
grdw = grho(ipol,k,2)
|
||||
h(ipol,k,1) = e2*( (v2x(k,1) + v2c(k,1))*grup + v2c_ud(k)*grdw )
|
||||
h(ipol,k,2) = e2*( (v2x(k,2) + v2c(k,2))*grdw + v2c_ud(k)*grup )
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
vtxcgc = vtxcgc + &
|
||||
e2 * ( v1x(k,1) + v1c(k,1) ) * ( rhoaux(k,1) - rho_core(k) * fac )
|
||||
vtxcgc = vtxcgc + &
|
||||
e2 * ( v1x(k,2) + v1c(k,2) ) * ( rhoaux(k,2) - rho_core(k) * fac )
|
||||
vtxcgc = vtxcgc + e2 * ( &
|
||||
( v1x(k,1) + v1c(k,1) ) * ( rhoaux(k,1) - rho_core(k) * fac ) + &
|
||||
( v1x(k,2) + v1c(k,2) ) * ( rhoaux(k,2) - rho_core(k) * fac ) )
|
||||
etxcgc = etxcgc + e2 * ( sx(k) + sc(k) )
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( v2c_ud )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, nspin0
|
||||
!
|
||||
rhoaux(:,is) = rhoaux(:,is) - fac * rho_core(:)
|
||||
!
|
||||
END DO
|
||||
DO k = 1, dfftp%nnr
|
||||
rhoaux(k,is) = rhoaux(k,is) - fac * rho_core(k)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( grho )
|
||||
DEALLOCATE( v1x, v2x )
|
||||
DEALLOCATE( v1c, v2c )
|
||||
!
|
||||
ALLOCATE( dh(dfftp%nnr) )
|
||||
ALLOCATE( dh(dfftp%nnr) )
|
||||
!$acc data create( dh )
|
||||
!
|
||||
! ... second term of the gradient correction :
|
||||
! ... \sum_alpha (D / D r_alpha) ( D(rho*Exc)/D(grad_alpha rho) )
|
||||
!
|
||||
!$acc host_data use_device( rhoaux, dh, h )
|
||||
DO is = 1, nspin0
|
||||
!
|
||||
CALL fft_graddot( dfftp, h(1,1,is), g, dh )
|
||||
!
|
||||
v(:,is) = v(:,is) - dh(:)
|
||||
!
|
||||
vtxcgc = vtxcgc - SUM( dh(:) * rhoaux(:,is) )
|
||||
!
|
||||
END DO
|
||||
IF ( use_gpu ) CALL fft_graddot_gpu( dfftp, h(1,1,is), g_d, dh )
|
||||
IF ( .NOT. use_gpu ) CALL fft_graddot( dfftp, h(1,1,is), g, dh )
|
||||
!$acc parallel loop
|
||||
DO k = 1, dfftp%nnr
|
||||
v(k,is) = v(k,is) - dh(k)
|
||||
vtxcgc = vtxcgc - dh(k) * rhoaux(k,is)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
vtxc = vtxc + omega * vtxcgc / ( dfftp%nr1 * dfftp%nr2 * dfftp%nr3 )
|
||||
etxc = etxc + omega * etxcgc / ( dfftp%nr1 * dfftp%nr2 * dfftp%nr3 )
|
||||
!
|
||||
IF (nspin==4 .AND. domag) THEN
|
||||
DO is = 1, nspin0
|
||||
vgg(:,is) = v(:,is)
|
||||
ALLOCATE( vgg(dfftp%nnr,nspin0) )
|
||||
!$acc data copyin( segni, vsave ) create( vgg )
|
||||
!$acc host_data use_device( segni, vgg, vsave )
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, nspin
|
||||
DO ir = 1, dfftp%nnr
|
||||
IF (is<=nspin0) vgg(ir,is) = v(ir,is)
|
||||
v(ir,is) = vsave(ir,is)
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
v = vsave
|
||||
!$acc parallel loop
|
||||
DO k = 1, dfftp%nnr
|
||||
v(k,1) = v(k,1) + 0.5d0*(vgg(k,1)+vgg(k,2))
|
||||
amag = SQRT(rho(k,2)**2+rho(k,3)**2+rho(k,4)**2)
|
||||
|
@ -207,17 +255,21 @@ SUBROUTINE gradcorr( rho, rhog, rho_core, rhog_core, etxc, vtxc, v )
|
|||
v(k,4) = v(k,4) + segni(k)*0.5d0*(vgg(k,1)-vgg(k,2))*rho(k,4)/amag
|
||||
ENDIF
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
DEALLOCATE( dh )
|
||||
DEALLOCATE( h )
|
||||
DEALLOCATE( rhoaux )
|
||||
IF (nspin==4 .AND. domag) THEN
|
||||
DEALLOCATE( vgg )
|
||||
DEALLOCATE( vsave )
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( segni )
|
||||
DEALLOCATE( vgg, vsave )
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
DEALLOCATE( rhoaux, grho )
|
||||
DEALLOCATE( v1x, v2x )
|
||||
DEALLOCATE( v1c, v2c )
|
||||
DEALLOCATE( dh, h )
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -429,7 +429,7 @@ MODULE paw_onecenter
|
|||
USE constants, ONLY : e2, eps12
|
||||
USE lsda_mod, ONLY : nspin
|
||||
USE atom, ONLY : g => rgrid
|
||||
USE xc_lib, ONLY : xclib_dft_is
|
||||
USE xc_lib, ONLY : xclib_dft_is, xc
|
||||
USE constants, ONLY : fpi ! REMOVE
|
||||
!
|
||||
TYPE(paw_info), INTENT(IN) :: i
|
||||
|
|
|
@ -12,7 +12,7 @@ SUBROUTINE stres_gradcorr( rho, rhog, rho_core, rhog_core, kedtau, nspin, &
|
|||
!----------------------------------------------------------------------------
|
||||
!
|
||||
USE kinds, ONLY: DP
|
||||
USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx
|
||||
USE xc_lib, ONLY: xclib_dft_is, xclib_get_id, xc_gcx, xc_metagcx
|
||||
USE noncollin_module, ONLY: domag
|
||||
USE mp_bands, ONLY: intra_bgrp_comm
|
||||
USE mp, ONLY: mp_sum
|
||||
|
|
|
@ -63,7 +63,7 @@ SUBROUTINE v_of_rho( rho, rho_core, rhog_core, &
|
|||
!
|
||||
! ... calculate exchange-correlation potential
|
||||
!
|
||||
IF (xclib_dft_is('meta')) then
|
||||
IF (xclib_dft_is('meta')) THEN
|
||||
CALL v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v%of_r, v%kin_r )
|
||||
ELSE
|
||||
CALL v_xc( rho, rho_core, rhog_core, etxc, vtxc, v%of_r )
|
||||
|
@ -154,13 +154,15 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
|
|||
USE constants, ONLY : e2, eps8
|
||||
USE io_global, ONLY : stdout
|
||||
USE fft_base, ONLY : dfftp
|
||||
USE gvect, ONLY : g, ngm
|
||||
USE gvect, ONLY : g, g_d, ngm
|
||||
USE lsda_mod, ONLY : nspin
|
||||
USE cell_base, ONLY : omega
|
||||
USE funct, ONLY : dft_is_nonlocc, nlc
|
||||
USE scf, ONLY : scf_type, rhoz_or_updw
|
||||
USE scf, ONLY : scf_type
|
||||
USE xc_lib, ONLY : xc_metagcx
|
||||
USE mp, ONLY : mp_sum
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE control_flags, ONLY : use_gpu
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -181,20 +183,19 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
|
|||
!
|
||||
! ... local variables
|
||||
!
|
||||
REAL(DP) :: zeta, rh, sgn(2)
|
||||
REAL(DP) :: zeta, rh, sgn_is
|
||||
INTEGER :: k, ipol, is, np
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: ex(:), ec(:)
|
||||
REAL(DP), ALLOCATABLE :: v1x(:,:), v2x(:,:), v3x(:,:)
|
||||
REAL(DP), ALLOCATABLE :: v1c(:,:), v2c(:,:,:), v3c(:,:)
|
||||
!
|
||||
REAL(DP) :: fac
|
||||
|
||||
REAL(DP), DIMENSION(2) :: grho2, rhoneg
|
||||
REAL(DP) :: fac, rhoneg1, rhoneg2
|
||||
REAL(DP), DIMENSION(2) :: grho2
|
||||
REAL(DP), DIMENSION(3) :: grhoup, grhodw
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: grho(:,:,:), h(:,:,:), dh(:)
|
||||
REAL(DP), ALLOCATABLE :: rhoout(:)
|
||||
REAL(DP), ALLOCATABLE :: h(:,:,:), dh(:)
|
||||
REAL(DP), ALLOCATABLE :: rho_updw(:,:), grho(:,:,:), tau(:,:)
|
||||
COMPLEX(DP), ALLOCATABLE :: rhogsum(:)
|
||||
REAL(DP), PARAMETER :: eps12 = 1.0d-12, zero=0._dp
|
||||
!
|
||||
|
@ -203,15 +204,17 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
|
|||
etxc = zero
|
||||
vtxc = zero
|
||||
v(:,:) = zero
|
||||
rhoneg(:) = zero
|
||||
sgn(1) = 1._dp ; sgn(2) = -1._dp
|
||||
rhoneg1 = zero ; rhoneg2 = zero
|
||||
fac = 1.D0 / DBLE( nspin )
|
||||
np = 1
|
||||
IF (nspin==2) np=3
|
||||
!
|
||||
!$acc data copyin( rho ) copyout( kedtaur, v )
|
||||
!
|
||||
ALLOCATE( grho(3,dfftp%nnr,nspin) )
|
||||
ALLOCATE( h(3,dfftp%nnr,nspin) )
|
||||
ALLOCATE( rhogsum(ngm) )
|
||||
ALLOCATE( rhogsum(ngm), tau(dfftp%nnr,nspin) )
|
||||
!$acc data create( tau, grho, h )
|
||||
!
|
||||
ALLOCATE( ex(dfftp%nnr), ec(dfftp%nnr) )
|
||||
ALLOCATE( v1x(dfftp%nnr,nspin), v2x(dfftp%nnr,nspin) , v3x(dfftp%nnr,nspin) )
|
||||
|
@ -220,104 +223,158 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
|
|||
! ... calculate the gradient of rho + rho_core in real space
|
||||
! ... in LSDA case rhogsum is in (up,down) format
|
||||
!
|
||||
!$acc data create( rhogsum ) copyin( rhog_core, rho%of_g, rho%kin_r )
|
||||
DO is = 1, nspin
|
||||
!
|
||||
rhogsum(:) = fac*rhog_core(:) + ( rho%of_g(:,1) + sgn(is)*rho%of_g(:,nspin) )*0.5D0
|
||||
sgn_is = (-1.d0)**(is+1)
|
||||
!
|
||||
CALL fft_gradient_g2r( dfftp, rhogsum, g, grho(1,1,is) )
|
||||
!$acc parallel loop present(rho)
|
||||
DO k = 1, ngm
|
||||
rhogsum(k) = fac*rhog_core(k) + ( rho%of_g(k,1) + sgn_is*rho%of_g(k,nspin) )*0.5D0
|
||||
ENDDO
|
||||
!
|
||||
IF ( use_gpu ) THEN
|
||||
!$acc host_data use_device( rhogsum, grho )
|
||||
CALL fft_gradient_g2r_gpu( dfftp, rhogsum, g_d, grho(:,:,is) )
|
||||
!$acc end host_data
|
||||
ELSE
|
||||
CALL fft_gradient_g2r( dfftp, rhogsum, g, grho(:,:,is) )
|
||||
ENDIF
|
||||
!
|
||||
ENDDO
|
||||
DEALLOCATE(rhogsum)
|
||||
!
|
||||
!$acc parallel loop collapse(2) present(rho)
|
||||
DO is = 1, nspin
|
||||
DO k = 1, dfftp%nnr
|
||||
tau(k,is) = rho%kin_r(k,is)/e2
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( rhogsum )
|
||||
!
|
||||
!$acc data copyin( rho%of_r )
|
||||
!$acc data create( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
IF (nspin == 1) THEN
|
||||
!
|
||||
CALL xc_metagcx( dfftp%nnr, 1, np, rho%of_r, grho, rho%kin_r/e2, ex, ec, &
|
||||
v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc host_data use_device( rho%of_r, grho, tau, ex, ec, &
|
||||
!$acc& v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
CALL xc_metagcx( dfftp%nnr, 1, np, rho%of_r, grho, tau, ex, ec, &
|
||||
v1x, v2x, v3x, v1c, v2c, v3c, gpu_args_=.TRUE. )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop reduction(+:etxc) reduction(+:vtxc) reduction(-:rhoneg1) &
|
||||
!$acc& reduction(-:rhoneg2) present(rho)
|
||||
DO k = 1, dfftp%nnr
|
||||
!
|
||||
v(k,1) = (v1x(k,1)+v1c(k,1)) * e2
|
||||
!
|
||||
! h contains D(rho*Exc)/D(|grad rho|) * (grad rho) / |grad rho|
|
||||
h(:,k,1) = (v2x(k,1)+v2c(1,k,1)) * grho(:,k,1) * e2
|
||||
! ... h contains D(rho*Exc)/D(|grad rho|) * (grad rho) / |grad rho|
|
||||
DO ipol = 1, 3
|
||||
h(ipol,k,1) = (v2x(k,1)+v2c(1,k,1)) * grho(ipol,k,1) * e2
|
||||
ENDDO
|
||||
!
|
||||
kedtaur(k,1) = (v3x(k,1)+v3c(k,1)) * 0.5d0 * e2
|
||||
!
|
||||
etxc = etxc + (ex(k)+ec(k)) * e2
|
||||
vtxc = vtxc + (v1x(k,1)+v1c(k,1)) * e2 * ABS(rho%of_r(k,1))
|
||||
!
|
||||
IF (rho%of_r(k,1) < zero) rhoneg(1) = rhoneg(1)-rho%of_r(k,1)
|
||||
IF (rho%of_r(k,1) < zero) rhoneg1 = rhoneg1-rho%of_r(k,1)
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
CALL rhoz_or_updw( rho, 'only_r', '->updw' )
|
||||
ALLOCATE( rho_updw(dfftp%nnr,2) )
|
||||
!$acc data create( rho_updw )
|
||||
!
|
||||
CALL xc_metagcx( dfftp%nnr, 2, np, rho%of_r, grho, rho%kin_r/e2, ex, ec, &
|
||||
v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc parallel loop present(rho)
|
||||
DO k = 1, dfftp%nnr
|
||||
rho_updw(k,1) = ( rho%of_r(k,1) + rho%of_r(k,2) ) * 0.5d0
|
||||
rho_updw(k,2) = ( rho%of_r(k,1) - rho%of_r(k,2) ) * 0.5d0
|
||||
ENDDO
|
||||
!
|
||||
! first term of the gradient correction : D(rho*Exc)/D(rho)
|
||||
!$acc host_data use_device( rho_updw, grho, tau, ex, ec, &
|
||||
!$acc& v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
CALL xc_metagcx( dfftp%nnr, 2, np, rho_updw, grho, tau, ex, ec, &
|
||||
v1x, v2x, v3x, v1c, v2c, v3c, gpu_args_=.TRUE. )
|
||||
!$acc end host_data
|
||||
!
|
||||
! ... first term of the gradient correction : D(rho*Exc)/D(rho)
|
||||
!
|
||||
!$acc parallel loop reduction(+:etxc) reduction(+:vtxc) reduction(-:rhoneg1) &
|
||||
!$acc& reduction(-:rhoneg2)
|
||||
DO k = 1, dfftp%nnr
|
||||
!
|
||||
v(k,1) = (v1x(k,1) + v1c(k,1)) * e2
|
||||
v(k,2) = (v1x(k,2) + v1c(k,2)) * e2
|
||||
!
|
||||
! h contains D(rho*Exc)/D(|grad rho|) * (grad rho) / |grad rho|
|
||||
! ... h contains D(rho*Exc)/D(|grad rho|) * (grad rho) / |grad rho|
|
||||
!
|
||||
h(:,k,1) = (v2x(k,1) * grho(:,k,1) + v2c(:,k,1)) * e2
|
||||
h(:,k,2) = (v2x(k,2) * grho(:,k,2) + v2c(:,k,2)) * e2
|
||||
DO ipol = 1, 3
|
||||
h(ipol,k,1) = (v2x(k,1) * grho(ipol,k,1) + v2c(ipol,k,1)) * e2
|
||||
h(ipol,k,2) = (v2x(k,2) * grho(ipol,k,2) + v2c(ipol,k,2)) * e2
|
||||
ENDDO
|
||||
!
|
||||
kedtaur(k,1) = (v3x(k,1) + v3c(k,1)) * 0.5d0 * e2
|
||||
kedtaur(k,2) = (v3x(k,2) + v3c(k,2)) * 0.5d0 * e2
|
||||
!
|
||||
etxc = etxc + (ex(k)+ec(k)) * e2
|
||||
vtxc = vtxc + (v1x(k,1)+v1c(k,1)) * ABS(rho%of_r(k,1)) * e2
|
||||
vtxc = vtxc + (v1x(k,2)+v1c(k,2)) * ABS(rho%of_r(k,2)) * e2
|
||||
vtxc = vtxc + (v1x(k,1)+v1c(k,1)) * ABS(rho_updw(k,1)) * e2 + &
|
||||
(v1x(k,2)+v1c(k,2)) * ABS(rho_updw(k,2)) * e2
|
||||
!
|
||||
IF ( rho%of_r(k,1) < 0.d0 ) rhoneg(1) = rhoneg(1) - rho%of_r(k,1)
|
||||
IF ( rho%of_r(k,2) < 0.d0 ) rhoneg(2) = rhoneg(2) - rho%of_r(k,2)
|
||||
IF ( rho_updw(k,1) < 0.d0 ) rhoneg1 = rhoneg1 - rho_updw(k,1)
|
||||
IF ( rho_updw(k,2) < 0.d0 ) rhoneg2 = rhoneg2 - rho_updw(k,2)
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
CALL rhoz_or_updw( rho, 'only_r', '->rhoz' )
|
||||
!$acc end data
|
||||
DEALLOCATE( rho_updw )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( ex, ec )
|
||||
DEALLOCATE( v1x, v2x, v3x )
|
||||
DEALLOCATE( v1c, v2c, v3c )
|
||||
!
|
||||
!
|
||||
ALLOCATE( dh( dfftp%nnr ) )
|
||||
ALLOCATE( dh( dfftp%nnr ) )
|
||||
!$acc data create( dh )
|
||||
!
|
||||
! ... second term of the gradient correction :
|
||||
! ... \sum_alpha (D / D r_alpha) ( D(rho*Exc)/D(grad_alpha rho) )
|
||||
!
|
||||
ALLOCATE (rhoout(dfftp%nnr))
|
||||
!$acc host_data use_device( rho%of_r, dh, h )
|
||||
DO is = 1, nspin
|
||||
!
|
||||
CALL fft_graddot( dfftp, h(1,1,is), g, dh )
|
||||
sgn_is = (-1.d0)**(is+1)
|
||||
!
|
||||
v(:,is) = v(:,is) - dh(:)
|
||||
IF ( use_gpu ) CALL fft_graddot_gpu( dfftp, h(1,1,is), g_d, dh )
|
||||
IF ( .NOT. use_gpu ) CALL fft_graddot( dfftp, h(1,1,is), g, dh )
|
||||
!
|
||||
! ... rhoout is in (up,down) format
|
||||
!$acc parallel loop reduction(-:vtxc) present(rho)
|
||||
DO k = 1, dfftp%nnr
|
||||
v(k,is) = v(k,is) - dh(k)
|
||||
vtxc = vtxc - dh(k) * ( rho%of_r(k,1) + sgn_is*rho%of_r(k,nspin) )*0.5D0
|
||||
ENDDO
|
||||
!
|
||||
rhoout(:) = ( rho%of_r(:,1) + sgn(is)*rho%of_r(:,nspin) )*0.5D0
|
||||
vtxc = vtxc - SUM( dh(:) * rhoout(:) )
|
||||
!
|
||||
END DO
|
||||
DEALLOCATE(rhoout)
|
||||
DEALLOCATE(dh)
|
||||
ENDDO
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( dh )
|
||||
!
|
||||
CALL mp_sum( rhoneg, intra_bgrp_comm )
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
!
|
||||
rhoneg(:) = rhoneg(:) * omega / ( dfftp%nr1*dfftp%nr2*dfftp%nr3 )
|
||||
CALL mp_sum( rhoneg1, intra_bgrp_comm )
|
||||
CALL mp_sum( rhoneg2, intra_bgrp_comm )
|
||||
!
|
||||
IF ((rhoneg(1) > eps8) .OR. (rhoneg(2) > eps8)) THEN
|
||||
write (stdout, '(/,5x, "negative rho (up,down): ", 2es10.3)') rhoneg(:)
|
||||
rhoneg1 = rhoneg1 * omega / ( dfftp%nr1*dfftp%nr2*dfftp%nr3 )
|
||||
rhoneg2 = rhoneg2 * omega / ( dfftp%nr1*dfftp%nr2*dfftp%nr3 )
|
||||
!
|
||||
IF ((rhoneg1 > eps8) .OR. (rhoneg2 > eps8)) THEN
|
||||
write (stdout, '(/,5x, "negative rho (up,down): ", 2es10.3)') rhoneg1, rhoneg2
|
||||
ENDIF
|
||||
!
|
||||
vtxc = omega * vtxc / ( dfftp%nr1*dfftp%nr2*dfftp%nr3 )
|
||||
|
@ -328,8 +385,8 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
|
|||
CALL mp_sum( vtxc , intra_bgrp_comm )
|
||||
CALL mp_sum( etxc , intra_bgrp_comm )
|
||||
!
|
||||
DEALLOCATE(grho)
|
||||
DEALLOCATE(h)
|
||||
DEALLOCATE( tau, grho )
|
||||
DEALLOCATE( h )
|
||||
!
|
||||
CALL stop_clock( 'v_xc_meta' )
|
||||
!
|
||||
|
@ -337,10 +394,10 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, etxc, vtxc, v, kedtaur )
|
|||
!
|
||||
END SUBROUTINE v_xc_meta
|
||||
!
|
||||
!
|
||||
!------------------------------------------------------------------------------
|
||||
SUBROUTINE v_xc( rho, rho_core, rhog_core, etxc, vtxc, v )
|
||||
!----------------------------------------------------------------------------
|
||||
!! Exchange-Correlation potential Vxc(r) from n(r)
|
||||
!! Exchange-Correlation potential from charge density.
|
||||
!
|
||||
USE kinds, ONLY : DP
|
||||
USE constants, ONLY : e2, eps8
|
||||
|
@ -352,8 +409,10 @@ SUBROUTINE v_xc( rho, rho_core, rhog_core, etxc, vtxc, v )
|
|||
USE noncollin_module, ONLY : domag
|
||||
USE funct, ONLY : nlc, dft_is_nonlocc
|
||||
USE scf, ONLY : scf_type
|
||||
USE xc_lib, ONLY : xc
|
||||
USE mp_bands, ONLY : intra_bgrp_comm
|
||||
USE mp, ONLY : mp_sum
|
||||
USE control_flags, ONLY : use_gpu
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -364,19 +423,17 @@ SUBROUTINE v_xc( rho, rho_core, rhog_core, etxc, vtxc, v )
|
|||
COMPLEX(DP), INTENT(IN) :: rhog_core(ngm)
|
||||
!! the core charge in reciprocal space
|
||||
REAL(DP), INTENT(OUT) :: v(dfftp%nnr,nspin)
|
||||
!! V_xc potential
|
||||
!! \(V_{xc}\) potential
|
||||
REAL(DP), INTENT(OUT) :: vtxc
|
||||
!! integral V_xc * rho
|
||||
!! integral \(V_{xc}\cdot\text{rho}\)
|
||||
REAL(DP), INTENT(OUT) :: etxc
|
||||
!! E_xc energy
|
||||
!! \(E_{xc}\) energy
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
REAL(DP) :: rhoneg(2), vs
|
||||
!
|
||||
!REAL(DP), ALLOCATABLE :: arhox(:), amag(:), zeta(:)
|
||||
REAL(DP) :: arho, amag
|
||||
REAL(DP) :: rhoup2, rhodw2
|
||||
REAL(DP) :: rhoup2, rhodw2, rhoneg1, rhoneg2
|
||||
REAL(DP) :: arho, amag, vtxc24
|
||||
REAL(DP), ALLOCATABLE :: ex(:), ec(:)
|
||||
REAL(DP), ALLOCATABLE :: vx(:,:), vc(:,:)
|
||||
! In order:
|
||||
|
@ -390,98 +447,110 @@ SUBROUTINE v_xc( rho, rho_core, rhog_core, etxc, vtxc, v )
|
|||
INTEGER :: ir, ipol
|
||||
! counter on mesh points
|
||||
! counter on nspin
|
||||
!
|
||||
! number of mesh points (=dfftp%nnr)
|
||||
REAL(DP), PARAMETER :: vanishing_charge = 1.D-10, &
|
||||
vanishing_mag = 1.D-20
|
||||
!
|
||||
!
|
||||
CALL start_clock( 'v_xc' )
|
||||
!
|
||||
ALLOCATE( ex(dfftp%nnr) )
|
||||
ALLOCATE( ec(dfftp%nnr) )
|
||||
ALLOCATE( vx(dfftp%nnr,nspin) )
|
||||
ALLOCATE( vc(dfftp%nnr,nspin) )
|
||||
etxc = 0.D0 ; rhoneg1 = 0.D0
|
||||
vtxc = 0.D0 ; rhoneg2 = 0.D0
|
||||
!
|
||||
etxc = 0.D0
|
||||
vtxc = 0.D0
|
||||
v(:,:) = 0.D0
|
||||
rhoneg = 0.D0
|
||||
!$acc data copyin( rho_core, rhog_core, rho ) copyout( v )
|
||||
!$acc data copyin( rho%of_r, rho%of_g )
|
||||
!
|
||||
ALLOCATE( ex(dfftp%nnr), vx(dfftp%nnr,nspin) )
|
||||
ALLOCATE( ec(dfftp%nnr), vc(dfftp%nnr,nspin) )
|
||||
!$acc data create( ex, ec, vx, vc )
|
||||
!
|
||||
rho%of_r(:,1) = rho%of_r(:,1) + rho_core(:)
|
||||
!$acc host_data use_device( rho%of_r, rho%of_g, rho_core, rhog_core, v,&
|
||||
!$acc& ex, ec, vx, vc )
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, dfftp%nnr
|
||||
rho%of_r(ir,1) = rho%of_r(ir,1) + rho_core(ir)
|
||||
ENDDO
|
||||
!
|
||||
IF ( nspin == 1 .OR. ( nspin == 4 .AND. .NOT. domag ) ) THEN
|
||||
! ... spin-unpolarized case
|
||||
!
|
||||
CALL xc( dfftp%nnr, 1, 1, rho%of_r, ex, ec, vx, vc )
|
||||
CALL xc( dfftp%nnr, 1, 1, rho%of_r, ex, ec, vx, vc, gpu_args_=.TRUE. )
|
||||
!
|
||||
!$acc parallel loop reduction(+:etxc) reduction(+:vtxc) reduction(-:rhoneg1) &
|
||||
!$acc& present(rho)
|
||||
DO ir = 1, dfftp%nnr
|
||||
v(ir,1) = e2*( vx(ir,1) + vc(ir,1) )
|
||||
etxc = etxc + e2*( ex(ir) + ec(ir) )*rho%of_r(ir,1)
|
||||
rho%of_r(ir,1) = rho%of_r(ir,1) - rho_core(ir)
|
||||
vtxc = vtxc + v(ir,1)*rho%of_r(ir,1)
|
||||
IF (rho%of_r(ir,1) < 0.D0) rhoneg(1) = rhoneg(1)-rho%of_r(ir,1)
|
||||
IF (rho%of_r(ir,1) < 0.D0) rhoneg1 = rhoneg1-rho%of_r(ir,1)
|
||||
ENDDO
|
||||
!
|
||||
!
|
||||
ELSEIF ( nspin == 2 ) THEN
|
||||
! ... spin-polarized case
|
||||
!
|
||||
CALL xc( dfftp%nnr, 2, 2, rho%of_r, ex, ec, vx, vc )
|
||||
CALL xc( dfftp%nnr, 2, 2, rho%of_r, ex, ec, vx, vc, gpu_args_=.TRUE. )
|
||||
!
|
||||
DO ir = 1, dfftp%nnr !OMP ?
|
||||
v(ir,:) = e2*( vx(ir,:) + vc(ir,:) )
|
||||
!$acc parallel loop reduction(+:etxc) reduction(+:vtxc) reduction(-:rhoneg1) &
|
||||
!$acc& reduction(-:rhoneg2) present(rho)
|
||||
DO ir = 1, dfftp%nnr
|
||||
v(ir,1) = e2*( vx(ir,1) + vc(ir,1) )
|
||||
v(ir,2) = e2*( vx(ir,2) + vc(ir,2) )
|
||||
etxc = etxc + e2*( (ex(ir) + ec(ir))*rho%of_r(ir,1) )
|
||||
rho%of_r(ir,1) = rho%of_r(ir,1) - rho_core(ir)
|
||||
vtxc = vtxc + ( ( v(ir,1) + v(ir,2) )*rho%of_r(ir,1) + &
|
||||
( v(ir,1) - v(ir,2) )*rho%of_r(ir,2) )
|
||||
( v(ir,1) - v(ir,2) )*rho%of_r(ir,2) )*0.5d0
|
||||
!
|
||||
rhoup2 = rho%of_r(ir,1)+rho%of_r(ir,2)
|
||||
rhodw2 = rho%of_r(ir,1)-rho%of_r(ir,2)
|
||||
IF (rhoup2 < 0.d0) rhoneg(1) = rhoneg(1) - rhoup2
|
||||
IF (rhodw2 < 0.d0) rhoneg(2) = rhoneg(2) - rhodw2
|
||||
IF (rhoup2 < 0.d0) rhoneg1 = rhoneg1 - rhoup2*0.5d0
|
||||
IF (rhodw2 < 0.d0) rhoneg2 = rhoneg2 - rhodw2*0.5d0
|
||||
ENDDO
|
||||
!
|
||||
vtxc = 0.5d0 * vtxc
|
||||
rhoneg = 0.5d0 * rhoneg
|
||||
!
|
||||
!
|
||||
ELSE IF ( nspin == 4 ) THEN
|
||||
! ... noncolinear case
|
||||
!
|
||||
CALL xc( dfftp%nnr, 4, 2, rho%of_r, ex, ec, vx, vc )
|
||||
!
|
||||
DO ir = 1, dfftp%nnr !OMP ?
|
||||
arho = ABS( rho%of_r(ir,1) )
|
||||
IF ( arho < vanishing_charge ) CYCLE
|
||||
vs = 0.5D0*( vx(ir,1) + vc(ir,1) - vx(ir,2) - vc(ir,2) )
|
||||
v(ir,1) = e2*( 0.5D0*( vx(ir,1) + vc(ir,1) + vx(ir,2) + vc(ir,2) ) )
|
||||
!
|
||||
amag = SQRT( SUM( rho%of_r(ir,2:4)**2 ) )
|
||||
IF ( amag > vanishing_mag ) THEN
|
||||
v(ir,2:4) = e2 * vs * rho%of_r(ir,2:4) / amag
|
||||
vtxc = vtxc + SUM( v(ir,2:4) * rho%of_r(ir,2:4) )
|
||||
ENDIF
|
||||
etxc = etxc + e2*( ex(ir) + ec(ir) ) * arho
|
||||
!
|
||||
rho%of_r(ir,1) = rho%of_r(ir,1) - rho_core(ir)
|
||||
IF ( rho%of_r(ir,1) < 0.D0 ) rhoneg(1) = rhoneg(1) - rho%of_r(ir,1)
|
||||
IF ( amag / arho > 1.D0 ) rhoneg(2) = rhoneg(2) + 1.D0/omega
|
||||
vtxc = vtxc + v(ir,1) * rho%of_r(ir,1)
|
||||
ENDDO
|
||||
!
|
||||
!
|
||||
ELSEIF ( nspin == 4 ) THEN
|
||||
! ... noncollinear case
|
||||
!
|
||||
CALL xc( dfftp%nnr, 4, 2, rho%of_r, ex, ec, vx, vc, gpu_args_=.TRUE. )
|
||||
!
|
||||
!$acc parallel loop reduction(+:etxc) reduction(+:vtxc) reduction(-:rhoneg1) &
|
||||
!$acc& reduction(+:rhoneg2) present(rho)
|
||||
DO ir = 1, dfftp%nnr
|
||||
arho = ABS( rho%of_r(ir,1) )
|
||||
IF ( arho < vanishing_charge ) CYCLE
|
||||
vs = 0.5D0*( vx(ir,1) + vc(ir,1) - vx(ir,2) - vc(ir,2) )
|
||||
v(ir,1) = e2*( 0.5D0*( vx(ir,1) + vc(ir,1) + vx(ir,2) + vc(ir,2) ) )
|
||||
!
|
||||
amag = SQRT( SUM( rho%of_r(ir,2:4)**2 ) )
|
||||
IF ( amag > vanishing_mag ) THEN
|
||||
v(ir,2:4) = e2 * vs * rho%of_r(ir,2:4) / amag
|
||||
vtxc24 = SUM( v(ir,2:4) * rho%of_r(ir,2:4) )
|
||||
ELSE
|
||||
vtxc24 = 0.d0
|
||||
ENDIF
|
||||
etxc = etxc + e2*( ex(ir) + ec(ir) ) * arho
|
||||
!
|
||||
rho%of_r(ir,1) = rho%of_r(ir,1) - rho_core(ir)
|
||||
IF ( rho%of_r(ir,1) < 0.D0 ) rhoneg1 = rhoneg1 - rho%of_r(ir,1)
|
||||
IF ( amag / arho > 1.D0 ) rhoneg2 = rhoneg2 + 1.D0/omega
|
||||
vtxc = vtxc + vtxc24 + v(ir,1) * rho%of_r(ir,1)
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
DEALLOCATE( ex, ec )
|
||||
DEALLOCATE( vx, vc )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( ex, vx )
|
||||
DEALLOCATE( ec, vc )
|
||||
!
|
||||
CALL mp_sum( rhoneg , intra_bgrp_comm )
|
||||
CALL mp_sum( rhoneg1 , intra_bgrp_comm )
|
||||
CALL mp_sum( rhoneg2 , intra_bgrp_comm )
|
||||
!
|
||||
rhoneg(:) = rhoneg(:) * omega / ( dfftp%nr1*dfftp%nr2*dfftp%nr3 )
|
||||
rhoneg1 = rhoneg1 * omega / ( dfftp%nr1*dfftp%nr2*dfftp%nr3 )
|
||||
rhoneg2 = rhoneg2 * omega / ( dfftp%nr1*dfftp%nr2*dfftp%nr3 )
|
||||
!
|
||||
IF ( rhoneg(1) > eps8 .OR. rhoneg(2) > eps8 ) &
|
||||
WRITE( stdout,'(/,5X,"negative rho (up, down): ",2ES10.3)') rhoneg
|
||||
IF ( rhoneg1 > eps8 .OR. rhoneg2 > eps8 ) &
|
||||
WRITE( stdout,'(/,5X,"negative rho (up, down): ",2ES10.3)') rhoneg1, rhoneg2
|
||||
!
|
||||
! ... energy terms, local-density contribution
|
||||
!
|
||||
|
@ -490,7 +559,12 @@ SUBROUTINE v_xc( rho, rho_core, rhog_core, etxc, vtxc, v )
|
|||
!
|
||||
! ... add gradient corrections (if any)
|
||||
!
|
||||
!$acc host_data use_device( rho%of_r, rho%of_g, rho_core, rhog_core, v )
|
||||
CALL gradcorr( rho%of_r, rho%of_g, rho_core, rhog_core, etxc, vtxc, v )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc end data
|
||||
!$acc end data
|
||||
!
|
||||
! ... add non local corrections (if any)
|
||||
!
|
||||
|
|
|
@ -44,6 +44,8 @@ set(sources_libbeef
|
|||
beefun.c)
|
||||
qe_add_library(qe_libbeef ${sources_libbeef})
|
||||
target_link_libraries(qe_libbeef
|
||||
PUBLIC
|
||||
qe_openacc_c
|
||||
PRIVATE
|
||||
qe_lapack)
|
||||
|
||||
|
@ -87,6 +89,7 @@ if(QE_ENABLE_TEST)
|
|||
qe_mpi_fortran
|
||||
qe_upflib
|
||||
qe_external_libxc
|
||||
qe_libbeef
|
||||
qe_xclib)
|
||||
|
||||
find_program(BASH_PROGRAM bash)
|
||||
|
|
|
@ -41,6 +41,7 @@ xclib_utils_and_para.o \
|
|||
dft_setting_params.o \
|
||||
dft_setting_routines.o \
|
||||
qe_dft_list.o \
|
||||
xc_beef_interface.o \
|
||||
xc_lib.o
|
||||
|
||||
all: xc_lib.a
|
||||
|
|
|
@ -8,14 +8,25 @@
|
|||
|
||||
#define nmax 30
|
||||
|
||||
#pragma acc routine (dgemv_) seq
|
||||
extern void dgemv_(const char *, const int *, const int *, const double *,
|
||||
double *, const int *, double *, const int *, const double *, double *,
|
||||
const int *);
|
||||
|
||||
#pragma acc routine seq
|
||||
double ddot1(double v[], double u[], int n)
|
||||
{
|
||||
double result = 0.0;
|
||||
for (int i = 0; i < n; i++)
|
||||
result += v[i]*u[i];
|
||||
return result;
|
||||
}
|
||||
|
||||
extern double ddot_(const int *, double *, const int *, double *, const int *);
|
||||
|
||||
//beef exchange enhancement factor legendre polynomial coefficients
|
||||
static double mi[] = {
|
||||
#pragma acc declare copyin (mi[0:nmax-1])
|
||||
static double mi[nmax] = {
|
||||
1.516501714304992365356,
|
||||
0.441353209874497942611,
|
||||
-0.091821352411060291887,
|
||||
|
@ -392,7 +403,7 @@ static double beefmat[] = {
|
|||
L[1] = x; int i=0;\
|
||||
for(i=2;i<nmax;i++) { \
|
||||
L[i] = 2.*(x)*L[i-1] - L[i-2] - ((x)*L[i-1] - L[i-2])/((double) i); \
|
||||
dL[i] = L[i-1]*i + dL[i-1]*(x); \
|
||||
dL[i] = L[i-1]*((double) i) + dL[i-1]*(x); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -419,7 +430,10 @@ static double beefmat[] = {
|
|||
return L[n]; \
|
||||
}
|
||||
|
||||
#define PRAGMA_ACC_ROUTINE _Pragma("acc routine seq")
|
||||
|
||||
#define defLdL(n) \
|
||||
PRAGMA_ACC_ROUTINE \
|
||||
static void LdL ## n (double x, double *y, double *z) { \
|
||||
double L[n+1]; \
|
||||
double dL[n+1]; \
|
||||
|
@ -497,8 +511,9 @@ static double(*Ln[])(double) = {
|
|||
L29
|
||||
};
|
||||
|
||||
|
||||
#pragma acc routine seq
|
||||
static void LdL0(double x, double *y, double *z) {*y=1.; *z=0.;}
|
||||
#pragma acc routine seq
|
||||
static void LdL1(double x, double *y, double *z) {*y=x; *z=1.;}
|
||||
defLdL(2)
|
||||
defLdL(3)
|
||||
|
@ -562,6 +577,42 @@ static void(*LdLn[])(double,double *,double *) = {
|
|||
LdL29
|
||||
};
|
||||
|
||||
#pragma acc routine seq
|
||||
static void LdLnACC(double t,double *fx,double *dl, int bo) {
|
||||
switch(bo) {
|
||||
case 0: LdL0(t ,fx ,dl ); break;
|
||||
case 1: LdL1(t ,fx ,dl ); break;
|
||||
case 2: LdL2(t ,fx ,dl ); break;
|
||||
case 3: LdL3(t ,fx ,dl ); break;
|
||||
case 4: LdL4(t ,fx ,dl ); break;
|
||||
case 5: LdL5(t ,fx ,dl ); break;
|
||||
case 6: LdL6(t ,fx ,dl ); break;
|
||||
case 7: LdL7(t ,fx ,dl ); break;
|
||||
case 8: LdL8(t ,fx ,dl ); break;
|
||||
case 9: LdL9(t ,fx ,dl ); break;
|
||||
case 10: LdL10(t ,fx ,dl ); break;
|
||||
case 11: LdL11(t ,fx ,dl ); break;
|
||||
case 12: LdL12(t ,fx ,dl ); break;
|
||||
case 13: LdL13(t ,fx ,dl ); break;
|
||||
case 14: LdL14(t ,fx ,dl ); break;
|
||||
case 15: LdL15(t ,fx ,dl ); break;
|
||||
case 16: LdL16(t ,fx ,dl ); break;
|
||||
case 17: LdL17(t ,fx ,dl ); break;
|
||||
case 18: LdL18(t ,fx ,dl ); break;
|
||||
case 19: LdL19(t ,fx ,dl ); break;
|
||||
case 20: LdL20(t ,fx ,dl ); break;
|
||||
case 21: LdL21(t ,fx ,dl ); break;
|
||||
case 22: LdL22(t ,fx ,dl ); break;
|
||||
case 23: LdL23(t ,fx ,dl ); break;
|
||||
case 24: LdL24(t ,fx ,dl ); break;
|
||||
case 25: LdL25(t ,fx ,dl ); break;
|
||||
case 26: LdL26(t ,fx ,dl ); break;
|
||||
case 27: LdL27(t ,fx ,dl ); break;
|
||||
case 28: LdL28(t ,fx ,dl ); break;
|
||||
case 29: LdL29(t ,fx ,dl ); break;
|
||||
}
|
||||
}
|
||||
|
||||
// LDA and PBEc coefficients
|
||||
|
||||
//pix = 1./(2.*(3.*pi**2)**(1./3.))**2
|
||||
|
@ -582,21 +633,24 @@ static void(*LdLn[])(double,double *,double *) = {
|
|||
#define normrand() \
|
||||
( sqrt(-2.*log(unirandex0())) * cos(M_PI*2.*unirandinc0()) )
|
||||
|
||||
|
||||
// beeforder==-1 : calculate beefxc with standard expansion coefficients
|
||||
// (is changed by beefsetmode_)
|
||||
static int beeforder = -1;
|
||||
#pragma acc declare copyin (beeforder)
|
||||
|
||||
//arrays holding current Legendre polynomials and derivatives
|
||||
static __thread double L[nmax] = {1.};
|
||||
static __thread double dL[nmax] = {0.,1.};
|
||||
//static __thread double L[nmax] = {1.};
|
||||
//static __thread double dL[nmax] = {0.,1.};
|
||||
|
||||
//static double L[nmax] = {1.};
|
||||
//static double dL[nmax] = {0.,1.};
|
||||
|
||||
static inline double sq(double x) {return x*x;}
|
||||
|
||||
// beeftype is a switch set by beef_set_type
|
||||
// which determines which version/type of beef is used
|
||||
static int beeftype = 0;
|
||||
#pragma acc declare copyin (beeftype)
|
||||
|
||||
#define output_spacing " "
|
||||
#define output_marker "**************************************************************************"
|
||||
|
|
|
@ -10,15 +10,17 @@
|
|||
#include "pbecor.h"
|
||||
|
||||
// evaluate bee exchange energy and its derivatives de/drho and ( de/d|grad rho| ) / |grad rho|
|
||||
#pragma acc routine seq
|
||||
void beefx_(double *r, double *g, double *e, double *dr, double *dg, int *addlda)
|
||||
{
|
||||
double s2,t,r43,r83,s,sx,dx,fx,dl,dfx;
|
||||
const int n=nmax;
|
||||
const int i1=1;
|
||||
const int i2=1;
|
||||
|
||||
double L[nmax]={1.};
|
||||
double dL[nmax]={0.,1.};
|
||||
|
||||
switch(beeftype) {
|
||||
case 0: //BEEF-vdW xc
|
||||
case 0: //BEEF-vdW xc
|
||||
r43 = pow(*r, 4./3.);
|
||||
r83 = r43*r43;
|
||||
sx = r2e * r43;
|
||||
|
@ -31,13 +33,14 @@ void beefx_(double *r, double *g, double *e, double *dr, double *dg, int *addlda
|
|||
if(beeforder==-1)
|
||||
{
|
||||
calclegdleg(t);
|
||||
|
||||
if(!(*addlda))
|
||||
fx = ddot_(&n, mi, &i1, L, &i2) - 1.;
|
||||
else
|
||||
fx = ddot_(&n, mi, &i1, L, &i2);
|
||||
dl = ddot_(&n, mi, &i1, dL, &i2);
|
||||
|
||||
if(!(*addlda)){
|
||||
fx = ddot1(mi,L,n) - 1.;
|
||||
}
|
||||
else{
|
||||
fx = ddot1(mi,L,n);
|
||||
}
|
||||
|
||||
dl = ddot1(mi,dL,n);
|
||||
dfx = dl*( 4.*s / (4.+s2) - 4.*s2*s/sq(4.+s2) );
|
||||
*dr = dx*fx - 4./3.*s2/(s*(*r))*sx*dfx;
|
||||
*dg = sx*dfx*pix/(s*r83);
|
||||
|
@ -47,8 +50,8 @@ void beefx_(double *r, double *g, double *e, double *dr, double *dg, int *addlda
|
|||
|
||||
if(beeforder>=0)
|
||||
{
|
||||
(*LdLn[beeforder])(t, &fx, &dl);
|
||||
|
||||
//(*LdLn[beeforder])(t, &fx, &dl);
|
||||
LdLnACC(t, &fx, &dl, beeforder);
|
||||
dfx = dl*( 4.*s / (4.+s2) - 4.*s2*s/sq(4.+s2) );
|
||||
*dr = dx*fx - 4./3.*s2/(s*(*r))*sx*dfx;
|
||||
*dg = sx*dfx*pix/(s*r83);
|
||||
|
@ -60,13 +63,13 @@ void beefx_(double *r, double *g, double *e, double *dr, double *dg, int *addlda
|
|||
*dg = 0.;
|
||||
*e = 0.;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// evaluate local part of bee correlation and its derivatives de/drho and ( de/d|grad rho| ) / |grad rho|
|
||||
#pragma acc routine seq
|
||||
void beeflocalcorr_(double *r, double *g, double *e, double *dr, double *dg, int *addlda)
|
||||
{
|
||||
double rs, ldac, ldadr, pbec, pbedr, pbed2rho;
|
||||
|
@ -80,10 +83,9 @@ void beeflocalcorr_(double *r, double *g, double *e, double *dr, double *dg, int
|
|||
}
|
||||
|
||||
switch(beeftype) {
|
||||
case 0: //BEEF-vdW xc
|
||||
case 0: //BEEF-vdW xc
|
||||
rs = invpi075tothird / pow(*r,1./3.);
|
||||
corpbe(rs, 0.5/r2k * sqrt(*g*rs) / (*r),
|
||||
(beeforder>-3), 1, &ldac, &ldadr, &pbec, &pbedr, &pbed2rho);
|
||||
corpbe(rs, 0.5/r2k * sqrt(*g*rs) / (*r),(beeforder>-3), 1, &ldac, &ldadr, &pbec, &pbedr, &pbed2rho);
|
||||
|
||||
if(beeforder==-1)
|
||||
{
|
||||
|
@ -131,9 +133,11 @@ void beefxpot_(double *r, double *g, double *e, int *addlda)
|
|||
const int n=nmax;
|
||||
const int i1=1;
|
||||
const int i2=1;
|
||||
|
||||
double L[nmax]={1.};
|
||||
|
||||
switch(beeftype) {
|
||||
case 0: //BEEF-vdW xc
|
||||
case 0: //BEEF-vdW xc
|
||||
r43 = pow(*r, 4./3.);
|
||||
|
||||
s2 = *g*pix / (r43*r43);
|
||||
|
@ -171,7 +175,7 @@ void beeflocalcorrpot_(double *r, double *g, double *e, int *addlda)
|
|||
}
|
||||
|
||||
switch(beeftype) {
|
||||
case 0: //BEEF-vdW xc
|
||||
case 0: //BEEF-vdW xc
|
||||
rs = invpi075tothird / pow(*r,1./3.);
|
||||
corpbe(rs, 0.5/r2k * sqrt(*g*rs) / (*r),
|
||||
(beeforder>-3), 0, &ldac, &ldadr, &pbec, &pbedr, &pbed2rho);
|
||||
|
@ -199,6 +203,7 @@ void beeflocalcorrpot_(double *r, double *g, double *e, int *addlda)
|
|||
|
||||
|
||||
// evaluate local part of bee correlation for spin polarized system
|
||||
#pragma acc routine seq
|
||||
void beeflocalcorrspin_(double *r, double *z, double *g, double *e,
|
||||
double *drup, double *drdown, double *dg, int *addlda) {
|
||||
double rs, ldac, ldadrup, ldadrdown, pbec, pbedrup, pbedrdown, pbed2rho;
|
||||
|
@ -213,7 +218,7 @@ void beeflocalcorrspin_(double *r, double *z, double *g, double *e,
|
|||
}
|
||||
|
||||
switch(beeftype) {
|
||||
case 0: //BEEF-vdW xc
|
||||
case 0: //BEEF-vdW xc
|
||||
rs = invpi075tothird / pow(*r,1./3.);
|
||||
corpbespin(rs, 0.5/r2k * sqrt(*g*rs) / (*r), *z,
|
||||
(beeforder>-3), 1, &ldac, &ldadrup, &ldadrdown, &pbec,
|
||||
|
@ -275,7 +280,7 @@ void beeflocalcorrpotspin_(double *r, double *z, double *g, double *e, int *addl
|
|||
}
|
||||
|
||||
switch(beeftype) {
|
||||
case 0: //BEEF-vdW xc
|
||||
case 0: //BEEF-vdW xc
|
||||
rs = invpi075tothird / pow(*r,1./3.);
|
||||
corpbespin(rs, 0.5/r2k * sqrt(*g*rs) / (*r), *z,
|
||||
(beeforder>-3), 0, &ldac, &ldadrup, &ldadrdown, &pbec,
|
||||
|
@ -311,6 +316,7 @@ void beeflocalcorrpotspin_(double *r, double *z, double *g, double *e, int *addl
|
|||
void beefsetmode_(int *mode)
|
||||
{
|
||||
beeforder = *mode;
|
||||
#pragma acc update device(beeforder)
|
||||
}
|
||||
|
||||
// initialize pseudo random number generator
|
||||
|
@ -357,6 +363,7 @@ void beefensemble_(double *beefxc, double *ensemble)
|
|||
int beef_set_type_(int *tbeef, int *ionode)
|
||||
{
|
||||
beeftype = *tbeef;
|
||||
#pragma acc update device(beeftype)
|
||||
|
||||
if(*ionode)
|
||||
{
|
||||
|
|
|
@ -546,7 +546,6 @@ CONTAINS
|
|||
DO l = 1, LEN_TRIM(lxc_name)
|
||||
lxc_name(l:l) = capital( lxc_name(l:l) )
|
||||
ENDDO
|
||||
nlxc = 0
|
||||
ii = 0
|
||||
DO WHILE ( ii < LEN_TRIM(lxc_name) )
|
||||
ii = ii + 1
|
||||
|
|
|
@ -29,9 +29,9 @@
|
|||
/* ---------------------------------------------------------------------- */
|
||||
/* ###################################################################### */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* Subroutine */ static void gcor2(double a, double a1, double b1,
|
||||
double b2, double b3, double b4, double rtrs,
|
||||
double *gg, double *ggrs)
|
||||
/* Subroutine */
|
||||
#pragma acc routine seq
|
||||
static void gcor2(double a, double a1, double b1, double b2, double b3, double b4, double rtrs, double *gg, double *ggrs)
|
||||
{
|
||||
/* Local variables */
|
||||
double q0, q1, q2, q3;
|
||||
|
@ -61,10 +61,9 @@
|
|||
|
||||
/* ###################################################################### */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* Subroutine */ void corpbe(double rs, double t,
|
||||
int lgga,
|
||||
int lpot, double *ec, double *vc,
|
||||
double *h__, double *dvc, double *dv2rho)
|
||||
/* Subroutine */
|
||||
#pragma acc routine seq
|
||||
void corpbe(double rs, double t, int lgga, int lpot, double *ec, double *vc, double *h__, double *dvc, double *dv2rho)
|
||||
{
|
||||
/* Local variables */
|
||||
double b, b2, q4, t2, q5, t4,
|
||||
|
@ -168,7 +167,9 @@
|
|||
|
||||
/* ###################################################################### */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* Subroutine */ void corpbespin(double rs, double t, double zet,
|
||||
/* Subroutine */
|
||||
#pragma acc routine seq
|
||||
void corpbespin(double rs, double t, double zet,
|
||||
int lgga,
|
||||
int lpot, double *ec, double *vcup, double *vcdown,
|
||||
double *h__, double *dvcup, double *dvcdown, double *dv2rho)
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
#ifndef pbecor_h
|
||||
#define pbecor_h 1
|
||||
|
||||
#pragma acc routine seq
|
||||
extern void corpbe(double, double,
|
||||
int, int, double *, double *,
|
||||
double *, double *, double *);
|
||||
|
||||
#pragma acc routine seq
|
||||
void corpbespin(double, double, double,
|
||||
int, int, double *, double *, double *,
|
||||
double *, double *, double *, double *);
|
||||
|
|
|
@ -35,7 +35,7 @@ SUBROUTINE dgcxc_unpol( length, r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc
|
|||
!! This routine computes the derivative of the exchange and correlation
|
||||
!! potentials of GGA family.
|
||||
!
|
||||
USE qe_drivers_gga, ONLY: gcxc, gcxc_beef
|
||||
USE qe_drivers_gga, ONLY: gcxc
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -90,10 +90,11 @@ SUBROUTINE dgcxc_unpol( length, r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc
|
|||
raux(i3:f3) = r_in ; s2aux(i3:f3) = (s+ds)**2
|
||||
raux(i4:f4) = r_in ; s2aux(i4:f4) = (s-ds)**2
|
||||
!
|
||||
!$acc data copyin( raux, s2aux ) copyout( sx, sc, v1x, v2x, v1c, v2c )
|
||||
!$acc host_data use_device( raux, s2aux, sx, sc, v1x, v2x, v1c, v2c )
|
||||
CALL gcxc( length*4, raux, s2aux, sx, sc, v1x, v2x, v1c, v2c )
|
||||
!
|
||||
IF ( igcx==43 .OR. igcc==14 ) CALL gcxc_beef( length*4, raux, s2aux, &
|
||||
sx, sc, v1x, v2x, v1c, v2c )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
! ... to avoid NaN in the next operations
|
||||
WHERE( r_in<=small .OR. s2_in<=small )
|
||||
|
@ -129,7 +130,7 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
|
|||
!! This routine computes the derivative of the exchange and correlation
|
||||
!! potentials in the spin-polarized case.
|
||||
!
|
||||
USE qe_drivers_gga, ONLY: gcx_spin, gcc_spin, gcx_spin_beef, gcc_spin_beef
|
||||
USE qe_drivers_gga, ONLY: gcx_spin, gcc_spin
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -239,8 +240,11 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
|
|||
raux(i8:f8,:) = r ; s2aux(i8:f8,:) = (s-dsdw)**2
|
||||
!
|
||||
!
|
||||
IF ( igcx/=43 ) CALL gcx_spin( length*8, raux, s2aux, sx, v1x, v2x )
|
||||
IF ( igcx==43 ) CALL gcx_spin_beef( length*8, raux, s2aux, sx, v1x, v2x )
|
||||
!$acc data copyin( raux, s2aux ) copyout( sx, v1x, v2x )
|
||||
!$acc host_data use_device( raux, s2aux, sx, v1x, v2x )
|
||||
CALL gcx_spin( length*8, raux, s2aux, sx, v1x, v2x )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
! ... up
|
||||
vrrx(:,1) = 0.5_DP * (v1x(i1:f1,1) - v1x(i2:f2,1)) / drup(:,1)
|
||||
|
@ -311,8 +315,11 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
|
|||
rtaux(i5:f5) = rt ; s2taux(i5:f5) = s2t ; zetaux(i5:f5) = zeta+dz
|
||||
rtaux(i6:f6) = rt ; s2taux(i6:f6) = s2t ; zetaux(i6:f6) = zeta-dz
|
||||
!
|
||||
IF ( igcc/=14 ) CALL gcc_spin( length*6, rtaux, zetaux, s2taux, sc, v1c, v2c )
|
||||
IF ( igcc==14 ) CALL gcc_spin_beef( length*6, rtaux, zetaux, s2taux, sc, v1c, v2c )
|
||||
!$acc data copyin( rtaux, zetaux, s2taux ) copyout( sc, v1c, v2c )
|
||||
!$acc host_data use_device( rtaux, zetaux, s2taux, sc, v1c, v2c )
|
||||
CALL gcc_spin( length*6, rtaux, zetaux, s2taux, sc, v1c, v2c )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
vrrc(:,1) = 0.5_DP * (v1c(i1:f1,1) - v1c(i2:f2,1)) / dr * null_v(:,1)
|
||||
vrrc(:,2) = 0.5_DP * (v1c(i1:f1,2) - v1c(i2:f2,2)) / dr * null_v(:,1)
|
||||
|
|
|
@ -125,7 +125,11 @@ SUBROUTINE dmxc_lda( length, rho_in, dmuxc )
|
|||
rhoaux(i1:f1) = arho+dr
|
||||
rhoaux(i2:f2) = arho-dr
|
||||
!
|
||||
!$acc data copyin( rhoaux ) copyout( ex, ec, vx, vc )
|
||||
!$acc host_data use_device( rhoaux, ex, ec, vx, vc )
|
||||
CALL xc_lda( length*2, rhoaux, ex, ec, vx, vc )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
WHERE ( arho < small ) dr = 1.0_DP ! ... to avoid NaN in the next operation
|
||||
!
|
||||
|
@ -316,7 +320,11 @@ SUBROUTINE dmxc_lsda( length, rho_in, dmuxc )
|
|||
rhoaux(i3:f3) = rhotot ; zetaux(i3:f3) = zeta_eff + dz
|
||||
rhoaux(i4:f4) = rhotot ; zetaux(i4:f4) = zeta_eff - dz
|
||||
!
|
||||
!$acc data copyin( rhoaux, zetaux ) copyout( aux1, aux2, vx, vc )
|
||||
!$acc host_data use_device( rhoaux, zetaux, aux1, aux2, vx, vc )
|
||||
CALL xc_lsda( length*4, rhoaux, zetaux, aux1, aux2, vx, vc )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
WHERE (rhotot <= small) ! ... to avoid NaN in the next operations
|
||||
dr=1.0_DP ; rhotot=0.5d0
|
||||
|
@ -438,7 +446,11 @@ SUBROUTINE dmxc_nc( length, rho_in, m, dmuxc )
|
|||
rhoaux(i5:f5) = rhotot ; zetaux(i5:f5) = zeta_eff - dz
|
||||
!
|
||||
!
|
||||
!$acc data copyin( rhoaux, zetaux ) copyout( aux1, aux2, vx, vc )
|
||||
!$acc host_data use_device( rhoaux, zetaux, aux1, aux2, vx, vc )
|
||||
CALL xc_lsda( length*5, rhoaux, zetaux, aux1, aux2, vx, vc )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
!
|
||||
vs(:) = 0.5_DP*( vx(i1:f1,1)+vc(i1:f1,1)-vx(i1:f1,2)-vc(i1:f1,2) )
|
||||
|
|
|
@ -26,8 +26,7 @@ MODULE qe_drivers_gga
|
|||
!
|
||||
PRIVATE
|
||||
!
|
||||
PUBLIC :: gcxc, gcx_spin, gcc_spin, gcc_spin_more, gcxc_beef, gcx_spin_beef, &
|
||||
gcc_spin_beef
|
||||
PUBLIC :: gcxc, gcx_spin, gcc_spin, gcc_spin_more
|
||||
!
|
||||
!
|
||||
CONTAINS
|
||||
|
@ -41,6 +40,7 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
|
|||
!
|
||||
USE exch_gga
|
||||
USE corr_gga
|
||||
USE beef_interface, ONLY: beefx, beeflocalcorr
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -80,12 +80,11 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
|
|||
ntids = omp_get_num_threads()
|
||||
#endif
|
||||
!
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data copyin(rho_in,grho_in), copyout(sx_out,sc_out,v1x_out,v2x_out,v1c_out,v2c_out)
|
||||
!$acc data deviceptr( rho_in(length), grho_in(length), sx_out(length), sc_out(length), &
|
||||
!$acc& v1x_out(length), v2x_out(length), v1c_out(length), v2c_out(length) )
|
||||
!$acc parallel loop
|
||||
#endif
|
||||
#if defined(_OPENMP) && !defined(_OPENACC)
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
!$omp private( rho, grho, sx, sx_, sxsr, v1x, v1x_, v1xsr, &
|
||||
!$omp v2x, v2x_, v2xsr, sc, v1c, v2c ) &
|
||||
|
@ -298,6 +297,10 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
|
|||
v2x = (1.0_DP - exx_fraction) * v2x
|
||||
ENDIF
|
||||
!
|
||||
CASE( 43 ) ! 'BEEX'
|
||||
!
|
||||
CALL beefx( rho, grho, sx, v1x, v2x, 0 )
|
||||
!
|
||||
CASE( 44 ) ! 'RPBE'
|
||||
!
|
||||
CALL pbex( rho, grho, 8, sx, v1x, v2x )
|
||||
|
@ -369,6 +372,11 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
|
|||
v2c = 0.871_DP * v2c
|
||||
ENDIF
|
||||
!
|
||||
CASE( 14 ) !'BEEC'
|
||||
! last parameter 0 means: do not add lda contributions
|
||||
! espresso will do that itself
|
||||
CALL beeflocalcorr( rho, grho, sc, v1c, v2c, 0 )
|
||||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
sc = 0.0_DP
|
||||
|
@ -402,6 +410,7 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
|
|||
!! Gradient corrections for exchange - Hartree a.u.
|
||||
!
|
||||
USE exch_gga
|
||||
USE beef_interface, ONLY: beefx
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -439,10 +448,11 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
|
|||
ntids = omp_get_num_threads()
|
||||
#endif
|
||||
!
|
||||
sx_tot = 0.0_DP
|
||||
!sx_tot = 0.0_DP
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data copyin(rho_in, grho2_in), copyout(sx_tot, v1x_out, v2x_out)
|
||||
!$acc data deviceptr( rho_in(length,2), grho2_in(length,2), sx_tot(length), &
|
||||
!$acc& v1x_out(length,2), v2x_out(length,2) )
|
||||
!$acc parallel loop
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
|
@ -824,6 +834,18 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
|
|||
! case igcx == 7 (meta-GGA) must be treated in a separate call to another
|
||||
! routine: needs kinetic energy density in addition to rho and grad rho
|
||||
!
|
||||
CASE( 43 ) ! BEEX
|
||||
!
|
||||
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
|
||||
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
|
||||
!
|
||||
CALL beefx( rho_up, grho2_up, sx_up, v1x_up, v2x_up, 0 )
|
||||
CALL beefx( rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw, 0 )
|
||||
!
|
||||
sx_tot(ir) = 0.5_DP * (sx_up*rnull_up + sx_dw*rnull_dw)
|
||||
v2x_up = 2.0_DP * v2x_up
|
||||
v2x_dw = 2.0_DP * v2x_dw
|
||||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
sx_tot(ir) = 0.0_DP
|
||||
|
@ -858,6 +880,7 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
|
|||
!! Implemented: Perdew86, GGA (PW91), PBE
|
||||
!
|
||||
USE corr_gga
|
||||
USE beef_interface, ONLY: beeflocalcorrspin
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -891,7 +914,8 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
|
|||
#endif
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data copyin(rho_in, grho_in), copyout(sc_out, v1c_out, v2c_out), copy(zeta_io)
|
||||
!$acc data deviceptr( rho_in(length), zeta_io(length), grho_in(length), &
|
||||
!$acc& sc_out(length), v1c_out(length,2), v2c_out(length) )
|
||||
!$acc parallel loop
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
|
@ -941,6 +965,10 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
|
|||
!
|
||||
CALL pbec_spin( rho, zeta, grho, 2, sc, v1c_up, v1c_dw, v2c )
|
||||
!
|
||||
CASE( 14 )
|
||||
!
|
||||
CALL beeflocalcorrspin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c, 0 )
|
||||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
sc = 0.0_DP
|
||||
|
@ -1015,13 +1043,9 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
|
|||
ntids = omp_get_num_threads()
|
||||
#endif
|
||||
!
|
||||
sc = 0.0_DP
|
||||
v1c = 0.0_DP
|
||||
v2c = 0.0_DP
|
||||
v2c_ud = 0.0_DP
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data copyin(rho_in, grho_in, grho_ud_in), copyout(sc, v1c, v2c, v2c_ud)
|
||||
!$acc data deviceptr( rho_in(length,2), grho_in(length,2), grho_ud_in(length), &
|
||||
!$acc& sc(length), v1c(length,2), v2c(length,2), v2c_ud(length) )
|
||||
!$acc parallel loop
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
|
@ -1080,7 +1104,7 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
|
|||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
!CALL xclib_error(" gcc_spin_more "," gradient correction not implemented ",1) !***acc test
|
||||
!CALL xclib_error(" gcc_spin_more "," gradient correction not implemented ",1)
|
||||
!
|
||||
END SELECT
|
||||
!
|
||||
|
@ -1091,220 +1115,10 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
|
|||
!$omp end do
|
||||
!$omp end parallel
|
||||
#endif
|
||||
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE gcc_spin_more
|
||||
!
|
||||
!
|
||||
! ========> BEEF GGA DRIVERS <========================
|
||||
!
|
||||
!------------------------------------------------------------------------
|
||||
SUBROUTINE gcxc_beef( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
|
||||
v2x_out, v1c_out, v2c_out )
|
||||
!---------------------------------------------------------------------
|
||||
!! Driver for BEEF gga xc terms. Unpolarized case.
|
||||
!
|
||||
USE beef_interface, ONLY: beefx, beeflocalcorr
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
REAL(DP), INTENT(IN), DIMENSION(length) :: rho_in
|
||||
REAL(DP), INTENT(IN), DIMENSION(length) :: grho_in
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: sx_out, sc_out
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: v1x_out, v2x_out
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: v1c_out, v2c_out
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: ir
|
||||
REAL(DP) :: rho, grho
|
||||
REAL(DP) :: sx, v1x, v2x
|
||||
REAL(DP) :: sc, v1c, v2c
|
||||
!
|
||||
IF (igcx == 43 .OR. igcc == 14) THEN
|
||||
!
|
||||
DO ir = 1, length
|
||||
grho = grho_in(ir)
|
||||
IF ( rho_in(ir) <= rho_threshold_gga .OR. grho <= grho_threshold_gga ) THEN
|
||||
sx_out(ir) = 0.0_DP ; sc_out(ir) = 0.0_DP
|
||||
v1x_out(ir) = 0.0_DP ; v1c_out(ir) = 0.0_DP
|
||||
v2x_out(ir) = 0.0_DP ; v2c_out(ir) = 0.0_DP
|
||||
CYCLE
|
||||
ENDIF
|
||||
!
|
||||
rho = ABS(rho_in(ir))
|
||||
!
|
||||
IF ( igcx == 43 ) THEN
|
||||
! last parameter = 0 means do not add LDA (=Slater) exchange
|
||||
! (espresso) will add it itself
|
||||
CALL beefx( rho, grho, sx, v1x, v2x, 0 )
|
||||
sx_out(ir) = sx
|
||||
v1x_out(ir) = v1x
|
||||
v2x_out(ir) = v2x
|
||||
ENDIF
|
||||
!
|
||||
IF ( igcc == 14 ) THEN
|
||||
! last parameter 0 means: do not add lda contributions
|
||||
! espresso will do that itself
|
||||
CALL beeflocalcorr( rho, grho, sc, v1c, v2c, 0 )
|
||||
sc_out(ir) = sc
|
||||
v1c_out(ir) = v1c
|
||||
v2c_out(ir) = v2c
|
||||
ENDIF
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE gcxc_beef
|
||||
!
|
||||
!-----------------------------------------------------------------------------
|
||||
SUBROUTINE gcx_spin_beef( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
|
||||
!--------------------------------------------------------------------------
|
||||
!! Driver for BEEF gga exchange term. Polarized case.
|
||||
!
|
||||
USE beef_interface, ONLY: beefx
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
REAL(DP), INTENT(IN), DIMENSION(length,2) :: rho_in
|
||||
REAL(DP), INTENT(IN), DIMENSION(length,2) :: grho2_in
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: sx_tot
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1x_out
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v2x_out
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: ir
|
||||
REAL(DP) :: rho_up, rho_dw, grho2_up, grho2_dw
|
||||
REAL(DP) :: v1x_up, v1x_dw, v2x_up, v2x_dw
|
||||
REAL(DP) :: sx_up, sx_dw, rnull_up, rnull_dw
|
||||
!
|
||||
REAL(DP), PARAMETER :: small=1.D-10
|
||||
REAL(DP), PARAMETER :: rho_trash=0.5_DP, grho2_trash=0.2_DP
|
||||
|
||||
! ... Temporary workaround for BEEF, which does not support GPU.
|
||||
IF ( igcx == 43 ) THEN
|
||||
!
|
||||
DO ir = 1, length
|
||||
!
|
||||
rho_up = rho_in(ir,1)
|
||||
rho_dw = rho_in(ir,2)
|
||||
grho2_up = grho2_in(ir,1)
|
||||
grho2_dw = grho2_in(ir,2)
|
||||
rnull_up = 1.0_DP
|
||||
rnull_dw = 1.0_DP
|
||||
!
|
||||
IF ( rho_up+rho_dw <= small ) THEN
|
||||
sx_tot(ir) = 0.0_DP
|
||||
v1x_out(ir,1) = 0.0_DP
|
||||
v2x_out(ir,1) = 0.0_DP
|
||||
v1x_out(ir,2) = 0.0_DP
|
||||
v2x_out(ir,2) = 0.0_DP
|
||||
CYCLE
|
||||
ELSE
|
||||
IF ( rho_up<=small .OR. SQRT(ABS(grho2_up))<=small ) THEN
|
||||
rho_up = rho_trash
|
||||
grho2_up = grho2_trash
|
||||
rnull_up = 0.0_DP
|
||||
ENDIF
|
||||
IF ( rho_dw<=small .OR. SQRT(ABS(grho2_dw))<=small ) THEN
|
||||
rho_dw = rho_trash
|
||||
grho2_dw = grho2_trash
|
||||
rnull_dw = 0.0_DP
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
rho_up = 2.0_DP * rho_up ; rho_dw = 2.0_DP * rho_dw
|
||||
grho2_up = 4.0_DP * grho2_up ; grho2_dw = 4.0_DP * grho2_dw
|
||||
!
|
||||
CALL beefx(rho_up, grho2_up, sx_up, v1x_up, v2x_up, 0)
|
||||
CALL beefx(rho_dw, grho2_dw, sx_dw, v1x_dw, v2x_dw, 0)
|
||||
!
|
||||
sx_tot(ir) = 0.5_DP * (sx_up*rnull_up + sx_dw*rnull_dw)
|
||||
v2x_up = 2.0_DP * v2x_up
|
||||
v2x_dw = 2.0_DP * v2x_dw
|
||||
!
|
||||
v1x_out(ir,1) = v1x_up * rnull_up
|
||||
v1x_out(ir,2) = v1x_dw * rnull_dw
|
||||
v2x_out(ir,1) = v2x_up * rnull_up
|
||||
v2x_out(ir,2) = v2x_dw * rnull_dw
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE gcx_spin_beef
|
||||
!
|
||||
!
|
||||
!--------------------------------------------------------------------------------
|
||||
SUBROUTINE gcc_spin_beef( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out )
|
||||
!-------------------------------------------------------------------------------
|
||||
!! BEEF Gradient correction.
|
||||
!
|
||||
USE beef_interface, ONLY: beeflocalcorrspin
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
REAL(DP), INTENT(IN), DIMENSION(length) :: rho_in
|
||||
REAL(DP), INTENT(INOUT), DIMENSION(length) :: zeta_io
|
||||
REAL(DP), INTENT(IN), DIMENSION(length) :: grho_in
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: sc_out
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1c_out
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: v2c_out
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: ir
|
||||
REAL(DP) :: rho, zeta, grho
|
||||
REAL(DP) :: sc, v1c_up, v1c_dw, v2c
|
||||
!
|
||||
IF ( igcc == 14 ) THEN
|
||||
!
|
||||
DO ir = 1, length
|
||||
!
|
||||
rho = rho_in(ir)
|
||||
grho = grho_in(ir)
|
||||
IF ( ABS(zeta_io(ir))<=1.0_DP ) zeta_io(ir) = SIGN( MIN(ABS(zeta_io(ir)), &
|
||||
(1.0_DP-rho_threshold_gga)), zeta_io(ir) )
|
||||
zeta = zeta_io(ir)
|
||||
!
|
||||
IF ( ABS(zeta)>1.0_DP .OR. rho<=rho_threshold_gga .OR. &
|
||||
SQRT(ABS(grho))<=rho_threshold_gga ) THEN
|
||||
sc_out(ir) = 0.0_DP
|
||||
v1c_out(ir,1) = 0.0_DP ; v2c_out(ir) = 0.0_DP
|
||||
v1c_out(ir,2) = 0.0_DP
|
||||
CYCLE
|
||||
ENDIF
|
||||
rho = rho_in(ir)
|
||||
grho = grho_in(ir)
|
||||
zeta = zeta_io(ir)
|
||||
!
|
||||
IF ( ABS(zeta)>1.0_DP .OR. rho<=rho_threshold_gga .OR. &
|
||||
SQRT(ABS(grho))<=rho_threshold_gga ) CYCLE
|
||||
!
|
||||
CALL beeflocalcorrspin( rho, zeta, grho, sc, v1c_up, v1c_dw, v2c, 0 )
|
||||
!
|
||||
sc_out(ir) = sc
|
||||
v1c_out(ir,1) = v1c_up
|
||||
v1c_out(ir,2) = v1c_dw
|
||||
v2c_out(ir) = v2c
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE gcc_spin_beef
|
||||
!
|
||||
!
|
||||
END MODULE qe_drivers_gga
|
||||
|
|
|
@ -90,10 +90,9 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out )
|
|||
!
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data copyin(rho_in), copyout(ex_out, vx_out, ec_out, vc_out)
|
||||
!$acc data deviceptr( rho_in(length),ex_out(length),vx_out(length),ec_out(length),vc_out(length) )
|
||||
!$acc parallel loop
|
||||
#endif
|
||||
#if defined(_OPENMP) && !defined(_OPENACC)
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
!$omp private( rho, rs, ex, ec, ec_, vx, vc, vc_ ) &
|
||||
!$omp shared( rho_in, length, iexch, icorr, ex_out, ec_out, vx_out, vc_out, &
|
||||
|
@ -257,8 +256,7 @@ SUBROUTINE xc_lda( length, rho_in, ex_out, ec_out, vx_out, vc_out )
|
|||
ENDDO
|
||||
#if defined(_OPENACC)
|
||||
!$acc end data
|
||||
#endif
|
||||
#if defined(_OPENMP) && !defined(_OPENACC)
|
||||
#else
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
#endif
|
||||
|
@ -317,10 +315,9 @@ SUBROUTINE xc_lsda( length, rho_in, zeta_in, ex_out, ec_out, vx_out, vc_out )
|
|||
#endif
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data copyin(rho_in, zeta_in), copyout(ex_out, vx_out, ec_out, vc_out)
|
||||
!$acc data deviceptr( rho_in(length), zeta_in(length) ,ex_out(length),vx_out(length,2),ec_out(length),vc_out(length,2) )
|
||||
!$acc parallel loop
|
||||
#endif
|
||||
#if defined(_OPENMP) && !defined(_OPENACC)
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
!$omp private( rho, rs, zeta, ex, ec, ec_, vx_up, vx_dw, vc_up, &
|
||||
!$omp vc_dw, vc_up_, vc_dw_ ) &
|
||||
|
@ -477,14 +474,12 @@ SUBROUTINE xc_lsda( length, rho_in, zeta_in, ex_out, ec_out, vx_out, vc_out )
|
|||
ec_out(ir) = ec ; vc_out(ir,1) = vc_up ; vc_out(ir,2) = vc_dw
|
||||
!
|
||||
ENDDO
|
||||
#if defined(_OPENMP) && !defined(_OPENACC)
|
||||
#if defined(_OPENACC)
|
||||
!$acc end data
|
||||
#else
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
#endif
|
||||
#if defined(_OPENACC)
|
||||
!$acc end data
|
||||
#endif
|
||||
!
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -17,7 +17,6 @@ MODULE qe_drivers_mgga
|
|||
USE kind_l, ONLY: DP
|
||||
USE dft_setting_params, ONLY: imeta, imetac, rho_threshold_mgga, &
|
||||
grho2_threshold_mgga, tau_threshold_mgga
|
||||
USE metagga
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -37,31 +36,33 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
|
|||
!! Available cases: M06L and TPSS. Other mGGA functionals can be used
|
||||
!! through Libxc.
|
||||
!
|
||||
USE metagga
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! Number of k-points
|
||||
REAL(DP), DIMENSION(length) :: rho
|
||||
REAL(DP), INTENT(IN) :: rho(length)
|
||||
!! Charge density
|
||||
REAL(DP), DIMENSION(length) :: grho2
|
||||
REAL(DP), INTENT(IN) :: grho2(length)
|
||||
!! Square modulus of the density gradient
|
||||
REAL(DP), DIMENSION(length) :: tau
|
||||
REAL(DP), INTENT(IN) :: tau(length)
|
||||
!! Laplacian of the density
|
||||
REAL(DP), DIMENSION(length) :: ex
|
||||
REAL(DP), INTENT(OUT) :: ex(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), DIMENSION(length) :: ec
|
||||
REAL(DP), INTENT(OUT) :: ec(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), DIMENSION(length) :: v1x
|
||||
REAL(DP), INTENT(OUT) :: v1x(length)
|
||||
!! \( D\ E_x\ /\ D\ \text{rho} \)
|
||||
REAL(DP), DIMENSION(length) :: v2x
|
||||
REAL(DP), INTENT(OUT) :: v2x(length)
|
||||
!! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), DIMENSION(length) :: v3x
|
||||
REAL(DP), INTENT(OUT) :: v3x(length)
|
||||
!! \( D\ E_x\ /\ D\ \text{tau} \)
|
||||
REAL(DP), DIMENSION(length) :: v1c
|
||||
REAL(DP), INTENT(OUT) :: v1c(length)
|
||||
!! \( D\ E_c\ /\ D\ \text{rho} \)
|
||||
REAL(DP), DIMENSION(length) :: v2c
|
||||
REAL(DP), INTENT(OUT) :: v2c(1,length,1)
|
||||
!! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), DIMENSION(length) :: v3c
|
||||
REAL(DP), INTENT(OUT) :: v3c(length)
|
||||
!! \( D\ E_c\ /\ D\ \text{tau} \)
|
||||
!
|
||||
! ... local variables
|
||||
|
@ -69,15 +70,23 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
|
|||
INTEGER :: k
|
||||
REAL(DP) :: arho
|
||||
!
|
||||
v1x=0.d0 ; v2x=0.d0 ; v3x=0.d0 ; ex=0.d0
|
||||
v1c=0.d0 ; v2c=0.d0 ; v3c=0.d0 ; ec=0.d0
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho(length), grho2(length), tau(length), ex(length), ec(length), &
|
||||
!$acc& v1x(length), v2x(length), v3x(length), v1c(length), v2c(1,length,1), &
|
||||
!$acc& v3c(length) )
|
||||
!
|
||||
!$acc parallel loop
|
||||
#endif
|
||||
DO k = 1, length
|
||||
!
|
||||
arho = ABS(rho(k))
|
||||
!
|
||||
IF ( (arho<=rho_threshold_mgga).OR.(grho2(k)<=grho2_threshold_mgga).OR. &
|
||||
(ABS(tau(k))<=rho_threshold_mgga) ) CYCLE
|
||||
(ABS(tau(k))<=rho_threshold_mgga) ) THEN
|
||||
v1x(k)=0.d0 ; v2x(k)=0.d0 ; v3x(k)=0.d0 ; ex(k)=0.d0
|
||||
v1c(k)=0.d0 ; v2c(1,k,1)=0.d0 ; v3c(k)=0.d0 ; ec(k)=0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
!
|
||||
! ...libxc-like threshold management
|
||||
!grho2(k) = MIN( grho2(k), (8.d0*rho(k)*tau(k))**2 )
|
||||
|
@ -86,21 +95,25 @@ SUBROUTINE tau_xc( length, rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c
|
|||
CASE( 1 )
|
||||
!
|
||||
CALL tpsscxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), &
|
||||
v3x(k), v1c(k), v2c(k), v3c(k) )
|
||||
v3x(k), v1c(k), v2c(1,k,1), v3c(k) )
|
||||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
CALL m06lxc( arho, grho2(k), tau(k), ex(k), ec(k), v1x(k), v2x(k), &
|
||||
v3x(k), v1c(k), v2c(k), v3c(k) )
|
||||
v3x(k), v1c(k), v2c(1,k,1), v3c(k) )
|
||||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta )
|
||||
!CALL xclib_error( 'tau_xc', 'This case is not implemented', imeta )
|
||||
!
|
||||
END SELECT
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc end data
|
||||
#endif
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE tau_xc
|
||||
|
@ -114,65 +127,74 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
|
|||
!! Available cases: M06L and TPSS. Other mGGA functionals can be used
|
||||
!! through Libxc.
|
||||
!
|
||||
USE metagga
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! Number of k-points
|
||||
REAL(DP), INTENT(IN), DIMENSION(length,2) :: rho
|
||||
REAL(DP), INTENT(IN) :: rho(length,2)
|
||||
!! Charge density
|
||||
REAL(DP), INTENT(IN), DIMENSION(3,length,2) :: grho
|
||||
REAL(DP), INTENT(IN) :: grho(3,length,2)
|
||||
!! The density gradient
|
||||
REAL(DP), INTENT(IN), DIMENSION(length,2) :: tau
|
||||
REAL(DP), INTENT(IN) :: tau(length,2)
|
||||
!! Laplacian of the density
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: ex
|
||||
REAL(DP), INTENT(OUT) :: ex(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: ec
|
||||
REAL(DP), INTENT(OUT) :: ec(length)
|
||||
!! \(E_x = \int e_x(\text{rho},\text{grho}) dr \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1x
|
||||
REAL(DP), INTENT(OUT) :: v1x(length,2)
|
||||
!! \( D\ E_x\ /\ D\ \text{rho} \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v2x
|
||||
REAL(DP), INTENT(OUT) :: v2x(length,2)
|
||||
!! \( D\ E_x\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3x
|
||||
REAL(DP), INTENT(OUT) :: v3x(length,2)
|
||||
!! \( D\ E_x\ /\ D\ \text{tau} \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v1c
|
||||
REAL(DP), INTENT(OUT) :: v1c(length,2)
|
||||
!! \( D\ E_c\ /\ D\ \text{rho} \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(3,length,2) :: v2c
|
||||
REAL(DP), INTENT(OUT) :: v2c(3,length,2)
|
||||
!! \( D\ E_c\ /\ D( D\ \text{rho}/D\ r_\alpha )/|\nabla\text{rho}| \)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v3c
|
||||
REAL(DP), INTENT(OUT) :: v3c(length,2)
|
||||
!! \( D\ E_c\ /\ D\ \text{tau} \)
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: k
|
||||
REAL(DP) :: rh, zeta, atau, grho2(2), ggrho2
|
||||
REAL(DP) :: rh, zeta, atau, grho2up, grho2dw, ggrho2
|
||||
REAL(DP) :: v2cup, v2cdw
|
||||
!
|
||||
ex=0.0_DP ; v1x=0.0_DP ; v2x=0.0_DP ; v3x=0.0_DP
|
||||
ec=0.0_DP ; v1c=0.0_DP ; v2c=0.0_DP ; v3c=0.0_DP
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho(length,2), grho(3,length,2), tau(length,2), ex(length), &
|
||||
!$acc& ec(length), v1x(length,2), v2x(length,2), v3x(length,2), &
|
||||
!$acc& v1c(length,2), v2c(3,length,2), v3c(length,2) )
|
||||
!
|
||||
!$acc parallel loop
|
||||
#endif
|
||||
DO k = 1, length
|
||||
!
|
||||
rh = rho(k,1) + rho(k,2)
|
||||
atau = tau(k,1) + tau(k,2) ! KE-density in Hartree
|
||||
! ...libxc-like threshold management
|
||||
!grho2(1) = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 )
|
||||
!grho2(2) = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 )
|
||||
grho2(1) = SUM(grho(:,k,1)**2)
|
||||
grho2(2) = SUM(grho(:,k,2)**2)
|
||||
ggrho2 = ( grho2(1) + grho2(2) ) * 4.0_DP
|
||||
!grho2up = MIN( SUM(grho(:,k,1)**2), (8.d0*rho(k,1)*tau(k,1))**2 )
|
||||
!grho2dw = MIN( SUM(grho(:,k,2)**2), (8.d0*rho(k,2)*tau(k,2))**2 )
|
||||
grho2up = SUM(grho(:,k,1)**2)
|
||||
grho2dw = SUM(grho(:,k,2)**2)
|
||||
ggrho2 = ( grho2up + grho2dw ) * 4.0_DP
|
||||
!
|
||||
IF ( (rh <= rho_threshold_mgga).OR.(ggrho2 <= grho2_threshold_mgga).OR.&
|
||||
(ABS(atau) <= tau_threshold_mgga) ) CYCLE
|
||||
(ABS(atau) <= tau_threshold_mgga) ) THEN
|
||||
v1x(k,:)=0.d0 ; v2x(k,:)=0.d0 ; v3x(k,:)=0.d0 ; ex(k)=0.d0
|
||||
v1c(k,:)=0.d0 ; v2c(:,k,:)=0.d0 ; v3c(k,:)=0.d0 ; ec(k)=0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
!
|
||||
SELECT CASE( imeta )
|
||||
CASE( 1 )
|
||||
!
|
||||
CALL tpsscx_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), &
|
||||
CALL tpsscx_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), &
|
||||
tau(k,2), ex(k), v1x(k,1), v1x(k,2), v2x(k,1), &
|
||||
v2x(k,2), v3x(k,1), v3x(k,2) )
|
||||
!
|
||||
zeta = (rho(k,1) - rho(k,2)) / rh
|
||||
zeta = MAX( MIN( 0.99999999_DP, zeta ), -0.99999999_DP )
|
||||
zeta = MAX( MIN( 0.99999999_DP, (rho(k,1)-rho(k,2))/rh ), -0.99999999_DP )
|
||||
!
|
||||
CALL tpsscc_spin( rh, zeta, grho(:,k,1), grho(:,k,2), atau, ec(k), &
|
||||
v1c(k,1), v1c(k,2), v2c(:,k,1), v2c(:,k,2), &
|
||||
|
@ -180,7 +202,7 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
|
|||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
CALL m06lxc_spin( rho(k,1), rho(k,2), grho2(1), grho2(2), tau(k,1), &
|
||||
CALL m06lxc_spin( rho(k,1), rho(k,2), grho2up, grho2dw, tau(k,1), &
|
||||
tau(k,2), ex(k), ec(k), v1x(k,1), v1x(k,2), &
|
||||
v2x(k,1), v2x(k,2), v3x(k,1), v3x(k,2), v1c(k,1), &
|
||||
v1c(k,2), v2cup, v2cdw, v3c(k,1), v3c(k,2) )
|
||||
|
@ -190,12 +212,16 @@ SUBROUTINE tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c,
|
|||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta )
|
||||
!CALL xclib_error( 'tau_xc_spin', 'This case not implemented', imeta )
|
||||
!
|
||||
END SELECT
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc end data
|
||||
#endif
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE tau_xc_spin
|
||||
|
|
|
@ -6,22 +6,23 @@
|
|||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!------------------------------------------------------------------------
|
||||
MODULE metagga !<GPU:metagga=>metagga_gpu>
|
||||
MODULE metagga
|
||||
!-------------------------------------------------------------------------
|
||||
!! MetaGGA functionals. Available functionals:
|
||||
!
|
||||
!! * TPSS (Tao, Perdew, Staroverov & Scuseria)
|
||||
!! * M06L
|
||||
!
|
||||
USE exch_lda, ONLY: slater !<GPU:slater=>slater_d,exch_lda=>exch_lda_gpu>
|
||||
USE corr_lda, ONLY: pw, pw_spin !<GPU:pw=>pw_d,pw_spin=>pw_spin_d,corr_lda=>corr_lda_gpu>
|
||||
USE corr_gga, ONLY: pbec, pbec_spin !<GPU:pbec=>pbec_d,pbec_spin=>pbec_spin_d,corr_gga=>corr_gga_gpu>
|
||||
USE exch_lda, ONLY: slater
|
||||
USE corr_lda, ONLY: pw, pw_spin
|
||||
USE corr_gga, ONLY: pbec, pbec_spin
|
||||
!
|
||||
CONTAINS
|
||||
! TPSS
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c ) !<GPU:DEVICE>
|
||||
SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!! TPSS metaGGA corrections for exchange and correlation - Hartree a.u.
|
||||
!! Definition: \(E_x = \int E_x(\text{rho},\text{grho}) dr\)
|
||||
|
@ -70,9 +71,9 @@ SUBROUTINE tpsscxc( rho, grho, tau, sx, sc, v1x, v2x, v3x, v1c, v2c, v3c )
|
|||
ENDIF
|
||||
!
|
||||
! exchange
|
||||
CALL metax( rho, grho, tau, sx, v1x, v2x, v3x ) !<GPU:metax=>metax_d>
|
||||
CALL metax( rho, grho, tau, sx, v1x, v2x, v3x )
|
||||
! correlation
|
||||
CALL metac( rho, grho, tau, sc, v1c, v2c, v3c ) !<GPU:metac=>metac_d>
|
||||
CALL metac( rho, grho, tau, sc, v1c, v2c, v3c )
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
@ -80,7 +81,8 @@ END SUBROUTINE tpsscxc
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
||||
SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x )
|
||||
!$acc routine seq
|
||||
!--------------------------------------------------------------------
|
||||
!! TPSS meta-GGA exchange potential and energy.
|
||||
!
|
||||
|
@ -133,8 +135,8 @@ SUBROUTINE metax( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:
|
|||
ENDIF
|
||||
!
|
||||
rs = pi34/rho**third
|
||||
CALL slater( rs, ex_unif, vx_unif ) !<GPU:slater=>slater_d>
|
||||
CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) !<GPU:metaFX=>metaFX_d>
|
||||
CALL slater( rs, ex_unif, vx_unif )
|
||||
CALL metaFX( rho, grho2, tau, fx, f1x, f2x, f3x )
|
||||
!
|
||||
ex = rho*ex_unif
|
||||
v1x = vx_unif*fx + ex*f1x
|
||||
|
@ -148,7 +150,8 @@ END SUBROUTINE metax
|
|||
!
|
||||
!
|
||||
!------------------------------------------------------------------
|
||||
SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:DEVICE>
|
||||
SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c )
|
||||
!$acc routine seq
|
||||
!--------------------------------------------------------------
|
||||
!! TPSS meta-GGA correlation energy and potentials.
|
||||
!
|
||||
|
@ -208,11 +211,11 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:
|
|||
IF (rhoup > small) THEN
|
||||
!
|
||||
rs = (pi34/rhoup)**third
|
||||
CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, 1.0_DP, ec_unif, vc_unif_s(1), vc_unif_s(2) )
|
||||
!
|
||||
IF (ABS(grhoup) > small) THEN
|
||||
! zeta=1.0_DP-small to avoid pow_e of 0 in pbec_spin
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup**2, 1, &
|
||||
ec_sum, v1c_sum(1), v1c_sum(2), v2c_sum )
|
||||
ELSE
|
||||
ec_sum = 0.0_DP
|
||||
|
@ -229,13 +232,13 @@ SUBROUTINE metac( rho, grho2, tau, ec, v1c, v2c, v3c ) !<GPU:
|
|||
ENDIF
|
||||
!
|
||||
rs = (pi34/rho)**third
|
||||
CALL pw( rs, 1, ec_unif, vc_unif ) !<GPU:pw=>pw_d>
|
||||
CALL pw( rs, 1, ec_unif, vc_unif )
|
||||
!
|
||||
! ... PBE correlation energy and potential:
|
||||
! ec_pbe=rho*H, not rho*(epsion_c_uinf + H)
|
||||
! v1c_pbe=D (rho*H) /D rho
|
||||
! v2c_pbe= for rho, 2 for
|
||||
CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe ) !<GPU:pbec=>pbec_d>
|
||||
CALL pbec( rho, grho2, 1, ec_pbe, v1c_pbe, v2c_pbe )
|
||||
!
|
||||
ec_pbe = ec_pbe/rho+ec_unif
|
||||
v1c_pbe = (v1c_pbe+vc_unif-ec_pbe)/rho
|
||||
|
@ -275,7 +278,8 @@ END SUBROUTINE metac
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x ) !<GPU:DEVICE>
|
||||
SUBROUTINE metaFX( rho, grho2, tau, fx, f1x, f2x, f3x )
|
||||
!$acc routine seq
|
||||
!-------------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -397,8 +401,9 @@ END SUBROUTINE metaFX
|
|||
!
|
||||
!
|
||||
!---------------------------------------------------------------------------
|
||||
SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, & !<GPU:DEVICE>
|
||||
SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
|
||||
v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!! TPSS metaGGA for exchange - Hartree a.u.
|
||||
!
|
||||
|
@ -442,7 +447,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
|
|||
rho = rhoup + rhodw
|
||||
IF (rhoup>small .AND. SQRT(ABS(grhoup2))>small &
|
||||
.AND. ABS(tauup) > small) THEN
|
||||
CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, & !<GPU:metax=>metax_d>
|
||||
CALL metax( 2.0_DP*rhoup, 4.0_DP*grhoup2, &
|
||||
2.0_DP*tauup, sxup, v1xup, v2xup, v3xup )
|
||||
ELSE
|
||||
sxup = 0.0_DP
|
||||
|
@ -453,7 +458,7 @@ SUBROUTINE tpsscx_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, sx, &
|
|||
!
|
||||
IF (rhodw > small .AND. SQRT(ABS(grhodw2)) > small &
|
||||
.AND. ABS(taudw) > small) THEN
|
||||
CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, & !<GPU:metax=>metax_d>
|
||||
CALL metax( 2.0_DP*rhodw, 4.0_DP*grhodw2, &
|
||||
2.0_DP*taudw, sxdw, v1xdw, v2xdw, v3xdw )
|
||||
ELSE
|
||||
sxdw = 0.0_DP
|
||||
|
@ -472,8 +477,9 @@ END SUBROUTINE tpsscx_spin
|
|||
!
|
||||
!
|
||||
!---------------------------------------------------------------------------
|
||||
SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
||||
SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, &
|
||||
atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw )
|
||||
!$acc routine seq
|
||||
!--------------------------------------------------------------------------
|
||||
!! TPSS metaGGA for correlations - Hartree a.u.
|
||||
!
|
||||
|
@ -536,7 +542,7 @@ SUBROUTINE tpsscc_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
!
|
||||
v3c = 0.0_DP
|
||||
ELSE
|
||||
CALL metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:metac_spin=>metac_spin_d>
|
||||
CALL metac_spin( rho, zeta, grhoup, grhodw, &
|
||||
atau, sc, v1cup, v1cdw, v2cup, v2cdw, v3c )
|
||||
ENDIF
|
||||
!
|
||||
|
@ -549,8 +555,9 @@ END SUBROUTINE tpsscc_spin
|
|||
!
|
||||
!
|
||||
!---------------------------------------------------------------
|
||||
SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
||||
SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, &
|
||||
tau, sc, v1up, v1dw, v2up, v2dw, v3 )
|
||||
!$acc routine seq
|
||||
!---------------------------------------------------------------
|
||||
USE kind_l, ONLY : DP
|
||||
!
|
||||
|
@ -634,10 +641,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
!
|
||||
v2_tmp = 0.0_DP
|
||||
rs = (pi34/rho)**third
|
||||
CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_u, vc_u(1), vc_u(2) )
|
||||
!
|
||||
IF ((ABS(grho) > small) .AND. (zeta <= 1.0_DP)) THEN
|
||||
CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp ) !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rho, zeta, grho2, 1, ec_pbe, v1_pbe(1), v1_pbe(2), v2_tmp )
|
||||
v1up_pbe=v1_pbe(1)
|
||||
v1dw_pbe=v1_pbe(2)
|
||||
ELSE
|
||||
|
@ -668,10 +675,10 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
v2_tmp = 0.0_DP
|
||||
!
|
||||
rs = (pi34/rhoup)**third
|
||||
CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, 1.0_DP, ec_u, vc_u(1), vc_u(2) )
|
||||
!
|
||||
IF (SQRT(grhoup2) > small) THEN
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rhoup, 1.0_DP-small, grhoup2, 1, &
|
||||
ecup_0, v1_0v(1), v1_0v(2), v2_tmp )
|
||||
v1up_0 = v1_0v(1)
|
||||
v1dw_0 = v1_0v(2)
|
||||
|
@ -709,11 +716,11 @@ SUBROUTINE metac_spin( rho, zeta, grhoup, grhodw, & !<GPU:DEVICE>
|
|||
v2_tmp = 0.0_DP
|
||||
!
|
||||
rs = (pi34/rhodw)**third
|
||||
CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, -1.0_DP, ec_u, vc_u(1), vc_u(2) )
|
||||
!
|
||||
IF (SQRT(grhodw2) > small) THEN
|
||||
!
|
||||
CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, & !<GPU:pbec_spin=>pbec_spin_d>
|
||||
CALL pbec_spin( rhodw, -1.0_DP+small, grhodw2, 1, &
|
||||
ecdw_0, v1_0v(1), v1_0v(2), v2_tmp )
|
||||
!
|
||||
v1up_0 = v1_0v(1)
|
||||
|
@ -877,7 +884,8 @@ END SUBROUTINE metac_spin
|
|||
! ec, v1c, v2c, v3c as above for correlation
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c ) !<GPU:DEVICE>
|
||||
SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -923,14 +931,14 @@ SUBROUTINE m06lxc( rho, grho2, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
|||
taub = taua ! Tau is defined as summ_i( |nabla phi_i|**2 )
|
||||
! in the following M06L routines
|
||||
!
|
||||
CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x ) !<GPU:m06lx=>m06lx_d>
|
||||
CALL m06lx( rhoa, grho2a, taua, ex, v1x, v2x, v3x )
|
||||
!
|
||||
ex = two * ex ! Add the two components up + dw
|
||||
!
|
||||
v2x = 0.5_dp * v2x
|
||||
!v3x = 2.0_dp * v3x !**mismatch with Libxc by a factor of 2
|
||||
!
|
||||
CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, & !<GPU:m06lc=>m06lc_d>
|
||||
CALL m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c, v2c, v3c, &
|
||||
v1cb, v2cb, v3cb )
|
||||
!
|
||||
v2c = 0.5_dp * v2c
|
||||
|
@ -940,9 +948,10 @@ END SUBROUTINE m06lxc
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, & !<GPU:DEVICE>
|
||||
SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, &
|
||||
ex, ec, v1xup, v1xdw, v2xup, v2xdw, v3xup, v3xdw, &
|
||||
v1cup, v1cdw, v2cup, v2cdw, v3cup, v3cdw )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -959,14 +968,14 @@ SUBROUTINE m06lxc_spin( rhoup, rhodw, grhoup2, grhodw2, tauup, taudw, &
|
|||
taua = tauup * two ! Tau is defined as summ_i( |nabla phi_i|**2 )
|
||||
taub = taudw * two ! in the rest of the routine
|
||||
!
|
||||
CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup ) !<GPU:m06lx=>m06lx_d>
|
||||
CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw ) !<GPU:m06lx=>m06lx_d>
|
||||
CALL m06lx( rhoup, grhoup2, taua, exup, v1xup, v2xup, v3xup )
|
||||
CALL m06lx( rhodw, grhodw2, taub, exdw, v1xdw, v2xdw, v3xdw )
|
||||
!
|
||||
ex = exup + exdw
|
||||
!v3xup = 2.0_dp * v3xup !**mismatch with Libxc by a factor of 2
|
||||
!v3xdw = 2.0_dp * v3xdw !**
|
||||
!
|
||||
CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, & !<GPU:m06lc=>m06lc_d>
|
||||
CALL m06lc( rhoup, rhodw, grhoup2, grhodw2, taua, taub, &
|
||||
ec, v1cup, v2cup, v3cup, v1cdw, v2cdw, v3cdw )
|
||||
!v3cup = 2.0_dp * v3cup !**
|
||||
!v3cdw = 2.0_dp * v3cdw !**
|
||||
|
@ -977,7 +986,8 @@ END SUBROUTINE m06lxc_spin
|
|||
!=============================== M06L exchange ==========================
|
||||
!
|
||||
!-------------------------------------------------------------------------------
|
||||
SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
||||
SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x )
|
||||
!$acc routine seq
|
||||
!---------------------------------------------------------------------------
|
||||
!! M06L exchange.
|
||||
!
|
||||
|
@ -1082,7 +1092,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
|||
gh = one + alpha * (xs2 + zs)
|
||||
!
|
||||
IF (gh >= small) THEN
|
||||
CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs ) !<GPU:gvt4=>gvt4_d>
|
||||
CALL gvt4( xs2, zs, d0, d1, d2, d3, d4, d5, alpha, hg, dhg_dxs2, dhg_dzs )
|
||||
ELSE
|
||||
hg = zero
|
||||
dhg_dxs2 = zero
|
||||
|
@ -1121,7 +1131,7 @@ SUBROUTINE m06lx( rho, grho2, tau, ex, v1x, v2x, v3x ) !<GPU:DEVICE>
|
|||
dfws_drho = dfws*dws_dts*dts_drho
|
||||
dfws_dtau = dfws*dws_dts*dts_dtau
|
||||
!
|
||||
CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe ) !<GPU:pbex_m06l=>pbex_m06l_d>
|
||||
CALL pbex_m06l( two*rho, four*grho2, sx_pbe, v1x_pbe, v2x_pbe )
|
||||
!
|
||||
v1x_unif = f43 * CX * rho13
|
||||
!
|
||||
|
@ -1143,7 +1153,8 @@ END SUBROUTINE m06lx
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------
|
||||
SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x ) !<GPU:DEVICE>
|
||||
SUBROUTINE pbex_m06l( rho, grho2, sx, v1x, v2x )
|
||||
!$acc routine seq
|
||||
!---------------------------------------------------------------
|
||||
!! PBE exchange (without Slater exchange):
|
||||
!! J.P.Perdew, K.Burke, M.Ernzerhof, PRL 77, 3865 (1996).
|
||||
|
@ -1211,8 +1222,9 @@ END SUBROUTINE pbex_m06l
|
|||
!=============================== M06L correlation ==========================
|
||||
!
|
||||
!---------------------------------------------------------------------------------
|
||||
SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, & !<GPU:DEVICE>
|
||||
SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
||||
v3c_up, v1c_dw, v2c_dw, v3c_dw )
|
||||
!$acc routine seq
|
||||
!-------------------------------------------------------------------------------
|
||||
!! M06L correlation.
|
||||
!
|
||||
|
@ -1366,12 +1378,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
|||
!
|
||||
rs = rsa
|
||||
zeta = one
|
||||
CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_pw_a, vc_v(1), vc_v(2) )
|
||||
vc_pw_a = vc_v(1)
|
||||
vv = vc_v(2)
|
||||
!
|
||||
CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa ) !<GPU:gvt4=>gvt4_d>
|
||||
CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a ) !<GPU:gfunc=>gfunc_d>
|
||||
CALL gvt4( xs2a, zsa, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hga, dhga_dxs2a, dhga_dzsa )
|
||||
CALL gfunc( cs, gama_s, xs2a, gsa, dgsa_dxs2a )
|
||||
!
|
||||
Ec_UEG_aa = rhoa*ec_pw_a
|
||||
num = (dgsa_dxs2a + dhga_dxs2a)*Dsa + (gsa + hga)*dDsa_dxs2a
|
||||
|
@ -1426,12 +1438,12 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
|||
!
|
||||
zeta = one
|
||||
rs = rsb
|
||||
CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_pw_b, vc_v(1), vc_v(2) )
|
||||
vc_pw_b = vc_v(1)
|
||||
vv = vc_v(2)
|
||||
!
|
||||
CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb ) !<GPU:gvt4=>gvt4_d>
|
||||
CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b ) !<GPU:gfunc=>gfunc_d>
|
||||
CALL gvt4( xs2b, zsb, ds0, ds1, ds2, ds3, ds4, ds5, alpha_s, hgb, dhgb_dxs2b, dhgb_dzsb )
|
||||
CALL gfunc( cs, gama_s, xs2b, gsb, dgsb_dxs2b )
|
||||
!
|
||||
Ec_UEG_bb = rhob*ec_pw_b
|
||||
num = (dgsb_dxs2b + dhgb_dxs2b)*Dsb + (gsb + hgb)*dDsb_dxs2b
|
||||
|
@ -1468,13 +1480,13 @@ SUBROUTINE m06lc( rhoa, rhob, grho2a, grho2b, taua, taub, ec, v1c_up, v2c_up, &
|
|||
zeta = (rhoa - rhob)/rho
|
||||
rs = (pi34/rho)**f13
|
||||
!
|
||||
CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, & !<GPU:gvt4=>gvt4_d>
|
||||
CALL gvt4( xs2ab, zsab, dab0, dab1, dab2, dab3, dab4, dab5, alpha_ab, hgab, &
|
||||
dhgab_dxs2ab, dhgab_dzsab )
|
||||
!
|
||||
CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) ) !<GPU:pw_spin=>pw_spin_d>
|
||||
CALL pw_spin( rs, zeta, ec_pw_ab, vc_v(1), vc_v(2) )
|
||||
vc_pw_up=vc_v(1) ; vc_pw_dw=vc_v(2)
|
||||
!
|
||||
CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab ) !<GPU:gfunc=>gfunc_d>
|
||||
CALL gfunc( cab, gama_ab, xs2ab, gsab, dgsab_dxs2ab )
|
||||
!
|
||||
decab_drhoa = vc_pw_up - vc_pw_a
|
||||
decab_drhob = vc_pw_dw - vc_pw_b
|
||||
|
@ -1515,7 +1527,8 @@ END SUBROUTINE m06lc
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------
|
||||
SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) !<GPU:DEVICE>
|
||||
SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx )
|
||||
!$acc routine seq
|
||||
!-----------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
@ -1545,7 +1558,8 @@ SUBROUTINE gfunc( cspin, gama, xspin, gs, dgs_dx ) !<GPU:DEVICE>
|
|||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz ) !<GPU:DEVICE>
|
||||
SUBROUTINE gvt4( x, z, a, b, c, d, e, f, alpha, hg, dh_dx, dh_dz )
|
||||
!$acc routine seq
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
USE kind_l, ONLY : DP
|
||||
|
|
|
@ -22,20 +22,21 @@ MODULE beef_interface
|
|||
INTERFACE
|
||||
!
|
||||
SUBROUTINE beefx( r, g, e, dr, dg, addlda ) BIND(C, NAME="beefx_")
|
||||
!$acc routine seq
|
||||
USE iso_c_binding
|
||||
REAL (C_DOUBLE) :: r, g, e, dr, dg
|
||||
INTEGER(C_INT), INTENT(IN) :: addlda
|
||||
END SUBROUTINE beefx
|
||||
!
|
||||
SUBROUTINE beeflocalcorr( r, g, e, dr, dg, addlda) &
|
||||
BIND(C, NAME="beeflocalcorr_")
|
||||
SUBROUTINE beeflocalcorr( r, g, e, dr, dg, addlda) BIND(C, NAME="beeflocalcorr_")
|
||||
!$acc routine seq
|
||||
USE iso_c_binding
|
||||
REAL (C_DOUBLE), INTENT(INOUT) :: r, g, e, dr, dg
|
||||
INTEGER(C_INT), INTENT(IN) :: addlda
|
||||
END SUBROUTINE beeflocalcorr
|
||||
!
|
||||
SUBROUTINE beeflocalcorrspin(r, z, g, e, drup, drdown, dg, addlda) &
|
||||
BIND(C, NAME="beeflocalcorrspin_")
|
||||
SUBROUTINE beeflocalcorrspin(r, z, g, e, drup, drdown, dg, addlda) BIND(C, NAME="beeflocalcorrspin_")
|
||||
!$acc routine seq
|
||||
USE iso_c_binding
|
||||
REAL (C_DOUBLE), INTENT(INOUT) :: r, z, g, e, drup, drdown, dg
|
||||
INTEGER(C_INT), INTENT(IN) :: addlda
|
||||
|
|
|
@ -14,16 +14,16 @@ MODULE xc_lib
|
|||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
SAVE
|
||||
!
|
||||
SAVE
|
||||
!
|
||||
PRIVATE
|
||||
!
|
||||
!
|
||||
!PUBLIC :: xc, dmxc !LDA
|
||||
PUBLIC :: xc !, dmxc !LDA
|
||||
!
|
||||
PUBLIC :: xc_gcx !, dgcxc !GGA
|
||||
!
|
||||
!PUBLIC :: xc_metagcx !MGGA
|
||||
PUBLIC :: xc_metagcx !MGGA
|
||||
!
|
||||
PUBLIC :: xclib_set_dft_from_name, &
|
||||
xclib_set_dft_IDs, &
|
||||
|
@ -56,17 +56,46 @@ MODULE xc_lib
|
|||
xclib_reset_dft, &
|
||||
dft_force_hybrid
|
||||
!
|
||||
INTERFACE xc
|
||||
SUBROUTINE xc( length, srd, svd, rho_in, ex_out, ec_out, vx_out, vc_out, gpu_args_ )
|
||||
USE kind_l, ONLY: DP
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: length, srd, svd
|
||||
REAL(DP), INTENT(IN) :: rho_in(length,srd)
|
||||
REAL(DP), INTENT(OUT) :: ex_out(length), ec_out(length)
|
||||
REAL(DP), INTENT(OUT) :: vx_out(length,svd), vc_out(length,svd)
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: gpu_args_
|
||||
END SUBROUTINE
|
||||
END INTERFACE
|
||||
!
|
||||
!
|
||||
INTERFACE xc_gcx
|
||||
SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
USE kind_l, ONLY: DP
|
||||
SUBROUTINE xc_gcx( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud, &
|
||||
gpu_args_ )
|
||||
USE kind_l, ONLY: DP
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: length, ns
|
||||
REAL(DP), INTENT(IN) :: rho(:,:), grho(:,:,:)
|
||||
REAL(DP), INTENT(OUT) :: ex(:), ec(:)
|
||||
REAL(DP), INTENT(OUT) :: v1x(:,:), v2x(:,:)
|
||||
REAL(DP), INTENT(OUT) :: v1c(:,:), v2c(:,:)
|
||||
REAL(DP), OPTIONAL, INTENT(OUT) :: v2c_ud(:)
|
||||
REAL(DP), INTENT(IN) :: rho(length,ns), grho(3,length,ns)
|
||||
REAL(DP), INTENT(OUT) :: ex(length), ec(length)
|
||||
REAL(DP), INTENT(OUT) :: v1x(length,ns), v2x(length,ns)
|
||||
REAL(DP), INTENT(OUT) :: v1c(length,ns), v2c(length,ns)
|
||||
REAL(DP), OPTIONAL, INTENT(OUT) :: v2c_ud(length)
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: gpu_args_
|
||||
END SUBROUTINE
|
||||
END INTERFACE
|
||||
!
|
||||
!
|
||||
INTERFACE xc_metagcx
|
||||
SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, &
|
||||
v1c, v2c, v3c, gpu_args_ )
|
||||
USE kind_l, ONLY: DP
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: length, ns, np
|
||||
REAL(DP), INTENT(IN) :: rho(length,ns), grho(3,length,ns), tau(length,ns)
|
||||
REAL(DP), INTENT(OUT) :: ex(length), ec(length)
|
||||
REAL(DP), INTENT(OUT) :: v1x(length,ns), v2x(length,ns), v3x(length,ns)
|
||||
REAL(DP), INTENT(OUT) :: v1c(length,ns), v2c(np,length,ns), v3c(length,ns)
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: gpu_args_
|
||||
END SUBROUTINE
|
||||
END INTERFACE
|
||||
!
|
||||
|
|
|
@ -42,13 +42,13 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
! ... local variables
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: vrrx(:,:), vsrx(:,:), vssx(:,:)
|
||||
REAL(DP), ALLOCATABLE :: vrrc(:,:), vsrc(:,:), vssc(:), vrzc(:,:)
|
||||
REAL(DP), ALLOCATABLE :: vrrc(:,:), vsrc(:,:), vssc(:), vrzc(:,:)
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
INTEGER :: fkind=-10
|
||||
REAL(DP), ALLOCATABLE :: rho_lxc(:)
|
||||
REAL(DP), ALLOCATABLE :: v2rho2_x(:), v2rhosigma_x(:), v2sigma2_x(:)
|
||||
REAL(DP), ALLOCATABLE :: v2rho2_c(:), v2rhosigma_c(:), v2sigma2_c(:)
|
||||
REAL(DP), ALLOCATABLE :: v2rho2_x(:), v2rhosigma_x(:), v2sigma2_x(:)
|
||||
REAL(DP), ALLOCATABLE :: v2rho2_c(:), v2rhosigma_c(:), v2sigma2_c(:)
|
||||
#if (XC_MAJOR_VERSION > 4)
|
||||
INTEGER(8) :: lengthxc
|
||||
#else
|
||||
|
@ -83,7 +83,7 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
CASE( 1 )
|
||||
!
|
||||
DO k = 1, length
|
||||
rho_lxc(k) = r_in(k,1)
|
||||
rho_lxc(k) = r_in(k,1)
|
||||
sigma(k) = g_in(k,1,1)**2 + g_in(k,2,1)**2 + g_in(k,3,1)**2
|
||||
ENDDO
|
||||
!
|
||||
|
@ -130,7 +130,7 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
v2sigma2_c(length_dlxc*sp) )
|
||||
! ... DERIVATIVE FOR CORRELATION
|
||||
v2rho2_c = 0._DP ; v2rhosigma_c = 0._DP ; v2sigma2_c = 0._DP
|
||||
IF (igcc /= 0) THEN
|
||||
IF (igcc /= 0) THEN
|
||||
fkind = xc_f03_func_info_get_kind( xc_info(4) )
|
||||
CALL xc_f03_func_set_dens_threshold( xc_func(4), epsr )
|
||||
CALL xc_f03_gga_fxc( xc_func(4), lengthxc, rho_lxc(1), sigma(1), v2rho2_c(1), &
|
||||
|
@ -153,7 +153,7 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
IF (.NOT. ALLOCATED(sigma)) THEN
|
||||
ALLOCATE( sigma(length) )
|
||||
sigma(:) = g_in(:,1,1)**2 + g_in(:,2,1)**2 + g_in(:,3,1)**2
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
CALL dgcxc_unpol( length, r_in(:,1), sigma, vrrx(:,1), vsrx(:,1), vssx(:,1), &
|
||||
vrrc(:,1), vsrc(:,1), vssc )
|
||||
|
@ -176,12 +176,12 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
dvxc_rr(k,1,1) = e2 * (vrrx(k,1) + vrrc(k,1))
|
||||
dvxc_sr(k,1,1) = e2 * (vsrx(k,1) + vsrc(k,1))
|
||||
dvxc_ss(k,1,1) = e2 * (vssx(k,1) + vssc(k) )
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( vrrx, vsrx, vssx )
|
||||
DEALLOCATE( vrrc, vsrc, vssc )
|
||||
!
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
IF ( is_libxc(3) ) THEN
|
||||
DO k = 1, length
|
||||
|
@ -189,7 +189,7 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
IF ( rho_lxc(k)>rho_threshold_lda ) THEN
|
||||
dvxc_rr(k,1,1) = dvxc_rr(k,1,1) + e2 * v2rho2_x(k)
|
||||
dvxc_sr(k,1,1) = dvxc_sr(k,1,1) + e2 * v2rhosigma_x(k)*2._DP
|
||||
ENDIF
|
||||
ENDIF
|
||||
dvxc_ss(k,1,1) = dvxc_ss(k,1,1) + e2 * v2sigma2_x(k)*4._DP
|
||||
ENDDO
|
||||
ENDIF
|
||||
|
@ -199,7 +199,7 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
IF ( rho_lxc(k)>rho_threshold_lda ) THEN
|
||||
dvxc_rr(k,1,1) = dvxc_rr(k,1,1) + e2 * v2rho2_c(k)
|
||||
dvxc_sr(k,1,1) = dvxc_sr(k,1,1) + e2 * v2rhosigma_c(k)*2._DP
|
||||
ENDIF
|
||||
ENDIF
|
||||
dvxc_ss(k,1,1) = dvxc_ss(k,1,1) + e2 * v2sigma2_c(k)*4._DP
|
||||
ENDDO
|
||||
ENDIF
|
||||
|
@ -230,23 +230,23 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
dvxc_ss(k,2,1) = e2*vssc(k)
|
||||
dvxc_ss(k,2,2) = e2*(vssx(k,2) + vssc(k))
|
||||
!
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( vrrx, vsrx, vssx )
|
||||
DEALLOCATE( vrrc, vsrc, vssc )
|
||||
DEALLOCATE( vrzc )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
IF ( is_libxc(3) ) THEN
|
||||
!
|
||||
!
|
||||
IF ( is_libxc(3) ) THEN
|
||||
!
|
||||
DO k = 1, length
|
||||
thr_up_cond = r_in(k,1)>epsr .AND. SQRT(ABS(sigma(3*k-2)))>epsg
|
||||
thr_dw_cond = r_in(k,2)>epsr .AND. SQRT(ABS(sigma(3*k)))>epsg
|
||||
IF ( .NOT.thr_up_cond .OR. .NOT.thr_dw_cond ) CYCLE
|
||||
IF ( .NOT.thr_up_cond .OR. .NOT.thr_dw_cond ) CYCLE
|
||||
dvxc_rr(k,1,1) = dvxc_rr(k,1,1) + e2 * v2rho2_x(3*k-2)
|
||||
dvxc_ss(k,1,1) = dvxc_ss(k,1,1) + e2 * v2sigma2_x(6*k-5)*4._DP
|
||||
dvxc_rr(k,2,2) = dvxc_rr(k,2,2) + e2 * v2rho2_x(3*k)
|
||||
dvxc_rr(k,2,2) = dvxc_rr(k,2,2) + e2 * v2rho2_x(3*k)
|
||||
dvxc_ss(k,2,2) = dvxc_ss(k,2,2) + e2 * v2sigma2_x(6*k)*4._DP
|
||||
dvxc_rr(k,1,2) = dvxc_rr(k,1,2) + e2 * v2rho2_x(3*k-1)
|
||||
dvxc_sr(k,1,1) = dvxc_sr(k,1,1) + e2 * v2rhosigma_x(6*k-5)*2._DP
|
||||
|
@ -260,30 +260,30 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
!
|
||||
IF ( is_libxc(4) ) THEN
|
||||
!
|
||||
DO k = 1, length
|
||||
DO k = 1, length
|
||||
thr_up_cond = r_in(k,1)>epsr .AND. SQRT(ABS(sigma(3*k-2)))>epsg
|
||||
thr_dw_cond = r_in(k,2)>epsr .AND. SQRT(ABS(sigma(3*k)))>epsg
|
||||
IF ( .NOT.thr_up_cond .OR. .NOT.thr_dw_cond ) CYCLE
|
||||
dvxc_rr(k,1,1) = dvxc_rr(k,1,1) + e2 * v2rho2_c(3*k-2)
|
||||
dvxc_rr(k,1,2) = dvxc_rr(k,1,2) + e2 * v2rho2_c(3*k-1)
|
||||
dvxc_rr(k,2,1) = dvxc_rr(k,2,1) + e2 * v2rho2_c(3*k-1)
|
||||
dvxc_rr(k,2,2) = dvxc_rr(k,2,2) + e2 * v2rho2_c(3*k)
|
||||
dvxc_sr(k,1,1) = dvxc_sr(k,1,1) + e2 * v2rhosigma_c(6*k-5)*2.d0
|
||||
dvxc_ss(k,1,1) = dvxc_ss(k,1,1) + e2 * v2sigma2_c(6*k)*4.d0
|
||||
dvxc_sr(k,1,2) = dvxc_sr(k,1,2) + e2 * v2rhosigma_c(6*k-4)
|
||||
dvxc_sr(k,2,1) = dvxc_sr(k,2,1) + e2 * v2rhosigma_c(6*k-1)
|
||||
dvxc_ss(k,1,2) = dvxc_ss(k,1,2) + e2 * v2sigma2_c(6*k-2)
|
||||
dvxc_ss(k,2,1) = dvxc_ss(k,2,1) + e2 * v2sigma2_c(6*k-2)
|
||||
dvxc_sr(k,2,2) = dvxc_sr(k,2,1) + e2 * v2rhosigma_c(6*k)*2.d0
|
||||
dvxc_ss(k,2,2) = dvxc_ss(k,2,2) + e2 * v2sigma2_c(6*k)*4.d0
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( v2rho2_c, v2rhosigma_c, v2sigma2_c )
|
||||
!
|
||||
IF ( .NOT.thr_up_cond .OR. .NOT.thr_dw_cond ) CYCLE
|
||||
dvxc_rr(k,1,1) = dvxc_rr(k,1,1) + e2 * v2rho2_c(3*k-2)
|
||||
dvxc_rr(k,1,2) = dvxc_rr(k,1,2) + e2 * v2rho2_c(3*k-1)
|
||||
dvxc_rr(k,2,1) = dvxc_rr(k,2,1) + e2 * v2rho2_c(3*k-1)
|
||||
dvxc_rr(k,2,2) = dvxc_rr(k,2,2) + e2 * v2rho2_c(3*k)
|
||||
dvxc_sr(k,1,1) = dvxc_sr(k,1,1) + e2 * v2rhosigma_c(6*k-5)*2.d0
|
||||
dvxc_ss(k,1,1) = dvxc_ss(k,1,1) + e2 * v2sigma2_c(6*k)*4.d0
|
||||
dvxc_sr(k,1,2) = dvxc_sr(k,1,2) + e2 * v2rhosigma_c(6*k-4)
|
||||
dvxc_sr(k,2,1) = dvxc_sr(k,2,1) + e2 * v2rhosigma_c(6*k-1)
|
||||
dvxc_ss(k,1,2) = dvxc_ss(k,1,2) + e2 * v2sigma2_c(6*k-2)
|
||||
dvxc_ss(k,2,1) = dvxc_ss(k,2,1) + e2 * v2sigma2_c(6*k-2)
|
||||
dvxc_sr(k,2,2) = dvxc_sr(k,2,1) + e2 * v2rhosigma_c(6*k)*2.d0
|
||||
dvxc_ss(k,2,2) = dvxc_ss(k,2,2) + e2 * v2sigma2_c(6*k)*4.d0
|
||||
ENDDO
|
||||
!
|
||||
DEALLOCATE( v2rho2_c, v2rhosigma_c, v2sigma2_c )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
IF (ANY(is_libxc(3:4))) DEALLOCATE( rho_lxc )
|
||||
!
|
||||
!
|
||||
ENDIF
|
||||
IF ( ALLOCATED(sigma) ) DEALLOCATE( sigma )
|
||||
!
|
||||
|
@ -303,7 +303,7 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
!
|
||||
dvxc_rr(:,1,1) = e2*(vrrx(:,1) + vrrc(:,1))
|
||||
dvxc_sr(:,1,1) = e2*(vsrx(:,1) + vsrc(:,1))
|
||||
dvxc_ss(:,1,1) = e2*(vssx(:,1) + vssc(:) )
|
||||
dvxc_ss(:,1,1) = e2*(vssx(:,1) + vssc(:) )
|
||||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
|
@ -323,14 +323,14 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
dvxc_rr(k,2,2) = e2*(vrrx(k,2) + vrrc(k,2) - vrzc(k,2)*(1.d0 + zeta)/rht)
|
||||
ENDIF
|
||||
!
|
||||
dvxc_sr(k,1,1) = e2 * (vsrx(k,1) + vsrc(k,1))
|
||||
dvxc_sr(k,1,2) = e2 * vsrc(k,1)
|
||||
dvxc_sr(k,2,1) = e2 * vsrc(k,2)
|
||||
dvxc_sr(k,2,2) = e2 * (vsrx(k,2) + vsrc(k,2))
|
||||
dvxc_sr(k,1,1) = e2 * (vsrx(k,1) + vsrc(k,1))
|
||||
dvxc_sr(k,1,2) = e2 * vsrc(k,1)
|
||||
dvxc_sr(k,2,1) = e2 * vsrc(k,2)
|
||||
dvxc_sr(k,2,2) = e2 * (vsrx(k,2) + vsrc(k,2))
|
||||
!
|
||||
dvxc_ss(k,1,1) = e2 * (vssx(k,1) + vssc(k))
|
||||
dvxc_ss(k,1,2) = e2 * vssc(k)
|
||||
dvxc_ss(k,2,1) = e2 * vssc(k)
|
||||
dvxc_ss(k,1,1) = e2 * (vssx(k,1) + vssc(k))
|
||||
dvxc_ss(k,1,2) = e2 * vssc(k)
|
||||
dvxc_ss(k,2,1) = e2 * vssc(k)
|
||||
dvxc_ss(k,2,2) = e2 * (vssx(k,2) + vssc(k))
|
||||
ENDDO
|
||||
!
|
||||
|
|
|
@ -6,13 +6,98 @@
|
|||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!---------------------------------------------------------------------------
|
||||
SUBROUTINE xc_gcx( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud, &
|
||||
gpu_args_ )
|
||||
!-------------------------------------------------------------------------
|
||||
!! Wrapper to gpu or non gpu version of \(\texttt{xc_gcx}\).
|
||||
!
|
||||
USE kind_l, ONLY: DP
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! length of the I/O arrays
|
||||
INTEGER, INTENT(IN) :: ns
|
||||
!! spin dimension for input
|
||||
REAL(DP), INTENT(IN) :: rho(length,ns)
|
||||
!! Charge density
|
||||
REAL(DP), INTENT(IN) :: grho(3,length,ns)
|
||||
!! gradient
|
||||
REAL(DP), INTENT(OUT) :: ex(length)
|
||||
!! exchange energy
|
||||
REAL(DP), INTENT(OUT) :: ec(length)
|
||||
!! correlation energy
|
||||
REAL(DP), INTENT(OUT) :: v1x(length,ns)
|
||||
!! exchange potential (density part)
|
||||
REAL(DP), INTENT(OUT) :: v2x(length,ns)
|
||||
!! exchange potential (gradient part)
|
||||
REAL(DP), INTENT(OUT) :: v1c(length,ns)
|
||||
!! correlation potential (density part)
|
||||
REAL(DP), INTENT(OUT) :: v2c(length,ns)
|
||||
!! correlation potential (gradient part)
|
||||
REAL(DP), INTENT(OUT), OPTIONAL :: v2c_ud(length)
|
||||
!! correlation potential, cross term
|
||||
LOGICAL, INTENT(IN), OPTIONAL :: gpu_args_
|
||||
!! whether you wish to run on gpu in case use_gpu is true
|
||||
!
|
||||
LOGICAL :: gpu_args
|
||||
REAL(DP), ALLOCATABLE :: v2c_dummy(:)
|
||||
!
|
||||
gpu_args = .FALSE.
|
||||
!
|
||||
IF ( PRESENT(gpu_args_) ) gpu_args = gpu_args_
|
||||
!
|
||||
IF (ns==2 .AND. .NOT. PRESENT(v2c_ud)) CALL xclib_infomsg( 'xc_gcx', 'WARNING: cross &
|
||||
&term v2c_ud not found xc_gcx (gga) call with polarized case' )
|
||||
!
|
||||
IF ( gpu_args ) THEN
|
||||
!
|
||||
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), ex(length), ec(length), &
|
||||
!$acc& v1x(length,ns), v2x(length,ns), v1c(length,ns), v2c(length,ns) )
|
||||
IF (PRESENT(v2c_ud)) THEN
|
||||
!$acc data deviceptr( v2c_ud(length) )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
!$acc end data
|
||||
ELSE
|
||||
ALLOCATE( v2c_dummy(length) )
|
||||
!$acc data deviceptr( v2c_dummy(length) )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
|
||||
!$acc end data
|
||||
DEALLOCATE( v2c_dummy )
|
||||
ENDIF
|
||||
!$acc end data
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc data copyin( rho, grho ), copyout( ex, ec, v1x, v2x, v1c, v2c )
|
||||
IF (PRESENT(v2c_ud)) THEN
|
||||
!$acc data copyout( v2c_ud )
|
||||
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
ELSE
|
||||
ALLOCATE( v2c_dummy(length) )
|
||||
!$acc data create( v2c_dummy )
|
||||
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( v2c_dummy )
|
||||
ENDIF
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE
|
||||
!
|
||||
!
|
||||
!---------------------------------------------------------------------------
|
||||
SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
!-------------------------------------------------------------------------
|
||||
!! Wrapper routine. Calls xc_gga-driver internal routines or the external
|
||||
!! ones from libxc, depending on the input choice.
|
||||
!
|
||||
!! NOTE: differently from 'xc_lda_drivers', here the input rho is in (up,down)
|
||||
!! form (in the LSDA case).
|
||||
!! GGA wrapper routine - gpu double.
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
#include "xc_version.h"
|
||||
|
@ -28,27 +113,16 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! length of the I/O arrays
|
||||
INTEGER, INTENT(IN) :: ns
|
||||
!! spin dimension for input
|
||||
REAL(DP), INTENT(IN) :: rho(:,:)
|
||||
!! Charge density
|
||||
REAL(DP), INTENT(IN) :: grho(:,:,:)
|
||||
!! gradient
|
||||
REAL(DP), INTENT(OUT) :: ex(:)
|
||||
!! exchange energy
|
||||
REAL(DP), INTENT(OUT) :: ec(:)
|
||||
!! correlation energy
|
||||
REAL(DP), INTENT(OUT) :: v1x(:,:)
|
||||
!! exchange potential (density part)
|
||||
REAL(DP), INTENT(OUT) :: v2x(:,:)
|
||||
!! exchange potential (gradient part)
|
||||
REAL(DP), INTENT(OUT) :: v1c(:,:)
|
||||
!! correlation potential (density part)
|
||||
REAL(DP), INTENT(OUT) :: v2c(:,:)
|
||||
!! correlation potential (gradient part)
|
||||
REAL(DP), INTENT(OUT), OPTIONAL :: v2c_ud(:)
|
||||
!! correlation potential, cross term
|
||||
REAL(DP), INTENT(IN) :: rho(length,ns)
|
||||
REAL(DP), INTENT(IN) :: grho(3,length,ns)
|
||||
REAL(DP), INTENT(OUT) :: ex(length)
|
||||
REAL(DP), INTENT(OUT) :: ec(length)
|
||||
REAL(DP), INTENT(OUT) :: v1x(length,ns)
|
||||
REAL(DP), INTENT(OUT) :: v2x(length,ns)
|
||||
REAL(DP), INTENT(OUT) :: v1c(length,ns)
|
||||
REAL(DP), INTENT(OUT) :: v2c(length,ns)
|
||||
REAL(DP), INTENT(OUT) :: v2c_ud(length)
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
|
@ -59,32 +133,27 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:)
|
||||
!
|
||||
INTEGER :: fkind_x, np
|
||||
REAL(DP) :: rs, rtot, zet, vc_2(2), arho_k
|
||||
REAL(DP), PARAMETER :: pi34 = 0.6203504908994_DP
|
||||
!
|
||||
LOGICAL :: POLARIZED
|
||||
INTEGER :: pol_unpol
|
||||
INTEGER :: ildax, ildac, pol_unpol
|
||||
#if (XC_MAJOR_VERSION > 4)
|
||||
INTEGER(8) :: lengthxc
|
||||
#else
|
||||
INTEGER :: lengthxc
|
||||
#endif
|
||||
#endif
|
||||
REAL(DP), ALLOCATABLE :: arho(:,:)
|
||||
REAL(DP), ALLOCATABLE :: rh(:), zeta(:)
|
||||
REAL(DP), ALLOCATABLE :: grho2(:,:), grho_ud(:)
|
||||
!
|
||||
INTEGER :: k, is
|
||||
REAL(DP) :: sgn(2)
|
||||
REAL(DP) :: rho_up, rho_dw, grho_up, grho_dw
|
||||
REAL(DP) :: rho_up, rho_dw, grho_up, grho_dw, sgn1
|
||||
REAL(DP), PARAMETER :: small = 1.E-10_DP
|
||||
!
|
||||
!
|
||||
IF (ns==2 .AND. .NOT. PRESENT(v2c_ud)) CALL xclib_infomsg( 'xc_gcx', 'WARNING: cross &
|
||||
&term v2c_ud not found xc_gcx (gga) call with polarized case' )
|
||||
!
|
||||
ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP
|
||||
ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP
|
||||
IF ( PRESENT(v2c_ud) ) v2c_ud = 0.0_DP
|
||||
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), ex(length), ec(length), &
|
||||
!$acc& v1x(length,ns), v2x(length,ns), v1c(length,ns), v2c(length,ns), &
|
||||
!$acc& v2c_ud(length) )
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
!
|
||||
|
@ -103,24 +172,31 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
np = 3
|
||||
ENDIF
|
||||
!
|
||||
ALLOCATE( rho_lxc(length*ns) )
|
||||
ALLOCATE( sigma(length*np) )
|
||||
!
|
||||
ALLOCATE( ex_lxc(length) , ec_lxc(length) )
|
||||
ALLOCATE( vx_rho(length*ns) , vx_sigma(length*np) )
|
||||
ALLOCATE( vc_rho(length*ns) , vc_sigma(length*np) )
|
||||
ALLOCATE( rho_lxc(length*ns), sigma(length*np) )
|
||||
!$acc data create( rho_lxc, sigma )
|
||||
!
|
||||
IF ( is_libxc(3) ) THEN
|
||||
ALLOCATE( ex_lxc(length) )
|
||||
ALLOCATE( vx_rho(length*ns), vx_sigma(length*np) )
|
||||
ENDIF
|
||||
IF ( is_libxc(4) ) THEN
|
||||
ALLOCATE( ec_lxc(length) )
|
||||
ALLOCATE( vc_rho(length*ns), vc_sigma(length*np) )
|
||||
ENDIF
|
||||
!
|
||||
IF ( ns == 1 ) THEN
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_lxc(k) = ABS( rho(k,1) )
|
||||
IF ( rho_lxc(k) > rho_threshold_gga ) &
|
||||
arho_k = ABS(rho(k,1))
|
||||
rho_lxc(k) = arho_k
|
||||
IF ( arho_k > rho_threshold_gga ) &
|
||||
sigma(k) = grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_lxc(2*k-1) = rho(k,1)
|
||||
rho_lxc(2*k) = rho(k,2)
|
||||
|
@ -135,25 +211,26 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
!
|
||||
IF ( ns==1 .AND. ANY(.NOT.is_libxc(3:4)) ) THEN
|
||||
!
|
||||
CALL gcxc( length, ABS(rho(:,1)), sigma, ex, ec, v1x(:,1), v2x(:,1), v1c(:,1), v2c(:,1) )
|
||||
!
|
||||
IF ( (igcx==43.AND..NOT.is_libxc(3)) .OR. (igcc==14.AND..NOT.is_libxc(4)) ) &
|
||||
CALL gcxc_beef( length, ABS(rho(:,1)), sigma, ex, ec, v1x(:,1), v2x(:,1), &
|
||||
v1c(:,1), v2c(:,1) )
|
||||
!$acc host_data use_device( rho_lxc, sigma )
|
||||
CALL gcxc( length, rho_lxc, sigma, ex, ec, v1x(:,1), v2x(:,1), v1c(:,1), v2c(:,1) )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
sgn(1) = SIGN(1._DP, rho(k,1))
|
||||
ex(k) = ex(k) * sgn(1)
|
||||
ec(k) = ec(k) * sgn(1)
|
||||
sgn1 = SIGN(1._DP, rho(k,1))
|
||||
ex(k) = ex(k) * sgn1
|
||||
ec(k) = ec(k) * sgn1
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc update self( rho_lxc, sigma )
|
||||
!
|
||||
! ---- GGA CORRELATION
|
||||
!
|
||||
IF (is_libxc(4)) fkind_x = xc_f03_func_info_get_kind( xc_info(4) )
|
||||
!
|
||||
IF ( is_libxc(4) ) THEN !lda part of LYP not present in libxc (still so? - check)
|
||||
!
|
||||
fkind_x = xc_f03_func_info_get_kind( xc_info(4) )
|
||||
!
|
||||
CALL xc_f03_func_set_dens_threshold( xc_func(4), small )!rho_threshold_gga )
|
||||
IF (libxc_flags(4,0)==1) THEN
|
||||
|
@ -163,89 +240,112 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
ec_lxc = 0.d0
|
||||
ENDIF
|
||||
!
|
||||
!$acc data copyin( ec_lxc, vc_rho, vc_sigma )
|
||||
IF (.NOT. POLARIZED) THEN
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
IF ( rho_lxc(k) <= rho_threshold_lda ) CYCLE
|
||||
IF ( rho_lxc(k) <= rho_threshold_lda ) THEN
|
||||
ec(k)=0.d0 ; v1c(k,1)=0.d0 ; v2c(k,1)=0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
ec(k) = ec_lxc(k) * rho_lxc(k) * SIGN(1.0_DP, rho(k,1))
|
||||
v1c(k,1) = vc_rho(k)
|
||||
IF ( rho_lxc(k) <= rho_threshold_gga .OR. &
|
||||
SQRT(ABS(sigma(k))) <= grho_threshold_gga) CYCLE
|
||||
SQRT(ABS(sigma(k))) <= grho_threshold_gga) THEN
|
||||
v2c(k,1) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
v2c(k,1) = vc_sigma(k)*2.d0
|
||||
ENDDO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_up = rho_lxc(2*k-1)
|
||||
rho_dw = rho_lxc(2*k)
|
||||
grho_up = SQRT(ABS(sigma(3*k-2)))
|
||||
grho_dw = SQRT(ABS(sigma(3*k)))
|
||||
IF ( rho_up <= rho_threshold_lda .OR. rho_dw <= rho_threshold_lda ) CYCLE
|
||||
IF ( rho_up <= rho_threshold_lda .OR. rho_dw <= rho_threshold_lda ) THEN
|
||||
ec(k) = 0.d0 ; v1c(k,1) = 0.d0 ; v1c(k,2) = 0.d0
|
||||
v2c(k,1) = 0.d0 ; v2c_ud(k)= 0.d0 ; v2c(k,2) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
ec(k) = ec_lxc(k) * (rho_up+rho_dw)
|
||||
v1c(k,1) = vc_rho(2*k-1)
|
||||
v1c(k,2) = vc_rho(2*k)
|
||||
IF ( rho_up <= rho_threshold_gga .OR. rho_dw <= rho_threshold_gga .OR. &
|
||||
grho_up<=grho_threshold_gga .OR. grho_dw<=grho_threshold_gga ) CYCLE
|
||||
grho_up<=grho_threshold_gga .OR. grho_dw<=grho_threshold_gga ) THEN
|
||||
v2c(k,1) = 0.d0 ; v2c_ud(k)= 0.d0 ; v2c(k,2) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
v2c(k,1) = vc_sigma(3*k-2)*2.d0
|
||||
v2c_ud(k)= vc_sigma(3*k-1)
|
||||
v2c(k,2) = vc_sigma(3*k)*2.d0
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( ec_lxc, vc_rho, vc_sigma )
|
||||
!
|
||||
ELSEIF ( (.NOT.is_libxc(4)) .AND. fkind_x/=XC_EXCHANGE_CORRELATION ) THEN
|
||||
!
|
||||
ALLOCATE( arho(length,ns), grho2(length,ns) )
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!
|
||||
IF ( ns /= 1 ) THEN
|
||||
!
|
||||
DO is = 1, 2
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
ENDDO
|
||||
!
|
||||
IF (igcc==3 .OR. igcc==7 .OR. igcc==13 ) THEN
|
||||
!
|
||||
ALLOCATE( grho_ud(length) )
|
||||
!
|
||||
grho_ud = grho(1,:,1) * grho(1,:,2) + grho(2,:,1) * grho(2,:,2) + &
|
||||
grho(3,:,1) * grho(3,:,2)
|
||||
!
|
||||
arho = rho
|
||||
!
|
||||
WHERE ( rho(:,1)+rho(:,2) < rho_threshold_gga )
|
||||
arho(:,1) = 0.0_DP
|
||||
arho(:,2) = 0.0_DP
|
||||
ENDWHERE
|
||||
!
|
||||
CALL gcc_spin_more( length, arho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
|
||||
!
|
||||
!$acc data create( grho2, grho_ud )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
grho2(k,1) = sigma(3*k-2)
|
||||
grho2(k,2) = sigma(3*k)
|
||||
grho_ud(k) = sigma(3*k-1)
|
||||
ENDDO
|
||||
!$acc host_data use_device( grho2, grho_ud )
|
||||
CALL gcc_spin_more( length, rho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho_ud )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
ALLOCATE( rh(length), zeta(length) )
|
||||
!$acc data create( rh, zeta, grho2 )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rh(k) = rho(k,1) + rho(k,2)
|
||||
IF ( rh(k) > rho_threshold_gga ) THEN
|
||||
zeta(k) = ( rho(k,1) - rho(k,2) ) / rh(k)
|
||||
ELSE
|
||||
zeta(k) = 2.0_DP ! trash value, gcc-routines get rid of it when present
|
||||
ENDIF
|
||||
grho2(k,1) = ( grho(1,k,1) + grho(1,k,2) )**2 + &
|
||||
( grho(2,k,1) + grho(2,k,2) )**2 + &
|
||||
( grho(3,k,1) + grho(3,k,2) )**2
|
||||
grho2(k,2) = grho(1,k,2)**2 + grho(2,k,2)**2 + grho(3,k,2)**2
|
||||
ENDDO
|
||||
!
|
||||
rh = rho(:,1) + rho(:,2)
|
||||
!$acc host_data use_device( rh, zeta, grho2 )
|
||||
CALL gcc_spin( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
|
||||
!$acc end host_data
|
||||
!
|
||||
zeta = 2.0_DP ! trash value, gcc-routines get rid of it when present
|
||||
WHERE ( rh > rho_threshold_gga ) zeta = ( rho(:,1) - rho(:,2) ) / rh(:)
|
||||
!
|
||||
grho2(:,1) = ( grho(1,:,1) + grho(1,:,2) )**2 + &
|
||||
( grho(2,:,1) + grho(2,:,2) )**2 + &
|
||||
( grho(3,:,1) + grho(3,:,2) )**2
|
||||
!
|
||||
IF ( igcc/=14 ) CALL gcc_spin( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
|
||||
IF ( igcc==14 ) CALL gcc_spin_beef( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
|
||||
!
|
||||
v2c(:,2) = v2c(:,1)
|
||||
IF ( PRESENT(v2c_ud) ) v2c_ud(:) = v2c(:,1)
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
v2c(k,2) = v2c(k,1)
|
||||
IF ( ns==2 ) v2c_ud(k) = v2c(k,1)
|
||||
ENDDO
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( rh, zeta )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
DEALLOCATE( arho, grho2 )
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
! --- GGA EXCHANGE
|
||||
!
|
||||
|
@ -259,134 +359,177 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
ex_lxc = 0.d0
|
||||
ENDIF
|
||||
!
|
||||
!$acc data copyin( ex_lxc, vx_rho, vx_sigma )
|
||||
IF (.NOT. POLARIZED) THEN
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
IF ( rho_lxc(k) <= rho_threshold_lda ) CYCLE
|
||||
IF ( rho_lxc(k) <= rho_threshold_lda ) THEN
|
||||
ex(k) = 0.d0 ; v1x(k,1) = 0.d0 ; v2x(k,1) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
ex(k) = ex_lxc(k) * rho_lxc(k) * SIGN(1.0_DP, rho(k,1))
|
||||
v1x(k,1) = vx_rho(k)
|
||||
IF ( rho_lxc(k) <= rho_threshold_gga .OR. &
|
||||
SQRT(ABS(sigma(k))) <= grho_threshold_gga) CYCLE
|
||||
SQRT(ABS(sigma(k))) <= grho_threshold_gga) THEN
|
||||
v2x(k,1) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
v2x(k,1) = vx_sigma(k)*2.d0
|
||||
ENDDO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_up = rho_lxc(2*k-1)
|
||||
rho_dw = rho_lxc(2*k)
|
||||
grho_up = SQRT(ABS(sigma(3*k-2)))
|
||||
grho_dw = SQRT(ABS(sigma(3*k)))
|
||||
IF ( rho_up <= rho_threshold_lda .OR. rho_dw <= rho_threshold_lda ) CYCLE
|
||||
IF ( rho_up <= rho_threshold_lda .OR. rho_dw <= rho_threshold_lda ) THEN
|
||||
ex(k) = 0.d0
|
||||
v1x(k,1) = 0.d0 ; v1x(k,2) = 0.d0
|
||||
v2x(k,1) = 0.d0 ; v2x(k,2) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
ex(k) = ex_lxc(k) * (rho_up+rho_dw)
|
||||
v1x(k,1) = vx_rho(2*k-1)
|
||||
v1x(k,2) = vx_rho(2*k)
|
||||
IF ( rho_up <= rho_threshold_gga .OR. rho_dw <= rho_threshold_gga .OR. &
|
||||
grho_up<=grho_threshold_gga .OR. grho_dw<=grho_threshold_gga ) CYCLE
|
||||
grho_up<=grho_threshold_gga .OR. grho_dw<=grho_threshold_gga ) THEN
|
||||
v2x(k,1) = 0.d0 ; v2x(k,2) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
v2x(k,1) = vx_sigma(3*k-2)*2.d0
|
||||
v2x(k,2) = vx_sigma(3*k)*2.d0
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( ex_lxc, vx_rho, vx_sigma )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!
|
||||
IF ( ns /= 1 ) THEN
|
||||
!
|
||||
DO is = 1, 2
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
ENDDO
|
||||
!
|
||||
IF ( igcx/=43 ) CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
|
||||
IF ( igcx==43 ) CALL gcx_spin_beef( length, rho, grho2, ex, v1x, v2x )
|
||||
!
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!$acc data create( grho2 )
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, ns
|
||||
DO k = 1, length
|
||||
grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2
|
||||
ENDDO
|
||||
ENDDO
|
||||
!$acc host_data use_device( grho2 )
|
||||
CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( rho_lxc, sigma )
|
||||
DEALLOCATE( ex_lxc , ec_lxc )
|
||||
DEALLOCATE( vx_rho , vx_sigma )
|
||||
DEALLOCATE( vc_rho , vc_sigma )
|
||||
!
|
||||
#else
|
||||
!
|
||||
ALLOCATE( arho(length,ns), grho2(length,ns) )
|
||||
arho = 0.0_DP
|
||||
grho2 = 0.0_DP
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!$acc data create( grho2 )
|
||||
!
|
||||
IF ( ns == 1 ) THEN
|
||||
!
|
||||
! ... This is the spin-unpolarised case
|
||||
ALLOCATE( rh(length) )
|
||||
!$acc data create( rh )
|
||||
!$acc host_data use_device( rh, grho2 )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
IF ( ABS(rho(k,1)) > rho_threshold_gga ) &
|
||||
grho2(k,1) = grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2
|
||||
rh(k) = ABS(rho(k,1))
|
||||
IF ( rh(k) > rho_threshold_gga ) &
|
||||
grho2(k,1) = grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2
|
||||
ENDDO
|
||||
!
|
||||
CALL gcxc( length, rh, grho2(:,1), ex, ec, v1x(:,1), v2x(:,1), v1c(:,1), v2c(:,1) )
|
||||
!
|
||||
CALL gcxc( length, ABS(rho(:,1)), grho2(:,1), ex, ec, v1x(:,1), v2x(:,1), v1c(:,1), v2c(:,1) )
|
||||
!
|
||||
IF ( igcx==43 .OR. igcc==14 ) CALL gcxc_beef( length, ABS(rho(:,1)), grho2(:,1), ex, ec, &
|
||||
v1x(:,1), v2x(:,1), v1c(:,1), v2c(:,1) )
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
sgn(1) = SIGN(1._DP, rho(k,1))
|
||||
ex(k) = ex(k) * sgn(1)
|
||||
ec(k) = ec(k) * sgn(1)
|
||||
sgn1 = SIGN(1._DP, rho(k,1))
|
||||
ex(k) = ex(k) * sgn1
|
||||
ec(k) = ec(k) * sgn1
|
||||
ENDDO
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( rh )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
DO is = 1, 2
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
!$acc host_data use_device( grho2 )
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, ns
|
||||
DO k = 1, length
|
||||
grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
IF ( igcx/=43 ) CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
|
||||
IF ( igcx==43 ) CALL gcx_spin_beef( length, rho, grho2, ex, v1x, v2x )
|
||||
CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
|
||||
!$acc end host_data
|
||||
!
|
||||
IF (igcc==3 .OR. igcc==7 .OR. igcc==13 ) THEN
|
||||
!
|
||||
ALLOCATE( grho_ud(length) )
|
||||
!$acc data create( grho_ud )
|
||||
!$acc host_data use_device( grho_ud, grho2 )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
grho_ud(k) = grho(1,k,1) * grho(1,k,2) + grho(2,k,1) * grho(2,k,2) + &
|
||||
grho(3,k,1) * grho(3,k,2)
|
||||
ENDDO
|
||||
!
|
||||
grho_ud = grho(1,:,1) * grho(1,:,2) + grho(2,:,1) * grho(2,:,2) + &
|
||||
grho(3,:,1) * grho(3,:,2)
|
||||
!
|
||||
arho = rho
|
||||
!
|
||||
CALL gcc_spin_more( length, arho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
|
||||
CALL gcc_spin_more( length, rho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho_ud )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
ALLOCATE( rh(length), zeta(length) )
|
||||
!$acc data create( rh, zeta )
|
||||
!$acc host_data use_device( rh, zeta, grho2 )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rh(k) = rho(k,1) + rho(k,2)
|
||||
IF ( rh(k) > rho_threshold_gga ) THEN
|
||||
zeta(k) = ( rho(k,1) - rho(k,2) ) / rh(k)
|
||||
ELSE
|
||||
zeta(k) = 2.0_DP ! trash value, gcc-routines get rid of it when present
|
||||
ENDIF
|
||||
grho2(k,1) = ( grho(1,k,1) + grho(1,k,2) )**2 + &
|
||||
( grho(2,k,1) + grho(2,k,2) )**2 + &
|
||||
( grho(3,k,1) + grho(3,k,2) )**2
|
||||
ENDDO
|
||||
!
|
||||
rh = rho(:,1) + rho(:,2)
|
||||
CALL gcc_spin( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
|
||||
!
|
||||
zeta = 2.0_DP ! trash value, gcc-routines get rid of it when present
|
||||
WHERE ( rh > rho_threshold_gga ) zeta = ( rho(:,1) - rho(:,2) ) / rh(:)
|
||||
!
|
||||
grho2(:,1) = ( grho(1,:,1) + grho(1,:,2) )**2 + &
|
||||
( grho(2,:,1) + grho(2,:,2) )**2 + &
|
||||
( grho(3,:,1) + grho(3,:,2) )**2
|
||||
!
|
||||
|
||||
IF ( igcc/=14 ) CALL gcc_spin( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
|
||||
IF ( igcc==14 ) CALL gcc_spin_beef( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
|
||||
!
|
||||
v2c(:,2) = v2c(:,1)
|
||||
IF ( PRESENT(v2c_ud) ) v2c_ud(:) = v2c(:,1)
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
v2c(k,2) = v2c(k,1)
|
||||
IF ( ns==2 ) v2c_ud(k) = v2c(k,1)
|
||||
ENDDO
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( rh, zeta )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
DEALLOCATE( arho, grho2 )
|
||||
!$acc end data
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
#endif
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
|
|
|
@ -5,16 +5,71 @@
|
|||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!------------------------------------------------------------------------------------------
|
||||
SUBROUTINE xc( length, srd, svd, rho_in, ex_out, ec_out, vx_out, vc_out, gpu_args_ )
|
||||
!--------------------------------------------------------------------------------------
|
||||
!! Wrapper routine to \(\texttt{xc_}\) or \(\texttt{xc_gpu}\).
|
||||
!
|
||||
USE kind_l, ONLY: DP
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! length of the I/O arrays
|
||||
INTEGER, INTENT(IN) :: srd
|
||||
!! spin dimension of rho
|
||||
INTEGER, INTENT(IN) :: svd
|
||||
!! spin dimension of v
|
||||
REAL(DP), INTENT(IN) :: rho_in(length,srd)
|
||||
!! Charge density
|
||||
REAL(DP), INTENT(OUT) :: ex_out(length)
|
||||
!! \(\epsilon_x(rho)\) ( NOT \(E_x(\text{rho})\) )
|
||||
REAL(DP), INTENT(OUT) :: vx_out(length,svd)
|
||||
!! \(dE_x(\text{rho})/d\text{rho} ( NOT d\epsilon_x(\text{rho})/d\text{rho} )
|
||||
REAL(DP), INTENT(OUT) :: ec_out(length)
|
||||
!! \(\epsilon_c(rho)\) ( NOT \(E_c(\text{rho})\) )
|
||||
REAL(DP), INTENT(OUT) :: vc_out(length,svd)
|
||||
!! \(dE_c(\text{rho})/d\text{rho} ( NOT d\epsilon_c(\text{rho})/d\text{rho} )
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: gpu_args_
|
||||
!! whether you wish to run on gpu in case use_gpu is true
|
||||
!
|
||||
LOGICAL :: gpu_args
|
||||
!
|
||||
gpu_args = .FALSE.
|
||||
IF ( PRESENT(gpu_args_) ) gpu_args = gpu_args_
|
||||
!
|
||||
IF ( gpu_args ) THEN
|
||||
!
|
||||
!$acc data deviceptr( rho_in(length,srd), ex_out(length), ec_out(length), &
|
||||
!$acc& vx_out(length,svd), vc_out(length,svd) )
|
||||
CALL xc_( length, srd, svd, rho_in, ex_out, ec_out, vx_out, vc_out )
|
||||
!$acc end data
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc data copyin( rho_in ), copyout( ex_out, ec_out, vx_out, vc_out )
|
||||
!$acc host_data use_device( rho_in, ex_out, ec_out, vx_out, vc_out )
|
||||
CALL xc_( length, srd, svd, rho_in, ex_out, ec_out, vx_out, vc_out )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE
|
||||
!
|
||||
!
|
||||
!---------------------------------------------------------------------------
|
||||
SUBROUTINE xc( length, sr_d, sv_d, rho_in, ex_out, ec_out, vx_out, vc_out )
|
||||
SUBROUTINE xc_( length, srd, svd, rho_in, ex_out, ec_out, vx_out, vc_out )
|
||||
!-------------------------------------------------------------------------
|
||||
!! Wrapper routine. Calls internal XC-driver routines or external ones
|
||||
!! from Libxc, depending on the input choice.
|
||||
!! Wrapper xc LDA - openACC version.
|
||||
!! See comments in routine \(\texttt{xc}\) for variable explanations.
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
#include "xc_version.h"
|
||||
USE xc_f03_lib_m
|
||||
USE dft_setting_params, ONLY: xc_func, xc_info, libxc_flags
|
||||
USE dft_setting_params, ONLY: xc_func, xc_info, libxc_flags
|
||||
#endif
|
||||
!
|
||||
USE kind_l, ONLY: DP
|
||||
|
@ -25,28 +80,19 @@ SUBROUTINE xc( length, sr_d, sv_d, rho_in, ex_out, ec_out, vx_out, vc_out )
|
|||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! length of the I/O arrays
|
||||
INTEGER, INTENT(IN) :: sr_d
|
||||
!! spin dimension of rho
|
||||
INTEGER, INTENT(IN) :: sv_d
|
||||
!! spin dimension of v
|
||||
REAL(DP), INTENT(IN) :: rho_in(length,sr_d)
|
||||
!! Charge density
|
||||
REAL(DP), INTENT(OUT) :: ex_out(length)
|
||||
!! \(\epsilon_x(rho)\) ( NOT \(E_x(\text{rho})\) )
|
||||
REAL(DP), INTENT(OUT) :: vx_out(length,sv_d)
|
||||
!! \(dE_x(\text{rho})/d\text{rho} ( NOT d\epsilon_x(\text{rho})/d\text{rho} )
|
||||
REAL(DP), INTENT(OUT) :: ec_out(length)
|
||||
!! \(\epsilon_c(rho)\) ( NOT \(E_c(\text{rho})\) )
|
||||
REAL(DP), INTENT(OUT) :: vc_out(length,sv_d)
|
||||
!! \(dE_c(\text{rho})/d\text{rho} ( NOT d\epsilon_c(\text{rho})/d\text{rho} )
|
||||
INTEGER, INTENT(IN) :: srd, svd
|
||||
REAL(DP), INTENT(IN) :: rho_in(length,srd)
|
||||
REAL(DP), INTENT(OUT) :: ex_out(length), vx_out(length,svd)
|
||||
REAL(DP), INTENT(OUT) :: ec_out(length), vc_out(length,svd)
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
INTEGER :: fkind_x, eflag
|
||||
LOGICAL :: is_libxc1, is_libxc2
|
||||
INTEGER :: fkind_x
|
||||
REAL(DP) :: amag
|
||||
REAL(DP), ALLOCATABLE :: rho_lxc(:)
|
||||
REAL(DP), ALLOCATABLE :: ex_lxc(:), ec_lxc(:)
|
||||
REAL(DP), ALLOCATABLE :: vx_lxc(:), vc_lxc(:)
|
||||
#if (XC_MAJOR_VERSION > 4)
|
||||
INTEGER(8) :: lengthxc
|
||||
|
@ -54,104 +100,129 @@ SUBROUTINE xc( length, sr_d, sv_d, rho_in, ex_out, ec_out, vx_out, vc_out )
|
|||
INTEGER :: lengthxc
|
||||
#endif
|
||||
#endif
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: arho(:), zeta(:)
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: zeta(:)
|
||||
REAL(DP) :: arho_ir
|
||||
INTEGER :: ir
|
||||
!
|
||||
ex_out = 0.0_DP ; vx_out = 0.0_DP
|
||||
ec_out = 0.0_DP ; vc_out = 0.0_DP
|
||||
!$acc data deviceptr( rho_in(length,srd), ex_out(length), ec_out(length), &
|
||||
!$acc& vx_out(length,svd), vc_out(length,svd) )
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
!
|
||||
is_libxc1 = is_libxc(1)
|
||||
is_libxc2 = is_libxc(2)
|
||||
fkind_x = -1
|
||||
lengthxc = length
|
||||
!
|
||||
IF ( ANY(is_libxc(1:2)) ) THEN
|
||||
IF ( is_libxc(1) ) ALLOCATE( ex_lxc(length), vx_lxc(length*svd) )
|
||||
IF ( is_libxc(2) ) ALLOCATE( ec_lxc(length), vc_lxc(length*svd) )
|
||||
!
|
||||
IF ( is_libxc(1) .OR. is_libxc(2) ) THEN
|
||||
!
|
||||
ALLOCATE( rho_lxc(length*sv_d) )
|
||||
ALLOCATE( vx_lxc(length*sv_d), vc_lxc(length*sv_d) )
|
||||
ALLOCATE( rho_lxc(length*svd) )
|
||||
!$acc data copyout( rho_lxc )
|
||||
!
|
||||
! ... set libxc input
|
||||
SELECT CASE( sr_d )
|
||||
SELECT CASE( srd )
|
||||
CASE( 1 )
|
||||
!
|
||||
rho_lxc(:) = ABS(rho_in(:,1))
|
||||
!
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
rho_lxc(ir) = ABS(rho_in(ir,1))
|
||||
ENDDO
|
||||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
DO ir = 1, length
|
||||
rho_lxc(2*ir-1) = (rho_in(ir,1) + rho_in(ir,2)) * 0.5_DP
|
||||
rho_lxc(2*ir) = (rho_in(ir,1) - rho_in(ir,2)) * 0.5_DP
|
||||
ENDDO
|
||||
!
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
rho_lxc(2*ir-1) = (rho_in(ir,1) + rho_in(ir,2)) * 0.5_DP
|
||||
rho_lxc(2*ir) = (rho_in(ir,1) - rho_in(ir,2)) * 0.5_DP
|
||||
ENDDO
|
||||
!
|
||||
CASE( 4 )
|
||||
!
|
||||
DO ir = 1, length
|
||||
amag = SQRT( SUM(rho_in(ir,2:4)**2) )
|
||||
rho_lxc(2*ir-1) = (rho_in(ir,1) + amag) * 0.5_DP
|
||||
rho_lxc(2*ir) = (rho_in(ir,1) - amag) * 0.5_DP
|
||||
ENDDO
|
||||
!
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
amag = SQRT( SUM(rho_in(ir,2:4)**2) )
|
||||
rho_lxc(2*ir-1) = (rho_in(ir,1) + amag) * 0.5_DP
|
||||
rho_lxc(2*ir) = (rho_in(ir,1) - amag) * 0.5_DP
|
||||
ENDDO
|
||||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
CALL xclib_error( 'xc_LDA', 'Wrong number of spin dimensions', 1 )
|
||||
!
|
||||
!
|
||||
CALL xclib_error( 'xc_LDA', 'Wrong number of spin dimensions', 1 )
|
||||
!
|
||||
END SELECT
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!
|
||||
! ... EXCHANGE
|
||||
IF ( is_libxc(1) ) THEN
|
||||
CALL xc_f03_func_set_dens_threshold( xc_func(1), rho_threshold_lda )
|
||||
IF (libxc_flags(1,0)==1) THEN
|
||||
CALL xc_f03_lda_exc_vxc( xc_func(1), lengthxc, rho_lxc(1), ex_out(1), vx_lxc(1) )
|
||||
CALL xc_f03_lda_exc_vxc( xc_func(1), lengthxc, rho_lxc(1), ex_lxc(1), vx_lxc(1) )
|
||||
ELSE
|
||||
CALL xc_f03_lda_vxc( xc_func(1), lengthxc, rho_lxc(1), vx_lxc(1) )
|
||||
ex_lxc = 0.d0
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
! ... CORRELATION
|
||||
IF ( is_libxc(2) ) THEN
|
||||
CALL xc_f03_func_set_dens_threshold( xc_func(2), rho_threshold_lda )
|
||||
fkind_x = xc_f03_func_info_get_kind( xc_info(2) )
|
||||
IF (libxc_flags(2,0)==1) THEN
|
||||
CALL xc_f03_lda_exc_vxc( xc_func(2), lengthxc, rho_lxc(1), ec_out(1), vc_lxc(1) )
|
||||
ELSE
|
||||
CALL xc_f03_lda_vxc( xc_func(2), lengthxc, rho_lxc(1), vc_lxc(1) )
|
||||
ENDIF
|
||||
CALL xc_f03_func_set_dens_threshold( xc_func(2), rho_threshold_lda )
|
||||
fkind_x = xc_f03_func_info_get_kind( xc_info(2) )
|
||||
IF (libxc_flags(2,0)==1) THEN
|
||||
CALL xc_f03_lda_exc_vxc( xc_func(2), lengthxc, rho_lxc(1), ec_lxc(1), vc_lxc(1) )
|
||||
ELSE
|
||||
CALL xc_f03_lda_vxc( xc_func(2), lengthxc, rho_lxc(1), vc_lxc(1) )
|
||||
ec_lxc = 0.d0
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
IF ( ((.NOT.is_libxc(1)) .OR. (.NOT.is_libxc(2))) &
|
||||
.AND. fkind_x/=XC_EXCHANGE_CORRELATION ) THEN
|
||||
IF ( is_libxc(1) .OR. is_libxc(2) ) DEALLOCATE( rho_lxc )
|
||||
!
|
||||
IF ( ((.NOT.is_libxc(1)) .OR. (.NOT.is_libxc(2))) ) THEN
|
||||
!
|
||||
SELECT CASE( sr_d )
|
||||
SELECT CASE( srd )
|
||||
CASE( 1 )
|
||||
!
|
||||
IF ((iexch==8 .AND. .NOT.is_libxc(1)) .OR. (icorr==10 .AND. &
|
||||
.NOT. is_libxc(2))) THEN
|
||||
IF ((iexch==8 .AND. .NOT.is_libxc1) .OR. (icorr==10 .AND. .NOT.is_libxc2)) THEN
|
||||
IF (.NOT. finite_size_cell_volume_set) CALL xclib_error( 'XC',&
|
||||
'finite size corrected exchange used w/o initialization', 1 )
|
||||
ENDIF
|
||||
CALL xc_lda( length, ABS(rho_in(:,1)), ex_out, ec_out, vx_out(:,1), vc_out(:,1) )
|
||||
CALL xc_lda( length, rho_in(:,1), ex_out, ec_out, vx_out(:,1), vc_out(:,1) )
|
||||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
ALLOCATE( arho(length), zeta(length) )
|
||||
arho = ABS(rho_in(:,1))
|
||||
WHERE (arho > rho_threshold_lda) zeta(:) = rho_in(:,2) / arho(:)
|
||||
CALL xc_lsda( length, arho, zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
DEALLOCATE( arho, zeta )
|
||||
ALLOCATE( zeta(length) )
|
||||
!$acc data create( zeta )
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
arho_ir = ABS(rho_in(ir,1))
|
||||
IF (arho_ir > rho_threshold_lda) zeta(ir) = rho_in(ir,2) / arho_ir
|
||||
ENDDO
|
||||
!$acc host_data use_device( zeta )
|
||||
CALL xc_lsda( length, rho_in(:,1), zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( zeta )
|
||||
!
|
||||
CASE( 4 )
|
||||
!
|
||||
ALLOCATE( arho(length), zeta(length) )
|
||||
arho = ABS( rho_in(:,1) )
|
||||
WHERE (arho > rho_threshold_lda) zeta(:) = SQRT( rho_in(:,2)**2 + rho_in(:,3)**2 + &
|
||||
rho_in(:,4)**2 ) / arho(:) ! amag/arho
|
||||
CALL xc_lsda( length, arho, zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
DEALLOCATE( arho, zeta )
|
||||
ALLOCATE( zeta(length) )
|
||||
!$acc data create( zeta )
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
arho_ir = ABS( rho_in(ir,1) )
|
||||
IF (arho_ir > rho_threshold_lda) zeta(ir) = SQRT( rho_in(ir,2)**2 + rho_in(ir,3)**2 + &
|
||||
rho_in(ir,4)**2 ) / arho_ir ! amag/arho
|
||||
ENDDO
|
||||
!$acc host_data use_device( zeta )
|
||||
CALL xc_lsda( length, rho_in(:,1), zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( zeta )
|
||||
!
|
||||
CASE DEFAULT
|
||||
!
|
||||
|
@ -162,67 +233,83 @@ SUBROUTINE xc( length, sr_d, sv_d, rho_in, ex_out, ec_out, vx_out, vc_out )
|
|||
ENDIF
|
||||
!
|
||||
! ... fill output arrays
|
||||
!
|
||||
IF (sv_d == 1) THEN
|
||||
IF (is_libxc(1)) vx_out(:,1) = vx_lxc(:)
|
||||
IF (is_libxc(2)) vc_out(:,1) = vc_lxc(:)
|
||||
ELSE
|
||||
IF (is_libxc(1)) THEN
|
||||
DO ir = 1, length
|
||||
vx_out(ir,1) = vx_lxc(2*ir-1)
|
||||
vx_out(ir,2) = vx_lxc(2*ir)
|
||||
ENDDO
|
||||
ENDIF
|
||||
IF (is_libxc(2)) THEN
|
||||
DO ir = 1, length
|
||||
vc_out(ir,1) = vc_lxc(2*ir-1)
|
||||
vc_out(ir,2) = vc_lxc(2*ir)
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
IF ( is_libxc(1) ) THEN
|
||||
!$acc data copyin( ex_lxc, vx_lxc )
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
ex_out(ir) = ex_lxc(ir)
|
||||
IF (svd == 1) THEN
|
||||
vx_out(ir,1) = vx_lxc(ir)
|
||||
ELSE
|
||||
vx_out(ir,1) = vx_lxc(2*ir-1)
|
||||
vx_out(ir,2) = vx_lxc(2*ir)
|
||||
ENDIF
|
||||
ENDDO
|
||||
!$acc end data
|
||||
DEALLOCATE( ex_lxc, vx_lxc )
|
||||
ENDIF
|
||||
!
|
||||
IF (ANY(is_libxc(1:2))) THEN
|
||||
DEALLOCATE( rho_lxc )
|
||||
DEALLOCATE( vx_lxc, vc_lxc )
|
||||
IF ( is_libxc(2) ) THEN
|
||||
!$acc data copyin( ec_lxc, vc_lxc )
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
ec_out(ir) = ec_lxc(ir)
|
||||
IF (svd == 1) THEN
|
||||
vc_out(ir,1) = vc_lxc(ir)
|
||||
ELSE
|
||||
vc_out(ir,1) = vc_lxc(2*ir-1)
|
||||
vc_out(ir,2) = vc_lxc(2*ir)
|
||||
ENDIF
|
||||
ENDDO
|
||||
!$acc end data
|
||||
DEALLOCATE( ec_lxc, vc_lxc )
|
||||
ENDIF
|
||||
!
|
||||
#else
|
||||
!
|
||||
SELECT CASE( sr_d )
|
||||
SELECT CASE( srd )
|
||||
CASE( 1 )
|
||||
!
|
||||
IF ((iexch==8 .AND. .NOT.is_libxc(1)) .OR. (icorr==10 .AND. &
|
||||
.NOT. is_libxc(2))) THEN
|
||||
IF (iexch==8 .OR. icorr==10) THEN
|
||||
IF (.NOT. finite_size_cell_volume_set) CALL xclib_error( 'XC',&
|
||||
'finite size corrected exchange used w/o initialization', 1 )
|
||||
ENDIF
|
||||
!
|
||||
CALL xc_lda( length, ABS(rho_in(:,1)), ex_out, ec_out, vx_out(:,1), vc_out(:,1) )
|
||||
CALL xc_lda( length, rho_in(:,1), ex_out, ec_out, vx_out(:,1), vc_out(:,1) )
|
||||
!
|
||||
CASE( 2 )
|
||||
!
|
||||
ALLOCATE( arho(length), zeta(length) )
|
||||
ALLOCATE( zeta(length) )
|
||||
!$acc data create( zeta )
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
arho_ir = ABS(rho_in(ir,1))
|
||||
IF (arho_ir > rho_threshold_lda) zeta(ir) = rho_in(ir,2) / arho_ir
|
||||
ENDDO
|
||||
!$acc host_data use_device( zeta )
|
||||
CALL xc_lsda( length, rho_in(:,1), zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( zeta )
|
||||
!
|
||||
arho = ABS(rho_in(:,1))
|
||||
WHERE (arho > rho_threshold_lda) zeta(:) = rho_in(:,2) / arho(:)
|
||||
CASE( 4 )
|
||||
!
|
||||
CALL xc_lsda( length, arho, zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
ALLOCATE( zeta(length) )
|
||||
!$acc data create( zeta )
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
arho_ir = ABS(rho_in(ir,1))
|
||||
IF (arho_ir > rho_threshold_lda) zeta(ir) = SQRT( rho_in(ir,2)**2 + rho_in(ir,3)**2 + &
|
||||
rho_in(ir,4)**2 ) / arho_ir ! amag/arho
|
||||
ENDDO
|
||||
!$acc host_data use_device( zeta )
|
||||
CALL xc_lsda( length, rho_in(:,1), zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( zeta )
|
||||
!
|
||||
DEALLOCATE( arho, zeta )
|
||||
!
|
||||
CASE( 4 )
|
||||
!
|
||||
ALLOCATE( arho(length), zeta(length) )
|
||||
!
|
||||
arho = ABS( rho_in(:,1) )
|
||||
WHERE (arho > rho_threshold_lda) zeta(:) = SQRT( rho_in(:,2)**2 + rho_in(:,3)**2 + &
|
||||
rho_in(:,4)**2 ) / arho(:) ! amag/arho
|
||||
!
|
||||
CALL xc_lsda( length, arho, zeta, ex_out, ec_out, vx_out, vc_out )
|
||||
!
|
||||
DEALLOCATE( arho, zeta )
|
||||
!
|
||||
CASE DEFAULT
|
||||
CASE DEFAULT
|
||||
!
|
||||
CALL xclib_error( 'xc_LDA', 'Wrong ns input', 2 )
|
||||
!
|
||||
|
@ -230,7 +317,8 @@ SUBROUTINE xc( length, sr_d, sv_d, rho_in, ex_out, ec_out, vx_out, vc_out )
|
|||
!
|
||||
#endif
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE xc
|
||||
END SUBROUTINE xc_
|
||||
|
|
|
@ -5,8 +5,80 @@
|
|||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
!
|
||||
!-------------------------------------------------------------------------------------
|
||||
SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, &
|
||||
v2c, v3c, gpu_args_ )
|
||||
!----------------------------------------------------------------------------------
|
||||
!! Wrapper to gpu or non gpu version of \(\texttt{xc_metagcx}\).
|
||||
!
|
||||
USE kind_l, ONLY: DP
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
INTEGER, INTENT(IN) :: length
|
||||
!! length of the I/O arrays
|
||||
INTEGER, INTENT(IN) :: ns
|
||||
!! spin components
|
||||
INTEGER, INTENT(IN) :: np
|
||||
!! first dimension of v2c
|
||||
REAL(DP), INTENT(IN) :: rho(length,ns)
|
||||
!! the charge density
|
||||
REAL(DP), INTENT(IN) :: grho(3,length,ns)
|
||||
!! grho = \nabla rho
|
||||
REAL(DP), INTENT(IN) :: tau(length,ns)
|
||||
!! kinetic energy density
|
||||
REAL(DP), INTENT(OUT) :: ex(length)
|
||||
!! sx = E_x(rho,grho)
|
||||
REAL(DP), INTENT(OUT) :: ec(length)
|
||||
!! sc = E_c(rho,grho)
|
||||
REAL(DP), INTENT(OUT) :: v1x(length,ns)
|
||||
!! v1x = D(E_x)/D(rho)
|
||||
REAL(DP), INTENT(OUT) :: v2x(length,ns)
|
||||
!! v2x = D(E_x)/D( D rho/D r_alpha ) / |\nabla rho|
|
||||
REAL(DP), INTENT(OUT) :: v3x(length,ns)
|
||||
!! v3x = D(E_x)/D(tau)
|
||||
REAL(DP), INTENT(OUT) :: v1c(length,ns)
|
||||
!! v1c = D(E_c)/D(rho)
|
||||
REAL(DP), INTENT(OUT) :: v2c(np,length,ns)
|
||||
!! v2c = D(E_c)/D( D rho/D r_alpha ) / |\nabla rho|
|
||||
REAL(DP), INTENT(OUT) :: v3c(length,ns)
|
||||
!! v3c = D(E_c)/D(tau)
|
||||
LOGICAL, INTENT(IN), OPTIONAL :: gpu_args_
|
||||
!! whether you wish to run on gpu in case use_gpu is true
|
||||
!
|
||||
LOGICAL :: gpu_args
|
||||
!
|
||||
gpu_args = .FALSE.
|
||||
IF ( PRESENT(gpu_args_) ) gpu_args = gpu_args_
|
||||
!
|
||||
IF ( gpu_args ) THEN
|
||||
!
|
||||
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), tau(length,ns), &
|
||||
!$acc& ex(length), ec(length), v1x(length,ns), v2x(length,ns), v3x(length,ns), &
|
||||
!$acc& v1c(length,ns), v2c(np,length,ns), v3c(length,ns) )
|
||||
CALL xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, &
|
||||
v2c, v3c )
|
||||
!$acc end data
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc data copyin( rho, grho, tau ), copyout( ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!$acc host_data use_device( rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
CALL xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, &
|
||||
v2c, v3c )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE
|
||||
!
|
||||
!
|
||||
!----------------------------------------------------------------------------------------
|
||||
SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
SUBROUTINE xc_metagcx_( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1c, v2c, v3c )
|
||||
!-------------------------------------------------------------------------------------
|
||||
!! Wrapper routine. Calls internal metaGGA drivers or the Libxc ones,
|
||||
!! depending on the input choice.
|
||||
|
@ -18,8 +90,8 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
#endif
|
||||
!
|
||||
USE kind_l, ONLY: DP
|
||||
USE dft_setting_params, ONLY: imetac, is_libxc, rho_threshold_mgga, &
|
||||
grho2_threshold_mgga, tau_threshold_mgga, &
|
||||
USE dft_setting_params, ONLY: imeta, imetac, is_libxc, rho_threshold_mgga,&
|
||||
grho2_threshold_mgga, tau_threshold_mgga, &
|
||||
scan_exx, exx_started, exx_fraction
|
||||
USE qe_drivers_mgga
|
||||
!
|
||||
|
@ -56,7 +128,7 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: is
|
||||
INTEGER :: k, is, ipol
|
||||
REAL(DP), ALLOCATABLE :: grho2(:,:)
|
||||
REAL(DP), PARAMETER :: small = 1.E-10_DP
|
||||
!
|
||||
|
@ -67,48 +139,70 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
REAL(DP), ALLOCATABLE :: vc_rho(:), vc_sigma(:), vc_tau(:)
|
||||
REAL(DP), ALLOCATABLE :: lapl_rho(:), vlapl_rho(:) ! not used in QE
|
||||
!
|
||||
INTEGER :: k, ipol, pol_unpol
|
||||
LOGICAL :: POLARIZED
|
||||
REAL(DP) :: rh, ggrho2, atau
|
||||
#if (XC_MAJOR_VERSION > 4)
|
||||
INTEGER(8) :: lengthxc
|
||||
#else
|
||||
INTEGER :: lengthxc
|
||||
#endif
|
||||
#endif
|
||||
!
|
||||
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), tau(length,ns), ex(length), &
|
||||
!$acc& ec(length), v1x(length,ns), v2x(length,ns), v3x(length,ns), &
|
||||
!$acc& v1c(length,ns), v2c(np,length,ns), v3c(length,ns) )
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
lengthxc = length
|
||||
!
|
||||
ex = 0.0_DP ; v1x = 0.0_DP ; v2x = 0.0_DP ; v3x = 0.0_DP
|
||||
ec = 0.0_DP ; v1c = 0.0_DP ; v2c = 0.0_DP ; v3c = 0.0_DP
|
||||
ALLOCATE( rho_lxc(length*ns), sigma(length*np) )
|
||||
ALLOCATE( tau_lxc(length*ns), lapl_rho(length*ns) )
|
||||
!$acc data create( rho_lxc, sigma, tau_lxc, lapl_rho )
|
||||
!
|
||||
POLARIZED = .FALSE.
|
||||
IF (ns == 2) THEN
|
||||
POLARIZED = .TRUE.
|
||||
IF ( is_libxc(5) ) THEN
|
||||
ALLOCATE( ex_lxc(length) )
|
||||
ALLOCATE( vx_rho(length*ns), vx_sigma(length*np), vx_tau(length*ns) )
|
||||
IF ( imetac==0 ) THEN
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
ec(k) = 0.d0
|
||||
DO is = 1, ns
|
||||
v1c(k,is) = 0.d0 ; v3c(k,is) = 0.d0
|
||||
DO ipol = 1, np
|
||||
v2c(ipol,k,is) = 0.d0
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
pol_unpol = ns
|
||||
!
|
||||
ALLOCATE( rho_lxc(length*ns), sigma(length*np), tau_lxc(length*ns) )
|
||||
ALLOCATE( lapl_rho(length*ns) )
|
||||
!
|
||||
ALLOCATE( ex_lxc(length) , ec_lxc(length) )
|
||||
ALLOCATE( vx_rho(length*ns) , vx_sigma(length*np), vx_tau(length*ns) )
|
||||
ALLOCATE( vc_rho(length*ns) , vc_sigma(length*np), vc_tau(length*ns) )
|
||||
ALLOCATE( vlapl_rho(length*ns) )
|
||||
!
|
||||
IF ( is_libxc(6) ) THEN
|
||||
ALLOCATE( ec_lxc(length) )
|
||||
ALLOCATE( vc_rho(length*ns), vc_sigma(length*np), vc_tau(length*ns) )
|
||||
IF ( imeta==0 ) THEN
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
ex(k) = 0.d0
|
||||
DO is = 1, ns
|
||||
v1x(k,is) = 0.d0 ; v2x(k,is) = 0.d0 ; v3x(k,is) = 0.d0
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDIF
|
||||
ENDIF
|
||||
IF ( ANY(is_libxc(5:6)) ) ALLOCATE( vlapl_rho(length*ns) )
|
||||
!
|
||||
IF ( ns == 1 ) THEN
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_lxc(k) = ABS( rho(k,1) )
|
||||
sigma(k) = MAX( grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2, &
|
||||
grho2_threshold_mgga )
|
||||
tau_lxc(k) = MAX( tau(k,1), tau_threshold_mgga )
|
||||
vlapl_rho(k) = 0._DP
|
||||
lapl_rho(k) = 0.d0
|
||||
ENDDO
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rho_lxc(2*k-1) = ABS( rho(k,1) )
|
||||
rho_lxc(2*k) = ABS( rho(k,2) )
|
||||
|
@ -120,28 +214,28 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
grho2_threshold_mgga )
|
||||
tau_lxc(2*k-1) = MAX( tau(k,1), small )
|
||||
tau_lxc(2*k) = MAX( tau(k,2), small )
|
||||
vlapl_rho(2*k-1) = 0._DP
|
||||
vlapl_rho(2*k) = 0._DP
|
||||
lapl_rho(2*k-1) = 0.d0
|
||||
lapl_rho(2*k) = 0.d0
|
||||
ENDDO
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc update self( rho_lxc, sigma, tau_lxc, lapl_rho )
|
||||
!
|
||||
IF ( .NOT.is_libxc(5) .AND. imetac==0 ) THEN
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!
|
||||
DO is = 1, ns
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
ENDDO
|
||||
!
|
||||
IF (ns == 1) CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, &
|
||||
v1x(:,1), v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1),&
|
||||
v3c(:,1) )
|
||||
IF (ns == 2) CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x,&
|
||||
v1c, v2c, v3c )
|
||||
!
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
IF (ns == 1) THEN
|
||||
!
|
||||
!$acc host_data use_device( sigma )
|
||||
CALL tau_xc( length, rho(:,1), sigma, tau(:,1), ex, ec, v1x(:,1), &
|
||||
v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
|
||||
!$acc end host_data
|
||||
!
|
||||
ELSEIF (ns == 2) THEN
|
||||
!
|
||||
CALL tau_xc_spin( length, rho, grho, tau, ex, ec, v1x, v2x, v3x, &
|
||||
v1c, v2c, v3c )
|
||||
!
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
! ... META EXCHANGE
|
||||
|
@ -157,19 +251,31 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
ex_lxc = 0.d0
|
||||
ENDIF
|
||||
!
|
||||
IF (.NOT. POLARIZED) THEN
|
||||
!$acc data copyin( ex_lxc, vx_rho, vx_sigma, vx_tau )
|
||||
IF ( ns==1 ) THEN
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
IF ( ABS(rho_lxc(k))<=rho_threshold_mgga .OR. &
|
||||
sigma(k)<=grho2_threshold_mgga .OR. &
|
||||
ABS(tau_lxc(k))<=rho_threshold_mgga ) CYCLE
|
||||
ABS(tau_lxc(k))<=rho_threshold_mgga ) THEN
|
||||
ex(k) = 0.d0 ; v1x(k,1) = 0.d0
|
||||
v2x(k,1) = 0.d0 ; v3x(k,1) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
ex(k) = ex_lxc(k) * rho_lxc(k)
|
||||
v1x(k,1) = vx_rho(k)
|
||||
v2x(k,1) = vx_sigma(k) * 2.0_DP
|
||||
v3x(k,1) = vx_tau(k)
|
||||
ENDDO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
IF (rho_lxc(2*k-1)+rho_lxc(2*k) <= rho_threshold_mgga) CYCLE
|
||||
IF (rho_lxc(2*k-1)+rho_lxc(2*k) <= rho_threshold_mgga) THEN
|
||||
ex(k) = 0.d0
|
||||
v1x(k,1) = 0.d0 ; v2x(k,1) = 0.d0 ; v3x(k,1) = 0.d0
|
||||
v1x(k,2) = 0.d0 ; v2x(k,2) = 0.d0 ; v3x(k,2) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
ex(k) = ex_lxc(k) * (rho_lxc(2*k-1)+rho_lxc(2*k))
|
||||
IF ( ABS(rho_lxc(2*k-1))>rho_threshold_mgga .AND. &
|
||||
sigma(3*k-2)>grho2_threshold_mgga .AND. &
|
||||
|
@ -177,6 +283,8 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
v1x(k,1) = vx_rho(2*k-1)
|
||||
v2x(k,1) = vx_sigma(3*k-2)*2.d0
|
||||
v3x(k,1) = vx_tau(2*k-1)
|
||||
ELSE
|
||||
v1x(k,1) = 0.d0 ; v2x(k,1) = 0.d0 ; v3x(k,1) = 0.d0
|
||||
ENDIF
|
||||
IF ( ABS(rho_lxc(2*k))>rho_threshold_mgga .AND. &
|
||||
sigma(3*k)>grho2_threshold_mgga .AND. &
|
||||
|
@ -184,20 +292,14 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
v1x(k,2) = vx_rho(2*k)
|
||||
v2x(k,2) = vx_sigma(3*k)*2.d0
|
||||
v3x(k,2) = vx_tau(2*k)
|
||||
ELSE
|
||||
v1x(k,2) = 0.d0 ; v2x(k,2) = 0.d0 ; v3x(k,2) = 0.d0
|
||||
ENDIF
|
||||
ENDDO
|
||||
ENDIF
|
||||
!
|
||||
! **Now this is done directly inside Libxc for SCAN0**
|
||||
! ... only for HK/MCA: SCAN0 (used in CPV)
|
||||
!IF ( scan_exx ) THEN
|
||||
! IF (exx_started) THEN
|
||||
! ex = (1.0_DP - exx_fraction) * ex
|
||||
! v1x = (1.0_DP - exx_fraction) * v1x
|
||||
! v2x = (1.0_DP - exx_fraction) * v2x
|
||||
! v3x = (1.0_DP - exx_fraction) * v3x
|
||||
! ENDIF
|
||||
!ENDIF
|
||||
!$acc end data
|
||||
DEALLOCATE( ex_lxc, vx_rho, vx_sigma, vx_tau )
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
|
@ -215,24 +317,40 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
ec_lxc = 0.d0
|
||||
ENDIF
|
||||
!
|
||||
IF (.NOT. POLARIZED) THEN
|
||||
!$acc data copyin( ec_lxc, vc_rho, vc_sigma, vc_tau )
|
||||
IF ( ns==1 ) THEN
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
IF ( ABS(rho_lxc(k))<=rho_threshold_mgga .OR. &
|
||||
sigma(k)<=grho2_threshold_mgga .OR. &
|
||||
ABS(tau_lxc(k))<=rho_threshold_mgga ) CYCLE
|
||||
ABS(tau_lxc(k))<=rho_threshold_mgga ) THEN
|
||||
ec(k) = 0.d0 ; v1c(k,1) = 0.d0
|
||||
v2c(1,k,1) = 0.d0 ; v3c(k,1) = 0.d0
|
||||
CYCLE
|
||||
ENDIF
|
||||
ec(k) = ec_lxc(k) * rho_lxc(k)
|
||||
v1c(k,1) = vc_rho(k)
|
||||
v2c(1,k,1) = vc_sigma(k) * 2.0_DP
|
||||
v3c(k,1) = vc_tau(k)
|
||||
ENDDO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rh = rho_lxc(2*k-1) + rho_lxc(2*k)
|
||||
atau = ABS(tau_lxc(2*k-1) + tau_lxc(2*k))
|
||||
ggrho2 = (sigma(3*k-2) + sigma(3*k))*4.0_DP
|
||||
IF ( rh <= rho_threshold_mgga .OR. &
|
||||
ggrho2 <= grho2_threshold_mgga .OR. &
|
||||
atau <= tau_threshold_mgga ) CYCLE
|
||||
atau <= tau_threshold_mgga ) THEN
|
||||
ec(k) = 0.d0
|
||||
v1c(k,1) = 0.d0 ; v3c(k,1) = 0.d0
|
||||
v1c(k,2) = 0.d0 ; v3c(k,2) = 0.d0
|
||||
DO ipol = 1, 3
|
||||
v2c(ipol,k,1) = 0.d0
|
||||
v2c(ipol,k,2) = 0.d0
|
||||
ENDDO
|
||||
CYCLE
|
||||
ENDIF
|
||||
ec(k) = ec_lxc(k) * (rho_lxc(2*k-1)+rho_lxc(2*k))
|
||||
v1c(k,1) = vc_rho(2*k-1)
|
||||
v1c(k,2) = vc_rho(2*k)
|
||||
|
@ -244,26 +362,33 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
v3c(k,2) = vc_tau(2*k)
|
||||
ENDDO
|
||||
ENDIF
|
||||
!$acc end data
|
||||
!
|
||||
DEALLOCATE( ec_lxc, vc_rho, vc_sigma, vc_tau )
|
||||
ENDIF
|
||||
!
|
||||
IF ( ANY(is_libxc(5:6)) ) DEALLOCATE( vlapl_rho )
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( rho_lxc, sigma, tau_lxc, lapl_rho )
|
||||
DEALLOCATE( ex_lxc , ec_lxc )
|
||||
DEALLOCATE( vx_rho , vx_sigma, vx_tau )
|
||||
DEALLOCATE( vc_rho , vc_sigma, vc_tau, vlapl_rho )
|
||||
!
|
||||
#else
|
||||
!
|
||||
ALLOCATE( grho2(length,ns) )
|
||||
!$acc data create( grho2 )
|
||||
!$acc host_data use_device( grho2 )
|
||||
!
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, ns
|
||||
grho2(:,is) = grho(1,:,is)**2 + grho(2,:,is)**2 + grho(3,:,is)**2
|
||||
DO k = 1, length
|
||||
grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2
|
||||
ENDDO
|
||||
ENDDO
|
||||
!
|
||||
IF (ns == 1) THEN
|
||||
!
|
||||
CALL tau_xc( length, rho(:,1), grho2(:,1), tau(:,1), ex, ec, v1x(:,1), &
|
||||
v2x(:,1), v3x(:,1), v1c(:,1), v2c(1,:,1), v3c(:,1) )
|
||||
v2x(:,1), v3x(:,1), v1c(:,1), v2c, v3c(:,1) )
|
||||
!
|
||||
ELSEIF (ns == 2) THEN
|
||||
!
|
||||
|
@ -272,10 +397,14 @@ SUBROUTINE xc_metagcx( length, ns, np, rho, grho, tau, ex, ec, v1x, v2x, v3x, v1
|
|||
!
|
||||
ENDIF
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
#endif
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
RETURN
|
||||
!
|
||||
END SUBROUTINE xc_metagcx
|
||||
END SUBROUTINE xc_metagcx_
|
||||
|
|
|
@ -31,18 +31,19 @@ PROGRAM xclib_test
|
|||
!
|
||||
!! See README.TEST file for more details.
|
||||
!
|
||||
USE kind_l, ONLY: DP
|
||||
USE constants_l, ONLY: pi
|
||||
USE xc_lib, ONLY: xclib_set_dft_from_name, xclib_set_exx_fraction, &
|
||||
xclib_get_ID, xclib_reset_dft, xc_gcx, &
|
||||
xclib_dft_is_libxc, xclib_init_libxc, &
|
||||
xclib_finalize_libxc, xclib_set_finite_size_volume, &
|
||||
xclib_set_auxiliary_flags, xclib_dft_is, start_exx
|
||||
USE kind_l, ONLY: DP
|
||||
USE constants_l, ONLY: pi
|
||||
USE beef_interface, ONLY: beefsetmode
|
||||
USE xc_lib, ONLY: xclib_set_dft_from_name, xclib_set_exx_fraction, &
|
||||
xclib_get_ID, xclib_reset_dft, xc, xc_gcx, &
|
||||
xc_metagcx, xclib_dft_is_libxc, xclib_init_libxc,&
|
||||
xclib_finalize_libxc, xclib_set_finite_size_volume,&
|
||||
xclib_set_auxiliary_flags, xclib_dft_is, start_exx
|
||||
USE xclib_utils_and_para
|
||||
!--xml
|
||||
USE xmltools, ONLY: xml_open_file, xml_closefile,xmlr_readtag, &
|
||||
xmlw_writetag, xmlw_opentag, xmlw_closetag, &
|
||||
xmlr_opentag, xmlr_closetag, get_attr, add_attr
|
||||
USE xmltools, ONLY: xml_open_file, xml_closefile,xmlr_readtag, &
|
||||
xmlw_writetag, xmlw_opentag, xmlw_closetag, &
|
||||
xmlr_opentag, xmlr_closetag, get_attr, add_attr
|
||||
#if defined(__LIBXC)
|
||||
#include "xc_version.h"
|
||||
USE xc_f03_lib_m
|
||||
|
@ -379,6 +380,15 @@ PROGRAM xclib_test
|
|||
IF (nr==1) nrpe = mype*nnr
|
||||
IF (nr==0) nrpe = npoints-(npoints/npes)*(npes-mype-1)
|
||||
!
|
||||
! ... openacc init (otherwise it offsets the wall time of the first test)
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data create( time )
|
||||
!$acc end data
|
||||
#endif
|
||||
!
|
||||
! ... capitalize input vars
|
||||
!
|
||||
IF (TRIM(polarization)=='UNPOLARIZED' ) THEN
|
||||
is_min = 1 ; is_max = 1
|
||||
ELSEIF (TRIM(polarization)=='POLARIZED' ) THEN
|
||||
|
@ -979,6 +989,9 @@ PROGRAM xclib_test
|
|||
ex1 = 0.d0 ; ec1 = 0.d0
|
||||
ENDIF
|
||||
!
|
||||
IF ( dft(1:3)=='BEE' ) CALL beefsetmode(-1) !** beeforder can be manually
|
||||
! ! changed here for other checks
|
||||
!
|
||||
IF (mype==root) time(3) = get_wall_time()
|
||||
CALL xc_gcx( nnr, ns, rho, grho, exg1, ecg1, v1x1, v2x1, v1c1, &
|
||||
v2c1, v2c_ud1 )
|
||||
|
@ -1798,9 +1811,9 @@ PROGRAM xclib_test
|
|||
WRITE(test_output_exe(56:96), '(a)') TRIM(status)
|
||||
IF (status==passed .AND. show_time) THEN
|
||||
WRITE(test_output_exe(80:85), '(a)') 'time:'
|
||||
WRITE(test_output_exe(86:92), '(F6.3)') time_tot2
|
||||
WRITE(test_output_exe(86:92), '(F6.3)') time_tot1
|
||||
WRITE(test_output_exe(93:100), '(a)') 's incr:'
|
||||
WRITE(test_output_exe(101:110), '(F8.3)') time_tot2/time_tot1*100.d0-100.d0
|
||||
WRITE(test_output_exe(101:110), '(F8.3)') time_tot1/time_tot2*100.d0-100.d0
|
||||
WRITE(test_output_exe(111:111), '(a)') '%'
|
||||
ENDIF
|
||||
WRITE(stdout,*) test_output_exe
|
||||
|
|
|
@ -13,7 +13,8 @@ subroutine vxc_t(lsd,rho,rhoc,exc,vxc)
|
|||
! this function returns the XC potential and energy in LDA or
|
||||
! LSDA approximation
|
||||
!
|
||||
use kinds, only : DP
|
||||
use kinds, only : DP
|
||||
use xc_lib, only : xc
|
||||
implicit none
|
||||
integer, intent(in) :: lsd ! 1 in the LSDA case, 0 otherwise
|
||||
real(DP), intent(in) :: rho(2), rhoc ! the system density
|
||||
|
@ -77,7 +78,8 @@ subroutine vxcgc( ndm, mesh, nspin, r, r2, rho, rhoc, vgc, egc, &
|
|||
!
|
||||
use kinds, only : DP
|
||||
use constants, only : fpi, e2
|
||||
use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx
|
||||
use xc_lib, only : xclib_set_threshold, xclib_dft_is, xc_gcx, &
|
||||
xc_metagcx
|
||||
implicit none
|
||||
integer, intent(in) :: ndm,mesh,nspin,iflag
|
||||
real(DP), intent(in) :: r(mesh), r2(mesh), rho(ndm,2), rhoc(ndm)
|
||||
|
|
|
@ -5,13 +5,6 @@ qe_add_global_compile_definitions(__PGI)
|
|||
# set optimization specific flags
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mcache_align -Mlarge_arrays")
|
||||
|
||||
if(QE_ENABLE_OPENACC)
|
||||
add_library(OpenACC::OpenACC_Fortran INTERFACE IMPORTED)
|
||||
set_target_properties(OpenACC::OpenACC_Fortran PROPERTIES
|
||||
INTERFACE_COMPILE_OPTIONS "-acc"
|
||||
INTERFACE_LINK_OPTIONS "-acc")
|
||||
endif()
|
||||
|
||||
if(QE_ENABLE_CUDA)
|
||||
if(CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER_EQUAL 20.7)
|
||||
set(CUDA_FLAG "-cuda")
|
||||
|
|
|
@ -4793,6 +4793,7 @@ $as_echo "$as_me: WARNING: Using legacy custom solver." >&2;}
|
|||
if test "$enable_openacc" == "yes"; then
|
||||
ldflags="$ldflags -acc"
|
||||
cuda_fflags="$cuda_fflags -acc"
|
||||
CUDA_CFLAGS="$CUDA_CFLAGS -acc -gpu=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
fi
|
||||
|
||||
fi
|
||||
|
|
|
@ -209,6 +209,7 @@ EOF
|
|||
if test "$enable_openacc" == "yes"; then
|
||||
ldflags="$ldflags -acc"
|
||||
cuda_fflags="$cuda_fflags -acc"
|
||||
CUDA_CFLAGS="$CUDA_CFLAGS -acc -gpu=cc$with_cuda_cc,cuda$with_cuda_runtime"
|
||||
fi
|
||||
|
||||
fi
|
||||
|
|
|
@ -101,6 +101,9 @@ CUDA_RUNTIME=@gpu_runtime@
|
|||
# CUDA F90 Flags
|
||||
CUDA_F90FLAGS=@cuda_fflags@ $(MOD_FLAG)$(TOPDIR)/external/devxlib/src
|
||||
|
||||
# CUDA C Flags
|
||||
CUDA_CFLAGS=@CUDA_CFLAGS@
|
||||
|
||||
# C preprocessor and preprocessing flags - for explicit preprocessing,
|
||||
# if needed (see the compilation rules above)
|
||||
# preprocessing flags must include DFLAGS and IFLAGS
|
||||
|
@ -112,7 +115,7 @@ CPPFLAGS = @cppflags@ $(DFLAGS) $(IFLAGS)
|
|||
# C flags must include DFLAGS and IFLAGS
|
||||
# F90 flags must include MODFLAGS, IFLAGS, and FDFLAGS with appropriate syntax
|
||||
|
||||
CFLAGS = @cflags@ $(DFLAGS) $(IFLAGS)
|
||||
CFLAGS = @cflags@ $(DFLAGS) $(IFLAGS) $(CUDA_CFLAGS)
|
||||
F90FLAGS = @f90flags@ @pre_fdflags@$(FDFLAGS) $(CUDA_F90FLAGS) $(IFLAGS) $(MODFLAGS)
|
||||
|
||||
# compiler flags with and without optimization for fortran-77
|
||||
|
|
Loading…
Reference in New Issue