XClib-acc - present directive only in GGA

This commit is contained in:
fabrizio22 2022-02-07 15:26:06 +01:00
parent 2d02c7357b
commit 4c4b7a9e17
4 changed files with 11 additions and 57 deletions

View File

@ -67,7 +67,7 @@ SUBROUTINE dgcxc_unpol( length, r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc
REAL(DP), ALLOCATABLE :: sx(:), sc(:)
REAL(DP), PARAMETER :: small = 1.E-30_DP
!
!$acc data deviceptr( r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
!$acc data present( r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
!
ALLOCATE( raux(4*length), s2aux(4*length), dr(length), s(length), ds(length) )
ALLOCATE( v1x(4*length), v2x(4*length), sx(4*length) )
@ -97,9 +97,7 @@ SUBROUTINE dgcxc_unpol( length, r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc
raux(i4-1+ir) = r_in(ir) ; s2aux(i4-1+ir) = (s(ir)-ds(ir))**2
ENDDO
!
!$acc host_data use_device( raux, s2aux, sx, sc, v1x, v2x, v1c, v2c )
CALL gcxc( length*4, raux, s2aux, sx, sc, v1x, v2x, v1c, v2c )
!$acc end host_data
!
!$acc parallel loop
DO ir = 1, length
@ -191,7 +189,7 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
REAL(DP), PARAMETER :: rho_trash = 0.4_DP, zeta_trash = 0.2_DP, &
s2_trash = 0.1_DP
!
!$acc data deviceptr( r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, vssc, vrzc )
!$acc data present( r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, vssc, vrzc )
!
igcx_=igcx
igcc_=igcc
@ -255,9 +253,7 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
raux(i8-1+ir,2) = r_dw ; s2aux(i8-1+ir,2) = (s_dw-ds_dw)**2
ENDDO
!
!$acc host_data use_device( raux, s2aux, sx, v1x, v2x )
CALL gcx_spin( length*8, raux, s2aux, sx, v1x, v2x )
!$acc end host_data
!
!$acc parallel loop
DO ir = 1, length
@ -348,9 +344,7 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
rtaux(i6-1+ir) = rt ; s2taux(i6-1+ir) = s2t ; zetaux(i6-1+ir) = zeta-dz
ENDDO
!
!$acc host_data use_device( rtaux, zetaux, s2taux, sc, v1c, v2c )
CALL gcc_spin( length*6, rtaux, zetaux, s2taux, sc, v1c, v2c )
!$acc end host_data
!
!$acc parallel loop
DO ir = 1, length

View File

@ -81,8 +81,7 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
#endif
!
#if defined(_OPENACC)
!$acc data deviceptr( rho_in(length), grho_in(length), sx_out(length), sc_out(length), &
!$acc& v1x_out(length), v2x_out(length), v1c_out(length), v2c_out(length) )
!$acc data present( rho_in, grho_in, sx_out, sc_out, v1x_out, v2x_out, v1c_out, v2c_out )
!$acc parallel loop
#else
!$omp parallel if(ntids==1) default(none) &
@ -494,8 +493,7 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
!sx_tot = 0.0_DP
!
#if defined(_OPENACC)
!$acc data deviceptr( rho_in(length,2), grho2_in(length,2), sx_tot(length), &
!$acc& v1x_out(length,2), v2x_out(length,2) )
!$acc data present( rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
!$acc parallel loop
#else
!$omp parallel if(ntids==1) default(none) &
@ -1036,8 +1034,7 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
#endif
!
#if defined(_OPENACC)
!$acc data deviceptr( rho_in(length), zeta_io(length), grho_in(length), &
!$acc& sc_out(length), v1c_out(length,2), v2c_out(length) )
!$acc data present( rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out )
!$acc parallel loop
#else
!$omp parallel if(ntids==1) default(none) &
@ -1151,7 +1148,7 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v2c
!! correlation potential (gradient part)
REAL(DP), INTENT(OUT), DIMENSION(length) :: v2c_ud
!!correlation potential (off-diag. term)
!! correlation potential (off-diag. term)
!
! ... local variables
!
@ -1166,8 +1163,7 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
#endif
!
#if defined(_OPENACC)
!$acc data deviceptr( rho_in(length,2), grho_in(length,2), grho_ud_in(length), &
!$acc& sc(length), v1c(length,2), v2c(length,2), v2c_ud(length) )
!$acc data present( rho_in, grho_in, grho_ud_in, sc, v1c, v2c, v2c_ud )
!$acc parallel loop
#else
!$omp parallel if(ntids==1) default(none) &

View File

@ -36,17 +36,13 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss, gpu_args_ )
IF ( gpu_args ) THEN
!
!$acc data present( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
!$acc host_data use_device( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
CALL dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
!$acc end host_data
!$acc end data
!
ELSE
!
!$acc data copyin( r_in, g_in ), copyout( dvxc_rr, dvxc_sr, dvxc_ss )
!$acc host_data use_device( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
CALL dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
!$acc end host_data
!$acc end data
!
ENDIF
@ -108,7 +104,7 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
REAL(DP), PARAMETER :: small = 1.E-10_DP, rho_trash = 0.5_DP
REAL(DP), PARAMETER :: epsr=1.0d-6, epsg=1.0d-6
!
!$acc data deviceptr( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
!$acc data present( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
!
IF ( ANY(.NOT.is_libxc(3:4)) ) THEN
rho_threshold_gga = small ; grho_threshold_gga = small
@ -293,10 +289,8 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
ENDIF
!
!$acc data copyin( sigma )
!$acc host_data use_device( sigma, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
CALL dgcxc_unpol( length, r_in(:,1), sigma, vrrx(:,1), vsrx(:,1), vssx(:,1), &
vrrc(:,1), vsrc(:,1), vssc )
!$acc end host_data
!$acc end data
!
!$acc parallel loop
@ -311,9 +305,7 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
ALLOCATE( vrzc(length,sp) )
!$acc data create( vrzc )
!
!$acc host_data use_device( vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
CALL dgcxc_spin( length, r_in, g_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
!$acc end host_data
!
!$acc parallel loop
DO k = 1, length
@ -369,10 +361,8 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
sigma(k) = g_in(k,1,1)**2 + g_in(k,2,1)**2 + g_in(k,3,1)**2
ENDDO
!
!$acc host_data use_device( sigma, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
CALL dgcxc_unpol( length, r_in(:,1), sigma, vrrx(:,1), vsrx(:,1), vssx(:,1), &
vrrc(:,1), vsrc(:,1), vssc )
!$acc end host_data
!
!$acc end data
DEALLOCATE( sigma )
@ -389,9 +379,7 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
ALLOCATE( vrzc(length,sp) )
!$acc data create( vrzc )
!
!$acc host_data use_device( vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
CALL dgcxc_spin( length, r_in, g_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
!$acc end host_data
!
!$acc parallel loop
DO k = 1, length

View File

@ -55,16 +55,12 @@ SUBROUTINE xc_gcx( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud, &
!$acc data present( rho, grho, ex, ec, v1x, v2x, v1c, v2c )
IF (PRESENT(v2c_ud)) THEN
!$acc data present( v2c_ud )
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
!$acc end host_data
!$acc end data
ELSE
ALLOCATE( v2c_dummy(length) )
!$acc data create( v2c_dummy )
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
!$acc end host_data
!$acc end data
DEALLOCATE( v2c_dummy )
ENDIF
@ -75,16 +71,12 @@ SUBROUTINE xc_gcx( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud, &
!$acc data copyin( rho, grho ), copyout( ex, ec, v1x, v2x, v1c, v2c )
IF (PRESENT(v2c_ud)) THEN
!$acc data copyout( v2c_ud )
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
!$acc end host_data
!$acc end data
ELSE
ALLOCATE( v2c_dummy(length) )
!$acc data create( v2c_dummy )
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
!$acc end host_data
!$acc end data
DEALLOCATE( v2c_dummy )
ENDIF
@ -154,9 +146,7 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
REAL(DP) :: rho_up, rho_dw, grho_up, grho_dw, sgn1
REAL(DP), PARAMETER :: small = 1.E-10_DP
!
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), ex(length), ec(length), &
!$acc& v1x(length,ns), v2x(length,ns), v1c(length,ns), v2c(length,ns), &
!$acc& v2c_ud(length) )
!$acc data present( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
!
#if defined(__LIBXC)
!
@ -214,9 +204,7 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
!
IF ( ns==1 .AND. ANY(.NOT.is_libxc(3:4)) ) THEN
!
!$acc host_data use_device( rho_lxc, sigma )
CALL gcxc( length, rho_lxc, sigma, ex, ec, v1x(:,1), v2x(:,1), v1c(:,1), v2c(:,1) )
!$acc end host_data
!
!$acc parallel loop
DO k = 1, length
@ -305,9 +293,8 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
grho2(k,2) = sigma(3*k)
grho_ud(k) = sigma(3*k-1)
ENDDO
!$acc host_data use_device( grho2, grho_ud )
!
CALL gcc_spin_more( length, rho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
!$acc end host_data
!$acc end data
DEALLOCATE( grho_ud )
!
@ -329,9 +316,7 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
grho2(k,2) = grho(1,k,2)**2 + grho(2,k,2)**2 + grho(3,k,2)**2
ENDDO
!
!$acc host_data use_device( rh, zeta, grho2 )
CALL gcc_spin( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
!$acc end host_data
!
!$acc parallel loop
DO k = 1, length
@ -420,9 +405,8 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2
ENDDO
ENDDO
!$acc host_data use_device( grho2 )
!
CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
!$acc end host_data
!$acc end data
DEALLOCATE( grho2 )
!
@ -442,7 +426,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
!
ALLOCATE( rh(length) )
!$acc data create( rh )
!$acc host_data use_device( rh, grho2 )
!$acc parallel loop
DO k = 1, length
rh(k) = ABS(rho(k,1))
@ -459,13 +442,11 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
ec(k) = ec(k) * sgn1
ENDDO
!
!$acc end host_data
!$acc end data
DEALLOCATE( rh )
!
ELSE
!
!$acc host_data use_device( grho2 )
!$acc parallel loop collapse(2)
DO is = 1, ns
DO k = 1, length
@ -474,13 +455,11 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
ENDDO
!
CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
!$acc end host_data
!
IF (igcc==3 .OR. igcc==7 .OR. igcc==13 ) THEN
!
ALLOCATE( grho_ud(length) )
!$acc data create( grho_ud )
!$acc host_data use_device( grho_ud, grho2 )
!$acc parallel loop
DO k = 1, length
grho_ud(k) = grho(1,k,1) * grho(1,k,2) + grho(2,k,1) * grho(2,k,2) + &
@ -489,7 +468,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
!
CALL gcc_spin_more( length, rho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
!
!$acc end host_data
!$acc end data
DEALLOCATE( grho_ud )
!
@ -497,7 +475,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
!
ALLOCATE( rh(length), zeta(length) )
!$acc data create( rh, zeta )
!$acc host_data use_device( rh, zeta, grho2 )
!$acc parallel loop
DO k = 1, length
rh(k) = rho(k,1) + rho(k,2)
@ -519,7 +496,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
IF ( ns==2 ) v2c_ud(k) = v2c(k,1)
ENDDO
!
!$acc end host_data
!$acc end data
DEALLOCATE( rh, zeta )
!