mirror of https://gitlab.com/QEF/q-e.git
XClib-acc - present directive only in GGA
This commit is contained in:
parent
2d02c7357b
commit
4c4b7a9e17
|
@ -67,7 +67,7 @@ SUBROUTINE dgcxc_unpol( length, r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc
|
|||
REAL(DP), ALLOCATABLE :: sx(:), sc(:)
|
||||
REAL(DP), PARAMETER :: small = 1.E-30_DP
|
||||
!
|
||||
!$acc data deviceptr( r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
|
||||
!$acc data present( r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
|
||||
!
|
||||
ALLOCATE( raux(4*length), s2aux(4*length), dr(length), s(length), ds(length) )
|
||||
ALLOCATE( v1x(4*length), v2x(4*length), sx(4*length) )
|
||||
|
@ -97,9 +97,7 @@ SUBROUTINE dgcxc_unpol( length, r_in, s2_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc
|
|||
raux(i4-1+ir) = r_in(ir) ; s2aux(i4-1+ir) = (s(ir)-ds(ir))**2
|
||||
ENDDO
|
||||
!
|
||||
!$acc host_data use_device( raux, s2aux, sx, sc, v1x, v2x, v1c, v2c )
|
||||
CALL gcxc( length*4, raux, s2aux, sx, sc, v1x, v2x, v1c, v2c )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
|
@ -191,7 +189,7 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
|
|||
REAL(DP), PARAMETER :: rho_trash = 0.4_DP, zeta_trash = 0.2_DP, &
|
||||
s2_trash = 0.1_DP
|
||||
!
|
||||
!$acc data deviceptr( r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, vssc, vrzc )
|
||||
!$acc data present( r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, vssc, vrzc )
|
||||
!
|
||||
igcx_=igcx
|
||||
igcc_=igcc
|
||||
|
@ -255,9 +253,7 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
|
|||
raux(i8-1+ir,2) = r_dw ; s2aux(i8-1+ir,2) = (s_dw-ds_dw)**2
|
||||
ENDDO
|
||||
!
|
||||
!$acc host_data use_device( raux, s2aux, sx, v1x, v2x )
|
||||
CALL gcx_spin( length*8, raux, s2aux, sx, v1x, v2x )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
|
@ -348,9 +344,7 @@ SUBROUTINE dgcxc_spin( length, r_in, g_in, vrrx, vrsx, vssx, vrrc, vrsc, &
|
|||
rtaux(i6-1+ir) = rt ; s2taux(i6-1+ir) = s2t ; zetaux(i6-1+ir) = zeta-dz
|
||||
ENDDO
|
||||
!
|
||||
!$acc host_data use_device( rtaux, zetaux, s2taux, sc, v1c, v2c )
|
||||
CALL gcc_spin( length*6, rtaux, zetaux, s2taux, sc, v1c, v2c )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO ir = 1, length
|
||||
|
|
|
@ -81,8 +81,7 @@ SUBROUTINE gcxc( length, rho_in, grho_in, sx_out, sc_out, v1x_out, &
|
|||
#endif
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho_in(length), grho_in(length), sx_out(length), sc_out(length), &
|
||||
!$acc& v1x_out(length), v2x_out(length), v1c_out(length), v2c_out(length) )
|
||||
!$acc data present( rho_in, grho_in, sx_out, sc_out, v1x_out, v2x_out, v1c_out, v2c_out )
|
||||
!$acc parallel loop
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
|
@ -494,8 +493,7 @@ SUBROUTINE gcx_spin( length, rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
|
|||
!sx_tot = 0.0_DP
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho_in(length,2), grho2_in(length,2), sx_tot(length), &
|
||||
!$acc& v1x_out(length,2), v2x_out(length,2) )
|
||||
!$acc data present( rho_in, grho2_in, sx_tot, v1x_out, v2x_out )
|
||||
!$acc parallel loop
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
|
@ -1036,8 +1034,7 @@ SUBROUTINE gcc_spin( length, rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out
|
|||
#endif
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho_in(length), zeta_io(length), grho_in(length), &
|
||||
!$acc& sc_out(length), v1c_out(length,2), v2c_out(length) )
|
||||
!$acc data present( rho_in, zeta_io, grho_in, sc_out, v1c_out, v2c_out )
|
||||
!$acc parallel loop
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
|
@ -1151,7 +1148,7 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
|
|||
REAL(DP), INTENT(OUT), DIMENSION(length,2) :: v2c
|
||||
!! correlation potential (gradient part)
|
||||
REAL(DP), INTENT(OUT), DIMENSION(length) :: v2c_ud
|
||||
!!correlation potential (off-diag. term)
|
||||
!! correlation potential (off-diag. term)
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
|
@ -1166,8 +1163,7 @@ SUBROUTINE gcc_spin_more( length, rho_in, grho_in, grho_ud_in, &
|
|||
#endif
|
||||
!
|
||||
#if defined(_OPENACC)
|
||||
!$acc data deviceptr( rho_in(length,2), grho_in(length,2), grho_ud_in(length), &
|
||||
!$acc& sc(length), v1c(length,2), v2c(length,2), v2c_ud(length) )
|
||||
!$acc data present( rho_in, grho_in, grho_ud_in, sc, v1c, v2c, v2c_ud )
|
||||
!$acc parallel loop
|
||||
#else
|
||||
!$omp parallel if(ntids==1) default(none) &
|
||||
|
|
|
@ -36,17 +36,13 @@ SUBROUTINE dgcxc( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss, gpu_args_ )
|
|||
IF ( gpu_args ) THEN
|
||||
!
|
||||
!$acc data present( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
!$acc host_data use_device( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
CALL dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc data copyin( r_in, g_in ), copyout( dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
!$acc host_data use_device( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
CALL dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
ENDIF
|
||||
|
@ -108,7 +104,7 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
REAL(DP), PARAMETER :: small = 1.E-10_DP, rho_trash = 0.5_DP
|
||||
REAL(DP), PARAMETER :: epsr=1.0d-6, epsg=1.0d-6
|
||||
!
|
||||
!$acc data deviceptr( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
!$acc data present( r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
||||
!
|
||||
IF ( ANY(.NOT.is_libxc(3:4)) ) THEN
|
||||
rho_threshold_gga = small ; grho_threshold_gga = small
|
||||
|
@ -293,10 +289,8 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
ENDIF
|
||||
!
|
||||
!$acc data copyin( sigma )
|
||||
!$acc host_data use_device( sigma, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
|
||||
CALL dgcxc_unpol( length, r_in(:,1), sigma, vrrx(:,1), vsrx(:,1), vssx(:,1), &
|
||||
vrrc(:,1), vsrc(:,1), vssc )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
!$acc parallel loop
|
||||
|
@ -311,9 +305,7 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
ALLOCATE( vrzc(length,sp) )
|
||||
!$acc data create( vrzc )
|
||||
!
|
||||
!$acc host_data use_device( vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
|
||||
CALL dgcxc_spin( length, r_in, g_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
|
@ -369,10 +361,8 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
sigma(k) = g_in(k,1,1)**2 + g_in(k,2,1)**2 + g_in(k,3,1)**2
|
||||
ENDDO
|
||||
!
|
||||
!$acc host_data use_device( sigma, vrrx, vsrx, vssx, vrrc, vsrc, vssc )
|
||||
CALL dgcxc_unpol( length, r_in(:,1), sigma, vrrx(:,1), vsrx(:,1), vssx(:,1), &
|
||||
vrrc(:,1), vsrc(:,1), vssc )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc end data
|
||||
DEALLOCATE( sigma )
|
||||
|
@ -389,9 +379,7 @@ SUBROUTINE dgcxc_( length, sp, r_in, g_in, dvxc_rr, dvxc_sr, dvxc_ss )
|
|||
ALLOCATE( vrzc(length,sp) )
|
||||
!$acc data create( vrzc )
|
||||
!
|
||||
!$acc host_data use_device( vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
|
||||
CALL dgcxc_spin( length, r_in, g_in, vrrx, vsrx, vssx, vrrc, vsrc, vssc, vrzc )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
|
|
|
@ -55,16 +55,12 @@ SUBROUTINE xc_gcx( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud, &
|
|||
!$acc data present( rho, grho, ex, ec, v1x, v2x, v1c, v2c )
|
||||
IF (PRESENT(v2c_ud)) THEN
|
||||
!$acc data present( v2c_ud )
|
||||
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
ELSE
|
||||
ALLOCATE( v2c_dummy(length) )
|
||||
!$acc data create( v2c_dummy )
|
||||
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( v2c_dummy )
|
||||
ENDIF
|
||||
|
@ -75,16 +71,12 @@ SUBROUTINE xc_gcx( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud, &
|
|||
!$acc data copyin( rho, grho ), copyout( ex, ec, v1x, v2x, v1c, v2c )
|
||||
IF (PRESENT(v2c_ud)) THEN
|
||||
!$acc data copyout( v2c_ud )
|
||||
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
ELSE
|
||||
ALLOCATE( v2c_dummy(length) )
|
||||
!$acc data create( v2c_dummy )
|
||||
!$acc host_data use_device( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
|
||||
CALL xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_dummy )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( v2c_dummy )
|
||||
ENDIF
|
||||
|
@ -154,9 +146,7 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
REAL(DP) :: rho_up, rho_dw, grho_up, grho_dw, sgn1
|
||||
REAL(DP), PARAMETER :: small = 1.E-10_DP
|
||||
!
|
||||
!$acc data deviceptr( rho(length,ns), grho(3,length,ns), ex(length), ec(length), &
|
||||
!$acc& v1x(length,ns), v2x(length,ns), v1c(length,ns), v2c(length,ns), &
|
||||
!$acc& v2c_ud(length) )
|
||||
!$acc data present( rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
||||
!
|
||||
#if defined(__LIBXC)
|
||||
!
|
||||
|
@ -214,9 +204,7 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
!
|
||||
IF ( ns==1 .AND. ANY(.NOT.is_libxc(3:4)) ) THEN
|
||||
!
|
||||
!$acc host_data use_device( rho_lxc, sigma )
|
||||
CALL gcxc( length, rho_lxc, sigma, ex, ec, v1x(:,1), v2x(:,1), v1c(:,1), v2c(:,1) )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
|
@ -305,9 +293,8 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
grho2(k,2) = sigma(3*k)
|
||||
grho_ud(k) = sigma(3*k-1)
|
||||
ENDDO
|
||||
!$acc host_data use_device( grho2, grho_ud )
|
||||
!
|
||||
CALL gcc_spin_more( length, rho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho_ud )
|
||||
!
|
||||
|
@ -329,9 +316,7 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
grho2(k,2) = grho(1,k,2)**2 + grho(2,k,2)**2 + grho(3,k,2)**2
|
||||
ENDDO
|
||||
!
|
||||
!$acc host_data use_device( rh, zeta, grho2 )
|
||||
CALL gcc_spin( length, rh, zeta, grho2(:,1), ec, v1c, v2c(:,1) )
|
||||
!$acc end host_data
|
||||
!
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
|
@ -420,9 +405,8 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
grho2(k,is) = grho(1,k,is)**2 + grho(2,k,is)**2 + grho(3,k,is)**2
|
||||
ENDDO
|
||||
ENDDO
|
||||
!$acc host_data use_device( grho2 )
|
||||
!
|
||||
CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho2 )
|
||||
!
|
||||
|
@ -442,7 +426,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
!
|
||||
ALLOCATE( rh(length) )
|
||||
!$acc data create( rh )
|
||||
!$acc host_data use_device( rh, grho2 )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rh(k) = ABS(rho(k,1))
|
||||
|
@ -459,13 +442,11 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
ec(k) = ec(k) * sgn1
|
||||
ENDDO
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( rh )
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
!$acc host_data use_device( grho2 )
|
||||
!$acc parallel loop collapse(2)
|
||||
DO is = 1, ns
|
||||
DO k = 1, length
|
||||
|
@ -474,13 +455,11 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
ENDDO
|
||||
!
|
||||
CALL gcx_spin( length, rho, grho2, ex, v1x, v2x )
|
||||
!$acc end host_data
|
||||
!
|
||||
IF (igcc==3 .OR. igcc==7 .OR. igcc==13 ) THEN
|
||||
!
|
||||
ALLOCATE( grho_ud(length) )
|
||||
!$acc data create( grho_ud )
|
||||
!$acc host_data use_device( grho_ud, grho2 )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
grho_ud(k) = grho(1,k,1) * grho(1,k,2) + grho(2,k,1) * grho(2,k,2) + &
|
||||
|
@ -489,7 +468,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
!
|
||||
CALL gcc_spin_more( length, rho, grho2, grho_ud, ec, v1c, v2c, v2c_ud )
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( grho_ud )
|
||||
!
|
||||
|
@ -497,7 +475,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
!
|
||||
ALLOCATE( rh(length), zeta(length) )
|
||||
!$acc data create( rh, zeta )
|
||||
!$acc host_data use_device( rh, zeta, grho2 )
|
||||
!$acc parallel loop
|
||||
DO k = 1, length
|
||||
rh(k) = rho(k,1) + rho(k,2)
|
||||
|
@ -519,7 +496,6 @@ SUBROUTINE xc_gcx_( length, ns, rho, grho, ex, ec, v1x, v2x, v1c, v2c, v2c_ud )
|
|||
IF ( ns==2 ) v2c_ud(k) = v2c(k,1)
|
||||
ENDDO
|
||||
!
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( rh, zeta )
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue