mirror of https://gitlab.com/QEF/q-e.git
more device variables, more helper subs, bug fix for OpenMP with intel compiler
This commit is contained in:
parent
8fe16a7bfe
commit
657a7d2e12
|
@ -6,9 +6,16 @@
|
|||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define DEVICEATTR ,DEVICE
|
||||
#else
|
||||
#define DEVICEATTR
|
||||
#endif
|
||||
|
||||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE rhoofr_cp &
|
||||
( nfi, c_bgrp, irb, eigrb, bec_bgrp, dbec, rhovan, rhor, drhor, rhog, drhog, rhos, enl, denl, ekin, dekin, tstress, ndwwf )
|
||||
( nfi, c_bgrp, c_d, bec_bgrp, dbec, rhovan, rhor, drhor, rhog, drhog, rhos, &
|
||||
enl, denl, ekin, dekin, tstress, ndwwf )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! this routine computes:
|
||||
|
@ -53,14 +60,14 @@
|
|||
USE mp, ONLY: mp_sum
|
||||
USE io_global, ONLY: stdout, ionode
|
||||
USE mp_global, ONLY: intra_bgrp_comm, nbgrp, inter_bgrp_comm, &
|
||||
me_bgrp, nproc_bgrp, root_bgrp
|
||||
me_bgrp, nproc_bgrp, root_bgrp
|
||||
USE funct, ONLY: dft_is_meta
|
||||
USE cg_module, ONLY: tcg
|
||||
USE cp_interfaces, ONLY: stress_kin, enkin
|
||||
USE fft_interfaces, ONLY: fwfft, invfft
|
||||
USE fft_base, ONLY: dffts, dfftp
|
||||
USE cp_interfaces, ONLY: checkrho, ennl, calrhovan, dennl
|
||||
USE cp_main_variables, ONLY: iprint_stdout, idesc
|
||||
USE cp_main_variables, ONLY: iprint_stdout, idesc, irb, eigrb
|
||||
USE wannier_base, ONLY: iwf
|
||||
USE exx_module, ONLY: rhopr
|
||||
USE input_parameters, ONLY: tcpbo ! BS
|
||||
|
@ -79,11 +86,10 @@
|
|||
REAL(DP) rhos(:,:)
|
||||
REAL(DP) enl, ekin
|
||||
REAL(DP) denl(3,3), dekin(6)
|
||||
COMPLEX(DP) eigrb( :, : )
|
||||
COMPLEX(DP) rhog( :, : )
|
||||
COMPLEX(DP) drhog( :, :, :, : )
|
||||
COMPLEX(DP) c_bgrp( :, : )
|
||||
INTEGER irb( :, : )
|
||||
COMPLEX(DP) DEVICEATTR :: c_d( :, : )
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: tstress
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: ndwwf
|
||||
|
||||
|
@ -217,6 +223,9 @@
|
|||
CALL errore( ' rhoofr ', ' c second dimension too small ', SIZE( c_bgrp, 2 ) )
|
||||
!
|
||||
c_bgrp( :, nbsp_bgrp + 1 ) = ( 0.d0, 0.d0 )
|
||||
#if defined (__CUDA)
|
||||
c_d( :, nbsp_bgrp + 1 ) = ( 0.d0, 0.d0 )
|
||||
#endif
|
||||
!
|
||||
ENDIF
|
||||
!
|
||||
|
@ -474,7 +483,6 @@
|
|||
|
||||
|
||||
ALLOCATE( psis( dffts%nnr * many_fft ) ) ! dffts%nnr * many_fft
|
||||
ALLOCATE( ptmp( SIZE(c_bgrp,1), 2 ) )
|
||||
ALLOCATE( rhos_d ( SIZE(rhos,1), SIZE(rhos,2) ) )
|
||||
!
|
||||
rhos_d = 0_DP
|
||||
|
@ -488,19 +496,16 @@
|
|||
ioff = 0
|
||||
DO ii = i, i + 2 * many_fft - 1, 2
|
||||
IF( ii < nbsp_bgrp ) THEN
|
||||
ptmp(:,1) = c_bgrp( :, ii )
|
||||
ptmp(:,2) = c_bgrp( :, ii + 1 )
|
||||
!$cuf kernel do(1)
|
||||
do ig = 1, dffts%ngw
|
||||
psis( nlm_d( ig ) + ioff) = CONJG( ptmp( ig, 1 ) ) + ci * conjg( ptmp( ig, 2 ))
|
||||
psis( nl_d( ig ) + ioff) = ptmp( ig, 1 ) + ci * ptmp( ig, 2 )
|
||||
psis( nlm_d( ig ) + ioff) = CONJG( c_d( ig, ii ) ) + ci * conjg( c_d( ig, ii+1 ))
|
||||
psis( nl_d( ig ) + ioff) = c_d( ig, ii ) + ci * c_d( ig, ii+1 )
|
||||
end do
|
||||
ELSE IF( ii == nbsp_bgrp ) THEN
|
||||
ptmp(:,1) = c_bgrp( :, ii )
|
||||
!$cuf kernel do(1)
|
||||
do ig = 1, dffts%ngw
|
||||
psis( nlm_d( ig ) + ioff) = CONJG( ptmp( ig, 1 ) )
|
||||
psis( nl_d( ig ) + ioff) = ptmp( ig, 1 )
|
||||
psis( nlm_d( ig ) + ioff) = CONJG( c_d( ig, ii ) )
|
||||
psis( nl_d( ig ) + ioff) = c_d( ig, ii+1 )
|
||||
end do
|
||||
END IF
|
||||
! CALL c2psi_gamma( dffts, psis, c_bgrp(:,ii), c_bgrp(:,ii+1) )
|
||||
|
@ -695,7 +700,7 @@ SUBROUTINE drhov(irb,eigrb,rhovan,drhovan,rhog,rhor,drhog,drhor)
|
|||
|
||||
!$omp parallel default(none) &
|
||||
!$omp shared(nat, ityp, ngb, nh, eigrb, dfftb, irb, v, &
|
||||
!$omp ci, i, j, dqgb, qgb, nhm, rhovan, drhovan, upf ) &
|
||||
!$omp i, j, dqgb, qgb, nhm, rhovan, drhovan, upf ) &
|
||||
!$omp private(mytid, ntids, is, ia, iv, jv, ijv, ig, iss, &
|
||||
!$omp qv, fg1, fg2, itid, dqgbt, dsumt, asumt )
|
||||
|
||||
|
@ -949,7 +954,7 @@ SUBROUTINE rhov(irb,eigrb,rhovan,rhog,rhor)
|
|||
|
||||
!$omp parallel default(none) &
|
||||
!$omp shared(na, ngb, nh, rhovan, qgb, eigrb, dfftb, iverbosity, omegab, irb, v, &
|
||||
!$omp stdout, ci, rhor, dfftp, upf, nsp, ityp, nat, nspin ) &
|
||||
!$omp stdout, rhor, dfftp, upf, nsp, ityp, nat, nspin ) &
|
||||
!$omp private(mytid, ntids, is, ia, iv, jv, ijv, sumrho, qgbt, ig, ca, &
|
||||
!$omp qv, itid, ir )
|
||||
|
||||
|
@ -1152,3 +1157,35 @@ CONTAINS
|
|||
ENDIF
|
||||
END SUBROUTINE
|
||||
END SUBROUTINE rhov
|
||||
|
||||
#if defined (__CUDA)
|
||||
SUBROUTINE rhoofr_host &
|
||||
( nfi, c_bgrp, irb, eigrb, bec_bgrp, dbec, rhovan, rhor, drhor, rhog, drhog, rhos, &
|
||||
enl, denl, ekin, dekin, tstress, ndwwf )
|
||||
USE kinds, ONLY: DP
|
||||
USE cudafor
|
||||
USE cp_interfaces
|
||||
IMPLICIT NONE
|
||||
INTEGER nfi
|
||||
COMPLEX(DP) c_bgrp( :, : )
|
||||
INTEGER irb( :, : )
|
||||
COMPLEX(DP) eigrb( :, : )
|
||||
REAL(DP) bec_bgrp(:,:)
|
||||
REAL(DP) dbec(:,:,:,:)
|
||||
REAL(DP) rhovan(:, :, : )
|
||||
REAL(DP) rhor(:,:)
|
||||
REAL(DP) drhor(:,:,:,:)
|
||||
COMPLEX(DP) rhog( :, : )
|
||||
COMPLEX(DP) drhog( :, :, :, : )
|
||||
REAL(DP) rhos(:,:)
|
||||
REAL(DP) enl, ekin
|
||||
REAL(DP) denl(3,3), dekin(6)
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: tstress
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: ndwwf
|
||||
COMPLEX(DP), ALLOCATABLE, DEVICE :: c(:,:)
|
||||
ALLOCATE( c, SOURCE=c_bgrp )
|
||||
CALL rhoofr(nfi, c_bgrp, c, bec_bgrp, dbec, rhovan, rhor, &
|
||||
drhor, rhog, drhog, rhos, enl, denl, ekin, dekin, tstress, ndwwf )
|
||||
DEALLOCATE( c )
|
||||
END SUBROUTINE rhoofr_host
|
||||
#endif
|
||||
|
|
|
@ -7,6 +7,12 @@
|
|||
!
|
||||
! written by Carlo Cavazzoni
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define DEVICEATTR ,DEVICE
|
||||
#else
|
||||
#define DEVICEATTR
|
||||
#endif
|
||||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
MODULE cp_interfaces
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
@ -247,8 +253,30 @@
|
|||
|
||||
INTERFACE rhoofr
|
||||
SUBROUTINE rhoofr_cp &
|
||||
( nfi, c_bgrp, irb, eigrb, bec, dbec, rhovan, rhor, drhor, rhog, drhog, rhos, enl, denl, ekin, dekin, tstress, ndwwf )
|
||||
( nfi, c_bgrp, c_d, bec, dbec, rhovan, rhor, drhor, rhog, drhog, rhos, enl, denl, ekin, dekin, tstress, ndwwf )
|
||||
USE kinds, ONLY: DP
|
||||
USE cudafor
|
||||
IMPLICIT NONE
|
||||
INTEGER nfi
|
||||
COMPLEX(DP) :: c_bgrp( :, : )
|
||||
COMPLEX(DP) DEVICEATTR :: c_d( :, : )
|
||||
REAL(DP) bec(:,:)
|
||||
REAL(DP) dbec(:,:,:,:)
|
||||
REAL(DP) rhovan(:, :, : )
|
||||
REAL(DP) rhor(:,:)
|
||||
REAL(DP) drhor(:,:,:,:)
|
||||
COMPLEX(DP) rhog( :, : )
|
||||
COMPLEX(DP) drhog( :, :, :, : )
|
||||
REAL(DP) rhos(:,:)
|
||||
REAL(DP) enl, ekin
|
||||
REAL(DP) denl(3,3), dekin(6)
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: tstress
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: ndwwf
|
||||
END SUBROUTINE rhoofr_cp
|
||||
#if defined (__CUDA)
|
||||
SUBROUTINE rhoofr_host &
|
||||
( nfi, c_bgrp, irb, eigrb, bec, dbec, rhovan, rhor, drhor, rhog, drhog, rhos, enl, denl, ekin, dekin, tstress, ndwwf )
|
||||
USE kinds, ONLY: DP
|
||||
IMPLICIT NONE
|
||||
INTEGER nfi
|
||||
COMPLEX(DP) c_bgrp( :, : )
|
||||
|
@ -266,7 +294,8 @@
|
|||
REAL(DP) denl(3,3), dekin(6)
|
||||
LOGICAL, OPTIONAL, INTENT(IN) :: tstress
|
||||
INTEGER, OPTIONAL, INTENT(IN) :: ndwwf
|
||||
END SUBROUTINE rhoofr_cp
|
||||
END SUBROUTINE rhoofr_host
|
||||
#endif
|
||||
END INTERFACE
|
||||
|
||||
INTERFACE checkrho
|
||||
|
@ -774,7 +803,7 @@
|
|||
|
||||
INTERFACE move_electrons
|
||||
SUBROUTINE move_electrons_x( &
|
||||
nfi, tfirst, tlast, b1, b2, b3, fion, c0_bgrp, cm_bgrp, phi_bgrp, enthal, enb, &
|
||||
nfi, tfirst, tlast, b1, b2, b3, fion, enthal, enb, &
|
||||
& enbi, fccc, ccc, dt2bye, stress,l_cprestart )
|
||||
USE kinds, ONLY: DP
|
||||
IMPLICIT NONE
|
||||
|
@ -782,7 +811,6 @@
|
|||
LOGICAL, INTENT(IN) :: tfirst, tlast
|
||||
REAL(DP), INTENT(IN) :: b1(3), b2(3), b3(3)
|
||||
REAL(DP) :: fion(:,:)
|
||||
COMPLEX(DP) :: c0_bgrp(:,:), cm_bgrp(:,:), phi_bgrp(:,:)
|
||||
REAL(DP), INTENT(IN) :: dt2bye
|
||||
REAL(DP) :: fccc, ccc
|
||||
REAL(DP) :: enb, enbi
|
||||
|
@ -898,6 +926,16 @@
|
|||
REAL(DP), INTENT(IN) :: f( : )
|
||||
REAL(DP) :: enkin_x
|
||||
END FUNCTION enkin_x
|
||||
#if defined (__CUDA)
|
||||
FUNCTION enkin_gpu_x( c, f, n )
|
||||
USE kinds, ONLY: dp
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: n
|
||||
COMPLEX(DP), DEVICE, INTENT(IN) :: c( :, : )
|
||||
REAL(DP), INTENT(IN) :: f( : )
|
||||
REAL(DP) :: enkin_gpu_x
|
||||
END FUNCTION enkin_gpu_x
|
||||
#endif
|
||||
END INTERFACE
|
||||
|
||||
INTERFACE newinit
|
||||
|
|
|
@ -1640,30 +1640,82 @@ end subroutine dylmr2_
|
|||
! local
|
||||
|
||||
INTEGER :: ig, i
|
||||
REAL(DP) :: sk(n) ! automatic array
|
||||
REAL(DP) :: sk, rsum
|
||||
!
|
||||
sk = 0.0d0
|
||||
!$omp parallel do reduction(+:sk) default(none) &
|
||||
!$omp shared(sk,c,g2kin,gstart,ngw,n,f) private(i,ig,rsum)
|
||||
DO i=1,n
|
||||
sk(i)=0.0d0
|
||||
rsum = 0.0d0
|
||||
DO ig=gstart,ngw
|
||||
sk(i)=sk(i)+DBLE(CONJG(c(ig,i))*c(ig,i))*g2kin(ig)
|
||||
rsum = rsum + DBLE(CONJG(c(ig,i))*c(ig,i)) * g2kin(ig)
|
||||
END DO
|
||||
sk = sk + f(i) * rsum
|
||||
END DO
|
||||
!$omp end parallel do
|
||||
|
||||
CALL mp_sum( sk(1:n), intra_bgrp_comm )
|
||||
|
||||
enkin_x=0.0d0
|
||||
DO i=1,n
|
||||
enkin_x=enkin_x+f(i)*sk(i)
|
||||
END DO
|
||||
CALL mp_sum( sk, intra_bgrp_comm )
|
||||
|
||||
! ... reciprocal-space vectors are in units of alat/(2 pi) so a
|
||||
! ... multiplicative factor (2 pi/alat)**2 is required
|
||||
|
||||
enkin_x = enkin_x * tpiba2
|
||||
enkin_x = tpiba2 * sk
|
||||
!
|
||||
RETURN
|
||||
END FUNCTION enkin_x
|
||||
|
||||
#if defined (__CUDA)
|
||||
!-----------------------------------------------------------------------
|
||||
FUNCTION enkin_gpu_x( c, f, n )
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
USE kinds, ONLY: DP
|
||||
USE constants, ONLY: pi, fpi
|
||||
USE gvecw, ONLY: ngw
|
||||
USE gvect, ONLY: gstart
|
||||
USE gvecw, ONLY: g2kin
|
||||
USE mp, ONLY: mp_sum
|
||||
USE mp_global, ONLY: intra_bgrp_comm
|
||||
USE cell_base, ONLY: tpiba2
|
||||
USE cudafor
|
||||
|
||||
IMPLICIT NONE
|
||||
|
||||
REAL(DP) :: enkin_gpu_x
|
||||
|
||||
INTEGER, INTENT(IN) :: n
|
||||
COMPLEX(DP), DEVICE, INTENT(IN) :: c( :, : )
|
||||
REAL(DP), INTENT(IN) :: f( : )
|
||||
!
|
||||
! local
|
||||
|
||||
INTEGER :: ig, i
|
||||
REAL(DP) :: sk
|
||||
REAL(DP), ALLOCATABLE, DEVICE :: f_d(:)
|
||||
REAL(DP), ALLOCATABLE, DEVICE :: g2(:)
|
||||
REAL(DP) :: ddot
|
||||
!
|
||||
ALLOCATE( g2, SOURCE=g2kin )
|
||||
ALLOCATE( f_d, SOURCE=f )
|
||||
|
||||
sk=0.0d0
|
||||
!$cuf kernel do(2) <<<*,*>>>
|
||||
DO i=1,n
|
||||
DO ig=gstart,ngw
|
||||
sk = sk + f_d(i) * DBLE(CONJG(c(ig,i))*c(ig,i)) * g2(ig)
|
||||
END DO
|
||||
END DO
|
||||
|
||||
DEALLOCATE( f_d )
|
||||
DEALLOCATE( g2 )
|
||||
|
||||
CALL mp_sum( sk, intra_bgrp_comm )
|
||||
|
||||
enkin_gpu_x = tpiba2 * sk
|
||||
!
|
||||
RETURN
|
||||
END FUNCTION enkin_gpu_x
|
||||
#endif
|
||||
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE nlfl_bgrp_x( bec_bgrp, becdr_bgrp, lambda, idesc, fion )
|
||||
!-----------------------------------------------------------------------
|
||||
|
|
|
@ -80,7 +80,7 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
electrons_nosevel, electrons_noseupd
|
||||
USE pres_ai_mod, ONLY : P_ext, P_in, P_fin, pvar, volclu, &
|
||||
surfclu, Surf_t, abivol, abisur
|
||||
USE wavefunctions, ONLY : c0_bgrp, cm_bgrp, phi_bgrp, cm_d, phi_d
|
||||
USE wavefunctions, ONLY : c0_bgrp, cm_bgrp, phi_bgrp, cm_d, phi_d, c0_d
|
||||
USE wannier_module, ONLY : allocate_wannier
|
||||
USE cp_interfaces, ONLY : printout_new, move_electrons, newinit
|
||||
USE cell_nose, ONLY : xnhh0, xnhhm, xnhhp, vnhh, temph, &
|
||||
|
@ -118,6 +118,7 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
USE input_parameters, ONLY : tcpbo
|
||||
USE funct, ONLY : dft_is_hybrid, start_exx, exx_is_active
|
||||
USE funct, ONLY : dft_is_meta
|
||||
USE device_helper
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -289,13 +290,14 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
!
|
||||
END IF
|
||||
!
|
||||
! ... why this call ??? from Paolo Umari
|
||||
IF( force_pairing ) THEN
|
||||
c0_bgrp(:,iupdwn(2):nbsp) = c0_bgrp(:,1:nupdwn(2))
|
||||
cm_bgrp(:,iupdwn(2):nbsp) = cm_bgrp(:,1:nupdwn(2))
|
||||
phi_bgrp(:,iupdwn(2):nbsp) = phi_bgrp(:,1:nupdwn(2))
|
||||
lambda(:,:, 2) = lambda(:,:, 1)
|
||||
ENDIF
|
||||
!
|
||||
IF ( tefield .or. tefield2 ) THEN
|
||||
!
|
||||
CALL calbec( 1, nsp, eigr, c0_bgrp, bec_bgrp ) ! ATTENZIONE
|
||||
!
|
||||
END IF
|
||||
CALL sync_to_host( c0_bgrp, c0_d )
|
||||
!
|
||||
! Autopilot (Dynamic Rules) Implimentation
|
||||
!
|
||||
|
@ -323,13 +325,6 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
!
|
||||
!=======================================================================
|
||||
!
|
||||
IF( force_pairing ) THEN
|
||||
c0_bgrp(:,iupdwn(2):nbsp) = c0_bgrp(:,1:nupdwn(2))
|
||||
cm_bgrp(:,iupdwn(2):nbsp) = cm_bgrp(:,1:nupdwn(2))
|
||||
phi_bgrp(:,iupdwn(2):nbsp) = phi_bgrp(:,1:nupdwn(2))
|
||||
lambda(:,:, 2) = lambda(:,:, 1)
|
||||
ENDIF
|
||||
!
|
||||
! ... fake electronic kinetic energy
|
||||
!
|
||||
IF ( .NOT. tcg ) THEN
|
||||
|
@ -341,8 +336,7 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
END IF
|
||||
!
|
||||
CALL move_electrons( nfi, tfirst, tlast, bg(:,1), bg(:,2), bg(:,3), &
|
||||
fion, c0_bgrp, cm_bgrp, phi_bgrp, &
|
||||
enthal, enb, enbi, fccc, ccc, dt2bye, stress, .false. )
|
||||
fion, enthal, enb, enbi, fccc, ccc, dt2bye, stress, .false. )
|
||||
!
|
||||
IF (lda_plus_u) fion = fion + forceh
|
||||
!
|
||||
|
@ -555,8 +549,8 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
IF ( tortho ) THEN
|
||||
!
|
||||
#if defined (__CUDA)
|
||||
cm_d = cm_bgrp
|
||||
phi_d = phi_bgrp
|
||||
CALL sync_to_host( cm_bgrp, cm_d )
|
||||
CALL sync_to_host( phi_bgrp, phi_d )
|
||||
CALL ortho( eigr, cm_d, phi_d, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
|
||||
#else
|
||||
CALL ortho( eigr, cm_bgrp, phi_bgrp, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
|
||||
|
@ -577,8 +571,8 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
IF ( tortho ) THEN
|
||||
#if defined (__CUDA)
|
||||
CALL updatc( ccc, lambda, phi_d, bephi, becp_bgrp, bec_d, cm_d, idesc )
|
||||
bec_bgrp = bec_d
|
||||
cm_bgrp = cm_d
|
||||
CALL sync_to_device( bec_bgrp, bec_d )
|
||||
CALL sync_to_device( cm_bgrp, cm_d )
|
||||
#else
|
||||
CALL updatc( ccc, lambda, phi_bgrp, bephi, becp_bgrp, bec_bgrp, cm_bgrp, idesc )
|
||||
#endif
|
||||
|
@ -831,8 +825,7 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
|
|||
lambdam = lambda
|
||||
!
|
||||
CALL move_electrons( nfi, tfirst, tlast, bg(:,1), bg(:,2), bg(:,3),&
|
||||
fion, c0_bgrp, cm_bgrp, phi_bgrp, enthal, enb,&
|
||||
enbi, fccc, ccc, dt2bye, stress,.true. )
|
||||
fion, enthal, enb, enbi, fccc, ccc, dt2bye, stress,.true. )
|
||||
!
|
||||
END IF
|
||||
!
|
||||
|
|
|
@ -48,7 +48,7 @@ SUBROUTINE from_scratch( )
|
|||
USE printout_base, ONLY : printout_pos
|
||||
USE orthogonalize_base, ONLY : updatc, calphi_bgrp
|
||||
USE wave_base, ONLY : wave_steepest
|
||||
USE wavefunctions, ONLY : c0_bgrp, cm_bgrp, phi_bgrp, c0_d, phi_d
|
||||
USE wavefunctions, ONLY : c0_bgrp, cm_bgrp, phi_bgrp, c0_d, phi_d, cm_d
|
||||
USE fft_base, ONLY : dfftp, dffts
|
||||
USE time_step, ONLY : delt
|
||||
USE cp_main_variables, ONLY : idesc, bephi, becp_bgrp, nfi, &
|
||||
|
@ -59,6 +59,7 @@ SUBROUTINE from_scratch( )
|
|||
USE mp_world, ONLY : mpime
|
||||
USE mp, ONLY : mp_sum
|
||||
USE matrix_inversion
|
||||
USE device_helper
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -144,6 +145,7 @@ SUBROUTINE from_scratch( )
|
|||
!
|
||||
if( iverbosity > 1 ) CALL dotcsc( eigr, cm_bgrp, ngw, nbsp )
|
||||
!
|
||||
CALL sync_to_host( cm_bgrp, cm_d )
|
||||
!
|
||||
! ... initialize bands
|
||||
!
|
||||
|
@ -194,7 +196,7 @@ SUBROUTINE from_scratch( )
|
|||
!
|
||||
if ( tstress ) CALL caldbec_bgrp( eigr, cm_bgrp, dbec, idesc )
|
||||
!
|
||||
CALL rhoofr( nfi, cm_bgrp, irb, eigrb, bec_bgrp, dbec, becsum, rhor, drhor, rhog, drhog, rhos, enl, denl, ekin, dekin6 )
|
||||
CALL rhoofr( nfi, cm_bgrp, cm_d, bec_bgrp, dbec, becsum, rhor, drhor, rhog, drhog, rhos, enl, denl, ekin, dekin6 )
|
||||
!
|
||||
edft%enl = enl
|
||||
edft%ekin = ekin
|
||||
|
@ -262,8 +264,8 @@ SUBROUTINE from_scratch( )
|
|||
|
||||
if( tortho ) then
|
||||
#if defined (__CUDA)
|
||||
c0_d = c0_bgrp
|
||||
phi_d = phi_bgrp
|
||||
CALL sync_to_host( c0_bgrp, c0_d )
|
||||
CALL sync_to_host( phi_bgrp, phi_d )
|
||||
CALL ortho( eigr, c0_d, phi_d, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
|
||||
#else
|
||||
CALL ortho( eigr, c0_bgrp, phi_bgrp, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
|
||||
|
@ -285,8 +287,8 @@ SUBROUTINE from_scratch( )
|
|||
IF ( tortho ) THEN
|
||||
#if defined (__CUDA)
|
||||
CALL updatc( ccc, lambda, phi_d, bephi, becp_bgrp, bec_d, c0_d, idesc )
|
||||
bec_bgrp = bec_d
|
||||
c0_bgrp = c0_d
|
||||
CALL sync_to_device( c0_bgrp, c0_d )
|
||||
CALL sync_to_device( bec_bgrp, bec_d )
|
||||
#else
|
||||
CALL updatc( ccc, lambda, phi_bgrp, bephi, becp_bgrp, bec_bgrp, c0_bgrp, idesc )
|
||||
#endif
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
!
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE move_electrons_x( nfi, tfirst, tlast, b1, b2, b3, fion, c0_bgrp, &
|
||||
cm_bgrp, phi_bgrp, enthal, enb, enbi, fccc, ccc, dt2bye, stress, l_cprestart )
|
||||
SUBROUTINE move_electrons_x( nfi, tfirst, tlast, b1, b2, b3, fion, &
|
||||
enthal, enb, enbi, fccc, ccc, dt2bye, stress, l_cprestart )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
! ... this routine updates the electronic degrees of freedom
|
||||
|
@ -43,7 +43,7 @@ SUBROUTINE move_electrons_x( nfi, tfirst, tlast, b1, b2, b3, fion, c0_bgrp, &
|
|||
USE electrons_module, ONLY : distribute_c, collect_c, distribute_b
|
||||
USE gvect, ONLY : eigts1, eigts2, eigts3
|
||||
USE control_flags, ONLY : lwfpbe0nscf ! exx_wf related
|
||||
USE wavefunctions, ONLY : cv0 ! Lingzhu Kong
|
||||
USE wavefunctions, ONLY : cv0, c0_bgrp, cm_bgrp, phi_bgrp, c0_d, cm_d, phi_d
|
||||
USE funct, ONLY : dft_is_hybrid, exx_is_active
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -52,7 +52,6 @@ SUBROUTINE move_electrons_x( nfi, tfirst, tlast, b1, b2, b3, fion, c0_bgrp, &
|
|||
LOGICAL, INTENT(IN) :: tfirst, tlast
|
||||
REAL(DP), INTENT(IN) :: b1(3), b2(3), b3(3)
|
||||
REAL(DP) :: fion(:,:)
|
||||
COMPLEX(DP) :: c0_bgrp(:,:), cm_bgrp(:,:), phi_bgrp(:,:)
|
||||
REAL(DP), INTENT(IN) :: dt2bye
|
||||
REAL(DP) :: fccc, ccc
|
||||
REAL(DP) :: enb, enbi
|
||||
|
@ -80,7 +79,7 @@ SUBROUTINE move_electrons_x( nfi, tfirst, tlast, b1, b2, b3, fion, c0_bgrp, &
|
|||
CALL get_wannier_center( tfirst, cm_bgrp, bec_bgrp, eigr, &
|
||||
eigrb, taub, irb, ibrav, b1, b2, b3 )
|
||||
!
|
||||
CALL rhoofr( nfi, c0_bgrp, irb, eigrb, bec_bgrp, dbec, becsum, rhor, &
|
||||
CALL rhoofr( nfi, c0_bgrp, c0_d, bec_bgrp, dbec, becsum, rhor, &
|
||||
drhor, rhog, drhog, rhos, enl, denl, ekin, dekin6 )
|
||||
!
|
||||
!=================================================================
|
||||
|
|
|
@ -686,9 +686,7 @@
|
|||
!
|
||||
real(DP) :: sumt, sums(2), ennl_t
|
||||
integer :: is, iv, jv, ijv, inl, jnl, ia, iss, i, indv
|
||||
#if defined(_OPENMP)
|
||||
INTEGER :: mytid, ntids, omp_get_thread_num, omp_get_num_threads
|
||||
#endif
|
||||
INTEGER :: omp_get_num_threads
|
||||
!
|
||||
ennl_t = 0.d0
|
||||
!
|
||||
|
|
|
@ -11,11 +11,69 @@
|
|||
! accelerator devices
|
||||
|
||||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
||||
MODULE device_helper
|
||||
USE util_param, ONLY : DP
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
#endif
|
||||
IMPLICIT NONE
|
||||
SAVE
|
||||
PRIVATE
|
||||
|
||||
INTERFACE sync_to_device
|
||||
MODULE PROCEDURE sync_to_device_c2d, sync_to_device_r2d
|
||||
END INTERFACE
|
||||
INTERFACE sync_to_host
|
||||
MODULE PROCEDURE sync_to_host_c2d, sync_to_host_r2d
|
||||
END INTERFACE
|
||||
|
||||
PUBLIC :: sync_to_device
|
||||
PUBLIC :: sync_to_host
|
||||
|
||||
CONTAINS
|
||||
|
||||
SUBROUTINE sync_to_device_c2d( h, d )
|
||||
COMPLEX(DP), INTENT(IN) :: d(:,:)
|
||||
COMPLEX(DP), INTENT(OUT) :: h(:,:)
|
||||
#if defined(__CUDA)
|
||||
ATTRIBUTES(DEVICE) :: d
|
||||
h = d
|
||||
#endif
|
||||
END SUBROUTINE
|
||||
SUBROUTINE sync_to_host_c2d( h, d )
|
||||
COMPLEX(DP), INTENT(OUT) :: d(:,:)
|
||||
COMPLEX(DP), INTENT(IN) :: h(:,:)
|
||||
#if defined(__CUDA)
|
||||
ATTRIBUTES(DEVICE) :: d
|
||||
d = h
|
||||
#endif
|
||||
END SUBROUTINE
|
||||
SUBROUTINE sync_to_device_r2d( h, d )
|
||||
REAL(DP), INTENT(IN) :: d(:,:)
|
||||
REAL(DP), INTENT(OUT) :: h(:,:)
|
||||
#if defined(__CUDA)
|
||||
ATTRIBUTES(DEVICE) :: d
|
||||
h = d
|
||||
#endif
|
||||
END SUBROUTINE
|
||||
SUBROUTINE sync_to_host_r2d( h, d )
|
||||
REAL(DP), INTENT(OUT) :: d(:,:)
|
||||
REAL(DP), INTENT(IN) :: h(:,:)
|
||||
#if defined(__CUDA)
|
||||
ATTRIBUTES(DEVICE) :: d
|
||||
d = h
|
||||
#endif
|
||||
END SUBROUTINE
|
||||
|
||||
END MODULE
|
||||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
||||
SUBROUTINE qe_device_sync()
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
USE cudafor
|
||||
#endif
|
||||
INTEGER :: info
|
||||
#if defined (__CUDA)
|
||||
|
|
|
@ -2,6 +2,7 @@ clocks_handler.o : parallel_include.o
|
|||
clocks_handler.o : util_param.o
|
||||
cuda_util.o : util_param.o
|
||||
data_buffer.o : util_param.o
|
||||
device_helper.o : util_param.o
|
||||
divide.o : mp.o
|
||||
error_handler.o : mp.o
|
||||
error_handler.o : util_param.o
|
||||
|
|
Loading…
Reference in New Issue