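vkb host copy NOT updated every time init_us_2 is called (it is refreshed with an explicit `!$acc update self(vkb)` where the host copy is needed)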

This commit is contained in:
Ivan Carnimeo 2021-09-01 18:55:30 +02:00
parent 26a4632ff6
commit 26a2393060
4 changed files with 34 additions and 20 deletions

View File

@ -354,6 +354,8 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
FORALL( ig = 1 : npw ) FORALL( ig = 1 : npw )
h_diag(ig, 1) = g2kin(ig) + v_of_0 h_diag(ig, 1) = g2kin(ig) + v_of_0
END FORALL END FORALL
!
!$acc update self(vkb)
CALL usnldiag( npw, h_diag, s_diag ) CALL usnldiag( npw, h_diag, s_diag )
END IF END IF
! !
@ -693,6 +695,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
! !
CALL allocate_bec_type( nkb, nbnd, bec_evcel ) CALL allocate_bec_type( nkb, nbnd, bec_evcel )
! !
!$acc update self(vkb)
CALL calbec( npw, vkb, evcel, bec_evcel ) CALL calbec( npw, vkb, evcel, bec_evcel )
! !
ENDIF ENDIF
@ -719,6 +722,8 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
FORALL( ig = 1 : npwx ) FORALL( ig = 1 : npwx )
h_diag(ig, :) = g2kin(ig) + v_of_0 h_diag(ig, :) = g2kin(ig) + v_of_0
END FORALL END FORALL
!
!$acc update self(vkb)
CALL usnldiag( npw, h_diag, s_diag ) CALL usnldiag( npw, h_diag, s_diag )
ENDIF ENDIF
! !

View File

@ -153,14 +153,17 @@ SUBROUTINE force_hub_gpu( forceh )
CALL using_evc_d(0) CALL using_evc_d(0)
! !
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb, .true. ) CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb, .true. )
!$acc update self(vkb)
!
! Compute spsi = S * psi ! Compute spsi = S * psi
CALL allocate_bec_type ( nkb, nbnd, becp) CALL allocate_bec_type ( nkb, nbnd, becp)
CALL using_becp_auto(2) ; CALL using_becp_d_auto(2) CALL using_becp_auto(2) ; CALL using_becp_d_auto(2)
!$acc data present(vkb(:,:)) !
!$acc host_data use_device(vkb) !$acc data present(vkb(:,:))
!$acc host_data use_device(vkb)
CALL calbec_gpu( npw, vkb, evc_d, becp_d ) CALL calbec_gpu( npw, vkb, evc_d, becp_d )
!$acc end host_data !$acc end host_data
!$acc end data !$acc end data
! !
CALL s_psi_gpu( npwx, npw, nbnd, evc_d, spsi_d ) CALL s_psi_gpu( npwx, npw, nbnd, evc_d, spsi_d )
CALL deallocate_bec_type (becp) CALL deallocate_bec_type (becp)

View File

@ -50,7 +50,7 @@ SUBROUTINE force_us_gpu( forcenl )
! ... local variables ! ... local variables
! !
COMPLEX(DP), ALLOCATABLE :: vkb1(:,:) ! contains g*|beta> COMPLEX(DP), ALLOCATABLE :: vkb1(:,:) ! contains g*|beta>
!$acc declare device_resident(vkb1) !$acc declare device_resident(vkb1)
! !
COMPLEX(DP), ALLOCATABLE :: deff_nc(:,:,:,:) COMPLEX(DP), ALLOCATABLE :: deff_nc(:,:,:,:)
REAL(DP), ALLOCATABLE :: deff(:,:,:) REAL(DP), ALLOCATABLE :: deff(:,:,:)
@ -93,26 +93,31 @@ SUBROUTINE force_us_gpu( forcenl )
! !
CALL using_evc_d(0) CALL using_evc_d(0)
CALL using_becp_d_auto(2) CALL using_becp_d_auto(2)
!$acc data present(vkb(:,:)) !
!$acc host_data use_device(vkb) !$acc data present(vkb(:,:))
!$acc host_data use_device(vkb)
CALL calbec_gpu ( npw, vkb, evc_d, becp_d ) CALL calbec_gpu ( npw, vkb, evc_d, becp_d )
!$acc end host_data !$acc end host_data
!$acc end data !$acc end data
! !
CALL using_evc_d(0) CALL using_evc_d(0)
DO ipol = 1, 3 DO ipol = 1, 3
!$acc data present(vkb(:,:), vkb1(npwx,nkb)) deviceptr(g_d(:,:)) copyin(igk_k(:,:)) !
!$acc host_data use_device(vkb, vkb1, igk_k) !$acc data present(vkb(:,:), vkb1(npwx,nkb)) copyin(igk_k(:,:))
!$acc parallel loop collapse(2) !$acc parallel loop collapse(2)
DO jkb = 1, nkb DO jkb = 1, nkb
DO ig = 1, npw DO ig = 1, npw
vkb1(ig,jkb) = vkb(ig,jkb) * (0.D0,-1.D0) * g_d(ipol,igk_k(ig,ik)) vkb1(ig,jkb) = vkb(ig,jkb) * (0.D0,-1.D0) * g_d(ipol,igk_k(ig,ik))
ENDDO ENDDO
ENDDO ENDDO
!$acc end data
! !
!$acc data present(vkb1(npwx,nkb))
!$acc host_data use_device(vkb1)
CALL calbec_gpu ( npw, vkb1, evc_d, dbecp_d ) CALL calbec_gpu ( npw, vkb1, evc_d, dbecp_d )
!$acc end host_data !$acc end host_data
!$acc end data !$acc end data
!
CALL synchronize_bec_type_gpu(dbecp_d, dbecp, 'h') CALL synchronize_bec_type_gpu(dbecp_d, dbecp, 'h')
! !
IF ( gamma_only ) THEN IF ( gamma_only ) THEN

View File

@ -36,15 +36,16 @@ SUBROUTINE init_us_2( npw_, igk_, q_, vkb_, run_on_gpu)
! !
CALL start_clock( 'init_us_2' ) CALL start_clock( 'init_us_2' )
! !
if(use_gpu.and.run_on_gpu) then if(use_gpu.and.run_on_gpu) then
!$acc data copyin(igk_(npw_), eigts1(:,:), eigts2(:,:), eigts3(:,:), mill(:,:), g(:,:)) present(vkb_(npwx,nkb)) !
!$acc host_data use_device(eigts1, eigts2, eigts3, mill, g, igk_, vkb_) !$acc data copyin(igk_(npw_), eigts1(:,:), eigts2(:,:), eigts3(:,:), mill(:,:), g(:,:)) present(vkb_(npwx,nkb))
!$acc host_data use_device(eigts1, eigts2, eigts3, mill, g, igk_, vkb_)
CALL init_us_2_base_gpu(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega,& CALL init_us_2_base_gpu(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega,&
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,& dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&
vkb_ ) vkb_ )
!$acc end host_data !$acc end host_data
!$acc update self(vkb_) !$acc end data
!$acc end data !
else else
CALL init_us_2_base(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega, & CALL init_us_2_base(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega, &
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,& dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&