mirror of https://gitlab.com/QEF/q-e.git
vkb host copy is NOT updated every time init_us_2 is called (callers now issue `!$acc update self(vkb)` where the host copy is needed)
This commit is contained in:
parent
26a4632ff6
commit
26a2393060
|
@ -354,6 +354,8 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
FORALL( ig = 1 : npw )
|
||||
h_diag(ig, 1) = g2kin(ig) + v_of_0
|
||||
END FORALL
|
||||
!
|
||||
!$acc update self(vkb)
|
||||
CALL usnldiag( npw, h_diag, s_diag )
|
||||
END IF
|
||||
!
|
||||
|
@ -693,6 +695,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
!
|
||||
CALL allocate_bec_type( nkb, nbnd, bec_evcel )
|
||||
!
|
||||
!$acc update self(vkb)
|
||||
CALL calbec( npw, vkb, evcel, bec_evcel )
|
||||
!
|
||||
ENDIF
|
||||
|
@ -719,6 +722,8 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
|||
FORALL( ig = 1 : npwx )
|
||||
h_diag(ig, :) = g2kin(ig) + v_of_0
|
||||
END FORALL
|
||||
!
|
||||
!$acc update self(vkb)
|
||||
CALL usnldiag( npw, h_diag, s_diag )
|
||||
ENDIF
|
||||
!
|
||||
|
|
|
@ -153,14 +153,17 @@ SUBROUTINE force_hub_gpu( forceh )
|
|||
CALL using_evc_d(0)
|
||||
!
|
||||
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb, .true. )
|
||||
!$acc update self(vkb)
|
||||
!
|
||||
! Compute spsi = S * psi
|
||||
CALL allocate_bec_type ( nkb, nbnd, becp)
|
||||
CALL using_becp_auto(2) ; CALL using_becp_d_auto(2)
|
||||
!$acc data present(vkb(:,:))
|
||||
!$acc host_data use_device(vkb)
|
||||
!
|
||||
!$acc data present(vkb(:,:))
|
||||
!$acc host_data use_device(vkb)
|
||||
CALL calbec_gpu( npw, vkb, evc_d, becp_d )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
CALL s_psi_gpu( npwx, npw, nbnd, evc_d, spsi_d )
|
||||
CALL deallocate_bec_type (becp)
|
||||
|
|
|
@ -50,7 +50,7 @@ SUBROUTINE force_us_gpu( forcenl )
|
|||
! ... local variables
|
||||
!
|
||||
COMPLEX(DP), ALLOCATABLE :: vkb1(:,:) ! contains g*|beta>
|
||||
!$acc declare device_resident(vkb1)
|
||||
!$acc declare device_resident(vkb1)
|
||||
!
|
||||
COMPLEX(DP), ALLOCATABLE :: deff_nc(:,:,:,:)
|
||||
REAL(DP), ALLOCATABLE :: deff(:,:,:)
|
||||
|
@ -93,26 +93,31 @@ SUBROUTINE force_us_gpu( forcenl )
|
|||
!
|
||||
CALL using_evc_d(0)
|
||||
CALL using_becp_d_auto(2)
|
||||
!$acc data present(vkb(:,:))
|
||||
!$acc host_data use_device(vkb)
|
||||
!
|
||||
!$acc data present(vkb(:,:))
|
||||
!$acc host_data use_device(vkb)
|
||||
CALL calbec_gpu ( npw, vkb, evc_d, becp_d )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
CALL using_evc_d(0)
|
||||
DO ipol = 1, 3
|
||||
!$acc data present(vkb(:,:), vkb1(npwx,nkb)) deviceptr(g_d(:,:)) copyin(igk_k(:,:))
|
||||
!$acc host_data use_device(vkb, vkb1, igk_k)
|
||||
!$acc parallel loop collapse(2)
|
||||
!
|
||||
!$acc data present(vkb(:,:), vkb1(npwx,nkb)) copyin(igk_k(:,:))
|
||||
!$acc parallel loop collapse(2)
|
||||
DO jkb = 1, nkb
|
||||
DO ig = 1, npw
|
||||
vkb1(ig,jkb) = vkb(ig,jkb) * (0.D0,-1.D0) * g_d(ipol,igk_k(ig,ik))
|
||||
ENDDO
|
||||
ENDDO
|
||||
!$acc end data
|
||||
!
|
||||
!$acc data present(vkb1(npwx,nkb))
|
||||
!$acc host_data use_device(vkb1)
|
||||
CALL calbec_gpu ( npw, vkb1, evc_d, dbecp_d )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
CALL synchronize_bec_type_gpu(dbecp_d, dbecp, 'h')
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
|
|
|
@ -36,15 +36,16 @@ SUBROUTINE init_us_2( npw_, igk_, q_, vkb_, run_on_gpu)
|
|||
!
|
||||
CALL start_clock( 'init_us_2' )
|
||||
!
|
||||
if(use_gpu.and.run_on_gpu) then
|
||||
!$acc data copyin(igk_(npw_), eigts1(:,:), eigts2(:,:), eigts3(:,:), mill(:,:), g(:,:)) present(vkb_(npwx,nkb))
|
||||
!$acc host_data use_device(eigts1, eigts2, eigts3, mill, g, igk_, vkb_)
|
||||
if(use_gpu.and.run_on_gpu) then
|
||||
!
|
||||
!$acc data copyin(igk_(npw_), eigts1(:,:), eigts2(:,:), eigts3(:,:), mill(:,:), g(:,:)) present(vkb_(npwx,nkb))
|
||||
!$acc host_data use_device(eigts1, eigts2, eigts3, mill, g, igk_, vkb_)
|
||||
CALL init_us_2_base_gpu(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega,&
|
||||
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&
|
||||
vkb_ )
|
||||
!$acc end host_data
|
||||
!$acc update self(vkb_)
|
||||
!$acc end data
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
else
|
||||
CALL init_us_2_base(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega, &
|
||||
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&
|
||||
|
|
Loading…
Reference in New Issue