mirror of https://gitlab.com/QEF/q-e.git
vkb host copy is NOT updated every time init_us_2 is called; callers that need vkb on the host now issue `!$acc update self(vkb)` explicitly
This commit is contained in:
parent
26a4632ff6
commit
26a2393060
|
@ -354,6 +354,8 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
||||||
FORALL( ig = 1 : npw )
|
FORALL( ig = 1 : npw )
|
||||||
h_diag(ig, 1) = g2kin(ig) + v_of_0
|
h_diag(ig, 1) = g2kin(ig) + v_of_0
|
||||||
END FORALL
|
END FORALL
|
||||||
|
!
|
||||||
|
!$acc update self(vkb)
|
||||||
CALL usnldiag( npw, h_diag, s_diag )
|
CALL usnldiag( npw, h_diag, s_diag )
|
||||||
END IF
|
END IF
|
||||||
!
|
!
|
||||||
|
@ -693,6 +695,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
||||||
!
|
!
|
||||||
CALL allocate_bec_type( nkb, nbnd, bec_evcel )
|
CALL allocate_bec_type( nkb, nbnd, bec_evcel )
|
||||||
!
|
!
|
||||||
|
!$acc update self(vkb)
|
||||||
CALL calbec( npw, vkb, evcel, bec_evcel )
|
CALL calbec( npw, vkb, evcel, bec_evcel )
|
||||||
!
|
!
|
||||||
ENDIF
|
ENDIF
|
||||||
|
@ -719,6 +722,8 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
|
||||||
FORALL( ig = 1 : npwx )
|
FORALL( ig = 1 : npwx )
|
||||||
h_diag(ig, :) = g2kin(ig) + v_of_0
|
h_diag(ig, :) = g2kin(ig) + v_of_0
|
||||||
END FORALL
|
END FORALL
|
||||||
|
!
|
||||||
|
!$acc update self(vkb)
|
||||||
CALL usnldiag( npw, h_diag, s_diag )
|
CALL usnldiag( npw, h_diag, s_diag )
|
||||||
ENDIF
|
ENDIF
|
||||||
!
|
!
|
||||||
|
|
|
@ -153,14 +153,17 @@ SUBROUTINE force_hub_gpu( forceh )
|
||||||
CALL using_evc_d(0)
|
CALL using_evc_d(0)
|
||||||
!
|
!
|
||||||
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb, .true. )
|
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb, .true. )
|
||||||
|
!$acc update self(vkb)
|
||||||
|
!
|
||||||
! Compute spsi = S * psi
|
! Compute spsi = S * psi
|
||||||
CALL allocate_bec_type ( nkb, nbnd, becp)
|
CALL allocate_bec_type ( nkb, nbnd, becp)
|
||||||
CALL using_becp_auto(2) ; CALL using_becp_d_auto(2)
|
CALL using_becp_auto(2) ; CALL using_becp_d_auto(2)
|
||||||
!$acc data present(vkb(:,:))
|
!
|
||||||
!$acc host_data use_device(vkb)
|
!$acc data present(vkb(:,:))
|
||||||
|
!$acc host_data use_device(vkb)
|
||||||
CALL calbec_gpu( npw, vkb, evc_d, becp_d )
|
CALL calbec_gpu( npw, vkb, evc_d, becp_d )
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
!$acc end data
|
||||||
!
|
!
|
||||||
CALL s_psi_gpu( npwx, npw, nbnd, evc_d, spsi_d )
|
CALL s_psi_gpu( npwx, npw, nbnd, evc_d, spsi_d )
|
||||||
CALL deallocate_bec_type (becp)
|
CALL deallocate_bec_type (becp)
|
||||||
|
|
|
@ -50,7 +50,7 @@ SUBROUTINE force_us_gpu( forcenl )
|
||||||
! ... local variables
|
! ... local variables
|
||||||
!
|
!
|
||||||
COMPLEX(DP), ALLOCATABLE :: vkb1(:,:) ! contains g*|beta>
|
COMPLEX(DP), ALLOCATABLE :: vkb1(:,:) ! contains g*|beta>
|
||||||
!$acc declare device_resident(vkb1)
|
!$acc declare device_resident(vkb1)
|
||||||
!
|
!
|
||||||
COMPLEX(DP), ALLOCATABLE :: deff_nc(:,:,:,:)
|
COMPLEX(DP), ALLOCATABLE :: deff_nc(:,:,:,:)
|
||||||
REAL(DP), ALLOCATABLE :: deff(:,:,:)
|
REAL(DP), ALLOCATABLE :: deff(:,:,:)
|
||||||
|
@ -93,26 +93,31 @@ SUBROUTINE force_us_gpu( forcenl )
|
||||||
!
|
!
|
||||||
CALL using_evc_d(0)
|
CALL using_evc_d(0)
|
||||||
CALL using_becp_d_auto(2)
|
CALL using_becp_d_auto(2)
|
||||||
!$acc data present(vkb(:,:))
|
!
|
||||||
!$acc host_data use_device(vkb)
|
!$acc data present(vkb(:,:))
|
||||||
|
!$acc host_data use_device(vkb)
|
||||||
CALL calbec_gpu ( npw, vkb, evc_d, becp_d )
|
CALL calbec_gpu ( npw, vkb, evc_d, becp_d )
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
!$acc end data
|
||||||
!
|
!
|
||||||
CALL using_evc_d(0)
|
CALL using_evc_d(0)
|
||||||
DO ipol = 1, 3
|
DO ipol = 1, 3
|
||||||
!$acc data present(vkb(:,:), vkb1(npwx,nkb)) deviceptr(g_d(:,:)) copyin(igk_k(:,:))
|
!
|
||||||
!$acc host_data use_device(vkb, vkb1, igk_k)
|
!$acc data present(vkb(:,:), vkb1(npwx,nkb)) copyin(igk_k(:,:))
|
||||||
!$acc parallel loop collapse(2)
|
!$acc parallel loop collapse(2)
|
||||||
DO jkb = 1, nkb
|
DO jkb = 1, nkb
|
||||||
DO ig = 1, npw
|
DO ig = 1, npw
|
||||||
vkb1(ig,jkb) = vkb(ig,jkb) * (0.D0,-1.D0) * g_d(ipol,igk_k(ig,ik))
|
vkb1(ig,jkb) = vkb(ig,jkb) * (0.D0,-1.D0) * g_d(ipol,igk_k(ig,ik))
|
||||||
ENDDO
|
ENDDO
|
||||||
ENDDO
|
ENDDO
|
||||||
|
!$acc end data
|
||||||
!
|
!
|
||||||
|
!$acc data present(vkb1(npwx,nkb))
|
||||||
|
!$acc host_data use_device(vkb1)
|
||||||
CALL calbec_gpu ( npw, vkb1, evc_d, dbecp_d )
|
CALL calbec_gpu ( npw, vkb1, evc_d, dbecp_d )
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
!$acc end data
|
||||||
|
!
|
||||||
CALL synchronize_bec_type_gpu(dbecp_d, dbecp, 'h')
|
CALL synchronize_bec_type_gpu(dbecp_d, dbecp, 'h')
|
||||||
!
|
!
|
||||||
IF ( gamma_only ) THEN
|
IF ( gamma_only ) THEN
|
||||||
|
|
|
@ -36,15 +36,16 @@ SUBROUTINE init_us_2( npw_, igk_, q_, vkb_, run_on_gpu)
|
||||||
!
|
!
|
||||||
CALL start_clock( 'init_us_2' )
|
CALL start_clock( 'init_us_2' )
|
||||||
!
|
!
|
||||||
if(use_gpu.and.run_on_gpu) then
|
if(use_gpu.and.run_on_gpu) then
|
||||||
!$acc data copyin(igk_(npw_), eigts1(:,:), eigts2(:,:), eigts3(:,:), mill(:,:), g(:,:)) present(vkb_(npwx,nkb))
|
!
|
||||||
!$acc host_data use_device(eigts1, eigts2, eigts3, mill, g, igk_, vkb_)
|
!$acc data copyin(igk_(npw_), eigts1(:,:), eigts2(:,:), eigts3(:,:), mill(:,:), g(:,:)) present(vkb_(npwx,nkb))
|
||||||
|
!$acc host_data use_device(eigts1, eigts2, eigts3, mill, g, igk_, vkb_)
|
||||||
CALL init_us_2_base_gpu(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega,&
|
CALL init_us_2_base_gpu(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega,&
|
||||||
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&
|
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&
|
||||||
vkb_ )
|
vkb_ )
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc update self(vkb_)
|
!$acc end data
|
||||||
!$acc end data
|
!
|
||||||
else
|
else
|
||||||
CALL init_us_2_base(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega, &
|
CALL init_us_2_base(npw_, npwx, igk_, q_, nat, tau, ityp, tpiba, omega, &
|
||||||
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&
|
dfftp%nr1, dfftp%nr2, dfftp%nr3, eigts1, eigts2, eigts3, mill, g,&
|
||||||
|
|
Loading…
Reference in New Issue