mirror of https://gitlab.com/QEF/q-e.git
Merge branch 'reduce_wait_openacc' into 'develop'
Reduce some overhead in OpenACC and add a compiler fix. See merge request QEF/q-e!2422
This commit is contained in:
commit
2249e5e536
|
@ -318,10 +318,8 @@ subroutine phq_setup
|
|||
! 9) set the variables needed for the partial computation:
|
||||
! nat_todo, atomo, comp_irr
|
||||
|
||||
DO irr=0,nirr
|
||||
comp_irr(irr)=comp_irr_iq(irr,current_iq)
|
||||
IF (elph .AND. irr>0) comp_elph(irr)=comp_irr(irr)
|
||||
ENDDO
|
||||
comp_irr(0:nirr) = comp_irr_iq(0:nirr, current_iq)
|
||||
IF (elph) comp_elph(1:nirr) = comp_irr_iq(1:nirr, current_iq)
|
||||
!
|
||||
! The gamma_gamma case needs a different treatment
|
||||
!
|
||||
|
|
|
@ -240,6 +240,7 @@ SUBROUTINE add_vuspsi_gpu( lda, n, m, hpsi_d )
|
|||
becp_k = becp%k
|
||||
!$acc end kernels
|
||||
!
|
||||
!$acc data present(deeq) deviceptr(deeaux_d)
|
||||
DO nt = 1, ntyp
|
||||
!
|
||||
IF ( nh(nt) == 0 ) CYCLE
|
||||
|
@ -255,7 +256,7 @@ SUBROUTINE add_vuspsi_gpu( lda, n, m, hpsi_d )
|
|||
!
|
||||
!deeaux_d(:,:) = CMPLX(deeq(1:nh(nt),1:nh(nt),na,current_spin), 0.0_dp, KIND=dp )
|
||||
!
|
||||
!$acc parallel loop collapse(2) present(deeq)
|
||||
!$acc parallel loop collapse(2)
|
||||
DO j = 1, nhnt
|
||||
DO k = 1, nhnt
|
||||
deeaux_d(k,j) = CMPLX(deeq(k,j,na,current_spin), 0.0_dp, KIND=DP )
|
||||
|
@ -273,6 +274,7 @@ SUBROUTINE add_vuspsi_gpu( lda, n, m, hpsi_d )
|
|||
END DO
|
||||
!
|
||||
END DO
|
||||
!$acc end data
|
||||
CALL dev_buf%release_buffer(deeaux_d, ierr) ! DEALLOCATE (deeaux_d)
|
||||
!
|
||||
!$acc host_data use_device(vkb)
|
||||
|
|
Loading…
Reference in New Issue