Merge branch 'reduce_wait_openacc' into 'develop'

Reduce some OpenACC overhead and include a compiler fix

See merge request QEF/q-e!2422
This commit is contained in:
giannozz 2024-09-03 05:55:34 +00:00
commit 2249e5e536
2 changed files with 5 additions and 5 deletions

View File

@@ -318,10 +318,8 @@ subroutine phq_setup
! 9) set the variables needed for the partial computation:
! nat_todo, atomo, comp_irr
DO irr=0,nirr
comp_irr(irr)=comp_irr_iq(irr,current_iq)
IF (elph .AND. irr>0) comp_elph(irr)=comp_irr(irr)
ENDDO
comp_irr(0:nirr) = comp_irr_iq(0:nirr, current_iq)
IF (elph) comp_elph(1:nirr) = comp_irr_iq(1:nirr, current_iq)
!
! The gamma_gamma case needs a different treatment
!

View File

@@ -240,6 +240,7 @@ SUBROUTINE add_vuspsi_gpu( lda, n, m, hpsi_d )
becp_k = becp%k
!$acc end kernels
!
!$acc data present(deeq) deviceptr(deeaux_d)
DO nt = 1, ntyp
!
IF ( nh(nt) == 0 ) CYCLE
@@ -255,7 +256,7 @@ SUBROUTINE add_vuspsi_gpu( lda, n, m, hpsi_d )
!
!deeaux_d(:,:) = CMPLX(deeq(1:nh(nt),1:nh(nt),na,current_spin), 0.0_dp, KIND=dp )
!
!$acc parallel loop collapse(2) present(deeq)
!$acc parallel loop collapse(2)
DO j = 1, nhnt
DO k = 1, nhnt
deeaux_d(k,j) = CMPLX(deeq(k,j,na,current_spin), 0.0_dp, KIND=DP )
@@ -273,6 +274,7 @@ SUBROUTINE add_vuspsi_gpu( lda, n, m, hpsi_d )
END DO
!
END DO
!$acc end data
CALL dev_buf%release_buffer(deeaux_d, ierr) ! DEALLOCATE (deeaux_d)
!
!$acc host_data use_device(vkb)