Oops, too_many_ffts for CPU case: default many_fft to 1 unless __CUDA is defined (it stays 16 there)

This commit is contained in:
Paolo Giannozzi 2024-07-05 09:18:13 +02:00
parent 2a504028ae
commit d0c4028987
2 changed files with 6 additions and 2 deletions

View File

@ -280,8 +280,12 @@ MODULE control_flags
#endif #endif
! !
INTEGER, PUBLIC :: & INTEGER, PUBLIC :: &
#if defined(__CUDA)
many_fft = 16 ! the size of FFT batches in vloc_psi and many_fft = 16 ! the size of FFT batches in vloc_psi and
! sumband. Only use in accelerated subroutines. ! sumband. Only use in accelerated subroutines.
#else
many_fft = 1
#endif
! !
INTEGER :: ortho_max = 0 ! maximum number of iterations in routine ortho INTEGER :: ortho_max = 0 ! maximum number of iterations in routine ortho
REAL(DP) :: ortho_eps = 0.0_DP ! threshold for convergence in routine ortho REAL(DP) :: ortho_eps = 0.0_DP ! threshold for convergence in routine ortho

View File

@ -738,7 +738,7 @@ SUBROUTINE sum_band()
CALL get_rho_domag( rho%of_r(:,:), dffts%nnr, w1, psic_nc(1:,1:) ) CALL get_rho_domag( rho%of_r(:,:), dffts%nnr, w1, psic_nc(1:,1:) )
ELSE ELSE
!$acc kernels !$acc kernels
rho%of_r(:,2:4) = 0.0_DP ! OPTIMIZE HERE: this memset can be avoided rho%of_r(:,2:4) = 0.0_DP
!$acc end kernels !$acc end kernels
ENDIF ENDIF
! !
@ -1206,7 +1206,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
!$acc parallel loop collapse(2) present(becsum) !$acc parallel loop collapse(2) present(becsum)
DO ih = 1, nhnt DO ih = 1, nhnt
DO jh = 1, nhnt DO jh = 1, nhnt
ijh = jh + ((ih-1)*(2*nhnt-ih))/2 ! or use ijtoh(ih,jh,np) ? OPTIMIZE !! ijh = jh + ((ih-1)*(2*nhnt-ih))/2 ! or use ijtoh(ih,jh,np) ?
! !
! nondiagonal terms summed and collapsed into a ! nondiagonal terms summed and collapsed into a
! single index (matrix is symmetric wrt (ih,jh)) ! single index (matrix is symmetric wrt (ih,jh))