mirror of https://gitlab.com/QEF/q-e.git
Oops, too many FFTs for the CPU case: many_fft is now 1 unless compiled with __CUDA
This commit is contained in:
parent
2a504028ae
commit
d0c4028987
|
@ -280,8 +280,12 @@ MODULE control_flags
|
|||
#endif
|
||||
!
|
||||
INTEGER, PUBLIC :: &
|
||||
#if defined(__CUDA)
|
||||
many_fft = 16 ! the size of FFT batches in vloc_psi and
|
||||
! sum_band. Only used in accelerated subroutines.
|
||||
#else
|
||||
many_fft = 1
|
||||
#endif
|
||||
!
|
||||
INTEGER :: ortho_max = 0 ! maximum number of iterations in routine ortho
|
||||
REAL(DP) :: ortho_eps = 0.0_DP ! threshold for convergence in routine ortho
|
||||
|
|
|
@ -738,7 +738,7 @@ SUBROUTINE sum_band()
|
|||
CALL get_rho_domag( rho%of_r(:,:), dffts%nnr, w1, psic_nc(1:,1:) )
|
||||
ELSE
|
||||
!$acc kernels
|
||||
rho%of_r(:,2:4) = 0.0_DP ! OPTIMIZE HERE: this memset can be avoided
|
||||
rho%of_r(:,2:4) = 0.0_DP
|
||||
!$acc end kernels
|
||||
ENDIF
|
||||
!
|
||||
|
@ -1206,7 +1206,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
|
|||
!$acc parallel loop collapse(2) present(becsum)
|
||||
DO ih = 1, nhnt
|
||||
DO jh = 1, nhnt
|
||||
ijh = jh + ((ih-1)*(2*nhnt-ih))/2 ! or use ijtoh(ih,jh,np) ? OPTIMIZE !!
|
||||
ijh = jh + ((ih-1)*(2*nhnt-ih))/2 ! or use ijtoh(ih,jh,np) ?
|
||||
!
|
||||
! nondiagonal terms summed and collapsed into a
|
||||
! single index (matrix is symmetric wrt (ih,jh))
|
||||
|
|
Loading…
Reference in New Issue